# Inversion of 2 by 2 matrices using an operator recurrent neural network

We use a simplified version of the network architecture proposed in the preprint

> Maarten V. de Hoop, Matti Lassas, Christopher A. Wong. _Deep learning architectures for nonlinear operator functions and nonlinear inverse problems_. [arXiv:1912.11090](https://arxiv.org/abs/1912.11090)

and teach it to invert matrices $X$ of the form $X = R D R^T$ where

$$
R = \begin{pmatrix}
c & -s
\\
s & c
\end{pmatrix},
\quad
D = \begin{pmatrix}
\lambda_1 & 0
\\
0 & \lambda_2
\end{pmatrix},
$$
$c = \cos(\alpha)$ and $s = \sin(\alpha)$ for some $\alpha \in (0,2\pi)$,
and $\lambda_j \in (1/2, 3/2)$, $j=1,2$.

We use the notation of version 3 of the preprint (revised 3 Jan 2022). The notation is different in earlier versions.

In the code, variables have the same meaning as in the [Quickstart](https://pytorch.org/tutorials/beginner/basics/quickstart_tutorial.html) guide of PyTorch.

# Initialization

The operator recurrent architecture is implemented in the `opnet` module, and
the generation of the learning data in `simple_inversion_data`.

File `PATH` is used to save the parameters of the network. 

In [None]:
import numpy as np
import torch
import os #help navigate files in folder
import matplotlib.pyplot as plt #plotting
import pandas as pd #data frames = nice table

import opnet
from simple_inversion_data import generate_data, save_data, load_data

# File used to save the network parameters. The active name is a scratch file
# that may be overwritten; to use one of the alternative file names instead,
# swap which assignment is uncommented.
# PATH = './simple_inversion_net4.pth'
# PATH = './simple_inversion_netPlusMinusReLU.pth'
# PATH = './simple_inversion_netNEGA.pth'
# PATH = './simple_inversion_netReLU.pth'
PATH = "./simple_inversion_netTEST.pth"


Specify the network model and the loss function.

In [None]:

# Work with dim x dim matrices; here 2 x 2.
dim = 2
num_layers = 5

# Mean squared error between the prediction and the target.
loss_fn = torch.nn.MSELoss()

# Create a new neural network. The variant without ReLU is given twice as
# many layers (2 * num_layers); the ReLU variant below uses num_layers.
# model = opnet.OperatorNet(dim, num_layers, useReLU=True)
model = opnet.OperatorNet(dim, 2 * num_layers, useReLU=False)

# Generation of training data

Training data consists of pairs $(X,y)$ where $X$ is an invertible $2 \times 2$ matrix and $y = X^{-1} v$
where $v = (1,1) \in \mathbb{R}^2$.

In [None]:
# File names of the training and test data sets. To (re)create the files,
# uncomment the two calls below.
# save_data(*generate_data(60000), "simple_inversion_train_dataPOSNEG.npz")
# save_data(*generate_data(10000), "simple_inversion_test_dataPOSNEG.npz")
train_data_path, test_data_path = (
    "simple_inversion_train_dataPOSNEG.npz",
    "simple_inversion_test_dataPOSNEG.npz",
)

# Training

In [None]:
import os.path
from importlib import reload

import training_and_testing

# Pick up edits made to the project modules without restarting the kernel.
# Note that `model` was created in an earlier cell and is not rebuilt here.
reload(training_and_testing)
reload(opnet)

# Learning rates to run. A wider sweep would be:
# lr_list = [1e-4, 1e-3, 1e-2, 1e-1]
lr_list = [1e-1]

# To start from previously saved parameters, load them first:
# if os.path.exists(PATH):
#     model.load_state_dict(torch.load(PATH))
# else:
#     print("No old path, creating new one")
#     torch.save(model.state_dict(), PATH)
#     model.load_state_dict(torch.load(PATH))
#
# To check whether the network uses ReLU:
# doesit = model.does_it_use_relu()
#
# To load the training data directly:
# train_loader = torch.utils.data.DataLoader(
#     load_data("simple_inversion_train_data.npz"),
#     batch_size=64)

# Run training and testing once per learning rate and collect the results,
# one entry per rate in the order of lr_list (the plotting cell draws them as
# average loss per epoch).
# NOTE(review): the same `model` object is passed for every rate, so with
# several rates each run may continue from the previous run's weights unless
# training_and_testing reloads them -- confirm this is intended.
losses_list = [
    training_and_testing.training_and_testing(
        model, loss_fn, lr, PATH, train_data_path, test_data_path
    )
    for lr in lr_list
]

In [None]:
# Plot the average-loss curve of every learning rate that was run.
#
# The x-axis assumes one loss value was recorded every second epoch
# (2, 4, 6, ...), matching the previously hard-coded np.arange(0, 40, 2) + 2
# -- TODO confirm against training_and_testing. Its length is taken from the
# data, so the plot keeps working if the number of recorded values changes.
for lr, losses in zip(lr_list, losses_list):
    epo = 2 * np.arange(1, len(losses) + 1)
    print(epo)
    plt.plot(epo, losses, label=f"lr={lr:g}", linestyle="--")

plt.xlabel("Epochs")
plt.ylabel("Average Loss")
plt.grid()
plt.legend()

Choose the optimization method.

In [None]:
# Learning rate parameter is from the quickstart guide 
# optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

Loop over the training data multiple times (epochs) and 
save the optimized parameters. 

In [None]:
# Manual training loop, kept for reference only (disabled). It relies on
# `train_loader` and `optimizer`, which are themselves commented out in this
# notebook; training is run through training_and_testing.training_and_testing
# in the "Training" cell instead.
# for epoch in range(2): 
#     print(f"Epoch {epoch+1}\n-------------------------------")
#     for batch, (X, y) in enumerate(train_loader):
#         # Compute prediction error
#         pred = model(X)
#         loss = loss_fn(pred, y)
#         # Backpropagation
#         optimizer.zero_grad()
#         loss.backward()
#         optimizer.step()
#         # Print statistics
#         if batch % 100 == 0:
#             n, N = (batch + 1) * len(X), len(train_loader.dataset)
#             print(f"loss: {loss.item():>7f}  [{n:>5d}/{N:>5d}]")

# Write the current model parameters (its state_dict) to the file PATH.
# NOTE(review): PATH is also passed to training_and_testing, which may already
# save there -- confirm whether this extra save is needed.
torch.save(model.state_dict(), PATH)

# Testing

If we have already trained the network, we can just load its parameters. (Note that we still need to run the initialization.)

In [None]:
## Load trained variables
# model.load_state_dict(torch.load(PATH))

In [None]:
# Load the testing data
# test_loader = torch.utils.data.DataLoader(
#     load_data("simple_inversion_test_data.npz"), 
#     batch_size=64)

Compute a couple of samples.

In [None]:
# dataiter = iter(test_loader)
# X, y = next(dataiter)
# with torch.no_grad():
#     pred = model(X)
# print("True: ")
# print(y[:2])
# print("Prediction: ")
# print(pred[:2])

In [None]:
# num_batches = len(test_loader)
# test_loss = 0
# with torch.no_grad():
#     for X, y in test_loader:
#         pred = model(X)
#         test_loss += loss_fn(pred, y).item()
# test_loss /= num_batches
# print(f"Avg loss: {test_loss:>8f} \n")