## Matrix Approximation using Neural Networks
*July 10th, 2024*

In [1]:
import torch
import numpy as np
from torch import nn
import torch.nn.functional as F
from torch.utils.data import DataLoader

# Learning Parameters
num_epochs = 10                 # Number of Epochs for training phase
batch_size = 100                # Amount of Training observations per batch
momentum = 0.9                  # Accelerate optimization during training

initial_learn_rate = 0.25       # Learning rate employed at the beginning of training
learn_rate_drop_factor = 0.5    # Multiplies learning rate each period
learn_rate_drop_period = 5      # Period of epochs needed to update the learning parameter

# Training / Test Datasets Dimensions
train_vectors_num = 3000
test_vectors_num = 100

# Network input / output dimensions
input_features = 11             # Vectors dimensions for Training and Test dataset
output_features = 7

In [2]:
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device") 

Using cuda device


### Neural Network Architecture

Where our $\Phi$ is a $(10 \times 10)$ matrix

In [3]:
class NeuralNetwork(nn.Module):
    def __init__(self, hidden_size, output_size, input_size):
        super().__init__()
        self.flatten = nn.Flatten()
        self.net = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.Tanh(),
            nn.Linear(hidden_size, hidden_size),
            nn.Tanh(),
            nn.Linear(hidden_size, output_features)
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.net(x)
        return F.pad(logits, (2, 2))
    
model = NeuralNetwork(hidden_size=100, output_size=output_features, input_size=input_features).to(device)
model

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (net): Sequential(
    (0): Linear(in_features=11, out_features=100, bias=True)
    (1): Tanh()
    (2): Linear(in_features=100, out_features=100, bias=True)
    (3): Tanh()
    (4): Linear(in_features=100, out_features=7, bias=True)
  )
)

In [4]:
def row_to_matrix(row):
    n = np.size(row)
    k = n // 2
    M = np.zeros((n, n))
    for i in range(n):
        M[i,max(0,i - k):min(n, k + 1 + i)] = row[max(0, n - (k + i + 1)):min(n + k - i, n)]
    return M

def torch_row_to_matrix(row):
    n = torch.Tensor.size(row, 0)
    k = n // 2
    M = torch.zeros((n, n))
    for i in range(n):
        M[i,max(0,i - k):min(n, k + 1 + i)] = row[max(0, n - (k + i + 1)):min(n + k - i, n)]
    return M.to(device)

w = row_to_matrix(np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]))
v = torch_row_to_matrix(torch.tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype=torch.float32).to(device))
v

tensor([[ 5.,  6.,  7.,  8.,  9., 10.,  0.,  0.,  0.,  0.,  0.],
        [ 4.,  5.,  6.,  7.,  8.,  9., 10.,  0.,  0.,  0.,  0.],
        [ 3.,  4.,  5.,  6.,  7.,  8.,  9., 10.,  0.,  0.,  0.],
        [ 2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.,  0.,  0.],
        [ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.,  0.],
        [ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.],
        [ 0.,  0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.],
        [ 0.,  0.,  0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.],
        [ 0.,  0.,  0.,  0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.],
        [ 0.,  0.,  0.,  0.,  0.,  1.,  2.,  3.,  4.,  5.,  6.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  1.,  2.,  3.,  4.,  5.]],
       device='cuda:0')

In [5]:
# Training Dataset
Xs = np.array(list(map(lambda _: np.random.rand(input_features) * input_features, range(train_vectors_num))))
Xs = torch.tensor(Xs, dtype=torch.float32).to(device)
train_dataloader = DataLoader(Xs, batch_size=batch_size, shuffle=True)

# Testing Vectors
ys = np.array([np.random.rand(input_features) * np.random.rand(1) * input_features for _ in range(test_vectors_num)])
ys = torch.tensor(ys, dtype = torch.float32).to(device)

In [6]:
# Custom Loss Function
class Spectral_Loss(nn.Module):
    def __init__(self, ys):
        super(Spectral_Loss, self).__init__()
        self.ys = ys

    def forward(self, target, v):
        X = torch_row_to_matrix(target)
        A = torch_row_to_matrix(v)
        return sum([(torch.linalg.norm(torch.matmul(X, yk) - torch.matmul(A, yk)) ** 2) / (torch.linalg.norm(yk) ** 2) for yk in self.ys]) / test_vectors_num * train_vectors_num

    def _backward_hook(self, module, grad_input, grad_output):
        pass

loss_fn = Spectral_Loss(ys)
optimizer = torch.optim.Adagrad(model.parameters(), lr = initial_learn_rate)

In [7]:
# Training Loop
for epoch in range(1, num_epochs + 1):
    running_loss = 0.0
    for batch in train_dataloader:
        vs = model(batch)

        # Calculate loss
        loss = sum(loss_fn(batch[i], vs[i]) for i in range(batch_size)) 
        
        # Backward pass and weight update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Updates the loss per batch
        running_loss += loss.item()
    
    # if (epoch % 10 == 0):
    print(f"Epoch {epoch} Loss: {loss.item()}")

Epoch 1 Loss: 3.335092544555664
Epoch 2 Loss: 3.834090232849121
Epoch 3 Loss: 3.7282156944274902
Epoch 4 Loss: 3.411004066467285
Epoch 5 Loss: 3.9240288734436035
Epoch 6 Loss: 3.490097999572754
Epoch 7 Loss: 3.589089870452881
Epoch 8 Loss: 3.698190212249756
Epoch 9 Loss: 3.679845094680786
Epoch 10 Loss: 3.5490472316741943


In [8]:
Xtest = torch.tensor(np.array([0, 0, 0, 1, 2, 4, 2, 1, 0, 0, 0]), dtype=torch.float32)
torch_row_to_matrix(Xtest)

tensor([[4., 2., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
        [2., 4., 2., 1., 0., 0., 0., 0., 0., 0., 0.],
        [1., 2., 4., 2., 1., 0., 0., 0., 0., 0., 0.],
        [0., 1., 2., 4., 2., 1., 0., 0., 0., 0., 0.],
        [0., 0., 1., 2., 4., 2., 1., 0., 0., 0., 0.],
        [0., 0., 0., 1., 2., 4., 2., 1., 0., 0., 0.],
        [0., 0., 0., 0., 1., 2., 4., 2., 1., 0., 0.],
        [0., 0., 0., 0., 0., 1., 2., 4., 2., 1., 0.],
        [0., 0., 0., 0., 0., 0., 1., 2., 4., 2., 1.],
        [0., 0., 0., 0., 0., 0., 0., 1., 2., 4., 2.],
        [0., 0., 0., 0., 0., 0., 0., 0., 1., 2., 4.]], device='cuda:0')

In [9]:
# Test Iteration
with torch.no_grad():
    a = model(Xtest.reshape(1, input_features).to(device))[0]
    loss = loss_fn(Xtest, a)
    print(loss.item())

0.3322944641113281


In [12]:
print(torch.linalg.matrix_norm(torch_row_to_matrix(Xtest) - torch_row_to_matrix(a)))

tensor(48.8121, device='cuda:0')
