In [1]:
import torch as t
import torch.nn as nn
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt

In [33]:
class Net(nn.Module):
    """Fully connected regressor that maps each N x N matrix in a batch to one scalar.

    Layer sizes are N*N -> 64 -> 64 -> 64 -> 1. A ReLU follows each of the three
    hidden linear layers; the output layer has no activation.
    """

    def __init__(self, N):
        """Build the layers for square input matrices of side length ``N``."""
        super().__init__()
        self.N = N
        width = 64
        # Layers are created in the same order as they are applied in forward().
        self.fc1 = nn.Linear(N * N, width)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(width, width)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(width, width)
        self.relu3 = nn.ReLU()
        self.fc4 = nn.Linear(width, 1)

    def forward(self, x: "(B, N, N)"):
        """Flatten ``x`` to ``(B, N*N)``, run the MLP and return a ``(B,)`` tensor."""
        batch = x.shape[0]
        n_features = self.N ** 2
        hidden = x.reshape(batch, n_features)
        assert hidden.shape == (batch, n_features)

        hidden_stack = (
            (self.fc1, self.relu1),
            (self.fc2, self.relu2),
            (self.fc3, self.relu3),
        )
        for linear, activation in hidden_stack:
            hidden = activation(linear(hidden))

        out = self.fc4(hidden)
        assert out.shape == (batch, 1)
        # Drop the trailing singleton dimension: one prediction per matrix.
        return out[:, 0]

In [40]:
def weights_init(model):
    """Overwrite the weight of every ``nn.Linear`` inside ``model`` with N(0, 1) samples.

    The weights are modified in place; biases and all other module types are
    left untouched. Returns ``None``.
    """
    linear_layers = [layer for layer in model.modules() if isinstance(layer, nn.Linear)]
    for layer in linear_layers:
        # Standard normal draw written directly into the existing weight tensor.
        layer.weight.data.normal_(mean=0, std=1)

In [41]:
# Seed both random number generators used by this notebook so that
# Restart Kernel -> Run All reproduces the same initial weights (torch)
# and the same randomly generated matrices (numpy).
SEED = 0
np.random.seed(SEED)
t.manual_seed(SEED)

N = 4 # square matrices only!
model = Net(N)
# Mean squared error between predicted and exact determinants.
loss_func = nn.MSELoss()

In [42]:
# Adam optimizer over every parameter of the model.
optimizer = optim.Adam(
    params=model.parameters(),
    lr=1e-4,
    weight_decay=1e-5,
)

In [43]:
# Specify training data
training_size = 100000

# Matrix entries are drawn uniformly from [-max_val, max_val).
max_val = 2.5
X = (np.random.rand(training_size, N, N) - 0.5) * 2 * max_val
min_X = np.min(X)
max_X = np.max(X)
print(min_X, max_X)

# Scale by the full width of the sampling interval (2 * max_val) so that every
# entry lies in [-0.5, 0.5). Using the known width instead of the sampled
# max - min keeps the scale factor independent of the particular random draw.
X = X / (2 * max_val)
# Regression targets: exact determinant of each scaled matrix.
Y = np.linalg.det(X)

-2.4999957501328582 2.499998217033419


In [None]:
B = 32  # mini-batch size

losses = []  # loss of every 100th mini-batch, kept for the plots below
epochs = 100

# Convert the numpy training arrays to float32 tensors once, instead of
# re-converting every slice in every epoch.
X_train = t.Tensor(X)
Y_train = t.Tensor(Y)
# Only full batches are used; a trailing partial batch (if any) is skipped.
n_batches = training_size // B

for i in range(epochs):
    for j in range(n_batches):
        x_var = X_train[j*B:(j+1)*B]
        y_var = Y_train[j*B:(j+1)*B]
        assert x_var.shape == (B, N, N)
        assert y_var.shape == (B,)

        optimizer.zero_grad()
        y_hat = model(x_var)
        # Call the module itself rather than .forward() so registered hooks run.
        loss = loss_func(y_hat, y_var)
        loss.backward()
        optimizer.step()
        if j % 100 == 0:
            losses.append(loss.item())

    # Reports the loss of the last mini-batch of the epoch (not an epoch average).
    print("Epoch: {0}, Loss: {1}, ".format(i, loss.item()))

Epoch: 0, Loss: 0.0003826708416454494, 
Epoch: 1, Loss: 0.00038578116800636053, 
Epoch: 2, Loss: 0.0003881795273628086, 
Epoch: 3, Loss: 0.00039194972487166524, 
Epoch: 4, Loss: 0.000394556118408218, 
Epoch: 5, Loss: 0.00039573258254677057, 
Epoch: 6, Loss: 0.0003942964831367135, 
Epoch: 7, Loss: 0.000389164692023769, 
Epoch: 8, Loss: 0.00038555945502594113, 
Epoch: 9, Loss: 0.0003738387313205749, 
Epoch: 10, Loss: 0.00037618575152009726, 
Epoch: 11, Loss: 0.0003753187775146216, 
Epoch: 12, Loss: 0.00037962832720950246, 
Epoch: 13, Loss: 0.0003756730002351105, 
Epoch: 14, Loss: 0.0003676509659271687, 
Epoch: 15, Loss: 0.0003594807058107108, 
Epoch: 16, Loss: 0.00035420863423496485, 
Epoch: 17, Loss: 0.0003470347437541932, 
Epoch: 18, Loss: 0.0003386999887879938, 
Epoch: 19, Loss: 0.0003320043033454567, 
Epoch: 20, Loss: 0.0003268476575613022, 
Epoch: 21, Loss: 0.00032594811636954546, 
Epoch: 22, Loss: 0.0003183815279044211, 
Epoch: 23, Loss: 0.00031993549782782793, 
Epoch: 24, Loss: 0.

In [None]:
# Raw recorded training losses (one point per 100 mini-batches).
fig, ax = plt.subplots()
ax.plot(losses)
ax.set(title="Training loss", xlabel="Recorded step (every 100th mini-batch)", ylabel="MSE loss");

In [None]:
# Moving average of the recorded losses over `window` consecutive entries.
window = 100
if len(losses) >= window:
    # mode="valid" yields one value per full window, including the last one
    # (len(losses) - window + 1 values in total).
    mean_losses = np.convolve(losses, np.ones(window) / window, mode="valid").tolist()
else:
    # Not enough recorded points for even a single window.
    mean_losses = []
zero_line = [0] * len(mean_losses)
plt.plot(mean_losses)
plt.plot(zero_line)
if mean_losses:
    print(min(mean_losses))
else:
    print("Fewer than {0} recorded losses; no moving average available".format(window))

In [None]:
import time
# Generate test set
# NOTE: this cell rebinds X and Y (previously the training arrays) to the test data.
testing_size = 10000

# Same sampling interval as the training data: uniform in [-max_val, max_val).
max_val = 2.5
X = (np.random.rand(testing_size, N, N) - 0.5) * 2 * max_val
min_X = np.min(X)
max_X = np.max(X)
print(min_X, max_X)

# Scale by the fixed interval width (entries end up in [-0.5, 0.5)) rather than
# by statistics of the test sample itself.
X = X / (2 * max_val)
npTbeg = time.perf_counter()
Y = np.linalg.det(X)
npTend = time.perf_counter()

y_var = t.Tensor(Y)
x_var = t.Tensor(X)

# Inference only: disable autograd so no graph is built and the timing
# measures just the forward pass.
with t.no_grad():
    mlTbeg = time.perf_counter()
    y_hat = model(x_var)
    mlTend = time.perf_counter()
error = (y_hat - y_var)**2  # per-sample squared prediction error

tot_error = error.sum()
# Mean squared error = sum of squared errors / number of samples.
print("Total MSE over testing set : ", tot_error.item() / testing_size)

print("Time for numpy to solve : ", npTend-npTbeg)
print("Time for NN to solve : ", mlTend-mlTbeg)