In [None]:
import torch

In [None]:
# Synthetic regression data: 3-dimensional inputs, 2-dimensional targets.
# Targets follow y = (x**2) @ W + x @ W + b, a quadratic the network has to approximate.
SEED = 0
torch.manual_seed(SEED)  # seed the global RNG so a fresh kernel regenerates identical data
n, d = 500, 3
x = torch.empty(n, d).uniform_(-1, 1)  # inputs sampled uniformly between -1 and 1
weights_true = torch.tensor([[5., 1., 5.], [1., 2., 1.]]).t()  # transposed to shape (d, 2)
bias_true = torch.tensor([1., 2.])  # float so every tensor in the formula shares one dtype
y_true = torch.mm(x**2, weights_true) + torch.mm(x, weights_true) + bias_true
print(f'x: {x.shape}, weights: {weights_true.shape}, bias: {bias_true.shape}, y: {y_true.shape}')

# From scratch

In [None]:
class MSE:
    """Mean-squared-error loss with a hand-written backward pass.

    Calling the object computes the loss and caches both operands;
    ``backward()`` then returns the gradient of that loss with respect to
    ``y_pred``.
    """

    def __call__(self, y_pred, y_true):
        """Return the mean of the squared differences over every element."""
        self.y_pred = y_pred
        self.y_true = y_true
        return torch.mean((y_pred - y_true) ** 2)

    def backward(self):
        """Return d(loss)/d(y_pred) for the most recent forward call.

        The forward pass averages over *all* elements, so the gradient is
        scaled by the total element count rather than by the batch size
        alone. Dividing by the number of rows only would overstate the
        gradient by a factor equal to the output width.
        """
        difference = self.y_pred - self.y_true
        # numel() of the difference matches the denominator torch.mean used.
        self.gradient = 2. * difference / difference.numel()
        return self.gradient

In [None]:
class Linear:
    """Fully connected layer (``x @ weights + bias``) with manual backprop.

    Weights start as uniform samples from ``torch.rand`` and the bias starts
    at zero. The layer caches its input on the forward pass so that
    ``backward`` can compute parameter gradients, which ``update`` then
    applies as one gradient-descent step.
    """

    def __init__(self, input_dim: int, num_hidden: int = 1):
        """Create a layer mapping ``input_dim`` features to ``num_hidden`` units."""
        self.weights = torch.rand(input_dim, num_hidden)
        self.bias = torch.zeros(num_hidden,)

    def __call__(self, x):
        """Apply the affine map to ``x`` and remember ``x`` for the backward pass."""
        self.x = x
        projected = x.mm(self.weights)
        return projected + self.bias

    def backward(self, gradient):
        """Store parameter gradients and return the gradient w.r.t. the input.

        ``gradient`` is the upstream gradient with respect to this layer's
        output from the last forward call.
        """
        inputs_t = self.x.transpose(0, 1)
        weights_t = self.weights.transpose(0, 1)
        self.weights_gradient = inputs_t.mm(gradient)
        # The bias is added to every row, so its gradient sums over rows.
        self.bias_gradient = gradient.sum(0)
        self.x_gradient = gradient.mm(weights_t)
        return self.x_gradient

    def update(self, lr):
        """Take one gradient-descent step of size ``lr`` on weights, then bias."""
        for name in ("weights", "bias"):
            current = getattr(self, name)
            step = lr * getattr(self, name + "_gradient")
            # Rebind to a new tensor (not in-place), as callers may hold the old one.
            setattr(self, name, current - step)



In [None]:
class Relu:
    """Rectified linear unit with a hand-written backward pass."""

    def __call__(self, input_):
        """Clamp negative entries of ``input_`` to zero, caching input and output."""
        self.input_ = input_
        self.output = self.input_.clamp(min=0)
        return self.output

    def backward(self, output_gradient):
        """Pass the upstream gradient through only where the cached input was positive."""
        passes_through = self.input_ > 0
        self.input_gradient = passes_through * output_gradient
        return self.input_gradient

In [None]:
from typing import Callable
class Model:
    """Three-layer MLP (Linear -> ReLU -> Linear -> ReLU -> Linear) with manual backprop.

    Args:
        input_dim: Number of input features.
        num_hidden: Width of the first hidden layer.
        num_hidden2: Width of the second hidden layer. Defaults to 12, the
            value that used to be hard-coded.
        output_dim: Number of outputs. Defaults to 2, the value that used to
            be hard-coded.
    """

    def __init__(self, input_dim, num_hidden, num_hidden2=12, output_dim=2):
        self.linear1 = Linear(input_dim, num_hidden)
        self.relu1 = Relu()
        self.linear2 = Linear(num_hidden, num_hidden2)
        self.relu2 = Relu()
        self.linear3 = Linear(num_hidden2, output_dim)

    def _layers(self):
        """Return the layers in forward order, read fresh so reassigned layers are honoured."""
        return (self.linear1, self.relu1, self.linear2, self.relu2, self.linear3)

    # Forward pass
    def __call__(self, x):
        """Run ``x`` through every layer in order and return the final output."""
        activation = x
        for layer in self._layers():
            activation = layer(activation)
        return activation

    def backward(self, output_gradient):
        """Backpropagate ``output_gradient`` through the layers in reverse order.

        Each layer stores its own gradients; the gradient with respect to the
        model input is returned.
        """
        gradient = output_gradient
        for layer in reversed(self._layers()):
            gradient = layer.backward(gradient)
        return gradient

    def update(self, lr):
        """Apply one gradient-descent step of size ``lr`` to every linear layer."""
        # Activations have no parameters, so only the linear layers are updated.
        for layer in (self.linear3, self.linear2, self.linear1):
            layer.update(lr)

In [None]:
def fit(x, y, model: Callable, loss: Callable, lr: float, num_epochs: int):
    """Train ``model`` on the full batch ``(x, y)`` using its hand-written backprop.

    Each epoch runs a forward pass, evaluates ``loss``, feeds
    ``loss.backward()`` into ``model.backward`` and finishes with
    ``model.update(lr)``. The loss is printed every 50th epoch, starting at
    epoch 0. Nothing is returned; the model is changed through ``update``.
    """
    for epoch in range(num_epochs):
        loss_value = loss(model(x), y)
        if not epoch % 50:
            print(f'Epoch {epoch}, loss {loss_value}')
        model.backward(loss.backward())
        model.update(lr)

# Train the hand-written network: d inputs, 20 units in the first hidden layer,
# full-batch gradient descent for 1000 epochs.
loss = MSE()
model = Model(input_dim=d, num_hidden=20)
fit(x, y_true, model, loss, lr=0.0035, num_epochs=1000)

In [None]:
import plotly.graph_objects as go
def plot_intereactive_3d(x, y, y_pred=None):
    """Show an interactive 3-D scatter of targets, and optionally predictions.

    The first two columns of ``x`` are used as the horizontal axes and the
    flattened ``y`` as the vertical axis. When ``y_pred`` is given it is
    added as a second trace over the same ``x`` columns. The figure is
    displayed with ``fig.show()``; nothing is returned.
    """
    series = [(y, 'Underlying Function')]
    if y_pred is not None:
        series.append((y_pred, 'Predicted Function'))

    fig = go.Figure()
    for values, label in series:
        trace = go.Scatter3d(
            x=x[:, 0],
            y=x[:, 1],
            z=values.reshape([-1]),
            opacity=0.5,
            mode='markers',
            name=label,
        )
        fig.add_trace(trace)

    axis_titles = dict(xaxis_title='X1', yaxis_title='X2', zaxis_title='Y')
    figure_margin = dict(r=20, b=10, l=10, t=10)
    fig.update_layout(scene=axis_titles, width=700, margin=figure_margin)
    fig.show()

In [None]:
from sklearn.manifold import TSNE
# Reduce the data to three plottable axes with t-SNE: inputs -> 2-D, targets and
# predictions -> 1-D each. t-SNE is stochastic, so random_state is pinned to make
# the figure reproducible on a fresh kernel.
# NOTE(review): the target and prediction embeddings are fitted independently, so
# their values on the Y axis are not guaranteed to share a scale — read the plot
# qualitatively.
X_reduced = TSNE(n_components=2, random_state=0).fit_transform(x)
y_true_reduced = TSNE(n_components=1, random_state=0).fit_transform(y_true)
y_pred_reduced = TSNE(n_components=1, random_state=0).fit_transform(model(x))
print(f'X_reduced: {X_reduced.shape}, y_true_reduced: {y_true_reduced.shape}, y_pred_reduced: {y_pred_reduced.shape}')
plot_intereactive_3d(X_reduced,y_true_reduced,y_pred_reduced)

# Using built-in functionality

In [None]:
import torch.nn as nn
class Linear(nn.Module):
    """Fully connected layer (``x @ weights + bias``) trained through autograd.

    Both ``weights`` and ``bias`` are registered as ``nn.Parameter`` so that
    ``parameters()`` exposes them to an optimizer. The computation lives in
    ``forward`` so that calling the module goes through
    ``nn.Module.__call__`` and its hooks.

    NOTE(review): this class reuses the name of the from-scratch ``Linear``
    defined earlier in the notebook and shadows it once this cell runs.

    Args:
        input_dim: Number of input features.
        num_hidden: Number of output units.
    """

    def __init__(self, input_dim, num_hidden):
        super().__init__()
        # Initial weight values, kept as an attribute; the parameter wraps this tensor.
        self.init = torch.rand(input_dim, num_hidden).float()
        self.weights = torch.nn.Parameter(self.init, requires_grad=True)
        # A plain tensor here would never be registered, and so never trained.
        self.bias = torch.nn.Parameter(torch.zeros(num_hidden,))

    def forward(self, x):
        """Return ``x @ weights + bias``; the input is cached on ``self.x``."""
        self.x = x
        return torch.mm(x, self.weights) + self.bias

In [None]:
class TorchModel(nn.Module):
    """Three-layer MLP (Linear -> ReLU -> Linear -> ReLU -> Linear) built from ``torch.nn``.

    Args:
        input_dim: Number of input features.
        num_hidden: Width of the first hidden layer.
        num_hidden2: Width of the second hidden layer. Defaults to 12, the
            value that used to be hard-coded.
        output_dim: Number of outputs. Defaults to 2, the value that used to
            be hard-coded.
    """

    def __init__(self, input_dim, num_hidden, num_hidden2=12, output_dim=2):
        super().__init__()
        # Attribute names are unchanged, so parameter names and state_dict keys stay the same.
        self.linear1 = nn.Linear(input_dim, num_hidden)
        self.relu1 = nn.ReLU()
        self.linear2 = nn.Linear(num_hidden, num_hidden2)
        self.relu2 = nn.ReLU()
        self.linear3 = nn.Linear(num_hidden2, output_dim)

    def forward(self, x):
        """Run ``x`` through the layers in order and return the final linear output."""
        hidden1 = self.relu1(self.linear1(x))
        hidden2 = self.relu2(self.linear2(hidden1))
        return self.linear3(hidden2)

In [None]:
def torch_fit(x, y, model: Callable, loss: Callable, lr: float, num_epochs: int):
    """Train ``model`` on the full batch ``(x, y)`` with SGD and autograd.

    A ``torch.optim.SGD`` optimizer over ``model.parameters()`` is created
    once. Each epoch clears the gradients, runs the forward pass, evaluates
    ``loss``, backpropagates and takes one optimizer step. The loss is
    printed every 50th epoch, starting at epoch 0. Nothing is returned.
    """
    sgd = torch.optim.SGD(model.parameters(), lr=lr)
    for epoch in range(num_epochs):
        # Gradients accumulate across backward() calls, so clear them first.
        sgd.zero_grad()
        loss_value = loss(model(x), y)
        if not epoch % 50:
            print(f'Epoch {epoch}, loss {loss_value}')
        loss_value.backward()
        sgd.step()

# Train the torch.nn version of the network: d inputs, 22 units in the first
# hidden layer, SGD for 1000 epochs.
loss = nn.MSELoss()
model = TorchModel(input_dim=d, num_hidden=22)
torch_fit(x, y_true, model, loss, lr=0.035, num_epochs=1000)

In [None]:
# Reduce the data to three plottable axes with t-SNE: inputs -> 2-D, targets and
# predictions -> 1-D each. t-SNE is stochastic, so random_state is pinned to make
# the figure reproducible on a fresh kernel.
# NOTE(review): the target and prediction embeddings are fitted independently, so
# their values on the Y axis are not guaranteed to share a scale — read the plot
# qualitatively.
X_reduced = TSNE(n_components=2, random_state=0).fit_transform(x)
y_true_reduced = TSNE(n_components=1, random_state=0).fit_transform(y_true)
# detach() drops the autograd graph so the predictions can be converted to an array.
y_pred_reduced = TSNE(n_components=1, random_state=0).fit_transform(model(x).detach())
print(f'X_reduced: {X_reduced.shape}, y_true_reduced: {y_true_reduced.shape}, y_pred_reduced: {y_pred_reduced.shape}')
plot_intereactive_3d(X_reduced,y_true_reduced,y_pred_reduced)