In [None]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn

In [None]:
# A 1-D tensor (vector) holding two floating-point values.
X = torch.tensor([1.00, 2.5])
X

In [None]:
# A 2-D tensor (matrix) with 2 rows and 3 columns.
Y = torch.tensor([[1.7, 2.3, 3.1], [4.8, 5.3, 6.5]])
Y

In [None]:
# A 3-D tensor of shape (2, 2, 3): two 2x3 matrices stacked along the first axis.
Z = torch.tensor([[[1.7, 2.3, 3.1], [4.8, 5.3, 6.5]],[[8.6, 2.0, 9.2], [1.4, 7.2, 3.5]]])
Z

In [None]:
# Index a single element: row 0, column 2 of Y.
Y[0,2]

In [None]:
# Element-wise square of every entry (not a matrix power).
Y**2

In [None]:
# Matrix product: the 2-element vector X times the 2x3 matrix Y yields a 3-element vector.
X@Y

In [None]:
# Element-wise (Hadamard) product; `*` is not matrix multiplication.
Y*Y

In [None]:
# Y is 2x3, so the 3x3 Gram matrix Y.t() @ Y has rank at most 2 and is singular:
# a plain inverse either raises or returns numerically meaningless values.
# The Moore-Penrose pseudo-inverse is well defined for rank-deficient matrices.
torch.linalg.pinv(Y.t() @ Y)

In [None]:
# Ask PyTorch whether CUDA can currently be used on this machine.
torch.cuda.is_available()

In [None]:
# Request Y on the CPU device; the result is only displayed, Y itself is not rebound.
Y.to('cpu')

Gradient calculation

In [None]:
# A 1x2 float tensor of random integers below 10, plus its autograd flag.
X = torch.randint(high=10, size=(1, 2), dtype=torch.float)
display(X)
print(f"X.requires_grad : {X.requires_grad}")

Let's define a function $f(x, y) = \sin(x \cdot y)$

In [None]:
# Two leaf tensors tracked by autograd, and f = sin(X . Y) built on top of them.
X = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
Y = torch.tensor([5.0, 6.0, 7.0], requires_grad=True)

f = X.dot(Y).sin()
print("f =", f)

In [None]:
# Inspect the gradient slots of both inputs (run in order, no backward pass has happened yet).
for label, tensor in (("X", X), ("Y", Y)):
    print(f"{label}.grad :", tensor.grad)

In [None]:
# Back-propagate from f, then show the gradients accumulated on X and Y.
f.backward()
for label, tensor in (("X", X), ("Y", Y)):
    print(f"{label}.grad :", tensor.grad)

We can also do the calculation manually

In [None]:
# Chain rule by hand: d/dX sin(X . Y) = cos(X . Y) * Y.
dfdx = X.dot(Y).cos() * Y
dfdx

In [None]:
# Chain rule by hand: d/dY sin(X . Y) = cos(X . Y) * X.
dfdy = X.dot(Y).cos() * X
dfdy

In [None]:
# A is a leaf tensor tracked by autograd; B and C are derived from it,
# so each carries a grad_fn describing how it was computed.
A = torch.tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True)

B = 5 * (A + A)
C = B.mean()

In [None]:
# Show the backward function recorded for each tensor in the chain A -> B -> C.
for node in (A, B, C):
    print(node.grad_fn)

In [None]:
# Walk one step back in the autograd graph: the nodes that C's backward function feeds into.
C.grad_fn.next_functions

Using optimisers

In [None]:
def square_function(x):
    """Return ``x`` raised to the second power (element-wise for tensors)."""
    return pow(x, 2)

In [None]:
# Starting point and initial step size for SGD with an exponentially decaying learning rate.
x0, lr = 8, 0.5

x = torch.tensor([float(x0)], requires_grad=True)
optimizer = torch.optim.SGD([x], lr=lr)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.8)

In [None]:
# Four SGD steps on square_function; the scheduler shrinks the learning rate after each one.
for step in range(4):
    optimizer.zero_grad()
    y = square_function(x)
    y.backward()
    optimizer.step()
    scheduler.step()
    current_lr = optimizer.param_groups[0]['lr']
    print(y.detach(), " | lr : ", current_lr)

In [None]:
def more_difficult_function(x):
    """Return ``x**2 / 20 + tanh(sin(x))`` for a tensor ``x``: a shallow parabola with ripples."""
    parabola = x ** 2 / 20
    ripple = x.sin().tanh()
    return parabola + ripple

In [None]:
# Reset the starting point, optimiser and scheduler before tackling the harder function.
x0, lr = 8, 0.5

x = torch.tensor([float(x0)], requires_grad=True)
optimizer = torch.optim.SGD([x], lr=lr)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.8)

In [None]:
# Fifteen SGD steps on more_difficult_function, printing the value and the decayed learning rate.
for step in range(15):
    optimizer.zero_grad()
    y = more_difficult_function(x)
    y.backward()
    optimizer.step()
    scheduler.step()
    current_lr = optimizer.param_groups[0]['lr']
    print(y.detach(), " | lr : ", current_lr)

In [None]:
# Figure and axes shared by the two plotting helpers below; set by init_2dplot.
fig = None
ax = None


def init_2dplot(x_range, func):
    """Create the shared figure/axes and draw ``func`` over ``x_range`` as a faint blue curve.

    ``x_range`` is a tensor of x positions; ``func`` maps it to a tensor of y values.
    """
    global fig, ax
    fig = plt.figure(figsize=(8, 6), dpi=120)
    ax = fig.gca()
    fig.canvas.draw()

    curve = func(x_range)
    ax.plot(x_range.numpy(), curve.numpy(), 'b', alpha=0.2)


def add_point_2d(x, y, i, colour):
    """Mark the point ``(x, y)`` on the shared axes and label it with ``i``.

    ``x`` and ``y`` are tensors; their first element positions the text label.
    """
    x_values = x.detach().numpy()
    y_values = y.detach().numpy()
    ax.scatter(x_values, y_values, c=colour, edgecolors='k', linewidth=.5, alpha=0.4)
    ax.text(x_values[0] + 0.1, y_values[0] + 0.1, str(i))
    fig.canvas.draw()

In [None]:
# Minimise more_difficult_function with Adam, plotting every visited point on the curve.
x0, lr, iterations = 8, 1, 20

x_range = torch.arange(-10, 10, 0.1)
init_2dplot(x_range, more_difficult_function)

x = torch.tensor([float(x0)], requires_grad=True)
optimizer = torch.optim.Adam([x], lr=lr)

for i in range(iterations):
    optimizer.zero_grad()
    f = more_difficult_function(x)
    f.backward()
    # Plot the current position before the optimiser moves x.
    add_point_2d(x, f, i, 'r')
    optimizer.step()

In [None]:
class MyFirstNN(nn.Module):
    """Single fully connected layer mapping ``input_size`` features to ``num_classes`` outputs."""

    def __init__(self, input_size, num_classes):
        super().__init__()
        self.linear = nn.Linear(in_features=input_size, out_features=num_classes)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the raw outputs."""
        return self.linear(x)

In [None]:
# Instantiate the linear model for 20 input features and 5 outputs, then show its layers.
model = MyFirstNN(20, 5)
print(model)

In [None]:
# 10000 random samples with 20 features each, pushed through the untrained model.
x = torch.rand(size=(10000, 20))
out = model(x)
out[:3]

In [None]:
# List every learnable parameter of the model together with its current values.
for param_name, param in model.named_parameters():
    print(param_name, ":\n", param)

In [None]:
def train_model(iter, model, x, y, lr):
    """Fit ``model`` to map ``x`` onto ``y`` with full-batch SGD on the mean squared error.

    Args:
        iter: number of gradient steps to take.
        model: module whose parameters are optimised in place.
        x: input tensor fed to ``model`` on every step.
        y: target tensor compared against the model output.
        lr: learning rate passed to SGD.

    Returns:
        A list with the loss value (Python float) measured before each step.
    """
    mse = torch.nn.MSELoss()
    sgd = torch.optim.SGD(model.parameters(), lr=lr)
    history = []

    for _ in range(iter):
        sgd.zero_grad()
        batch_loss = mse(model(x), y)
        history.append(batch_loss.item())
        batch_loss.backward()
        sgd.step()

    return history

In [None]:
# Synthetic regression targets: each of the 5 columns is a rounded signed sum of a
# different subset of the 20 input features. Built with torch.round (not np.round) so
# the data stays in PyTorch regardless of device, and stacked directly so the number
# of rows always follows x instead of a hard-coded 10000.
y = torch.stack(
    [
        torch.round(x[:, 3] + x[:, 5] + x[:, 7]),
        torch.round(x[:, 13] + x[:, 15] + x[:, 17] + x[:, 19]),
        torch.round(x[:, 2] + x[:, 4]),
        torch.round(x[:, 0] + x[:, 1] - x[:, 6]),
        torch.round(x[:, 8] + x[:, 9] + x[:, 10] + x[:, 11] - x[:, 12] - x[:, 14]),
    ],
    dim=1,
)

In [None]:
# Train the linear model for 200 full-batch steps and keep the loss history.
losses = train_model(iter=200, model=model, x=x, y=y, lr=0.2)

In [None]:
# Loss recorded at each training iteration of the linear model.
plt.plot(losses)

In [None]:
# Compare the first three targets with the trained linear model's predictions.
out = model(x)
display(y[:3], out[:3])

In [None]:
class MyMultilayerPerceptron(nn.Module):
    """Three-layer perceptron: input -> 75 -> 50 -> ``num_classes`` with ReLU between layers."""

    def __init__(self, input_size, num_classes):
        super().__init__()

        self.input_size = input_size
        self.num_classes = num_classes

        self.linear_1 = nn.Linear(input_size, 75)
        self.linear_2 = nn.Linear(75, 50)
        self.linear_3 = nn.Linear(50, num_classes)

    def forward(self, x):
        """Run ``x`` through both hidden layers (ReLU) and the linear output layer."""
        hidden = torch.relu(self.linear_1(x))
        hidden = torch.relu(self.linear_2(hidden))
        return self.linear_3(hidden)

In [None]:
# Multilayer perceptron with the same 20-in / 5-out interface as the linear model.
model_2 = MyMultilayerPerceptron(input_size=20, num_classes=5)

In [None]:
# Train the perceptron for 2000 full-batch steps and plot its loss curve.
losses = train_model(iter=2000, model=model_2, x=x, y=y, lr=0.3)
plt.plot(losses)

In [None]:
# First three rows: targets, linear-model predictions, perceptron predictions.
out_2 = model_2(x)
display(y[:3], out[:3], out_2[:3])

In [None]:
# Mean absolute error per output column for the linear model and the perceptron.
# The arrays are subtracted directly: converting predictions to nested Python lists
# first only forces NumPy to rebuild the same array element by element.
print(np.mean(np.abs(y.numpy() - out.detach().numpy()), axis=0))
print(np.mean(np.abs(y.numpy() - out_2.detach().numpy()), axis=0))

In [None]:
class NewNN(nn.Module):
    """Network with a split hidden layer.

    A 10-unit ReLU layer is cut into two halves of 5 units: the first half goes
    through a plain linear layer, the second through a linear layer followed by
    ReLU. Both results are concatenated and mapped to ``num_classes`` outputs.
    """

    def __init__(self, input_size, num_classes):
        super().__init__()

        self.input_size = input_size
        self.num_classes = num_classes

        self.linear_1 = nn.Linear(input_size, 10)
        self.linear_2 = nn.Linear(5, 5)
        self.linear_3 = nn.Linear(5, 5)
        self.linear_4 = nn.Linear(10, num_classes)

    def forward(self, x):
        """Compute the outputs for a 2-D batch ``x`` of shape (batch, input_size)."""
        shared = torch.relu(self.linear_1(x))
        first_half, second_half = shared[:, :5], shared[:, 5:]
        relu_branch = torch.relu(self.linear_2(second_half))
        linear_branch = self.linear_3(first_half)
        merged = torch.cat((linear_branch, relu_branch), dim=1)
        return self.linear_4(merged)

In [None]:
# Split-branch network with the same 20-in / 5-out interface as the other models.
model_3 = NewNN(input_size=20, num_classes=5)

In [None]:
# Train the split-branch network for 200 full-batch steps and plot its loss curve.
losses = train_model(iter=200, model=model_3, x=x, y=y, lr=0.2)
plt.plot(losses)

In [None]:
# Predictions of the trained split-branch network on the full input set.
out_3 = model_3.forward(x) if False else model_3(x)

In [None]:
# Mean absolute error per output column for each of the three models, in the order
# linear, perceptron, split-branch. One loop replaces three copy-pasted lines, and the
# arrays are subtracted directly instead of round-tripping predictions through Python lists.
for prediction in (out, out_2, out_3):
    print(np.mean(np.abs(y.numpy() - prediction.detach().numpy()), axis=0))

Other layers

In [None]:
class DropoutExample(nn.Module):
    """Linear layer (10 -> 5) followed by dropout with p=0.5.

    Strength: reduces overfitting by randomly setting a fraction of input units
    to zero during training.
    """

    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(10, 5)
        self.dropout = nn.Dropout(p=0.5)

    def forward(self, x):
        """Project ``x`` with the linear layer, then apply dropout."""
        return self.dropout(self.fc(x))


model = DropoutExample()
input_data = torch.randn(1, 10)
output = model(input_data)
print(output)  # Output with dropout applied

In [None]:
class BatchNormExample(nn.Module):
    """Linear (10 -> 5), batch normalisation, ReLU, then linear (5 -> 1).

    Strength: normalizes the output of a previous layer, improving convergence
    and stability.
    """

    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(10, 5)
        self.batch_norm = nn.BatchNorm1d(5)
        self.fc2 = nn.Linear(5, 1)

    def forward(self, x):
        """Normalise the first layer's output over the batch before ReLU and the output layer."""
        normalised = self.batch_norm(self.fc(x))
        activated = torch.relu(normalised)
        return self.fc2(activated)


model = BatchNormExample()
input_data = torch.randn(2, 10)
output = model(input_data)
print(output)  # Output after batch normalization

Time series

In [None]:
class Conv1DExample(nn.Module):
    """Single 1-D convolution: 1 input channel, 16 output channels, kernel size 3.

    Strength: good for processing sequential data, such as time series.
    """

    def __init__(self):
        super().__init__()
        self.conv1d = nn.Conv1d(1, 16, kernel_size=3)

    def forward(self, x):
        """Convolve ``x`` along its last (sequence) dimension."""
        features = self.conv1d(x)
        return features


model = Conv1DExample()
input_data = torch.randn(1, 1, 10)  # Batch size of 1, 1 channel, sequence length of 10
output = model(input_data)
print(output.shape)  # Output shape: (1, 16, 8) (due to kernel size)

In [None]:
class LSTMExample(nn.Module):
    """Single LSTM layer with 10 input features and 5 hidden units.

    Strength: effective for sequence data, capturing long-term dependencies.
    """

    def __init__(self):
        super().__init__()
        self.lstm = nn.LSTM(input_size=10, hidden_size=5)

    def forward(self, x):
        """Return the per-step outputs for ``x`` of shape (sequence_length, batch_size, input_size)."""
        outputs, _state = self.lstm(x)  # final hidden/cell state is discarded
        return outputs


model = LSTMExample()
input_data = torch.randn(5, 1, 10)  # Sequence length of 5, batch size of 1, input size of 10
output = model(input_data)
print(output.shape)  # Output shape: (5, 1, 5)

Image/video data

In [None]:
class PoolingExample(nn.Module):
    """2x2 max pooling with stride 2, halving both spatial dimensions.

    Strength: reduces dimensionality, helps with translational invariance.
    """

    def __init__(self):
        super().__init__()
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

    def forward(self, x):
        """Down-sample ``x`` by keeping the maximum of each 2x2 window."""
        pooled = self.pool(x)
        return pooled


model = PoolingExample()
input_data = torch.randn(1, 16, 28, 28)  # 16 channels, 28x28 feature map
output = model(input_data)
print(output.shape)  # Output shape: (1, 16, 14, 14)

In [None]:
class SimpleCNN(nn.Module):
    """Single 3x3 2-D convolution with padding 1: 1 input channel, 16 output channels.

    Strength: effective for image data, capturing spatial hierarchies.
    """

    def __init__(self):
        super().__init__()
        # padding=1 with a 3x3 kernel keeps the spatial size unchanged
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, padding=1)

    def forward(self, x):
        """Convolve the image batch ``x`` and return the feature maps."""
        feature_maps = self.conv1(x)
        return feature_maps


model = SimpleCNN()
input_data = torch.randn(1, 1, 28, 28)  # Batch of 1, 1 channel, 28x28 image
output = model(input_data)
print(output.shape)  # Output shape: (1, 16, 28, 28)

Text data

In [None]:
class AttentionExample(nn.Module):
    """Multi-head self-attention with embedding size 10 and 2 heads.

    Strength: captures relationships between all elements in a sequence,
    enhancing performance in tasks like translation.
    """

    def __init__(self):
        super().__init__()
        self.attention = nn.MultiheadAttention(embed_dim=10, num_heads=2)

    def forward(self, x):
        """Self-attend over ``x`` of shape (sequence_length, batch_size, embed_dim)."""
        # Query, key and value are all x; the attention weights are discarded.
        attended, _weights = self.attention(x, x, x)
        return attended


model = AttentionExample()
input_data = torch.randn(5, 1, 10)  # Sequence length of 5, batch size of 1, embedding size of 10
output = model(input_data)
print(output.shape)  # Output shape: (5, 1, 10)

In [None]:
class EmbeddingExample(nn.Module):
    """Lookup table mapping 10 category indices to 3-dimensional vectors.

    Strength: efficiently converts categorical variables into dense vectors,
    especially useful in NLP tasks.
    """

    def __init__(self):
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings=10, embedding_dim=3)  # 10 categories, 3-dimensional vectors

    def forward(self, x):
        """Return the embedding vector for every index in ``x``."""
        vectors = self.embedding(x)
        return vectors


model = EmbeddingExample()
input_data = torch.tensor([1, 2, 3])  # Indices of categories
output = model(input_data)
print(output)  # Output: Dense vector representation