# Empty NN

In [1]:
import torch

class NeuralNetwork(torch.nn.Module):
    """Small fully connected network: input -> 30 -> 20 -> output.

    The two hidden linear layers are each followed by a ReLU; the final
    linear layer has no activation, so ``forward`` returns its raw output
    (named logits in the original code).
    """

    def __init__(self, input_size, output_size):
        """Build the layer stack.

        Args:
            input_size: Number of input features of the first linear layer.
            output_size: Number of output features of the last linear layer.
        """
        super().__init__()
        nn = torch.nn
        first_hidden, second_hidden = 30, 20
        # Modules are created in the same order they are applied, so the
        # Sequential indices (0..4) match the position in this list.
        stack = [
            nn.Linear(input_size, first_hidden),
            nn.ReLU(),
            nn.Linear(first_hidden, second_hidden),
            nn.ReLU(),
            nn.Linear(second_hidden, output_size),
        ]
        self.layers = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the result."""
        return self.layers(x)

# Seed the RNG so the randomly initialised weights printed below are reproducible.
torch.manual_seed(123)
model = NeuralNetwork(input_size=50, output_size=3)
print(model.layers[0].weight)

# Re-seed before drawing the random input so X does not depend on how many
# random numbers the model initialisation consumed.
torch.manual_seed(123)
X = torch.rand(1, 50)
with torch.no_grad():
    # Softmax over dim=1 turns the three outputs of the single row into
    # values that sum to 1.
    logits = model(X)
    out = torch.softmax(logits, dim=1)
print(out)

Parameter containing:
tensor([[-0.0577,  0.0047, -0.0702,  ...,  0.0222,  0.1260,  0.0865],
        [ 0.0502,  0.0307,  0.0333,  ...,  0.0951,  0.1134, -0.0297],
        [ 0.1077, -0.1108,  0.0122,  ...,  0.0108, -0.1049, -0.1063],
        ...,
        [-0.0787,  0.1259,  0.0803,  ...,  0.1218,  0.1303, -0.1351],
        [ 0.1359,  0.0175, -0.0673,  ...,  0.0674,  0.0676,  0.1058],
        [ 0.0790,  0.1343, -0.0293,  ...,  0.0344, -0.0971, -0.0509]],
       requires_grad=True)
tensor([[0.3113, 0.3934, 0.2952]])


# Dataset Basics

In [2]:
from torch.utils.data import Dataset

class ToyDataset(Dataset):
    """Map-style dataset pairing each feature entry with its label by index."""

    def __init__(self, features, labels):
        """Store the given features and labels without copying them.

        Args:
            features: Indexable container of inputs.
            labels: Indexable, sized container of targets; its length
                defines the length of the dataset.
        """
        self.features, self.labels = features, labels

    def __len__(self):
        """Return the number of labels."""
        n_examples = len(self.labels)
        return n_examples

    def __getitem__(self, idx):
        """Return the ``(features[idx], labels[idx])`` pair."""
        example = self.features[idx]
        target = self.labels[idx]
        return example, target

In [3]:
# Five 2-feature training examples: the first three carry label 0, the last two label 1.
train_X = torch.tensor(
    [
        [-1.2, 3.1],
        [-0.5, 2.3],
        [-0.2, 1.9],
        [1.3, -3.2],
        [1.5, -3.5],
    ]
)
train_y = torch.tensor([0, 0, 0, 1, 1])

# Two held-out examples, one per label.
test_X = torch.tensor(
    [
        [-0.5, 2.0],
        [0.3, -1.5],
    ]
)
test_y = torch.tensor([0, 1])

# Wrap the tensors so they can be served through a DataLoader.
train_dataset = ToyDataset(features=train_X, labels=train_y)
test_dataset = ToyDataset(features=test_X, labels=test_y)

In [4]:
from torch.utils.data import DataLoader

# Seed the global RNG so the shuffled batch order below is reproducible.
torch.manual_seed(123)

# Training loader: shuffled; drop_last=True discards the final incomplete
# batch (5 examples with batch_size=2 leaves one example out per pass), so
# every training step sees a full batch of two.
train_loader = DataLoader(
    train_dataset,
    batch_size=2,
    shuffle=True,
    num_workers=0,
    drop_last=True,
)

# Evaluation loader: fixed order and drop_last=False so that no test example
# is ever skipped, whatever the size of the test set. (With the current two
# test examples and batch_size=2 the batches are the same as before.)
test_loader = DataLoader(
    test_dataset,
    batch_size=2,
    shuffle=False,
    num_workers=0,
    drop_last=False,
)

for batch_idx, (batch_X, batch_y) in enumerate(train_loader):
    print(f"Batch {batch_idx+1}: {batch_X} {batch_y}")

Batch 1: tensor([[ 1.3000, -3.2000],
        [-0.5000,  2.3000]]) tensor([1, 0])
Batch 2: tensor([[-1.2000,  3.1000],
        [-0.2000,  1.9000]]) tensor([0, 0])


# Training

In [7]:
import torch.nn.functional as F

# Seed before building the model so the initial weights are reproducible.
torch.manual_seed(123)

# Use the GPU when one is available and fall back to the CPU otherwise, so
# the cell also runs on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = NeuralNetwork(input_size=2, output_size=2)
model = model.to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.5)

num_epochs = 5

for epoch in range(num_epochs):
    model.train()
    for batch_idx, (features, labels) in enumerate(train_loader):
        # Inputs and targets must live on the same device as the model.
        features = features.to(device)
        labels = labels.to(device)
        logits = model(features)

        # cross_entropy is given the raw logits and the integer class labels.
        loss = F.cross_entropy(logits, labels)

        # Clear gradients left over from the previous step before the
        # backward pass, then apply the SGD update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        print(f"Epoch: {epoch+1:03d}/{num_epochs:03d} | Batch {batch_idx+1:03d}/{len(train_loader):03d} | Train Loss: {loss:.2f}")

    # Leave the model in eval mode at the end of each epoch; no per-epoch
    # evaluation is performed here.
    model.eval()


Epoch: 001/005 | Batch 001/002 | Train Loss: 0.77
Epoch: 001/005 | Batch 002/002 | Train Loss: 0.53
Epoch: 002/005 | Batch 001/002 | Train Loss: 0.24
Epoch: 002/005 | Batch 002/002 | Train Loss: 0.15
Epoch: 003/005 | Batch 001/002 | Train Loss: 0.00
Epoch: 003/005 | Batch 002/002 | Train Loss: 0.02
Epoch: 004/005 | Batch 001/002 | Train Loss: 0.01
Epoch: 004/005 | Batch 002/002 | Train Loss: 0.00
Epoch: 005/005 | Batch 001/002 | Train Loss: 0.00
Epoch: 005/005 | Batch 002/002 | Train Loss: 0.01


# Prediction

In [9]:
model.eval()

# Move the inputs to whichever device the model's parameters are on instead
# of assuming CUDA, so the cell works for both CPU and GPU models.
model_device = next(model.parameters()).device
with torch.no_grad():
    outputs = model(train_X.to(model_device))

print(outputs)

# Print small values in fixed-point rather than scientific notation from here on.
torch.set_printoptions(sci_mode=False)
# Class membership probabilities (each row sums to 1); computed but not printed here.
probas = torch.softmax(outputs, dim=1)
# Predicted class = index of the largest logit in each row.
predictions = torch.argmax(outputs, dim=1)
print(predictions)

tensor([[ 3.1143, -4.4012],
        [ 2.2527, -3.2476],
        [ 1.8369, -2.6932],
        [-2.6072,  2.6559],
        [-2.8267,  2.8766]], device='cuda:0')
tensor([0, 0, 0, 1, 1], device='cuda:0')


# Accuracy check

In [11]:
def compute_accuracy(model: torch.nn.Module, data_loader: DataLoader) -> float:
    """Return the fraction of examples in ``data_loader`` that ``model`` classifies correctly.

    The model is switched to eval mode (and left in it). Each batch is moved
    to the device the model's parameters are on, so the function works for
    both CPU and GPU models.

    Args:
        model: Module mapping a batch of features to per-class scores along
            ``dim=1``.
        data_loader: Iterable yielding ``(features, labels)`` tensor batches.

    Returns:
        Correct predictions divided by the number of examples seen, as a
        Python float.

    Raises:
        ZeroDivisionError: If ``data_loader`` yields no examples.
    """
    model = model.eval()

    # Follow the model's own device rather than hard-coding "cuda"; a module
    # without parameters is evaluated on the CPU.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    # Keep the running count on the device so the host synchronises only
    # once, on the final .item() call.
    correct = torch.zeros((), dtype=torch.long, device=device)
    total_examples = 0

    with torch.no_grad():
        for features, labels in data_loader:
            features = features.to(device)
            labels = labels.to(device)

            predictions = torch.argmax(model(features), dim=1)
            compare = predictions == labels
            correct += compare.sum()
            total_examples += len(compare)

    return correct.item() / total_examples

# train_loader was created with drop_last=True and batch_size=2, so with five
# training examples one example is left out of every pass; the accuracy below
# covers only the examples in the full batches.
train_accuracy = compute_accuracy(model, train_loader)
print(train_accuracy)

1.0


# Save and load model

In [12]:
# Single source of truth for the checkpoint location.
checkpoint_path = "toy_model.pth"

# Persist only the parameters (state_dict), not the whole module object.
torch.save(model.state_dict(), checkpoint_path)

# The architecture must be rebuilt with the same sizes before the saved
# parameters can be loaded into it.
model = NeuralNetwork(input_size=2, output_size=2)

# The saved tensors remember the device they were stored from; map them to
# the CPU so the checkpoint also loads on a machine without a GPU. The freshly
# built model above lives on the CPU as well.
# NOTE(review): on recent PyTorch versions consider also passing
# weights_only=True to restrict unpickling to tensors - confirm the minimum
# supported torch version first.
state_dict = torch.load(checkpoint_path, map_location="cpu")
model.load_state_dict(state_dict)

print(model.state_dict())

OrderedDict([('layers.0.weight', tensor([[-0.3019,  0.0973],
        [-0.3558,  0.2890],
        [-0.6030,  0.5213],
        [-0.5200, -0.5465],
        [-0.4574,  0.3799],
        [-0.2740,  0.3343],
        [-0.5567, -0.5347],
        [-0.2396, -0.1879],
        [-0.5362,  0.4429],
        [-0.1750,  0.2742],
        [ 0.4953,  0.1550],
        [ 0.1864,  0.7209],
        [-0.3247,  0.2536],
        [-0.3240,  0.5583],
        [ 0.5893, -0.7109],
        [ 0.5970,  0.3706],
        [ 0.3037,  0.3125],
        [ 0.5897, -0.0972],
        [-0.5998, -0.1983],
        [-0.5086,  0.1027],
        [-0.0954, -0.4681],
        [-0.0715,  0.7096],
        [-0.0329, -0.0578],
        [-0.1236,  0.1872],
        [ 0.6216,  0.4837],
        [ 0.2510,  0.1537],
        [ 0.1525,  0.3560],
        [-0.0808, -0.6816],
        [-0.4417,  0.6238],
        [ 0.0595, -0.6132]])), ('layers.0.bias', tensor([ 0.1459,  0.5943, -0.6668, -0.4813, -0.2536,  0.0609, -0.1324, -0.0626,
        -0.6990,  0.3534, 

# Matrix Multiplication Timing: CPU vs GPU

In [13]:
# import torch

# # Generate random matrices
# N = 50  # Change this size to test different dimensions
# A = torch.randn(N, N)
# B = torch.randn(N, N)

# # CPU timing
# print("CPU timing:")
# %timeit A @ B

In [14]:
# import torch

# # Generate random matrices
# N = 50  # Change this size to test different dimensions
# A = torch.randn(N, N).to("cuda")
# B = torch.randn(N, N).to("cuda")

# # GPU timing
# print("GPU timing:")
# %timeit A @ B