# Defined the model

In [1]:
import torch
import torch.nn as nn

class MLP(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers):
        super(MLP, self).__init__()
        layers = [nn.Linear(input_size, hidden_size), nn.ReLU()]
        for _ in range(num_layers - 1):
            layers.append(nn.Linear(hidden_size, hidden_size))
            layers.append(nn.ReLU())
        layers.append(nn.Linear(hidden_size, output_size))
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        return self.network(x)

In [2]:
model = MLP(input_size=10, hidden_size=256, output_size=97, num_layers=8)
sample_input = torch.randn(1, 10)  
output = model(sample_input)
print(output)


tensor([[ 0.0050, -0.0040,  0.0423,  0.0034, -0.0156,  0.0745, -0.0224,  0.0519,
          0.0624,  0.0015, -0.0329,  0.0334,  0.0453, -0.0052,  0.0237, -0.0336,
          0.0279,  0.0869,  0.0472,  0.0602, -0.0537, -0.0458, -0.0439,  0.0099,
         -0.0476, -0.0374, -0.0012, -0.0093,  0.0200,  0.0359, -0.0043, -0.0044,
          0.0654,  0.0250, -0.0319, -0.0491, -0.0202, -0.0198,  0.0104, -0.0177,
         -0.0314, -0.0605, -0.0163, -0.0198,  0.0086, -0.0102,  0.0054, -0.0130,
          0.0522,  0.0267, -0.0290,  0.0001, -0.0479, -0.0420, -0.0515,  0.0466,
          0.0401,  0.0008, -0.0765, -0.0303, -0.0483,  0.0001, -0.0512, -0.0166,
         -0.0476, -0.0589, -0.0391,  0.0041,  0.0170, -0.0191, -0.0482,  0.0103,
         -0.0692, -0.0455, -0.0141,  0.0134, -0.0367,  0.0209, -0.0073, -0.0293,
          0.0084,  0.0078,  0.0209,  0.0183,  0.0430,  0.0799,  0.0680, -0.0251,
         -0.0284,  0.0609,  0.0417,  0.0313, -0.0603, -0.0116,  0.0107,  0.0194,
          0.0462]], grad_fn=

In [3]:
print(output.shape)


torch.Size([1, 97])


In [4]:
import torch.nn.functional as F
print(F.softmax(output, dim=1))


tensor([[0.0104, 0.0103, 0.0108, 0.0104, 0.0102, 0.0111, 0.0101, 0.0109, 0.0110,
         0.0103, 0.0100, 0.0107, 0.0108, 0.0103, 0.0106, 0.0100, 0.0106, 0.0113,
         0.0108, 0.0110, 0.0098, 0.0099, 0.0099, 0.0104, 0.0098, 0.0099, 0.0103,
         0.0102, 0.0105, 0.0107, 0.0103, 0.0103, 0.0110, 0.0106, 0.0100, 0.0098,
         0.0101, 0.0101, 0.0104, 0.0101, 0.0100, 0.0097, 0.0102, 0.0101, 0.0104,
         0.0102, 0.0104, 0.0102, 0.0109, 0.0106, 0.0100, 0.0103, 0.0098, 0.0099,
         0.0098, 0.0108, 0.0107, 0.0103, 0.0096, 0.0100, 0.0098, 0.0103, 0.0098,
         0.0101, 0.0098, 0.0097, 0.0099, 0.0104, 0.0105, 0.0101, 0.0098, 0.0104,
         0.0096, 0.0099, 0.0102, 0.0105, 0.0099, 0.0105, 0.0102, 0.0100, 0.0104,
         0.0104, 0.0105, 0.0105, 0.0108, 0.0112, 0.0110, 0.0101, 0.0100, 0.0110,
         0.0108, 0.0106, 0.0097, 0.0102, 0.0104, 0.0105, 0.0108]],
       grad_fn=<SoftmaxBackward0>)


In [5]:
sample_input = torch.randn(1, 10)
target = torch.tensor([1])  # Assume class 1 as target

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

optimizer.zero_grad()
output = model(sample_input)
loss = criterion(output, target)  # No need for unsqueeze()
loss.backward()
optimizer.step()

print("Loss after one step:", loss.item())


Loss after one step: 4.577897548675537


In [6]:
!jupyter nbconvert --to script models.ipynb
!jupyter nbconvert --to script train.ipynb
!jupyter nbconvert --to script utils.ipynb


[NbConvertApp] Converting notebook models.ipynb to script
[NbConvertApp] Writing 1511 bytes to models.py
[NbConvertApp] Converting notebook train.ipynb to script
[NbConvertApp] Writing 6276 bytes to train.py
[NbConvertApp] Converting notebook utils.ipynb to script
[NbConvertApp] Writing 826 bytes to utils.py
