# Defined the model

In [1]:
import torch
import torch.nn as nn

class MLP(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers):
        super(MLP, self).__init__()
        layers = [nn.Linear(input_size, hidden_size), nn.ReLU()]
        for _ in range(num_layers - 1):
            layers.append(nn.Linear(hidden_size, hidden_size))
            layers.append(nn.ReLU())
        layers.append(nn.Linear(hidden_size, output_size))
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        return self.network(x)

In [2]:
model = MLP(input_size=10, hidden_size=256, output_size=97, num_layers=8)
sample_input = torch.randn(1, 10)  
output = model(sample_input)
print(output)


tensor([[ 0.0353,  0.0568,  0.0020,  0.0078,  0.0643, -0.0054, -0.0188, -0.0138,
          0.0348, -0.0178,  0.0016, -0.0197,  0.0134,  0.0418,  0.0549,  0.0062,
         -0.0917, -0.0244, -0.0387,  0.0370,  0.0453,  0.0281, -0.0024, -0.0069,
          0.0228,  0.0113,  0.0230, -0.0478, -0.0411, -0.0343, -0.0273,  0.0158,
          0.0525, -0.0626, -0.0410,  0.0286,  0.0034,  0.0208,  0.0718, -0.0180,
          0.0339, -0.0008,  0.0492,  0.0303,  0.0689,  0.0144, -0.0317,  0.0271,
         -0.0574, -0.0385, -0.0117,  0.0140,  0.0024, -0.0536, -0.0284,  0.0580,
         -0.0129,  0.0264,  0.0150,  0.0581,  0.0851, -0.0467,  0.0120,  0.0434,
          0.0587, -0.0404, -0.0155, -0.0310,  0.0584,  0.0155,  0.0697, -0.0320,
          0.0243,  0.0347,  0.0395, -0.0535, -0.0026,  0.0200,  0.0077,  0.0069,
          0.0478,  0.0396, -0.0754,  0.0214,  0.0688,  0.0502, -0.0188,  0.0305,
          0.0210,  0.0047, -0.0113, -0.0908, -0.0573,  0.0527,  0.0093, -0.0924,
          0.0459]], grad_fn=

In [3]:
print(output.shape)


torch.Size([1, 97])


In [4]:
import torch.nn.functional as F
print(F.softmax(output, dim=1))


tensor([[0.0106, 0.0108, 0.0103, 0.0103, 0.0109, 0.0102, 0.0100, 0.0101, 0.0106,
         0.0101, 0.0103, 0.0100, 0.0104, 0.0107, 0.0108, 0.0103, 0.0093, 0.0100,
         0.0098, 0.0106, 0.0107, 0.0105, 0.0102, 0.0102, 0.0105, 0.0104, 0.0105,
         0.0098, 0.0098, 0.0099, 0.0100, 0.0104, 0.0108, 0.0096, 0.0098, 0.0105,
         0.0103, 0.0104, 0.0110, 0.0101, 0.0106, 0.0102, 0.0108, 0.0105, 0.0110,
         0.0104, 0.0099, 0.0105, 0.0097, 0.0098, 0.0101, 0.0104, 0.0103, 0.0097,
         0.0099, 0.0108, 0.0101, 0.0105, 0.0104, 0.0108, 0.0111, 0.0098, 0.0104,
         0.0107, 0.0109, 0.0098, 0.0101, 0.0099, 0.0108, 0.0104, 0.0110, 0.0099,
         0.0105, 0.0106, 0.0106, 0.0097, 0.0102, 0.0104, 0.0103, 0.0103, 0.0107,
         0.0106, 0.0095, 0.0105, 0.0110, 0.0108, 0.0100, 0.0106, 0.0105, 0.0103,
         0.0101, 0.0093, 0.0097, 0.0108, 0.0103, 0.0093, 0.0107]],
       grad_fn=<SoftmaxBackward0>)


In [5]:
sample_input = torch.randn(1, 10)
target = torch.tensor([1])  

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

optimizer.zero_grad()
output = model(sample_input)
loss = criterion(output, target)  
loss.backward()
optimizer.step()

print("Loss after one step:", loss.item())


Loss after one step: 4.524849891662598


In [6]:
!jupyter nbconvert --to script models.ipynb
!jupyter nbconvert --to script train.ipynb
!jupyter nbconvert --to script utils.ipynb


[NbConvertApp] Converting notebook models.ipynb to script
[NbConvertApp] Writing 1460 bytes to models.py
[NbConvertApp] Converting notebook train.ipynb to script
[NbConvertApp] Writing 3658 bytes to train.py
[NbConvertApp] Converting notebook utils.ipynb to script
[NbConvertApp] Writing 826 bytes to utils.py
