In [1]:
import torch
import torch.nn as nn

In [7]:
class Network(nn.Module):
    """Small LeNet-style CNN that maps 1x28x28 images to 10 class logits.

    Layer layout (spatial sizes for a 28x28 input):
        conv1 (5x5, 1 -> 6 channels)  : 28 -> 24, max-pool /2 -> 12
        conv2 (5x5, 6 -> 12 channels) : 12 -> 8,  max-pool /2 -> 4
        fc1   (12*4*4 -> 120), fc2 (120 -> 60), out (60 -> 10)

    The functional ops are reached through ``nn.functional`` so the class only
    needs ``torch.nn`` to be imported and does not depend on an ``F`` alias
    that may be defined in a later cell.
    """

    def __init__(self):
        super().__init__()

        # Convolutional feature extractor.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)

        # Fully connected classifier head; 12*4*4 is the flattened conv2 output
        # for a 28x28 input after the two pooling stages.
        self.fc1 = nn.Linear(in_features=12 * 4 * 4, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=60)

        self.out = nn.Linear(in_features=60, out_features=10)

    def forward(self, t):
        """Run a forward pass.

        Args:
            t: image tensor of shape (batch, 1, 28, 28). A single unbatched
                image of shape (1, 28, 28) is also accepted and is treated as
                a batch of one.

        Returns:
            Tensor of shape (batch, 10) holding raw, un-normalised logits.

        Raises:
            RuntimeError: if the spatial size of the input does not produce
                12*4*4 features per sample (raised by ``fc1``).
        """
        # (1) input layer: promote an unbatched (C, H, W) image to (1, C, H, W).
        if t.dim() == 3:
            t = t.unsqueeze(0)

        # (2) hidden conv layer
        t = self.conv1(t)
        t = nn.functional.relu(t)
        t = nn.functional.max_pool2d(t, kernel_size=2, stride=2)

        # (3) hidden conv layer
        t = self.conv2(t)
        t = nn.functional.relu(t)
        t = nn.functional.max_pool2d(t, kernel_size=2, stride=2)

        # (4) hidden linear layer
        # Flatten everything except the batch axis. Unlike reshape(-1, 12*4*4),
        # this can never silently fold a wrongly sized input into a different
        # batch size; a mismatch surfaces as a shape error in fc1 instead.
        t = t.flatten(start_dim=1)
        t = self.fc1(t)
        t = nn.functional.relu(t)

        # (5) hidden linear layer
        t = self.fc2(t)
        t = nn.functional.relu(t)

        # (6) output layer
        # No softmax here: the logits are passed to F.cross_entropy, which
        # applies log-softmax itself.
        t = self.out(t)

        return t

In [8]:
network = Network()
print(network)

Network(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 12, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=192, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=60, bias=True)
  (out): Linear(in_features=60, out_features=10, bias=True)
)


In [9]:
network.conv1.weight

Parameter containing:
tensor([[[[ 0.1023, -0.1311,  0.1738,  0.1840,  0.0746],
          [ 0.0403,  0.0217, -0.1621, -0.1650, -0.1859],
          [-0.0626,  0.0943,  0.1114, -0.1727, -0.0863],
          [-0.1328,  0.1407,  0.0209,  0.0351,  0.0996],
          [ 0.0621,  0.0615, -0.1279,  0.0913, -0.1967]]],


        [[[-0.0865,  0.1470, -0.0813,  0.1386,  0.1826],
          [-0.0130,  0.1709,  0.1822, -0.1651,  0.0053],
          [ 0.1550,  0.0429,  0.0872,  0.1088,  0.1410],
          [ 0.0690,  0.1905,  0.0655,  0.1373,  0.1283],
          [ 0.1205, -0.1286, -0.1162,  0.1069,  0.0712]]],


        [[[-0.0354, -0.0203, -0.1192,  0.0292, -0.1325],
          [ 0.1895, -0.0730,  0.1036,  0.1530,  0.0496],
          [ 0.1922,  0.0338, -0.0542, -0.1205, -0.1873],
          [-0.1580, -0.0137, -0.1874,  0.1612,  0.0094],
          [ 0.0818,  0.1888,  0.1519, -0.0760,  0.0982]]],


        [[[-0.1860,  0.0783, -0.0201,  0.0156,  0.0825],
          [ 0.0180,  0.0060,  0.0340, -0.0976, -0.1954

In [10]:
network.conv1

Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))

In [11]:
network.conv1.weight.shape

torch.Size([6, 1, 5, 5])

In [12]:
for param in network.parameters():
    print(param.shape)

torch.Size([6, 1, 5, 5])
torch.Size([6])
torch.Size([12, 6, 5, 5])
torch.Size([12])
torch.Size([120, 192])
torch.Size([120])
torch.Size([60, 120])
torch.Size([60])
torch.Size([10, 60])
torch.Size([10])


In [13]:
# What a linear layer computes at its core: a (3, 4) weight matrix applied to a
# length-4 feature vector gives one output value per weight row.
in_features = torch.tensor([1, 2, 3, 4], dtype=torch.float32)
weight_matrix = torch.tensor(
    [[1, 2, 3, 4],
     [2, 3, 4, 5],
     [3, 4, 5, 6]],
    dtype=torch.float32,
)
weight_matrix @ in_features

tensor([30., 40., 50.])

In [14]:
import torch
import torchvision
import torchvision.transforms as transforms

# Fashion-MNIST training split, downloaded into ./data when it is not already
# there; ToTensor converts each image to a tensor.
train_set = torchvision.datasets.FashionMNIST(
    root='./data',
    train=True,
    download=True,
    transform=transforms.Compose([transforms.ToTensor()]),
)

# Every dataset item is an (image, label) pair; inspect the first one.
image, label = train_set[0]

print(image.shape)

torch.Size([1, 28, 28])


In [15]:
sample = next(iter(train_set))

In [16]:
image, label = sample

In [17]:
image.shape

torch.Size([1, 28, 28])

In [18]:
image.unsqueeze(0).shape

torch.Size([1, 1, 28, 28])

In [19]:
import torch.nn.functional as F
pred = network(image.unsqueeze(0))

In [20]:
pred

tensor([[ 0.0383, -0.0668, -0.0240,  0.0176, -0.0266,  0.1023, -0.1141,  0.1147,
         -0.1150, -0.0335]], grad_fn=<AddmmBackward>)

In [21]:
pred.shape

torch.Size([1, 10])

In [22]:
label

9

In [23]:
pred.argmax(dim=1)

tensor([7])

In [24]:
F.softmax(pred, dim=1)

tensor([[0.1047, 0.0943, 0.0984, 0.1026, 0.0981, 0.1116, 0.0899, 0.1130, 0.0898,
         0.0975]], grad_fn=<SoftmaxBackward>)

In [25]:
F.softmax(pred, dim=1).sum()

tensor(1., grad_fn=<SumBackward0>)

In [26]:
net1 = Network()

In [27]:
net1(image.unsqueeze(0))

tensor([[ 0.0895, -0.0095,  0.0118, -0.0547, -0.0729,  0.0171,  0.1351, -0.1391,
         -0.0711, -0.0069]], grad_fn=<AddmmBackward>)

In [28]:
net2 = Network()

In [29]:
net2(image.unsqueeze(0))

tensor([[-0.1147, -0.0087,  0.0403, -0.0019, -0.1280,  0.1161, -0.0629, -0.0701,
         -0.1003, -0.1356]], grad_fn=<AddmmBackward>)

In [30]:
data_loader = torch.utils.data.DataLoader(
     train_set, batch_size=10
)

In [31]:
batch = next(iter(data_loader))

In [32]:
images, labels = batch

In [33]:
images.shape

torch.Size([10, 1, 28, 28])

In [34]:
labels.shape

torch.Size([10])

In [35]:
preds = network(images)

In [36]:
preds.shape

torch.Size([10, 10])

In [37]:
preds

tensor([[ 0.0383, -0.0668, -0.0240,  0.0176, -0.0266,  0.1023, -0.1141,  0.1147,
         -0.1150, -0.0335],
        [ 0.0231, -0.0649, -0.0088,  0.0121, -0.0376,  0.1043, -0.1118,  0.1063,
         -0.1054, -0.0290],
        [ 0.0427, -0.0915, -0.0257, -0.0284, -0.0110,  0.0952, -0.1125,  0.0843,
         -0.1088, -0.0443],
        [ 0.0402, -0.0836, -0.0159, -0.0130, -0.0143,  0.0948, -0.1107,  0.0877,
         -0.1082, -0.0418],
        [ 0.0334, -0.0592, -0.0056,  0.0078, -0.0383,  0.1026, -0.1138,  0.1084,
         -0.1134, -0.0357],
        [ 0.0257, -0.0592, -0.0108,  0.0135, -0.0278,  0.1031, -0.1166,  0.1087,
         -0.1173, -0.0457],
        [ 0.0331, -0.0648, -0.0012,  0.0151, -0.0246,  0.0996, -0.1231,  0.1043,
         -0.1110, -0.0370],
        [ 0.0275, -0.0514, -0.0149,  0.0259, -0.0426,  0.1060, -0.1248,  0.1160,
         -0.1163, -0.0455],
        [ 0.0469, -0.1023, -0.0415, -0.0378, -0.0167,  0.0961, -0.1060,  0.0807,
         -0.0960, -0.0343],
        [ 0.0438, -

In [38]:
preds.argmax(dim=1)

tensor([7, 7, 5, 5, 7, 7, 7, 7, 5, 5])

In [39]:
labels

tensor([9, 0, 0, 3, 0, 2, 7, 2, 5, 5])

In [40]:
preds.argmax(dim=1).eq(labels)

tensor([False, False, False, False, False, False,  True, False,  True,  True])

In [41]:
preds.argmax(dim=1).eq(labels).sum()

tensor(3)

In [42]:
def get_num_correct(preds, labels):
    """Count how many rows of ``preds`` have their largest score at the true label.

    Args:
        preds: 2-D tensor of per-class scores, one row per sample.
        labels: tensor of class indices, compared element-wise with the
            per-row argmax of ``preds``.

    Returns:
        The number of matching positions as a plain Python number.
    """
    predicted_classes = preds.argmax(dim=1)
    hits = predicted_classes.eq(labels)
    n_correct = hits.sum()
    return n_correct.item()

In [43]:
 get_num_correct(preds, labels)

3

In [44]:
network = Network()

In [45]:
train_loader = torch.utils.data.DataLoader(train_set, batch_size=100)

In [46]:
batch = next(iter(train_loader)) # Getting a batch

In [47]:
images, labels = batch

In [48]:
preds = network(images)

In [49]:
loss = F.cross_entropy(preds, labels) # Calculating the loss

In [50]:
loss.item()

2.313504219055176

In [51]:
get_num_correct(preds, labels)

8

In [52]:
network.conv1.weight.grad

In [53]:
loss.backward() # Calculating the gradients

In [54]:
network.conv1.weight.grad.shape

torch.Size([6, 1, 5, 5])

In [55]:
import torch.optim as optim
optimizer = optim.Adam(network.parameters(), lr=0.01)

In [56]:
optimizer.step() # Updating the weights

In [57]:
preds = network(images)

In [58]:
# NOTE: `loss` has not been reassigned since before optimizer.step(), so this
# still shows the pre-update value. Only `preds` was refreshed in the previous
# cell; the post-update loss is computed in the next cell.
loss.item()

2.313504219055176

In [59]:
loss = F.cross_entropy(preds, labels)

In [60]:
get_num_correct(preds, labels)

15

In [61]:
# One complete training step on a single batch, printing the loss on that
# batch before and after the weight update.
network = Network()

train_loader = torch.utils.data.DataLoader(train_set, batch_size=100)
optimizer = optim.Adam(network.parameters(), lr=0.01)

batch = next(iter(train_loader))  # Get Batch
images, labels = batch

preds = network(images)  # Pass Batch
loss = F.cross_entropy(preds, labels)  # Calculate Loss

# Same step order as the full training loop: clear any existing gradients
# first, because backward() accumulates into .grad rather than overwriting it.
optimizer.zero_grad()
loss.backward()  # Calculate Gradients
optimizer.step()  # Update Weights

print('loss1:', loss.item())
# Re-run the same batch through the updated network to see the effect of the step.
preds = network(images)
loss = F.cross_entropy(preds, labels)
print('loss2:', loss.item())

loss1: 2.302359104156494
loss2: 2.260479211807251


In [62]:
# Full training run: a fresh network trained with Adam for 10 passes over the
# training set in batches of 100, printing per-epoch totals.
network = Network()

train_loader = torch.utils.data.DataLoader(train_set, batch_size=100)
optimizer = optim.Adam(network.parameters(), lr=0.01)

for epoch in range(10):

    # Running totals, reset at the start of every epoch.
    total_loss = 0
    total_correct = 0

    for batch in train_loader:
        images, labels = batch

        # Forward pass and loss for this batch.
        preds = network(images)
        loss = F.cross_entropy(preds, labels)

        # Backward pass: clear old gradients, compute new ones, apply the update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # total_loss is the sum of the per-batch loss values, not a per-sample average.
        total_correct += get_num_correct(preds, labels)
        total_loss += loss.item()

    print(f"epoch {epoch} total_correct: {total_correct} loss: {total_loss}")

epoch 0 total_correct: 46698 loss: 352.7399991750717
epoch 1 total_correct: 51257 loss: 236.682806417346
epoch 2 total_correct: 51996 loss: 216.49779860675335
epoch 3 total_correct: 52359 loss: 205.14514309167862
epoch 4 total_correct: 52547 loss: 199.6464980840683
epoch 5 total_correct: 52680 loss: 198.60929857939482
epoch 6 total_correct: 52723 loss: 197.28840070962906
epoch 7 total_correct: 53007 loss: 188.86546172201633
epoch 8 total_correct: 52891 loss: 191.56919303536415
epoch 9 total_correct: 53110 loss: 184.3559466674924
