In [1]:
import torch
import torch.nn as nn

In [2]:
class Network(nn.Module): # line 1
    def __init__(self):
        super().__init__() # line 3
        
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)
        
        self.fc1 = nn.Linear(in_features=12*4*4, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=60)
        
        self.out = nn.Linear(in_features=60, out_features=10)

    def forward(self, t):
        # (1) input layer
        t = t

        # (2) hidden conv layer
        t = self.conv1(t)
        t = F.relu(t)
        t = F.max_pool2d(t, kernel_size=2, stride=2)

        # (3) hidden conv layer
        t = self.conv2(t)
        t = F.relu(t)
        t = F.max_pool2d(t, kernel_size=2, stride=2)

        # (4) hidden linear layer
        t = t.reshape(-1, 12 * 4 * 4)
        t = self.fc1(t)
        t = F.relu(t)

        # (5) hidden linear layer
        t = self.fc2(t)
        t = F.relu(t)

        # (6) output layer
        t = self.out(t)
        #t = F.softmax(t, dim=1)

        return t

In [3]:
network = Network()
print(network)

Network(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 12, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=192, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=60, bias=True)
  (out): Linear(in_features=60, out_features=10, bias=True)
)


In [4]:
network.conv1.weight

Parameter containing:
tensor([[[[ 0.0564, -0.1151,  0.0698, -0.0239,  0.0261],
          [-0.1239, -0.1419, -0.1524,  0.1216,  0.1305],
          [-0.1344,  0.0261,  0.0581,  0.1742, -0.1810],
          [-0.1065, -0.1023,  0.1844,  0.0747, -0.0813],
          [ 0.1111, -0.0055,  0.1381,  0.1526, -0.1498]]],


        [[[-0.1633, -0.0187, -0.0600,  0.1951, -0.1332],
          [-0.1123, -0.0191, -0.1614,  0.1760, -0.1332],
          [-0.0541,  0.1415, -0.1794,  0.0771,  0.0728],
          [-0.1125,  0.0329, -0.0207,  0.1133, -0.1104],
          [-0.1009,  0.0226,  0.1809, -0.1356, -0.0249]]],


        [[[-0.0956,  0.0973, -0.1210,  0.1265, -0.1015],
          [ 0.1245, -0.0057,  0.0342,  0.0855, -0.0614],
          [-0.0532, -0.1708,  0.1353,  0.0737, -0.1501],
          [-0.1299,  0.1292, -0.0800,  0.0566,  0.0559],
          [ 0.1778,  0.1460, -0.1109, -0.0745,  0.1373]]],


        [[[ 0.0967,  0.1335, -0.0588, -0.1110,  0.1073],
          [ 0.1626,  0.0066, -0.1869,  0.0977,  0.1560

In [5]:
network.conv1

Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))

In [6]:
network.conv1.weight.shape

torch.Size([6, 1, 5, 5])

In [7]:
for param in network.parameters():
    print(param.shape)

torch.Size([6, 1, 5, 5])
torch.Size([6])
torch.Size([12, 6, 5, 5])
torch.Size([12])
torch.Size([120, 192])
torch.Size([120])
torch.Size([60, 120])
torch.Size([60])
torch.Size([10, 60])
torch.Size([10])


In [8]:
in_features = torch.tensor([1,2,3,4], dtype=torch.float32)
weight_matrix = torch.tensor([
    [1,2,3,4],
    [2,3,4,5],
    [3,4,5,6]
], dtype=torch.float32)
weight_matrix.matmul(in_features)

tensor([30., 40., 50.])

In [9]:
import torch
import torchvision
import torchvision.transforms as transforms

train_set = torchvision.datasets.FashionMNIST(
    root='./data'
    ,train=True
    ,download=True
    ,transform=transforms.Compose([
        transforms.ToTensor()
    ])
)

image, label = train_set[0]

print(image.shape)

torch.Size([1, 28, 28])


In [10]:
sample = next(iter(train_set))

In [11]:
image, label = sample

In [12]:
image.shape

torch.Size([1, 28, 28])

In [13]:
image.unsqueeze(0).shape

torch.Size([1, 1, 28, 28])

In [14]:
import torch.nn.functional as F
pred = network(image.unsqueeze(0))

In [15]:
pred

tensor([[ 0.0348, -0.0018, -0.0473,  0.0164, -0.0707, -0.0216, -0.0397, -0.0127,
          0.1149,  0.0682]], grad_fn=<AddmmBackward>)

In [16]:
pred.shape

torch.Size([1, 10])

In [17]:
label

9

In [18]:
pred.argmax(dim=1)

tensor([8])

In [19]:
F.softmax(pred, dim=1)

tensor([[0.1030, 0.0993, 0.0949, 0.1011, 0.0927, 0.0973, 0.0956, 0.0982, 0.1116,
         0.1065]], grad_fn=<SoftmaxBackward>)

In [20]:
F.softmax(pred, dim=1).sum()

tensor(1.0000, grad_fn=<SumBackward0>)

In [21]:
net1 = Network()

In [22]:
net1(image.unsqueeze(0))

tensor([[ 0.1341,  0.0850,  0.1585, -0.0465,  0.0904,  0.0630, -0.1199,  0.1102,
         -0.0686,  0.0241]], grad_fn=<AddmmBackward>)

In [23]:
net2 = Network()

In [24]:
net2(image.unsqueeze(0))

tensor([[ 0.0301, -0.0564, -0.0473,  0.0453,  0.0887,  0.1169, -0.0273, -0.0470,
          0.0727,  0.1437]], grad_fn=<AddmmBackward>)

In [25]:
data_loader = torch.utils.data.DataLoader(
     train_set, batch_size=10
)

In [26]:
batch = next(iter(data_loader))

In [27]:
images, labels = batch

In [28]:
images.shape

torch.Size([10, 1, 28, 28])

In [29]:
labels.shape

torch.Size([10])

In [30]:
preds = network(images)

In [31]:
preds.shape

torch.Size([10, 10])

In [32]:
preds

tensor([[ 0.0348, -0.0018, -0.0473,  0.0164, -0.0707, -0.0216, -0.0397, -0.0127,
          0.1149,  0.0682],
        [ 0.0449,  0.0009, -0.0440,  0.0102, -0.0621, -0.0162, -0.0436, -0.0184,
          0.1062,  0.0704],
        [ 0.0405,  0.0045, -0.0483,  0.0147, -0.0465, -0.0257, -0.0411, -0.0342,
          0.1116,  0.0769],
        [ 0.0397,  0.0033, -0.0502,  0.0162, -0.0475, -0.0225, -0.0455, -0.0284,
          0.1114,  0.0765],
        [ 0.0461,  0.0076, -0.0465,  0.0112, -0.0577, -0.0204, -0.0454, -0.0207,
          0.1106,  0.0680],
        [ 0.0388,  0.0042, -0.0487,  0.0184, -0.0585, -0.0193, -0.0453, -0.0184,
          0.1124,  0.0714],
        [ 0.0468, -0.0024, -0.0488,  0.0222, -0.0510, -0.0277, -0.0415, -0.0222,
          0.1226,  0.0751],
        [ 0.0417,  0.0035, -0.0462,  0.0126, -0.0729, -0.0198, -0.0434, -0.0144,
          0.1157,  0.0730],
        [ 0.0350,  0.0060, -0.0527,  0.0224, -0.0458, -0.0308, -0.0428, -0.0310,
          0.1160,  0.0777],
        [ 0.0302,  

In [33]:
preds.argmax(dim=1)

tensor([8, 8, 8, 8, 8, 8, 8, 8, 8, 8])

In [34]:
labels

tensor([9, 0, 0, 3, 0, 2, 7, 2, 5, 5])

In [35]:
preds.argmax(dim=1).eq(labels)

tensor([False, False, False, False, False, False, False, False, False, False])

In [36]:
preds.argmax(dim=1).eq(labels).sum()

tensor(0)

In [37]:
def get_num_correct(preds, labels):
    return preds.argmax(dim=1).eq(labels).sum().item()

In [38]:
 get_num_correct(preds, labels)

0

In [39]:
network = Network()

In [40]:
train_loader = torch.utils.data.DataLoader(train_set, batch_size=100)

In [41]:
batch = next(iter(train_loader)) # Getting a batch

In [42]:
images, labels = batch

In [43]:
preds = network(images)

In [44]:
loss = F.cross_entropy(preds, labels) # Calculating the loss

In [45]:
loss.item()

2.3226828575134277

In [46]:
get_num_correct(preds, labels)

9

In [47]:
network.conv1.weight.grad

In [48]:
loss.backward() # Calculating the gradients

In [49]:
network.conv1.weight.grad.shape

torch.Size([6, 1, 5, 5])

In [50]:
import torch.optim as optim
optimizer = optim.Adam(network.parameters(), lr=0.01)

In [51]:
optimizer.step() # Updating the weights

In [52]:
preds = network(images)

In [53]:
loss.item()

2.3226828575134277

In [54]:
loss = F.cross_entropy(preds, labels)

In [55]:
get_num_correct(preds, labels)

11

### Training With A Single Batch

In [56]:
network = Network()

train_loader = torch.utils.data.DataLoader(train_set, batch_size=100)
optimizer = optim.Adam(network.parameters(), lr=0.01)

batch = next(iter(train_loader)) # Get Batch
images, labels = batch

preds = network(images) # Pass Batch
loss = F.cross_entropy(preds, labels) # Calculate Loss

loss.backward() # Calculate Gradients
optimizer.step() # Update Weights

print('loss1:', loss.item())
preds = network(images)
loss = F.cross_entropy(preds, labels)
print('loss2:', loss.item())

loss1: 2.30739164352417
loss2: 2.2707362174987793


### Training With All Batches (Single Epoch)

In [58]:
network = Network()

train_loader = torch.utils.data.DataLoader(train_set, batch_size=100)
optimizer = optim.Adam(network.parameters(), lr=0.01)

total_loss = 0
total_correct = 0

for batch in train_loader: # Get Batch
    images, labels = batch 

    preds = network(images) # Pass Batch
    loss = F.cross_entropy(preds, labels) # Calculate Loss

    optimizer.zero_grad()
    loss.backward() # Calculate Gradients
    optimizer.step() # Update Weights

    total_loss += loss.item()
    total_correct += get_num_correct(preds, labels)

print(
    "epoch:", 0, 
    "total_correct:", total_correct, 
    "loss:", total_loss
)

epoch: 0 total_correct: 46370 loss: 356.8754049241543


In [57]:
network = Network()

train_loader = torch.utils.data.DataLoader(train_set, batch_size=100)
optimizer = optim.Adam(network.parameters(), lr=0.01)

for epoch in range(10):

    total_loss = 0
    total_correct = 0

    for batch in train_loader: # Get Batch
        images, labels = batch 

        preds = network(images) # Pass Batch
        loss = F.cross_entropy(preds, labels) # Calculate Loss

        optimizer.zero_grad()
        loss.backward() # Calculate Gradients
        optimizer.step() # Update Weights

        total_loss += loss.item()
        total_correct += get_num_correct(preds, labels)

    print(
        "epoch", epoch, 
        "total_correct:", total_correct, 
        "loss:", total_loss
    )

epoch 0 total_correct: 47338 loss: 336.3997004330158
epoch 1 total_correct: 51539 loss: 231.2600298523903
epoch 2 total_correct: 52353 loss: 207.17116010189056
epoch 3 total_correct: 52704 loss: 198.48707570135593
epoch 4 total_correct: 52957 loss: 189.3464087843895
epoch 5 total_correct: 53072 loss: 188.02525487542152
epoch 6 total_correct: 53171 loss: 184.0095544308424
epoch 7 total_correct: 53237 loss: 183.23887059092522
epoch 8 total_correct: 53363 loss: 180.01661559939384
epoch 9 total_correct: 53501 loss: 176.6336236447096
