In [1]:
import numpy as np

In [2]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

In [17]:
# Fetch both FashionMNIST splits into ./data (download=True requests a download
# when needed) and attach ToTensor() as the per-sample transform.
training_data = datasets.FashionMNIST(root="data", train=True, download=True, transform=ToTensor())
test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor(),
)

In [20]:
# Show the dataset summary, then the concrete class of the dataset object.
print(training_data, type(training_data), sep="\n")

Dataset FashionMNIST
    Number of datapoints: 60000
    Root location: data
    Split: Train
    StandardTransform
Transform: ToTensor()
<class 'torchvision.datasets.mnist.FashionMNIST'>


In [24]:
type(ToTensor)

type

In [27]:
DataLoader

torch.utils.data.dataloader.DataLoader

In [28]:
# Number of samples per mini-batch, shared by both loaders.
batch_size = 64

# Wrap each split in a DataLoader that yields batches of `batch_size` samples.
train_dataloader, test_dataloader = (
    DataLoader(split, batch_size=batch_size) for split in (training_data, test_data)
)

In [34]:
# Peek at a single batch to confirm tensor shapes and dtypes.
# next(iter(...)) pulls exactly one batch, which replaces the loop-and-break
# pattern together with its unused batch counter.
X, y = next(iter(test_dataloader))
print(f"Shape of X [N, C, H, W]: {X.shape} {X.dtype}")
print(f"Shape of y: {y.shape} {y.dtype}")

Shape of X [N, C, H, W]: torch.Size([64, 1, 28, 28]) torch.float32
Shape of y: torch.Size([64]) torch.int64


In [35]:
# Use the GPU when CUDA is available; otherwise run on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f"using {device} device")

class NeuralNetwork(nn.Module):
    """Fully connected classifier: Flatten -> Linear/ReLU -> Linear/ReLU -> Linear.

    Args:
        in_features: Number of values per sample once flattened
            (default ``28*28``).
        hidden_features: Width of both hidden layers (default ``512``).
        num_classes: Number of output logits (default ``10``).

    Calling ``NeuralNetwork()`` with no arguments builds the same
    784-512-512-10 network as before; the sizes are only exposed so the
    class can be reused with other input shapes or label counts.
    """

    def __init__(self, in_features: int = 28 * 28, hidden_features: int = 512,
                 num_classes: int = 10):
        super().__init__()
        # nn.Flatten() keeps dim 0 and collapses every remaining dim into one.
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, x):
        """Return the raw (un-normalised) class logits for input ``x``.

        ``x`` is flattened from dim 1 onward, so dim 0 is treated as the
        batch dimension and the remaining dims must multiply to
        ``in_features``.
        """
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits


using cpu device


In [140]:
# Build the network, move it to the selected device, and print its layout.
model = NeuralNetwork()
model = model.to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [42]:
# type(model)
print(type(nn.Softmax))

<class 'type'>


In [44]:
# nn.Softmax

In [45]:
# Push one random 28x28 input through the model.
X = torch.rand((1, 28, 28), device=device)
logits = model(X)
# Softmax over dim 1 turns each row of logits into a probability distribution;
# the arg-max along that same dim is the predicted class index.
pred_probab = nn.Softmax(dim=1)(logits)
y_pred = torch.argmax(pred_probab, dim=1)

In [46]:
print(f"Predicted class: {y_pred}")

Predicted class: tensor([6])


In [49]:
# nn.Softmax(dim=1)
logits

tensor([[-0.1311, -0.0647, -0.0620, -0.0078, -0.1267, -0.0083,  0.0321, -0.0430,
         -0.1124, -0.0366]], grad_fn=<AddmmBackward0>)

In [55]:
# Sanity check: the softmax output should sum to 1.
# .cpu() is required before .numpy() whenever the tensor lives on a CUDA device;
# it is a no-op for tensors that are already on the CPU.
pred_probab.detach().cpu().numpy().sum()

1.0

In [60]:
pred_probab

tensor([[0.0926, 0.0990, 0.0993, 0.1048, 0.0931, 0.1047, 0.1091, 0.1012, 0.0944,
         0.1018]], grad_fn=<SoftmaxBackward0>)

In [65]:
# argmax() with no argument indexes the flattened tensor; argmax(1) reduces along dim 1.
print(pred_probab.argmax(), pred_probab.argmax(1), sep="\n")

tensor(6)
tensor([6])


In [69]:
# Show the scalar type obtained when the predicted index is pulled out via NumPy.
# .cpu() keeps the conversion working when the tensor is on a CUDA device.
# (The former bare expression on the first line was discarded by the cell and has been dropped.)
print(type(pred_probab.argmax(1).cpu().numpy()[0]))

<class 'numpy.int64'>


In [71]:
# Stand-in mini-batch: three random 28x28 images.
input_image = torch.rand((3, 28, 28))
print(input_image.shape)

torch.Size([3, 28, 28])


In [72]:
# nn.Flatten keeps dim 0 and merges the remaining dims: (3, 28, 28) -> (3, 784).
flatten = nn.Flatten()
flat_image = flatten(input_image)
print(flat_image.shape)

torch.Size([3, 784])


In [73]:
# A linear layer mapping each flattened 784-value image to 20 hidden features.
layer1 = nn.Linear(28 * 28, 20)
hidden1 = layer1(flat_image)
print(hidden1.shape)

torch.Size([3, 20])


In [74]:
# Print the activations, apply ReLU (negatives become 0), and print again.
# Note that `hidden1` is rebound to the activated tensor.
print(f"Before ReLU: {hidden1}\n\n")
relu = nn.ReLU()
hidden1 = relu(hidden1)
del relu  # keep the notebook namespace unchanged apart from hidden1
print(f"After ReLU: {hidden1}")

Before ReLU: tensor([[-0.0875, -0.1183,  0.1280,  0.0654, -0.7107, -0.2359,  0.0894, -0.2541,
          0.2653,  0.4125, -0.3341,  0.0292,  0.4587, -0.3108, -0.2459,  0.5545,
          0.0916, -0.0042,  0.3453,  0.1377],
        [ 0.0277,  0.0812,  0.2251,  0.0594, -0.4826, -0.0345,  0.1187,  0.0009,
         -0.0460,  0.2232, -0.0452, -0.6066,  0.3425, -0.6105, -0.2486,  0.5447,
          0.3980,  0.1251,  0.3853,  0.3901],
        [ 0.1157,  0.0675,  0.1639, -0.0406, -0.4993,  0.0405,  0.1111, -0.2823,
          0.3881, -0.2318,  0.0764, -0.3150,  0.5621, -0.3444, -0.2126,  0.6391,
         -0.0165, -0.0760,  0.2251,  0.3621]], grad_fn=<AddmmBackward0>)


After ReLU: tensor([[0.0000, 0.0000, 0.1280, 0.0654, 0.0000, 0.0000, 0.0894, 0.0000, 0.2653,
         0.4125, 0.0000, 0.0292, 0.4587, 0.0000, 0.0000, 0.5545, 0.0916, 0.0000,
         0.3453, 0.1377],
        [0.0277, 0.0812, 0.2251, 0.0594, 0.0000, 0.0000, 0.1187, 0.0009, 0.0000,
         0.2232, 0.0000, 0.0000, 0.3425, 0.0000, 0.00

In [101]:
input_image = torch.rand(3, 28, 28)

In [108]:
# Chain the existing `flatten` and `layer1` modules with a fresh ReLU and
# output layer, ending in a softmax over dim 1 so every output row is a
# probability distribution rather than raw logits.
_stages = [
    flatten,
    layer1,
    nn.ReLU(),
    nn.Linear(20, 10),
    nn.Softmax(dim=1),
]
seq_modules = nn.Sequential(*_stages)
del _stages  # temporary list only; not part of the notebook's state

In [109]:
logits_pred_prob = seq_modules(input_image)

In [104]:
logits

tensor([[ 0.0900, -0.0867, -0.1786, -0.0289,  0.1144,  0.2024, -0.0613,  0.2840,
          0.1078, -0.0157],
        [ 0.0997, -0.1531, -0.2072, -0.0981,  0.1355,  0.2345,  0.0905,  0.1312,
         -0.0257, -0.0912],
        [-0.0190, -0.0955, -0.1406,  0.0561,  0.1879,  0.2539,  0.0438, -0.0611,
          0.0421, -0.1605]], grad_fn=<AddmmBackward0>)

In [105]:
softmax = nn.Softmax(dim=1)

In [106]:
pred_probab = softmax(logits)

In [107]:
pred_probab

tensor([[0.1039, 0.0871, 0.0794, 0.0922, 0.1065, 0.1162, 0.0893, 0.1261, 0.1058,
         0.0935],
        [0.1082, 0.0840, 0.0796, 0.0888, 0.1121, 0.1238, 0.1072, 0.1116, 0.0954,
         0.0894],
        [0.0963, 0.0892, 0.0852, 0.1038, 0.1184, 0.1265, 0.1025, 0.0923, 0.1023,
         0.0836]], grad_fn=<SoftmaxBackward0>)

In [111]:
# pred_probab.sum()
pred_probab.argmax(1)

tensor([7, 5, 5])

In [110]:
logits_pred_prob

tensor([[0.1246, 0.1128, 0.0974, 0.0844, 0.1250, 0.0893, 0.1072, 0.0729, 0.0875,
         0.0987],
        [0.1172, 0.1144, 0.0964, 0.0882, 0.1182, 0.0899, 0.1088, 0.0759, 0.0953,
         0.0958],
        [0.1139, 0.1174, 0.0917, 0.0829, 0.1226, 0.0824, 0.1127, 0.0750, 0.0967,
         0.1046]], grad_fn=<SoftmaxBackward0>)

In [112]:
logits_pred_prob.argmax(1)

tensor([4, 4, 4])

In [113]:
print(f"Model structure: {model}\n\n")

Model structure: NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)




In [128]:
# Count every scalar parameter in the model.
# Tensor.numel() reports the element count directly, so no NumPy conversion is
# needed (that conversion fails for parameters stored on a CUDA device).
count = sum(p.numel() for p in model.parameters())
print(count)
print(type(count))

669706
<class 'int'>


In [130]:
# Inspect the type np.size returns for a 10-element array, then display the size itself.
print(type(np.size(np.arange(10))))
np.size(np.arange(10))

<class 'int'>


10

In [141]:
print(f"Model structure: {model}\n\n")

Model structure: NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)




In [142]:
# List every learnable tensor in the model: its qualified name, its full size,
# and a preview made of its first two entries along dim 0.
# `name` and `param` stay bound to the last parameter after the loop finishes.
for name, param in model.named_parameters():
    print(f"Layer: {name} | size: {param.size()} | values: {param[:2]} \n")

Layer: linear_relu_stack.0.weight | size: torch.Size([512, 784]) | values: tensor([[ 0.0166, -0.0166, -0.0233,  ..., -0.0357, -0.0063, -0.0205],
        [ 0.0219,  0.0316,  0.0141,  ...,  0.0282, -0.0215,  0.0313]],
       grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.0.bias | size: torch.Size([512]) | values: tensor([-0.0343,  0.0356], grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.weight | size: torch.Size([512, 512]) | values: tensor([[ 0.0099, -0.0206,  0.0243,  ...,  0.0016, -0.0005,  0.0117],
        [-0.0412, -0.0043,  0.0129,  ...,  0.0233, -0.0384,  0.0377]],
       grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.bias | size: torch.Size([512]) | values: tensor([0.0044, 0.0206], grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.4.weight | size: torch.Size([10, 512]) | values: tensor([[ 0.0417, -0.0268, -0.0175,  ...,  0.0322,  0.0350,  0.0039],
        [ 0.0102, -0.0389,  0.0117,  ..., -0.0179, -0.0188,  0.0393]],
       grad_fn=<SliceBackward0>) 

Laye

In [133]:
param

Parameter containing:
tensor([-0.0235, -0.0035,  0.0218,  0.0157, -0.0312, -0.0141,  0.0009,  0.0031,
        -0.0260, -0.0206], requires_grad=True)

In [134]:
param[:2]

tensor([-0.0235, -0.0035], grad_fn=<SliceBackward0>)

# Optimizing the Model Parameters

In [143]:
# Cross-entropy over the model's raw logits is the training objective.
loss_fn = nn.CrossEntropyLoss()
# Plain stochastic gradient descent over all model parameters, learning rate 0.001.
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=1e-3,
)

In [136]:
def train(dataloader, model, loss_fn, optimizer):
    """Run one training pass over ``dataloader``, updating ``model`` in place.

    Args:
        dataloader: Iterable of ``(X, y)`` batches that also exposes a sized
            ``.dataset`` (used only for the progress read-out).
        model: The ``nn.Module`` being optimised.
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar loss tensor.
        optimizer: Optimizer that updates ``model``'s parameters.

    The loss of every 100th batch is printed together with the number of
    samples processed before that batch.
    """
    size = len(dataloader.dataset)

    # Send batches to wherever the model's parameters live instead of relying
    # on a notebook-level `device` variable, so the function works on its own.
    # A model without parameters falls back to the CPU.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(target_device), y.to(target_device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation: clear stale gradients, compute new ones, take a step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            # Separate names so `loss` is not rebound from a tensor to a float.
            loss_value, current = loss.item(), batch * len(X)
            print(f"loss: {loss_value:>7f} [{current:>5d}/{size:>5d}]")

In [137]:
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1)==y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [144]:
# Make a fixed number of passes over the training set, evaluating on the test
# set after each pass so progress can be followed epoch by epoch.
epochs = 10
for t in range(epochs):
    # t is zero-based; the banner shows a one-based epoch number.
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 2.307557 [    0/60000]
loss: 2.301203 [ 6400/60000]
loss: 2.277381 [12800/60000]
loss: 2.271333 [19200/60000]
loss: 2.256761 [25600/60000]
loss: 2.213912 [32000/60000]
loss: 2.229381 [38400/60000]
loss: 2.182137 [44800/60000]
loss: 2.186615 [51200/60000]
loss: 2.156079 [57600/60000]
Test Error: 
 Accuracy: 44.9%, Avg loss: 2.148908 

Epoch 2
-------------------------------
loss: 2.156856 [    0/60000]
loss: 2.148435 [ 6400/60000]
loss: 2.084613 [12800/60000]
loss: 2.110542 [19200/60000]
loss: 2.051774 [25600/60000]
loss: 1.981811 [32000/60000]
loss: 2.020570 [38400/60000]
loss: 1.923584 [44800/60000]
loss: 1.939658 [51200/60000]
loss: 1.875477 [57600/60000]
Test Error: 
 Accuracy: 55.5%, Avg loss: 1.866222 

Epoch 3
-------------------------------
loss: 1.890666 [    0/60000]
loss: 1.864120 [ 6400/60000]
loss: 1.740201 [12800/60000]
loss: 1.802247 [19200/60000]
loss: 1.679184 [25600/60000]
loss: 1.629179 [32000/60000]
loss: 1.660403 [38400/