In [1]:
import torch, torchvision
from torch import nn
from torchvision import datasets
from torchvision import transforms
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt

print(f" torch version :{torch.__version__}")
print(f" torchvision version : {torchvision.__version__}")

  from .autonotebook import tqdm as notebook_tqdm


 torch version :1.13.1
 torchvision version : 0.14.1


## Load data and load train and test dataloader

In [2]:
train_data = datasets.FashionMNIST(root="data",
                                  train=True,
                                  download=True,
                                  transform=torchvision.transforms.ToTensor(), # How do we transform the data
                                  target_transform=None)  # How do you transofrm the labels

test_data = datasets.FashionMNIST(root="data",
                                  train=False,
                                  download=True,
                                  transform=torchvision.transforms.ToTensor(), # How do we transform the data
                                  target_transform=None)  # How do you transofrm the labels


In [3]:
len(train_data), len(test_data)

(60000, 10000)

In [4]:
# setup device agnostic code
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cpu'

In [5]:
class_names = train_data.class_to_idx
id_to_class = {}
for idx,clas in enumerate(class_names):
    id_to_class[idx]= clas
id_to_class


{0: 'T-shirt/top',
 1: 'Trouser',
 2: 'Pullover',
 3: 'Dress',
 4: 'Coat',
 5: 'Sandal',
 6: 'Shirt',
 7: 'Sneaker',
 8: 'Bag',
 9: 'Ankle boot'}

In [6]:
#prepare Dataloader
from torch.utils.data import DataLoader

BATCH_SIZE = 32

train_dataloader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
# not shuffling the test data
test_dataloader = DataLoader(dataset=test_data, batch_size=BATCH_SIZE, shuffle=False )
train_dataloader, test_dataloader

(<torch.utils.data.dataloader.DataLoader at 0x12c5e6bc0>,
 <torch.utils.data.dataloader.DataLoader at 0x12c5e73a0>)

## Create Model with linear and Non Liner layer

In [7]:
class FashionMnistModelV1(nn.Module):
    def __init__(self,
                input_shape: int,
                hidden_unit: int,
                output_shape:int):
        super().__init__()
        self.layer_stack = nn.Sequential(
        nn.Flatten(),
        nn.Linear(in_features=input_shape, out_features=hidden_unit),
        nn.ReLU(),
        nn.Linear(in_features=hidden_unit,
                 out_features= output_shape),
        nn.ReLU())
    
    def forward(self, x):
        return self.layer_stack(x)

In [8]:
# create manual instance pf MOdel_1
torch.manual_seed(42)
model_1 = FashionMnistModelV1(input_shape=784,
                             hidden_unit=10,
                             output_shape=len(class_names)).to(device)

In [9]:
model_1.parameters()

<generator object Module.parameters at 0x12c583610>

## Set loss fun and Optimizer

In [10]:
from helper_functions import accuracy_fn
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model_1.parameters(),lr=0.001)

## Functioning Training and testing Step in shape of different reusable functions

In [11]:
torch.manual_seed(42)
def test_step(model: torch.nn.Module,
              data_loader: torch.utils.data.DataLoader,
              loss_fn : torch.nn.Module,
              accuracy_fn,
             device: torch.device = device) :
    """ Returns a dictionary containing the results of model predicting on data loader"""
    loss, acc = 0,0
    model.eval()
    with torch.inference_mode():
        for X,y in tqdm(data_loader):
            X,y = X.to(device), y.to(device)
            # Make ored
            y_pred = model(X)
            
            # Acumulate teh loss and accuracy
            
            loss += loss_fn(y_pred,y)
            acc += accuracy_fn(y_true =y, y_pred=y_pred.argmax(dim=1))
        
        # normalize loss and acc by getting mean of batch for loss/acc
        loss /= len(data_loader)
        acc /= len(data_loader)
    print(f"Test loss: {loss:3f} | Test acc : {acc:2f}% \n")
    return{ "model_name" : model.__class__.__name__,
               "model_loss" : loss.item(),
               "model_acc" : acc
        }

In [12]:
#@title Default title text
def train_step(model : torch.nn.Module,
               dataloader : torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               optimizer: torch.optim.Optimizer,
               accuracy_fn,
               device: torch.device = device):
    """Performs a training with model trying to learn on dataloader"""
    train_loss, train_acc = 0,0
    for batch, (X,y) in enumerate(dataloader):
        X,y = X.to(device), y.to(device)
        model.train()
        #1, Fwd pass
        y_pred = model(X)
        #2. loss
        loss = loss_fn(y_pred,y)
        train_loss += loss

        train_acc += accuracy_fn(y_true=y,y_pred=y_pred.argmax(dim=1))
        # 3. 
        optimizer.zero_grad()
        # 4. loss back
        loss.backward()
        # 5 . Step
        optimizer.step()
        # print
        if batch % 400 == 0:
            print(f"Looked at {batch * len(X)}/ {len(dataloader.dataset)} samples")

        # normalize loss and acc by getting mean of batch for loss/acc
        train_loss /= len(dataloader)
        train_acc /= len(dataloader)

    print(f"Train loss: {train_loss:3f} | Train acc : {train_acc:2f}% \n")

In [13]:
!pip install tqdm



In [14]:
from timeit import default_timer as timer
def print_total_time(start: float,
       end: float,
       device: torch.device = None):
    tat = end - start
    print(f"Train time on :{device} : {tat:3f} seconds")
    return tat

In [15]:
torch.manual_seed(42)
from tqdm import tqdm
# measure time
from timeit import default_timer as timer
training_start_time = timer()

epochs =3

# Create a training toop 

for epoch in range(epochs):
    print(f"Epoch: {epoch} \n **************")
    train_step(model=model_1,
              dataloader=train_dataloader,
              loss_fn= loss_fn,
              optimizer=optimizer,
              accuracy_fn=accuracy_fn,
              device= device)
    test_ret = test_step(model= model_1,
             data_loader=test_dataloader,
             accuracy_fn=accuracy_fn,
            loss_fn=loss_fn,
             device=device)
    print(test_ret)
train_time_end = timer()
tat = print_total_time(start=training_start_time,
         end=train_time_end,
         device= device)
print(f"time in training : {tat}")


Epoch: 0 
 **************
Looked at 0/ 60000 samples
Looked at 12800/ 60000 samples
Looked at 25600/ 60000 samples
Looked at 38400/ 60000 samples
Looked at 51200/ 60000 samples
Train loss: 0.000480 | Train acc : 0.035020% 



100%|██████████████████████████████████████████████████████| 313/313 [00:00<00:00, 1687.59it/s]

Test loss: 0.954342 | Test acc : 65.565096% 

{'model_name': 'FashionMnistModelV1', 'model_loss': 0.9543418288230896, 'model_acc': 65.56509584664536}
Epoch: 1 
 **************
Looked at 0/ 60000 samples





Looked at 12800/ 60000 samples
Looked at 25600/ 60000 samples
Looked at 38400/ 60000 samples
Looked at 51200/ 60000 samples
Train loss: 0.000354 | Train acc : 0.038354% 



100%|██████████████████████████████████████████████████████| 313/313 [00:00<00:00, 1710.72it/s]

Test loss: 0.922483 | Test acc : 66.244010% 

{'model_name': 'FashionMnistModelV1', 'model_loss': 0.9224833250045776, 'model_acc': 66.24400958466454}
Epoch: 2 
 **************
Looked at 0/ 60000 samples





Looked at 12800/ 60000 samples
Looked at 25600/ 60000 samples
Looked at 38400/ 60000 samples
Looked at 51200/ 60000 samples
Train loss: 0.000639 | Train acc : 0.028356% 



100%|██████████████████████████████████████████████████████| 313/313 [00:00<00:00, 1720.15it/s]

Test loss: 0.903419 | Test acc : 66.813099% 

{'model_name': 'FashionMnistModelV1', 'model_loss': 0.9034186005592346, 'model_acc': 66.81309904153355}
Train time on :cpu : 5.403859 seconds
time in training : 5.403859124984592





## Article on utilizing GPu to fast speed
##### https://horace.io/brrr_intro.html

In [16]:
### get model results decitionary
device = "cuda" if torch.cuda.is_available() else "cpu"
torch.manual_seed(42)
def eval_model(model: torch.nn.Module,
              data_loader: torch.utils.data.DataLoader,
              loss_fn : torch.nn.Module,
              accuracy_fn,
              device = device) :
    """ Returns a dictionary containing the results of model predicting on data loader"""
    loss, acc = 0,0
    model.eval()
    with torch.inference_mode():
        for X,y in tqdm(data_loader):
            X,y = X.to(device), y.to(device)
            # Make ored
            y_pred = model(X)
            
            # Acumulate teh loss and accuracy
            
            loss += loss_fn(y_pred,y)
            acc += accuracy_fn(y_true =y, y_pred=y_pred.argmax(dim=1))
        
        loss /= len(data_loader)
        acc /= len(data_loader)
        return{ "model_name" : model.__class__.__name__,
               "model_loss" : loss.item(),
               "model_acc" : acc
        }

In [17]:
model_1_results = eval_model(model=model_1,
                            data_loader=test_dataloader,
                            loss_fn=loss_fn,
                           accuracy_fn=accuracy_fn,
                           device = device)
model_1_results

100%|██████████████████████████████████████████████████████| 313/313 [00:00<00:00, 1699.44it/s]


{'model_name': 'FashionMnistModelV1',
 'model_loss': 0.9034186005592346,
 'model_acc': 66.81309904153355}

### Model expirements
##### Use of ConvNet  in visual Data  : https://poloclub.github.io/cnn-explainer/

In [18]:
torch.manual_seed(42)
class FashionMnistModelV2(nn.Module):
    def __init__(self,
                input_shape: int,
                hidden_units: int,
                output_shape: int):
        super().__init__()
        self.conv_block_1 = nn.Sequential(
        nn.Conv2d(in_channels=input_shape,
                 out_channels= hidden_units,
                 kernel_size= 3,
                 stride= 1,
                 padding= 1),
        nn.ReLU(),
        nn.Conv2d(in_channels = hidden_units,
                 out_channels = hidden_units,
                 kernel_size  = 3,
                 stride=1,
                 padding=1),  # Value we set ourself are hyper parameter
        nn.MaxPool2d(kernel_size=2))
        
        self.conv_block_2 = nn.Sequential(
            nn.Conv2d(in_channels= hidden_units,
                                         out_channels= hidden_units, 
                                         kernel_size= 3,
                                         stride =1 ,
                                         padding =1),
            nn.ReLU(),
            nn.Conv2d(in_channels= hidden_units,
                     out_channels= hidden_units,
                     kernel_size =3,
                     stride=1,
                     padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )
        
        self.classification_layer = nn.Sequential(
        nn.Flatten(),
            nn.Linear(in_features=hidden_units*7*7,
                     out_features=output_shape)
        )
        
    def forward(self, x):
        # return self.classification_layer(self.conv_block_2(self.conv_block_1(x)))
        x = self.conv_block_1(x)
        # print(f"shape post 1st conv layer : {x.shape}")
        x = self.conv_block_2(x)
        # print(f"shape post 2nd conv layer : {x.shape}")
        x = self.classification_layer(x)
        # print(f"shape post classification layer : {x.shape}")
        return x

In [19]:
torch.manual_seed(42)
model_2 = FashionMnistModelV2(input_shape=1,
                             hidden_units=10,
                             output_shape=len(class_names)).to(device)

## steping through nn.Conv2d

In [20]:
torch.manual_seed(42)
images = torch.randn(size=(32,3,64,64))
test_image = images[0]
test_image.shape

torch.Size([3, 64, 64])

In [21]:
## create conv2d layer
conv_layer = nn.Conv2d(in_channels=3,
                       out_channels=10,
                      kernel_size=(3,3),
                      stride=1,
                      padding=1)
conv_out = conv_layer(test_image)
conv_out.shape

torch.Size([10, 64, 64])

In [22]:
##stepping through nn.MaxPoool
print(f"Test image shape: {test_image.shape} ")
print(f"Test image with unsqueeze: {test_image.unsqueeze(0).shape} ")

max_pool = nn.MaxPool2d(kernel_size=2)

# pass data through conv_layer
test_conv_out = conv_layer(test_image.unsqueeze(dim=0))
print(f"Test image conved : {test_conv_out.shape} ")

#pass data through Maxpool layer


Test image shape: torch.Size([3, 64, 64]) 
Test image with unsqueeze: torch.Size([1, 3, 64, 64]) 
Test image conved : torch.Size([1, 10, 64, 64]) 


In [23]:
torch.manual_seed(42)
# create a random tendsor with similar no of dimensions
random_tensor = torch.randn(1,1,2,2)
random_tensor
max_pool_layer = nn.MaxPool2d(kernel_size=2)
max_pooled_tensor = max_pool_layer(random_tensor)
max_pooled_tensor.shape


torch.Size([1, 1, 1, 1])

In [24]:
model_3 = FashionMnistModelV2(input_shape=1,
                            hidden_units=8,
                             output_shape=len(class_names)).to(device)

In [25]:
image,label = train_data[0]
image.shape

torch.Size([1, 28, 28])

In [26]:
image = image.unsqueeze(dim=0)
print(f"Image post Unsqueezee  : {image.shape}")
res = model_3(image)
print(f"Image size post conv : {res.shape}")

Image post Unsqueezee  : torch.Size([1, 1, 28, 28])
Image size post conv : torch.Size([1, 10])


### Setup loss_fn and optimizer

In [27]:
## Setup loss_fn and optimizer
from helper_functions import accuracy_fn

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model_3.parameters(), 
                            lr= 0.001)

### Training

In [28]:
torch.manual_seed(42)

from timeit import default_timer as timer
start = timer()

epochs = 3

for epoch in tqdm(range(epochs)):
    print(f"Epoch : {epoch}*************\n")
    train_step(model=model_3,
              dataloader=train_dataloader,
              loss_fn=loss_fn,
              accuracy_fn=accuracy_fn,
              optimizer=optimizer,
              device=device)
    test_step(model=model_3,
             data_loader=test_dataloader,
             loss_fn=loss_fn,
             accuracy_fn = accuracy_fn,
             device= device)

  0%|                                                                    | 0/3 [00:00<?, ?it/s]

Epoch : 0*************

Looked at 0/ 60000 samples
Looked at 12800/ 60000 samples
Looked at 25600/ 60000 samples
Looked at 38400/ 60000 samples
Looked at 51200/ 60000 samples
Train loss: 0.001229 | Train acc : 0.006668% 




  0%|                                                                  | 0/313 [00:00<?, ?it/s][A
 12%|██████▉                                                 | 39/313 [00:00<00:00, 385.02it/s][A
 26%|██████████████▎                                         | 80/313 [00:00<00:00, 396.62it/s][A
 39%|█████████████████████▎                                 | 121/313 [00:00<00:00, 402.37it/s][A
 52%|████████████████████████████▍                          | 162/313 [00:00<00:00, 404.50it/s][A
 65%|███████████████████████████████████▋                   | 203/313 [00:00<00:00, 405.62it/s][A
 78%|██████████████████████████████████████████▉            | 244/313 [00:00<00:00, 405.73it/s][A
100%|███████████████████████████████████████████████████████| 313/313 [00:00<00:00, 404.18it/s][A
 33%|████████████████████                                        | 1/3 [00:22<00:45, 22.90s/it]

Test loss: 2.302711 | Test acc : 9.994010% 

Epoch : 1*************

Looked at 0/ 60000 samples
Looked at 12800/ 60000 samples
Looked at 25600/ 60000 samples
Looked at 38400/ 60000 samples
Looked at 51200/ 60000 samples
Train loss: 0.001228 | Train acc : 0.003335% 




  0%|                                                                  | 0/313 [00:00<?, ?it/s][A
 13%|███████▎                                                | 41/313 [00:00<00:00, 401.30it/s][A
 27%|██████████████▊                                         | 83/313 [00:00<00:00, 407.07it/s][A
 40%|█████████████████████▊                                 | 124/313 [00:00<00:00, 406.80it/s][A
 53%|████████████████████████████▉                          | 165/313 [00:00<00:00, 407.21it/s][A
 66%|████████████████████████████████████▎                  | 207/313 [00:00<00:00, 409.41it/s][A
 80%|███████████████████████████████████████████▊           | 249/313 [00:00<00:00, 410.74it/s][A
100%|███████████████████████████████████████████████████████| 313/313 [00:00<00:00, 408.79it/s][A
 67%|████████████████████████████████████████                    | 2/3 [00:45<00:22, 22.82s/it]

Test loss: 2.302711 | Test acc : 9.994010% 

Epoch : 2*************

Looked at 0/ 60000 samples
Looked at 12800/ 60000 samples
Looked at 25600/ 60000 samples
Looked at 38400/ 60000 samples
Looked at 51200/ 60000 samples
Train loss: 0.001233 | Train acc : 0.003335% 




  0%|                                                                  | 0/313 [00:00<?, ?it/s][A
 13%|███████▎                                                | 41/313 [00:00<00:00, 406.12it/s][A
 26%|██████████████▋                                         | 82/313 [00:00<00:00, 405.44it/s][A
 39%|█████████████████████▌                                 | 123/313 [00:00<00:00, 406.28it/s][A
 52%|████████████████████████████▊                          | 164/313 [00:00<00:00, 401.38it/s][A
 65%|████████████████████████████████████                   | 205/313 [00:00<00:00, 403.62it/s][A
 79%|███████████████████████████████████████████▍           | 247/313 [00:00<00:00, 406.20it/s][A
100%|███████████████████████████████████████████████████████| 313/313 [00:00<00:00, 406.51it/s][A
100%|████████████████████████████████████████████████████████████| 3/3 [01:08<00:00, 22.83s/it]

Test loss: 2.302711 | Test acc : 9.994010% 






In [30]:
model_2_results = eval_model(model=model_2,
                            data_loader=test_dataloader,
                            loss_fn=loss_fn,
                            accuracy_fn=accuracy_fn)

100%|███████████████████████████████████████████████████████| 313/313 [00:00<00:00, 399.81it/s]


In [31]:
model_2_results

{'model_name': 'FashionMnistModelV2',
 'model_loss': 2.302710771560669,
 'model_acc': 9.994009584664537}

In [32]:
import pandas as pd
compare_results = pd.DataFrame(model_1_results, 
                               model_2_results)
compare_results

Unnamed: 0,model_name,model_loss,model_acc
model_name,FashionMnistModelV1,0.903419,66.813099
model_loss,FashionMnistModelV1,0.903419,66.813099
model_acc,FashionMnistModelV1,0.903419,66.813099


In [29]:
#