<a href="https://colab.research.google.com/github/saqib-rgb/ML-With-Pytorch/blob/main/Going_Modular.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 1. Let us get some data

In [79]:
# we will start by getting the uselful libraries
import os
import requests
import zipfile
from pathlib import Path

# setup path to data folder
data_path=Path('data/')
image_path=data_path/'pizza_steak_sushi'

#let us write some code to soenload some data if it doesnot exist already
if image_path.is_dir():
  print(f"{image_path} directory already exists no need to create one")
else:
  print(f'Did not find {image_path} downloading just now....')
  image_path.mkdir(parents=True,exist_ok=True)

# downloading the data with help of context manager 
with open (data_path/'pizza_steak_sushi.zip','wb') as f:
  request=requests.get('https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip')
  print(f'Downloading the data from the link right now....')
  f.write(request.content)

#Time to unzip the file
with zipfile.ZipFile(data_path/'pizza_steak_sushi.zip','r') as zip_ref:
  print('unzipping the file downloaded abobe right now....')
  zip_ref.extractall(image_path)

# as the extraction has been done no need to keep the zip file
print(f'Removing the zipfile from directory right now...')
os.remove(data_path/"pizza_steak_sushi.zip")
print('Congratulations the zip file has been remove successfully !!')




data/pizza_steak_sushi directory already exists no need to create one
Downloading the data from the link right now....
unzipping the file downloaded abobe right now....
Removing the zipfile from directory right now...
Congratulations the zip file has been remove successfully !!


In [80]:
# let us create folders for training and testin datasets
train_dir=image_path/'train'
test_dir=image_path/'test'

# 2. Creating Datasets and Dataloaders

As in the section above we have already got our data and it is in the folder shape required.

In this ection we will be forming a dataset from these and than convert them to dataloaders for batch wise training

In [81]:
# importing the required libraries
from torchvision import datasets,transforms

# let us create a transform so that our data becomes goof enough to be accepted by PyTorch
data_transform=transforms.Compose([
    transforms.Resize(size=(64,64)),
    transforms.ToTensor()
])

# Using ImageFolder to create datasets
train_data=datasets.ImageFolder(
    root=train_dir,
    transform=data_transform
)
test_data=datasets.ImageFolder(
    root=test_dir,
    transform=data_transform
)

print(f'Train data:\n {train_data}\n Test data:{test_data}')

Train data:
 Dataset ImageFolder
    Number of datapoints: 225
    Root location: data/pizza_steak_sushi/train
    StandardTransform
Transform: Compose(
               Resize(size=(64, 64), interpolation=bilinear, max_size=None, antialias=warn)
               ToTensor()
           )
 Test data:Dataset ImageFolder
    Number of datapoints: 75
    Root location: data/pizza_steak_sushi/test
    StandardTransform
Transform: Compose(
               Resize(size=(64, 64), interpolation=bilinear, max_size=None, antialias=warn)
               ToTensor()
           )


In [82]:
# Now let us get the class names and class dictionaries
class_names=train_data.classes
print(class_names)
class_dict=train_data.class_to_idx
print(class_dict)
# we can also check the lenghth our train and test data
print(len(train_data),len(test_data))

['pizza', 'steak', 'sushi']
{'pizza': 0, 'steak': 1, 'sushi': 2}
225 75


In [83]:
from torch.utils.data import DataLoader
import os
# Now let us set the hyperparameters
BATCH_SIZE=32
NUM_WORKERS=os.cpu_count()
# Now it is time to change our datasets to batch form using DataLoaders
train_dataloader=DataLoader(dataset=train_data,
                            batch_size=BATCH_SIZE,
                            num_workers=NUM_WORKERS,
                            shuffle=True)
test_dataloader=DataLoader(dataset=test_data,
                           batch_size=BATCH_SIZE,
                           num_workers=NUM_WORKERS,
                           shuffle=False)

# let us see what has been created
train_dataloader,test_dataloader

(<torch.utils.data.dataloader.DataLoader at 0x7f1584969490>,
 <torch.utils.data.dataloader.DataLoader at 0x7f1584969940>)

In [84]:
# Noe let us see what has been created by checking a single image size
img,label=next(iter(train_dataloader))

print(f'Image shape:{img.shape}')
print(f'label shape:{label.shape}')

Image shape:torch.Size([32, 3, 64, 64])
label shape:torch.Size([32])


# 3. Making a TinyVGG Model

In [85]:
import torch 
from torch import nn

class TinyVGG(nn.Module):
  """
  Creates a TinyVGG architecture.

  This model replicates the TinyVGG from CNN explainer website

  Args:
  input_shape: An integer indicating the number of input channels
  hidden_units: An integer indicating the number of hidden units
  ouput_shape: An integer telling us the number of output channels
  """
  def __init__(self,
               input_shape:int,
               hidden_units:int,
               output_shape:int)->None:
      super().__init__()
      self.conv_block_1=nn.Sequential(
          nn.Conv2d(in_channels=input_shape,
                    out_channels=hidden_units,
                    kernel_size=3,
                    stride=1,
                    padding=0),
          nn.ReLU(),
          nn.Conv2d(in_channels=hidden_units,
                    out_channels=hidden_units,
                    kernel_size=3,
                    stride=1,
                    padding=0),
          nn.ReLU(),
          nn.MaxPool2d(kernel_size=2,
                       stride=2)

      )
      self.conv_block_2=nn.Sequential(
          nn.Conv2d(in_channels=hidden_units,
                    out_channels=hidden_units,
                    kernel_size=3,
                    stride=1,
                    padding=0),
          nn.ReLU(),
          nn.Conv2d(in_channels=hidden_units,
                    out_channels=hidden_units,
                    kernel_size=3,
                    stride=1,
                    padding=0),
          nn.ReLU(),
          nn.MaxPool2d(kernel_size=3,
                       stride=2)
          
      )
      self.Classifier=nn.Sequential(
          nn.Flatten(),
          nn.Linear(in_features=hidden_units*12*12,
                    out_features=output_shape)
      )

  def forward(self,x:torch.Tensor):
    x=self.conv_block_1(x)
    # print(x.shape)
    x=self.conv_block_2(x)
    # print(x.shape)
    x=self.Classifier(x)
    # print(x.shape)
    return x


In [86]:
# let us createa device agnostic code
device='cuda' if torch.cuda.is_available() else 'cpu'
# Now let us create an instance of our first model
model_0=TinyVGG(input_shape=3,
                hidden_units=10,
                output_shape=len(class_names)).to(device)

model_0

TinyVGG(
  (conv_block_1): Sequential(
    (0): Conv2d(3, 10, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU()
    (2): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv_block_2): Sequential(
    (0): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU()
    (2): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (Classifier): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=1440, out_features=3, bias=True)
  )
)

# Now let us set the correct number of hidden units using torchinfo


In [87]:
# Now colab doesnot have torchinfo by default so we have to install it letus doit
try:
  import torchinfo
except:
  !pip install torchinfo
  import torchinfo

from torchinfo import summary
summary(model_0,input_size=[32,3,64,64])

  action_fn=lambda data: sys.getsizeof(data.storage()),
  return super().__sizeof__() + self.nbytes()


Layer (type:depth-idx)                   Output Shape              Param #
TinyVGG                                  [32, 3]                   --
├─Sequential: 1-1                        [32, 10, 30, 30]          --
│    └─Conv2d: 2-1                       [32, 10, 62, 62]          280
│    └─ReLU: 2-2                         [32, 10, 62, 62]          --
│    └─Conv2d: 2-3                       [32, 10, 60, 60]          910
│    └─ReLU: 2-4                         [32, 10, 60, 60]          --
│    └─MaxPool2d: 2-5                    [32, 10, 30, 30]          --
├─Sequential: 1-2                        [32, 10, 12, 12]          --
│    └─Conv2d: 2-6                       [32, 10, 28, 28]          910
│    └─ReLU: 2-7                         [32, 10, 28, 28]          --
│    └─Conv2d: 2-8                       [32, 10, 26, 26]          910
│    └─ReLU: 2-9                         [32, 10, 26, 26]          --
│    └─MaxPool2d: 2-10                   [32, 10, 12, 12]          --
├─Sequentia

# 4. Now it is time for us to do training and testing by creating two functions `train_step()` and `test_step()`

In [95]:
# importing the librraies required
from typing import Tuple
def train_step(
    model:torch.nn.Module,
    dataloader:torch.utils.data.DataLoader,
    loss_fn:torch.nn.Module,
    optimizer:torch.optim.Optimizer,
    device:torch.device)->Tuple[float,float]:
    """Trains a PyTorch model for single epoch
    
    Turns a PyTorch model to training mode and then runs through all the 
    required training steps (forward pass, loss caclulation, optimizer step)

    Args:
    model : A PyTorch model to be trained
    dataloader: Dataloader instance to be trained
    loss_fn: A loss function to minimize
    optimizer: A Pytorch optimizer to minimize the loss
    device: Which target device to compute on

    Retruns:
    A tuple of training loss and training accuracies
    In the form of (train_loss,train_accuracy).

    """
   # let us start by putting thr model in training mode
    model.train()
    # Set of train loss and train accuracy values
    train_loss,train_acc=0,0
    # Loop through the batches of dataloader instances
    for batch,(X,y) in enumerate(dataloader):
      # send all data to traget device
      X,y=X.to(device),y.to(device)

      # 1. Make a forward pass
      y_pred=model(X)

      # 2. calculate the loss
      loss=loss_fn(y_pred,y)
      train_loss+=loss.item()

      # 3. Optimizer zero grad
      optimizer.zero_grad()

      # 4. Loss backward
      loss.backward

      # 5. optimizer step
      optimizer.step()

      # Calculate and accumulate metric losss
      y_pred_class=torch.argmax(torch.softmax(y_pred,dim=1),dim=1)
      train_acc+=(y_pred_class==y).sum().item()/len(y_pred)


  # Adjust the train loss and train acc per batch
    train_loss=train_loss/len(dataloader)
    train_acc=train_acc/len(dataloader)
    return train_loss,train_acc
    



In [96]:
# Now let us build a function for test step
def test_step(model:torch.nn.Module,
              dataloader:torch.utils.data.DataLoader,
              loss_fn:torch.nn.Module,
              device:torch.device):
  """Test a PyTorch model on test data for one epoch

  Turns a target PyTorch model to eval mode and then performs
  a forward pass on the data 

  Args:
  model: A PyTorch model that is to be trained
  dataloader: A DataLoader instance for the model
  loss_fn: A PyTorch loss function
  device: The devivce to run the target 

  Returns:
   A tuple of testing loss and testing accuracy of the form
   (test_loss,test_accuracy)

  """

  # Let us start by putiing the model to eval mode
  model.eval()

  # let us initiate test_loss, test_acc
  test_loss,test_acc=0,0

  #turn on the infernce mode
  with torch.inference_mode():
    # creating a for loop to go throughthe instance of DataLoader
    for batch,(X,y) in enumerate(dataloader):
      # sending all data to target device
      X,y=X.to(device),y.to(device)

      # 1. MAke a forward pass
      y_pred=model(X)

      # 2. Calculate the loss
      loss=loss_fn(y_pred,y)
      test_loss+=loss.item()

      # Calculate the accuracy
      test_pred_labels=y_pred.argmax(dim=1)
      test_acc+=(test_pred_labels==y).sum().item()/len(test_pred_labels)

  
  # adjust the loss and accuracy to loss per batch
  test_loss=test_loss/len(dataloader)
  test_acc=test_acc/len(dataloader)

  return test_loss,test_acc





# Now it is time for us to start building out final `train()` function

In [97]:
# Importing thr required libraries
from typing import Dict,List
from tqdm.auto import tqdm

# defininga function
def train(model:torch.nn.Module,
          train_dataloader:torch.utils.data.DataLoader,
          test_dataloader:torch.utils.data.DataLoader,
          optimizer:torch.optim.Optimizer,
          loss_fn:torch.nn.Module,
          epochs:int,
          device:torch.device)-> Dict[str,List[float]]:
    """Trains a test a PyTorch model for the epochs defined

    It passes a PyTorch model through a train and test step functions fora number of epochs

    Args:
    model: A PyTorch model to be trained and tested
    train_dataloader: A DataLoader instance on which model is to be trained
    test_dataloader: A DataLoader instance on ehich model is to be tested
    optimizer: A Pytorch function to minimize the loss
    loss_fn: A PyTorch function to calculate the loss
    epochs: Number of times the data is supposed to pass through for training purposes
    device: The target device to run the model

    Returns:
    A dictionary of training and testing loss as well as training and
    testing accuracy metrics. Each metric has a value in a list for 
    each epoch.
    In the form: {train_loss: [...],
                  train_acc: [...],
                  test_loss: [...],
                  test_acc: [...]}

    """

    # Let us create a list of empty dictioaries to store the values of test/train losses and accuracies for each epoch
    results={'train_loss':[],
             'train_acc':[],
             'test_loss':[],
             'test_acc':[]}
    
    # noe let us loop through the training and testing loops for each epoch
    for epoch in tqdm(range(epochs)):
      train_loss,train_acc=train_step(
          model=model,
          dataloader=train_dataloader,
          loss_fn=loss_fn,
          optimizer=optimizer,
          device=device
      )
      test_loss,test_acc=test_step(
          model=model,
          dataloader=test_dataloader,
          loss_fn=loss_fn,
          device=device
          
      )
      # let us print out what is happening
      print(
          f'Epoch:{epoch+1}|'
          f'train_loss:{train_loss}|'
          f"train_acc:{train_acc}|"
          f'test_loss:{test_loss}|'
          f'test_acc{test_acc}'
      )
      # Time to update the dictionary after every epoch
      results['train_loss'].append(train_loss)
      results['train_acc'].append(train_acc)
      results['test_loss'].append(test_loss)
      results['test_acc'].append(test_acc)


    return results




# 5.Function to save the built model



In [98]:
# Importing th libraries
from pathlib import Path

def save_model(
    model:torch.nn.Module,
    target_dir:str,
    model_name:str):
  """Save the PyTorch model to target directory

  Args: A PyTorch model to save
  target_dir: A directory for saving the model
  model_name: A file name for the saved model should include '.pth'

  """

  # Create a target directory
  target_dir_path=Path(target_dir)
  target_dir_path.mkdir(parents=True,
                        exist_ok=True)
  
  # create model save path
  assert model_name.endswith('.pth') or model_name.endswith('.pt'),'model name should either end in .pth or .pt'
  model_save_path=target_dir_path/model_name

  # save the model state_dict()
  print(f'[INFO] saving model path to:{model_save_path}')
  torch.save(obj=model.state_dict(),
             f=model_save_path)


In [100]:
# Train evaluate and save the model
NUM_EPOCHS=5

# Recreating an instance of TinyVGG
model_0=TinyVGG(input_shape=3,
                hidden_units=10,
                output_shape=len(train_data.classes)).to(device)

# Setting up loss function and optimizer
loss_fn=nn.CrossEntropyLoss()
optimizer=torch.optim.Adam(params=model_0.parameters(),
                           lr=0.001)

# Start the timer
from timeit import default_timer as timer
start_time=timer()

# Train model_0
model_0_results=train(
    model=model_0,
    train_dataloader=train_dataloader,
    test_dataloader=test_dataloader,
    loss_fn=loss_fn,
    optimizer=optimizer,
    epochs=NUM_EPOCHS,
    device=device
)

end_time=timer()
print(f'The model took {start_time-end_time} to train')

# save the model
save_model(model=model_0,
           target_dir='models',
           model_name='going_modular.pth')

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch:1|train_loss:1.0933694243431091|train_acc:0.40234375|test_loss:1.0798007249832153|test_acc0.5416666666666666
Epoch:2|train_loss:1.1051973104476929|train_acc:0.28125|test_loss:1.0798007249832153|test_acc0.5416666666666666
Epoch:3|train_loss:1.0934048295021057|train_acc:0.40234375|test_loss:1.0798007249832153|test_acc0.5416666666666666
Epoch:4|train_loss:1.0934325605630875|train_acc:0.40234375|test_loss:1.0798007249832153|test_acc0.5416666666666666
Epoch:5|train_loss:1.0926230996847153|train_acc:0.40234375|test_loss:1.0798007249832153|test_acc0.5416666666666666
The model took -12.005202604999795 to train
[INFO] saving model path to:models/going_modular.pth
