In [11]:
# by using this below code, we dont need to download the kaggle dataset , we can directly use from kaggle
!pip install opendatasets --quiet
import opendatasets as od
od.download("https://www.kaggle.com/datasets/zalando-research/fashionmnist", quiet=True)

Skipping, found downloaded files in "./fashionmnist" (use force=True to force download)


# This will be our architectural flow of Transfer Learning -

In [20]:
# importing libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchsummary import summary
import torch.optim as optim

In [21]:
# every run will give (almost) the same results
torch.manual_seed(42) # 42 random numbers

# check for GPU or CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [22]:
df = pd.read_csv("/content/fashionmnist/fashion-mnist_train.csv")
df.head()

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,6,0,0,0,0,0,0,0,5,0,...,0,0,0,30,43,0,0,0,0,0
3,0,0,0,0,1,2,0,0,0,0,...,3,0,0,0,0,1,0,0,0,0
4,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [23]:
# train and test split
X_train, X_test, y_train, y_test = train_test_split(df.iloc[:,1:],df.iloc[:,0],test_size=0.2,random_state=42)

In [36]:
X_train = torch.tensor(X_train.values, dtype=torch.float32)
y_train = torch.tensor(y_train.values, dtype=torch.long)

X_test = torch.tensor(X_test.values, dtype=torch.float32)
y_test = torch.tensor(y_test.values, dtype=torch.long)

The inference transforms are available at VGG16_Weights.IMAGENET1K_V1.transforms and perform the following preprocessing operations: Accepts PIL.Image, batched (B, C, H, W) and single (C, H, W) image torch.Tensor objects. The images are resized to resize_size=[256] using interpolation=InterpolationMode.BILINEAR, followed by a central crop of crop_size=[224]. Finally the values are first rescaled to [0.0, 1.0] and then normalized using mean=[0.485, 0.456, 0.406] and std=[0.229, 0.224, 0.225].

===============================

Refer link - https://docs.pytorch.org/vision/main/models/generated/torchvision.models.vgg16.html

In [37]:
# we need to first apply these 4 transformations

# 1- from PIL image,resize it to (3,256,256)
# 2- crop the centre (3,224,224)
# 3- scale it and convert it to pytorch tensor
# 4- normalize it ( p = p-mean/S.D.) for every RGB image

# so to do these steps we dont need to manually code , we already have pre-defined libraries for it in torchvision

from torchvision.transforms import transforms

custom_transform = transforms.Compose([
    transforms.Resize(256),   # here 256, so after , we need to use interpolation technique which is by default already added so we dont need to write it
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485,0.456,0.406],std=[0.229,0.224,0.225])
])

In [55]:
from PIL import Image # PIL = Pillow -> It’s the go-to library for working with images in Python.

class CustomDataset(Dataset):
  def __init__(self,input_data,output_data,transform):
    self.input_data = input_data
    self.output_data = output_data
    self.transform = transform
  def __len__(self):
    return len(self.input_data)
  def __getitem__(self,index):
    # change (resize) the image from 1d( array) to 2d (28,28)
    image = self.input_data[index].reshape(28,28)
    # change datatype to np.uint8
    # mul(255)->multiplying by 255 converts it back to the 0–255 pixel range (the standard for RGB images).
    # .clamp(0, 255) -> This ensures no pixel value goes below 0 or above 255, just in case some math operations pushed them out of bounds.
    # .to(torch.uint8) ->Converts the tensor type from float32 to uint8
    # .cpu() -> as it doesnot require GPU to do these operations
    # .numpy() -> Finally converts the PyTorch tensor → NumPy array.
    image = image.mul(255).clamp(0, 255).to(torch.uint8).cpu().numpy()
    # change black and white image to colour (RGB)
    # means if array size is ([1,2],[3,4])  --> [image]*3 --> ([1,2],[3,4]) ([1,2],[3,4]) ([1,2],[3,4])

    #( PIL Image needs its image to be in this format -> (Height,Width,Channel)) but we have in this format image = (C,H,W) so axis=-1 will put C into last
    image = np.stack([image]*3,axis=-1) # [image]*3 means single channel is converted to 3 channels ( means RGB)

    # change tensor to PIL Image( PIL Image needs its image to be in this format -> (Height,Width,Channel))
    image = Image.fromarray(image)
    # apply transforms like on image -->

    #transforms.Resize(256),   # here 256, so after , we need to use interpolation technique which is by default already added so we dont need to write it
    #transforms.CenterCrop(224),
    #transforms.ToTensor(),
    #transforms.Normalize(mean=[0.485,0.456,0.406],std=[0.229,0.224,0.225])
    image = self.transform(image)

    # return the index
    return image,torch.tensor(self.output_data[index],dtype=torch.long) # image is already converted to tensor

# we now need to create object of this class and it will automatically call the constructor that is __init__ method
train_dataset = CustomDataset(X_train,y_train,transform=custom_transform)
test_dataset = CustomDataset(X_test,y_test,transform=custom_transform)

# now we need to call the DataLoader class --> which will create mini batches (in our case we took 32 batches )-> like this --> DataLoader(Dataset)
train_loader = DataLoader(train_dataset,batch_size=32,shuffle=True,pin_memory=True)
test_loader = DataLoader(test_dataset,batch_size=32,shuffle=False,pin_memory=True ) # because when we predict output , we dont want to shuffle the data

In [56]:
# we dont require MyNN(nn.Module) class as we will use pre-defined model that is vgg16

# we will fetch the pre-trained model here
import torchvision.models as models
vgg16 = models.vgg16(pretrained=True)

In [53]:
vgg16.features

Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU(inplace=True)
  (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): ReLU(inplace=True)
  (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (6): ReLU(inplace=True)
  (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (8): ReLU(inplace=True)
  (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU(inplace=True)
  (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (13): ReLU(inplace=True)
  (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (15): ReLU(inplace=True)
  (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (17): Conv2d(256, 512, kernel_si

In [48]:
vgg16.classifier

Sequential(
  (0): Linear(in_features=25088, out_features=4096, bias=True)
  (1): ReLU(inplace=True)
  (2): Dropout(p=0.5, inplace=False)
  (3): Linear(in_features=4096, out_features=4096, bias=True)
  (4): ReLU(inplace=True)
  (5): Dropout(p=0.5, inplace=False)
  (6): Linear(in_features=4096, out_features=1000, bias=True)
)

In [57]:
# also as we are using pre-trained model, so we will freeze the feature_extraction code we did in CNN
for i in vgg16.features.parameters():
  i.requires_grad = False     # means stop the learning

# now next part after feature_extraction was classification layer , so we will change the pre-trained classification layer with our own classification layer
vgg16.classifier = nn.Sequential(
    nn.Linear(25088,1024), # this is the input layer , 25088 already we are sending in input layer of vgg16 , 1024 hidden layer
    nn.ReLU(),
    nn.Dropout(p=0.5),

    nn.Linear(1024,512),
    nn.ReLU(),
    nn.Dropout(p=0.5),

    nn.Linear(512,10) # this is the output layer
)

vgg16.to(device)

learning_rate = 0.0001
epochs = 10

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(vgg16.classifier.parameters(), lr=learning_rate) # it will only update on classification layer section

In [58]:
# training loop

for epoch in range(epochs):

  total_epoch_loss = 0

  for batch_features, batch_labels in train_loader:

    # move data to gpu
    batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)

    # forward pass
    outputs = vgg16(batch_features)

    print(outputs.shape)
    print(batch_labels.shape)

    # calculate loss
    loss = criterion(outputs, batch_labels)

    # back pass
    optimizer.zero_grad()
    loss.backward()

    # update grads
    optimizer.step()

    total_epoch_loss = total_epoch_loss + loss.item()

    break

  avg_loss = total_epoch_loss/len(train_loader)
  print(f'Epoch: {epoch + 1} , Loss: {avg_loss}')


  return image,torch.tensor(self.output_data[index],dtype=torch.long) # image is already converted to tensor


torch.Size([32, 10])
torch.Size([32])
Epoch: 1 , Loss: 0.0015475444793701173
torch.Size([32, 10])
torch.Size([32])
Epoch: 2 , Loss: 0.0015096898078918457
torch.Size([32, 10])
torch.Size([32])
Epoch: 3 , Loss: 0.0014662373860677084
torch.Size([32, 10])
torch.Size([32])
Epoch: 4 , Loss: 0.001503777027130127
torch.Size([32, 10])
torch.Size([32])
Epoch: 5 , Loss: 0.0013937266667683919
torch.Size([32, 10])
torch.Size([32])
Epoch: 6 , Loss: 0.0014046985308329265
torch.Size([32, 10])
torch.Size([32])
Epoch: 7 , Loss: 0.0012932125727335613
torch.Size([32, 10])
torch.Size([32])
Epoch: 8 , Loss: 0.0013265833854675293
torch.Size([32, 10])
torch.Size([32])
Epoch: 9 , Loss: 0.0011647053559621175
torch.Size([32, 10])
torch.Size([32])
Epoch: 10 , Loss: 0.0011886253356933594
