In [None]:
!pip install timm

Sources for the code: my brain, Stack Overflow, and ChatGPT for troubleshooting errors

In [None]:
!pip install datasets

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
from sklearn import datasets
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import time
import timm # library with pretrained models
from datasets import load_dataset
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from tqdm import tqdm
from torch.utils.data import Dataset
from PIL import Image

# CNN Part

## 1: Download tinyImageNet
- Dataset characteristics: 200 image classes

In [None]:
# Load every split of Tiny ImageNet once, then pick the individual splits out
# of the returned collection instead of calling load_dataset again per split.
tiny_imagenet = load_dataset('Maysee/tiny-imagenet')
train_dataset = tiny_imagenet['train']
# The 'valid' split is treated as the test split
valid_test_dataset = tiny_imagenet['valid']


In [None]:
# Inspect the validation split (features and number of rows)
valid_test_dataset

Dataset({
    features: ['image', 'label'],
    num_rows: 10000
})

In [None]:
# Inspect the training split (features and number of rows)
train_dataset

Dataset({
    features: ['image', 'label'],
    num_rows: 100000
})

##  2: Use a small and large version of existing image classification models,
- i.e. pre-trained models, including VGG-19, ResNet50V2, and Inceptionv4 (these all seem large)
- Then, run them on the tinyImageNet.

- Large vs small image classification models



### VGG - 19 model (large version)
- Architecture: 19 layers — a stack of convolutional layers followed by 3 fully connected layers; uses 3x3 convolutional filters with a stride and padding of 1, plus max-pooling layers
- Usage: good for image classification because of its deep and straightforward architecture

VGG - 11 model (small version)
- 11 layers instead of 19
- 8 conv layers and 3 fully connected ones

In [None]:
from torchvision.models import vgg19
# Load VGG-19 with pretrained weights (the checkpoint is downloaded on first use)
vgg19_model = vgg19(pretrained=True)

Downloading: "https://download.pytorch.org/models/vgg19-dcbb9e9d.pth" to /root/.cache/torch/hub/checkpoints/vgg19-dcbb9e9d.pth
100%|██████████| 548M/548M [00:10<00:00, 56.3MB/s]


In [None]:
# Adapt the pretrained VGG-19 to Tiny ImageNet by training only a new output layer.
# `classifier` is a sequential module; index 6 is its 7th entry, the final
# fully connected layer.

# Freeze every pretrained parameter so backpropagation does not update them.
vgg19_model.requires_grad_(False)

# Replace the final layer with a fresh linear layer that has 200 outputs (one
# per class). A newly created layer requires gradients by default, so it is the
# only part of the network that gets trained. Reading in_features from the layer
# being replaced avoids hard-coding its input width.
vgg19_model.classifier[6] = nn.Linear(vgg19_model.classifier[6].in_features, 200)

In [None]:
# Sanity check: push one random image-shaped tensor through the network and
# confirm the output has one logit per class.
# 224x224 matches the size produced by the Resize step of the input pipeline.
vec = torch.randn(1, 3, 224, 224)
with torch.no_grad():  # a shape check needs no gradients
    sanity_logits = vgg19_model(vec)
# Show only the shape instead of printing every logit
sanity_logits.shape

In [None]:
# Dataset wrapper that converts each image to an RGB PIL image (and applies an
# optional transform) before it is loaded into the model

class custom_data_loader(Dataset):
    """Map-style dataset yielding ``(image, label)`` pairs.

    Despite its name this class is a ``Dataset``; it is meant to be wrapped by
    a ``DataLoader`` rather than used as one.

    Args:
        data: Indexable collection whose items are mappings with an
            ``'image'`` and a ``'label'`` entry.
        transform: Optional callable applied to the RGB PIL image.
    """

    def __init__(self, data, transform=None):
        self.data = data
        self.transform = transform

    def __len__(self):
        """Return the number of records in the wrapped collection."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair stored at position ``idx``."""
        # Fetch the record once; indexing the collection a second time would
        # repeat the lookup for the same row.
        record = self.data[idx]
        image, label = record['image'], record['label']

        # Anything that is not already a PIL image (e.g. a numpy array) is
        # converted to one.
        if not isinstance(image, Image.Image):
            image = Image.fromarray(np.asarray(image))

        # Make sure the image has three RGB channels
        if image.mode != 'RGB':
            image = image.convert('RGB')

        # Explicit None check: a transform object is applied even if it
        # happens to evaluate as falsy.
        if self.transform is not None:
            image = self.transform(image)
        return image, label


In [None]:
# Preprocessing: resize to 224x224, convert to a tensor, then normalise each
# channel with the given mean and standard deviation.
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Wrap both splits so every sample goes through the same preprocessing
train_custom_dataset = custom_data_loader(train_dataset, transform=transform)
valid_custom_dataset = custom_data_loader(valid_test_dataset, transform=transform)

# Batch the data: training batches are shuffled, validation order is kept fixed
train_loader = DataLoader(train_custom_dataset, batch_size=32, shuffle=True)
valid_loader = DataLoader(valid_custom_dataset, batch_size=32, shuffle=False)


In [None]:
#define training and testing functions
def train_model(model, train_loader, criterion, optimizer, epochs=1, device=None):
    """Train ``model`` for ``epochs`` passes over ``train_loader``.

    Args:
        model: Network to optimise; it is switched to training mode.
        train_loader: Iterable yielding ``(inputs, labels)`` tensor batches.
        criterion: Callable mapping ``(outputs, labels)`` to a scalar loss.
        optimizer: Optimizer that updates the model's parameters.
        epochs: Number of passes over ``train_loader``.
        device: Device the batches are moved to. Defaults to the device of the
            model's first parameter (CPU if the model has no parameters), so
            the function does not rely on a notebook-level ``device`` variable.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.train()
    for epoch in range(epochs):
        # tqdm progress bar over the batches of this epoch
        train_loop = tqdm(train_loader, leave=True, position=0)
        # The description only depends on the epoch, so set it once per epoch
        train_loop.set_description(f"Epoch [{epoch+1}/{epochs}]")
        for inputs, labels in train_loop:
            # Move the batch to the same device as the model
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)

            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Show the latest batch loss next to the progress bar
            train_loop.set_postfix(loss=loss.item())

def validate_model(model, valid_loader, device=None):
    """Return the top-1 accuracy of ``model`` on ``valid_loader`` in percent.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        valid_loader: Iterable yielding ``(inputs, labels)`` tensor batches.
        device: Device the batches are moved to. Defaults to the device of the
            model's first parameter (CPU if the model has no parameters), so
            the function does not rely on a notebook-level ``device`` variable.

    Returns:
        ``100 * correct / total``, or ``0.0`` when the loader yields no samples.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    correct = 0
    total = 0
    valid_loop = tqdm(valid_loader, leave=True, position=0)
    # The description never changes, so set it once before iterating
    valid_loop.set_description("Validating")
    with torch.no_grad():
        for inputs, labels in valid_loop:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            # Predicted class = index of the largest logit in each row
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # Guard against an empty loader instead of dividing by zero
    if total == 0:
        return 0.0
    return 100 * correct / total


In [None]:
# Check whether CUDA is available
torch.cuda.is_available()

True

In [None]:
# Prefer the first GPU, but fall back to the CPU so the notebook still runs
# on machines without CUDA.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

In [None]:
# Move the VGG-19 model to the selected device
vgg19_model = vgg19_model.to(device)

In [None]:
# VGG-19: train for one epoch, then measure validation accuracy (with progress bars)
criterion = nn.CrossEntropyLoss()
# Give the optimizer only the parameters that still require gradients; the
# frozen pretrained layers are never updated, so they do not belong in it.
vgg19_trainable_params = [p for p in vgg19_model.parameters() if p.requires_grad]
optimizer = optim.Adam(vgg19_trainable_params, lr=0.001)
vgg19_start = time.time()
# train the model
train_model(vgg19_model, train_loader, criterion, optimizer, epochs=1)

# validate and report accuracy
vgg19_accuracy = validate_model(vgg19_model, valid_loader)
vgg19_end = time.time()
# Elapsed seconds covering both training and validation
vgg19_duration = vgg19_end - vgg19_start
print(f"Accuracy of VGG-19 on tinyImageNet: {vgg19_accuracy}%")
# Name the exact model so the line cannot be confused with the VGG-11 run
print(f"VGG-19 run time {vgg19_duration}")


Epoch [1/1]: 100%|██████████| 3125/3125 [13:34<00:00,  3.83it/s, loss=2.34]
Validating: 100%|██████████| 313/313 [01:21<00:00,  3.86it/s]

Accuracy of VGG-19 on tinyImageNet: 56.63%
VGG run time 895.9603996276855





small version vgg11
- 11 layers instead of 19

In [None]:
# num_classes=200 asks timm to build the classifier head with one output per
# Tiny ImageNet class; without it the model keeps the head it was pretrained with.
vgg11_model = timm.create_model('vgg11', pretrained=True, num_classes=200)

model.safetensors:   0%|          | 0.00/531M [00:00<?, ?B/s]

In [None]:
# Move the VGG-11 model to the selected device
vgg11_model = vgg11_model.to(device)

In [None]:
# VGG-11: train for one epoch, then measure validation accuracy (with progress bars)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(vgg11_model.parameters(), lr=0.001)
vgg11_start = time.time()
# train the model
train_model(vgg11_model, train_loader, criterion, optimizer, epochs=1)

# validate and report accuracy
vgg11_accuracy = validate_model(vgg11_model, valid_loader)
vgg11_end = time.time()
# Elapsed seconds covering both training and validation
vgg11_duration = vgg11_end - vgg11_start
print(f"Accuracy of VGG-11 on tinyImageNet: {vgg11_accuracy}%")
# Name the exact model so the line cannot be confused with the VGG-19 run
print(f"VGG-11 run time {vgg11_duration}")


Epoch [1/1]: 100%|██████████| 3125/3125 [13:53<00:00,  3.75it/s, loss=5.1]
Validating: 100%|██████████| 313/313 [01:20<00:00,  3.89it/s]

Accuracy of VGG-11 on tinyImageNet: 57.33%
VGG run time 913.5587043762207





Resnet 50 Model (large version)
- Architecture: 50 layers. Residual (skip) connections that gradients flow through. Deep conv network.
- Parameters: 25 million (more efficient with parameter use than VGG-19)
- More efficient and faster than VGG


Resnet 18 (small version)
- 18 layers instead of 50, fewer parameters, requires less compute power but is potentially less accurate.


In [None]:
from torchvision.models import resnet50
# Load ResNet-50 with pretrained weights
# NOTE(review): newer torchvision releases favour a `weights=` argument over
# `pretrained=True` — confirm against the installed version.
resnet50_model = resnet50(pretrained=True)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:01<00:00, 81.2MB/s]


In [None]:
# Number of input features of the existing final fully connected layer
num_feats = resnet50_model.fc.in_features
# Replace the final classification layer of ResNet with one that has 200 outputs (Tiny ImageNet classes)
# NOTE(review): unlike the VGG-19 cell, no layers are frozen here, so the whole
# network is fine-tuned — confirm this difference is intended for the comparison.
resnet50_model.fc = nn.Linear(num_feats,200)

In [None]:
# Move the ResNet-50 model to the selected device
resnet50_model = resnet50_model.to(device)

In [None]:
# ResNet-50: train for one epoch, then measure validation accuracy (with progress bars)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(resnet50_model.parameters(), lr=0.001)
resnet50_start = time.time()
# train the model
train_model(resnet50_model, train_loader, criterion, optimizer, epochs=1)

# validate and report accuracy
resnet50_model_accuracy = validate_model(resnet50_model, valid_loader)
resnet50_end = time.time()
# Elapsed seconds covering both training and validation
resnet50_duration = resnet50_end - resnet50_start
print(f"Accuracy of Resnet50 on tinyImageNet: {resnet50_model_accuracy}%")
# The label previously said "VGG" (copy-paste slip); it now names this model
print(f"Resnet50 run time {resnet50_duration}")


Epoch [1/1]: 100%|██████████| 3125/3125 [20:05<00:00,  2.59it/s, loss=2.73]
Validating: 100%|██████████| 313/313 [00:51<00:00,  6.13it/s]

Accuracy of Resnet50 on tinyImageNet: 20.59%
VGG run time 1256.5556631088257





small resnet (resnet 18)
- 18 layers instead of 50

In [None]:
# Small ResNet variant: load ResNet-18 with pretrained weights through timm
resnet18_model = timm.create_model('resnet18', pretrained=True)

model.safetensors:   0%|          | 0.00/46.8M [00:00<?, ?B/s]

In [None]:
# Number of input features of the existing final fully connected layer
num_feats = resnet18_model.fc.in_features
# Replace the final classification layer of ResNet with one that has 200 outputs (Tiny ImageNet classes)
resnet18_model.fc = nn.Linear(num_feats,200)

In [None]:
# Move the ResNet-18 model to the selected device
resnet18_model = resnet18_model.to(device)

In [None]:
# ResNet-18: train for one epoch, then measure validation accuracy (with progress bars)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(resnet18_model.parameters(), lr=0.001)
resnet18_start = time.time()
# train the model
train_model(resnet18_model, train_loader, criterion, optimizer, epochs=1)

# validate and report accuracy
resnet18_model_accuracy = validate_model(resnet18_model, valid_loader)
resnet18_end = time.time()
# Elapsed seconds covering both training and validation
resnet18_duration = resnet18_end - resnet18_start
print(f"Accuracy of Resnet18 on tinyImageNet: {resnet18_model_accuracy}%")
# The label previously said "VGG" (copy-paste slip); it now names this model
print(f"Resnet18 run time {resnet18_duration}")

Epoch [1/1]: 100%|██████████| 3125/3125 [08:38<00:00,  6.02it/s, loss=1.24]
Validating: 100%|██████████| 313/313 [00:34<00:00,  9.14it/s]

Accuracy of Resnet18 on tinyImageNet: 61.84%
VGG run time 553.2150270938873





InceptionV4 model (large version)

Inception models are deep CNNs designed for efficient image classification and feature extraction.

- Architecture: uses inception modules that allow it to look at the same image with different receptive field sizes (varying number of filters)
- Parameters: 42 million
- Uses: more parameter efficient than VGG, very flexible model due to modular nature of inception modules
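- Note: Inception V4 itself does not use residual connections (those belong to the Inception-ResNet variants introduced in the same paper); compared with Inception V3 it uses a deeper, more uniform stack of Inception modules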

InceptionV3 model (small version):
- less complex version of InceptionV4





In [None]:
# Load Inception V4 with pretrained weights through timm
inception_model = timm.create_model('inception_v4', pretrained=True)

model.safetensors:   0%|          | 0.00/171M [00:00<?, ?B/s]

In [None]:
print(inception_model)
# Print the model to find the name of its last layer.
# The last layer is (last_linear): Linear(in_features=1536, out_features=1000, bias=True)

In [None]:
# Replace the final classifier layer with one that has 200 outputs (Tiny ImageNet classes),
# keeping the input width of the layer being replaced
num_ftrs = inception_model.last_linear.in_features
inception_model.last_linear = nn.Linear(num_ftrs, 200)


In [None]:
# Move the Inception V4 model to the selected device
inception_model = inception_model.to(device)

In [None]:
# Inception V4 (large model): one epoch of training followed by validation,
# timed end to end. The Adam optimizer is used, as for all models.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(inception_model.parameters(), lr=1e-3)

inception_start = time.time()
train_model(inception_model, train_loader, criterion, optimizer, epochs=1)
inception_model_accuracy = validate_model(inception_model, valid_loader)
inception_end = time.time()

# Report accuracy and elapsed time
inception_duration = inception_end - inception_start
print(f"Accuracy of Inception Model on tinyImageNet: {inception_model_accuracy}%")
print(f"Inception V4 run time {inception_duration}")

Epoch [1/1]: 100%|██████████| 3125/3125 [29:16<00:00,  1.78it/s, loss=4.6]
Validating: 100%|██████████| 313/313 [01:06<00:00,  4.70it/s]

Accuracy of Inception Model on tinyImageNet: 6.13%
Inception run time 1823.1921582221985





In [None]:
# Small Inception variant: load Inception V3 with pretrained weights through timm.
# num_classes=200 asks timm to build the classifier head with one output per
# Tiny ImageNet class; without it the model keeps the head it was pretrained with.
inceptionv3_model = timm.create_model('inception_v3', pretrained=True, num_classes=200)

model.safetensors:   0%|          | 0.00/95.5M [00:00<?, ?B/s]

In [None]:
# Move the Inception V3 model to the selected device
inceptionv3_model = inceptionv3_model.to(device)

In [None]:
# Inception V3: train for one epoch, then measure validation accuracy.
# The Adam optimizer is used, as for all models.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(inceptionv3_model.parameters(), lr=0.001)
inceptionv3_start = time.time()
# train the model
train_model(inceptionv3_model, train_loader, criterion, optimizer, epochs=1)

# validate and report accuracy
inceptionv3_model_accuracy = validate_model(inceptionv3_model, valid_loader)
inceptionv3_end = time.time()
# Elapsed seconds covering both training and validation
inceptionv3_duration = inceptionv3_end - inceptionv3_start
# Name the version explicitly so this output is not confused with Inception V4
print(f"Accuracy of Inception V3 on tinyImageNet: {inceptionv3_model_accuracy}%")
print(f"Inception V3 run time {inceptionv3_duration}")

Epoch [1/1]: 100%|██████████| 3125/3125 [16:20<00:00,  3.19it/s, loss=3.36]
Validating: 100%|██████████| 313/313 [00:44<00:00,  6.99it/s]

Accuracy of Inception Model on tinyImageNet: 32.11%
Inception run time 1024.9125428199768





Now create a dataframe to store the accuracies and execution time to compare models

In [None]:
# Results of the large models: name, duration and accuracy of each run
cnn_model_dic_large = {
    "Models": ["VGG-19", "Resnet50", "Inception V4"],
    "Duration": [vgg19_duration, resnet50_duration, inception_duration],
    "Accuracy": [vgg19_accuracy, resnet50_model_accuracy, inception_model_accuracy],
}

In [None]:
# Build a DataFrame from the large-model dictionary and print it
cnn_df = pd.DataFrame(cnn_model_dic_large)
print(cnn_df)

       Model     Duration  Accuracy
0     VGG-19   895.960400     56.63
1   Resnet50  1256.555663     20.59
2  Inception  1823.192158      6.13


In [None]:
# Results of the small models: name, duration and accuracy of each run
cnn_model_dic_small = {
    "Small Models": ["VGG-11", "Resnet18", "Inception V3"],
    "Duration": [vgg11_duration, resnet18_duration, inceptionv3_duration],
    "Accuracy": [vgg11_accuracy, resnet18_model_accuracy, inceptionv3_model_accuracy],
}

In [None]:
# Build a DataFrame from the small-model dictionary and print it
cnn_small_df = pd.DataFrame(cnn_model_dic_small)
print(cnn_small_df)

   Small Models     Duration  Accuracy
0        VGG-11   913.558704     57.33
1      Resnet18   553.215027     61.84
2  Inception V3  1024.912543     32.11
