# Importing needed libraries

In [1]:
import numpy as np 
import pandas as pd 
import time
import copy
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import Subset
import torchvision
import torchvision.transforms as transforms
from torchvision import datasets, transforms
from tqdm import tqdm

import matplotlib.pyplot as plt

# Importing needed libraries

In [2]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),  
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

transform_test = transforms.Compose([
    transforms.Resize((224, 224)),  
    transforms.ToTensor(),
])

train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)

# Create data loaders
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=False)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:03<00:00, 45455953.42it/s] 


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


# GoogLeNet Model 

In [3]:
class Inception(nn.Module):
    
    def __init__(self, in_channels=3, use_auxiliary=True, num_classes=1000):
        super(Inception, self).__init__()
        
        self.conv1 = ConvBlock(in_channels, 64, kernel_size=7, stride=2, padding=3)
        self.maxpool1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.conv2 = ConvBlock(64, 192, kernel_size=3, stride=1, padding=1)
        self.maxpool2 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.inception3a = InceptionBlock(192, 64, 96, 128, 16, 32, 32)
        self.inception3b = InceptionBlock(256, 128, 128, 192, 32, 96, 64)
        self.maxpool3 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.inception4a = InceptionBlock(480, 192, 96, 208, 16, 48, 64)
        self.inception4b = InceptionBlock(512, 160, 112, 224, 24, 64, 64)
        self.inception4c = InceptionBlock(512, 128, 128, 256, 24, 64, 64)
        self.inception4d = InceptionBlock(512, 112, 144, 288, 32, 64, 64)
        self.auxiliary4a = Auxiliary(512, num_classes)
        
        self.inception4e = InceptionBlock(528, 256, 160, 320, 32, 128, 128)
        self.maxpool4 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.inception5a = InceptionBlock(832, 256, 160, 320, 32, 128, 128)
        self.inception5b = InceptionBlock(832, 384, 192, 384, 48, 128, 128)
        
        self.avgpool = nn.AvgPool2d(kernel_size=7, stride=1)
        self.dropout = nn.Dropout(0.4)
        self.linear = nn.Linear(1024, num_classes)
        
        self.use_auxiliary = use_auxiliary
        if use_auxiliary:
            self.auxiliary4d = Auxiliary(528, num_classes)

    def forward(self, x):
        auxiliary_outputs = []

        x = self.conv1(x)
        x = self.maxpool1(x)
        x = self.conv2(x)
        x = self.maxpool2(x)
        
        x = self.inception3a(x)
        x = self.inception3b(x)
        x = self.maxpool3(x)
        
        x = self.inception4a(x)
        auxiliary_outputs.append(self.auxiliary4a(x))
        
        x = self.inception4b(x)
        x = self.inception4c(x)
        x = self.inception4d(x)
        if self.use_auxiliary:
            auxiliary_outputs.append(self.auxiliary4d(x))
        
        x = self.inception4e(x)
        x = self.maxpool4(x)
        
        x = self.inception5a(x)
        x = self.inception5b(x)
        
        x = self.avgpool(x)
        x = x.reshape(x.shape[0], -1)
        x = self.dropout(x)
        
        x = self.linear(x)
        x = F.softmax(x, dim=1)
        if self.use_auxiliary:
            return x, auxiliary_outputs
        else:
            return x
        


class ConvBlock(nn.Module):
    
    def __init__(self, in_channels, out_channels, kernel_size, **kwargs):
        super(ConvBlock, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, **kwargs)
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()
        
    def forward(self, x):
        return self.relu(self.bn(self.conv(x)))
    
    
    

class InceptionBlock(nn.Module):
    
    def __init__(self, im_channels, num_1x1, num_3x3_red, num_3x3, num_5x5_red, num_5x5, num_pool_proj):
        super(InceptionBlock, self).__init__()
        
        self.one_by_one = ConvBlock(im_channels, num_1x1, kernel_size=1)
        
        self.tree_by_three_red = ConvBlock(im_channels, num_3x3_red, kernel_size=1)  
        self.tree_by_three = ConvBlock(num_3x3_red, num_3x3, kernel_size=3, padding=1)
        
        self.five_by_five_red = ConvBlock(im_channels, num_5x5_red, kernel_size=1)
        self.five_by_five = ConvBlock(num_5x5_red, num_5x5, kernel_size=5, padding=2)
        
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        self.pool_proj = ConvBlock(im_channels, num_pool_proj, kernel_size=1)
         
    def forward(self, x):
        x1 = self.one_by_one(x)
        
        x2 = self.tree_by_three_red(x)
        x2 = self.tree_by_three(x2)
        
        x3 = self.five_by_five_red(x)
        x3 = self.five_by_five(x3)
        
        x4 = self.maxpool(x)
        x4 = self.pool_proj(x4)
        
        x = torch.cat([x1, x2, x3, x4], 1)
        return x
    
    
    
class Auxiliary(nn.Module):
    
    def __init__(self, in_channels, num_classes):
        super(Auxiliary, self).__init__()
        self.avgpool = nn.AvgPool2d(kernel_size=5, stride=3)
        self.conv1x1 = ConvBlock(in_channels, 128, kernel_size=1)
        
        self.fc1 = nn.Linear(2048, 1024)
        self.fc2 = nn.Linear(1024, num_classes)
        
        self.dropout = nn.Dropout(0.7)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.avgpool(x)
        x = self.conv1x1(x)
        x = x.reshape(x.shape[0], -1)
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        x = F.softmax(x, dim=1)
        return x
    
    
    

In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [5]:
num_classes = 10

# Instantiate the model
model = Inception(in_channels=3, num_classes=num_classes).to(device)


# Define the data loaders
dataloaders = {"train": train_loader, "val": test_loader}

# Define the loss function
criterion = nn.CrossEntropyLoss()

# Define the optimizer
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# Define the scheduler
scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

In [6]:
def train_model(model, dataloaders, criterion, optimizer, lr_scheduler, num_epochs=5, use_auxiliary=True):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    
    since = time.time()
    val_acc = []
    train_loss = []
    train_acc = []
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            # Use tqdm for a progress bar
            data_loader = tqdm(dataloaders[phase], desc=f'{phase.capitalize()} Epoch {epoch}')
            for inputs, labels in data_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                optimizer.zero_grad()

                with torch.set_grad_enabled(phase == 'train'):
                    if use_auxiliary:
                        outputs, aux_outs = model(inputs)
                        loss = criterion(outputs, labels) + 0.3 * criterion(aux_outs[0], labels) + 0.3 * criterion(aux_outs[1], labels)
                    else:
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs, 1)

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                        batch_loss = loss.item()
                        batch_acc = torch.sum(preds == labels.data).double() / len(labels)

                        train_loss.append(batch_loss)
                        train_acc.append(batch_acc)

                        data_loader.set_postfix({'Train Loss': batch_loss, 'Train Acc': batch_acc.item()})

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)

            if phase == 'val':
                lr_scheduler.step(epoch_loss)
                val_acc.append(epoch_acc)

            print(f'{phase.capitalize()} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    print(f'Best val Acc: {best_acc:.4f}')

    # Load best model weights
    model.load_state_dict(best_model_wts)

    return model, val_acc, train_loss, train_acc

In [None]:
model, val_acc, train_loss, train_acc = train_model(model, dataloaders, criterion, optimizer, scheduler, num_epochs=20)

Epoch 0/19
----------


Train Epoch 0: 100%|██████████| 782/782 [04:29<00:00,  2.90it/s, Train Loss=3.46, Train Acc=0.375] 


Train Loss: 3.5627 Acc: 0.2586


Val Epoch 0: 100%|██████████| 157/157 [00:23<00:00,  6.57it/s]


Val Loss: 3.6698 Acc: 0.1164

Epoch 1/19
----------


Train Epoch 1: 100%|██████████| 782/782 [04:24<00:00,  2.96it/s, Train Loss=3.21, Train Acc=0.438]


Train Loss: 3.3378 Acc: 0.4494


Val Epoch 1: 100%|██████████| 157/157 [00:23<00:00,  6.57it/s]


Val Loss: 3.6400 Acc: 0.1635

Epoch 2/19
----------


Train Epoch 2: 100%|██████████| 782/782 [04:23<00:00,  2.97it/s, Train Loss=3.17, Train Acc=0.562]


Train Loss: 3.1845 Acc: 0.5423


Val Epoch 2: 100%|██████████| 157/157 [00:23<00:00,  6.55it/s]


Val Loss: 3.5888 Acc: 0.2189

Epoch 3/19
----------


Train Epoch 3: 100%|██████████| 782/782 [04:23<00:00,  2.97it/s, Train Loss=3.2, Train Acc=0.438] 


Train Loss: 3.0912 Acc: 0.5825


Val Epoch 3: 100%|██████████| 157/157 [00:24<00:00,  6.47it/s]


Val Loss: 3.6216 Acc: 0.1819

Epoch 4/19
----------


Train Epoch 4: 100%|██████████| 782/782 [04:22<00:00,  2.98it/s, Train Loss=3.3, Train Acc=0.375] 


Train Loss: 3.0142 Acc: 0.6259


Val Epoch 4: 100%|██████████| 157/157 [00:23<00:00,  6.61it/s]


Val Loss: 3.5776 Acc: 0.2100

Epoch 5/19
----------


Train Epoch 5: 100%|██████████| 782/782 [04:22<00:00,  2.98it/s, Train Loss=3.07, Train Acc=0.688]


Train Loss: 2.9426 Acc: 0.6767


Val Epoch 5: 100%|██████████| 157/157 [00:23<00:00,  6.54it/s]


Val Loss: 3.5525 Acc: 0.2265

Epoch 6/19
----------


Train Epoch 6: 100%|██████████| 782/782 [04:22<00:00,  2.98it/s, Train Loss=2.84, Train Acc=0.75] 


Train Loss: 2.8831 Acc: 0.7152


Val Epoch 6: 100%|██████████| 157/157 [00:24<00:00,  6.53it/s]


Val Loss: 3.5250 Acc: 0.2458

Epoch 7/19
----------


Train Epoch 7: 100%|██████████| 782/782 [04:21<00:00,  2.99it/s, Train Loss=2.9, Train Acc=0.625] 


Train Loss: 2.8352 Acc: 0.7458


Val Epoch 7: 100%|██████████| 157/157 [00:23<00:00,  6.64it/s]


Val Loss: 3.4936 Acc: 0.2644

Epoch 8/19
----------


Train Epoch 8: 100%|██████████| 782/782 [04:20<00:00,  3.00it/s, Train Loss=2.78, Train Acc=0.812]


Train Loss: 2.7903 Acc: 0.7754


Val Epoch 8: 100%|██████████| 157/157 [00:23<00:00,  6.65it/s]


Val Loss: 3.5584 Acc: 0.2088

Epoch 9/19
----------


Train Epoch 9: 100%|██████████| 782/782 [04:20<00:00,  3.00it/s, Train Loss=3.09, Train Acc=0.5]  


Train Loss: 2.7489 Acc: 0.7984


Val Epoch 9: 100%|██████████| 157/157 [00:23<00:00,  6.60it/s]


Val Loss: 3.3851 Acc: 0.3651

Epoch 10/19
----------


Train Epoch 10: 100%|██████████| 782/782 [04:20<00:00,  3.00it/s, Train Loss=2.95, Train Acc=0.625]


Train Loss: 2.7132 Acc: 0.8179


Val Epoch 10: 100%|██████████| 157/157 [00:23<00:00,  6.59it/s]


Val Loss: 3.4173 Acc: 0.3204

Epoch 11/19
----------


Train Epoch 11: 100%|██████████| 782/782 [04:20<00:00,  3.00it/s, Train Loss=2.82, Train Acc=0.688]


Train Loss: 2.6847 Acc: 0.8315


Val Epoch 11: 100%|██████████| 157/157 [00:23<00:00,  6.61it/s]


Val Loss: 3.4602 Acc: 0.2862

Epoch 12/19
----------


Train Epoch 12: 100%|██████████| 782/782 [04:20<00:00,  3.00it/s, Train Loss=2.48, Train Acc=1]    


Train Loss: 2.6527 Acc: 0.8507


Val Epoch 12: 100%|██████████| 157/157 [00:23<00:00,  6.61it/s]


Val Loss: 3.4304 Acc: 0.3181

Epoch 13/19
----------


Train Epoch 13: 100%|██████████| 782/782 [04:20<00:00,  3.00it/s, Train Loss=2.77, Train Acc=0.75] 


Train Loss: 2.6305 Acc: 0.8618


Val Epoch 13: 100%|██████████| 157/157 [00:23<00:00,  6.61it/s]


Val Loss: 3.3746 Acc: 0.3476

Epoch 14/19
----------


Train Epoch 14:  82%|████████▏ | 643/782 [03:34<00:46,  3.00it/s, Train Loss=2.58, Train Acc=0.859]

In [None]:
val_acc= np.array([acc.cpu().numpy() for acc in val_acc])
train_acc = np.array([acc.cpu().numpy() for acc in train_acc])
train_loss_epc = [sum(train_loss[i:i+782]) / 782 for i in range(0, len(train_loss), 782)]
train_acc_epc = [sum(train_acc[i:i+782]) / 782 for i in range(0, len(train_acc), 782)]
plt.figure(figsize=(12, 4))

# Plot Validation Accuracy
plt.subplot(1, 3, 1)
plt.plot((val_acc), label='Validation Accuracy', marker='o')
plt.title('Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

# Plot Training Loss
plt.subplot(1, 3, 2)
plt.plot(np.array(train_loss_epc), label='Training Loss', marker='o')
plt.title('Training Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

# Plot Training Accuracy
plt.subplot(1, 3, 3)
plt.plot((train_acc_epc), label='Training Accuracy', marker='o')
plt.title('Training Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

plt.tight_layout()
plt.show()

# ResNet Model

In [None]:
class ConvBlock(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, **kwargs):
        super(ConvBlock, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, **kwargs)
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()
        
    def forward(self, x):
        return self.relu(self.bn(self.conv(x)))

In [None]:
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride = 1, downsample = None):
        super(ResidualBlock, self).__init__()
        self.conv1 = ConvBlock(in_channels, out_channels, kernel_size = 3, stride = stride, padding = 1)
        self.conv2 = ConvBlock(out_channels, out_channels, kernel_size = 3, stride = 1, padding = 1)
        self.downsample = downsample
        self.relu = nn.ReLU()
        self.out_channels = out_channels
        
    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.conv2(out)
        if self.downsample:
            residual = self.downsample(x)
        out = out + residual
        out = self.relu(out)
        return out

In [None]:
class ResNet34(nn.Module):
    def __init__(self, block, layers, num_classes = 10):
        super(ResNet34, self).__init__()
        self.inplanes = 64
        self.conv1 = ConvBlock(3, 64, kernel_size = 7, stride = 2, padding = 3)
        self.maxpool = nn.MaxPool2d(kernel_size = 3, stride = 2, padding = 1)
        self.layer0 = self._make_layer(block, 64, layers[0], stride = 1)
        self.layer1 = self._make_layer(block, 128, layers[1], stride = 2)
        self.layer2 = self._make_layer(block, 256, layers[2], stride = 2)
        self.layer3 = self._make_layer(block, 512, layers[3], stride = 2)
        self.avgpool = nn.AvgPool2d(7, stride=1)
        self.fc = nn.Linear(512, num_classes)
        
    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes, kernel_size=1, stride=stride),
                nn.BatchNorm2d(planes),)
            
        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)
    
    
    def forward(self, x):
        x = self.conv1(x)
        x = self.maxpool(x)
        x = self.layer0(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x

In [None]:
model = ResNet34(ResidualBlock, [3, 4, 6, 3]).to(device)

In [None]:
def train_resnet34(model, dataloaders, criterion, optimizer, scheduler, num_epochs=5):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    
    since = time.time()
    val_acc = []
    train_loss = []
    train_acc = []
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            # Use tqdm for a progress bar
            data_loader = tqdm(dataloaders[phase], desc=f'{phase.capitalize()} Epoch {epoch}')
            for inputs, labels in data_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                optimizer.zero_grad()

                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs, 1)

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                        batch_loss = loss.item()
                        batch_acc = torch.sum(preds == labels.data).double() / len(labels)

                        train_loss.append(batch_loss)
                        train_acc.append(batch_acc)

                        data_loader.set_postfix({'Train Loss': batch_loss, 'Train Acc': batch_acc.item()})

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)

            if phase == 'val':
                scheduler.step(epoch_loss)
                val_acc.append(epoch_acc)

            print(f'{phase.capitalize()} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                
                # Save the best model
                torch.save(model.state_dict(), 'best_resnet34.pth')

        print()

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    print(f'Best val Acc: {best_acc:.4f}')

    model.load_state_dict(best_model_wts)
    return model, val_acc, train_loss, train_acc

In [None]:
model, val_acc, train_loss, train_acc = train_resnet34(model, trainloader, criterion, optimizer, scheduler, num_epochs=10)