<a href="https://colab.research.google.com/github/mehrshad-sdtn/DeepLearning/blob/master/PyTorch/4_Pytorch_GoogLeNet_Inception.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# prompt: import all the necessary packages for common pytorch programs

import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import torchvision.datasets as datasets
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import models
import os


In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [3]:
class GoogLeNet(nn.Module):
  def __init__(self, num_classes=1000):
    super(GoogLeNet, self).__init__()
    self.conv1 = ConvBlock(3, 64, kernel_size=7, stride=2, padding=3)
    self.maxpool1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
    self.conv2 = ConvBlock(64, 192, kernel_size=3, stride=1, padding=1)
    self.maxpool2 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    self.inception3a = InceptionBlock(192, 64, 96, 128, 16, 32, 32)
    self.inception3b = InceptionBlock(256, 128, 128, 192, 32, 96, 64)
    self.maxpool3 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    self.inception4a = InceptionBlock(480, 192, 96, 208, 16, 48, 64)
    self.inception4b = InceptionBlock(512, 160, 112, 224, 24, 64, 64)
    self.inception4c = InceptionBlock(512, 128, 128, 256, 24, 64, 64)
    self.inception4d = InceptionBlock(512, 112, 144, 288, 32, 64, 64)
    self.inception4e = InceptionBlock(528, 256, 160, 320, 32, 128, 128)
    self.maxpool4 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    self.inception5a = InceptionBlock(832, 256, 160, 320, 32, 128, 128)
    self.inception5b = InceptionBlock(832, 384, 192, 384, 48, 128, 128)

    self.avgpool = nn.AvgPool2d(kernel_size=7, stride=1)
    self.dropout = nn.Dropout(p=0.4)
    self.fc = nn.Linear(1024, num_classes)


  def forward(self, x):
    x = self.conv1(x)
    x = self.maxpool1(x)
    x = self.conv2(x)
    x = self.maxpool2(x)

    x = self.inception3a(x)
    x = self.inception3b(x)
    x = self.maxpool3(x)

    x = self.inception4a(x)
    x = self.inception4b(x)
    x = self.inception4c(x)
    x = self.inception4d(x)
    x = self.inception4e(x)
    x = self.maxpool4(x)

    x = self.inception5a(x)
    x = self.inception5b(x)
    x = self.avgpool(x)
    x = x.reshape(x.shape[0], -1)
    x = self.dropout(x)
    x = self.fc(x)
    return x





class InceptionBlock(nn.Module):
  def __init__(self, in_channels, out_1x1, red_3x3, out_3x3, red_5x5, out_5x5, out_1x1pool):
    super(InceptionBlock, self).__init__()
    self.branch1 = ConvBlock(in_channels, out_1x1, kernel_size=1)
    self.branch2 = nn.Sequential(
        ConvBlock(in_channels, red_3x3, kernel_size=1),
        ConvBlock(red_3x3, out_3x3, kernel_size=3, padding=1)
        )

    self.branch3 = nn.Sequential(
        ConvBlock(in_channels, red_5x5, kernel_size=1),
        ConvBlock(red_5x5, out_5x5, kernel_size=5, padding=2)
        )

    self.branch4 = nn.Sequential(
        nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
        ConvBlock(in_channels, out_1x1pool, kernel_size=1)
        )


  def forward(self, x):
    branch1 = self.branch1(x)
    branch2 = self.branch2(x)
    branch3 = self.branch3(x)
    branch4 = self.branch4(x)
    return torch.cat((branch1, branch2, branch3, branch4), dim=1)




class ConvBlock(nn.Module):
  def __init__(self, in_channels, out_channels, **kwargs):
    super(ConvBlock, self).__init__()
    self.relu = nn.ReLU()
    self.conv = nn.Conv2d(in_channels, out_channels, **kwargs)
    self.batchnorm = nn.BatchNorm2d(out_channels)

  def forward(self, x):
    return self.relu(self.batchnorm(self.conv(x)))

In [4]:
def save_checkpoint(state, filename="my_checkpoint.pth.tar"):
  print("=> Saving checkpoint")
  torch.save(state, filename)


def load_checkpoint(checkpoint, model):
  print("=> Loading checkpoint")
  model.load_state_dict(checkpoint['state_dict'])
  optimizer.load_state_dict(checkpoint['state_dict'])



In [5]:
def test():
  net = GoogLeNet()
  x = torch.randn(64, 3, 224, 224)
  y = net(x).to(device)
  print(y.shape)

test()

torch.Size([64, 1000])


In [6]:
transform_train = transforms.Compose([
    transforms.Resize(224),
    #transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

transform_val = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Load the CIFAR10 training and validation datasets
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
train_loader = DataLoader(trainset, batch_size=64, shuffle=True, num_workers=2)

valset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_val)
val_loader = DataLoader(valset, batch_size=64, shuffle=False, num_workers=2)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:13<00:00, 12821985.78it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [7]:
len(train_loader)

782

In [11]:
model = GoogLeNet()

model = model.to(device)

# Define a loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 4

In [12]:
def check_accuracy_and_loss(data_loader, model, criterion, device=device):
    num_correct = 0
    num_samples = 0
    total_loss = 0.0
    model.eval()
    with torch.no_grad():
        for x, y in data_loader:
            x = x.to(device=device)
            y = y.to(device=device)
            scores = model(x)
            _, predictions = scores.max(1)
            total_loss += criterion(scores, y).item()
            num_correct += (predictions == y).sum().item()
            num_samples += predictions.size(0)
    avg_loss = total_loss / num_samples
    accuracy = (num_correct / num_samples) * 100
    return accuracy, avg_loss


for epoch in range(num_epochs):
    print(f'Epoch {epoch + 1}/{num_epochs}')
    print('-' * 10)

    model.train()
    train_loss = 0.0
    num_batches = len(train_loader)

    for batch_idx, (data, targets) in enumerate(train_loader):
        if batch_idx % 25 == 0:
          print('=', end='')
        # 1) data and targets -> device
        data = data.to(device)
        targets = targets.to(device)

        # 2) optimizer init
        optimizer.zero_grad()

        # 3) forward prop
        outputs = model(data)
        loss = criterion(outputs, targets)

        # 4) backward prop
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    # Calculate average training loss
    avg_train_loss = train_loss / num_batches

    # Calculate accuracy and loss for training and validation data
    train_acc, train_avg_loss = check_accuracy_and_loss(train_loader, model, criterion, device=device)
    validation_acc, validation_avg_loss = check_accuracy_and_loss(val_loader, model, criterion, device=device)
    print('\n')
    print(f'Train Accuracy: {train_acc:.2f}% - Validation Accuracy: {validation_acc:.2f}%')
    print(f'Train Loss: {avg_train_loss:.4f} - Validation Loss: {validation_avg_loss:.4f}')





Epoch 1/4
----------

Train Accuracy: 59.66% - Validation Accuracy: 58.95%
Train Loss: 1.4015 - Validation Loss: 0.0186
Epoch 2/4
----------

Train Accuracy: 71.09% - Validation Accuracy: 69.70%
Train Loss: 0.8761 - Validation Loss: 0.0137
Epoch 3/4
----------

Train Accuracy: 80.02% - Validation Accuracy: 76.95%
Train Loss: 0.6601 - Validation Loss: 0.0106
Epoch 4/4
----------

Train Accuracy: 81.47% - Validation Accuracy: 78.09%
Train Loss: 0.5420 - Validation Loss: 0.0100
