# Galaxy Classification with CNN (Pytorch)

Data:

https://www.kaggle.com/c/galaxy-zoo-the-galaxy-challenge



References:

1. https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html

2. https://pytorch.org/vision/stable/models.html

3. https://pytorch.org/tutorials/beginner/data_loading_tutorial.html

4. https://pytorch.org/tutorials/beginner/finetuning_torchvision_models_tutorial.html

5. https://cs231n.github.io/transfer-learning/



## Check GPU

In [1]:
!nvidia-smi -L

GPU 0: Tesla P100-PCIE-16GB (UUID: GPU-448b9431-2f32-dc6b-875e-7c9fcd58f584)


## Import libraries

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from PIL import Image
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms, utils
import torch.optim as optim
from torch.optim import lr_scheduler

import time
import os
import zipfile
from copy import deepcopy

%matplotlib inline

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


## Unzip images

Before running the code, please upload `train_46183.zip` and `test_15395.zip` to the workspace.

We upzip these 2 files, and place all training images (46,183 images) into the folder `images_train`, and all test images (15,395 images) into the folder `images_test`.

We create the folders if they don't exist.

In [3]:
## Unzip training images
train_dir = 'images_train'
# create dir if not exist
if not os.path.exists(train_dir):
    os.makedirs(train_dir)

zip_ref = zipfile.ZipFile('train_46183.zip', 'r')
zip_ref.extractall(path=train_dir) # unzip
zip_ref.close()

In [4]:
## Unzip test images
test_dir = 'images_test'
# create dir if not exist
if not os.path.exists(test_dir):
    os.makedirs(test_dir)

zip_ref = zipfile.ZipFile('test_15395.zip', 'r')
zip_ref.extractall(path=test_dir) # unzip
zip_ref.close()

## Import custom datasets

In [3]:
## Custom Galaxy Zoo Dataset
class GalaxyZooDataset(Dataset):
    """Galaxy Zoo Dataset"""

    def __init__(self, csv_file, images_dir, transform=None):
        """
        Args:
            csv_file (string): path to the label csv
            images_dir (string): path to the dir containing all images
            transform (callable, optional): transform to apply
        """
        self.labels_df = pd.read_csv(csv_file)
        self.images_dir = images_dir
        self.transform = transform
    
    def __len__(self):
        """
        Returns the size of the dataset
        """
        return len(self.labels_df)

    def __getitem__(self, idx):
        """
        Get the idx-th sample.
		Outputs the image (channel first) and the true label
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()
        
        # galaxy ID
        galaxyid = self.labels_df.iloc[idx, 0].astype(str)
		# path of the image
        image_path = os.path.join(self.images_dir, galaxyid + '.jpg')
		# read the image
        image = Image.open(image_path)
		# apply transform (optional)
        if self.transform is not None:
            image = self.transform(image)
		# read the true label
        label = int(self.labels_df.iloc[idx, 1])

        return image, label

## Data Augmentation Transforms

In [4]:
def create_data_transforms(is_for_inception=False):
    """
    Create Pytorch data transforms for the GalaxyZoo datasets.
    Args:
        is_for_inception (bool): True for inception neural networks
    Outputs:
        train_transform: transform for the training data
        test_transform: transform for the testing data
    """
    if is_for_inception:
        input_size = 299
    else:
        input_size = 224

    train_transform = transforms.Compose([transforms.CenterCrop(input_size),
                                          transforms.RandomRotation(90),
                                          transforms.RandomHorizontalFlip(),
                                          transforms.RandomVerticalFlip(),
                                          transforms.RandomResizedCrop(input_size, scale=(0.8, 1.0), ratio=(0.99, 1.01)),
                                          transforms.ToTensor(),
                                          transforms.Normalize([0, 0, 0], [255, 255, 255]),
                                          transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

    test_transform = transforms.Compose([transforms.CenterCrop(input_size),
                                         transforms.ToTensor(),
                                         transforms.Normalize([0, 0, 0], [255, 255, 255]),
                                         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

    
    return train_transform, test_transform

## Parameters

In [5]:
NUM_OF_CLASSES = 5  # there are 5 classes in total
BATCH_SIZE = 32     # batch zize

## Training function

In [19]:
def train_model(model, num_epochs, criterion, optimizer, scheduler, print_every=1, is_for_inception=False):
    """
    Train the model
    Args:
        model: Pytorch neural model
        num_epochs: number of epochs to train
        criterion: the loss function object
        optimizer: the optimizer
        scheduler: the learning rate decay scheduler
        print_every: print the information every X epochs
        is_for_inception: True if the model is an inception model
    """
    # cache the best model
    best_model_weights = deepcopy(model.state_dict())
    # best test acc
    best_test_acc = 0.0
    # best epoch
    best_epoch = -1

    for epoch in range(num_epochs):
        # time of start
        epoch_start_time = time.time()

        """
        Train
        """
        model.train()

        epoch_train_cum_loss = 0.0
        epoch_train_cum_corrects = 0
        
        for images, labels in train_loader:
            images = images.to(device)
            labels = labels.long().to(device)

            optimizer.zero_grad()
            
            if is_for_inception:
                pred_logits, aux_outputs = model(images)
                loss = criterion(pred_logits, labels) + 0.4*criterion(aux_outputs, labels)
            else:
                pred_logits = model(images)
                loss = criterion(pred_logits, labels)

            _, pred_classes = torch.max(pred_logits.detach(), dim=1)
            pred_classes = pred_classes.long()

            epoch_train_cum_loss += loss.item() * images.size(0)
            epoch_train_cum_corrects += torch.sum(pred_classes==labels.data)

            loss.backward()
            optimizer.step()
            
        """
        Eval
        """
        model.eval()

        epoch_test_cum_loss = 0.0
        epoch_test_cum_corrects = 0

        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.long().to(device)

            with torch.no_grad():
                pred_logits = model(images)
                _, pred_classes = torch.max(pred_logits.detach(), dim=1)
                loss = criterion(pred_logits, labels)

                epoch_test_cum_loss += loss.item() * images.size(0)
                epoch_test_cum_corrects += torch.sum(pred_classes==labels.data)

        scheduler.step()

        ## Calculate metrics
        train_loss = epoch_train_cum_loss / len(data_train)
        train_acc = epoch_train_cum_corrects / len(data_train)
        test_loss = epoch_test_cum_loss / len(data_test)
        test_acc = epoch_test_cum_corrects / len(data_test)

        # check if is the best acc ever
        if test_acc > best_test_acc:
            best_test_acc = test_acc
            best_epoch = epoch + 1
            # update the best model weights
            best_model_weights = deepcopy(model.state_dict())

        
        epoch_end_time = time.time()
        epoch_time_used = epoch_end_time - epoch_start_time

        ## Print metrics
        if (epoch+1) % print_every == 0:
            print("Epoch {}/{}\tTrain loss: {:.4f}\tTrain acc: {:.3f}\tTest loss: {:.4f}\tTest acc: {:.3f}\tTime: {:.0f}m {:.0f}s".format(
                epoch+1, num_epochs, train_loss, train_acc, test_loss, test_acc, epoch_time_used // 60, epoch_time_used % 60))
    
    # load the best weights into the model
    model.load_state_dict(best_model_weights)
    # print the best epoch
    print("Best epoch = {}, with acc = {:.3f}".format(epoch+1, best_test_acc))
    # return the best model
    return model

## ResNet18 Model

### Model architecture

**Original paper**

Deep Residual Learning for Image Recognition [(arXiv)](https://arxiv.org/abs/1512.03385)

**The last layer**

The last layer of ResNet18 model is called `fc`, with input size = `512`

We replace the last layer with a linear layer by `model.fc = nn.Linear(512, NUM_OF_CLASSES, bias=True)`

In [10]:
## Resnet18 architecture
model = models.resnet18(pretrained=True)
print(model)
# count trainable parameters
print("==========")
print("Number of trainable parameters:")
print("layer1: {}".format(sum(param.numel() for param in model.layer1.parameters() if param.requires_grad)))
print("layer2: {}".format(sum(param.numel() for param in model.layer2.parameters() if param.requires_grad)))
print("layer3: {}".format(sum(param.numel() for param in model.layer3.parameters() if param.requires_grad)))
print("layer4: {}".format(sum(param.numel() for param in model.layer4.parameters() if param.requires_grad)))
print("fc: {}".format(sum(param.numel() for param in model.fc.parameters() if param.requires_grad)))
print("TOTAL: {}".format(sum(param.numel() for param in model.parameters() if param.requires_grad)))
print("==========")
# free the space
del model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

### Create transforms and dataloaders

We set `is_for_inception` to `False`

In [17]:
# create transform
train_transform, test_transform = create_data_transforms(is_for_inception=False)

# create dataset
data_train = GalaxyZooDataset('class_labels_train_46183_C5.csv', 'images_train', train_transform)
data_test = GalaxyZooDataset('class_labels_test_15395_C5.csv', 'images_test', test_transform)

# dataloader
train_loader = DataLoader(data_train, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(data_test, batch_size=BATCH_SIZE, shuffle=True)

# check the sizes
print("Number of training data: {} ({} batches)".format(len(data_train), len(train_loader)))
print("Number of testing data: {} ({} batches)".format(len(data_test), len(test_loader)))

Number of training data: 46183 (1444 batches)
Number of testing data: 15395 (482 batches)


### Train

In [20]:
## Download the pre-trained resnet18 model
model = models.resnet18(pretrained=True)

# freeze the weights
for param in model.parameters():
    param.requires_grad = False

# change the last fc layer
model.fc = nn.Linear(512, NUM_OF_CLASSES)
print(model.fc) # print the modified last layer

print("============")
print("Training the last layer only")
print("Number of trainable parameters: {}".format(sum(param.numel() for param in model.parameters() if param.requires_grad)))
print("============")

# move to gpu
model = model.to(device)
# loss function
criterion = nn.CrossEntropyLoss()
# optimizer
optimizer = optim.Adam(model.parameters(), lr=1e-3)
# scheduler
scheduler = lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.9)

## train and return the best model
model = train_model(model, 3, criterion, optimizer, scheduler, print_every=1, is_for_inception=False)

## Save the best weights
torch.save(model.state_dict(), 'resnet18_trained.pth')

Linear(in_features=512, out_features=5, bias=True)
Training the last layer only
Number of trainable parameters: 2565
Epoch 1/3	Train loss: 0.9468	Train acc: 0.620	Test loss: 0.8935	Test acc: 0.641	Time: 5m 2s
Epoch 2/3	Train loss: 0.9159	Train acc: 0.629	Test loss: 0.8815	Test acc: 0.646	Time: 5m 1s
Epoch 3/3	Train loss: 0.9059	Train acc: 0.633	Test loss: 0.8848	Test acc: 0.643	Time: 5m 1s
Best epoch = 3, with acc = 0.645534


### Fine-tuning (layer 3 & 4)

In [26]:
model = models.resnet18(pretrained=False)
model.fc = nn.Linear(512, NUM_OF_CLASSES)
model.load_state_dict(torch.load('resnet18_trained.pth'))

# freeze the weights of the last block
for param in model.parameters():
    param.requires_grad = False

In [27]:
"""
Fine-tuning
"""
# unfreeze the weights
for unfreezing_part in [model.layer3, model.layer4, model.fc]:
    for param in unfreezing_part.parameters():
        param.requires_grad = True

print("============")
print("Fine Tuning the whole ResNet model")
print("Number of trainable parameters: {}".format(sum(param.numel() for param in model.parameters() if param.requires_grad)))
print("============")

# move to gpu
model = model.to(device)
# loss function
criterion = nn.CrossEntropyLoss()
# optimizer
optimizer = optim.Adam(model.parameters(), lr=5e-4)
# scheduler
scheduler = lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.9)
# train
model = train_model(model, 20, criterion, optimizer, scheduler, print_every=1, is_for_inception=False)

## Save the best weights
torch.save(model.state_dict(), 'resnet18_tuned_layer34.pth')

Fine Tuning the whole ResNet model
Number of trainable parameters: 10496005
Epoch 1/20	Train loss: 0.8795	Train acc: 0.644	Test loss: 0.8471	Test acc: 0.643	Time: 5m 6s
Epoch 2/20	Train loss: 0.8163	Train acc: 0.669	Test loss: 0.7750	Test acc: 0.688	Time: 5m 5s
Epoch 3/20	Train loss: 0.7837	Train acc: 0.683	Test loss: 0.7762	Test acc: 0.693	Time: 5m 4s
Epoch 4/20	Train loss: 0.7568	Train acc: 0.693	Test loss: 0.7304	Test acc: 0.703	Time: 5m 4s
Epoch 5/20	Train loss: 0.7271	Train acc: 0.705	Test loss: 0.6917	Test acc: 0.724	Time: 5m 5s
Epoch 6/20	Train loss: 0.7034	Train acc: 0.714	Test loss: 0.6823	Test acc: 0.730	Time: 5m 6s
Epoch 7/20	Train loss: 0.6811	Train acc: 0.723	Test loss: 0.6810	Test acc: 0.722	Time: 5m 6s
Epoch 8/20	Train loss: 0.6625	Train acc: 0.736	Test loss: 0.6467	Test acc: 0.741	Time: 5m 5s
Epoch 9/20	Train loss: 0.6472	Train acc: 0.740	Test loss: 0.6304	Test acc: 0.746	Time: 5m 9s
Epoch 10/20	Train loss: 0.6350	Train acc: 0.745	Test loss: 0.6361	Test acc: 0.746	Time:

## ResNet50 Model

### Model architecture

In [28]:
## Resnet50 architecture
model = models.resnet50(pretrained=True)
print(model)
# count trainable parameters
print("==========")
print("Number of trainable parameters:")
print("layer1: {}".format(sum(param.numel() for param in model.layer1.parameters() if param.requires_grad)))
print("layer2: {}".format(sum(param.numel() for param in model.layer2.parameters() if param.requires_grad)))
print("layer3: {}".format(sum(param.numel() for param in model.layer3.parameters() if param.requires_grad)))
print("layer4: {}".format(sum(param.numel() for param in model.layer4.parameters() if param.requires_grad)))
print("fc: {}".format(sum(param.numel() for param in model.fc.parameters() if param.requires_grad)))
print("TOTAL: {}".format(sum(param.numel() for param in model.parameters() if param.requires_grad)))
print("==========")
# free the space
del model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

### Create transforms and dataloaders

We set `is_for_inception` to `False`

In [29]:
# create transform
train_transform, test_transform = create_data_transforms(is_for_inception=False)

# create dataset
data_train = GalaxyZooDataset('class_labels_train_46183_C5.csv', 'images_train', train_transform)
data_test = GalaxyZooDataset('class_labels_test_15395_C5.csv', 'images_test', test_transform)

# dataloader
train_loader = DataLoader(data_train, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(data_test, batch_size=BATCH_SIZE, shuffle=True)

# check the sizes
print("Number of training data: {} ({} batches)".format(len(data_train), len(train_loader)))
print("Number of testing data: {} ({} batches)".format(len(data_test), len(test_loader)))

Number of training data: 46183 (1444 batches)
Number of testing data: 15395 (482 batches)


### Train

In [30]:
## Download the pre-trained resnet50 model
model = models.resnet50(pretrained=True)

# freeze the weights
for param in model.parameters():
    param.requires_grad = False

# change the last fc layer
model.fc = nn.Linear(2048, NUM_OF_CLASSES)
print(model.fc) # print the modified last layer

print("============")
print("Training the last layer only")
print("Number of trainable parameters: {}".format(sum(param.numel() for param in model.parameters() if param.requires_grad)))
print("============")

# move to gpu
model = model.to(device)
# loss function
criterion = nn.CrossEntropyLoss()
# optimizer
optimizer = optim.Adam(model.parameters(), lr=1e-3)
# scheduler
scheduler = lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.9)

## train and return the best model
model = train_model(model, 3, criterion, optimizer, scheduler, print_every=1, is_for_inception=False)

## Save the best weights
torch.save(model.state_dict(), 'resnet50_trained.pth')

Linear(in_features=2048, out_features=5, bias=True)
Training the last layer only
Number of trainable parameters: 10245
Epoch 1/3	Train loss: 0.9365	Train acc: 0.623	Test loss: 0.9907	Test acc: 0.623	Time: 6m 9s
Epoch 2/3	Train loss: 0.8988	Train acc: 0.638	Test loss: 0.8946	Test acc: 0.624	Time: 6m 8s
Epoch 3/3	Train loss: 0.8900	Train acc: 0.637	Test loss: 0.8781	Test acc: 0.650	Time: 6m 9s
Best epoch = 3, with acc = 0.650341


### Fine-tuning (layers 3 & 4)

In [31]:
model = models.resnet50(pretrained=False)
model.fc = nn.Linear(2048, NUM_OF_CLASSES)
model.load_state_dict(torch.load('resnet50_trained.pth'))

# freeze the weights of the last block
for param in model.parameters():
    param.requires_grad = False

In [32]:
"""
Fine-tuning
"""
# unfreeze the weights
for unfreezing_part in [model.layer3, model.layer4, model.fc]:
    for param in unfreezing_part.parameters():
        param.requires_grad = True

print("============")
print("Fine Tuning")
print("Number of trainable parameters: {}".format(sum(param.numel() for param in model.parameters() if param.requires_grad)))
print("============")

# move to gpu
model = model.to(device)
# loss function
criterion = nn.CrossEntropyLoss()
# optimizer
optimizer = optim.Adam(model.parameters(), lr=5e-4)
# scheduler
scheduler = lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.9)
# train
model = train_model(model, 40, criterion, optimizer, scheduler, print_every=1, is_for_inception=False)

## Save the best weights
torch.save(model.state_dict(), 'resnet50_tuned_layer34.pth')

Fine Tuning
Number of trainable parameters: 22073349
Epoch 1/40	Train loss: 0.7667	Train acc: 0.699	Test loss: 0.7226	Test acc: 0.716	Time: 6m 18s
Epoch 2/40	Train loss: 0.6746	Train acc: 0.733	Test loss: 0.6444	Test acc: 0.742	Time: 6m 18s
Epoch 3/40	Train loss: 0.6385	Train acc: 0.747	Test loss: 0.5993	Test acc: 0.761	Time: 6m 19s
Epoch 4/40	Train loss: 0.6108	Train acc: 0.757	Test loss: 0.5874	Test acc: 0.768	Time: 6m 18s
Epoch 5/40	Train loss: 0.5867	Train acc: 0.767	Test loss: 0.5689	Test acc: 0.770	Time: 6m 19s
Epoch 6/40	Train loss: 0.5737	Train acc: 0.772	Test loss: 0.5534	Test acc: 0.783	Time: 6m 19s
Epoch 7/40	Train loss: 0.5571	Train acc: 0.778	Test loss: 0.5446	Test acc: 0.782	Time: 6m 18s
Epoch 8/40	Train loss: 0.5435	Train acc: 0.780	Test loss: 0.5362	Test acc: 0.786	Time: 6m 18s
Epoch 9/40	Train loss: 0.5355	Train acc: 0.788	Test loss: 0.5190	Test acc: 0.792	Time: 6m 19s
Epoch 10/40	Train loss: 0.5266	Train acc: 0.789	Test loss: 0.5120	Test acc: 0.794	Time: 6m 20s
Epoch 

## VGG-16-bn Model

### Model architecture

**Original paper**

Very Deep Convolutional Netrowks for Large-Scale Image Recognition [(arXiv)](https://arxiv.org/abs/1409.1556)

**The last layer**

The last layer of VGG-16 model is called `classifier[6]`, with input size = `4096`

We replace the last layer with a linear layer by `model.classifier[6] = nn.Linear(4096, NUM_OF_CLASSES, bias=True)`

In [None]:
## VGG-16 (bn) architecture
model = models.vgg16_bn(pretrained=True)
print(model)
# count trainable parameters
print("Number of trainable parameters: {}".format(sum(param.numel() for param in model.parameters() if param.requires_grad)))
# free the space
del model

Downloading: "https://download.pytorch.org/models/vgg16_bn-6c64b313.pth" to /root/.cache/torch/hub/checkpoints/vgg16_bn-6c64b313.pth


  0%|          | 0.00/528M [00:00<?, ?B/s]

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU(inplace=True)
    (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU(inplace=True)
    (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (14): Conv2d(128, 256

### Create transforms and dataloaders

In [33]:
# create transform
train_transform, test_transform = create_data_transforms(is_for_inception=False)

# create dataset
data_train = GalaxyZooDataset('class_labels_train_46183_C5.csv', 'images_train', train_transform)
data_test = GalaxyZooDataset('class_labels_test_15395_C5.csv', 'images_test', test_transform)

# dataloader
train_loader = DataLoader(data_train, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(data_test, batch_size=BATCH_SIZE, shuffle=True)

# check the sizes
print("Number of training data: {} ({} batches)".format(len(data_train), len(train_loader)))
print("Number of testing data: {} ({} batches)".format(len(data_test), len(test_loader)))

Number of training data: 46183 (1444 batches)
Number of testing data: 15395 (482 batches)


### Train

In [34]:
## Download the pre-trained VGG16 model
model = models.vgg16_bn(pretrained=True)

# freeze the weights
for param in model.parameters():
    param.requires_grad = False

# change the last fc layer
model.classifier[6] = nn.Linear(4096, NUM_OF_CLASSES)
print(model.classifier[6]) # print the modified last layer

print("============")
print("Training the last layer only")
print("Number of trainable parameters: {}".format(sum(param.numel() for param in model.parameters() if param.requires_grad)))
print("============")

# move to gpu
model = model.to(device)
# loss function
criterion = nn.CrossEntropyLoss()
# optimizer
optimizer = optim.Adam(model.parameters(), lr=1e-3)
# scheduler
scheduler = lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.9)

## train and return the best model
model = train_model(model, 3, criterion, optimizer, scheduler, print_every=1, is_for_inception=False)

## Save the best weights
torch.save(model.state_dict(), 'vgg16bn_trained.pth')

Downloading: "https://download.pytorch.org/models/vgg16_bn-6c64b313.pth" to /root/.cache/torch/hub/checkpoints/vgg16_bn-6c64b313.pth


  0%|          | 0.00/528M [00:00<?, ?B/s]

Linear(in_features=4096, out_features=5, bias=True)
Training the last layer only
Number of trainable parameters: 20485
Epoch 1/3	Train loss: 0.9857	Train acc: 0.607	Test loss: 0.9278	Test acc: 0.631	Time: 7m 22s
Epoch 2/3	Train loss: 0.9851	Train acc: 0.605	Test loss: 0.9081	Test acc: 0.637	Time: 7m 22s
Epoch 3/3	Train loss: 0.9789	Train acc: 0.607	Test loss: 0.8949	Test acc: 0.634	Time: 7m 23s
Best epoch = 3, with acc = 0.637220


### Fine-tuning

In [35]:
model = models.vgg16_bn(pretrained=False)
model.classifier[6] = nn.Linear(4096, NUM_OF_CLASSES)
model.load_state_dict(torch.load('vgg16bn_trained.pth'))

# freeze all weights
for param in model.parameters():
    param.requires_grad = False

In [36]:
"""
Fine-tuning
"""
# unfreeze the weights
for param in model.classifier.parameters():
    param.requires_grad = True

print("============")
print("Fine-Tuning")
print("Number of trainable parameters: {}".format(sum(param.numel() for param in model.parameters() if param.requires_grad)))
print("============")

# move to gpu
model = model.to(device)
# loss function
criterion = nn.CrossEntropyLoss()
# optimizer
optimizer = optim.Adam(model.parameters(), lr=5e-4)
# scheduler
scheduler = lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.9)
# train
model = train_model(model, 50, criterion, optimizer, scheduler, print_every=1, is_for_inception=False)

## Save the best weights
torch.save(model.state_dict(), 'vgg16bn_tuned_clf.pth')

Fine-Tuning
Number of trainable parameters: 119566341
Epoch 1/50	Train loss: 1.0103	Train acc: 0.606	Test loss: 0.9384	Test acc: 0.637	Time: 7m 24s
Epoch 2/50	Train loss: 0.9278	Train acc: 0.631	Test loss: 0.8991	Test acc: 0.650	Time: 7m 24s
Epoch 3/50	Train loss: 0.9173	Train acc: 0.638	Test loss: 0.8628	Test acc: 0.656	Time: 7m 24s
Epoch 4/50	Train loss: 0.9010	Train acc: 0.640	Test loss: 0.8930	Test acc: 0.664	Time: 7m 24s
Epoch 5/50	Train loss: 0.8911	Train acc: 0.644	Test loss: 0.8891	Test acc: 0.657	Time: 7m 24s
Epoch 6/50	Train loss: 0.8827	Train acc: 0.649	Test loss: 0.8600	Test acc: 0.666	Time: 7m 24s
Epoch 7/50	Train loss: 0.8722	Train acc: 0.651	Test loss: 0.8549	Test acc: 0.666	Time: 7m 24s
Epoch 8/50	Train loss: 0.8686	Train acc: 0.650	Test loss: 0.8620	Test acc: 0.662	Time: 7m 24s
Epoch 9/50	Train loss: 0.8650	Train acc: 0.654	Test loss: 0.8609	Test acc: 0.652	Time: 7m 24s
Epoch 10/50	Train loss: 0.8582	Train acc: 0.654	Test loss: 0.8645	Test acc: 0.660	Time: 7m 24s
Epoch

## Inception v3 Model

### Model architecture

**Original paper**

Rethinking the Inception Architecture for Computer Vision [(arXiv)](https://arxiv.org/abs/1512.00567)

**The last layer**

The last layer of Inception v3 model is called `fc`, with input size = `2048`

We replace the last layer with a linear layer by `model.fc = nn.Linear(2048, NUM_OF_CLASSES, bias=True)`

**The auxiliary layer**

In addition to the `fc` layer, the model has a second output layer `AuxLogits.fc`, with input size = `768`

We replace the auxiliary layer with a linear layer by `model.AuxLogits.fc = nn.Linear(768, NUM_OF_CLASSES, bias=True)`

In [16]:
## Inception v3 architecture
model = models.inception_v3(pretrained=True)
print(model)
# count trainable parameters
print("Number of trainable parameters: {}".format(sum(param.numel() for param in model.parameters() if param.requires_grad)))
# free the space
del model

Inception3(
  (Conv2d_1a_3x3): BasicConv2d(
    (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_2a_3x3): BasicConv2d(
    (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_2b_3x3): BasicConv2d(
    (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (Conv2d_3b_1x1): BasicConv2d(
    (conv): Conv2d(64, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(80, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_4a_3x3): BasicConv2d(
    (conv): Conv2d(80, 192, kernel_size=(3, 3), stri

### Create transforms and dataloaders

In [37]:
# create transform
train_transform, test_transform = create_data_transforms(is_for_inception=True)

# create dataset
data_train = GalaxyZooDataset('class_labels_train_46183_C5.csv', 'images_train', train_transform)
data_test = GalaxyZooDataset('class_labels_test_15395_C5.csv', 'images_test', test_transform)

# dataloader
train_loader = DataLoader(data_train, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(data_test, batch_size=BATCH_SIZE, shuffle=True)

# check the sizes
print("Number of training data: {} ({} batches)".format(len(data_train), len(train_loader)))
print("Number of testing data: {} ({} batches)".format(len(data_test), len(test_loader)))

Number of training data: 46183 (1444 batches)
Number of testing data: 15395 (482 batches)


### Train

In [38]:
## Download the pre-trained resnet18 model
model = models.inception_v3(pretrained=True)

# freeze the weights
for param in model.parameters():
    param.requires_grad = False

# change the last fc layer
model.fc = nn.Linear(2048, NUM_OF_CLASSES, bias=True)
print(model.fc) # print the modified last layer

# change the aux fc layer
model.AuxLogits.fc = nn.Linear(768, NUM_OF_CLASSES, bias=True)
print(model.AuxLogits.fc) # print the modified aux fc layer

print("============")
print("Training the last layer + aux layer")
print("Number of trainable parameters: {}".format(sum(param.numel() for param in model.parameters() if param.requires_grad)))
print("============")

# move to gpu
model = model.to(device)
# loss function
criterion = nn.CrossEntropyLoss()
# optimizer
optimizer = optim.Adam(model.parameters(), lr=1e-3)
# scheduler
scheduler = lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.9)

## train and return the best model
model = train_model(model, 3, criterion, optimizer, scheduler, print_every=1, is_for_inception=True)

## Save the best weights
torch.save(model.state_dict(), 'inceptionv3_trained.pth')

Downloading: "https://download.pytorch.org/models/inception_v3_google-0cc3c7bd.pth" to /root/.cache/torch/hub/checkpoints/inception_v3_google-0cc3c7bd.pth


  0%|          | 0.00/104M [00:00<?, ?B/s]

Linear(in_features=2048, out_features=5, bias=True)
Linear(in_features=768, out_features=5, bias=True)
Training the last layer + aux layer
Number of trainable parameters: 14090
Epoch 1/3	Train loss: 1.2747	Train acc: 0.637	Test loss: 0.8713	Test acc: 0.669	Time: 8m 36s
Epoch 2/3	Train loss: 1.2469	Train acc: 0.640	Test loss: 0.8418	Test acc: 0.673	Time: 8m 35s
Epoch 3/3	Train loss: 1.2317	Train acc: 0.644	Test loss: 0.8420	Test acc: 0.671	Time: 8m 35s
Best epoch = 3, with acc = 0.673465


### Fine-tuning

In [39]:
model = models.inception_v3(pretrained=False)
model.fc = nn.Linear(2048, NUM_OF_CLASSES, bias=True)
model.AuxLogits.fc = nn.Linear(768, NUM_OF_CLASSES, bias=True)
model.load_state_dict(torch.load('inceptionv3_trained.pth'))

# freeze the weights of the last block
for param in model.parameters():
    param.requires_grad = False



In [40]:
"""
Fine-tuning
"""
# unfreeze the weights
for unfreezing_part in [model.Mixed_7a, model.Mixed_7b, model.Mixed_7c, model.AuxLogits, model.fc]:
    for param in unfreezing_part.parameters():
        param.requires_grad = True

print("============")
print("Fine-Tuning")
print("Number of trainable parameters: {}".format(sum(param.numel() for param in model.parameters() if param.requires_grad)))
print("============")

# move to gpu
model = model.to(device)
# loss function
criterion = nn.CrossEntropyLoss()
# optimizer
optimizer = optim.Adam(model.parameters(), lr=5e-4)
# scheduler
scheduler = lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.9)
# train
model = train_model(model, 40, criterion, optimizer, scheduler, print_every=1, is_for_inception=True)

## Save the best weights
torch.save(model.state_dict(), 'inceptionv3_tuned_mixed7.pth')

Fine-Tuning
Number of trainable parameters: 15391498
Epoch 1/40	Train loss: 1.0270	Train acc: 0.718	Test loss: 0.6711	Test acc: 0.737	Time: 8m 46s
Epoch 2/40	Train loss: 0.9316	Train acc: 0.743	Test loss: 0.6246	Test acc: 0.757	Time: 8m 45s
Epoch 3/40	Train loss: 0.9047	Train acc: 0.748	Test loss: 0.6171	Test acc: 0.759	Time: 8m 44s
Epoch 4/40	Train loss: 0.8863	Train acc: 0.754	Test loss: 0.6055	Test acc: 0.761	Time: 8m 44s
Epoch 5/40	Train loss: 0.8638	Train acc: 0.762	Test loss: 0.5898	Test acc: 0.771	Time: 8m 44s
Epoch 6/40	Train loss: 0.8547	Train acc: 0.764	Test loss: 0.5772	Test acc: 0.773	Time: 8m 45s
Epoch 7/40	Train loss: 0.8398	Train acc: 0.767	Test loss: 0.5822	Test acc: 0.772	Time: 8m 45s
Epoch 8/40	Train loss: 0.8271	Train acc: 0.770	Test loss: 0.5893	Test acc: 0.772	Time: 8m 44s
Epoch 9/40	Train loss: 0.8169	Train acc: 0.774	Test loss: 0.5766	Test acc: 0.770	Time: 8m 44s
Epoch 10/40	Train loss: 0.8077	Train acc: 0.776	Test loss: 0.5813	Test acc: 0.772	Time: 8m 45s
Epoch 

KeyboardInterrupt: ignored