<a href="https://colab.research.google.com/github/samibahig/IFT6390/blob/main/CNN_Densenet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

import torch
import torchvision.transforms as transforms
import torchvision.models as models
import torch.nn as nn
import torch.optim as optim
import numpy as np
import time
import copy
from sklearn.model_selection import train_test_split
import pandas as pd
from PIL import Image
from sklearn.metrics import confusion_matrix
import sys

# Detect if we have a GPU available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# The string literal below is a disabled one-off experiment (it is never
# executed): it searched for the random_state whose validation split had the
# most even class counts.
'''
t = []
for j in range(100):
    a = np.zeros(shape=(6))
    for i in range(6):
        X_train, X_val, y_train, y_val = train_test_split(train['arr_0'], train['arr_1'], test_size=0.3, random_state=j)
        a[i] = np.sum(y_val == i)

    t.append(a.std())
    
print(np.argmin(np.array(t)))
print(np.min(np.array(t)))'''

###################################################################

# From the script we ran to see the random state that provides the minimal std, we saw that if test_size = 0.3 then rs 6, 56 or 904 are good
# and if test_size = 0.2 then rs 9, 50 or 739 are good
# NOTE: the split below does not use any of those seeds; it passes
# stratify=train['arr_1'] instead, which keeps the class proportions the same
# in the training and validation parts.

train, test = np.load('train.npz'), np.load('test.npz')

X_train, X_val, y_train, y_val = train_test_split(train['arr_0'], train['arr_1'], test_size=0.3, random_state=42, stratify=train['arr_1'])
X_test = test['arr_0']

# Normalisation statistics of the training split, on the [0, 1] scale.
# The scaled copy is computed once and released right away.
_scaled_train = X_train / 255.
mean = _scaled_train.mean()
std = _scaled_train.std()
del _scaled_train

# Reshape to (N, 28, 28) images.
# The arrays are cast to uint8 rather than the platform `int`: PIL's
# Image.fromarray (used by the dataset class) cannot build an image from a
# 64-bit integer array and raises TypeError.
X_data = train['arr_0'].reshape(-1,28,28).astype(np.uint8)
y_data = train['arr_1']

X_train = X_train.reshape(-1,28,28).astype(np.uint8)
X_val = X_val.reshape(-1,28,28).astype(np.uint8)
X_test = X_test.reshape(-1,28,28).astype(np.uint8)

##################################################################

class DatasetDraws(torch.utils.data.Dataset):
    """Map-style dataset that serves 2-D pixel arrays as RGB PIL images.

    Args:
        X: indexable collection of 2-D pixel arrays (one per sample).
        y: indexable collection of labels aligned with ``X``, or ``None``
            when no labels are available (e.g. for a test set).
        transform: optional callable applied to the PIL image before it is
            returned.

    Each item is an ``(image, label)`` pair. When ``y`` is ``None`` the label
    is the placeholder ``-1`` so items keep the same two-element shape.
    """

    def __init__(self, X, y, transform=None):
        self.X = X
        # Always define the attribute. The former `y.all() != None` test was
        # true for every array and raised AttributeError for y=None.
        self.y = y
        self.transform = transform

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, index):
        """Return the ``(image, label)`` pair stored at ``index``."""
        pixels = np.asarray(self.X[index])
        # Image.fromarray rejects 64-bit integer arrays with TypeError, so
        # integer pixel data is clipped to the 8-bit range and cast to uint8.
        if pixels.dtype != np.uint8 and np.issubdtype(pixels.dtype, np.integer):
            pixels = np.clip(pixels, 0, 255).astype(np.uint8)
        image = Image.fromarray(pixels).convert('RGB')
        label = -1 if self.y is None else self.y[index]
        if self.transform is not None:
            image = self.transform(image)

        return image, label

##################################################################

# ---- Run configuration -------------------------------------------------

# Architecture to fine-tune; one of
# [resnet, alexnet, vgg, squeezenet, densenet, inception].
model_name = "densenet"

# How many categories the replaced classifier head must output.
num_classes = 6

# Mini-batch sizes: the first is used for the training loaders, the second
# for the validation and test loaders (lower both if memory is tight).
batch_size = val_test_batch_size = 16

# Epoch budget for the main training run.
num_epochs = 90

# False -> fine-tune every layer of the network.
# True  -> update only the freshly reshaped layer (feature extraction).
feature_extract = False

# Module-level defaults picked up by train_model's `confusion` and
# `continue_` keyword parameters.
confusion, continue_ = True, True

def train_model(model, dataloaders, criterion, optimizer, num_epochs=25, is_inception=False, confusion=confusion, continue_=continue_):
    """Train ``model`` and keep the weights with the best validation accuracy.

    Every epoch runs a 'train' phase followed by a 'val' phase. After the
    last epoch the weights of the epoch with the highest validation accuracy
    are loaded back into ``model``.

    Args:
        model: network to train; called as ``model(inputs)``.
        dataloaders: mapping with the keys 'train' and 'val', each an
            iterable of ``(inputs, labels)`` batches exposing ``.dataset``.
        criterion: loss callable ``criterion(outputs, labels)``.
        optimizer: optimizer providing ``zero_grad()`` and ``step()``.
        num_epochs: number of epochs to run.
        is_inception: when True, the training phase expects the model to
            return ``(outputs, aux_outputs)`` and adds 0.4 times the
            auxiliary loss to the main loss.
        confusion: when truthy, print the validation confusion matrix after
            every epoch.
        continue_: accepted for backward compatibility; not used here.

    Returns:
        ``(model, val_acc_history)`` where ``val_acc_history`` holds one
        validation accuracy (a 0-dim tensor) per epoch.
    """
    since = time.time()

    val_acc_history = []

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        # Per-epoch batches of validation targets / predictions. They are
        # gathered in lists and joined once, instead of re-copying a growing
        # array with np.hstack on every batch.
        conf_y = []
        conf_val = []

        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(is_train):
                    # Special case for inception because in training it has an
                    # auxiliary output. In train mode the loss is the final
                    # output loss plus 0.4 times the auxiliary output loss;
                    # in validation only the final output is considered.
                    if is_inception and is_train:
                        # From https://discuss.pytorch.org/t/how-to-optimize-inception-model-with-auxiliary-classifiers/7958
                        outputs, aux_outputs = model(inputs)
                        loss1 = criterion(outputs, labels)
                        loss2 = criterion(aux_outputs, labels)
                        loss = loss1 + 0.4*loss2
                    else:
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs, 1)

                    # backward + optimize only if in training phase
                    if is_train:
                        loss.backward()
                        optimizer.step()

                # statistics (the loss is weighted by the batch size so the
                # epoch value is a per-sample average)
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

                # save for confusion matrix
                if not is_train and confusion:
                    conf_y.append(labels.cpu().numpy())
                    conf_val.append(preds.cpu().numpy())

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            if not is_train:
                # deep copy the model whenever validation accuracy improves
                if epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())
                val_acc_history.append(epoch_acc)
                if confusion:
                    print(confusion_matrix(np.concatenate(conf_y), np.concatenate(conf_val)))
                # NOTE: a learning-rate scheduler step (e.g. scheduler.step(loss))
                # would go here; none is configured at the moment.

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)

    return model, val_acc_history

def set_parameter_requires_grad(model, feature_extracting):
    """Turn off gradient tracking on every parameter of ``model``.

    Nothing happens when ``feature_extracting`` is falsy; otherwise each
    parameter yielded by ``model.parameters()`` gets ``requires_grad = False``.
    """
    if not feature_extracting:
        return
    for weight in model.parameters():
        weight.requires_grad = False
            
def initialize_model(model_name, num_classes, feature_extract, use_pretrained=True):
    """Build a torchvision model whose output layer has ``num_classes`` units.

    Args:
        model_name: one of "resnet", "alexnet", "vgg", "squeezenet",
            "densenet" or "inception".
        num_classes: number of outputs of the replaced classification layer.
        feature_extract: forwarded to ``set_parameter_requires_grad``; when
            truthy the loaded parameters are frozen before the new output
            layer is attached, so only that new layer stays trainable.
        use_pretrained: forwarded as ``pretrained=`` to the torchvision
            constructor.

    Returns:
        ``(model_ft, input_size)`` where ``input_size`` is the image side
        length used for the model (299 for inception, 224 otherwise).

    Raises:
        SystemExit: if ``model_name`` is not one of the supported names.
    """
    # Initialize these variables which will be set in this if statement. Each of these
    #   variables is model specific.
    model_ft = None
    input_size = 0

    if model_name == "resnet":
        # Resnet18
        model_ft = models.resnet18(pretrained=use_pretrained)
        #model_ft = models.resnet34(pretrained=use_pretrained)
        #model_ft = models.resnet50(pretrained=use_pretrained)
        #model_ft = models.resnet101(pretrained=use_pretrained)
        #model_ft = models.resnext101_32x8d(pretrained=use_pretrained)
        #model_ft = models.resnext50_32x4d(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs, num_classes)
        input_size = 224

    elif model_name == "alexnet":
        # Alexnet
        model_ft = models.alexnet(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier[6].in_features
        model_ft.classifier[6] = nn.Linear(num_ftrs, num_classes)
        input_size = 224

    elif model_name == "vgg":
        # VGG11_bn
        model_ft = models.vgg11_bn(pretrained=use_pretrained)
        #model_ft = models.vgg19_bn(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier[6].in_features
        model_ft.classifier[6] = nn.Linear(num_ftrs, num_classes)
        input_size = 224

    elif model_name == "squeezenet":
        # Squeezenet: the classifier is a 1x1 convolution, not a linear layer
        model_ft = models.squeezenet1_0(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        model_ft.classifier[1] = nn.Conv2d(512, num_classes, kernel_size=(1,1), stride=(1,1))
        model_ft.num_classes = num_classes
        input_size = 224

    elif model_name == "densenet":
        # Densenet (201-layer variant)
        #model_ft = models.densenet121(pretrained=use_pretrained)
        #model_ft = models.densenet161(pretrained=use_pretrained)
        model_ft = models.densenet201(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier.in_features
        model_ft.classifier = nn.Linear(num_ftrs, num_classes)
        input_size = 224

    elif model_name == "inception":
        # Inception v3
        # Be careful, expects (299,299) sized images and has auxiliary output
        model_ft = models.inception_v3(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        # Handle the auxiliary net
        num_ftrs = model_ft.AuxLogits.fc.in_features
        model_ft.AuxLogits.fc = nn.Linear(num_ftrs, num_classes)
        # Handle the primary net
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs, num_classes)
        input_size = 299

    else:
        print("Invalid model name, exiting...")
        # sys.exit replaces the interactive-only `exit()` helper from the
        # site module; the non-zero status reports the failure to the caller.
        sys.exit(1)

    return model_ft, input_size

# Initialize the model for this run
model_ft, input_size = initialize_model(model_name, num_classes, feature_extract, use_pretrained=True)

# Send the model to GPU
model_ft = model_ft.to(device)

print("Initializing Datasets and Dataloaders...")

# Gather the parameters to be optimized/updated in this run. If we are
#  finetuning we will be updating all parameters. However, if we are
#  doing feature extract method, we will only update the parameters
#  that we have just initialized, i.e. the parameters with requires_grad
#  is True.

params_to_update = model_ft.parameters()
print("Params to learn:")
if feature_extract:
    params_to_update = []
    for name, param in model_ft.named_parameters():
        if param.requires_grad:
            params_to_update.append(param)
            print("\t", name)
else:
    for name, param in model_ft.named_parameters():
        if param.requires_grad:
            print("\t", name)

# Optimizer over the parameters selected above
optimizer_ft = optim.SGD(params_to_update, lr=0.0007, momentum=0.9)
#scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer_ft, mode='max', patience=10, factor=0.5)

# Setup the loss fxn
criterion = nn.CrossEntropyLoss()

# Transforms: random augmentation for training, deterministic resize for
# validation and test. All three normalise with the training-split statistics,
# repeated for the three RGB channels.
train_transforms = transforms.Compose([
        transforms.Resize(255),
        transforms.RandomResizedCrop(input_size),
        transforms.RandomHorizontalFlip(),
        transforms.RandomAffine(degrees=(0, 30), translate=(0.1, 0.25)),
        transforms.ToTensor(),
        transforms.RandomErasing(),
        transforms.Normalize([mean, mean, mean], [std, std, std])
    ])

val_transforms = transforms.Compose([
        transforms.Resize(input_size),
        transforms.ToTensor(),
        transforms.Normalize([mean, mean, mean], [std, std, std])
    ])

test_transforms = transforms.Compose([
        transforms.Resize(input_size),
        transforms.ToTensor(),
        transforms.Normalize([mean, mean, mean], [std, std, std])
    ])

# torch Dataset
train_dataset = DatasetDraws(X_train, y_train, transform=train_transforms)
val_dataset = DatasetDraws(X_val, y_val, transform=val_transforms)

whole_dataset = DatasetDraws(X_data, y_data, transform=train_transforms)

# torch Dataloaders
train_load = torch.utils.data.DataLoader(train_dataset, batch_size = batch_size, shuffle = True)
val_load = torch.utils.data.DataLoader(val_dataset, batch_size = val_test_batch_size, shuffle = True)
dataloaders = {'train': train_load, 'val':val_load}

whole_load = torch.utils.data.DataLoader(whole_dataset, batch_size = batch_size, shuffle = True)
whole_dataloaders = {'train': whole_load, 'val':val_load}

# Train and evaluate
model_ft, hist = train_model(model_ft, dataloaders, criterion, optimizer_ft, num_epochs=num_epochs, is_inception=(model_name=="inception"))

# Second, shorter run on the whole_dataset loader.
# NOTE(review): X_data appears to be the full labelled set, i.e. it would
# include the validation samples, so the validation accuracy reported during
# this run would no longer be a held-out estimate - confirm.
model_ft, hist = train_model(model_ft, whole_dataloaders, criterion, optimizer_ft, num_epochs=10, is_inception=(model_name=="inception"))

########################################################################

# Compute predictions for the test data

# torch test Dataset and Dataloader
# The test set has no labels: placeholder zeros are supplied, sized from the
# data itself rather than a hard-coded sample count.
test_dataset = DatasetDraws(X_test, np.zeros(len(X_test)), transform=test_transforms)
test_load = torch.utils.data.DataLoader(test_dataset, batch_size = val_test_batch_size, shuffle = False)

pred = []
with torch.no_grad():
    model_ft.eval()
    for batch in test_load:
        # batch is (images, placeholder labels); only the images are used
        inputs = batch[0].to(device)
        outputs = model_ft(inputs)
        _, preds = torch.max(outputs, 1)
        pred.append(preds.cpu().numpy())

pred = np.concatenate(pred)
pred = pd.DataFrame(pred)
pred.to_csv('pred_17_12_lr_0.0007.csv',index=True, index_label='Id', header=['Category'])

Initializing Datasets and Dataloaders...
Params to learn:
	 features.conv0.weight
	 features.norm0.weight
	 features.norm0.bias
	 features.denseblock1.denselayer1.norm1.weight
	 features.denseblock1.denselayer1.norm1.bias
	 features.denseblock1.denselayer1.conv1.weight
	 features.denseblock1.denselayer1.norm2.weight
	 features.denseblock1.denselayer1.norm2.bias
	 features.denseblock1.denselayer1.conv2.weight
	 features.denseblock1.denselayer2.norm1.weight
	 features.denseblock1.denselayer2.norm1.bias
	 features.denseblock1.denselayer2.conv1.weight
	 features.denseblock1.denselayer2.norm2.weight
	 features.denseblock1.denselayer2.norm2.bias
	 features.denseblock1.denselayer2.conv2.weight
	 features.denseblock1.denselayer3.norm1.weight
	 features.denseblock1.denselayer3.norm1.bias
	 features.denseblock1.denselayer3.conv1.weight
	 features.denseblock1.denselayer3.norm2.weight
	 features.denseblock1.denselayer3.norm2.bias
	 features.denseblock1.denselayer3.conv2.weight
	 features.densebloc

TypeError: ignored