## Import libraries

In [None]:
# Import libraries
# Data Manipulation
import zipfile
import numpy as np
import pandas as pd
import os

# Fast AI vision
from fastai.vision import *
from fastai.metrics import error_rate

## Import Data

### Prepare Train data

In [None]:
# Archive that holds the training images.
path_to_zip_file = (
    '/home/jupyter/tutorials/fastai/course-v3/'
    'Soumadiptya Game of Deep Learning Contest files/'
    'Data/Train_images.zip'
)
# Folder the training archive is unpacked into.
directory_to_extract_to = (
    '/home/jupyter/tutorials/fastai/course-v3/'
    'Soumadiptya Game of Deep Learning Contest files/'
    'Data/Extracted_Data'
)

In [None]:
# Unzip the Image dataset.
# The context manager closes the archive even when extractall() raises,
# which the separate close() call did not guarantee.
with zipfile.ZipFile(path_to_zip_file, 'r') as zip_ref:
    zip_ref.extractall(directory_to_extract_to)

In [None]:
path = directory_to_extract_to + '/' + 'Train_images/train' ;path

In [None]:
# Image transforms; passed as ds_tfms to every ImageDataBunch built in this notebook.
tfms = get_transforms()

In [None]:
# Set batch size (used for the first DataBunch below)
bs = 64

In [None]:
# Seed NumPy before building the DataBunch — presumably so that the random
# valid_pct hold-out is reproducible (confirm against fastai's splitter).
np.random.seed(123)
# Build the training/validation DataBunch from `path` with size=224 and
# valid_pct=0.2, then normalize with imagenet_stats.
data = ImageDataBunch.from_csv(path, ds_tfms=tfms, size=224, bs = bs, valid_pct = 0.2).normalize(imagenet_stats)

In [None]:
# Preview a 3-row grid of samples, then display the class labels.
data.show_batch(rows=3, figsize=(7,7))
data.classes

### Add Test data

In [None]:
# Archive that holds the test images.
path_to_test_zip = (
    '/home/jupyter/tutorials/fastai/course-v3/'
    'Soumadiptya Game of Deep Learning Contest files/'
    'Data/Test_images.zip'
)
# Folder the test archive is unpacked into.
directory_to_extract_test = (
    '/home/jupyter/tutorials/fastai/course-v3/'
    'Soumadiptya Game of Deep Learning Contest files/'
    'Data/Extracted_Test_Data'
)

In [None]:
# Unzip the TestImages.
# The context manager closes the archive even when extractall() raises,
# which the separate close() call did not guarantee.
with zipfile.ZipFile(path_to_test_zip, 'r') as zip_ref:
    zip_ref.extractall(directory_to_extract_test)

In [None]:
# Build the list of test images from test.csv in the extracted test folder.
test_data = ImageList.from_csv(directory_to_extract_test + '/' + 'Test_images/test', csv_name='test.csv')

In [None]:
# Attach the test images to the training DataBunch.
data.add_test(test_data)

### Try Model Architectures
Our approach is to try several different models using transfer learning and then build an ensemble of all the models to evaluate the final performance on the public leaderboard.

### Model 1- Resnet 18

In [None]:
# Create a learner on `data` with the resnet18 architecture, tracking accuracy.
learn = cnn_learner(data, models.resnet18, metrics=accuracy)

In [None]:
# First training pass with the learner as created above.
learn.fit_one_cycle(4)

In [None]:
# Checkpoint the model after the first pass.
learn.save('Resnet_18_model_1')

In [None]:
# Unfreeze the weights before further training
learn.unfreeze()

In [None]:
# Second training pass on the unfrozen model.
learn.fit_one_cycle(20)

In [None]:
# Checkpoint the retrained model.
learn.save('Resnet_18_retrained')

With optimum LR

In [None]:
# Run the learning-rate finder; its results are plotted in the next cell.
learn.lr_find()

In [None]:
learn.recorder.plot()

In [None]:
# Train again with an explicit max_lr range — presumably chosen from the
# plot above (confirm).
learn.fit_one_cycle(20, max_lr=slice(1e-5,1e-4))

In [None]:
# Build an interpretation object from the trained ResNet-18 learner.
interp = ClassificationInterpretation.from_learner(learn)

# Top losses and their indices.
losses,idxs = interp.top_losses()

# Sanity check: the validation set, losses and indices all have the same length.
len(data.valid_ds)==len(losses)==len(idxs)

In [None]:
# Show the documentation of plot_top_losses.
help(interp.plot_top_losses)

In [None]:
# Plot 9 top-loss samples with the heatmap overlay disabled.
interp.plot_top_losses(9, figsize=(15,11), heatmap = False)

In [None]:
interp.plot_confusion_matrix(figsize=(12,12), dpi=60)

In [None]:
# List the most confused class pairs, filtered with min_val=2.
interp.most_confused(min_val=2)

In [None]:
# Get the predictions: one learn.predict() call per test image, keeping
# element [0] of each result.
test_predictions_resnet_18 = [
    learn.predict(test_image)[0] for test_image in test_data
]

In [None]:
# Load the same test.csv that test_data was built from; it becomes the
# submission frame for this model.
predictions_resnet_18 = pd.read_csv(directory_to_extract_test + '/' + 'Test_images/test/test.csv')

In [None]:
# Attach the predictions as a 'labels' column.
# NOTE(review): assumes test_data iterates in the CSV's row order — confirm.
predictions_resnet_18['labels'] = test_predictions_resnet_18

In [None]:
# Folder that every per-model prediction CSV is written to.
submissions_dir = '/home/jupyter/tutorials/fastai/course-v3/Soumadiptya Game of Deep Learning Contest files/Data/Submissions'

In [None]:
# Store for future use (in case kernel restarts etc)
predictions_resnet_18.to_csv(submissions_dir + '/' + 'Resnet_18_preds.csv', index=False)

### Resnet 34 preds

In [None]:
# Create a learner on `data` with the resnet34 architecture, tracking accuracy.
learn_34 = cnn_learner(data, models.resnet34, metrics=accuracy)

In [None]:
# First training pass with the learner as created above.
learn_34.fit_one_cycle(4)

In [None]:
# Unfreeze the weights and train again.
learn_34.unfreeze()
learn_34.fit_one_cycle(10)

In [None]:
# Find best LR
learn_34.lr_find()

In [None]:
# Plot best LR
learn_34.recorder.plot()

In [None]:
# Model with best LR set (max_lr range presumably read off the plot above — confirm)
learn_34.fit_one_cycle(10, max_lr=slice(1e-6,1e-5))

In [None]:
# Get the predictions: one learn_34.predict() call per test image, keeping
# element [0] of each result.
test_predictions_34 = [
    learn_34.predict(test_image)[0] for test_image in test_data
]

In [None]:
# Load test.csv as the submission frame for the ResNet-34 predictions.
predictions_resnet_34 = pd.read_csv(directory_to_extract_test + '/' + 'Test_images/test/test.csv')

In [None]:
# Attach the predictions as a 'labels' column.
# NOTE(review): assumes test_data iterates in the CSV's row order — confirm.
predictions_resnet_34['labels'] = test_predictions_34

In [None]:
# Store for future use (in case kernel restarts etc)
predictions_resnet_34.to_csv(submissions_dir + '/' + 'Resnet_34_preds.csv', index=False)

### With Resnet 50

In [None]:
# Rebuild the DataBunch with a smaller batch size (32) for ResNet-50.
# Re-seed NumPy first, as the first DataBunch cell does, so the valid_pct
# hold-out is drawn from the same RNG state instead of whatever state the
# earlier training cells left behind.
np.random.seed(123)
data = ImageDataBunch.from_csv(path, ds_tfms=tfms, size=224, bs = 32, valid_pct = 0.2).normalize(imagenet_stats)
# Attach the test images to the rebuilt DataBunch.
data.add_test(test_data)

In [None]:
# Create a learner on `data` with the resnet50 architecture and run the first
# training pass.
learn_50 = cnn_learner(data, models.resnet50, metrics=accuracy)
learn_50.fit_one_cycle(4)

In [None]:
# Unfreeze the weights and train again with fit_one_cycle(10)
learn_50.unfreeze()
learn_50.fit_one_cycle(10)

In [None]:
# Find best LR
learn_50.lr_find()

In [None]:
# Plot LR
learn_50.recorder.plot()

In [None]:
# Model with best LR set (max_lr range presumably read off the plot above — confirm)
learn_50.fit_one_cycle(10, max_lr=slice(1e-6,1e-5))

In [None]:
# Get the predictions: one learn_50.predict() call per test image, keeping
# element [0] of each result.
test_predictions_50 = [
    learn_50.predict(test_image)[0] for test_image in test_data
]

In [None]:
# Load test.csv as the submission frame and attach the ResNet-50 predictions.
# NOTE(review): assumes test_data iterates in the CSV's row order — confirm.
predictions_resnet_50 = pd.read_csv(directory_to_extract_test + '/' + 'Test_images/test/test.csv')
predictions_resnet_50['labels'] = test_predictions_50

In [None]:
# Store for future use (in case kernel restarts etc)
predictions_resnet_50.to_csv(submissions_dir + '/' + 'Resnet_50_preds.csv', index=False)

### With Densenet

In [None]:
# Dedicated DataBunch for DenseNet: images at size 299 and a lower batch size
# (16) so as to not run out of memory.
# NOTE(review): the earlier comment stated the model *requires* 299x299
# input — confirm; the other models in this notebook are trained at 224.
# Re-seed NumPy first, as the first DataBunch cell does, so the valid_pct
# hold-out is drawn from the same RNG state instead of whatever state the
# earlier training cells left behind.
np.random.seed(123)
data_densenet = ImageDataBunch.from_csv(path, ds_tfms=tfms, size=299, bs = 16, valid_pct = 0.2).normalize(imagenet_stats)
# Separate test ImageList for this DataBunch, built from the same test.csv.
test_data_densenet = ImageList.from_csv(directory_to_extract_test + '/' + 'Test_images/test', csv_name='test.csv')
data_densenet.add_test(test_data_densenet)

In [None]:
# Create a learner on `data_densenet` with the densenet121 architecture,
# tracking accuracy.
learn_densenet = cnn_learner(data_densenet, models.densenet121, metrics=accuracy)

In [None]:
# First training pass with the learner as created above.
learn_densenet.fit_one_cycle(4)

In [None]:
# Unfreeze weights and re train
learn_densenet.unfreeze()
learn_densenet.fit_one_cycle(10)

In [None]:
# Run the LR finder on the unfrozen model
learn_densenet.lr_find()

In [None]:
# Plot learning rates
learn_densenet.recorder.plot()

In [None]:
# Model with best LR set (max_lr range presumably read off the plot above — confirm)
learn_densenet.fit_one_cycle(10, max_lr=slice(1e-6,1e-4))

In [None]:
# Get predictions.
# Iterate the test ImageList that was built for (and attached to)
# data_densenet, so this section does not depend on `test_data` from the
# earlier sections. Both lists are created from the same test.csv.
test_predictions_densenet = [
    learn_densenet.predict(test_image)[0] for test_image in test_data_densenet
]

In [None]:
# Load test.csv as the submission frame and attach the DenseNet predictions.
# NOTE(review): assumes the predictions are in the CSV's row order — confirm.
predictions_densenet = pd.read_csv(directory_to_extract_test + '/' + 'Test_images/test/test.csv')
predictions_densenet['labels'] = test_predictions_densenet

In [None]:
# Store the DenseNet predictions alongside the other per-model CSVs.
predictions_densenet.to_csv(submissions_dir + '/' + 'Densenet_preds.csv', index=False)

### With VGG16

In [None]:
# Rebuild the DataBunch with batch size 32 for the VGG model.
# Re-seed NumPy first, as the first DataBunch cell does, so the valid_pct
# hold-out is drawn from the same RNG state instead of whatever state the
# earlier training cells left behind.
np.random.seed(123)
data = ImageDataBunch.from_csv(path, ds_tfms=tfms, size=224, bs = 32, valid_pct = 0.2).normalize(imagenet_stats)
# Attach the test images to the rebuilt DataBunch.
data.add_test(test_data)

In [None]:
# Create a learner on `data` with models.vgg16_bn, tracking accuracy.
learn_vgg = cnn_learner(data, models.vgg16_bn, metrics=accuracy)

In [None]:
# First training pass with the learner as created above.
learn_vgg.fit_one_cycle(4)

In [None]:
# Unfreeze the weights and train again.
learn_vgg.unfreeze()
learn_vgg.fit_one_cycle(10)

In [None]:
# Try to find best LR
# NOTE(review): unlike the other models, no further fit_one_cycle with an
# explicit max_lr follows in this section.
learn_vgg.lr_find()

In [None]:
learn_vgg.recorder.plot()

In [None]:
# Get predictions: one learn_vgg.predict() call per test image, keeping
# element [0] of each result.
test_predictions_vgg = [
    learn_vgg.predict(test_image)[0] for test_image in test_data
]

In [None]:
# Load test.csv as the submission frame and attach the VGG predictions.
# NOTE(review): assumes test_data iterates in the CSV's row order — confirm.
predictions_vgg = pd.read_csv(directory_to_extract_test + '/' + 'Test_images/test/test.csv')
predictions_vgg['labels'] = test_predictions_vgg

In [None]:
# Store the VGG predictions alongside the other per-model CSVs.
predictions_vgg.to_csv(submissions_dir + '/' + 'VGG_preds.csv', index=False)

In [None]:
# Quick look at the first rows of the saved frame.
predictions_vgg.head()

## Ensemble

We will follow a simple Ensembling strategy of taking the majority class of 5 predictors

In [None]:
# Display the current working directory. `os` is imported again here,
# presumably so this section runs on its own after a kernel restart.
import os
os.getcwd()

In [None]:
# Reload every model's saved predictions from the submissions folder
# (the same five files, read in the same order).
_submissions_folder = '/home/jupyter/tutorials/fastai/course-v3/Soumadiptya Game of Deep Learning Contest files/Data/Submissions'
(
    predictions_resnet_18,
    predictions_resnet_34,
    predictions_resnet_50,
    predictions_densenet,
    predictions_vgg,
) = [
    pd.read_csv(_submissions_folder + '/' + csv_name)
    for csv_name in (
        'Resnet_18_preds.csv',
        'Resnet_34_preds.csv',
        'Resnet_50_preds.csv',
        'Densenet_preds.csv',
        'VGG_preds.csv',
    )
]

In [None]:
ensemble_predictions = predictions_resnet_18

In [None]:
# Inspect the frame before adding the other models' columns.
ensemble_predictions.head()

In [None]:
# Rename the 'labels' column in place so each model gets its own column name.
ensemble_predictions.rename(columns = {'labels':'Resnet_18_preds'}, inplace = True)

In [None]:
# Add one column per remaining model, taken from that model's 'labels' column.
# NOTE(review): assumes every saved CSV has the same rows in the same order — confirm.
ensemble_predictions['Resnet_34_preds'] = predictions_resnet_34['labels']

In [None]:
ensemble_predictions['Resnet_50_preds'] = predictions_resnet_50['labels']

In [None]:
ensemble_predictions['Densenet_preds'] = predictions_densenet['labels']

In [None]:
ensemble_predictions['VGG_preds'] = predictions_vgg['labels']

In [None]:
# Inspect the frame with all five prediction columns.
ensemble_predictions.head()

In [None]:
# Getting final predictions from ensemble predictions.
# Collect each row's five model votes as a list of strings in the 'list'
# column. The votes are taken straight from the columns rather than joined
# with spaces and split again, so a label that contains whitespace (or is
# empty) stays a single vote.
_vote_columns = [
    'Resnet_18_preds',
    'Resnet_34_preds',
    'Resnet_50_preds',
    'Densenet_preds',
    'VGG_preds',
]
ensemble_predictions['list'] = [
    [str(vote) for vote in row_votes]
    for row_votes in zip(*(ensemble_predictions[col] for col in _vote_columns))
]

In [None]:
from statistics import mode

In [None]:
def return_mode(source_list):
    """Return the single most common value of *source_list*.

    Returns the string 'No Mode available' when there is no unique winner:
    the input is empty, two or more values tie for the highest count, or the
    values cannot be counted (unhashable).

    Ties are detected explicitly instead of relying on statistics.mode()
    raising, because from Python 3.8 onwards that function returns the first
    mode for tied data rather than raising.
    """
    from collections import Counter

    try:
        top_two = Counter(source_list).most_common(2)
    except TypeError:
        # Unhashable votes cannot be counted; keep the best-effort fallback.
        return 'No Mode available'

    if not top_two:
        return 'No Mode available'
    # A runner-up with the same count as the leader means there is no unique mode.
    if len(top_two) == 2 and top_two[0][1] == top_two[1][1]:
        return 'No Mode available'
    return top_two[0][0]
# Majority vote per row: apply return_mode to each row's list of model votes.
ensemble_predictions['category']  = ensemble_predictions['list'].map(return_mode)

In [None]:
# Write the ensemble submission with only the 'image' and 'category' columns.
ensemble_predictions.loc[:, ['image', 'category']].to_csv('/home/jupyter/tutorials/fastai/course-v3/Soumadiptya Game of Deep Learning Contest files/Data/Submissions/Ensemble.csv', index = False)

In [None]:
# Show the rows for which return_mode returned the 'No Mode available' marker.
ensemble_predictions[ensemble_predictions['category'] == 'No Mode available']

### Where no mode is available do it by person