In [None]:
#Put these at the top of every notebook to get automatic reloading and inline plotting
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
#Load the required packages
from fastai.imports import *
from fastai.transforms import *
from fastai.conv_learner import *
from fastai.model import *
from fastai.dataset import *
from fastai.sgdr import *
from fastai.plots import *
import pandas as pd

In [None]:
#Set some defaults
#Root data directory; meta-data/train.csv, train/ and test/ are read from underneath it
PATH = "data/DL_Beginner/"
#Image size used for the initial exploration; sz is reassigned before each training stage
sz=128
#resnet101 is the architecture actually used; swap in resnet34 for a quicker model
arch=resnet101
#Batch size
bs=24

In [None]:
#Training set indices
label_csv = f'{PATH}meta-data/train.csv'
#Count the data rows (every line except the header); the with-block closes the file afterwards
with open(label_csv) as label_file:
    n = sum(1 for _ in label_file) - 1
#Pick the row indices that are passed as val_idxs when the data objects are built
val_idxs = get_cv_idxs(n)

In [None]:
#Number of entries (lines in train.csv minus the header line)
n

## Initial Exploration

In [None]:
#List the contents of the data directory
!ls {PATH}

In [None]:
#Load the training labels (train.csv) into a DataFrame
label_df = pd.read_csv(label_csv)

In [None]:
#Preview the first few label rows
label_df.head()

In [None]:
#Count the rows per Animal class and sort by that count, largest first
label_df.pivot_table(index = 'Animal', aggfunc = len).sort_values('Image_id', ascending = False)

## Image Exploration

In [None]:
#Build a data model with augmentation
tfms = tfms_from_model(arch, sz, aug_tfms=transforms_side_on, max_zoom=1.1)
#Reuse label_csv so the labels and val_idxs (computed from that same file) cannot drift apart
data = ImageClassifierData.from_csv(PATH, folder="train", csv_fname=label_csv, val_idxs=val_idxs,
                                    bs=bs, tfms=tfms, test_name="test")

In [None]:
#Build the full path of one training image (index 1, i.e. the second file name) and display it
fn = PATH+data.trn_ds.fnames[1]; fn

In [None]:
#Open the image with PIL and display it
img = PIL.Image.open(fn); img 

In [None]:
#Record the PIL .size of every training image, keyed by file name
#The with-block closes each image file as soon as its size has been read
size_d = {}
for k in data.trn_ds.fnames:
    with PIL.Image.open(PATH+k) as im:
        size_d[k] = im.size

In [None]:
#Transpose the per-image size pairs into two parallel tuples
#(PIL's .size is (width, height), so row_sz holds the first component and col_sz the second)
row_sz, col_sz = zip(*size_d.values())

In [None]:
#Get an idea of the image sizes: histogram of the first size component (row_sz)
plt.hist(row_sz)

In [None]:
#Histogram of the second size component (col_sz)
plt.hist(col_sz)

## Create Data Model

In [None]:
#Create a data model where the images are resized and stored in a tmp folder
#This makes resizing quicker
def get_data(sz,bs):
    """Build the ImageClassifierData object for image size `sz` and batch size `bs`.

    Relies on the notebook-level `arch`, `PATH`, `label_csv` and `val_idxs`.
    For sz <= 300 the result of `data.resize(340,'tmp')` is returned;
    for larger sizes the data object is returned as built.
    """
    tfms = tfms_from_model(arch, sz, aug_tfms = transforms_side_on, max_zoom = 1.1)
    #Read the labels from label_csv, the same file val_idxs was computed from
    data = ImageClassifierData.from_csv(PATH, folder="train", csv_fname=label_csv, val_idxs=val_idxs,
                                        bs=bs, tfms=tfms, test_name="test")
    if sz > 300:
        return data
    return data.resize(340,'tmp')

In [None]:
#Start with a size of 100 (the later stages reassign sz to 224 and then 340)
sz = 100

In [None]:
#Get a data model with resized pictures
#This rebinds `data`, replacing the object built during image exploration
data = get_data(sz,bs)

## Build Model

In [None]:
#Build a learner from the pretrained arch with a dropout rate of 0.5 (ps=0.5)
learn = ConvLearner.pretrained(arch, data, ps=0.5)

In [None]:
#Run the learning rate finder; its results are plotted in the next cell
lrf=learn.lr_find()

In [None]:
#Plot to decide what learning rate to use
learn.sched.plot()

In [None]:
#Train for 4 cycles, not 4 epochs: with cycle_len=1 and cycle_mult=2 each cycle
#is twice as long as the previous one, so this runs 1+2+4+8 = 15 epochs
learn.fit(1e-2, 4, cycle_len=1, cycle_mult=2)

In [None]:
#Unfreeze the model's layers (not only the last one) so they can be fine-tuned
learn.unfreeze()
#NOTE(review): bn_freeze(True) presumably keeps the batch-norm layers fixed during fine-tuning - confirm
learn.bn_freeze(True)

In [None]:
#Use differential learning rates: three values rising from 1e-5 to 1e-2
#(presumably one per layer group, earliest layers first - confirm)
lr=np.array([1e-5,1e-4,1e-2])

In [None]:
#Train for 2 cycles, not 2 epochs: with cycle_len=1 and cycle_mult=2 this runs 1+2 = 3 epochs
learn.fit(lr, 2, cycle_len=1, cycle_mult=2)

In [None]:
#Freeze earlier layers again before moving on to the next image size
learn.freeze()

In [None]:
#Save the learner after the size-100 stage under the name '100_hacker_earth'
learn.save('100_hacker_earth')

In [None]:
###### SECOND SIZE ########
#Use a larger size (second stage: 224, up from 100)
sz = 224

In [None]:
#Hand the learner a data model built for the new image size
learn.set_data(get_data(sz,bs))

In [None]:
#Train after the earlier learn.freeze(): 4 cycles with cycle_mult=2, i.e. 1+2+4+8 = 15 epochs
learn.fit(1e-2, 4, cycle_len=1, cycle_mult=2)

In [None]:
#Unfreeze the layers for fine-tuning (unlike the size-100 stage, bn_freeze is not called again here)
learn.unfreeze()

In [None]:
#Fine-tune with the differential learning rates: 2 cycles, i.e. 1+2 = 3 epochs
learn.fit(lr, 2, cycle_len=1, cycle_mult=2)

In [None]:
#Save the learner after the size-224 stage under the name '224_hacker_earth'
learn.save('224_hacker_earth')

In [None]:
#Freeze earlier layers again before moving on to the next image size
learn.freeze()

In [None]:
###### THIRD SIZE ########
#Use a larger size (third stage: 340; get_data skips the resize step when sz > 300)
sz = 340

In [None]:
#Hand the learner a data model built for the new image size
learn.set_data(get_data(sz,bs))

In [None]:
#Train after the earlier learn.freeze(): 3 cycles with cycle_mult=2, i.e. 1+2+4 = 7 epochs
#(one cycle fewer than the size-100 and size-224 stages)
learn.fit(1e-2, 3, cycle_len=1, cycle_mult=2)

In [None]:
#Unfreeze the layers for fine-tuning
learn.unfreeze()

In [None]:
#Fine-tune with the differential learning rates: 2 cycles, i.e. 1+2 = 3 epochs
learn.fit(lr, 2, cycle_len=1, cycle_mult=2)

In [None]:
#Freeze earlier layers again (here this happens before saving, unlike the size-224 stage)
learn.freeze()

In [None]:
#Save the learner after the size-340 stage under the name '340_hacker_earth'
learn.save('340_hacker_earth')

## Prediction

In [None]:
#Get predictions on the test set (is_test=True) with test time augmentation - TTA produces 2 array outputs
log_preds, y = learn.TTA(is_test = True)

In [None]:
#Exponentiate the log predictions, then average over axis 0
#(presumably the axis holding the individual TTA passes - confirm)
probs = np.mean(np.exp(log_preds),0)

In [None]:
#Create a data frame from the averaged predictions
ds = pd.DataFrame(probs)

In [None]:
#Add column names, taken from the class list of the data object
ds.columns = data.classes

In [None]:
#View a test image
#Build the full path of the test file at index 1 (the second file name) and display it
fn = PATH+data.test_ds.fnames[1]; fn

In [None]:
#Open the test image with PIL and display it
img = PIL.Image.open(fn); img 

In [None]:
#Insert IDs: drop the folder name and the original extension, then append '.jpg'
#os.path does this without hard-coding the length of the folder prefix or of the extension
ds.insert(0,'image_id', [os.path.splitext(os.path.basename(o))[0] + '.jpg' for o in data.test_ds.fnames])

In [None]:
#Check the df dimensions (rows, columns)
ds.shape

In [None]:
#Get the test data set ids
test_label_csv = f'{PATH}meta-data/test.csv'
#Load test.csv; its Image_id column is used below to order the predictions
test_label_df = pd.read_csv(test_label_csv)

In [None]:
#Re-order the prediction rows to follow the Image_id order of test.csv
ds = (ds.set_index('image_id')
        .reindex(index=test_label_df['Image_id'])
        .reset_index())

In [None]:
#Write the submission file into the subm/ folder, creating the folder if needed
SUBM = f'{PATH}subm/'
os.makedirs(SUBM, exist_ok = True)
#Build the file path from SUBM so it always lands in the directory created above
ds.to_csv(f'{SUBM}subm.csv', index = False)

In [None]:
#Create a file link to download the submission csv
FileLink(f'{SUBM}subm.csv')