You can find out more about fast.ai [here](http://course.fast.ai).

In [None]:
# Put these at the top of every notebook: the autoreload magics re-import edited
# modules automatically, and the matplotlib magic renders plots inline.
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
import gc
import os, random
import shutil
from shutil import copy, copytree #important for creating new working directory

import numpy as np
from sklearn.metrics import confusion_matrix

In [None]:
#fast.ai 
from fastai.imports import *
from fastai.transforms import *
from fastai.conv_learner import*
from fastai.model import *
from fastai.dataset import *
from fastai.sgdr import *
from fastai.plots import *

**Prepare Images**

In [None]:
# input folders holding the training images and the test images
train_path, test_path = '../input/train/', '../input/test/'
# number of images taken from each class for the training and validation sets
train_samples, valid_samples = 10000, 2500

In [None]:
# list the train folder once, then split the entries by the class name found in each file name
train_files = os.listdir(train_path)
# full paths of every dog image
train_dog = [train_path + fname for fname in train_files if 'dog' in fname]
# full paths of every cat image
train_cat = [train_path + fname for fname in train_files if 'cat' in fname]

In [None]:
# Shuffle the dog and cat images reproducibly, so that the train/valid split is the
# same after every kernel restart and validation scores stay comparable between runs.
# os.listdir() returns names in arbitrary order, so sort first to give the seeded
# shuffle a known starting point.
train_dog.sort()
train_cat.sort()
# dedicated generator: the global `random` state used elsewhere is left untouched
shuffle_rng = random.Random(42)
shuffle_rng.shuffle(train_dog)
shuffle_rng.shuffle(train_cat)

In [None]:
# Use only a fixed number of images per class: the first `train_samples` entries of
# each shuffled list for training and the last `valid_samples` entries for validation.
# The head and tail slices overlap (validation images leak into the training set)
# when a class has fewer than train_samples + valid_samples images, so fail loudly.
for class_images in (train_dog, train_cat):
    assert len(class_images) >= train_samples + valid_samples, \
        'not enough images per class for disjoint train and valid sets'
train_dog_images = train_dog[:train_samples]
train_cat_images = train_cat[:train_samples]
# explicit start index instead of [-valid_samples:], which would return the WHOLE
# list when valid_samples is 0
valid_dog_images = train_dog[len(train_dog) - valid_samples:]
valid_cat_images = train_cat[len(train_cat) - valid_samples:]

In [None]:
# Create the working directory tree: one folder per split (valid/train) and class (cat/dog).
# exist_ok=True keeps the cell re-runnable; without it os.makedirs raises
# FileExistsError as soon as the cell is executed a second time.
for split in ('valid', 'train'):
    for label in ('cat', 'dog'):
        os.makedirs(f'../working/dogcats/{split}/{label}/', exist_ok=True)

In [None]:
# copy the selected training images from the input directory into the per-class working folders
dog_train_dir = '../working/dogcats/train/dog/'
cat_train_dir = '../working/dogcats/train/cat/'
for idx in range(train_samples):
    shutil.copy(train_dog_images[idx], dog_train_dir)
    shutil.copy(train_cat_images[idx], cat_train_dir)

In [None]:
# copy the selected validation images from the input directory into the per-class working folders
dog_valid_dir = '../working/dogcats/valid/dog/'
cat_valid_dir = '../working/dogcats/valid/cat/'
for idx in range(valid_samples):
    shutil.copy(valid_dog_images[idx], dog_valid_dir)
    shutil.copy(valid_cat_images[idx], cat_valid_dir)

In [None]:
# copytree creates the destination folder itself and copies every test image into it
test_dst = '../working/dogcats/test/'
shutil.copytree(test_path, test_dst)

In [None]:
# sanity check: show the entries now present in the new working directory
os.listdir('../working/dogcats/')

In [None]:
len(os.listdir('../working/dogcats/test/'))

In [None]:
# root of the working dataset, and the image size handed to the transforms
path, image_size = '../working/dogcats/', 224

In [None]:
# check whether PyTorch can use a CUDA GPU
torch.cuda.is_available()

In [None]:
# cuDNN is NVIDIA's package of accelerated functions for deep learning.
# This only displays whether PyTorch has it enabled; it does not switch anything on.
torch.backends.cudnn.enabled 

**Create and train a model**

In [None]:
# architecture the classifier is built on
arch = resnet34
# image transforms for this architecture: side-on augmentations, max_zoom of 1.1
tfms = tfms_from_model(
    arch,
    image_size,
    aug_tfms=transforms_side_on,
    max_zoom=1.1,
)
# load the images from the folder layout under `path`; the 'test' folder is the test set
data = ImageClassifierData.from_paths(path, tfms=tfms, test_name='test')
# pretrained learner, starting with precompute switched on
learn = ConvLearner.pretrained(arch, data, precompute=True)

In [None]:
# run the learning-rate finder; the result is inspected with learn.sched.plot()
lrf = learn.lr_find()

In [None]:
# plot loss against learning rate to pick the best learning rate
learn.sched.plot() 
# the learning rate chosen from this plot is 0.01

In [None]:
# fit the model with the chosen learning rate of 0.01
# NOTE(review): the second argument (2) is presumably the number of epochs/cycles - confirm against fastai's fit()
learn.fit(0.01, 2)

In [None]:
# run the garbage collector to release unreferenced objects before the next stage
gc.collect()

**Fine Tuning**

In [None]:
# precompute=True means the output of the pretrained model is reused and only passed
# to the last layer, which saves time. It is switched off here for fine tuning.
# NOTE(review): presumably required for the data augmentation to take effect - confirm
learn.precompute = False

In [None]:
# n_cycle is the number of cycles to run; at the start of each cycle the learning rate is reset back to 0.01
# cycle_len is the length of one cycle, in epochs (fastai SGDR convention - confirm against the fastai docs)
learn.fit(0.01, n_cycle=3, cycle_len=1)

In [None]:
# plot the learning-rate schedule recorded by the scheduler
learn.sched.plot_lr()

In [None]:
# run the garbage collector to release unreferenced objects before unfreezing
gc.collect()

In [None]:
# unfreeze all layers so that the following fit can update every layer
learn.unfreeze()

In [None]:
# differential learning rates: three increasing values
# NOTE(review): presumably one per layer group, earliest layers first - confirm
lr = np.array([1e-4, 1e-3, 1e-2])
# three cycles; the first lasts one epoch and cycle_mult=2 is passed to scale later ones
learn.fit(lr, n_cycle=3, cycle_len=1, cycle_mult=2)

In [None]:
# plot the learning-rate schedule recorded by the scheduler
learn.sched.plot_lr()

In [None]:
# run the garbage collector to release unreferenced objects before analysing results
gc.collect()

**Analyze Results**

In [None]:
# Test-time augmentation: predict on the validation images and also on a number of
# randomly augmented versions of them.
valid_log_predictions, y = learn.TTA()
# turn log-predictions into probabilities, then average over the first axis
valid_prob_predictions = np.exp(valid_log_predictions).mean(axis=0)

In [None]:
# accuracy of the averaged validation predictions against the targets y
accuracy_np(valid_prob_predictions, y)

In [None]:
# predicted class index per validation image (1 if dog, 0 if cat)
valid_predictions = valid_prob_predictions.argmax(axis=1)

In [None]:
# plot a confusion matrix of the true labels (y) against the predicted classes
cm = confusion_matrix(y, valid_predictions)
plot_confusion_matrix(cm, data.classes)

**Predictions**

In [None]:
# predict on the test set with test-time augmentation; the second return value is discarded
# (alternative without TTA, kept for reference:)
#log_predictions = learn.predict(is_test=True)
log_predictions,_ = learn.TTA(is_test=True)

In [None]:
# (without TTA this would be: np.exp(log_predictions[:,1]))
# turn log-predictions into probabilities, then average over the first axis
test_probs = np.exp(log_predictions).mean(axis=0)
# keep only column 1 of the averaged probabilities
prob_predictions = test_probs[:, 1]

In [None]:
# Take the file names from the test dataset itself so that each id is paired with the
# prediction made for that image; os.listdir() gives no guarantee of returning the
# files in the same order in which the dataset read them.
# NOTE(review): assumes data.test_ds.fnames holds paths such as 'test/123.jpg' in prediction order - confirm
test_ids = [os.path.basename(fname) for fname in data.test_ds.fnames]
submission = pd.DataFrame({'id': test_ids, 'label': prob_predictions})

In [None]:
# Remove the working image directory now that the predictions are computed.
# shutil.rmtree replaces the `! rm -rf` shell escape so the cell does not depend on a
# Unix shell; ignore_errors=True keeps the "-f" behaviour of not failing when the
# directory is already gone.
shutil.rmtree('../working/dogcats/', ignore_errors=True)

In [None]:
# keep the part of each file name before the first '.', convert it to an integer id,
# then order the rows by id
submission['id'] = submission['id'].str.split('.').str[0].astype(int)
submission = submission.sort_values('id')

In [None]:
# write the submission file, leaving out the DataFrame index column
submission.to_csv("submission.csv", index = False)