In [None]:
# magics
# (re)load the autoreload extension and re-import edited modules before each cell runs
%reload_ext autoreload
%autoreload 2
# render matplotlib figures inline, directly below the cell that creates them
%matplotlib inline

In [None]:
# load fastai library
from fastai.vision import *
import os
from IPython.display import FileLink

In [None]:
# set data path: root folder of the dataset; later cells read its 'train' and 'test' sub-folders
path = Path('../input/timenet/tn_data/tN_data')
# echo the path as the cell output
path

In [None]:
# seed NumPy's global RNG so the random train/validation split below is repeatable
np.random.seed(42)

# labelled image source:
#   - images are collected from path/'train'
#   - 20 percent of them are held out at random as the validation set
#   - each image is labelled from its folder
src = (
    ImageList.from_folder(path/'train')
    .split_by_rand_pct(0.2)
    .label_from_folder()
)
# show a summary of the resulting train/validation lists
src

In [None]:
# data augmentation settings: no flips, no rotation, no warp; lighting changes
# of up to 0.15 applied with probability 0.25
# NOTE(review): p_affine=0 presumably switches the affine transforms off, in which
# case max_zoom=1.2 has no effect — confirm against the fastai get_transforms docs
tfms = get_transforms(
    do_flip=False,
    flip_vert=False,
    max_rotate=0,
    max_zoom=1.2,
    max_lighting=0.15,
    max_warp=0,
    p_affine=0,
    p_lighting=0.25,
)

In [None]:
# helper functions

# folder holding the held-out test images
test_path = path/'test'
# table mapping each test file name ('file' column) to its true class ('class' column)
test_csv = pd.read_csv('../input/timenet-test-labels/test.csv')

# returns paths of misclassified files(list), file names (list) and test results (dict) 

def test_accuracy(test_path=test_path, csv_df=test_csv, learner=None):
    """Score a trained learner on every file in the test folder.

    Each file in ``test_path`` is opened, run through ``predict`` and the
    predicted label is compared with the true class listed in ``csv_df``.
    A short summary (error count and accuracy in percent) is printed.

    Args:
        test_path: folder holding the test images; must provide ``.ls()``
            yielding path objects.
        csv_df: DataFrame with a ``file`` column (bare file names) and a
            ``class`` column (true labels). If a file name is listed more
            than once, the first row is used.
        learner: object whose ``predict(img)`` returns the predicted class as
            its first element. Defaults to the notebook-level ``learn``,
            looked up when the function is called.

    Returns:
        tuple ``(error_paths, error_list, test_res)`` where
        ``error_paths`` is a list of path strings of the misclassified files,
        ``error_list`` is the list of their file names, and
        ``test_res`` maps every file name to ``[actual, predicted, correct]``.
        A file that has no row in ``csv_df`` gets ``actual=None`` and is
        counted as an error.
    """
    if learner is None:
        # fall back to the learner defined at notebook level
        learner = learn

    # Exact-name lookup table. The file name must not be used as a regex
    # (its '.' would match any character and a prefix match would also hit
    # longer names); setdefault keeps the first row for duplicated names.
    label_by_file = {}
    for name, label in zip(csv_df['file'], csv_df['class']):
        label_by_file.setdefault(name, label)

    test_res = {}
    for file in test_path.ls():
        # bare file name of the path object
        fname = file.name
        # open image and make prediction
        # TODO (from the original author): "add normalization fn here" —
        # confirm whether predict() already applies the databunch normalization
        img = open_image(file)
        pred_label = str(learner.predict(img)[0])
        # true label as a string so it compares with the predicted label
        actual = label_by_file.get(fname)
        if actual is not None:
            actual = str(actual)
        test_res[fname] = [actual, pred_label, actual == pred_label]

    # names of the misclassified files, then their paths inside test_path
    error_list = [fname for fname, res in test_res.items() if not res[2]]
    error_paths = [str(test_path/fname) for fname in error_list]

    # an empty test folder yields nan instead of a ZeroDivisionError
    n_total = len(test_res)
    accuracy_pct = (1 - (len(error_list)/n_total))*100 if n_total else float('nan')
    print('number or errors: {}/{} \ntest accuracy: {}'.format(len(error_list), n_total, accuracy_pct))

    # error_paths(list), error_list (list) and test_res (dict)
    return error_paths, error_list, test_res

# plots grid depicting model errors on test set with information on actual result
def plot_errors(img_list, results=None):
    """Plot the misclassified test images as a grid, six per row.

    Every image is titled with its file name and its result entry
    ``[actual, predicted, correct]``.

    Args:
        img_list: list of path strings of the misclassified images, as
            returned first by ``test_accuracy``.
        results: dict mapping file name to ``[actual, predicted, correct]``.
            Defaults to the notebook-level ``test_res``, looked up when the
            function is called.

    Returns:
        None. The figure is shown with ``plt.show()``; nothing is drawn when
        ``img_list`` is empty.
    """
    if results is None:
        # fall back to the dict produced by the latest test_accuracy() run
        results = test_res
    if not img_list:
        print('no errors to plot')
        return

    n_cols = 6
    # Grow the grid past 6 rows when there are more than 36 images (a fixed
    # 6x6 grid raises on the 37th subplot); never use fewer than 6 rows so
    # that smaller lists are laid out exactly as before.
    n_rows = max(6, -(-len(img_list) // n_cols))
    plt.figure(figsize=(26, 26 * n_rows / 6))
    plt.subplots_adjust(hspace=0.3)

    # Plot image list as grid
    for i, img_path in enumerate(img_list):
        ax = plt.subplot(n_rows, n_cols, i+1)
        ax.axis('off')
        img_name = img_path.rsplit('/',1)[-1]
        ax.set_title(img_name+'\n act, pred, correct \n'+str(results[img_name]))
        ax.imshow(PIL.Image.open(img_path))

    plt.show()

In [None]:
# databunch: apply the augmentations, resize images to 244 px, use batches of 21
# and normalize with the ImageNet statistics (the backbone was pre-trained on ImageNet)
data = (
    src.transform(tfms, size=244)
    .databunch(bs=21)
    .normalize(imagenet_stats)
)

# pre-trained backbone architecture
arch = models.resnet34

# learner reporting accuracy; checkpoints are written to model_dir
learn = cnn_learner(
    data,
    arch,
    metrics=accuracy,
    model_dir='../../../../working',
    ps=0.25,
)

In [None]:
# look at databatch: display three rows of samples as a sanity check of images and labels
data.show_batch(rows=3)

In [None]:
# look at data classes (the labels assigned from the training folders)
data.classes

In [None]:
# run learning rate finder on the freshly created learner; the result is plotted in the next cell
learn.lr_find()

In [None]:
# plot learning rate finder output, used to choose the value of `lr` set below
learn.recorder.plot()

In [None]:
# purge the learner before training
# NOTE(review): this was originally described as a reset "to retrain model from scratch";
# the fastai v1 docs describe purge() as clearing cached learner attributes to free GPU
# memory, not as re-initializing weights — confirm this call does what is intended
learn.purge()

In [None]:
# maximum learning rate for the first training stage (presumably read off the LR-finder plot above)
lr=3e-2

In [None]:
# run One Cycle policy over 5 epochs with a maximum learning rate of `lr` and a weight decay of 3e-7
learn.fit_one_cycle(5, max_lr=lr, wd=3e-7)

In [None]:
# Run on test set: prints the error count and accuracy and keeps the results as notebook-level
# variables (plot_errors reads test_res from the notebook namespace)
error_paths, error_list, test_res = test_accuracy()

In [None]:
# show the misclassified test images of the first training stage
plot_errors(error_paths)

In [None]:
# checkpoint the first-stage weights (name presumably refers to the 13 test errors at this point)
learn.save('13_errors')

In [None]:
# restore the first-stage checkpoint as the starting point for fine-tuning
learn.load('13_errors')

In [None]:
# unfreeze all the model layers so the following training stage updates the whole network
learn.unfreeze()

In [None]:
# run the learning rate finder again, now on the unfrozen model
learn.lr_find()

In [None]:
# plot the learning rate finder output for the unfrozen model
# NOTE(review): stop_div and num_it look like lr_find() parameters rather than
# recorder.plot() parameters; they are probably ignored here — confirm, and if so
# pass them to the learn.lr_find() call in the previous cell instead
learn.recorder.plot(stop_div=False, num_it=1000)

In [None]:
# learning rate for the unfrozen stage (overwrites the first-stage value of `lr`)
lr=1e-6

In [None]:
# train unfrozen model for 3 epochs with the One Cycle policy, slice(lr) with lr=1e-6 and a weight decay of 3e-7
learn.fit_one_cycle(3, slice(lr), wd=3e-7)

In [None]:
# re-run the test-set evaluation with the fine-tuned model (replaces the first-stage results)
error_paths, error_list, test_res = test_accuracy()

In [None]:
# show the test images the fine-tuned model still misclassifies
plot_errors(error_paths)

In [None]:
# moves output file to destination where it can be downloaded from kaggle
!mv ../input/working/unfreeze_9_errors.pth .

In [None]:
# output file for download: display a link to the weights file in the current directory
FileLink('unfreeze_9_errors.pth')