# ResNet50

In [1]:
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
import os

from fastai.imports import *

from fastai.transforms import *
from fastai.conv_learner import *
from fastai.model import *
from fastai.dataset import *
from fastai.sgdr import *
from fastai.plots import *

from sklearn import metrics

In [4]:
# Root folder of the dataset; train/, test/ and meta-data/ are resolved against it.
PATH = '../data/raw/HE_DL/'

# Architecture handed to tfms_from_model and ConvLearner.pretrained.
arch = resnet50

# Initial image size and batch size used to build the first dataset.
sz = 350
bs = 32

In [5]:
# Number of labelled rows: every line of the label CSV minus one
# (presumably the header row -- confirm against the file).
label_csv = f'{PATH}meta-data/train.csv'
with open(label_csv) as label_file:
    # Count lines lazily; the context manager closes the handle, which the
    # previous `len(list(open(...)))` form left open.
    n = sum(1 for _ in label_file) - 1

# Row indices held out for validation (2600 of 13000 in the recorded run).
val_idxs = get_cv_idxs(n)
n, len(val_idxs)

(13000, 2600)

## Initial Model

In [6]:
def get_data(sz, bs, val_idxs):
    """Build the image-classification dataset object for one image size.

    Relies on the notebook-level globals `arch` and `PATH`.

    Args:
        sz: image size passed to `tfms_from_model`.
        bs: batch size passed to `ImageClassifierData.from_csv`.
        val_idxs: row indices of the label CSV to use as the validation set.

    Returns:
        The object created by `ImageClassifierData.from_csv` when `sz > 300`;
        otherwise the result of calling `.resize(500, 'tmp/')` on it.
    """
    # Side-on augmentations with a maximum zoom of 1.05.
    transforms = tfms_from_model(arch, sz, aug_tfms=transforms_side_on, max_zoom=1.05)

    # Labels come from meta-data/train.csv, training images from train/,
    # and the unlabeled test images from test/ (all relative to PATH).
    labels_path = f'{PATH}meta-data/train.csv'
    model_data = ImageClassifierData.from_csv(
        PATH,
        'train/',
        labels_path,
        bs=bs,
        tfms=transforms,
        val_idxs=val_idxs,
        test_name='test/',
    )

    if sz > 300:
        return model_data
    return model_data.resize(500, 'tmp/')

## Full Training

In [7]:
# Full training: replace the cross-validation split computed earlier with a
# single held-out row (index 0), so nearly every labelled image is trained on.
# NOTE: the val_loss / accuracy printed by the fits below are therefore measured
# on one image only and say nothing about generalisation.
val_idxs = [0]

# Build the dataset at the configured size / batch size and create a learner
# on the pretrained architecture with precomputed activations enabled.
data  = get_data(sz, bs, val_idxs)
learn = ConvLearner.pretrained(arch, data, precompute=True)

100%|██████████| 407/407 [02:31<00:00,  2.69it/s]
100%|██████████| 1/1 [00:00<00:00, 30.18it/s]
100%|██████████| 188/188 [01:08<00:00,  2.73it/s]


In [8]:
# Two cycles of one epoch each at 1e-2, run while precompute=True is still set on the learner.
learn.fit(1e-2, 2, cycle_len=1)

epoch      trn_loss   val_loss   accuracy                    
    0      0.296985   0.002955   1.0       
    1      0.22474    0.001941   1.0                          



[array([ 0.00194]), 1.0]

In [9]:
# Switch off precomputed activations before training further
# (presumably so the augmentation transforms take effect -- confirm).
learn.precompute=False
# Five more single-epoch cycles at the same 1e-2 rate.
learn.fit(1e-2, 5, cycle_len=1)

epoch      trn_loss   val_loss   accuracy                    
    0      0.199012   0.00064    1.0       
    1      0.173562   0.000208   1.0                         
    2      0.167498   0.000364   1.0                         
    3      0.151163   0.000379   1.0                         
    4      0.151231   0.000107   1.0                         



[array([ 0.00011]), 1.0]

In [10]:
# Continue training on 450px images.
# - Reuse the configured batch size `bs` instead of repeating the literal 32.
# - Rebind `data` so later cells that read `data.classes` / `data.test_ds`
#   refer to the same dataset object the learner is now using, not the
#   350px one created earlier.
data = get_data(450, bs=bs, val_idxs=val_idxs)
learn.set_data(data)

# Three single-epoch cycles at 1e-2 on the larger images.
learn.fit(1e-2, 3, cycle_len=1)

epoch      trn_loss   val_loss   accuracy                    
    0      0.159471   2.1e-05    1.0       
    1      0.130305   8e-06      1.0                         
    2      0.152088   2e-06      1.0                         



[array([ 0.]), 1.0]

In [11]:
# Three cycles with cycle_len=1 and cycle_mult=2; the recorded log shows 7 epochs (1 + 2 + 4).
learn.fit(1e-2, 3, cycle_len=1, cycle_mult=2)

epoch      trn_loss   val_loss   accuracy                     
    0      0.113751   2e-06      1.0       
    1      0.129911   0.0        1.0                         
    2      0.114263   0.0        1.0                          
    3      0.095675   0.0        1.0                          
    4      0.1007     0.0        1.0                          
    5      0.090573   0.0        1.0                          
    6      0.079014   0.0        1.0                          



[array([ 0.]), 1.0]

## Create Submission

In [12]:
# Run test-time augmentation on the test set (is_test=True) instead of the
# validation set.
log_preds, y = learn.TTA(is_test=True)

# Exponentiate the returned log-predictions and average them over axis 0.
probs = np.exp(log_preds).mean(axis=0)

                                              

In [13]:
# Build the submission table: one row per test image, one probability column
# per class.
#
# Class names and test file names are read from `learn.data` -- the dataset the
# learner produced `probs` from -- rather than from the earlier `data` variable,
# which may still point at a dataset created before `learn.set_data(...)`.
test_fnames = learn.data.test_ds.fnames

# Use the base name so extra directory levels do not change the image id
# (the previous `split('/')[1]` picked the second path component, whatever it was).
test_names = [os.path.basename(fname) for fname in test_fnames]

df = pd.DataFrame(probs, columns=learn.data.classes)
df.insert(0, 'image_id', test_names)

# Sort rows by the integer embedded in the file name ("<prefix>-<num>.<ext>").
# Parsing the base name keeps a hyphen in a directory name from breaking this.
df['img_num'] = [int(name.split('-')[1].split('.')[0]) for name in test_names]
df = df.sort_values(by='img_num').drop('img_num', axis=1)

# Write next to the rest of the dataset; create the folder first so a missing
# directory does not throw away the finished predictions.
submission_dir = f'{PATH}submissions/'
os.makedirs(submission_dir, exist_ok=True)
df.to_csv(f'{submission_dir}sub10.csv', index=False)