# Goal

Train an Inception v4 model (`inception_4`) on the training images, using side-on augmentation (`transforms_side_on`).

In [1]:
%matplotlib inline

In [2]:
from fastai.imports import *

from fastai.transforms import *
from fastai.conv_learner import *
from fastai.model import *
from fastai.dataset import *
from fastai.sgdr import *
from fastai.plots import *

from sklearn import metrics

In [3]:
# Root folder of the dataset; every other path in this notebook is built from it.
PATH = '../data/raw/HE_DL/'

# Model architecture handed to `tfms_from_model` and `ConvLearner.pretrained`.
arch = inception_4

# Starting image size and batch size for the data loaders.
sz = 350
bs = 32

In [4]:
# Location of the labels file (one header line followed by one row per image).
label_csv = f'{PATH}meta-data/train.csv'

# Count the labelled rows: every line except the header.
# The context manager closes the file handle, and the generator avoids
# materialising the whole file as a list just to count its lines.
with open(label_csv) as label_file:
    n = sum(1 for _ in label_file) - 1

# Indices of the rows held out for validation (the recorded output below
# shows 2600 of the 13000 rows being selected).
val_idxs = get_cv_idxs(n)
n, len(val_idxs)

(13000, 2600)

## Initial Model

In [5]:
def get_data(sz, bs, val_idxs):
    """Build the image classification data object for a given image size.

    Images are read from `train/` (and `test/` for the test set) under `PATH`,
    with labels taken from `meta-data/train.csv`. Transforms are created by
    `tfms_from_model` for the module-level `arch`, using the side-on
    augmentation set and a maximum zoom of 1.05.

    Args:
        sz: image size passed to `tfms_from_model`.
        bs: batch size passed to `ImageClassifierData.from_csv`.
        val_idxs: row indices of the labels CSV to use as the validation set.
            The training cells in this notebook pass `[0]` so that only a
            single image is held out.

    Returns:
        The data object as built when `sz > 300`; otherwise the result of
        `data.resize(500, 'tmp/')`.
    """
    augmented_tfms = tfms_from_model(arch, sz, aug_tfms=transforms_side_on, max_zoom=1.05)
    labels_path = f'{PATH}meta-data/train.csv'

    model_data = ImageClassifierData.from_csv(
        PATH,
        'train/',
        labels_path,
        bs=bs,
        tfms=augmented_tfms,
        val_idxs=val_idxs,
        test_name='test/'
    )

    # Large sizes use the data object directly.
    if sz > 300:
        return model_data

    # Small sizes go through `resize` first
    # (presumably to cache smaller copies under 'tmp/' - TODO confirm).
    return model_data.resize(500, 'tmp/')

## Full Training

In [31]:
# Full training: hold out only row 0 for validation so that all remaining
# labelled images are used for training. The validation loss/accuracy printed
# by the cells below is therefore computed on a single image.
data = get_data(sz, bs, val_idxs=[0])

In [32]:
# Create the learner for `arch` on `data` with precompute switched on, then run
# a first fit with arguments (1e-2, 1); the recorded output shows a single epoch.
# NOTE(review): 1e-2 is presumably the learning rate - confirm against the
# fastai version in use.
learn = ConvLearner.pretrained(arch, data, precompute=True)
learn.fit(1e-2, 1)

100%|██████████| 407/407 [03:16<00:00,  2.07it/s]
100%|██████████| 1/1 [00:00<00:00, 20.79it/s]


epoch      trn_loss   val_loss   accuracy                    
    0      0.258011   0.014966   1.0       



[array([ 0.01497]), 1.0]

In [33]:
# Switch precompute off and continue training with cycle_len=1; the recorded
# output shows five epochs.
# NOTE(review): turning precompute off is presumably what lets the
# augmentation transforms take effect - confirm.
learn.precompute=False
learn.fit(1e-2, 5, cycle_len=1)

epoch      trn_loss   val_loss   accuracy                    
    0      0.179727   0.006791   1.0       
    1      0.173601   0.004443   1.0                         
    2      0.158326   0.007129   1.0                         
    3      0.158994   0.00636    1.0                         
    4      0.13882    0.002539   1.0                         



[array([ 0.00254]), 1.0]

In [None]:
# Continue training on larger (400px) images. The batch size and the 1e-2 rate
# are the same as in the previous cells; if the batch size ever has to be
# reduced to fit the larger images in memory, lower the learning rate too.
learn.set_data(get_data(400, bs, val_idxs=[0]))
learn.fit(1e-2, 3, cycle_len=1)

epoch      trn_loss   val_loss   accuracy                    
    0      0.136386   0.002081   1.0       
    1      0.106722   0.002014   1.0                         
    2      0.109144   0.002135   1.0                          



[array([ 0.00214]), 1.0]

In [None]:
# Final, longer training run: three cycles starting at cycle_len=1 with
# cycle_mult=2 (presumably 1 + 2 + 4 = 7 epochs - TODO confirm).
# NOTE(review): the saved output stops partway through an epoch, so this run
# does not appear to have finished when the notebook was saved.
learn.fit(1e-2, 3, cycle_len=1, cycle_mult=2)

epoch      trn_loss   val_loss   accuracy                     
    0      0.103746   0.000747   1.0       
    1      0.095859   0.001966   1.0                          
    2      0.098145   0.000996   1.0                          
    3      0.115764   0.000884   1.0                          
 29%|██▉       | 120/407 [01:28<03:31,  1.36it/s, loss=0.0979]

## Create Submission

In [None]:
# Run test-time augmentation on the test set (is_test=True) instead of the
# validation set.
log_preds, y = learn.TTA(is_test=True)

# Exponentiate the predictions and average them over axis 0
# (presumably the axis indexing the augmented copies - TODO confirm).
probs = np.exp(log_preds).mean(axis=0)

In [None]:
# Submission table: one row per test image, one probability column per class.
df = pd.DataFrame(probs, columns=data.classes)

test_fnames = data.test_ds.fnames

# First column is the image id: the path component after the first '/'.
df.insert(0, 'image_id', [fname.split('/')[1] for fname in test_fnames])

# Temporary numeric sort key: the number that follows the first '-' in the
# file path. It is used only to order the rows and is dropped before writing.
df['img_num'] = [int(fname.split('-')[1].split('.')[0]) for fname in test_fnames]
df = df.sort_values(by='img_num').drop('img_num', axis=1)

df.to_csv('../data/raw/HE_DL/submissions/sub13.csv', index=False)