# Resnext101_64

In [1]:
%matplotlib inline

In [2]:
from fastai.imports import *

from fastai.transforms import *
from fastai.conv_learner import *
from fastai.model import *
from fastai.dataset import *
from fastai.sgdr import *
from fastai.plots import *

from sklearn import metrics

# Dataset root: the label CSV and the train/test image folders are resolved under it.
PATH = '../data/raw/HE_DL/'

# Architecture passed to tfms_from_model and ConvLearner.pretrained below.
arch = resnext101_64

# Starting image size and batch size used for the first training stage.
sz = 350
bs = 32

In [5]:
label_csv = f'{PATH}meta-data/train.csv'

# Count the lines in the label file and subtract one (presumably the header row).
# The context manager closes the file handle, and streaming the lines avoids
# materialising the whole file as a list just to take its length.
with open(label_csv) as label_file:
    n = sum(1 for _ in label_file) - 1

# Validation indices drawn by fastai's get_cv_idxs with its default arguments.
val_idxs = get_cv_idxs(n)
n, len(val_idxs)

(13000, 2600)

## Initial Model

In [6]:
def get_data(sz, bs, val_idxs):
    """Build the ImageClassifierData object for a given image size and batch size.

    Args:
        sz: image size passed to ``tfms_from_model``.
        bs: batch size passed to ``ImageClassifierData.from_csv``.
        val_idxs: row indices of the label CSV to hold out for validation.
            The "Full Training" cells pass ``[0]`` so that only a single image
            ends up in the validation set.

    Returns:
        The dataset as built when ``sz > 300``; otherwise the result of
        ``data.resize(500, 'tmp/')``.
    """
    transforms = tfms_from_model(arch, sz, aug_tfms=transforms_side_on, max_zoom=1.05)
    dataset = ImageClassifierData.from_csv(
        PATH,
        'train/',
        f'{PATH}meta-data/train.csv',
        bs=bs,
        tfms=transforms,
        val_idxs=val_idxs,
        test_name='test/',
    )

    # Larger sizes use the dataset as-is; sizes of 300 or less go through resize().
    if sz > 300:
        return dataset
    return dataset.resize(500, 'tmp/')

## Full Training

In [23]:
# Hold out only row 0 for validation so that every other labelled image is used
# for training; validation metrics on a single image are not informative.
data = get_data(sz=sz, bs=bs, val_idxs=[0])

In [24]:
# Create the learner from the pretrained `arch` with precompute=True and run a
# first fit at learning rate 1e-2 (the log below shows a single epoch).
# NOTE(review): `data` was built with val_idxs=[0], so val_loss / accuracy in
# the output are computed on one image and should not be read as a real score.
learn = ConvLearner.pretrained(arch, data, precompute=True)
learn.fit(1e-2, 1)

100%|██████████| 407/407 [06:50<00:00,  1.01s/it]
100%|██████████| 1/1 [00:00<00:00, 13.91it/s]
100%|██████████| 188/188 [03:12<00:00,  1.02s/it]


epoch      trn_loss   val_loss   accuracy                    
    0      0.23921    0.000154   1.0       



[array([ 0.00015]), 1.0]

In [25]:
# Switch precompute off on the learner, then continue training at the same
# learning rate: 5 cycles with cycle_len=1 (the log below shows epochs 0-4).
# NOTE(review): presumably precompute is disabled so the aug_tfms configured in
# get_data take effect - confirm against the fastai version in use.
learn.precompute=False
learn.fit(1e-2, 5, cycle_len=1)

epoch      trn_loss   val_loss   accuracy                    
    0      0.150326   0.000812   1.0       
    1      0.117086   0.00011    1.0                         
    2      0.127323   0.000186   1.0                         
    3      0.110786   2.1e-05    1.0                          
    4      0.09008    1.8e-05    1.0                          



[array([ 0.00002]), 1.0]

In [26]:
# Swap the learner's data for a 400px version and keep training for 3 cycles.
# 400 > 300, so get_data returns the dataset without calling resize().
# Batch size (32) and learning rate (1e-2) are the same values used in the
# earlier cells; the author's note was that if the batch size is lowered here,
# the learning rate should be lowered as well.
learn.set_data(get_data(400,bs=32, val_idxs=[0]))
learn.fit(1e-2, 3, cycle_len=1)

epoch      trn_loss   val_loss   accuracy                     
    0      0.094868   6e-06      1.0       
    1      0.096918   0.0        1.0                          
    2      0.077599   2e-06      1.0                          



[array([ 0.]), 1.0]

In [None]:
# full training: 3 cycles at lr 1e-2 with cycle_len=1 and cycle_mult=2
# (the log below shows 7 epochs, numbered 0-6).
learn.fit(1e-2, 3, cycle_len=1, cycle_mult=2)

epoch      trn_loss   val_loss   accuracy                     
    0      0.08171    0.0        1.0       
    1      0.060234   0.0        1.0                          
    2      0.06276    0.0        1.0                          
    3      0.067807   0.0        1.0                          
    4      0.065396   0.0        1.0                          
    5      0.049899   0.0        1.0                          
    6      0.048338   2e-06      1.0                          



[array([ 0.]), 1.0]

## Create Submission

In [None]:
# Run test-time augmentation against the test dataset (is_test=True) rather
# than the validation dataset.
log_preds, y = learn.TTA(is_test=True)

# Exponentiate the log-predictions and average them over axis 0.
probs = np.exp(log_preds).mean(axis=0)

 75%|███████▌  | 3/4 [12:27<04:09, 249.06s/it]

In [None]:
# Derive both the id column and the numeric sort key from the test file names.
test_fnames = data.test_ds.fnames
image_ids = [name.split('/')[1] for name in test_fnames]
image_nums = [int(name.split('-')[1].split('.')[0]) for name in test_fnames]

# One column per class, with the image id as the leading column.
df = pd.DataFrame(probs, columns=data.classes)
df.insert(0, 'image_id', image_ids)

# Order rows by the number embedded in the file name, then drop the helper key
# so it does not appear in the submission file.
df = (
    df.assign(img_num=image_nums)
      .sort_values(by='img_num')
      .drop(columns='img_num')
)

df.to_csv('../data/raw/HE_DL/submissions/sub11.csv', index=False)