## IMPORTS

In [1]:
import fastbook
fastbook.setup_book()
from fastbook import *
from fastai.vision.all import *
import torchvision.models as models
import pandas as pd
import numpy as np
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

import warnings
warnings.filterwarnings("ignore")

## PATHS

In [5]:
path_train = Path('gram_stain/')
train_fnames = get_image_files(path_train)

background_fnames_train = get_image_files(path_train/'background')
maya_fnames_train = get_image_files(path_train/'maya')
neg_basil_fnames_train = get_image_files(path_train/'neg_basil')
neg_coco_fnames_train = get_image_files(path_train/'neg_coco')
pos_chain_fnames_train = get_image_files(path_train/'pos_chain')
pos_cluster_fnames_train = get_image_files(path_train/'pos_cluster')

print('-'*50)
print('TRAIN')
print('-'*50)
print('Number of Background \t\t: '+ str(len(background_fnames_train)))
print('Number of Maya \t\t\t: '+ str(len(maya_fnames_train)))
print('Number of Negative Basil\t: '+ str(len(neg_basil_fnames_train)))
print('Number of Negative Coco\t\t: '+ str(len(neg_coco_fnames_train)))
print('Number of Positive Chain\t: '+ str(len(pos_chain_fnames_train)))
print('Number of Positive Cluster\t: '+ str(len(pos_cluster_fnames_train)))
print('-'*50)
print('\t\t\tTOTAL \t: '+ str(len(train_fnames)))
print('-'*50)

--------------------------------------------------
TRAIN
--------------------------------------------------
Number of Background 		: 10
Number of Maya 			: 10
Number of Negative Basil	: 10
Number of Negative Coco		: 11
Number of Positive Chain	: 10
Number of Positive Cluster	: 10
--------------------------------------------------
			TOTAL 	: 61
--------------------------------------------------


In [6]:
LABEL_COLS = ['background', 'maya', 'neg_basil', 'neg_coco', 'pos_chain', 'pos_cluster']

In [8]:
filepath_list = glob.glob('gram_stain/*/*.jpg')
labels = [str(filepath_list[i]).split("/")[-2] for i in range(len(filepath_list))]

filepath = pd.Series(filepath_list, name='filepath').astype(str)
label = pd.Series(labels, name='label')

train_df = pd.concat([label, filepath], axis=1)
train_df = train_df.sample(frac=1, random_state=0).reset_index(drop=True)

N_FOLDS = 5
train_df['fold'] = -1
strat_kfold = MultilabelStratifiedKFold(n_splits=N_FOLDS, random_state=43, shuffle=True)
for i, (_, test_index) in enumerate(strat_kfold.split(train_df.filepath.values, train_df.iloc[:,1:].values)):
    train_df.iloc[test_index, -1] = i
train_df['fold'] = train_df['fold'].astype('int')
train_df = train_df.reset_index(drop=True)
train_df.head()

Unnamed: 0,label,filepath,fold
0,maya,gram_stain/maya/e83b3a4c-3472-11ee-8605-48b02dd3da7b.jpg,2
1,pos_cluster,gram_stain/pos_cluster/f546b126-3472-11ee-8605-48b02dd3da7b.jpg,2
2,neg_basil,gram_stain/neg_basil/ea986832-3472-11ee-8605-48b02dd3da7b.jpg,2
3,maya,gram_stain/maya/e8cbf87a-3472-11ee-8605-48b02dd3da7b.jpg,1
4,background,gram_stain/background/e50585da-3472-11ee-8605-48b02dd3da7b.jpg,0


## AUGS AND DATALOADERS

In [9]:
augs_train = []

def get_data(fold):
    train_df_fold = ((train_df.loc[train_df.fold==fold]).reset_index(drop=True)).index
    dblock = DataBlock(blocks=(ImageBlock(cls=PILImage), CategoryBlock(vocab=LABEL_COLS)),
                       splitter=IndexSplitter(train_df_fold),
                       get_x=ColReader('filepath'),
                       get_y=ColReader('label'),
                       item_tfms=Resize(300, method="squish"),
                       batch_tfms=augs_train,
                       )
    dls = dblock.dataloaders(train_df, bs=2)
    return dls

### TRAINING

In [10]:
learn_rate = 1e-04
reduce_patience= 3
stop_patience = 5
epoch = 10

In [11]:
for f_i in np.arange(0, 1, 1):
    dls = get_data(f_i)
    learner_cnn = cnn_learner(dls, models.mobilenet_v2, cut=-1, pretrained=False, normalize=True, loss_func=CrossEntropyLossFlat(), opt_func=Adam, metrics=[accuracy] )
    #learner_cnn = cnn_learner(dls, xresnet50, normalize=True, n_out=len(dls.vocab), loss_func=CrossEntropyLossFlat(), opt_func=Adam, metrics=[accuracy])
    
    learner_cnn.fit_one_cycle(epoch, lr_max=learn_rate,
                                cbs=[
                                    SaveModelCallback(monitor='valid_loss', min_delta=0.0001, fname="mobilenet_fold_" +str(f_i)),
                                    ReduceLROnPlateau(monitor='valid_loss', min_delta=0.01, patience=reduce_patience),
                                    EarlyStoppingCallback(monitor='valid_loss', min_delta=0.0001, patience=stop_patience)
                                    ]
                             )

epoch,train_loss,valid_loss,accuracy,time
0,3.122701,5.117309,0.153846,01:26
1,3.156076,9.934793,0.153846,00:14
2,3.255619,8.288945,0.153846,00:15
3,3.018598,3.020062,0.076923,00:14
4,3.175947,2.633605,0.153846,00:14
5,2.922782,2.763947,0.076923,00:15
6,2.882091,2.839972,0.076923,00:14
7,2.981508,2.801339,0.076923,00:15
8,3.004668,2.38648,0.153846,00:14
9,2.890116,2.410639,0.153846,00:15


Better model found at epoch 0 with valid_loss value: 5.117309093475342.
Better model found at epoch 3 with valid_loss value: 3.020061731338501.
Better model found at epoch 4 with valid_loss value: 2.633605480194092.
Epoch 7: reducing lr to 1.7197876422966228e-06
Better model found at epoch 8 with valid_loss value: 2.386479616165161.


# EVALUATION