## IMPORTS

In [1]:
################## first-time setup only: run the install cells below once ################

In [None]:
!python3 -m pip install --upgrade pip

In [None]:
!pip install -r requirements.txt

In [2]:
######################################################

In [None]:
import fastbook
fastbook.setup_book()
from fastbook import *
from fastai.vision.all import *
import torchvision.models as models
import pandas as pd
import numpy as np
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

import warnings
warnings.filterwarnings("ignore")

## PATHS

In [None]:
# Root folder of the training images and the file list for the whole tree.
path_train = Path('gram_stain/')
train_fnames = get_image_files(path_train)

# One file list per class sub-folder of the training root.
background_fnames_train = get_image_files(path_train/'background')
maya_fnames_train = get_image_files(path_train/'maya')
neg_basil_fnames_train = get_image_files(path_train/'neg_basil')
neg_coco_fnames_train = get_image_files(path_train/'neg_coco')
pos_chain_fnames_train = get_image_files(path_train/'pos_chain')
pos_cluster_fnames_train = get_image_files(path_train/'pos_cluster')

# Summary table: (caption, file list) per class, printed between separator rules.
separator = '-'*50
class_counts = [
    ('Number of Background \t\t: ', background_fnames_train),
    ('Number of Maya \t\t\t: ', maya_fnames_train),
    ('Number of Negative Basil\t: ', neg_basil_fnames_train),
    ('Number of Negative Coco\t\t: ', neg_coco_fnames_train),
    ('Number of Positive Chain\t: ', pos_chain_fnames_train),
    ('Number of Positive Cluster\t: ', pos_cluster_fnames_train),
]
report_lines = [separator, 'TRAIN', separator]
report_lines += [caption + str(len(found)) for caption, found in class_counts]
report_lines += [separator, '\t\t\tTOTAL \t: ' + str(len(train_fnames)), separator]
print('\n'.join(report_lines))

In [None]:
# Class names, in the order handed to CategoryBlock(vocab=...) in get_data.
LABEL_COLS = [
    'background',
    'maya',
    'neg_basil',
    'neg_coco',
    'pos_chain',
    'pos_cluster',
]

In [None]:
# Build the training table: one row per image holding its class label, its file
# path and the cross-validation fold it is held out in.

# glob returns paths in arbitrary (filesystem-dependent) order; sorting first
# makes the seeded shuffle and the fold assignment below reproducible.
filepath_list = sorted(glob.glob('gram_stain/*/*.jpg'))
assert len(filepath_list) > 0, "no .jpg files found under gram_stain/*/"

# The class label is the name of the directory directly containing the image.
# Path handles the platform's separator, unlike splitting on "/".
labels = [Path(fp).parent.name for fp in filepath_list]
filepath = pd.Series(filepath_list, name='filepath').astype(str)
label = pd.Series(labels, name='label')
train_df = pd.concat([label, filepath], axis=1)
train_df = train_df.sample(frac=1, random_state=0).reset_index(drop=True)

N_FOLDS = 5
train_df['fold'] = -1
strat_kfold = MultilabelStratifiedKFold(n_splits=N_FOLDS, random_state=43, shuffle=True)

# Stratify on a one-hot indicator matrix of the class label. The previous code
# passed train_df.iloc[:, 1:] (the 'filepath' and 'fold' columns) as y, which
# carries no class information, so folds were not balanced per class.
label_indicators = pd.get_dummies(train_df['label']).to_numpy(dtype=int)
fold_col = train_df.columns.get_loc('fold')  # do not rely on 'fold' being the last column
for i, (_, test_index) in enumerate(strat_kfold.split(train_df.filepath.values, label_indicators)):
    # test_index holds positional row indices of the rows held out in fold i.
    train_df.iloc[test_index, fold_col] = i
train_df['fold'] = train_df['fold'].astype('int')
train_df = train_df.reset_index(drop=True)
train_df.head()

## AUGS AND DATALOADERS

In [None]:
# Batch-level transforms handed to the DataBlock; an empty list applies none.
augs_train = []


def fold_valid_indices(df, fold):
    """Return the positional row indices of ``df`` whose ``fold`` column equals ``fold``.

    Args:
        df: DataFrame with a ``fold`` column.
        fold: fold number to select.

    Returns:
        List of 0-based row positions (independent of ``df``'s index labels),
        in ascending order; empty if no row belongs to ``fold``.
    """
    return [pos for pos, row_fold in enumerate(df['fold'].tolist()) if row_fold == fold]


def get_data(fold, bs=2):
    """Build the DataLoaders for one cross-validation fold of ``train_df``.

    Args:
        fold: fold number whose rows are passed to ``IndexSplitter`` as the
            held-out (validation) indices.
        bs: batch size forwarded to ``dataloaders`` (default 2, as before).

    Returns:
        The DataLoaders created by ``DataBlock.dataloaders(train_df, bs=bs)``.
    """
    # The previous version took `.index` *after* `reset_index(drop=True)`, which
    # always produced 0..k-1 and therefore held out the first k rows of train_df
    # no matter which fold was requested. Use the rows' real positions instead.
    valid_idx = fold_valid_indices(train_df, fold)
    dblock = DataBlock(blocks=(ImageBlock(cls=PILImage), CategoryBlock(vocab=LABEL_COLS)),
                       splitter=IndexSplitter(valid_idx),
                       get_x=ColReader('filepath'),
                       get_y=ColReader('label'),
                       item_tfms=Resize(224, method="squish"),
                       batch_tfms=augs_train,
                       )
    dls = dblock.dataloaders(train_df, bs=bs)
    return dls

## TRAINING

In [None]:
# Training hyper-parameters read by the training cell.
learn_rate = 1e-4        # passed as lr_max to fit_one_cycle
reduce_patience = 3      # patience given to ReduceLROnPlateau
stop_patience = 5        # patience given to EarlyStoppingCallback
epoch = 10               # epoch count given to fit_one_cycle

In [None]:
# Train a MobileNetV2 classifier per fold. np.arange(0, 1, 1) yields only 0,
# so a single fold (fold 0) is trained here.
for f_i in np.arange(0, 1, 1):
    dls = get_data(f_i)
    learner_cnn = cnn_learner(
        dls,
        models.mobilenet_v2,
        cut=-1,
        pretrained=False,
        normalize=True,
        loss_func=CrossEntropyLossFlat(),
        opt_func=Adam,
        metrics=[accuracy],
    )
    # All three callbacks monitor the validation loss.
    # NOTE(review): ReduceLROnPlateau is combined with the one-cycle schedule;
    # confirm the two interact as intended.
    training_cbs = [
        SaveModelCallback(monitor='valid_loss', min_delta=0.0001, fname=f"mobilenet_fold_{f_i}"),
        ReduceLROnPlateau(monitor='valid_loss', min_delta=0.01, patience=reduce_patience),
        EarlyStoppingCallback(monitor='valid_loss', min_delta=0.0001, patience=stop_patience),
    ]
    learner_cnn.fit_one_cycle(epoch, lr_max=learn_rate, cbs=training_cbs)

In [None]:
# Signal the end of the training cell (message typo "fnished" corrected).
print("Training finished")