## Import .py files in Google Colab
Source: https://stackoverflow.com/questions/48905127/importing-py-files-in-google-colab

In [0]:
from google.colab import drive, files
# Mount Google Drive at the relative path 'drive'. ROOT_PATH, defined further
# down in this notebook, starts with 'drive/My Drive/' and so depends on it.
drive.mount('drive')

def getLocalFiles():
    """Upload files from the local machine and save them to the working directory.

    Calls ``files.upload()`` and writes every ``name -> content`` pair it
    returns to a file called ``name``, opened in binary mode.

    Nothing is written when the upload result is empty.
    """
    uploaded = files.upload()
    if not uploaded:
        return

    for filename, content in uploaded.items():
        # The context manager closes (and therefore flushes) each file
        # deterministically instead of leaving that to garbage collection.
        with open(filename, 'wb') as out_file:
            out_file.write(content)

# Run the upload right away so the files are on disk before later cells run
# (presumably data.py and model.py, which the next cell imports — TODO confirm).
getLocalFiles()

In [0]:
import tensorflow as tf
import numpy as np
import os
import imgaug as aug

from keras.models import load_model
from data import data_helper
from model import create_vgg19, train_model, plot_model_performance, plot_model_history

### Global variables

In [0]:
# Project folder, given relative to the working directory (under 'drive').
ROOT_PATH = 'drive/My Drive/master1/medical_image_recognition/'
DATASET_NAME = 'kvasir_v2'

# Per-dataset sub-folders of ROOT_PATH; every value keeps its trailing slash.
DATASET_PATH = f'{ROOT_PATH}datasets/{DATASET_NAME}/'
ARRAY_PATH = f'{ROOT_PATH}arrays/{DATASET_NAME}/'
MODEL_PATH = f'{ROOT_PATH}models/{DATASET_NAME}/'

# Image dimensions passed to data_helper later in the notebook.
HEIGHT = 224
WIDTH = 224
CHANNELS = 3

# Seed the random sources used by this notebook to make results reproducible.
SEED = 1234
# NOTE(review): PYTHONHASHSEED is read at interpreter start-up, so assigning it
# here likely only affects child processes — confirm this is what is wanted.
os.environ['PYTHONHASHSEED'] = '0'
np.random.seed(SEED)
# NOTE(review): tf.set_random_seed is the TensorFlow 1.x name
# (tf.random.set_seed in 2.x) — confirm the TF version of the runtime.
tf.set_random_seed(SEED)
aug.seed(SEED)

### Create the data helper

In [0]:
# Build the dataset helper from the global paths and image dimensions;
# histogram equalization is switched off for this run.
dataclass = data_helper(
    DATASET_NAME,
    DATASET_PATH,
    ARRAY_PATH,
    height=HEIGHT,
    width=WIDTH,
    channels=CHANNELS,
    histogram_equalization=False,
)

# Show the directories and labels the helper exposes.
print(dataclass.directories)
print(dataclass.labels)

### Load the data

In [0]:
# Left disabled in this run; uncomment to call save_images() before loading.
#dataclass.save_images()
dataclass.get_images()

# Pull the inputs (x) and targets (y) of every split out of the helper.
# presumably dataclass.x / dataclass.y are filled by get_images() — TODO confirm
train_x, train_y = dataclass.x['train'], dataclass.y['train']
test_x, test_y = dataclass.x['test'], dataclass.y['test']
val_x, val_y = dataclass.x['val'], dataclass.y['val']

### Show images

In [0]:
#dataclass.show_image('train', 0)

## CNN

In [0]:
import pandas as pd

# One column per label name, one row per training target.
df = pd.DataFrame(train_y, columns=dataclass.labels.keys())

# For each label, keep the rows where that column equals 1 and sum every cell
# of those rows. Assuming train_y is one-hot encoded this is the number of
# training samples of that class — TODO confirm.
for column in df.columns:
    rows_with_label = df[df[column] == 1]
    total = np.array(rows_with_label).sum()
    print(f'{column} : {total}')

In [0]:
# Forwarded to train_model(class_weight_mapping=...) further down. The trailing
# comment keeps an example mapping; None presumably means "no class weighting"
# — TODO confirm against train_model.
class_weight_mapping = None #{0: 1., 1: 3.}

In [0]:
# Checkpoint and history files of the previously trained network.
save_model_path = f'{MODEL_PATH}vgg19blocks_fc1.hdf5'
save_history_path = f'{MODEL_PATH}vgg19blocks_fc1.history'

model = load_model(save_model_path)

# Reduce predictions and targets to class indices (argmax over the last axis).
test_scores = model.predict(test_x)
preds = np.argmax(test_scores, axis=-1)
ground_truth = np.argmax(test_y, axis=-1)

# Plot the test-set performance and the history stored next to the checkpoint.
plot_model_performance(preds, ground_truth, dataclass.labels.keys())
plot_model_history(save_history_path)

In [0]:
# Output files for this training run; they use a "_reload" suffix, so they
# differ from the checkpoint/history paths the model was loaded from.
save_reload_model_path = f'{MODEL_PATH}vgg19blocks_fc1_reload.hdf5'
save_reload_history_path = f'{MODEL_PATH}vgg19blocks_fc1_reload.history'

# training
# Continues from the model loaded in the previous cell.
# presumably metric/save_best_only drive checkpointing and stop_after is an
# early-stopping patience in epochs — TODO confirm against train_model.
train_model(
    model,
    train_x,
    train_y,
    val_x,
    val_y,
    save_reload_model_path,
    epochs=300,
    batch_size=32,
    metric='val_loss',
    save_best_only=True,
    save_weights_only=False,
    stop_after=20,
    save_history=True,
    save_history_path=save_reload_history_path,
    class_weight_mapping=class_weight_mapping,
)

## Check metrics

In [0]:
# NOTE(review): this reloads save_model_path (the checkpoint that was loaded
# before the training cell), not save_reload_model_path, which is the path
# handed to train_model — confirm which model these metrics should describe.
model = load_model(save_model_path)

In [0]:
# Reduce predictions and targets to class indices (argmax over the last axis).
test_scores = model.predict(test_x)
preds = np.argmax(test_scores, axis=-1)
ground_truth = np.argmax(test_y, axis=-1)

plot_model_performance(preds, ground_truth, dataclass.labels.keys())
# NOTE(review): plots save_history_path, not save_reload_history_path that was
# passed to train_model — confirm which history is intended here.
plot_model_history(save_history_path)