# Detecting Early Stage Knee Osteoarthritis Using Deep Transfer Learning
### By Lokesh Meesala (001078109)

In [None]:
import os
import utils
import glob
import json
import pandas as pd
import numpy as np
from sklearn.utils.class_weight import compute_class_weight

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input as vgg16_preprocess_input
from tensorflow.keras.applications.resnet_v2 import ResNet152V2
from tensorflow.keras.applications.resnet_v2 import preprocess_input as resnet152_preprocess_input

from tensorflow.keras.layers import Dense, Dropout 
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras import regularizers

## Parameters and Paths

In [None]:
# Root folders: image splits are read from data_dir, experiment artifacts go under exp_dir.
data_dir = "data/"
exp_dir = "experiments/"

# One sub-folder per dataset split inside data_dir.
train_dir, test_dir, val_dir = (
    os.path.join(data_dir, split_name) for split_name in ("train", "test", "val")
)

# Class labels, each prefixed with its numeric class index.
target_labels = [
    '0:Healthy',
    '1:Doubtful',
    '2:Minimal',
    '3:Moderate',
    '4:Severe',
]

# Shared sizes used when building the data generators and the model.
NUM_CLASSES = 5
IMAGE_SIZE = [224, 224]
BATCH_SIZE = 64

#### Delete Any Previously Augmented Data

In [None]:
# Clear the "aug" sub-folder of every class directory in the train and
# validation splits (presumably leftovers of an earlier augmentation run —
# the exact behavior lives in utils.del_aug_data).
# The class folders are named by class index, so iterate over NUM_CLASSES
# instead of repeating the literal 5.
for class_idx in range(NUM_CLASSES):
    for split_dir in (train_dir, val_dir):
        utils.del_aug_data(f"{split_dir}/{class_idx}/aug")

### Create Dataframes from Paths

In [None]:
# Build one dataframe per split from the image folders.
train_df, test_df, val_df = utils.get_data(train_dir, test_dir, val_dir, target_labels)

# "balanced" class weights derived from the training labels, returned in the
# order of target_labels.
# NOTE(review): these weights are computed before the augmentation step below
# changes the training set — confirm that is the intended class distribution.
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.array(target_labels),
    y=train_df.target_label.values,
)

# Keyed by the position of each class in target_labels.
class_weight_dict = {class_idx: weight for class_idx, weight in enumerate(class_weights)}

### Augment Train Data and Validation Data

In [None]:
# Run the offline augmentation for the train and validation splits with the
# same threshold. The target size comes from IMAGE_SIZE so it cannot drift
# from the size used by the data generators and the model.
# NOTE(review): the meaning of `thresh` is defined in utils.augment_dataset.
for split_df, split_dir in ((train_df, train_dir), (val_df, val_dir)):
    utils.augment_dataset(split_df, split_dir, IMAGE_SIZE, thresh=0.70)

### Create Dataframes with New Augmented Data

In [None]:
# Re-read the split folders after augmentation; the earlier train_df / val_df
# are left untouched so both views of the data stay available.
refreshed_frames = utils.get_data(train_dir, test_dir, val_dir, target_labels)
new_train_df, new_test_df, new_val_df = refreshed_frames

### Create Data Generators

In [None]:
# Random transforms applied on the fly by the training generator.
train_augmentation = {
    "rotation_range": 5,
    "height_shift_range": 0.5,
    "horizontal_flip": True,
    "vertical_flip": False,
    "fill_mode": 'nearest',
}

# Both generators run the ResNet preprocessing; only the training one augments.
train_data_gen = ImageDataGenerator(
    preprocessing_function=resnet152_preprocess_input,
    **train_augmentation,
)
val_test_data_gen = ImageDataGenerator(preprocessing_function=resnet152_preprocess_input)


def _flow(frame, directory, generator, shuffle):
    """Call utils.create_data_gen with the notebook-wide IMAGE_SIZE and BATCH_SIZE."""
    return utils.create_data_gen(frame, directory, generator, IMAGE_SIZE, BATCH_SIZE, shuffle=shuffle)


# Training data: "w_aug" reads the dataframe built after offline augmentation,
# "wo_aug" the one built before it.
# NOTE(review): train_data_wo_aug still goes through train_data_gen, so the
# random transforms above are applied when it is used for evaluation — confirm
# this is intended.
train_data_w_aug = _flow(new_train_df, train_dir, train_data_gen, True)
train_data_wo_aug = _flow(train_df, train_dir, train_data_gen, False)

# Validation data, with and without the offline-augmented images.
val_data_w_aug = _flow(new_val_df, val_dir, val_test_data_gen, False)
val_data_wo_aug = _flow(val_df, val_dir, val_test_data_gen, False)

# Test data, never shuffled.
test_data = _flow(new_test_df, test_dir, val_test_data_gen, False)

#### Parameters to Save the Experiments

In [None]:
# Identifier of this run; it also names the run's folder below exp_dir.
EXP_ID = '04_28_11_B64_AUG_TR_70_TS_70'

# Free-text summary of the configuration; passed to utils.save_exp later on.
DESC = """Selective Augmentation;
RESNET152;
tr 0.7 ts 0.7; 
reducelr factor 0.2 pat 3;
batch_size 64
epochs 100
Dense(128,activation='relu', kernel_regularizer=regularizers.l2(0.01)), 
    Dropout(0.3),
    Dense(128,activation='relu', kernel_regularizer=regularizers.l2(0.01)), 
    Dropout(0.3),
    Dense(32,activation='relu'), 
    Dense(16,activation='relu')
"""

# Run folder, kept with its trailing separator as before.
p = os.path.join(exp_dir, EXP_ID+'/')

# Build the checkpoint paths with os.path.join: p already ends in a separator,
# so appending "/checkpoint" by hand produced a doubled "//" in every path.
checkpoint_dir = os.path.join(p, "checkpoint")
os.makedirs(checkpoint_dir, exist_ok=True)

# The {epoch} / {val_loss} placeholders are left unformatted here; the string
# is handed to ModelCheckpoint as its filepath template.
checkpoint_filepath = os.path.join(checkpoint_dir, "weights-best-{epoch:02d}-{val_loss:.2f}.keras")

In [None]:
# Extra layers handed to utils.create_model: two L2-regularized Dense(128)
# layers, each followed by Dropout(0.3), then Dense(32) and Dense(16).
new_layers_list = []
for _block_idx in range(2):
    new_layers_list.append(
        Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.01))
    )
    new_layers_list.append(Dropout(0.3))
new_layers_list.extend(Dense(units, activation='relu') for units in (32, 16))

# Build the model from the ResNet152V2 constructor and the layers above,
# without loading any checkpoint weights.
model = utils.create_model(
    ResNet152V2,
    IMAGE_SIZE,
    NUM_CLASSES,
    new_layers_list,
    freeze_layers=True,
    chkp_weights=None,
)

# All three callbacks watch the validation loss.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=3,
    verbose=1,
)
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    verbose=1,
)
model_checkpoint_callback = ModelCheckpoint(
    filepath=checkpoint_filepath,
    monitor='val_loss',
    save_best_only=True,
)

## Fit the Model

In [None]:
# Train on the augmented training data and validate on the augmented
# validation data, using the class weights computed earlier.
# NOTE(review): epochs=3 here, while DESC records "epochs 100" — confirm which
# value this experiment is meant to use.
training_callbacks = [reduce_lr, early_stopping, model_checkpoint_callback]
model, hist = utils.fit_model(
    model,
    train_data_w_aug,
    val_data_w_aug,
    epochs=3,
    callbacks=training_callbacks,
    class_weight=class_weight_dict,
)

## Get Plots and Metrics

In [None]:
# Evaluate the trained model on the validation, train and test generators,
# in that order, with plots enabled and predictions returned.
val_eval_dict, train_eval_dict, test_eval_dict = (
    utils.run_prediction_save_metrics(
        model,
        data_gen,
        title=split_title,
        plot_metrics=True,
        return_preds=True,
    )
    for data_gen, split_title in (
        (val_data_wo_aug, "validation"),
        (train_data_wo_aug, "train"),
        (test_data, "test"),
    )
)

## Save the Experiments

In [None]:
# Test results: saved together with an explicit model name and the training history.
utils.save_exp(
    exp_dir,
    EXP_ID,
    model,
    DESC,
    test_eval_dict,
    target_labels,
    model_name=f"{EXP_ID}_v2",
    training_hist=hist,
)

# Train and validation results: saved with utils.save_exp's default
# model_name / training_hist.
for eval_dict in (train_eval_dict, val_eval_dict):
    utils.save_exp(exp_dir, EXP_ID, model, DESC, eval_dict, target_labels)

# Load Best Checkpoint

In [None]:
# Rebuild the same architecture, this time loading saved weights.
# NOTE(review): this path is hard-coded and uses the .hdf5 extension, whereas
# the checkpoint callback in this notebook writes .keras files — confirm the
# file exists and matches this architecture.
best_chkp_point = "best_checkpoint/weights-best-72-1.18.hdf5"
model = utils.create_model(
    ResNet152V2,
    IMAGE_SIZE,
    NUM_CLASSES,
    new_layers_list,
    freeze_layers=True,
    chkp_weights=best_chkp_point,
)

# Tag the description and switch the experiment id for the checkpoint run.
# NOTE(review): nothing in this cell saves results under "final_best".
DESC += "|best chkpoint"
EXP_ID = "final_best"

# Re-run the evaluation (validation, train, test — in that order) with the
# checkpoint-loaded model.
val_eval_dict, train_eval_dict, test_eval_dict = (
    utils.run_prediction_save_metrics(
        model,
        data_gen,
        title=split_title,
        plot_metrics=True,
        return_preds=True,
    )
    for data_gen, split_title in (
        (val_data_wo_aug, "validation_best"),
        (train_data_wo_aug, "train_best"),
        (test_data, "test_best"),
    )
)

# Analyze all the experiment results

In [None]:
# Gather every JSON result file found anywhere below exp_dir into one table.
exp_data = []

# glob's root_dir argument (Python 3.10+) yields paths relative to exp_dir.
for rel_path in glob.glob("**/*.json", root_dir=exp_dir, recursive=True):
    # Context manager so the file handle is closed after each read.
    with open(os.path.join(exp_dir, rel_path)) as f:
        exp_dict = json.load(f)
    # The first path component is the experiment folder. Normalizing and
    # splitting on os.sep keeps this working on every platform, not only with
    # Windows "\" separators.
    exp_dict['exp_id'] = os.path.normpath(rel_path).split(os.sep)[0]
    exp_data.append(exp_dict)

# Flatten the loaded dicts into columns.
res_df = pd.json_normalize(exp_data)

# One row per (experiment, evaluation title), keeping the first value of each
# reported column.
summary_columns = [
    'f1_score',
    'precision_score',
    'recall_score',
    'accuracy_score',
    'description',
]
final_res_df = res_df.groupby(['exp_id', 'title']).agg(
    {column: 'first' for column in summary_columns}
)

# Trim surrounding whitespace; .str.strip() leaves a missing description as
# NaN instead of raising like a per-row x.strip() would.
final_res_df['description'] = final_res_df['description'].str.strip()

final_res_df