In [None]:
# Mount Google Drive inside the Colab VM at /content/drive so that files stored
# under "My Drive" (e.g. the dataset archive) can be read by later cells.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Install 7-Zip and unpack the dataset archive into the current working directory.
# `-y` on both commands keeps the cell non-interactive: apt-get will not stop at a
# confirmation prompt, and 7za will not ask before overwriting files when the cell
# is re-run on an already extracted archive.
!apt-get install -y p7zip-full p7zip-rar
!7za x -y "/content/drive/My Drive/ML/Species.zip"

Reading package lists... Done
Building dependency tree       
Reading state information... Done
p7zip-full is already the newest version (16.02+dfsg-6).
p7zip-rar is already the newest version (16.02-2).
0 upgraded, 0 newly installed, 0 to remove and 39 not upgraded.

7-Zip (a) [64] 16.02 : Copyright (c) 1999-2016 Igor Pavlov : 2016-05-21
p7zip Version 16.02 (locale=en_US.UTF-8,Utf16=on,HugeFiles=on,64 bits,2 CPUs Intel(R) Xeon(R) CPU @ 2.30GHz (306F0),ASM,AES-NI)

Scanning the drive for archives:
  0M Scan /content/drive/My Drive/ML/                                     1 file, 561480818 bytes (536 MiB)

Extracting archive: /content/drive/My Drive/ML/Species.zip
--
Path = /content/drive/My Drive/ML/Species.zip
Type = zip
Physical Size = 561480818

  0%      0% 2 - Species/Acinetobacter.baumanii/Acinetobacter.baumanii_0001.tif          

In [None]:
import numpy as np
import cv2
import os
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split, StratifiedKFold, KFold
from keras.callbacks import LearningRateScheduler, ReduceLROnPlateau, EarlyStopping

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_curve,auc

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt


# Build the image array `x`, the label vector `y`, the class-name list `classes`
# and the cross-validation index pairs `folds` from the extracted 'Species' tree.
# Each sub-directory of 'Species' is treated as one class.

# Sort the class directories so the label <-> class-name mapping is the same on
# every run (os.listdir order is arbitrary), and ignore anything that is not a
# directory so a stray file does not crash the inner listdir.
files = sorted(
    entry for entry in os.listdir('Species')
    if os.path.isdir(os.path.join('Species', entry))
)

# Both image dimensions are divided by this factor before training.
factor = 3

# Number of stratified folds to create.
n_splits = 2

y = []
x = []
for label, file in enumerate(files):
  print(file)
  for image_name in sorted(os.listdir('Species/' + file)):
    image_path = 'Species/' + file + '/' + image_name
    image = cv2.imread(image_path)
    # cv2.imread returns None when the file cannot be decoded; skip those.
    if image is None:
      continue
    # ndarray.shape is (height, width, channels) but cv2.resize expects
    # dsize as (width, height); passing them in shape order distorts
    # non-square images.
    height, width = image.shape[:2]
    image = cv2.resize(image, (int(width / factor), int(height / factor)))
    x.append(image)
    y.append(label)

print('\nDone\n')

# classes[i] is the directory name of label i.
classes = list(files)

print('Creating x & y as numpy')

x = np.array(x).astype(np.float32)
y = np.array(y).astype(np.float32)

print('Creating', n_splits, 'Folds')

# Each entry of `folds` is [train_index, test_index] for one fold.
folds = []
for train_index, test_index in StratifiedKFold(n_splits=n_splits).split(x, y):
  folds.append([train_index, test_index])

print('\nDone\n')

Bifidobacterium.spp
Actinomyces.israeli
Acinetobacter.baumanii
Candida.albicans
Bacteroides.fragilis

Done

Creating x & y as numpy
Creating 5 Folds

Done



In [None]:
def plot_history(history):
    """Plot training vs. validation accuracy and loss for a single training run.

    Args:
        history: object whose ``history`` attribute is a dict holding the
            per-epoch lists ``'accuracy'``, ``'val_accuracy'``, ``'loss'`` and
            ``'val_loss'`` (for example the value returned by ``model.fit``).

    The curves are drawn from the recorded lists themselves, so the function no
    longer depends on any module-level epoch count.
    """
    metrics = history.history
    acc = metrics['accuracy']
    val_acc = metrics['val_accuracy']

    loss = metrics['loss']
    val_loss = metrics['val_loss']

    plt.figure(figsize=(12, 6))

    # Left panel: accuracy.
    plt.subplot(1, 2, 1)
    plt.plot(acc, label='Training Accuracy')
    plt.plot(val_acc, label='Validation Accuracy')
    plt.legend(loc='lower right')
    plt.title('Training and Validation Accuracy')
    plt.xlabel('Epoch')

    # Right panel: loss.
    plt.subplot(1, 2, 2)
    plt.plot(loss, label='Training Loss')
    plt.plot(val_loss, label='Validation Loss')
    plt.legend(loc='upper right')
    plt.title('Training and Validation Loss')
    plt.xlabel('Epoch')
    plt.show()

def show_con(model, x, y):
  """Display a row-normalised confusion matrix heatmap for `model` on (x, y).

  Args:
    model: object with a ``predict(x)`` method returning per-class scores;
      the predicted class is the arg-max along axis 1.
    x: inputs passed to ``model.predict``.
    y: true integer class labels.

  The matrix is always ``len(classes) x len(classes)`` so that it lines up with
  the module-level ``classes`` names used for the axes, even when some class is
  absent from both `y` and the predictions. ``tf.math.confusion_matrix`` raises
  if a label or prediction is not smaller than ``len(classes)``.
  """
  predicted = model.predict(x).argmax(1)
  con_mat = tf.math.confusion_matrix(
      labels=y, predictions=predicted, num_classes=len(classes)).numpy()

  # Normalise each row by the number of true samples of that class. Rows with
  # no samples keep a divisor of 1 so they stay all-zero instead of becoming NaN.
  row_totals = con_mat.sum(axis=1)[:, np.newaxis]
  row_totals[row_totals == 0] = 1
  con_mat_norm = np.around(con_mat.astype('float') / row_totals, decimals=2)

  con_mat_df = pd.DataFrame(con_mat_norm, index = classes, columns = classes)

  plt.figure(figsize=(16, 16))
  sns.heatmap(con_mat_df, annot=True,cmap=plt.cm.Blues)
  plt.ylabel('True label')
  plt.xlabel('Predicted label')
  # Lay out after the labels exist so they are taken into account.
  plt.tight_layout()
  plt.show()

# Mini-batch size handed to model.fit in fit_model.
batch_size = 10
# Number of training epochs per fit; create_model also divides the initial
# learning rate by this value to obtain the Adam `decay` argument.
epochs = 10

def schedulerExp(epoch, lr):
    """Learning-rate schedule: multiply the current rate by e**-0.1 every epoch.

    Args:
        epoch: epoch index passed in by the scheduler callback (not used; the
            decay is applied unconditionally).
        lr: current learning rate.

    Returns:
        float: ``lr * exp(-0.1)`` as a plain Python float, so the value can be
        consumed by the callback without carrying a tensor around.
    """
    return float(lr * np.exp(-0.1))

def get_callbacks(patience_lr, use_early_stopping=False, patience_stop=5):
    """Build the list of training callbacks.

    Args:
        patience_lr: ``patience`` given to ReduceLROnPlateau, which monitors
            the training ``'loss'`` and scales the learning rate by 0.1.
        use_early_stopping: when True, an EarlyStopping callback monitoring
            ``'val_accuracy'`` is appended. Defaults to False, which yields the
            same two callbacks as before.
        patience_stop: ``patience`` for the optional EarlyStopping callback.

    Returns:
        list: ``[ReduceLROnPlateau, LearningRateScheduler]`` followed by
        ``EarlyStopping`` if requested.
    """
    reduce_lr_loss = ReduceLROnPlateau(monitor='loss', factor=0.1, patience=patience_lr, verbose=1, min_delta=1e-5, mode='min')
    learningRateScheduler = LearningRateScheduler(schedulerExp)
    callbacks = [reduce_lr_loss, learningRateScheduler]

    # Only construct EarlyStopping when it is actually going to be used.
    if use_early_stopping:
        callbacks.append(EarlyStopping(monitor='val_accuracy', patience=patience_stop))
    return callbacks

def create_model(base_model, num_classes=None):
    """Attach a softmax classification head to `base_model` and compile it.

    Args:
        base_model: Keras model used as feature extractor; it is marked
            trainable, so its weights are fine-tuned together with the head.
        num_classes: number of output units. Defaults to ``len(classes)``,
            i.e. the number of class names collected at module level.

    Returns:
        A compiled Keras model: global average pooling -> Dropout(0.25) ->
        Dropout(0.5) -> Dense(num_classes, softmax), optimised with Adam and
        sparse categorical cross-entropy, reporting accuracy.
    """
    if num_classes is None:
        num_classes = len(classes)

    base_model.trainable = True
    pooled = tf.keras.layers.GlobalAveragePooling2D()(base_model.output)
    drop = tf.keras.layers.Dropout(0.25)(pooled)
    drop = tf.keras.layers.Dropout(0.5)(drop)
    prediction_layer = tf.keras.layers.Dense(num_classes, activation='softmax')(drop)

    lr = 1e-3

    # `learning_rate` replaces the deprecated `lr` keyword.
    # NOTE(review): `decay` is a legacy optimizer argument; confirm it is still
    # accepted by the installed TensorFlow version.
    opt = tf.keras.optimizers.Adam(learning_rate=lr, decay=lr / epochs)

    # The head already applies softmax, so the loss receives probabilities and
    # must not apply softmax again (from_logits=False).
    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)

    model = tf.keras.models.Model(inputs=base_model.input, outputs=prediction_layer)
    model.compile(optimizer=opt, loss=loss, metrics=["accuracy"])
    return model


def fit_model(model, x_train_cv, y_train_cv, x_valid_cv, y_valid_cv):
  """Train `model` on one fold and return the object produced by ``model.fit``.

  Args:
    model: compiled model exposing a Keras-style ``fit`` method.
    x_train_cv, y_train_cv: training inputs and labels.
    x_valid_cv, y_valid_cv: validation inputs and labels, evaluated after
      every epoch.

  Returns:
    Whatever ``model.fit`` returns (the training history).

  ``steps_per_epoch`` is deliberately not passed: for in-memory arrays the
  number of steps follows from ``batch_size``, and the previous
  ``len(x) / batch_size`` value was a float rather than a step count.
  """
  callbacks = get_callbacks(patience_lr=5)
  history = model.fit(x=x_train_cv, y=y_train_cv, epochs=epochs,
                      batch_size=batch_size, shuffle=True, verbose=1,
                      validation_data = (x_valid_cv, y_valid_cv), callbacks = callbacks)
  return history

# Pre-trained convolutional backbone (ImageNet weights, classification top removed);
# its input shape is taken from the first loaded image.
base_model = tf.keras.applications.Xception(input_shape=x[0].shape, include_top=False, weights="imagenet")

# Alternative backbones, currently disabled.
# NOTE(review): these lines reference `x_train`, which is not defined in the
# visible cells; use `x[0].shape` (as on the active line above) when re-enabling one.
#base_model = tf.keras.applications.DenseNet201(input_shape=x_train[0].shape, include_top=False, weights="imagenet")
#base_model = tf.keras.applications.InceptionResNetV2(input_shape=x_train[0].shape, include_top=False, weights="imagenet")
#base_model = tf.keras.applications.ResNet152V2(input_shape=x_train[0].shape, include_top=False, weights="imagenet")
#base_model = tf.keras.applications.InceptionV3(input_shape=x_train[0].shape, include_top=False, weights="imagenet")
#base_model = tf.keras.applications.MobileNetV2(input_shape=x_train[0].shape, include_top=False, weights="imagenet")
#base_model = tf.keras.applications.NASNetLarge(input_shape=x_train[0].shape, include_top=False, weights="imagenet")
#base_model = tf.keras.applications.EfficientNetB7(input_shape=x_train[0].shape, include_top=False, weights="imagenet")

In [None]:
# Cross-validation driver: trains and evaluates one model per selected fold.
# `start` and `end` are 1-based, inclusive fold numbers indexing into `folds`.

start = 1
end = 2

# Running sum of per-fold accuracies (kept because later cells may read it).
cv_sum = 0

# Accuracy of every fold that was run, in order.
cv_acc_list = []

# history.history dict of every fold that was run, in order.
historys = []

# create_model fine-tunes `base_model` in place. Without restoring the initial
# weights, a later fold would start from a backbone already trained on samples
# that belong to its own test split, inflating its score.
# NOTE: this snapshot is only the pre-trained state if `base_model` has not been
# trained yet; re-run the cell that builds `base_model` before re-running this one.
initial_base_weights = base_model.get_weights()

for i in range(start-1, end):
  print('\nFold ',i)
  print('                   ')

  train_index, test_index = folds[i]
  x_train_cv = x[train_index]
  y_train_cv = y[train_index]
  x_test_cv = x[test_index]
  y_test_cv = y[test_index]

  base_model.set_weights(initial_base_weights)
  model = create_model(base_model)
  history = fit_model(model, x_train_cv, y_train_cv, x_test_cv, y_test_cv)

  pred = model.predict(x_test_cv)
  acc = accuracy_score(y_test_cv, pred.argmax(1))
  cv_acc_list.append(acc)
  print('CV Evaluate:', model.evaluate(x_test_cv, y_test_cv))
  cv_sum = cv_sum + acc

  plot_history(history)
  show_con(model, x_test_cv, y_test_cv)

  historys.append(history.history)

# Average over the folds that were actually run rather than a fixed 5.
folds_run = len(cv_acc_list)
cv_avg = cv_sum / folds_run if folds_run else float('nan')
print(folds_run, 'Fold Cv Avg: ', cv_avg, 'List:', cv_acc_list)
print('Model: ', base_model.name)


Fold  0
                   


  super(Adam, self).__init__(name, **kwargs)


Epoch 1/10


  return dispatch_target(*args, **kwargs)


ResourceExhaustedError: ignored

In [None]:
import numpy as np
import cv2
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.lines as mlines

# Matplotlib colour codes assigned to folds in order; reused cyclically when
# there are more folds than colours.
_FOLD_COLORS = ('b', 'g', 'r', 'm', 'y')


def _plot_fold_curves(historys, key, ylabel, legend_title=None):
  """Draw one curve per fold for the per-epoch values stored under `key`.

  Args:
    historys: non-empty sequence of dicts, one per fold, each mapping `key`
      to a list of per-epoch values.
    key: name of the series to plot.
    ylabel: label for the y axis.
    legend_title: optional title shown above the legend entries.

  Raises:
    ValueError: if `historys` is empty.
  """
  if not historys:
    raise ValueError('historys is empty: no fold histories to plot')

  plt.figure(figsize=(20, 10))

  handles = []
  for fold_number, fold_history in enumerate(historys, start=1):
    color = _FOLD_COLORS[(fold_number - 1) % len(_FOLD_COLORS)]
    values = fold_history[key]
    # Markers and the connecting line are drawn by two separate plot calls.
    plt.plot(values, 'o' + color)
    plt.plot(values, color)
    handles.append(mlines.Line2D([], [], color=color, marker='o', label='Fold ' + str(fold_number)))

  plt.grid(True)

  plt.legend(loc='lower right', handles=handles, title=legend_title, framealpha=1, shadow=True, borderpad=1)
  plt.xlabel('Epochs')
  plt.ylabel(ylabel)

  plt.show()


def plot_folds_history_acc(historys):
  """Plot validation accuracy per epoch for every fold in `historys`.

  The legend title shows the mean of each fold's last recorded
  ``'val_accuracy'`` as a percentage, computed from `historys` itself so the
  function works for any number of folds.
  """
  legend_title = None
  if historys:
    final_accuracies = [fold_history['val_accuracy'][-1] for fold_history in historys]
    average = sum(final_accuracies) / len(final_accuracies)
    legend_title = 'AVG ' + str(round(average * 100, 2))
  _plot_fold_curves(historys, 'val_accuracy', 'Accuracy', legend_title=legend_title)


def plot_folds_history_lr(historys):
  """Plot the learning rate recorded under ``'lr'`` per epoch for every fold."""
  _plot_fold_curves(historys, 'lr', 'Learning Rate')


def plot_folds_history_loss(historys):
  """Plot the training loss per epoch for every fold in `historys`."""
  _plot_fold_curves(historys, 'loss', 'Training Loss')


def plot_folds_history_loss_val(historys):
  """Plot the validation loss per epoch for every fold in `historys`."""
  _plot_fold_curves(historys, 'val_loss', 'validation Loss')


# Render one figure per metric from the per-fold history dicts in `historys`:
# validation accuracy, learning rate, training loss and validation loss.
plot_folds_history_acc(historys)
plot_folds_history_lr(historys)
plot_folds_history_loss(historys)
plot_folds_history_loss_val(historys)