In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import cv2
%matplotlib inline
from PIL import Image
# import PILs
from tensorflow.keras.layers import BatchNormalization
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
from __future__ import print_function
import keras
from keras.models import Sequential
#from keras.utils import to_categorical
from keras.layers import Dense, Conv2D, MaxPooling2D, Dropout, Flatten
from keras.preprocessing.image import ImageDataGenerator

from keras import applications
import keras.optimizers
from keras.callbacks import Callback, ModelCheckpoint, ReduceLROnPlateau, TensorBoard
from sklearn.metrics import accuracy_score, f1_score, precision_score, plot_roc_curve, recall_score, mean_squared_error
# from pycaret.regression import *
import pandas as pd
import pickle
import numpy as np
import os
import pathlib

In [2]:
#4

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix


In [3]:
def train_val_test_split(path, batch_size):

    batch = batch_size
    img_height = 200
    img_width = 200

    dataset_url = path

    data_dir = pathlib.Path(dataset_url)

    train_ds = tf.keras.preprocessing.image_dataset_from_directory(
      data_dir,
      validation_split=0.80,          # try out 20% as training data
      subset="training",
      image_size=(img_height, img_width),
      batch_size=batch,
      seed = 155)

    val_ds = tf.keras.preprocessing.image_dataset_from_directory(
      data_dir,
      validation_split=0.05,          #try out 5% as validation data
      subset="validation",
      image_size=(img_height, img_width),
      batch_size=batch, seed = 155)

    test_ds = train_ds.take(300) 
    train_ds = train_ds.skip(300)
    
    return train_ds, val_ds, test_ds

In [4]:
train_ds, val_ds, test_ds = train_val_test_split("coad_msi_mss",32)

Found 192312 files belonging to 2 classes.
Using 38463 files for training.
Found 192312 files belonging to 2 classes.
Using 9615 files for validation.


In [10]:

def densenet_model():
    densenet = applications.densenet.DenseNet201(
        weights= 'imagenet',
        include_top=False,
        input_shape=(200,200,3))

    model = Sequential([
        tf.keras.layers.experimental.preprocessing.Rescaling(1./255),
        densenet,
        MaxPooling2D(),
        Conv2D(32, 3, padding='same', activation='relu'),
        MaxPooling2D(),
        Dropout(0.4),
        Conv2D(64, 3, padding='same', activation='relu'),
        Flatten(),
        Dropout(0.4),
        BatchNormalization(),
        Dense(2, activation='softmax')
    ])
    return model



In [11]:
def model_running(epochs):
    model = densenet_model()
    opt = tf.keras.optimizers.SGD(learning_rate=0.01, name="SGD")
    model.compile(optimizer=opt, loss=tf.losses.SparseCategoricalCrossentropy(), metrics=['accuracy'])
    learn_control = ReduceLROnPlateau(monitor='loss', patience=5, verbose=1, factor=0.2, min_lr=1e-7)
    filepath="best_model.hdf5"
    checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')

    history = model.fit(train_ds, use_multiprocessing = True, workers = 32, epochs=epochs, validation_data=val_ds,
                        callbacks = [learn_control,checkpoint])

In [12]:
model_running(10)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet201_weights_tf_dim_ordering_tf_kernels_notop.h5
Epoch 1/10

Epoch 00001: val_accuracy improved from -inf to 0.66895, saving model to best_model.hdf5
Epoch 2/10

Epoch 00002: val_accuracy improved from 0.66895 to 0.75465, saving model to best_model.hdf5
Epoch 3/10

Epoch 00003: val_accuracy improved from 0.75465 to 0.77015, saving model to best_model.hdf5
Epoch 4/10

Epoch 00004: val_accuracy did not improve from 0.77015
Epoch 5/10

Epoch 00005: val_accuracy did not improve from 0.77015
Epoch 6/10

Epoch 00006: val_accuracy did not improve from 0.77015
Epoch 7/10

Epoch 00007: val_accuracy did not improve from 0.77015
Epoch 8/10

Epoch 00008: val_accuracy did not improve from 0.77015
Epoch 9/10

Epoch 00009: val_accuracy improved from 0.77015 to 0.78097, saving model to best_model.hdf5
Epoch 10/10

Epoch 00010: val_accuracy improved from 0.78097 to 0.84701, saving model to best_model.hdf5

In [10]:
best_model = keras.models.load_model("best_model.hdf5")

In [11]:
def evaluate_model():
    y_pred = []  # store predicted labels
    y_true = []  # store true labels

  # iterate over the dataset
    for image_batch, label_batch in test_ds:   # use dataset.unbatch() with repeat
        # append true labels
        y_true.append(label_batch)
        # compute predictions
        preds = best_model.predict(image_batch)
        # append predicted labels
        y_pred.append(np.argmax(preds, axis = - 1))

    # convert the true and predicted labels into tensors
    correct_labels = tf.concat([item for item in y_true], axis = 0)
    predicted_labels = tf.concat([item for item in y_pred], axis = 0)
  
    print(pd.crosstab(correct_labels, predicted_labels,rownames=["Actual"], colnames=["Predict"], margins= True))
    print(confusion_matrix(predicted_labels, correct_labels))

    return correct_labels, predicted_labels
  

In [12]:
correct_labels, predicted_labels = evaluate_model()

Predict     0     1   All
Actual                   
0        2792   968  3760
1         398  5442  5840
All      3190  6410  9600
[[2792  398]
 [ 968 5442]]


In [13]:
def compute_metrics(true, predicted):
    accuracy = accuracy_score(true, predicted)
    f1 = f1_score(true, predicted)
    precision = precision_score(true, predicted)
    recall = recall_score(true, predicted)
    RMSE = mean_squared_error(true, predicted, squared=False)

    print("accuracy",accuracy)
    print("f1-score", f1)
    print("precision", precision)
    print("recall", recall)
    print("RMSE", RMSE)

    return accuracy, f1, precision, recall, RMSE

accuracy, f1, precision, recall, RMSE = compute_metrics(correct_labels, predicted_labels)

accuracy 0.8577083333333333
f1-score 0.8884897959183672
precision 0.8489859594383775
recall 0.9318493150684931
RMSE 0.3772156765918759


In [None]:
def model_store(accuracy):
    file = open('model_database.csv')
    csvreader = csv.reader(file)
    num_rows=len(list(csvreader))
    if num_rows == 0 or num_rows == 1:
        with open('model_database.csv', 'w', encoding='UTF8', newline='') as csvfile:
            fieldnames = ['model', 'accuracy', 'f1', 'precision', 'recall', 'RMSE']
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerow({"model": "best_model", "accuracy":accuracy, 'f1':f1, 
                           'precision':precision, 'recall':recall, 'RMSE':RMSE})
        print("model added to database")
    elif num_rows >1:
        file = open('model_database.csv')
        csvreader = csv.reader(file)
        next(csvreader)
        for row in csvreader:
            if float(row[1]) < accuracy:
                with open('model_database.csv', 'w') as w_file:
                    fieldnames = ['model', 'accuracy', 'f1', 'precision', 'recall', 'RMSE']
                    writer = csv.DictWriter(w_file, fieldnames=fieldnames)
                    writer.writeheader()
                    writer.writerow({"model": "updated_model", "accuracy":accuracy , 'f1':f1, 
                                   'precision':precision, 'recall':recall, 'RMSE':RMSE})
                print("database updated with new model")
            else:
                print("database not updated")
    file.close()
  

In [None]:
model_store(accuracy)

database updated with new model
