# Import namespaces

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import matplotlib.image as mpimg

from sklearn.model_selection import train_test_split

import pickle

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import os
from tensorflow.keras import layers
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, roc_auc_score, plot_confusion_matrix
import itertools

# Helper Functions

In [None]:
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Print a confusion matrix and draw it as an annotated heat map.

    The plot is drawn with the pyplot state machine, i.e. on whatever figure
    is current when the function is called.

    Note: this definition re-binds the name ``plot_confusion_matrix`` that the
    import cell also pulls from ``sklearn.metrics``; after this cell runs, the
    name refers to this helper.

    Parameters
    ----------
    cm : numpy.ndarray
        Confusion matrix indexed as ``cm[true_class, predicted_class]``.
    classes : sequence of str
        Tick labels, one per class, in the same order as the matrix axes.
    normalize : bool, default False
        If True, each row is divided by its row sum so the cells show the
        fraction of each true class. Rows that sum to zero are left as zeros.
    title : str
        Title placed above the plot.
    cmap : matplotlib colormap
        Colormap used for the heat map.

    Returns
    -------
    None
    """
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # A class with no true samples has a zero row sum; dividing would
        # produce NaNs (and a NaN colour threshold below), so keep such rows
        # at zero instead.
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype='float'),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Integer counts are printed as-is; anything else (normalized values or a
    # float matrix passed in directly) is shown with two decimals, since the
    # 'd' format code rejects non-integer values.
    is_integer_matrix = np.issubdtype(cm.dtype, np.integer)
    fmt = 'd' if (is_integer_matrix and not normalize) else '.2f'

    # Cells darker than half the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()

# Load Dataset

In [None]:
# Read the label file into the 'train' DataFrame, keeping every column as a
# string, and report its dimensions. The test data is not used in this notebook.

train = pd.read_csv(
    '../input/histopathologic-cancer-detection/train_labels.csv',
    dtype=str,
)

print('Training Set Size:', train.shape)

train.head()

In [None]:
# # Let's work with a 5% sample of the data to check that all the code works
# # Keep this commented out when running on the entire dataset
# ignore, train = train_test_split(train, test_size=0.05, random_state=1, stratify=train.label)
# print('Training Set Size:', train.shape)

Let's update the dataset so that each id includes the image filename extension.

In [None]:
# Append the '.tif' extension to every id. Ids that already end in '.tif' are
# left untouched, so re-running this cell does not produce 'name.tif.tif'.
train['id'] = train['id'].apply(
    lambda x: x if str(x).endswith('.tif') else f'{x}.tif'
)
train.head()

Since we don't have access to the test labels, we split the labeled training data and use the held-out validation split for evaluation.

In [None]:
# Hold out 20% of the labelled rows for validation, stratifying on the label
# column; the fixed random_state makes the split repeatable.
train_df, valid_df = train_test_split(
    train,
    test_size=0.2,
    random_state=1,
    stratify=train['label'],
)

print(train_df.shape)
print(valid_df.shape)

# Load saved model

In [None]:
# Restore the previously saved network from its .h5 file and list its layers.
cnn = keras.models.load_model(
    '../input/cancer-models/cancer_model_v01_version10.h5'
)
cnn.summary()

# Data Generator

In [None]:
BATCH_SIZE = 64
train_path = "../input/histopathologic-cancer-detection/train"
print('Training Images:', len(os.listdir(train_path)))

# Only rescale pixel values to [0, 1]; no augmentation is applied for evaluation.
valid_datagen = ImageDataGenerator(rescale=1/255)

# shuffle must be False for evaluation: the predictions returned by
# cnn.predict(valid_loader) come back in the order the generator yields
# batches, while valid_loader.classes is always in dataframe order. With
# shuffling enabled the two would not line up and every metric computed from
# them would be meaningless.
valid_loader = valid_datagen.flow_from_dataframe(
    dataframe = valid_df,
    directory = train_path,
    x_col = 'id',
    y_col = 'label',
    batch_size = BATCH_SIZE,
    seed = 1,
    shuffle = False,
    class_mode = 'categorical',
    target_size = (96,96)
)

# Test Predictions

In [None]:
# Score the validation generator with the loaded model and report the shape
# of the resulting prediction array.
valid_probs = cnn.predict(x=valid_loader)
print(valid_probs.shape)

# Evaluate the model

In [None]:
# Display names for the two classes, in class-index order.
target_names = ['Benign (No Cancer)', 'Malignant (Has Cancer)']

# Class indices recorded by the generator, and the predicted class per row
# (index of the largest score along axis 1).
y_val = valid_loader.classes
y_pred = valid_probs.argmax(axis=1)

## Accuracy

Accuracy is one metric for evaluating classification models. Informally, accuracy is the fraction of predictions our model got right. Formally, accuracy has the following definition. 

$
\text{Accuracy}=\frac{\text{Number of correct predictions}}{\text{Total number of predictions}}
$

In [None]:
# Fraction of validation samples whose predicted class equals the true class.
accuracy = accuracy_score(y_true=y_val, y_pred=y_pred)
print('For our model, the accuracy is %f' % accuracy)

## Confusion matrix

A confusion matrix tells us, for each class in our validation set, how many examples the model predicted correctly and how its mistakes are distributed across the other classes. In the case of an imbalanced dataset like the one we're dealing with, this is a better measure of our model's performance than overall accuracy.

In [None]:
# Show at most two decimals when NumPy arrays are printed.
np.set_printoptions(precision=2)

# Tabulate true classes against predicted classes.
cm = confusion_matrix(y_val, y_pred)

# Draw the raw (non-normalized) counts on a fresh figure.
plt.figure()
plot_confusion_matrix(
    cm,
    classes=target_names,
    normalize=False,
    title='Confusion matrix',
)

In [None]:
# cm is indexed as cm[actual][predicted], with Benign at index 0 and Malignant
# at index 1 (same order as target_names). Malignant is treated as the
# positive class, so:
tn = cm[0][0] # actual Benign and predicted Benign
fp = cm[0][1] # actual Benign and predicted Malignant
fn = cm[1][0] # actual Malignant and predicted Benign
tp = cm[1][1] # actual Malignant and predicted Malignant
print("The preceding confusion matrix shows that of the", tn + fp, "samples that were Benign,",
      "the model correctly classified", tn, "as Benign (", tn, "true negatives),",
      "and incorrectly classified", fp, "as Malignant (", fp, "false positives).")
print("Similarly, of", fn + tp, "samples that actually were Malignant,", tp, "were correctly classified (", tp, "true positives)",
      "and", fn, "were incorrectly classified as Benign (", fn, "false negatives).")

## Classification report

Classification report allows us to look at Precision and Recall.

Precision is defined as follows

$
Precision\ =\ \frac{TP}{TP+FP}
$

Precision helps us answer the question _"What proportion of positive identifications was actually correct?"_

Recall is defined as follows

$
Recall\ =\ \frac{TP}{TP+FN}
$

Recall helps us answer the question _"What proportion of actual positives was identified correctly?"_


In [None]:
# Per-class precision, recall and F1, labelled with the readable class names.
report = classification_report(y_val, y_pred, target_names=target_names)
print(report)