## Import Statements

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import zipfile
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.utils import shuffle
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

## Load DataFrame


In [None]:
# Load the competition's sample submission file. Its `id` column is later
# given a ".tif" extension and used as the list of test image filenames.
test = pd.read_csv('../input/histopathologic-cancer-detection/sample_submission.csv')

# Report (rows, columns) of the loaded frame.
print('Test Set Size:', test.shape)

In [None]:
# Preview the first rows of the sample-submission frame.
test.head()

In [None]:
# Read the training labels CSV; the `id` and `label` columns are used below
# for the train/validation split and as ground truth for the metrics.
train_data = pd.read_csv('/kaggle/input/histopathologic-cancer-detection/train_labels.csv')
train_data.head()

In [None]:
# Hold out 20% of the labelled rows for validation. Stratifying on the label
# keeps the class ratio the same in both parts, and the fixed seed makes the
# split reproducible.
y = train_data['label']
train_df, valid_df = train_test_split(
    train_data,
    test_size=0.2,
    random_state=45,
    stratify=y,
)

# One shape per line: training part first, validation part second.
print(train_df.shape, valid_df.shape, sep='\n')

In [None]:
def append_ext(fn):
    """Return the image filename for an id by appending the ".tif" extension.

    The function is idempotent: a name that already ends in ".tif" is returned
    unchanged, so re-running a notebook cell that applies it to a column does
    not produce names like "abc.tif.tif".

    Parameters
    ----------
    fn : str
        Image id, with or without the ".tif" extension.

    Returns
    -------
    str
        The name ending in exactly one ".tif" suffix.
    """
    if fn.endswith(".tif"):
        return fn
    return fn + ".tif"

In [None]:
# Turn image ids into filenames for both splits.
# `.assign` builds new frames rather than writing into the objects returned by
# train_test_split, which avoids pandas' SettingWithCopyWarning. The suffix
# check keeps the cell safe to re-run: ids that already end in ".tif" are left
# as they are instead of getting a second extension.
train_df = train_df.assign(
    id=train_df['id'].map(lambda fn: fn if fn.endswith('.tif') else append_ext(fn))
)
valid_df = valid_df.assign(
    id=valid_df['id'].map(lambda fn: fn if fn.endswith('.tif') else append_ext(fn))
)
valid_df.head()

In [None]:
# Ground-truth labels of the validation split; compared against the model's
# predictions in the confusion matrix and classification report below.
y_valid = valid_df['label']

## Extract Test Images

In [None]:
# Directory holding the test images; also used as the image root for the
# test loader further down.
test_path = "../input/histopathologic-cancer-detection/test/"
# Count the directory entries as a quick sanity check.
print('Test Images:', len(os.listdir(test_path)))

In [None]:
# Turn the test ids into image filenames. The suffix check keeps this cell
# safe to re-run: ids that already end in ".tif" are left untouched instead of
# receiving a second extension (which would make the loader find no files).
test['id'] = test['id'].map(lambda fn: fn if fn.endswith('.tif') else append_ext(fn))
test.head()

## Data Generator

In [None]:
# Directory holding the labelled training images; used as the image root for
# the validation loader (validation rows are a subset of the training set).
train_path = '../input/histopathologic-cancer-detection/train'


In [None]:
BATCH_SIZE = 64
# Images are resized to this (height, width) before being fed to the model.
# NOTE(review): presumably this matches the input size the saved model was
# trained with — confirm against `cnn.summary()`.
IMG_SIZE = (32, 32)

# Only rescale pixel values to [0, 1]; no augmentation at inference time.
test_datagen = ImageDataGenerator(rescale=1/255)
valid_datagen = ImageDataGenerator(rescale=1/255)

# Settings shared by both loaders. `shuffle=False` keeps predictions in the
# same order as the dataframe rows, and `class_mode=None` makes the loaders
# yield images only (no labels), which is what `predict` needs.
loader_kwargs = dict(
    x_col='id',
    batch_size=BATCH_SIZE,
    shuffle=False,
    class_mode=None,
    target_size=IMG_SIZE,
)

test_loader = test_datagen.flow_from_dataframe(
    dataframe=test,
    directory=test_path,
    **loader_kwargs,
)

valid_loader = valid_datagen.flow_from_dataframe(
    dataframe=valid_df,
    directory=train_path,
    **loader_kwargs,
)

# flow_from_dataframe silently drops rows whose file cannot be found. If that
# happened, predictions would no longer line up with the submission rows or
# with `y_valid`, so fail early with a clear message instead.
assert test_loader.n == len(test), (
    f'test loader found {test_loader.n} images for {len(test)} rows'
)
assert valid_loader.n == len(valid_df), (
    f'validation loader found {valid_loader.n} images for {len(valid_df)} rows'
)

## Load Model

In [None]:
# Load the previously trained model from its saved .h5 file and print the
# layer-by-layer summary.
cnn = keras.models.load_model('../input/cancer-detection-models/cancer_detection_model_v25.h5')
cnn.summary()

## Test Predictions

In [None]:
# Run the model over every batch of the test loader and show the shape of the
# resulting prediction array.
test_probs = cnn.predict(test_loader)
print(test_probs.shape)

In [None]:
# Peek at the first ten prediction rows, rounded to two decimals.
print(np.round(test_probs[:10], 2))

In [None]:
# Run the model over the validation loader (same row order as `valid_df`,
# since the loader was created with shuffle=False) and show the output shape.
valid_probs = cnn.predict(valid_loader)
print(valid_probs.shape)

In [None]:
# Predicted class per validation image: index of the largest value in each
# row of the prediction array.
valid_pred = valid_probs.argmax(axis=1)
print(valid_pred[:10])

In [None]:
# Confusion matrix of the validation predictions (rows: true label,
# columns: predicted label), shown as a plain DataFrame.
cm = confusion_matrix(y_true=y_valid, y_pred=valid_pred)
cm_df = pd.DataFrame(data=cm)
cm_df

In [None]:
# Same confusion matrix with readable row and column labels.
pd.DataFrame(
    data=cm,
    index=['True No', 'True Has'],
    columns=['Pred No', 'Pred Has'],
)

- We can see that 20432 samples are correctly predicted as having no cancer, and 16163 samples are correctly predicted as having cancer.
- There are 5750 samples predicted as having cancer that actually don't have cancer (false positives).
- There are 1660 samples predicted as having no cancer that actually have cancer (false negatives).

In [None]:
# Per-class precision, recall and F1 on the validation split.
print(classification_report(y_true=y_valid, y_pred=valid_pred))

## Prepare Submission

In [None]:
# Re-read the sample submission as a template: it still holds the original
# ids (without the ".tif" extension that was added to `test`).
submission = pd.read_csv('../input/histopathologic-cancer-detection/sample_submission.csv')
submission.head()

In [None]:
# Fill the `label` column with the second column of the model output.
# NOTE(review): presumably column 1 is the predicted probability of label 1
# (cancer present) — confirm against how the model was trained.
# Item assignment is used instead of `submission.label = ...` because
# attribute-style assignment silently creates a plain Python attribute, not a
# column, whenever the column name does not already exist.
submission['label'] = test_probs[:, 1]
submission.head()

In [None]:
# Write the submission file with a header row and without the index column.
submission.to_csv('submission.csv', index=False)