## Import Statements

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import zipfile
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.utils import shuffle
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *

from sklearn.metrics import confusion_matrix, classification_report
from sklearn.linear_model import LogisticRegression

## Load DataFrame


In [None]:
# Load the sample submission; its 'id' column is used below as the list of
# test images to score.
test = pd.read_csv('../input/histopathologic-cancer-detection/sample_submission.csv')

print('Test Set Size:', test.shape)

In [None]:
# Preview the first rows of the test frame.
test.head()

In [None]:
# Load the training labels table (the 'id' and 'label' columns are used below).
train = pd.read_csv('/kaggle/input/histopathologic-cancer-detection/train_labels.csv')

In [None]:
# Stage 1: keep a label-stratified 25% of the labelled rows in train_df_1;
# the other 75% goes to valid_df_1.
y_1 = train['label']
train_df_1, valid_df_1 = train_test_split(
    train,
    stratify=y_1,
    test_size=0.75,
    random_state=45,
)

# Stage 2: split that 25% subset 80/20 (again stratified on the label) into
# the frames used to fit and to evaluate the stacking model.
y = train_df_1['label']

train_df, valid_df = train_test_split(
    train_df_1,
    stratify=y,
    test_size=0.2,
    random_state=45,
)

print(train_df.shape)

## Extract Test Images

In [None]:
# Directory holding the test images; report how many entries it contains.
test_path = "../input/histopathologic-cancer-detection/test/"
print('Test Images:', len(os.listdir(test_path)))

In [None]:
def append_ext(fn):
    """Return the image file name for *fn* by adding the ``.tif`` extension.

    The extension is only added when it is missing, so re-running a notebook
    cell that applies this function does not produce ``name.tif.tif``.

    Args:
        fn: Image id or file name as a string.

    Returns:
        ``fn`` unchanged if it already ends with ``.tif``, else ``fn + ".tif"``.
    """
    if fn.endswith(".tif"):
        return fn
    return fn + ".tif"

In [None]:
# Turn the test ids into file names so they can be used as x_col by the
# image loaders.
test['id'] = test['id'].apply(append_ext)
test.head()

In [None]:
# Same id -> file name conversion for the training and validation frames.
train_df['id'] = train_df['id'].apply(append_ext)
valid_df['id'] = valid_df['id'].apply(append_ext)
train_df.head()

## Data Generator

In [None]:
BATCH_SIZE = 64

# Test images are only multiplied by 1/255; no augmentation is configured.
test_datagen = ImageDataGenerator(rescale=1/255)

# Settings shared by both test loaders. shuffle=False keeps the batches in the
# row order of `test`; class_mode=None means no label column is read.
_test_flow_args = dict(
    dataframe=test,
    directory=test_path,
    x_col='id',
    batch_size=BATCH_SIZE,
    shuffle=False,
    class_mode=None,
)

# The same files are served at two resolutions: 32x32 and 64x64.
test_loader_1 = test_datagen.flow_from_dataframe(target_size=(32, 32), **_test_flow_args)

test_loader_2 = test_datagen.flow_from_dataframe(target_size=(64, 64), **_test_flow_args)

In [None]:
train_path = '../input/histopathologic-cancer-detection/train'

# The loaders below use class_mode='categorical', so the label column is cast
# to strings first.
train_df['label'] = train_df['label'].astype(str)
valid_df['label'] = valid_df['label'].astype(str)

train_datagen = ImageDataGenerator(rescale=1/255)
valid_datagen = ImageDataGenerator(rescale=1/255)

# Settings shared by both training loaders. shuffle=False keeps the batches in
# the row order of train_df, which matters later when the predictions are
# paired with train_df['label'].
_train_flow_args = dict(
    dataframe=train_df,
    directory=train_path,
    x_col='id',
    y_col='label',
    batch_size=BATCH_SIZE,
    shuffle=False,
    class_mode='categorical',
)

# The same rows are served at two resolutions: 32x32 and 64x64.
train_loader_1 = train_datagen.flow_from_dataframe(target_size=(32, 32), **_train_flow_args)
train_loader_2 = train_datagen.flow_from_dataframe(target_size=(64, 64), **_train_flow_args)

In [None]:
# Preview the validation frame.
valid_df.head()

In [None]:
# Settings shared by both validation loaders. The validation images are read
# from the training directory (train_path); shuffle=False keeps the batches in
# the row order of valid_df.
_valid_flow_args = dict(
    dataframe=valid_df,
    directory=train_path,
    x_col='id',
    y_col='label',
    batch_size=BATCH_SIZE,
    shuffle=False,
    class_mode='categorical',
)

# The same rows are served at two resolutions: 32x32 and 64x64.
valid_loader_1 = valid_datagen.flow_from_dataframe(target_size=(32, 32), **_valid_flow_args)

valid_loader_2 = valid_datagen.flow_from_dataframe(target_size=(64, 64), **_valid_flow_args)

## Load Model

In [None]:
# Load the three saved base classifiers. In the prediction cell model_1 is fed
# the 64x64 loaders, while model_2 and model_3 are fed the 32x32 loaders.
model_1 = keras.models.load_model(
    '../input/cancer-detection-models/cancer_detection_model_v16.h5'
)

model_1.summary()

model_2, model_3 = (
    keras.models.load_model(model_file)
    for model_file in (
        '../input/cancer-detection-models/cancer_detection_model_v04.h5',
        '../input/cancer-detection-models/cancer_detection_model_v01.h5',
    )
)


## Base Model Predictions (Train, Validation, Test)

In [None]:
def _second_column(model, loader):
    """Run ``model.predict`` over ``loader`` and keep output column 1.

    NOTE(review): column 1 is presumably the score for label '1' -- confirm
    against how the saved models were trained.
    """
    return model.predict(loader)[:, 1]


# model_1 consumes the 64x64 loaders (*_loader_2); model_2 and model_3 consume
# the 32x32 loaders (*_loader_1).
train_pred_1 = _second_column(model_1, train_loader_2)
train_pred_2 = _second_column(model_2, train_loader_1)
train_pred_3 = _second_column(model_3, train_loader_1)


valid_pred_1 = _second_column(model_1, valid_loader_2)
valid_pred_2 = _second_column(model_2, valid_loader_1)
valid_pred_3 = _second_column(model_3, valid_loader_1)


test_pred_1 = _second_column(model_1, test_loader_2)
test_pred_2 = _second_column(model_2, test_loader_1)
test_pred_3 = _second_column(model_3, test_loader_1)

In [None]:
def _stack_features(pred_1, pred_2, pred_3):
    """Build a feature frame with one column per base model's predictions."""
    return pd.DataFrame({
        'model_1': pred_1,
        'model_2': pred_2,
        'model_3': pred_3,
    })


# Features for the stacking model are the three base-model scores per row.
X_train = _stack_features(train_pred_1, train_pred_2, train_pred_3)

X_valid = _stack_features(valid_pred_1, valid_pred_2, valid_pred_3)

# Targets come straight from the frames the loaders were built on.
y_train = train_df['label']
y_valid = valid_df['label']

X_test = _stack_features(test_pred_1, test_pred_2, test_pred_3)

In [None]:
# Sanity check: expect one row per training image and three feature columns.
X_train.shape


### Logistic Regression

In [None]:
from sklearn.model_selection import GridSearchCV

In [None]:
# Meta-learner: elastic-net logistic regression over the three base-model
# scores. 'saga' is the scikit-learn solver that supports penalty='elasticnet'.
lr_clf = LogisticRegression(max_iter=1000, solver='saga', penalty='elasticnet')

# 5 l1_ratio values x 4 C values = 20 candidates, each scored with 10-fold CV.
lr_parameters = {
    'l1_ratio': [0, 0.25, 0.5, 0.75, 1],
    'C': [0.01, 0.1, 1, 10]
}

# refit is the boolean True (not the string 'True'): a string value is meant
# to name the scorer to refit on in a multi-metric search.
lr_grid = GridSearchCV(lr_clf, lr_parameters, cv=10, refit=True, n_jobs=-1, verbose=10, scoring='accuracy')
lr_grid.fit(X_train, y_train)

# Best candidate, refitted on all of X_train by the grid search.
lr_model = lr_grid.best_estimator_

print('Best Parameters:', lr_grid.best_params_)
print('Best CV Score:  ', lr_grid.best_score_)
print('Training Acc:   ', lr_model.score(X_train, y_train))

In [None]:
# Stacking-model class probabilities for the test rows.
test_pred = lr_model.predict_proba(X_test)

In [None]:
# Shape check on the test probabilities.
print(test_pred.shape)


In [None]:
# First ten rows of test probabilities, rounded to two decimals.
print(test_pred[:10,].round(2))

### Evaluation

In [None]:
# Stacking-model class probabilities for the held-out validation rows.
valid_probs = lr_model.predict_proba(X_valid)

In [None]:
# Shape check on the validation probabilities.
print(valid_probs.shape)

In [None]:
# Map each row's most probable column back to its class label through
# lr_model.classes_, rather than assuming the column index equals the label.
valid_pred = lr_model.classes_[np.argmax(valid_probs, axis=1)]
print(valid_pred[:10])

### Confusion Matrix

In [None]:
# Cast the predictions to str so they compare against y_valid, whose labels
# were cast to str when the loaders were built.
valid_pred = valid_pred.astype(str)

In [None]:
# Confusion matrix of validation labels (first argument) against the stacking
# model's predictions (second argument), shown as an unlabeled frame.
cm = confusion_matrix(y_valid, valid_pred)
cm_df = pd.DataFrame(cm)
cm_df

In [None]:
# Same matrix with readable row/column names.
# NOTE(review): assumes the first row/column is label '0' ("No") and the
# second is label '1' ("Has") -- confirm against the label ordering.
pd.DataFrame(cm, index=['True No', 'True Has'], 
            columns=['Pred No', 'Pred Has'] )

### Classification Report

In [None]:
# Per-class precision / recall / F1 on the validation rows.
print(classification_report(y_valid, valid_pred))

## Prepare Submission

In [None]:
# Re-read the sample submission as the template to fill in; it is the same
# file `test` was loaded from.
submission = pd.read_csv('../input/histopathologic-cancer-detection/sample_submission.csv')
submission.head()

In [None]:

# Write column 1 of the stacking model's test probabilities into the 'label'
# column. Bracket assignment is used because attribute assignment would only
# set a Python attribute if the column did not already exist.
# NOTE(review): column 1 is presumably the probability of label '1' -- confirm
# against lr_model.classes_.
submission['label'] = test_pred[:, 1]
submission.head()

In [None]:
# Write the submission file with a header row and without the index column.
submission.to_csv('submission.csv', header=True, index=False)