# Quick Deep Learning Project
## Artificial Neural Network for cancer prediction classification <br>

Author: Piotr Druzdzel <br>
E-mail: piotr.druzdzel@gmail.com <br>

Data source: https://github.com/Pierian-Data

#### Timing the script:

In [1]:
from datetime import datetime
# Record the wall-clock start time so the total run time can be reported
# later (presumably at the end of the notebook -- not visible in this section).
startTime = datetime.now()

#### Dark theme fix:

In [None]:
from jupyterthemes import jtplot
# Style matplotlib figures with the 'monokai' theme (tick marks on, grid off);
# per the heading above this is meant to keep plots readable on a dark notebook.
jtplot.style(theme='monokai', context='notebook', ticks=True, grid=False)

#### Basic libraries:

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

#### Pandas output visibility:

In [None]:
# Raise pandas' display limits so long/wide frames are shown with fewer cut-offs.
pd.options.display.max_rows = 100
pd.options.display.max_columns = 50

#### Read the data:

In [None]:
# Load the dataset from the working directory. The bare `df` on the last line
# makes the notebook render the frame as the cell output.
df = pd.read_csv('cancer_classification.csv')
df

#### Basic checks:

In [None]:
# Print a per-column summary of the frame (dtypes and non-null counts).
df.info()

In [None]:
# Summary statistics, transposed so each original column becomes one row.
df.describe().T

In [None]:
# Number of missing values in each column.
df.isna().sum()

#### Basic EDA:

In [None]:
# Class balance of the target column. The variable is passed by keyword
# (`x=` plus `data=`) because passing it positionally is deprecated in
# seaborn 0.11 and no longer supported by later releases.
sns.countplot(x='benign_0__mal_1', data=df);

The dataset is reasonably balanced, so accuracy can be a good measure.

In [None]:
# Pairwise correlation of all columns, drawn as a heatmap on a wide canvas.
correlations = df.corr()
plt.figure(figsize=(22, 10))
sns.heatmap(correlations);

In [None]:
# Correlation of every feature with the target, sorted ascending.
# The target's own entry (its self-correlation) is removed by label rather
# than by position, so the plot stays correct whatever the column order is.
target_corr = df.corr()['benign_0__mal_1'].drop('benign_0__mal_1')
plt.figure(figsize=(22, 8))
target_corr.sort_values().plot(kind='bar');

### Train Test split:

In [None]:
# Separate the label column (y) from the remaining feature columns (X).
y = df['benign_0__mal_1']
X = df.drop(columns=['benign_0__mal_1'])

In [None]:
from sklearn.model_selection import train_test_split

# Two-stage split giving 60% train / 20% validation / 20% test:
#   1) hold out 20% of all rows as the test set;
#   2) take 25% of the remaining 80% (= 20% overall) as the validation set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train,
    test_size=0.25,
    random_state=42,
)

### Scaling:

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Fit the scaler on the training split only, then apply that same fitted
# scaling to all three splits, so validation/test data never influence it.
scaler = MinMaxScaler().fit(X_train)

X_train, X_val, X_test = (
    scaler.transform(split) for split in (X_train, X_val, X_test)
)

### ANN:

In [None]:
# Show the dimensions of the scaled training data as the cell output.
X_train.shape

In [None]:
import tensorflow as tf

# Two hidden ReLU layers of 30 units, each followed by 25% dropout, feeding a
# single sigmoid unit (binary classification: one output value in (0, 1)).
ann = tf.keras.models.Sequential([
    tf.keras.layers.Dense(units=30, activation='relu'),    # first hidden layer
    tf.keras.layers.Dropout(rate=0.25),                    # first dropout
    tf.keras.layers.Dense(units=30, activation='relu'),    # second hidden layer
    tf.keras.layers.Dropout(rate=0.25),                    # second dropout
    tf.keras.layers.Dense(units=1, activation='sigmoid'),  # output layer
])

In [None]:
# Adam optimiser with binary cross-entropy loss (pairs with the single
# sigmoid output unit); accuracy is tracked alongside the loss.
ann.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

# Stop training once the validation loss has stopped improving.
# With a patience of 100 the run continues for up to 100 epochs past the best
# one, so `restore_best_weights=True` is set to roll the model back to the
# best epoch; otherwise the later evaluation would use the last weights.
early_stop = EarlyStopping(
    monitor='val_loss',
    mode='min',                 # minimizing the validation loss; 'auto' usually works fine too
    verbose=2,                  # report when early stopping is triggered
    patience=100,               # epochs without improvement tolerated before stopping
    restore_best_weights=True,  # keep the weights from the best val_loss epoch
)

In [None]:
# Train for at most 500 epochs in mini-batches of 64, evaluating on the
# validation split after each epoch; `early_stop` may end the run sooner.
ann.fit(
    X_train,
    y_train,
    epochs=500,
    batch_size=64,
    validation_data=(X_val, y_val),
    callbacks=[early_stop],
)

### Evaluation:

In [None]:
# Metrics recorded during training, one column per metric.
model_history = pd.DataFrame.from_dict(ann.history.history)

In [None]:
# Training vs. validation loss over the epochs.
plt.figure(figsize=(8, 4))

for column, curve_label in (('loss', 'Training'), ('val_loss', 'Validation')):
    plt.plot(model_history[column], label=curve_label)

plt.title('Model Loss')
plt.xlabel('Epochs')
plt.legend(loc='best')
plt.show()

In [None]:
# Training vs. validation accuracy over the epochs.
plt.figure(figsize=(8, 4))

for column, curve_label in (('accuracy', 'Training'), ('val_accuracy', 'Validation')):
    plt.plot(model_history[column], label=curve_label)

plt.title('Model Accuracy')
plt.xlabel('Epochs')
plt.legend(loc='best')
plt.show()

### Predictions:

In [None]:
# `Sequential.predict_classes` was removed from Keras (TensorFlow 2.6+), so the
# class labels are derived directly from `predict`: the model ends in a single
# sigmoid unit, and thresholding its output at 0.5 gives the 0/1 label, which
# is what `predict_classes` did for this kind of output.
# NOTE: `np.argmax(..., axis=-1)` is only appropriate for multi-column
# (softmax) outputs; on a single output column it always returns 0.
predictions = (ann.predict(X_test) > 0.5).astype('int32')

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Confusion matrix on the test split: rows are actual classes, columns are
# predicted classes, with the integer count annotated in every cell.
cm = confusion_matrix(y_test, predictions)

plt.figure(figsize=(6, 5))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    annot_kws={'size': 18},
    cmap='viridis',
    xticklabels=['Pred. Benign', 'Pred. Malignant'],
    yticklabels=['Act. Benign', 'Act. Malignant'],
);

In [None]:
# Text report of the per-class metrics on the test split.
report = classification_report(
    y_test,
    predictions,
    target_names=['Benign', 'Malignant'],
)
print(report)