In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Dropout
from tensorflow.keras.callbacks import EarlyStopping

from sklearn.metrics import classification_report,confusion_matrix

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
input_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the raw dataset from the read-only Kaggle input directory.
DATA_PATH = "../input/breast-cancer-wisconsin-data/data.csv"
df = pd.read_csv(DATA_PATH)

In [None]:
# Preview the first rows of the freshly loaded frame.
df.head()

### Exploratory Data Analysis

In [None]:
# List the column labels to decide which ones to keep.
df.columns

In [None]:
### Remove id and Unnamed: 32
# Neither column is used as a model feature below ("Unnamed: 32" is presumably
# an empty artifact column from the CSV export - TODO confirm).
# A non-inplace drop with errors="ignore" keeps this cell idempotent: re-running
# it after the columns are already gone no longer raises a KeyError.
df = df.drop(columns=["Unnamed: 32", "id"], errors="ignore")

In [None]:
# Preview the frame again after the column drop.
df.head()

In [None]:
# Summary of column dtypes and non-null counts.
df.info()

In [None]:
# Descriptive statistics, transposed so each column of df becomes one row.
df.describe().T

In [None]:
# Per-column count of missing values (original author's note: no null values in the dataset).
df.isna().sum()

In [None]:
# Bar chart of how many rows fall into each diagnosis class.
plt.figure(figsize=(8,5))
ax = sns.countplot(x='diagnosis',data=df)
# Label every bar with its count. The height is formatted as a whole number
# (bar heights can be floats, which would otherwise print as e.g. "357.0") and
# the text is centred over the bar instead of using a fixed x-offset that only
# suits the default bar width.
for bar in ax.patches:
    count = bar.get_height()
    ax.annotate('{:.0f}'.format(count),
                (bar.get_x() + bar.get_width() / 2, count + 5),
                ha='center', fontweight='bold', color='red')

In [None]:
# Pairwise scatter plots for the label-sliced column range from 'diagnosis'
# through 'fractal_dimension_mean', coloured by diagnosis.
pairplot_cols = df.loc[:, 'diagnosis':'fractal_dimension_mean']
sns.pairplot(pairplot_cols, hue='diagnosis')

### Correlation Matrix

In [None]:
# Encoder instance used below to turn the diagnosis labels into integer codes.
label_encoder = LabelEncoder()

In [None]:
# Replace the diagnosis labels with integer codes so the column can take part
# in the correlation analysis and serve as the model target.
diagnosis_codes = label_encoder.fit_transform(df["diagnosis"])
df["diagnosis"] = diagnosis_codes

In [None]:
# Lower-triangle heatmap of the full correlation matrix.
# The matrix is computed once and reused. The mask is an explicit boolean upper
# triangle (diagonal included) rather than one derived from the correlation
# values themselves, so a coefficient of exactly 0 cannot slip through unmasked.
corr_full = df.corr()
upper_triangle = np.triu(np.ones_like(corr_full, dtype=bool))
plt.figure(figsize=(24,15))
sns.heatmap(corr_full, mask=upper_triangle, annot=True, fmt='.1f', cmap='viridis')

In [None]:
# Heatmap restricted to strong correlations (|r| > 0.6).
corr = df.corr()
# Entries that fail the threshold become NaN.
corr_mat = corr.where(corr.abs() > 0.6)
# Explicit boolean upper-triangle mask (diagonal included). The previous mask
# was np.triu over the NaN-filled float frame, which relied on implicit
# NaN/float -> bool casting inside seaborn.
upper_triangle = np.triu(np.ones_like(corr_mat, dtype=bool))
plt.figure(figsize=(24,15))
sns.heatmap(corr_mat, mask=upper_triangle, annot=True, fmt='.1f', cmap='viridis')

In [None]:
# Correlation of every feature with the encoded diagnosis, highest first.
# The target's correlation with itself is removed after sorting.
target_corr = df.corr()["diagnosis"]
ranked_corr = target_corr.sort_values(ascending=False).drop("diagnosis")
ranked_corr.plot(kind='bar', figsize=(12,6))
plt.title("Overall Correlation of features with Diagnosis")

### Data Preprocessing

In [None]:
# Split the frame into NumPy arrays: y is the encoded diagnosis column,
# X holds every remaining column.
y = df["diagnosis"].to_numpy()
X = df.drop(columns='diagnosis').to_numpy()

In [None]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Min-max scaler with the default target range spelled out.
scaler = MinMaxScaler(feature_range=(0, 1))

In [None]:
# Fit the scaler on the training features only, then scale them with it.
X_train = scaler.fit(X_train).transform(X_train)

In [None]:
# Scale the test features with the same scaler object that was fitted on the
# training split, so no test-set statistics are learned.
# NOTE(review): X_test is overwritten in place, so running this cell a second
# time would apply the scaling twice - re-run from the split cell if needed.
X_test = scaler.transform(X_test)

### Model Creation

In [None]:
# Dimensions (rows, columns) of the scaled training array.
X_train.shape

In [None]:
# Baseline fully connected classifier: two ReLU hidden layers (30 and 15 units)
# feeding a single sigmoid unit.
model = Sequential([
    Dense(30, activation='relu'),
    Dense(15, activation='relu'),
    Dense(1, activation='sigmoid'),  # binary classification output
])

model.compile(loss='binary_crossentropy', optimizer='adam')


In [None]:
# Train the baseline for the full 600 epochs, tracking loss on the test split
# after each epoch so the train/validation curves can be compared afterwards.
model.fit(
    x=X_train,
    y=y_train,
    epochs=600,
    validation_data=(X_test, y_test),
)

In [None]:
# Per-epoch metrics recorded by the last fit call, plotted as curves.
baseline_history = model.history.history
loss = pd.DataFrame(baseline_history)
loss.plot()

### The model appears to overfit the data. To prevent this, we retrain it using EarlyStopping and Dropout layers.

In [None]:
# Stop training once the validation loss has failed to improve (decrease)
# for 25 consecutive epochs; verbose=1 reports when that happens.
early_stop = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=25,
    verbose=1,
)

In [None]:
# Same architecture as the baseline, with 50% dropout after each hidden layer.
model = Sequential([
    Dense(30, activation='relu'),
    Dropout(0.5),
    Dense(15, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),  # binary classification output
])

model.compile(loss='binary_crossentropy', optimizer='adam')


In [None]:
# Train with early stopping. The validation loss that drives the stopping
# decision is computed on a 20% hold-out of the training data
# (validation_split) rather than on the test split. The test split is used for
# the final report below, so letting it choose the stopping epoch would leak it
# into model selection and make that report optimistic.
model.fit(X_train, y_train, validation_split=0.2, epochs=600, callbacks=[early_stop])

In [None]:
# Per-epoch metrics from the early-stopped run, plotted as curves.
dropout_history = model.history.history
loss_df_drop = pd.DataFrame(dropout_history)
loss_df_drop.plot()

In [None]:
# Sequential.predict_classes was removed in TensorFlow 2.6. For a single
# sigmoid output the equivalent is to threshold the predicted probability at
# 0.5 and cast to int32, which gives the same labels as before.
predictions = (model.predict(X_test) > 0.5).astype("int32")


In [None]:
# Text summary of per-class metrics on the test split.
report_text = classification_report(y_true=y_test, y_pred=predictions)
print(report_text)

In [None]:
# Raw confusion matrix: rows are true labels, columns are predicted labels.
test_confusion = confusion_matrix(y_test, predictions)
print(test_confusion)

In [None]:
# The same confusion matrix as an annotated heatmap with whole-number counts.
confusion_counts = confusion_matrix(y_test, predictions)
sns.heatmap(confusion_counts, annot=True, fmt='.0f')

### Conclusion: The model is very good at classifying the Benign and Malignant classes; using Dropout and EarlyStopping helps prevent overfitting in the neural network.