In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Load the dataset CSV; the path is relative to the notebook's working directory.
df = pd.read_csv("../input/breast-cancer-wisconsin-data/data.csv")

In [None]:
# Preview the first rows to check that the file parsed as expected.
df.head()

In [None]:
# Column dtypes and non-null counts, to spot columns that cannot be used as features.
df.info()

In [None]:
# Discard the two columns that are not used in the analysis.
df = df.drop(columns=["Unnamed: 32", "id"])

In [None]:
# Summary statistics, transposed so that each column of df becomes one row.
df.describe().T

# **Data Analysis**

In [None]:
# Class balance: number of samples per diagnosis label.
sns.countplot(data=df, x="diagnosis")

In [None]:
from sklearn.preprocessing import LabelEncoder

In [None]:
# Encoder that maps the diagnosis labels to integer codes.
le = LabelEncoder()

In [None]:
# Overwrite the diagnosis column with its integer-encoded form.
# NOTE: re-running this cell re-fits the encoder on the already-encoded values,
# so le.classes_ would then hold the integer codes instead of the original labels.
df["diagnosis"] = le.fit_transform(df["diagnosis"])

In [None]:
# Labels seen by the encoder; the position of a label in this array is its integer code.
le.classes_

In [None]:
# Annotated heatmap of the pairwise correlations between all columns of df,
# drawn on an explicitly created axes object.
f, ax = plt.subplots(figsize=(16, 12))
ax.set_title('Pearson Correlation Matrix', fontsize=25)

sns.heatmap(
    df.corr(),
    ax=ax,
    annot=True,
    annot_kws={"size": 8},
    cmap="BuGn",
    vmax=0.7,  # the colour scale tops out at 0.7
    square=True,
    linewidths=0.25,
    linecolor='w',
    cbar_kws={"shrink": .9},
);

area_mean, radius_mean and perimeter_mean are correlated.

compactness_mean, concavity_mean and concave_points_mean are correlated. Keep all for now.

radius_se, perimeter_se and area_se are correlated.

radius_worst, perimeter_worst and area_worst are correlated.

compactness_worst, concavity_worst and concave_points_worst are correlated. Keep all for now.

In [None]:
# Columns to remove from the correlated radius / perimeter / area groups.
# The commented-out lists are alternative selections that keep a different
# member of each group.
#drop_features = ["radius_mean", "perimeter_mean", "perimeter_se", "area_se", "radius_worst", "area_worst"]
# Active choice: drop the radius_* and perimeter_* columns, which leaves
# area_mean, area_se and area_worst in the frame.
drop_features = ["radius_mean", "perimeter_mean", "perimeter_se", "radius_se", "radius_worst", "perimeter_worst"]
#drop_features = ["area_mean", "perimeter_mean", "area_se", "perimeter_se", "perimeter_worst", "area_worst"]


In [None]:
# Remove the redundant columns selected in drop_features.
df = df.drop(columns=drop_features)

In [None]:
# Confirm the reduced column set.
df.head()

In [None]:
# Bar chart of every column's correlation with the encoded diagnosis, sorted ascending.
# The diagnosis column itself is included in the plot.
df.corr()["diagnosis"].sort_values().plot.bar()

# **Train Validation Test Split**

In [None]:
# Target vector and feature matrix (every column except the target) as NumPy arrays.
y = df["diagnosis"].values
X = df.drop(columns="diagnosis").values

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of the samples as the final test set.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=10,
    test_size=0.2,
)

In [None]:
# Carve the validation set out of the remaining training data (25% of it).
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    random_state=0,
    test_size=0.25,
)

The second split takes 25% of the remaining 80%, so the final proportions are 60% train, 20% validation and 20% test.

In [None]:
# (rows, columns) of the training split.
X_train.shape

In [None]:
# (rows, columns) of the validation split.
X_val.shape

In [None]:
# (rows, columns) of the test split.
X_test.shape

# **Scaling the Data**

In [None]:
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Min-max scaler with default settings; it is fitted on the training split below.
scaler = MinMaxScaler()

In [None]:
# Learn the scaling parameters from the training split only, then scale that split.
scaler.fit(X_train)
X_train = scaler.transform(X_train)

In [None]:
# Apply the already-fitted scaler; it is not re-fitted on the validation data.
X_val = scaler.transform(X_val)

In [None]:
# Apply the already-fitted scaler; it is not re-fitted on the test data.
X_test = scaler.transform(X_test)

# **Model**

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout

In [None]:
# Small fully connected binary classifier: two ReLU hidden layers, each followed
# by dropout, and a single sigmoid output unit.
model = Sequential([
    Dense(24, activation="relu"),
    Dropout(0.2),
    Dense(16, activation="relu"),
    Dropout(0.5),
    Dense(1, activation="sigmoid"),
])

model.compile(optimizer="adam", loss="binary_crossentropy")

In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

In [None]:
# Write the model to best_model.h5 whenever the validation loss improves.
check_point = ModelCheckpoint(
    "best_model.h5",
    monitor="val_loss",
    save_best_only=True,
    verbose=1,
)

In [None]:
# Stop training once val_loss has gone 1000 epochs without improving.
early_stop = EarlyStopping(monitor="val_loss", patience=1000)

In [None]:
# Train for up to 10000 epochs, evaluating on the validation split each epoch;
# the callbacks handle checkpointing and early stopping.
model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    epochs=10000,
    callbacks=[check_point, early_stop],
    verbose=1,
)

In [None]:
# Per-epoch values recorded during the last fit call, one column per tracked quantity.
losses = pd.DataFrame(model.history.history)

In [None]:
# Training and validation loss curves over the epochs.
losses.plot()

In [None]:
# Lowest validation loss reached during training.
losses["val_loss"].min()

In [None]:
# Use the same Keras distribution (tensorflow.keras) that built and saved the model,
# rather than the standalone `keras` package.
from tensorflow.keras.models import load_model

In [None]:
# Reload the checkpoint file written by the ModelCheckpoint callback.
saved_model = load_model('best_model.h5')

In [None]:
# Turn the sigmoid probabilities into class labels by thresholding at 0.5.
# `Sequential.predict_classes` was removed in TensorFlow 2.6, so the thresholding
# is done explicitly on the output of `predict`; the result keeps the same
# (n_samples, 1) int32 layout that `predict_classes` produced.
predictions = (saved_model.predict(X_test) > 0.5).astype("int32")

In [None]:
from sklearn.metrics import classification_report,confusion_matrix

In [None]:
# Per-class precision, recall and F1 on the held-out test split.
print(classification_report(y_true=y_test, y_pred=predictions))

In [None]:
# Confusion matrix on the test split (rows: true class, columns: predicted class).
print(confusion_matrix(y_true=y_test, y_pred=predictions))