In [None]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns 
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense
from sklearn.metrics import accuracy_score

In [None]:
# Read the prostate-cancer CSV from the Kaggle input directory and preview the first rows.
csv_path = '../input/prostate-cancer/Prostate_Cancer.csv'
data = pd.read_csv(csv_path)
data.head()

In [None]:
# Per-column dtype and non-null count, plus memory usage of the loaded frame.
data.info()

In [None]:
# (rows, columns) of the loaded frame.
print(data.shape)

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
data.describe()

# Exploratory Data Analysis (EDA)

In [None]:
# Bar chart of the class balance of `diagnosis_result`, followed by the raw counts.
diagnosis_counts = data['diagnosis_result'].value_counts()  # computed once, reused below

# Colour each bar by its label instead of by position: value_counts() orders the
# classes by frequency, so a fixed ['g', 'r'] list swaps colours depending on which
# class happens to be more common.
# NOTE(review): assumes green was meant for 'B' and red for 'M' — confirm.
# The 0/1 keys cover the encoded labels in case this cell is re-run after encoding.
label_colors = {'B': 'g', 'M': 'r', 0: 'g', 1: 'r'}
bar_colors = [label_colors.get(label, 'gray') for label in diagnosis_counts.index]

plt.figure(figsize=(10,8))
plt.bar(list(diagnosis_counts.index), diagnosis_counts, color=bar_colors)
plt.title('Diagnosis Result')
plt.show()
print(diagnosis_counts)

In [None]:
# Clustered heatmap of the pairwise correlations between the numeric columns.
# `diagnosis_result` still holds string labels at this point, and pandas >= 2.0
# raises on DataFrame.corr() when non-numeric columns are present, so restrict
# the frame to numeric dtypes explicitly (older pandas dropped them silently).
numeric_corr = data.select_dtypes(include='number').corr()
sns.clustermap(numeric_corr, annot=True)

In [None]:
# Feature cross: element-wise product of the perimeter and area columns.
perimeter_times_area = data['perimeter'].mul(data['area'])
data['perimeter x area'] = perimeter_times_area

In [None]:
# Preview the frame again to confirm the 'perimeter x area' column is present.
data.head()

In [None]:
# Pairwise scatter plots of the columns, coloured by the diagnosis label.
sns.pairplot(data=data, hue='diagnosis_result')

# Preprocess

In [None]:
# Encode the diagnosis label as a binary target: 'M' -> 1, 'B' -> 0.
target_dict = {'M': 1, 'B':0}
diagnosis = data['diagnosis_result']

# Re-running this cell on already-encoded labels would map every value to NaN,
# so only encode while the column still holds the raw labels.
if not diagnosis.isin(list(target_dict.values())).all():
    encoded = diagnosis.map(target_dict)
    if encoded.isna().any():
        # Fail before overwriting the column so the raw labels stay recoverable.
        raise ValueError(
            'diagnosis_result contains values that are not keys of target_dict: {}'.format(
                sorted(target_dict)
            )
        )
    data['diagnosis_result'] = encoded
data.head()

In [None]:
# Number of distinct values in the `id` column (compare with the row count above).
data['id'].nunique()

In [None]:
# Remove the `id` column from the frame, then preview the result.
data = data.drop(columns=['id'])
data.head()

# Train - Test Split

In [None]:
# Feature matrix: every column except the label. Target vector: the label column.
X = data.drop(columns=['diagnosis_result'])
y = data['diagnosis_result']

In [None]:
# Hold out 20% of the rows as the test split; the fixed seed keeps the split reproducible.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# Report the shapes of the train and test splits.
train_summary = 'X_train Shape : {} | y_train Shape : {}'.format(X_train.shape, y_train.shape)
test_summary = 'X_test  Shape : {} | y_test  Shape : {}'.format(X_test.shape, y_test.shape)
print(train_summary)
print(test_summary)

### Scale Data

In [None]:
# Min-max scale the features. The scaler is fitted on the training split only and
# then applied to both splits, so no test-set statistics enter the fit.
scaler = MinMaxScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Preview the scaled training features with their real column names.
# `data.columns[:-1]` begins with 'diagnosis_result' and omits the last feature,
# which shifts every label by one; `X.columns` is the feature list the scaler saw.
pd.DataFrame(X_train, columns=X.columns).head()

In [None]:
# Preview the scaled test features with their real column names.
# `data.columns[:-1]` begins with 'diagnosis_result' and omits the last feature,
# which shifts every label by one; `X.columns` is the feature list the scaler saw.
pd.DataFrame(X_test, columns=X.columns).head()

# Create and Train the Neural Network

In [None]:
# Fully connected binary classifier: 32 -> 64 ReLU units, then one sigmoid output unit.
n_features = X_train.shape[1]
model = Sequential([
    Dense(32, activation='relu', input_dim=n_features),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid'),
])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# The test split is passed as validation data, so the val_* metrics recorded in
# `history` are computed on it after every epoch.
history = model.fit(
    X_train,
    y_train,
    epochs=120,
    validation_data=(X_test, y_test),
)

In [None]:
# Training vs. validation accuracy per epoch, read from the Keras History object.
plt.figure(figsize=(10,8))
plt.plot(history.history['accuracy'], label='train')
plt.plot(history.history['val_accuracy'], color='orange', label='validation')
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend()  # without a legend the two curves cannot be told apart
plt.show()

In [None]:
# Training vs. validation loss per epoch, read from the Keras History object.
plt.figure(figsize=(10,8))
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='validation')
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend()  # without a legend the two curves cannot be told apart
plt.show()

In [None]:
# Hard class predictions for the test split.
# `Sequential.predict_classes` was removed in TensorFlow 2.6; for a single sigmoid
# output the equivalent is thresholding the predicted probability at 0.5.
y_pred = (model.predict(X_test) > 0.5).astype('int32')
y_pred

In [None]:
# Shape of the true test labels, for comparison with the shape of y_pred.
y_test.shape

In [None]:
# Drop the length-1 axes from the prediction array, then show the resulting shape.
y_pred = y_pred.squeeze()
y_pred.shape

In [None]:
# Fraction of test samples whose predicted class equals the true label.
test_accuracy = accuracy_score(y_test, y_pred)
print('Test Accuracy : ', test_accuracy)