In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
from keras.models import Sequential
from keras.layers import Dense,Dropout
from scipy import stats
# Show every row when a frame is displayed (no truncation of long outputs).
pd.set_option('display.max_rows',None)
import warnings
# Silence all warnings for the rest of the session.
# NOTE(review): a blanket 'ignore' also hides deprecation warnings raised by
# numpy / seaborn / keras calls further down — consider narrowing the filter.
warnings.filterwarnings('ignore')

In [None]:
# Load the breast-cancer-wisconsin CSV from the notebook's input directory.
df = pd.read_csv(filepath_or_buffer='../input/breast-cancer-wisconsin-data/data.csv')

In [None]:
# Preview the first rows of the freshly loaded frame.
df.head()

In [None]:
# Print the frame's summary (columns, dtypes, non-null counts).
df.info()

In [None]:
# Descriptive statistics for the frame's columns.
df.describe()

In [None]:
# Remove the 'id' and 'Unnamed: 32' columns, which are not used as features.
# Rebinding (instead of inplace=True) together with errors='ignore' lets this
# cell be re-run without a KeyError once the columns are already gone.
df = df.drop(columns=['id', 'Unnamed: 32'], errors='ignore')

In [None]:
# (rows, columns) of the frame at this point.
df.shape

In [None]:
# Class balance of the target column.
# The column is passed by keyword: on current seaborn the first positional
# argument of countplot is `data`, so a bare Series is no longer read as `x`.
sns.countplot(x='diagnosis', data=df);

In [None]:
# Split off the target: pop() hands back the 'diagnosis' column and removes
# it from df in place, so df keeps only the remaining columns.
y = df.pop('diagnosis')

In [None]:
# Partition the column labels by position into three consecutive groups of ten.
# (presumably the *_mean, *_se and *_worst measurements — verify against the CSV header)
column_names = df.columns.tolist()
features_mean = column_names[0:10]
features_se = column_names[10:20]
features_worst = column_names[20:30]

In [None]:
# Distribution of each feature in features_mean on a 5x2 grid of panels.
# The legend entry of every panel is that feature's skewness (two decimals).
num_rows,num_cols=5,2
fig,axes=plt.subplots(nrows=num_rows,ncols=num_cols,figsize=(20,12))
# axes.ravel() walks the grid row by row, i.e. the same order as
# (index // num_cols, index % num_cols).
for panel,column in zip(axes.ravel(),features_mean):
    skew_label='%.2f'%(df[column].skew())
    sns.distplot(df[column],color='m',label=skew_label,ax=panel)
    panel.legend(loc='best')
    

In [None]:
# Compare each feature in features_mean between the two diagnosis classes:
# one density curve per class, overlaid on the same panel.
df_b=df[y=='B']
df_m=df[y=='M']
num_rows,num_cols=5,2
fig,axes=plt.subplots(num_rows,num_cols,figsize=(20,10))
# axes.ravel() visits the panels row by row, matching the original
# (index // num_cols, index % num_cols) addressing; the debug print of the
# grid coordinates has been removed.
for ax,column in zip(axes.ravel(),features_mean):
    # Explicit labels + legend so the red and blue curves can be told apart.
    # NOTE(review): `shade` is deprecated in favour of `fill` on newer seaborn
    # releases — switch once the minimum seaborn version is confirmed.
    sns.kdeplot(df_b[column],color='red',shade=True,label='B',ax=ax)
    sns.kdeplot(df_m[column],color='blue',shade=True,label='M',ax=ax)
    ax.set_xlabel(column)
    ax.legend(loc='best')
fig.tight_layout()


In [None]:
# Heatmap of the pairwise correlations between the features in features_mean.
plt.figure(figsize=(12,10))
corr_matrix=df[features_mean].corr()
# Mask the upper triangle (diagonal included) so each pair is drawn only once.
# Builtin `bool` replaces the `np.bool` alias, which was deprecated in
# NumPy 1.20 and removed in 1.24 (AttributeError on current NumPy).
mask=np.triu(np.ones_like(corr_matrix,dtype=bool))
sns.heatmap(corr_matrix,mask=mask,square=True,annot=True,fmt='.2f');

In [None]:
# Keep only the strict upper triangle of the correlation matrix (k=1 excludes
# the diagonal); every other entry becomes NaN, so each pair is seen once.
# Builtin `bool` replaces the removed `np.bool` alias.
upper_triangle=corr_matrix.where(np.triu(np.ones(corr_matrix.shape),k=1).astype(bool))
# Flag a column when its correlation with any earlier column exceeds 0.95 in
# magnitude. abs() makes strong negative correlations count as well, matching
# the full-matrix analysis, which works on df.corr().abs().
mean_set_to_drop=[column for column in upper_triangle.columns if (upper_triangle[column].abs()>0.95).any()]
mean_set_to_drop

In [None]:
# Heatmap of absolute pairwise correlations across all remaining columns.
plt.figure(figsize=(20,12))
corr_full=df.corr().abs()
# Mask the upper triangle (diagonal included) so each pair is drawn only once.
# Builtin `bool` replaces the `np.bool` alias, which was deprecated in
# NumPy 1.20 and removed in 1.24 (AttributeError on current NumPy).
mask=np.triu(np.ones_like(corr_full,dtype=bool))
sns.heatmap(corr_full,mask=mask,fmt='.2f',square=True,annot=True,cmap='YlGnBu');

In [None]:
# Strict upper triangle of the absolute correlation matrix (k=1 drops the
# diagonal); all other entries become NaN so each pair is considered once.
# Builtin `bool` replaces the removed `np.bool` alias.
upper_triangle_full=corr_full.where(np.triu(np.ones(corr_full.shape),k=1).astype(bool))
# A column is marked for removal when its absolute correlation with any
# earlier column is above 0.95 (NaN comparisons evaluate to False).
all_columns_to_be_removed=[column for column in upper_triangle_full.columns if (upper_triangle_full[column]>0.95).any()]
all_columns_to_be_removed


In [None]:
# Discard the columns listed in all_columns_to_be_removed from the frame.
df = df.drop(columns=all_columns_to_be_removed)

In [None]:
# Preview the frame after the highly correlated columns were dropped.
df.head()

In [None]:
# X is a second name for the reduced frame (same object, not a copy).
X=df



In [None]:
# Keep the column labels so they can be re-attached after scaling.
X_cols=X.columns

In [None]:
from sklearn.preprocessing import RobustScaler
# Scale every column of X with a RobustScaler.
# NOTE(review): the scaler is fitted on all rows, before the train/test split
# performed later in the notebook, so test rows influence the scaling
# statistics — consider fitting on the training rows only.
scale = RobustScaler()
X_scale = scale.fit(X).transform(X)


In [None]:
# Wrap the scaled values in a DataFrame again, restoring the column labels.
X_scale_df = pd.DataFrame(data=X_scale, columns=X_cols)

In [None]:
# Preview the scaled feature frame.
X_scale_df.head()

In [None]:
from sklearn.preprocessing import LabelEncoder
# Replace the diagnosis labels in y with integer codes.
# `le` stays available afterwards for mapping codes back to labels.
le = LabelEncoder()
le.fit(y)
y = le.transform(y)

In [None]:
# Wrap the encoded labels in a pandas Series.
y = pd.Series(data=y)

In [None]:
# Number of samples per encoded class.
y.value_counts()

In [None]:
from sklearn.model_selection import train_test_split
# Stratified 75/25 train/test split of the scaled features and encoded labels.
# A fixed random_state makes the partition identical on every run, so the
# evaluation below is computed on the same test rows each time.
X_train,X_test,y_train,y_test=train_test_split(
    X_scale_df,y,test_size=0.25,stratify=y,random_state=42)

In [None]:
# Fully connected binary classifier: stacked ReLU Dense layers with three
# Dropout(0.3) stages and a single sigmoid output unit.
# The input width is read from the training frame instead of being hard-coded,
# because the number of columns depends on what the correlation filter dropped.
n_features=X_train.shape[1]
model=Sequential()
model.add(Dense(64,input_dim=n_features,activation='relu'))
model.add(Dense(128,activation='relu'))
model.add(Dense(256,activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(128,activation='relu'))
model.add(Dense(256,activation='relu'))
model.add(Dense(512,activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(512,activation='relu'))
model.add(Dense(1024,activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(1,activation='sigmoid'))
# Binary cross-entropy pairs with the sigmoid output; accuracy is tracked per epoch.
model.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])


In [None]:
# Print the layer-by-layer summary of the compiled model.
model.summary()

In [None]:
# Train on the training split: 150 epochs, mini-batches of 50 samples.
model.fit(x=X_train, y=y_train, epochs=150, batch_size=50)

In [None]:
# Model outputs for the held-out rows (sigmoid activations of the last layer).
y_pred=model.predict(X_test)

In [None]:
# Turn the predicted values into boolean class labels with a 0.5 cut-off.
y_pred = y_pred > 0.5

In [None]:
from sklearn.metrics import accuracy_score

In [None]:
# Fraction of held-out rows whose thresholded prediction matches the label,
# reported as a percentage with two decimals.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print('Accuracy is {:.2f}%'.format(100 * accuracy))