![image.png](attachment:42e5c8f1-3222-471a-98f1-c282455d6bc5.png)

**Attribute Information:**
   * ID number
   * Diagnosis (M = malignant, B = benign)
   
**Ten real-valued features are computed for each cell nucleus:**
   * radius (mean of distances from center to points on the perimeter)
   * texture (standard deviation of gray-scale values)
   * perimeter
   * area
   * smoothness (local variation in radius lengths)
   * compactness (perimeter^2 / area - 1.0)
   * concavity (severity of concave portions of the contour)
   * concave points (number of concave portions of the contour)
   * symmetry
   * fractal dimension ("coastline approximation" - 1)

In [None]:
# Notebook-wide imports and plotting setup.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.offline as py
# Enable plotly's offline rendering inside the notebook before any figure is shown.
py.init_notebook_mode(connected=True)
import plotly.graph_objs as go
import plotly.express as px
from sklearn.preprocessing import StandardScaler

import warnings
# Silence every warning for the rest of the session (keeps cell output compact,
# but also hides deprecation notices from the plotting/ML libraries).
warnings.filterwarnings('ignore')

# Use the 'fivethirtyeight' matplotlib style for all matplotlib/seaborn figures.
plt.style.use('fivethirtyeight')
%matplotlib inline

In [None]:
# Load the CSV from the Kaggle input directory into `df`, then preview
# the first rows as the cell output.
df = pd.read_csv(
    '/kaggle/input/breast-cancer-wisconsin-data/data.csv'
)
df.head()

In [None]:
# Dimensions of the loaded frame as a (rows, columns) tuple.
df.shape

In [None]:
# Per-column summary: dtype and non-null count for each column.
df.info()

In [None]:
# Descriptive statistics for the columns of `df`.
df.describe()

In [None]:
# Drop the unnecessary "Unnamed: 32" column.
# errors="ignore" keeps this cell idempotent: re-running it (or loading a copy
# of the CSV that does not carry this column) no longer raises KeyError.
df.drop(columns=["Unnamed: 32"], errors="ignore", inplace=True)


In [None]:
# Count the missing entries in every column.
df.isna().sum()

In [None]:
# Bar chart of per-column completeness, drawn with missingno.
import missingno as msno

msno.bar(df)

# Exploratory Data Analysis

## Univariate Analysis

In [None]:
# Histogram of the `diagnosis` column, one coloured bar group per class.
fig = px.histogram(
    df,
    x='diagnosis',
    color='diagnosis',
    barmode='group',
)
fig.show()

In [None]:
# List the column labels currently present in `df`.
df.columns

In [None]:
# Pie chart with `diagnosis` as the slice labels and `radius_mean` as the
# slice values.
fig = px.pie(
    df,
    values='radius_mean',
    names='diagnosis',
    title='Relation',
)
fig.show()

## Multivariate Analysis

### Positive Correlation

In [None]:
# Positive correlation: 2x2 grid of scatter plots, coloured by diagnosis.
fig, ax = plt.subplots(2, 2, figsize=(20, 25))

# (row, col, x column, y column) for each panel, in the original drawing order.
panel_specs = [
    (0, 0, 'perimeter_mean', 'radius_worst'),
    (1, 0, 'area_mean', 'radius_worst'),
    (0, 1, 'texture_mean', 'texture_worst'),
    (1, 1, 'area_worst', 'radius_worst'),
]
for row, col, x_col, y_col in panel_specs:
    sns.scatterplot(x=x_col, y=y_col, data=df, hue='diagnosis', ax=ax[row][col])

plt.show()

### Negative Correlation

In [None]:
# Negative correlation: 2x2 grid of scatter plots, coloured by diagnosis.
fig, ax = plt.subplots(2, 2, figsize=(20, 25))
sns.scatterplot(x='area_mean', y='fractal_dimension_mean', data=df, hue='diagnosis', ax=ax[0][0])
sns.scatterplot(x='radius_mean', y='smoothness_se', data=df, hue='diagnosis', ax=ax[1][0])
sns.scatterplot(x='smoothness_se', y='perimeter_mean', data=df, hue='diagnosis', ax=ax[0][1])
sns.scatterplot(x='area_mean', y='smoothness_se', data=df, hue='diagnosis', ax=ax[1][1])
# Render explicitly, as the positive-correlation grid does, so the cell does
# not end by echoing the repr of the last Axes object.
plt.show()

### Scatterplot using plotly

In [None]:
# Interactive scatter of radius_mean against perimeter_mean, coloured by diagnosis.
fig = px.scatter(
    df,
    x='radius_mean',
    y='perimeter_mean',
    color='diagnosis',
    size_max=60,
)
fig.show()


In [None]:
# Interactive scatter of texture_worst against symmetry_worst, coloured by diagnosis.
fig2 = px.scatter(
    df,
    x='texture_worst',
    y='symmetry_worst',
    color='diagnosis',
    size_max=60,
)
fig2.show()

### Distribution plot

In [None]:
import plotly.figure_factory as ff

# Distribution plot for a single series: radius_mean.
group_labels = ['distplot']  # legend name for that one series
hist_data = [df['radius_mean']]

fig = ff.create_distplot(hist_data=hist_data, group_labels=group_labels)
fig.show()

### Correlation matrix

In [None]:
# Annotated heatmap of the pairwise correlations between numeric columns.
#
# `df` still holds the string-valued `diagnosis` column at this point, and
# DataFrame.corr() raises on non-numeric data in pandas >= 2.0. Restricting
# the frame to numeric dtypes first gives the same matrix older pandas
# produced by silently skipping such columns.
fig, ax = plt.subplots(figsize=(20, 15))
numeric_df = df.select_dtypes(include='number')
# `linewidths` is seaborn's documented keyword for the cell separator width.
sns.heatmap(numeric_df.corr(), ax=ax, annot=True, linewidths=.5)

# Data Preprocessing

In [None]:
# Positional split of the frame into target and feature arrays:
# the column at position 1 becomes the target, positions 2 onward the features.
# NOTE(review): presumably position 0 is `id` and position 1 is `diagnosis` —
# confirm against df.columns, since this relies on column order.
y = df.iloc[:, 1].values
X = df.iloc[:, 2:].values

In [None]:
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Turn the class labels in `y` into integer codes.
labelencode = LabelEncoder()
labelencode.fit(y)
y = labelencode.transform(y)

In [None]:
# Hold out 30% of the samples for testing; the fixed seed makes the split repeatable.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=123,
)

In [None]:
# Standardise the features. The scaler is fitted on the training split only
# and the same fitted transform is then applied to both splits.
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [None]:
# Display the scaled training matrix as the cell output.
X_train

# Keras for classification

In [None]:
import keras
from keras.models import Sequential
from keras.layers import Dense

In [None]:
# Start from an empty Sequential model; layers are added in the next cell.
classifier = Sequential()

In [None]:
# Build the network: two ReLU hidden layers (16 and 6 units) followed by a
# single sigmoid unit for the binary output.

# Take the input width from the training matrix instead of hard-coding 30,
# so the model stays valid if the feature set changes.
n_features = X_train.shape[1]

# input + first hidden layer
classifier.add(Dense(16, activation='relu', kernel_initializer='glorot_uniform', input_dim=n_features))

# second hidden layer
classifier.add(Dense(6, activation='relu', kernel_initializer='glorot_uniform'))

# output layer
classifier.add(Dense(1, kernel_initializer='uniform', activation='sigmoid'))

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy metric.
classifier.compile(
    optimizer='Adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train on the scaled training split: 150 epochs, mini-batches of 100 samples.
classifier.fit(
    X_train,
    y_train,
    batch_size=100,
    epochs=150,
)

In [None]:
# Score the test split, then threshold the model outputs at 0.5 to obtain
# boolean class predictions.
test_scores = classifier.predict(X_test)
y_pred = test_scores > 0.5

In [None]:
from sklearn.metrics import accuracy_score

# Report the measured accuracy on the held-out split. The message is kept
# neutral: the previous wording declared the result "very high" no matter
# what value was actually computed, and ran the number into the text.
test_accuracy = accuracy_score(y_test, y_pred)
print(f"Test accuracy: {test_accuracy:.4f}")

In [None]:
from sklearn.metrics import confusion_matrix

# Confusion matrix of true vs. predicted labels on the test split,
# displayed as the cell output.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
cm

In [None]:
# Heatmap of the confusion matrix with the count written in each cell.
# fmt='d' prints the counts as plain integers; seaborn's default annotation
# format ('.2g') would render any count of 100 or more as e.g. "1e+02".
sns.heatmap(cm, annot=True, fmt='d')


## Using Keras, we get good model accuracy, i.e. greater than 98%.

### If you like the work, upvote it.
### Any suggestions are welcome.