Applying a CNN to the Breast Cancer Detection dataset

Importing the required libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer

Import the dataset

In [None]:
# Read the breast-cancer CSV into a DataFrame and preview its first rows
DATA_PATH = '../input/breast-cancer-wisconsin-data/data.csv'
df = pd.read_csv(DATA_PATH)
df.head()

In [None]:
# (rows, columns) of the raw frame exactly as loaded, before any column is dropped
df.shape

We have 33 columns here. We will now create the independent and dependent variables. For the independent variables, we drop the columns that have little relevance in this dataset ('Unnamed: 32' and 'id'), and we also drop the 'diagnosis' column because it will be used as the dependent (target) variable.

In [None]:
# Feature matrix: everything except the two columns the notebook treats as
# irrelevant and the target column itself. `df` is left untouched.
excluded_columns = ['Unnamed: 32', 'id', 'diagnosis']
X = df.drop(columns=excluded_columns)
X.head()

In [None]:
# Target vector: the diagnosis column, still holding its original categorical labels
y = df['diagnosis']
y

Our dependent variable is categorical, so we will encode it as integers using a label encoder.

In [None]:
# LabelEncoder maps each distinct class label to an integer code;
# the instance is kept in `label` so it can be fitted on the target in a later cell
from sklearn.preprocessing import LabelEncoder
label=LabelEncoder()

In [None]:
# Encode the categorical diagnosis labels as integer class codes.
# The encoder is fitted on the original column rather than on `y` itself, so
# re-running this cell does not refit it on already-encoded integers (which
# would overwrite `label.classes_` and lose the original label names).
y = label.fit_transform(df['diagnosis'])
y

We will split the data into training and testing sets.

In [None]:
# Hold out 20% of the rows for testing. `stratify=y` keeps the class
# proportions the same in both splits, and the fixed `random_state` makes the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=99,
    stratify=y,
)

# checking the shape of the data
X_train.shape, X_test.shape, y_train.shape, y_test.shape

In [None]:
# inspect the unscaled training features to see how much the column ranges differ
X_train

We can see that the features vary widely in scale, so we will bring them onto a common scale so that our model performs better. We will scale the data using StandardScaler.

In [None]:
# import library
from sklearn.preprocessing import StandardScaler

# Learn the per-feature mean and standard deviation from the training rows
# only, then apply that same transform to both splits so that no test-set
# statistics influence the scaling.
scale = StandardScaler()
scale.fit(X_train)
X_train = scale.transform(X_train)
X_test = scale.transform(X_test)

In [None]:
# display the standardized train and test arrays (the scaler returns numpy arrays)
X_train, X_test

In [None]:
# (samples, features) of the scaled arrays when the notebook is run in order,
# i.e. before the CNN reshape adds a trailing axis
X_train.shape, X_test.shape

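We will reshape the data for our CNN model. A Conv1D layer expects 3D input of shape (samples, steps, channels), so each row of features becomes a sequence with a single channel.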


In [None]:
# Append a trailing length-1 axis so each sample has the (steps, channels)
# layout the convolutional layers are given. Only the first two dimensions are
# read, so re-running this cell on already-reshaped arrays leaves them unchanged.
train_rows, train_cols = X_train.shape[:2]
test_rows, test_cols = X_test.shape[:2]

X_train = X_train.reshape((train_rows, train_cols, 1))
X_test = X_test.reshape((test_rows, test_cols, 1))

In [None]:
# confirm that both arrays now carry the trailing length-1 axis from the reshape
X_train.shape, X_test.shape

Now that our data is preprocessed, we will build the model.

In [None]:
# Import the libraries
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Flatten,Conv1D,Dropout,BatchNormalization
from tensorflow.keras.optimizers import Adam

In [None]:
# 1D CNN binary classifier.
# Two Conv1D stages (32 then 64 filters, kernel size 2), each followed by batch
# normalization and dropout, feed a 64-unit dense layer and a single sigmoid
# output unit.
# The number of input steps is read from the training array instead of being
# hard-coded, so the model still builds if the set of feature columns changes.
n_features = X_train.shape[1]

model = Sequential([
    # convolutional stage 1
    Conv1D(filters=32, kernel_size=2, activation='relu', input_shape=(n_features, 1)),
    BatchNormalization(),
    Dropout(0.2),

    # convolutional stage 2
    Conv1D(filters=64, kernel_size=2, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),

    # classification head
    Flatten(),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])

In [None]:
# print the layer-by-layer architecture with output shapes and parameter counts
model.summary()

In [None]:
# Configure training: Adam with a learning rate of 5e-5, binary cross-entropy
# loss to match the single sigmoid output, and accuracy as the tracked metric
adam = Adam(learning_rate=5e-5)
model.compile(
    loss='binary_crossentropy',
    optimizer=adam,
    metrics=['accuracy'],
)

In [None]:
%%time
#fitting the model
history=model.fit(X_train,y_train,epochs=50, validation_data=(X_test,y_test))

In [None]:
# Plot every metric recorded during training (one line per key in
# history.history) against the epoch index. The figure is titled and labelled
# so it can be read on its own when the notebook is skimmed.
history_df = pd.DataFrame(history.history)
ax = history_df.plot(figsize=(10, 8))
ax.set(
    title='Training and validation metrics per epoch',
    xlabel='Epoch',
    ylabel='Metric value',
)
ax.grid(True)
plt.show()