# Business Understanding

Based on several observations/features of each sample, predict whether the cancer diagnosis is benign or malignant.

# Importing Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


# Loading Data

In [None]:
# Read the raw CSV into a DataFrame; the path is relative to the notebook's working directory.
csv_path = "..//input//breast-cancer-wisconsin-data//data.csv"
df_cancer = pd.read_csv(csv_path)

In [None]:
# Columns removed before analysis (presumably an empty trailing column and a
# row identifier -- TODO confirm against the raw file).
# Named `cols_to_drop` rather than `list` so the builtin `list` is not shadowed
# for the rest of the notebook.
cols_to_drop = ['Unnamed: 32', 'id']

# errors='ignore' keeps this cell idempotent: re-running it after the columns
# are already gone no longer raises a KeyError.
df_cancer = df_cancer.drop(columns=cols_to_drop, errors='ignore')

In [None]:
# Preview the first rows of the frame to check the remaining columns.
df_cancer.head()

In [None]:
# Preview the last rows of the frame as a second sanity check on the load.
df_cancer.tail()

# Exploratory Data Analysis / Data Visualization

In [None]:
# Pairwise scatter/density plots for a handful of mean-value features,
# coloured by diagnosis class.
pairplot_features = [
    'radius_mean',
    'texture_mean',
    'area_mean',
    'perimeter_mean',
    'smoothness_mean',
]
sns.pairplot(df_cancer, hue='diagnosis', vars=pairplot_features)

In [None]:
# Class balance of the target column.
# The column is passed by keyword: recent seaborn releases treat the first
# positional argument of countplot as `data`, not `x`, so the old positional
# call is deprecated or broken depending on the installed version.
# The former label="count" argument is dropped; countplot already titles the
# count axis "count" and no legend was drawn from it.
sns.countplot(x='diagnosis', data=df_cancer)

In [None]:
# Area vs. smoothness for every sample, coloured by diagnosis class.
sns.scatterplot(
    data=df_cancer,
    x='area_mean',
    y='smoothness_mean',
    hue='diagnosis',
)

In [None]:
# Correlation matrix of the numeric features.
# `diagnosis` holds string labels; on pandas >= 2.0 DataFrame.corr() no longer
# silently skips non-numeric columns and raises instead, so restrict the frame
# to numeric columns explicitly (this matches what older pandas computed).
plt.figure(figsize=(20,10))
sns.heatmap(df_cancer.select_dtypes(include='number').corr(), annot=True)

# Model Building and Model Evaluation

In [None]:
# Separate the target label column from the feature columns.
y = df_cancer['diagnosis']
X = df_cancer.drop(columns=['diagnosis'])

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=5,
)

In [None]:
# Min-max normalization, step 1: per-feature minimum over the training set.
min_train = X_train.min(axis=0)
min_train

In [None]:
# Min-max normalization, step 2: per-feature range (max - min) over the training set.
range_train = X_train.sub(min_train, axis='columns').max(axis=0)
range_train

In [None]:
# Min-max normalization, step 3: shift by the minimum and divide by the range,
# feature by feature.
X_train_shifted = X_train - min_train
X_train_scaled = X_train_shifted / range_train

In [None]:
# Training samples in the original (unscaled) feature units.
sns.scatterplot(
    x=X_train['area_mean'],
    y=X_train['smoothness_mean'],
    hue=y_train,
)

In [None]:
# Same two features after min-max scaling, for comparison with the raw plot.
sns.scatterplot(
    x=X_train_scaled['area_mean'],
    y=X_train_scaled['smoothness_mean'],
    hue=y_train,
)

In [None]:
# Scale the test set with the statistics computed on the TRAINING set
# (min_train / range_train), not with the test set's own min and range.
# Using test-set statistics maps train and test into different feature
# spaces, so the fitted model would see inconsistently scaled inputs, and it
# lets information about the held-out data shape the preprocessing.
# Scaled test values may fall slightly outside [0, 1]; that is expected.
X_test_scaled = (X_test - min_train) / range_train

In [None]:
from sklearn.svm import SVC 
from sklearn.metrics import classification_report, confusion_matrix

# SVC instance built with no arguments. Note: the grid search later in this
# notebook constructs its own SVC() and does not use this object.
svc_model = SVC()

In [None]:
# Hyper-parameter search space for the RBF-kernel SVC:
# every combination of C and gamma below is evaluated.
param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': [1, 0.1, 0.01, 0.001],
    'kernel': ['rbf'],
}

In [None]:
from sklearn.model_selection import GridSearchCV
# Grid search over `param_grid`; refit=True retrains the best
# configuration once the search finishes, verbose=4 logs progress.
grid = GridSearchCV(
    estimator=SVC(),
    param_grid=param_grid,
    refit=True,
    verbose=4,
)

In [None]:
# Run the search on the scaled training features and their labels.
grid.fit(X_train_scaled, y=y_train)

In [None]:
# Hyper-parameter combination selected by the grid search.
grid.best_params_

In [None]:
# Estimator refitted with the selected hyper-parameters (available because refit=True).
grid.best_estimator_

In [None]:
# Predict labels for the scaled held-out samples with the fitted search object.
grid_predictions = grid.predict(X=X_test_scaled)

In [None]:
# Confusion matrix: true test labels against the predictions.
cm = confusion_matrix(y_true=y_test, y_pred=grid_predictions)

In [None]:
# Render the confusion matrix as an annotated heatmap.
sns.heatmap(data=cm, annot=True)

In [None]:
# Text summary of the classification metrics on the test set.
report_text = classification_report(y_true=y_test, y_pred=grid_predictions)
print(report_text)