In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score,confusion_matrix

In [None]:
# Read the breast-cancer CSV from the Kaggle input directory into a DataFrame.
data = pd.read_csv(
    filepath_or_buffer='../input/breast-cancer-wisconsin-data/data.csv',
)

In [None]:
# Preview the first five rows of the loaded frame.
data.head(n=5)

In [None]:
# Remove the 'Unnamed: 32' and 'id' columns so they do not end up among the
# model features built from `data` later on.
# Rebinding `data` (rather than inplace=True) together with errors='ignore'
# keeps this cell re-runnable: executing it a second time no longer raises
# KeyError for columns that are already gone.
data = data.drop(columns=['Unnamed: 32', 'id'], errors='ignore')

In [None]:
# Show the column labels currently present in `data`.
data.columns

In [None]:
# Print the DataFrame summary (dtypes and non-null counts per column).
data.info()

In [None]:
# Count how many rows carry each diagnosis value.
data['diagnosis'].value_counts()

In [None]:
# Pie chart of the diagnosis value counts; slice labels show percentages
# formatted with two decimals.
data['diagnosis'].value_counts().plot.pie(
    shadow=True,
    colors=('darkgreen', 'orange'),
    autopct='%.2f',
    figsize=(8, 6),
)
plt.title('Diagnosis')
plt.show()

In [None]:
# Encode the diagnosis labels numerically: 'M' becomes 1 and 'B' becomes 0.
a = dict(M=1, B=0)
data['diagnosis'] = data['diagnosis'].replace(to_replace=a)

In [None]:
# The diagnosis column has already been mapped to 0/1 by the previous cell,
# so it only needs to be stored as an integer column here.
# The former pd.get_dummies(...) call returned a two-column indicator frame;
# assigning that to a single column either raises ValueError (newer pandas)
# or silently keeps only the first indicator column (older pandas), which
# flips the 0/1 meaning of the labels.
data['diagnosis'] = data['diagnosis'].astype(int)

In [None]:
# Display the diagnosis column to inspect its current values.
data['diagnosis']

In [None]:
# Pairwise scatter plots of the first six columns of `data`, coloured by the
# 'diagnosis' column (which must therefore be among those six columns).
plt.style.use('fivethirtyeight')
sns.set_style("white")
# `height` is the current name of the per-facet size argument; the older
# `size` keyword was renamed in seaborn and triggers a warning when used.
sns.pairplot(data[data.columns[:6]], hue='diagnosis', height=3)

In [None]:
# Separate the feature columns (everything except 'diagnosis') from the target.
X = data.drop(columns='diagnosis')
y = data['diagnosis']

In [None]:
# Annotated heatmap of the pairwise correlations between the feature columns.
f, ax = plt.subplots(figsize=(20, 15))
sns.heatmap(
    X.corr(),
    annot=True,
    cmap="YlGnBu",
    linewidths=2,
    ax=ax,
)
plt.show()

In [None]:
# Standardise every feature (zero mean, unit variance); X becomes a NumPy array.
# NOTE(review): the scaler is fitted on all rows before the train/test split
# that follows, so test-set statistics influence the scaling — consider
# fitting on the training portion only.
scaler = StandardScaler()
X = scaler.fit(X).transform(X)

In [None]:
# Hold out 33% of the rows as a test set; the fixed seed makes the shuffled
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    shuffle=True,
    random_state=44,
)

In [None]:
# Fit each classifier on the training split and predict the held-out test split.
# Every model is trained on (X_train, y_train) only, so the test-set
# predictions are comparable across models.

# Logistic regression with L2 penalty, fitted with the 'sag' solver.
LogisticRegressionModel = LogisticRegression(penalty='l2', solver='sag', C=1.0, random_state=33)
lr_pred = LogisticRegressionModel.fit(X_train, y_train).predict(X_test)

# RBF-kernel SVM; note that max_iter=100 caps the number of solver iterations.
SVCModel = SVC(kernel='rbf', max_iter=100, C=1.0, gamma='auto')
svm_pred = SVCModel.fit(X_train, y_train).predict(X_test)

# Gaussian naive Bayes with default settings.
GaussianNBModel = GaussianNB()
gnb_pred = GaussianNBModel.fit(X_train, y_train).predict(X_test)

# Shallow decision tree (depth 3) using the entropy criterion.
DecisionTreeClassifierModel = DecisionTreeClassifier(criterion='entropy', max_depth=3, random_state=33)
dt_pred = DecisionTreeClassifierModel.fit(X_train, y_train).predict(X_test)

# Random forest of 100 depth-2 trees.
# Fixed: this model used to be fitted on the full (X, y), which contains the
# test rows, so its test accuracy was inflated by data leakage.
RandomForestClassifierModel = RandomForestClassifier(criterion='gini', n_estimators=100, max_depth=2, random_state=33)
rf_pred = RandomForestClassifierModel.fit(X_train, y_train).predict(X_test)

# Linear model trained with stochastic gradient descent.
SGDClassifierModel = SGDClassifier(penalty='l2', learning_rate='optimal', random_state=33)
SGD_pred = SGDClassifierModel.fit(X_train, y_train).predict(X_test)

# Gradient boosting with 100 depth-3 trees.
GBCModel = GradientBoostingClassifier(n_estimators=100, max_depth=3, random_state=33)
GBC_pred = GBCModel.fit(X_train, y_train).predict(X_test)

In [None]:
# Display names and test-set predictions, kept in the same order so they can
# be paired up position by position.
models = ['Logistic Regression', 'SVM', 'GaussianNB', 'DecisionTree Classifier',
          'RandomForest Classifier', 'SGD Classifier', 'GBCModel']
preds = [lr_pred, svm_pred, gnb_pred, dt_pred, rf_pred, SGD_pred, GBC_pred]

# Test accuracy of each model, rounded to two decimals.
# The built-in round() is used instead of the `.round()` method so this works
# whether accuracy_score returns a NumPy scalar or a plain Python float, and
# the arguments follow the documented (y_true, y_pred) order.
acc = [round(accuracy_score(y_test, pred), 2) for pred in preds]



In [None]:
# Tabulate model names against their accuracies, best model first.
# The table is built directly from `models` and `acc` instead of rebinding
# `data` to a zip iterator: that used to overwrite the dataset DataFrame and
# made every earlier cell that uses `data` fail when re-run.
result = pd.DataFrame({'Model': models, 'Accuracy': acc}).sort_values(["Accuracy"], ascending=False)

In [None]:
# Display the model/accuracy table built in the previous cell.
result

In [None]:
# Bar chart comparing the accuracy of each model in `result`.
plt.figure(figsize=(15, 7))
sns.barplot(
    data=result,
    x="Model",
    y="Accuracy",
)
plt.show()

In [None]:
# Confusion matrix of the gradient-boosting predictions on the test split
# (rows: true labels, columns: predicted labels).
cm = confusion_matrix(y_true=y_test, y_pred=GBC_pred)
print(cm)

In [None]:
# Draw the confusion matrix as a heatmap with integer cell annotations.
plt.figure(figsize=(5, 5))
ax = sns.heatmap(
    data=cm,
    annot=True,
    fmt="d",
)
plt.show()