# Import Libraries

In [60]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# import dataset from seaborn

In [4]:
from sklearn import datasets


In [5]:
data = datasets.load_breast_cancer()

In [10]:
df = pd.DataFrame(data.data, columns=data.feature_names)
df['target'] = data.target

In [22]:
df.head(6)
df.shape

(569, 31)

In [12]:
df.isnull().sum().sum()

0

In [19]:
df_int=df.select_dtypes(include=["int64","float64"]).columns

In [20]:
df_int

Index(['mean radius', 'mean texture', 'mean perimeter', 'mean area',
       'mean smoothness', 'mean compactness', 'mean concavity',
       'mean concave points', 'mean symmetry', 'mean fractal dimension',
       'radius error', 'texture error', 'perimeter error', 'area error',
       'smoothness error', 'compactness error', 'concavity error',
       'concave points error', 'symmetry error', 'fractal dimension error',
       'worst radius', 'worst texture', 'worst perimeter', 'worst area',
       'worst smoothness', 'worst compactness', 'worst concavity',
       'worst concave points', 'worst symmetry', 'worst fractal dimension',
       'target'],
      dtype='object')

# Train Test Split Data

In [46]:
from sklearn.model_selection import train_test_split
X=df.drop("target",axis=1)
y=df['target']
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.20,random_state=0)

# feature Scalling for svm and Logistic Regreesion

In [47]:
from sklearn.preprocessing import StandardScaler


In [48]:
stdsc=StandardScaler()
X_train_sc=stdsc.fit_transform(X_train)
X_test_sc=stdsc.transform(X_test)

# Logistic Regression

In [49]:
from sklearn.linear_model import LogisticRegression
lr=LogisticRegression()
lr.fit(X_train_sc,y_train)
y_predict=lr.predict(X_test_sc)

In [50]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test,y_predict)

0.9649122807017544

# SVM Support Vector Classification(SVC)

In [51]:
from sklearn.svm import SVC
svc=SVC()
svc.fit(X_train_sc,y_train)
svm_y_predict=svc.predict(X_test_sc)


In [52]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test,svm_y_predict)

0.9824561403508771

# Hyper Parameter Tuning

In [53]:
max_kernel="linear"
max_c=0
max_acr=0
kernel=["linear","rbf","poly"]
for ker in kernel:
    for c in range(1,11):
        svc=SVC(kernel=ker,C=c)
        svc.fit(X_train_sc,y_train)
        svm_y_predict=svc.predict(X_test_sc)
        acc=accuracy_score(y_test,svm_y_predict)
        if acc > max_acr:
            max_acr=acc
            max_c=c
            max_kernel=ker

In [54]:
print("max kernel is: {} max c value is: {} max accuracy is: {}".format(max_kernel,max_c,max_acr))

max kernel is: rbf max c value is: 3 max accuracy is: 0.9912280701754386


# Decision Tree

In [56]:
from sklearn.tree import DecisionTreeClassifier
dtc=DecisionTreeClassifier()
dtc.fit(X_train,y_train)
y_pred=dtc.predict(X_test)
accuracy_score(y_test,y_pred)

0.9035087719298246

# Random Forest

In [58]:
from sklearn.ensemble import RandomForestClassifier
rf=RandomForestClassifier()
rf.fit(X_train,y_train)
rf_pred=rf.predict(X_test)
accuracy_score(y_test,rf_pred)

0.9736842105263158

# ADABoost Classification

In [59]:
from sklearn.ensemble import AdaBoostClassifier
abc=AdaBoostClassifier()
abc.fit(X_train,y_train)
abc_pred=abc.predict(X_test)
accuracy_score(y_test,abc_pred)

0.956140350877193

# Naive Baye's Classifier

In [61]:
from sklearn.naive_bayes import GaussianNB
nb=GaussianNB()
nb.fit(X_train,y_train)
nb_pred=nb.predict(X_test)
accuracy_score(y_test,nb_pred)

0.9298245614035088

# Gradient Boosting

In [62]:
from sklearn.ensemble import GradientBoostingClassifier
abc=GradientBoostingClassifier()
abc.fit(X_train,y_train)
abc_pred=abc.predict(X_test)
accuracy_score(y_test,abc_pred)

0.9736842105263158

# XGBoost claasifier

In [63]:
!pip install xgboost

Collecting xgboost
  Downloading xgboost-1.3.3-py3-none-manylinux2010_x86_64.whl (157.5 MB)
[K     |████████████████████████████████| 157.5 MB 21 kB/s  eta 0:00:01    |██▎                             | 11.1 MB 1.9 MB/s eta 0:01:17     |███████▊                        | 37.9 MB 4.4 MB/s eta 0:00:28     |█████████▊                      | 48.0 MB 6.1 MB/s eta 0:00:18     |████████████▊                   | 62.4 MB 5.2 MB/s eta 0:00:19     |██████████████████▎             | 89.9 MB 4.8 MB/s eta 0:00:15     |███████████████████████         | 113.0 MB 2.5 MB/s eta 0:00:18     |███████████████████████▎        | 114.3 MB 1.9 MB/s eta 0:00:23     |██████████████████████████▎     | 129.4 MB 400 kB/s eta 0:01:11     |███████████████████████████     | 133.0 MB 2.7 MB/s eta 0:00:10     |██████████████████████████████  | 148.0 MB 298 kB/s eta 0:00:32     |███████████████████████████████▋| 155.8 MB 1.5 MB/s eta 0:00:02
Installing collected packages: xgboost
Successfully installed xgboost-1.3.3


In [64]:
from xgboost import XGBClassifier
xg=XGBClassifier()
xg.fit(X_train,y_train)
xg_pred=xg.predict(X_test)
accuracy_score(y_test,xg_pred)





0.9824561403508771