In [0]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix 
from sklearn.metrics import accuracy_score
from sklearn import datasets

from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB

In [0]:
breast_cancer = datasets.load_breast_cancer()

In [0]:
x_breast_cancer = breast_cancer.data
y_breast_cancer = breast_cancer.target

In [4]:
breast_cancer_features = pd.DataFrame(x_breast_cancer, columns=breast_cancer.feature_names)
breast_cancer_features.head()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,radius error,texture error,perimeter error,area error,smoothness error,compactness error,concavity error,concave points error,symmetry error,fractal dimension error,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,1.095,0.9053,8.589,153.4,0.006399,0.04904,0.05373,0.01587,0.03003,0.006193,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,0.5435,0.7339,3.398,74.08,0.005225,0.01308,0.0186,0.0134,0.01389,0.003532,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,0.7456,0.7869,4.585,94.03,0.00615,0.04006,0.03832,0.02058,0.0225,0.004571,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,0.4956,1.156,3.445,27.23,0.00911,0.07458,0.05661,0.01867,0.05963,0.009208,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,0.7572,0.7813,5.438,94.44,0.01149,0.02461,0.05688,0.01885,0.01756,0.005115,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


In [0]:
def numeric_to_categoric(data, new_data):
  for target in data:
    if target == 0 :
      target = "Malignant"
      new_data.append(target)
    if target == 1 :
      target = "Benign"
      new_data.append(target)

In [6]:
new_target = []

numeric_to_categoric(y_breast_cancer, new_target)
breast_cancer_target = pd.DataFrame(new_target, columns=['Target'])
breast_cancer_target.head()

Unnamed: 0,Target
0,Malignant
1,Malignant
2,Malignant
3,Malignant
4,Malignant


In [7]:
complete_breast_cancer_data = pd.concat([breast_cancer_features, breast_cancer_target], axis=1)
complete_breast_cancer_data.head()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,radius error,texture error,perimeter error,area error,smoothness error,compactness error,concavity error,concave points error,symmetry error,fractal dimension error,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,Target
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,1.095,0.9053,8.589,153.4,0.006399,0.04904,0.05373,0.01587,0.03003,0.006193,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,Malignant
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,0.5435,0.7339,3.398,74.08,0.005225,0.01308,0.0186,0.0134,0.01389,0.003532,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,Malignant
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,0.7456,0.7869,4.585,94.03,0.00615,0.04006,0.03832,0.02058,0.0225,0.004571,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,Malignant
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,0.4956,1.156,3.445,27.23,0.00911,0.07458,0.05661,0.01867,0.05963,0.009208,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,Malignant
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,0.7572,0.7813,5.438,94.44,0.01149,0.02461,0.05688,0.01885,0.01756,0.005115,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,Malignant


In [0]:
x_train, x_test, y_train, y_test = train_test_split(x_breast_cancer, y_breast_cancer, random_state=1, test_size=0.3)

In [0]:
svm_model = SVC(gamma='scale')
kneighbors_model = KNeighborsClassifier()
decision_tree_model = DecisionTreeClassifier()
random_forest_model = RandomForestClassifier(n_estimators=100) 
adaboost_model = AdaBoostClassifier()
naive_bayes_model = GaussianNB()

In [0]:
svm = svm_model.fit(x_train, y_train)
decision_tree = decision_tree_model.fit(x_train, y_train)
kneighbors = kneighbors_model.fit(x_train, y_train)
random_forest = random_forest_model.fit(x_train, y_train)
adaboost = adaboost_model.fit(x_train, y_train)
naive_bayes = naive_bayes_model.fit(x_train, y_train)

In [0]:
svm_predict = svm.predict(x_test)
decision_tree_predict = decision_tree.predict(x_test)
kneighbors_predict = kneighbors.predict(x_test)
random_forest_predict = random_forest.predict(x_test)
adaboost_predict = adaboost.predict(x_test)
naive_bayes_predict = naive_bayes.predict(x_test)

In [14]:
prediction_table = pd.concat([
           pd.DataFrame(svm_predict, columns = ['SVM Prediction']), 
           pd.DataFrame(decision_tree_predict, columns = ['Decision Tree Prediction']), 
           pd.DataFrame(kneighbors_predict, columns = ['KNeighbors Prediction']),  
           pd.DataFrame(random_forest_predict, columns = ['Random Forest Prediction']), 
           pd.DataFrame(adaboost_predict, columns = ['Adaboost Prediction']),
           pd.DataFrame(naive_bayes_predict, columns=['Naive Bayes Prediction'])
           ],axis=1)

prediction_table.head()

Unnamed: 0,SVM Prediction,Decision Tree Prediction,KNeighbors Prediction,Random Forest Prediction,Adaboost Prediction,Naive Bayes Prediction
0,1,1,0,1,1,0
1,1,0,0,0,0,0
2,1,1,1,1,1,1
3,0,0,0,0,0,0
4,0,0,0,1,0,1


In [0]:
new_svm = []
new_decision_tree = []
new_kneighbors = []
new_random_forest = []
new_adaboost = []
new_naive_bayes = []
numeric_to_categoric(prediction_table["SVM Prediction"], new_svm)
numeric_to_categoric(prediction_table["Decision Tree Prediction"], new_decision_tree)
numeric_to_categoric(prediction_table["KNeighbors Prediction"], new_kneighbors)
numeric_to_categoric(prediction_table["Random Forest Prediction"], new_random_forest)
numeric_to_categoric(prediction_table["Adaboost Prediction"], new_adaboost)
numeric_to_categoric(prediction_table["Naive Bayes Prediction"], new_naive_bayes)

In [16]:
new_prediction_actual_table = prediction_table = pd.concat([
           pd.DataFrame(new_svm, columns = ['SVM Prediction']), 
           pd.DataFrame(new_decision_tree, columns = ['Decision Tree Prediction']), 
           pd.DataFrame(new_kneighbors, columns = ['KNeighbors Prediction']),  
           pd.DataFrame(new_random_forest, columns = ['Random Forest Prediction']), 
           pd.DataFrame(new_adaboost, columns = ['Adaboost Prediction']),
           pd.DataFrame(new_naive_bayes, columns= ['Naive Bayes Prediction']),
           pd.DataFrame(new_target, columns = ['Actual Target'])
           ],axis=1)

new_prediction_actual_table.head()

Unnamed: 0,SVM Prediction,Decision Tree Prediction,KNeighbors Prediction,Random Forest Prediction,Adaboost Prediction,Naive Bayes Prediction,Actual Target
0,Benign,Benign,Malignant,Benign,Benign,Malignant,Malignant
1,Benign,Malignant,Malignant,Malignant,Malignant,Malignant,Malignant
2,Benign,Benign,Benign,Benign,Benign,Benign,Malignant
3,Malignant,Malignant,Malignant,Malignant,Malignant,Malignant,Malignant
4,Malignant,Malignant,Malignant,Benign,Malignant,Benign,Malignant


In [0]:
def result_score(method, prediction):
  print("Confusion Matrix of {}".format(method))
  print(confusion_matrix(y_test, prediction))
  print("With Accuracy : {}".format(accuracy_score(y_test, prediction)))

In [18]:
result_score("SVM", svm_predict)

Confusion Matrix of SVM
[[ 51  12]
 [  2 106]]
With Accuracy : 0.9181286549707602


In [19]:
result_score("Decision Tree", decision_tree_predict)

Confusion Matrix of Decision Tree
[[ 55   8]
 [  1 107]]
With Accuracy : 0.9473684210526315


In [20]:
result_score("KNeigbors", kneighbors_predict)

Confusion Matrix of KNeigbors
[[ 56   7]
 [  5 103]]
With Accuracy : 0.9298245614035088


In [21]:
result_score("Random Forest", random_forest_predict)

Confusion Matrix of Random Forest
[[ 58   5]
 [  2 106]]
With Accuracy : 0.9590643274853801


In [22]:
result_score("Adaboost", adaboost_predict)

Confusion Matrix of Adaboost
[[ 58   5]
 [  5 103]]
With Accuracy : 0.9415204678362573


In [25]:
result_score("Naive Bayes", naive_bayes_predict)

Confusion Matrix of Naive Bayes
[[ 58   5]
 [  4 104]]
With Accuracy : 0.9473684210526315


In [0]:
score_table = [accuracy_score(y_test, svm_predict),
               accuracy_score(y_test, decision_tree_predict),
               accuracy_score(y_test, kneighbors_predict),
               accuracy_score(y_test, random_forest_predict),
               accuracy_score(y_test, adaboost_predict),
               accuracy_score(y_test, naive_bayes_predict)]
               
method = ['SVM', 'Decision Tree', 'KNeighbors', 'Random Forest', 'Adaboost', 'Naive Bayes']

In [30]:
pd.DataFrame(score_table, method, columns=["Accuracy"])

Unnamed: 0,Accuracy
SVM,0.918129
Decision Tree,0.947368
KNeighbors,0.929825
Random Forest,0.959064
Adaboost,0.94152
Naive Bayes,0.947368
