In [9]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import preprocessing,cross_validation
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn import model_selection
from sklearn.metrics import classification_report,accuracy_score
from pandas.plotting import scatter_matrix

In [10]:
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/breast-cancer-wisconsin.data"

names = ["id","clump_thickness","uniform_cell_size","uniform_cell_shape","marginal_adhesion","single_epithelial_size",
         "bare_nulcei","bland_chromatin","normal_nuclei","metosis","class"]
data = pd.read_csv(url,names=names)

In [11]:
data.head()

Unnamed: 0,id,clump_thickness,uniform_cell_size,uniform_cell_shape,marginal_adhesion,single_epithelial_size,bare_nulcei,bland_chromatin,normal_nuclei,metosis,class
0,1000025,5,1,1,1,2,1,3,1,1,2
1,1002945,5,4,4,5,7,10,3,2,1,2
2,1015425,3,1,1,1,2,2,3,1,1,2
3,1016277,6,8,8,1,3,4,3,7,1,2
4,1017023,4,1,1,3,2,1,3,1,1,2


In [12]:
data.shape

(699, 11)

In [13]:
data.replace('?',-99999,inplace=True)

In [14]:
X = np.array(data.drop(["class"],axis=1))
Y = np.array(data["class"])
x_train,x_test,y_train,y_test = cross_validation.train_test_split(X,Y,test_size=0.2)

In [15]:
seed = 8
scoring = "accuracy"
models = []
models.append(('KNN',KNeighborsClassifier(n_neighbors = 5)))
models.append(('SVM',SVC()))

In [16]:
results = []
names = []

for name,model in models:
    kfold = model_selection.KFold(n_splits=10,random_state=seed)
    cv_results = model_selection.cross_val_score(model,x_train,y_train,cv=kfold,scoring=scoring)
    results.append(cv_results)
    names.append(name)
    
    msg = "%s: %f  (%f)" % (name,cv_results.mean(),cv_results.std())
    print(msg)


KNN: 0.583377  (0.078305)
SVM: 0.654838  (0.054824)


In [17]:
for name,model in models:
    model.fit(x_train,y_train)
    pred = model.predict(x_test)
    
    print(name)
    print(accuracy_score(y_test,pred))
    print(classification_report(y_test,pred))
    

KNN
0.6928571428571428
             precision    recall  f1-score   support

          2       0.74      0.85      0.79        96
          4       0.52      0.34      0.41        44

avg / total       0.67      0.69      0.67       140

SVM
0.6928571428571428
             precision    recall  f1-score   support

          2       0.69      1.00      0.82        96
          4       1.00      0.02      0.04        44

avg / total       0.79      0.69      0.57       140

