In [1]:

%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn; 
from scipy import stats
import pylab as pl
from sklearn.model_selection import train_test_split

seaborn.set()

In [2]:
df = pd.read_csv("wbc.csv")
df.head()

Unnamed: 0,clump_thickness,cell_size,cell_shape,marginal_adhesion,epitelial_cellsize,bare_nuclei,bland_chromatin,normal_nucleoli,mitoses,jenis
0,5,1,1,1,2,1,3,1,1,2
1,5,4,4,5,7,10,3,2,1,2
2,3,1,1,1,2,2,3,1,1,2
3,6,8,8,1,3,4,3,7,1,2
4,4,1,1,3,2,1,3,1,1,2


In [3]:
df = df.replace('?',np.nan)

In [4]:
df['bare_nuclei'] = pd.to_numeric(df.bare_nuclei.fillna(df.bare_nuclei.median()))

In [5]:
X = df.drop([df.columns[-1]], axis=1)
y = df.jenis

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.33, random_state=42)

In [7]:
from sklearn import svm

In [8]:
svc=svm.SVC(kernel='rbf')
svc.fit(X_train,y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [9]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

In [10]:
y_pred = svc.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
precision, recall, f1, _ = precision_recall_fscore_support(y_test, y_pred)
print (('Akurasi: %.4f') % (accuracy))
print (('Precision: %.4f') % (precision[1]))
print (('Recall: %.4f') % (recall[1]))
print (('F1: %.4f') % (f1[1]))

Akurasi: 0.9610
Precision: 0.8953
Recall: 1.0000
F1: 0.9448


In [11]:
from sklearn import metrics

In [12]:
metrics.confusion_matrix(y_test, y_pred)

array([[145,   9],
       [  0,  77]])

In [13]:
svc.score(X_train,y_train), svc.score(X_test, y_test)

(0.99786324786324787, 0.96103896103896103)

In [14]:
from sklearn.model_selection import GridSearchCV
from __future__ import print_function
from sklearn.metrics import classification_report
from sklearn.svm import SVC

In [15]:
svc2=svm.SVC(C=4.0, gamma=0.001, kernel='rbf')
svc2.fit(X_train,y_train)

SVC(C=4.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma=0.001, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [16]:
y_pred = svc2.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
precision, recall, f1, _ = precision_recall_fscore_support(y_test, y_pred)
print (('Akurasi: %.4f') % (accuracy))
print (('Precision: %.4f') % (precision[1]))
print (('Recall: %.4f') % (recall[1]))
print (('F1: %.4f') % (f1[1]))

Akurasi: 0.9654
Precision: 0.9726
Recall: 0.9221
F1: 0.9467


In [21]:
from sklearn.model_selection import RandomizedSearchCV

In [17]:
svc3=svm.SVC(C=54, cache_size=2000, class_weight=None, coef0=7.800000000000001,
  decision_function_shape=None, degree=27, gamma=0.1, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.004, verbose=False)

In [18]:
svc3.fit(X_train,y_train)

SVC(C=54, cache_size=2000, class_weight=None, coef0=7.800000000000001,
  decision_function_shape=None, degree=27, gamma=0.1, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.004, verbose=False)

In [19]:
y_pred = svc3.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
precision, recall, f1, _ = precision_recall_fscore_support(y_test, y_pred)
print (('Akurasi: %.4f') % (accuracy))
print (('Precision: %.4f') % (precision[1]))
print (('Recall: %.4f') % (recall[1]))
print (('F1: %.4f') % (f1[1]))

Akurasi: 0.9740
Precision: 0.9383
Recall: 0.9870
F1: 0.9620


# svc3 memiliki akurasi yang lebih tinggi dibanding svc2 namun precision yang lebih rendah

In [22]:
import scipy
svc_clf = SVC()
svc_search_params = {'C': scipy.stats.expon(scale=1), 
                     'gamma': scipy.stats.expon(scale=.1),
                     'kernel': ['rbf'],
                     'class_weight':['balanced',None]}
svc_search = RandomizedSearchCV(svc_clf, 
                                param_distributions=svc_search_params, 
                                n_iter=25)

In [23]:
svc_search.fit(X,y)

RandomizedSearchCV(cv=None, error_score='raise',
          estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
          fit_params={}, iid=True, n_iter=25, n_jobs=1,
          param_distributions={'C': <scipy.stats._distn_infrastructure.rv_frozen object at 0x000000000B7B95C0>, 'gamma': <scipy.stats._distn_infrastructure.rv_frozen object at 0x000000000B7B9748>, 'kernel': ['rbf'], 'class_weight': ['balanced', None]},
          pre_dispatch='2*n_jobs', random_state=None, refit=True,
          return_train_score=True, scoring=None, verbose=0)

In [24]:
svc_search.grid_scores_



[mean: 0.94850, std: 0.02626, params: {'C': 0.23189909969824543, 'class_weight': 'balanced', 'gamma': 0.067341399394551513, 'kernel': 'rbf'},
 mean: 0.94850, std: 0.02848, params: {'C': 0.28478231363679612, 'class_weight': None, 'gamma': 0.064452583961916013, 'kernel': 'rbf'},
 mean: 0.95994, std: 0.01916, params: {'C': 0.13274648369797196, 'class_weight': None, 'gamma': 0.034355888355556932, 'kernel': 'rbf'},
 mean: 0.94707, std: 0.02611, params: {'C': 0.077700250915674715, 'class_weight': None, 'gamma': 0.05053731573074053, 'kernel': 'rbf'},
 mean: 0.95565, std: 0.02815, params: {'C': 1.2910788972793397, 'class_weight': 'balanced', 'gamma': 0.087501154474002696, 'kernel': 'rbf'},
 mean: 0.93848, std: 0.02901, params: {'C': 1.1870818727273071, 'class_weight': None, 'gamma': 0.23690002410294225, 'kernel': 'rbf'},
 mean: 0.94564, std: 0.02791, params: {'C': 0.5070519313444829, 'class_weight': None, 'gamma': 0.11869272967609307, 'kernel': 'rbf'},
 mean: 0.94850, std: 0.02848, params: {'C

In [25]:
svc_search.best_estimator_

SVC(C=0.1284694532908186, cache_size=200, class_weight='balanced', coef0=0.0,
  decision_function_shape=None, degree=3, gamma=0.0044965584770206334,
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

In [26]:
svc_search.best_score_

0.96852646638054363

In [27]:
svc6=SVC(C=0.39331564608003322, cache_size=200, class_weight='balanced', coef0=0.0,
  decision_function_shape=None, degree=3, gamma=0.0035134196499454111,
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)


In [29]:
svc6.fit(X_train,y_train)

SVC(C=0.3933156460800332, cache_size=200, class_weight='balanced', coef0=0.0,
  decision_function_shape=None, degree=3, gamma=0.003513419649945411,
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

In [32]:
y_pred1 = svc6.predict(X_test)

In [37]:
y_pred1 = svc6.predict(X_test)
accuracy = accuracy_score(y_test, y_pred1)
precision, recall, f1, _ = precision_recall_fscore_support(y_test, y_pred1)
print (('Akurasi: %.4f') % (accuracy))
print (('Precision: %.4f') % (precision[1]))
print (('Recall: %.4f') % (recall[1]))
print (('F1: %.4f') % (f1[1]))

Akurasi: 0.9697
Precision: 0.9487
Recall: 0.9610
F1: 0.9548


In [38]:
metrics.confusion_matrix(y_test, y_pred1)

array([[150,   4],
       [  3,  74]])

# svc6 memiliki akurasi precision recall dan f1 di atas 94% 


In [39]:
svc4=svm.SVC(C=0.091149875631783353, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma=0.012540745017286704,
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)
svc4.fit(X_train,y_train)

SVC(C=0.09114987563178335, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma=0.012540745017286704,
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

In [40]:
y_pred2 = svc4.predict(X_test)
accuracy = accuracy_score(y_test, y_pred2)
precision, recall, f1, _ = precision_recall_fscore_support(y_test, y_pred2)
print (('Akurasi: %.4f') % (accuracy))
print (('Precision: %.4f') % (precision[1]))
print (('Recall: %.4f') % (recall[1]))
print (('F1: %.4f') % (f1[1]))

Akurasi: 0.9654
Precision: 0.9367
Recall: 0.9610
F1: 0.9487


In [43]:
metrics.confusion_matrix(y_test, y_pred2)

array([[149,   5],
       [  3,  74]])

In [48]:
print("Best accuracy obtained: {0}".format(clf3.best_score_))
print("Parameters:")
for key, value in clf3.best_params_.items():
    print("\t{}: {}".format(key, value))

Best accuracy obtained: 0.9668225156056771
Parameters:
	C: 20000000.0
	gamma: 2e-07
	kernel: rbf


In [65]:
print( clf3.best_estimator_ )
clf3.best_score_

SVC(C=20000000.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma=2e-07, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)


0.96682251560567711

In [41]:
svc5=SVC(C=20000000.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma=2e-07, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)
svc5.fit(X_train,y_train)

SVC(C=20000000.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma=2e-07, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [42]:
y_pred3 = svc5.predict(X_test)
accuracy = accuracy_score(y_test, y_pred3)
precision, recall, f1, _ = precision_recall_fscore_support(y_test, y_pred3)
print (('Akurasi: %.4f') % (accuracy))
print (('Precision: %.4f') % (precision[1]))
print (('Recall: %.4f') % (recall[1]))
print (('F1: %.4f') % (f1[1]))

Akurasi: 0.9567
Precision: 0.9718
Recall: 0.8961
F1: 0.9324


In [44]:
metrics.confusion_matrix(y_test, y_pred3)

array([[152,   2],
       [  8,  69]])

In [66]:
print( clf4.best_estimator_ )
clf4.best_score_

SVC(C=2000000000.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma=2e-09, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)


0.96499423559943109

In [45]:
svc6=SVC(C=2000000000.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma=2e-09, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)
svc6.fit(X_train,y_train)

SVC(C=2000000000.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma=2e-09, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [46]:
y_pred4 = svc6.predict(X_test)
accuracy = accuracy_score(y_test, y_pred4)
precision, recall, f1, _ = precision_recall_fscore_support(y_test, y_pred4)
print (('Akurasi: %.4f') % (accuracy))
print (('Precision: %.4f') % (precision[1]))
print (('Recall: %.4f') % (recall[1]))
print (('F1: %.4f') % (f1[1]))

Akurasi: 0.9567
Precision: 0.9589
Recall: 0.9091
F1: 0.9333


In [47]:
metrics.confusion_matrix(y_test, y_pred4)

array([[151,   3],
       [  7,  70]])

In [69]:
print( clf7.best_estimator_ )
clf7.best_score_

SVC(C=2000000000000000.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma=2e-09, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)


0.96267291327899129

In [48]:
svc7=SVC(C=2000000000000000.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma=2e-09, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)
svc7.fit(X_train,y_train)

SVC(C=2000000000000000.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma=2e-09, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [49]:
y_pred5 = svc7.predict(X_test)
accuracy = accuracy_score(y_test, y_pred5)
precision, recall, f1, _ = precision_recall_fscore_support(y_test, y_pred5)
print (('Akurasi: %.4f') % (accuracy))
print (('Precision: %.4f') % (precision[1]))
print (('Recall: %.4f') % (recall[1]))
print (('F1: %.4f') % (f1[1]))

Akurasi: 0.9567
Precision: 0.9589
Recall: 0.9091
F1: 0.9333


In [50]:
metrics.confusion_matrix(y_test, y_pred5)

array([[151,   3],
       [  7,  70]])