In [9]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
%matplotlib inline

# Load the breast-cancer dataset bundled with scikit-learn.
cancer = load_breast_cancer()

# Hold out part of the data for evaluation. The seed is fixed so the split
# is the same on every run; the split size is left at the library default.
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data,
    cancer.target,
    random_state=42,
)

In [10]:
from sklearn.svm import SVC

In [11]:
# Manual min-max scaling. The per-feature minimum and range are estimated
# from the training split only, so they can be reused unchanged on the
# test split later.
min_train = X_train.min(axis=0)
range_train = X_train.max(axis=0) - min_train

# Shift each feature to start at 0, then divide by its range so it ends at 1.
X_train_scaled = (X_train - min_train) / range_train

# Sanity check: every training feature should now span exactly [0, 1].
scaled_min = X_train_scaled.min(axis=0)
scaled_max = X_train_scaled.max(axis=0)
print('min per feat\n{}'.format(scaled_min))
print('max per feat\n{}'.format(scaled_max))



min per feat
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0.]
max per feat
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1.]


In [12]:
# Scale the test split with the statistics estimated on the training split
# (min_train / range_train) so that nothing is learned from the test data.
# Test values may therefore fall slightly outside [0, 1].
X_test_scaled = (X_test - min_train) / range_train

# gamma is pinned to 'auto', the value shown in this cell's recorded repr.
# scikit-learn 0.22 changed the default to 'scale'; without the explicit
# argument the scores reported below would not be reproducible on newer
# library versions.
svm = SVC(gamma='auto')
svm.fit(X_train_scaled, y_train)


SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [13]:

# Score of the fitted SVC on the training split first, then on the test split.
for features, labels in ((X_train_scaled, y_train), (X_test_scaled, y_test)):
    print(svm.score(features, labels))

0.9460093896713615
0.965034965034965


In [14]:
# Refit with a much larger C (weaker regularisation) on the scaled data.
# gamma is pinned to 'auto', the value shown in this cell's recorded repr;
# scikit-learn 0.22 changed the default to 'scale', which would otherwise
# change the scores on newer library versions.
svm = SVC(C=1000, gamma='auto')
svm.fit(X_train_scaled, y_train)

SVC(C=1000, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [15]:
# Training-split score on the first line, test-split score on the second.
print(
    svm.score(X_train_scaled, y_train),
    svm.score(X_test_scaled, y_test),
    sep='\n',
)

0.9906103286384976
0.972027972027972


In [16]:
# Signed decision values for the first 20 test samples. Only those rows are
# passed in, so the rest of the test split is not evaluated.
print(svm.decision_function(X_test_scaled[:20]))

[  0.97380141  -7.8215926   -4.77166518   5.31255452   6.30248201
 -16.34512538 -14.07697154  -2.23822132  -0.04151188   4.0183653
   1.53448979  -2.66231067   3.39097304  -0.80308508   3.30333368
  -4.63236663   3.73730245   7.74889342   7.76058111 -10.03690057]


In [17]:
# Sign of the decision value for the first 20 test samples:
# True where the value is strictly positive, False otherwise.
first20_decision = svm.decision_function(X_test_scaled[:20])
print(first20_decision > 0)

[ True False False  True  True False False False False  True  True False
  True False  True False  True  True  True False]


In [18]:
# Refit the C=1000 model with probability estimates enabled.
# - probability=True makes fit() run an extra internal calibration step that
#   shuffles the data, so random_state is fixed (same seed as the train/test
#   split) to make predict_proba() reproducible across runs.
# - gamma is pinned to 'auto', the value used by the earlier fits in this
#   notebook; scikit-learn 0.22 changed the default to 'scale'.
svm = SVC(C=1000, gamma='auto', probability=True, random_state=42)
svm.fit(X_train_scaled, y_train)

# Scores of the refitted model: training split, then test split.
print(svm.score(X_train_scaled, y_train))
print(svm.score(X_test_scaled, y_test))

0.9906103286384976
0.972027972027972


In [19]:
# Class-membership probabilities for the first 20 test samples: one row per
# sample, one column per class.
# NOTE(review): these come from the separate calibration step enabled by
# probability=True and can disagree with predict() for samples close to the
# decision boundary -- confirm against the scikit-learn SVC documentation.
first20_proba = svm.predict_proba(X_test_scaled[:20])
print(first20_proba)

[[2.10011009e-01 7.89988991e-01]
 [9.99588033e-01 4.11967185e-04]
 [9.90272428e-01 9.72757215e-03]
 [2.94926982e-03 9.97050730e-01]
 [3.37346970e-06 9.99996627e-01]
 [9.99999900e-01 1.00000010e-07]
 [9.99999372e-01 6.28270957e-07]
 [8.81382702e-01 1.18617298e-01]
 [4.32230646e-01 5.67769354e-01]
 [1.11898214e-02 9.88810179e-01]
 [1.29278754e-01 8.70721246e-01]
 [9.20216843e-01 7.97831569e-02]
 [2.12226399e-02 9.78777360e-01]
 [6.25263149e-01 3.74736851e-01]
 [2.31933212e-02 9.76806679e-01]
 [9.88762692e-01 1.12373077e-02]
 [1.49179399e-02 9.85082060e-01]
 [1.67927563e-07 9.99999832e-01]
 [1.63906104e-07 9.99999836e-01]
 [9.99958568e-01 4.14322522e-05]]


In [22]:
# Predicted class label for every sample in the scaled test split. As the
# last expression of the cell, the full array is shown as the cell output.
svm.predict(X_test_scaled)

array([1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1,
       0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,
       0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0,
       1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1,
       0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0,
       1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1])