### Learning and Predicting

In [2]:
from sklearn import datasets

In [6]:
# Load the bundled handwritten-digits dataset; its data, target and images
# arrays are inspected in the cells below.
digits = datasets.load_digits()

In [11]:
# Feature matrix: one row per sample, one column per feature.
digits.data.shape

(1797, 64)

In [12]:
# Label vector: one target value per row of digits.data.
digits.target.shape

(1797,)

In [13]:
# The samples kept as 2-D 8x8 arrays instead of flat 64-value rows.
digits.images.shape

(1797, 8, 8)

In [15]:
from sklearn import svm

In [16]:
# Train an SVC with hand-picked gamma and C on every sample except the last,
# which is held out for the prediction cell that follows.
clf = svm.SVC(C=100., gamma=0.001)
clf.fit(X=digits.data[:-1], y=digits.target[:-1])

SVC(C=100.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.001, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [20]:
# Classify the held-out last sample. The [-1:] slice (rather than [-1]) keeps
# the input 2-D, i.e. a batch containing a single row.
clf.predict(digits.data[-1:])

array([8])

### Model Persistence

In [22]:
# Load the iris dataset used by the persistence and type-casting examples below.
iris = datasets.load_iris()

In [24]:
# Rebind clf to a default-parameter SVC (replacing the digits classifier)
# and fit it on the complete iris dataset.
clf = svm.SVC()
clf.fit(X=iris.data, y=iris.target)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [29]:
# Baseline prediction for the first iris sample; the persistence cells below
# repeat this call on restored copies of the model for comparison.
clf.predict(iris.data[0:1])

array([0])

In [25]:
import pickle

In [30]:
# Round-trip the fitted model through an in-memory pickle byte string and
# check that the restored copy gives the same prediction as clf.
# NOTE: unpickling can execute arbitrary code -- only load pickles you created
# yourself or otherwise trust.
temp = pickle.dumps(clf)
clf2 = pickle.loads(temp)
clf2.predict(iris.data[0:1])

array([0])

In [31]:
# `sklearn.externals.joblib` was deprecated in scikit-learn 0.21 and removed in
# 0.23, so prefer the standalone `joblib` package. The legacy import is kept
# only as a fallback for old environments where standalone joblib is missing.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

In [32]:
# Persist the fitted model to disk with joblib, reload it, and check that the
# restored copy gives the same prediction as clf.
# Side effect: writes 'temp.pkl' relative to the current working directory.
joblib.dump(clf, 'temp.pkl')
clf3 = joblib.load('temp.pkl')
clf3.predict(iris.data[0:1])

array([0])

### Type Casting

In [45]:
# Fit on the integer-encoded targets, holding out the last sample.
clf1 = svm.SVC()
clf1.fit(X=iris.data[:-1], y=iris.target[:-1])

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [46]:
# Fit on string targets instead: map each integer target to its class name,
# again holding out the last sample. Rebinds clf2 from the pickle example.
clf2 = svm.SVC()
clf2.fit(X=iris.data[:-1], y=iris.target_names[iris.target[:-1]])

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [50]:
# clf1 was fit on integer targets, so the held-out sample is predicted as an
# integer class label.
clf1.predict(iris.data[-1:])

array([2])

In [51]:
# clf2 was fit on string targets, so the same sample is predicted as a class
# name string.
clf2.predict(iris.data[-1:])

array(['virginica'],
      dtype='<U10')

### Refitting and Updating Parameters

In [54]:
import numpy as np
from sklearn.svm import SVC

In [59]:
# Seeded generator for the synthetic data below. The generator is stateful:
# X, y and X_test are drawn from it in sequence, so re-running any of those
# cells without first re-running this one produces different values.
rng = np.random.RandomState(0)

In [65]:
# Synthetic training features: 100 samples x 10 features, uniform in [0, 1).
X = rng.rand(100, 10)
X.shape

(100, 10)

In [69]:
# Synthetic binary labels: 100 independent draws of 0/1 with probability 0.5,
# unrelated to X.
y = rng.binomial(1, 0.5, 100)
y

array([1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1,
       1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1,
       1, 0, 0, 0, 0, 0, 1, 1])

In [71]:
# Five synthetic test samples with the same 10-feature layout as X.
X_test = rng.rand(5, 10)
X_test.shape

(5, 10)

In [76]:
# Fresh SVC with default parameters; the kernel is changed through
# set_params() in the cells that follow.
clf = SVC()

In [77]:
# Switch the existing estimator to a linear kernel, then fit it.
# set_params() returns the estimator itself, so this equals the chained form.
clf.set_params(kernel='linear')
clf.fit(X, y)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [78]:
# Predictions from the linear-kernel fit.
clf.predict(X_test)

array([0, 1, 0, 0, 0])

In [79]:
# Change the kernel back to RBF and refit; the refit replaces what the
# estimator learned during the linear-kernel fit.
clf.set_params(kernel='rbf')
clf.fit(X, y)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [80]:
# Predictions after the RBF refit, for comparison with the linear-kernel ones.
clf.predict(X_test)

array([0, 0, 0, 0, 0])

### Multiclass vs. Multilabel Fitting

In [92]:
from sklearn.svm import SVC
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import MultiLabelBinarizer

In [93]:
# Toy dataset: five 2-feature samples with a 1-D list of class labels (0, 1, 2).
# Rebinds X and y from the previous section.
X = [[1, 2], [2, 4], [4, 5], [3, 2], [3, 1]]
y = [0, 0, 1, 1, 2]

In [94]:
# With 1-D labels, the one-vs-rest wrapper returns a 1-D array of predicted
# class labels. fit() returns the estimator, so splitting the chain is equivalent.
classif = OneVsRestClassifier(estimator=SVC(random_state=0))
classif.fit(X, y)
classif.predict(X)

array([0, 0, 1, 1, 2])

In [95]:
# Convert the 1-D labels into a 2-D binary indicator matrix (one column per class).
# CAUTION: this rebinds y from its own previous value, so the cell is not
# idempotent -- re-running it feeds the already-binarized matrix back in.
# Re-run the cell that defines X and y first.
y = LabelBinarizer().fit_transform(y)
y

array([[1, 0, 0],
       [1, 0, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 0, 1]])

In [96]:
# Refit on the 2-D indicator targets: predict() now returns indicator rows
# rather than class labels, and an all-zero row means no class was assigned
# to that sample.
classif.fit(X, y)
classif.predict(X)

array([[1, 0, 0],
       [1, 0, 0],
       [0, 1, 0],
       [0, 0, 0],
       [0, 0, 0]])

In [97]:
# Multilabel targets: each sample carries a list of labels.
# MultiLabelBinarizer turns these into an indicator matrix with one column per
# distinct label (0-4 here). y is rebound twice in this cell, replacing the
# LabelBinarizer output from above.
y = [[0, 1], [0, 2], [1, 3], [0, 2, 3], [2, 4]]
y = MultiLabelBinarizer().fit_transform(y)
y

array([[1, 1, 0, 0, 0],
       [1, 0, 1, 0, 0],
       [0, 1, 0, 1, 0],
       [1, 0, 1, 1, 0],
       [0, 0, 1, 0, 1]])