In [1]:
from sklearn import svm
from sklearn import datasets
# Load the two bundled example datasets used in the cells below.
digits = datasets.load_digits()
iris = datasets.load_iris()

In [2]:
# Print the digits feature matrix; NumPy abbreviates the 2-D array with "...".
print(digits.data)  

[[ 0.  0.  5. ...  0.  0.  0.]
 [ 0.  0.  0. ... 10.  0.  0.]
 [ 0.  0.  0. ... 16.  9.  0.]
 ...
 [ 0.  0.  1. ...  6.  0.  0.]
 [ 0.  0.  2. ... 12.  0.  0.]
 [ 0.  0. 10. ... 12.  1.  0.]]


In [3]:
# Display the label array of the digits dataset (one integer label per sample).
digits.target

array([0, 1, 2, ..., 8, 9, 8])

In [4]:
# Display the first sample in image form; the output below is an 8x8 array.
digits.images[0]

array([[ 0.,  0.,  5., 13.,  9.,  1.,  0.,  0.],
       [ 0.,  0., 13., 15., 10., 15.,  5.,  0.],
       [ 0.,  3., 15.,  2.,  0., 11.,  8.,  0.],
       [ 0.,  4., 12.,  0.,  0.,  8.,  8.,  0.],
       [ 0.,  5.,  8.,  0.,  0.,  9.,  8.,  0.],
       [ 0.,  4., 11.,  0.,  1., 12.,  7.,  0.],
       [ 0.,  2., 14.,  5., 10., 12.,  0.,  0.],
       [ 0.,  0.,  6., 13., 10.,  0.,  0.,  0.]])

## Support vector machineによる分類

In [5]:
# Build an SVM classifier with hand-picked hyperparameters.
# C is the regularization parameter; the repr below shows the default
# kernel is 'rbf'.
clf = svm.SVC(C=100.0, gamma=0.001)
clf

SVC(C=100.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma=0.001, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [6]:
# Training on only the first five samples is not enough: the last digit is
# predicted incorrectly (fit() returns the estimator, so the calls chain).
clf.fit(digits.data[:5], digits.target[:5]).predict(digits.data[-1:])

array([3])

In [7]:
# After training on every sample except the last one (1796 samples), the
# held-out digit is predicted correctly.
digits_train_X = digits.data[:-1]
digits_train_y = digits.target[:-1]
print(len(digits_train_y))
clf.fit(digits_train_X, digits_train_y)
print(clf.predict(digits.data[-1:]))  # predicted label
print(digits.target[-1:])             # true label

1796
[8]
[8]


In [8]:
from sklearn import svm
from sklearn import datasets
# Rebind clf to a fresh SVC created with default hyperparameters.
clf = svm.SVC()

In [9]:
# Classify iris flowers with a support vector machine:
# X holds the feature rows, y the matching class labels.
iris = datasets.load_iris()
X = iris.data
y = iris.target

In [10]:
# Train on the first 120 rows and hold out rows 120-149 for testing.
# NOTE(review): the iris.target printout later in this notebook shows the rows
# are ordered by class, so this unshuffled split tests on class 2 only.
training_X, test_X = X[:120], X[120:150]
training_Y, test_Y = y[:120], y[120:150]

In [11]:
# Fit the SVM on the training rows; the returned estimator is shown as output.
clf.fit(training_X, training_Y)  

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [12]:
# Show the accuracy as "correct / total" on the held-out rows.
svm_correct = (clf.predict(test_X) == test_Y).sum()
print("{} / {}".format(svm_correct, len(test_Y)))

25 / 30


In [13]:
import pickle
# Serialize the fitted classifier to an in-memory pickle and restore a copy.
# Only unpickle data you produced yourself: unpickling can execute code.
s = pickle.dumps(clf)
clf2 = pickle.loads(s)
# The restored copy can predict without being refit.
clf2.predict(X[0:1])

array([0])

In [14]:
# Save the trained model to disk and load it back with joblib.
# sklearn.externals.joblib was deprecated in scikit-learn 0.21 and removed in
# 0.23, so prefer the standalone joblib package and fall back to the vendored
# copy only on old installations.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

joblib.dump(clf, 'filename.pkl')
clf2 = joblib.load('filename.pkl')

## ロジスティック回帰

In [15]:
from sklearn.linear_model import LogisticRegression

# Create the logistic-regression classifier with default settings;
# it is fitted in a later cell.
lr = LogisticRegression()

In [16]:
# Reload iris and rebuild the 120 / 30 positional train/test split.
# NOTE(review): the iris.target printout later in this notebook shows the rows
# are ordered by class, so this unshuffled split tests on class 2 only.
iris = datasets.load_iris()
X = iris.data
y = iris.target
training_X, test_X = X[:120], X[120:150]
training_Y, test_Y = y[:120], y[120:150]

In [17]:
# Fit logistic regression on the training rows, then report accuracy twice:
# once through score() and once as a "correct / total" count.
# NOTE(review): test_Y comes from rows 120-149 only, which the iris.target
# printout shows are all class 2, so this number is not a balanced estimate.
lr.fit(training_X, training_Y)
print(lr.score(test_X, test_Y))
lr_correct = (lr.predict(test_X) == test_Y).sum()
print("{} / {}".format(lr_correct, len(test_Y)))

0.43333333333333335
13 / 30


In [18]:
import numpy as np
from sklearn import random_projection
# Draw a reproducible 10 x 2000 matrix of uniform samples (seed 0) and store
# it as float32; the last expression displays the resulting dtype.
rng = np.random.RandomState(seed=0)
X = rng.rand(10, 2000).astype('float32')
print(X)
X.dtype

[[0.5488135  0.71518934 0.60276335 ... 0.4801078  0.64386404 0.5017731 ]
 [0.8115185  0.476084   0.523156   ... 0.83000296 0.9328062  0.30833843]
 [0.29264206 0.56651825 0.13741443 ... 0.6965229  0.4836966  0.33955073]
 ...
 [0.32059506 0.24986687 0.03107279 ... 0.3893891  0.93272    0.33276632]
 [0.39553738 0.8440175  0.15044175 ... 0.53561085 0.35458204 0.935781  ]
 [0.81402713 0.85133713 0.43113658 ... 0.44645575 0.3601266  0.6258866 ]]


dtype('float32')

In [19]:
# Project the float32 matrix with a Gaussian random projection.
# A fixed random_state makes the random projection matrix, and therefore the
# printed values, reproducible across Restart & Run All.
transformer = random_projection.GaussianRandomProjection(random_state=0)
X_new = transformer.fit_transform(X)
print(X_new)
# Display the dtype of the projected data (the recorded output shows float64
# although the input X is float32).
X_new.dtype

[[ 1.16145765  0.20289995 -1.01573501 ... -0.99681265 -0.59281611
   0.27108681]
 [ 1.56430995  0.02465674 -0.02700313 ... -0.36768306 -0.5135887
   0.46633748]
 [ 0.73888458  0.04702969 -0.68514762 ... -0.8507486  -0.47080709
   0.39555964]
 ...
 [ 0.4544726  -0.0686129  -0.46393039 ... -0.98473079  0.03769808
   0.50746332]
 [ 1.221337    0.32921055 -0.39171809 ... -0.47798973 -0.03858774
   0.52084865]
 [ 1.04146059  0.22751703 -0.67205412 ... -1.24410476 -0.5255735
  -0.14972287]]


dtype('float64')

In [20]:
from sklearn import datasets
from sklearn.svm import SVC
# Fit a default SVC on the whole iris dataset using the integer class labels.
# fit() returns the estimator itself, so it can be bound directly to clf.
iris = datasets.load_iris()
clf = SVC().fit(iris.data, iris.target)
clf

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [21]:
# Class names of the iris dataset.
print(iris.target_names)
# Integer label of every sample; the output shows the rows ordered by class.
print(iris.target)

['setosa' 'versicolor' 'virginica']
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2]


In [22]:
# The classifier was fitted on integer labels, so predictions are integers.
list(clf.predict(iris.data[:3]))

[0, 0, 0]

In [23]:
# True labels of the same three samples, for comparison with the prediction.
iris.target[:3]

array([0, 0, 0])

In [24]:
# Refit with string targets: index target_names by the integer labels to get
# one class-name string per sample.
clf.fit(iris.data, iris.target_names[iris.target])  

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [25]:
# After fitting on string labels, predictions come back as class-name strings.
list(clf.predict(iris.data[:3]))  

['setosa', 'setosa', 'setosa']

In [26]:
import numpy as np
from sklearn.svm import SVC
# Build a reproducible toy problem from a single seeded generator.
# The draws must stay in this order because they share one random stream.
rng = np.random.RandomState(seed=0)
X = rng.random_sample(size=(100, 10))      # 100 training rows, 10 features
y = rng.binomial(n=1, p=0.5, size=100)     # random 0/1 labels
X_test = rng.random_sample(size=(5, 10))   # 5 rows to predict on

In [27]:
# Start from a default SVC, switch it to a linear kernel through set_params,
# then fit; the fitted estimator is the cell's output.
clf = SVC()
clf.set_params(kernel='linear')
clf.fit(X, y)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [28]:
# Predictions of the linear-kernel model on the five test rows.
clf.predict(X_test)

array([1, 0, 1, 1, 0])

In [29]:
# Change the kernel of the existing estimator back to RBF and refit it on
# the same data.
clf.set_params(kernel='rbf')
clf.fit(X, y)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [30]:
# Predictions of the refitted RBF-kernel model on the same five test rows.
clf.predict(X_test)

array([0, 0, 0, 1, 0])

In [31]:
from sklearn.svm import SVC
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import LabelBinarizer

In [32]:
# Five 2-feature samples and their single-label class assignments.
X = [
    [1, 2],
    [2, 4],
    [4, 5],
    [3, 2],
    [3, 1],
]
y = [0, 0, 1, 1, 2]

In [33]:
# Wrap a seeded SVC in a one-vs-rest classifier and fit it on the 1-D label
# list; predict() then returns a 1-D array of class labels.
base_svc = SVC(random_state=0)
classif = OneVsRestClassifier(estimator=base_svc)
classif.fit(X, y)
classif.predict(X)

array([0, 0, 1, 1, 2])

In [34]:
# Fit the one-vs-rest classifier on a 2-D binary indicator target; predict()
# then returns an indicator matrix as well.
# The binarized labels get their own name instead of overwriting y, so
# re-running this cell never feeds already-binarized labels back into
# LabelBinarizer.
y_indicator = LabelBinarizer().fit_transform(y)
classif.fit(X, y_indicator).predict(X)

array([[1, 0, 0],
       [1, 0, 0],
       [0, 1, 0],
       [0, 0, 0],
       [0, 0, 0]])

In [35]:
from sklearn.preprocessing import MultiLabelBinarizer
# Multilabel case: each sample carries a set of labels, which
# MultiLabelBinarizer turns into a binary indicator matrix for fitting.
label_sets = [[0, 1], [0, 2], [1, 3], [0, 2, 3], [2, 4]]
y = MultiLabelBinarizer().fit_transform(label_sets)
classif.fit(X, y).predict(X)

array([[1, 1, 0, 0, 0],
       [1, 0, 1, 0, 0],
       [0, 1, 0, 1, 0],
       [1, 0, 1, 0, 0],
       [1, 0, 1, 0, 0]])