# Learning and predicting

In [1]:
from sklearn import datasets

In [3]:
# Load the two bundled example datasets used by the cells below.
digits = datasets.load_digits()
iris = datasets.load_iris()

In [4]:
# Print the feature array of the digits dataset (NumPy abbreviates the middle with '...').
print(digits.data)

[[  0.   0.   5. ...,   0.   0.   0.]
 [  0.   0.   0. ...,  10.   0.   0.]
 [  0.   0.   0. ...,  16.   9.   0.]
 ..., 
 [  0.   0.   1. ...,   6.   0.   0.]
 [  0.   0.   2. ...,  12.   0.   0.]
 [  0.   0.  10. ...,  12.   1.   0.]]


In [5]:
# Label array paired with digits.data; it is passed as the fit target further down.
digits.target

array([0, 1, 2, ..., 8, 9, 8])

In [6]:
# First entry of digits.images: an 8x8 array, as the output below shows.
digits.images[0]

array([[  0.,   0.,   5.,  13.,   9.,   1.,   0.,   0.],
       [  0.,   0.,  13.,  15.,  10.,  15.,   5.,   0.],
       [  0.,   3.,  15.,   2.,   0.,  11.,   8.,   0.],
       [  0.,   4.,  12.,   0.,   0.,   8.,   8.,   0.],
       [  0.,   5.,   8.,   0.,   0.,   9.,   8.,   0.],
       [  0.,   4.,  11.,   0.,   1.,  12.,   7.,   0.],
       [  0.,   2.,  14.,   5.,  10.,  12.,   0.,   0.],
       [  0.,   0.,   6.,  13.,  10.,   0.,   0.,   0.]])

In [7]:
from sklearn import svm
# Support vector classifier with hand-picked C and gamma (the kernel stays at its default).
clf = svm.SVC(C=100.0, gamma=0.001)

In [8]:
# Train on every sample except the last one, which is kept aside for the prediction below.
clf.fit(digits.data[:-1], digits.target[:-1])

SVC(C=100.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma=0.001, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [9]:
# Predict the label of the held-out last sample; the [-1:] slice keeps the input 2-D.
clf.predict(digits.data[-1:])

array([8])

In [10]:
# Inspect the raw feature vector that was just classified.
digits.data[-1:]

array([[  0.,   0.,  10.,  14.,   8.,   1.,   0.,   0.,   0.,   2.,  16.,
         14.,   6.,   1.,   0.,   0.,   0.,   0.,  15.,  15.,   8.,  15.,
          0.,   0.,   0.,   0.,   5.,  16.,  16.,  10.,   0.,   0.,   0.,
          0.,  12.,  15.,  15.,  12.,   0.,   0.,   0.,   4.,  16.,   6.,
          4.,  16.,   6.,   0.,   0.,   8.,  16.,  10.,   8.,  16.,   8.,
          0.,   0.,   1.,   8.,  12.,  14.,  12.,   1.,   0.]])

# Model persistence

In [12]:
from sklearn import svm
from sklearn import datasets

# Fit a default SVC on iris so there is a trained model to serialize.
iris = datasets.load_iris()
X = iris.data
y = iris.target
clf = svm.SVC()
clf.fit(X, y)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [13]:
import pickle
# Round-trip the fitted classifier through an in-memory pickle.
# Only unpickle bytes you trust: loading a pickle can execute arbitrary code.
s = pickle.dumps(clf)
clf2 = pickle.loads(s)
# Sanity-check the restored model on the first sample (compare with y[0]).
clf2.predict(X[:1])

array([0])

In [14]:
# True label of the first sample, for comparison with the unpickled model's prediction.
y[0]

0

# Type casting

In [15]:
import numpy as np
from sklearn import random_projection

In [16]:
# Seeded generator so the sample matrix is reproducible.
rng = np.random.RandomState(0)
# Draw a 10 x 2000 matrix and cast it to float32 in one step.
X = rng.rand(10, 2000).astype('float32')
X.dtype

dtype('float32')

In [17]:
# Project the float32 input with a Gaussian random projection and inspect the result dtype.
transformer = random_projection.GaussianRandomProjection()
X_new = transformer.fit_transform(X)
X_new.dtype

dtype('float64')

In [18]:
from sklearn import datasets
from sklearn.svm import SVC
# Fit a default SVC on iris using the integer targets.
clf = SVC()
iris = datasets.load_iris()
clf.fit(iris.data, iris.target)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [19]:
# Predictions for the first three samples come back as integers, matching the targets used in fit.
list(clf.predict(iris.data[:3]))

[0, 0, 0]

In [20]:
# Refit on string labels: indexing target_names with the integer targets maps each class index to its name.
clf.fit(iris.data, iris.target_names[iris.target])

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [21]:
# After refitting on string labels, the same call returns class names instead of integers.
list(clf.predict(iris.data[:3]))

['setosa', 'setosa', 'setosa']

# Refitting and updating parameters

In [22]:
import numpy as np
from sklearn.svm import SVC

In [25]:
# Seeded generator; X, y and X_test are drawn from the same stream in this order.
rng = np.random.RandomState(0)
X = rng.random_sample((100, 10))
y = rng.binomial(n=1, p=0.5, size=100)
X_test = rng.random_sample((5, 10))

In [26]:
# Start from a default SVC, switch it to a linear kernel, then train.
clf = SVC()
clf.set_params(kernel='linear')
clf.fit(X, y)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [27]:
# Predictions from the linear-kernel fit.
clf.predict(X_test)

array([1, 0, 1, 1, 0])

In [28]:
# Switch the existing estimator to an RBF kernel and refit it on the same data.
clf.set_params(kernel='rbf')
clf.fit(X, y)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [29]:
# Predictions from the RBF-kernel refit (they differ from the linear-kernel ones above).
clf.predict(X_test)

array([0, 0, 0, 1, 0])

## Statistical learning: the setting and estimator object in scikit-learn


In [35]:
from sklearn import datasets
# Load iris and look at the dimensions of its feature array.
iris = datasets.load_iris()
data = iris.data
data.shape

(150, 4)

In [36]:
import numpy as np
from sklearn import datasets
# Separate iris into features and targets, then list the distinct class labels.
iris = datasets.load_iris()
iris_X, iris_y = iris.data, iris.target
np.unique(iris_y)

array([0, 1, 2])

In [37]:
# Shuffle the sample indices with a fixed seed, then hold out the last 10 shuffled samples for testing.
np.random.seed(0)
indices = np.random.permutation(len(iris_X))
iris_X_train, iris_y_train = iris_X[indices[:-10]], iris_y[indices[:-10]]
iris_X_test, iris_y_test = iris_X[indices[-10:]], iris_y[indices[-10:]]

In [39]:
from sklearn.neighbors import KNeighborsClassifier
# Fit a k-nearest-neighbors classifier with default settings on the training split.
knn = KNeighborsClassifier()
knn.fit(iris_X_train, iris_y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform')

In [None]:
# Predict labels for the held-out samples (compare the result with iris_y_test).
knn.predict(iris_X_test)