In [8]:
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import os

# to make this notebook's output stable across runs
np.random.seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12 
plt.rcParams['ytick.labelsize'] = 12

# Where to save the figures
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "training_linear_models"
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)
os.makedirs(IMAGES_PATH, exist_ok=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    path = os.path.join(IMAGES_PATH, fig_id + "." + fig_extension)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format=fig_extension, dpi=resolution)

In [9]:
print("\n\nExercise: train an SVM classifier on the MNIST dataset. \n"
      "Since SVM classifiers are binary classifiers, you will need to use \n"
      "one-versus-all to classify all 10 digits. You may want to tune \n" 
      "the hyperparameters using small validation sets to speed up the process. \n" 
      "What accuracy can you reach?\n")
print("\nFirst, let's load the dataset and split it into a training set and \n" 
      "a test set. We could use train_test_split() but people usually just take \n" 
      "the first 60,000 instances for the training set, and the last 10,000 instances \n"
      "for the test set (this makes it possible to compare your model's performance with others):\n")



Exercise: train an SVM classifier on the MNIST dataset. 
Since SVM classifiers are binary classifiers, you will need to use 
one-versus-all to classify all 10 digits. You may want to tune 
the hyperparameters using small validation sets to speed up the process. 
What accuracy can you reach?


First, let's load the dataset and split it into a training set and 
a test set. We could use train_test_split() but people usually just take 
the first 60,000 instances for the training set, and the last 10,000 instances 
for the test set (this makes it possible to compare your model's performance with others):



In [10]:
from sklearn.svm import SVC, LinearSVC 
from sklearn.preprocessing import StandardScaler

from sklearn.datasets import fetch_openml
mnist = fetch_openml('mnist_784',version=1)

X = mnist['data']
y = mnist['target']

X_train = X[:60000]
y_train = y[:60000]
X_test = X[60000:]
y_test=y[60000:]

lin_clf = LinearSVC(random_state=42) 
print("\nlin_clf.fit(X_train, y_train): \n", lin_clf.fit(X_train, y_train))
#2017250045 정태환


lin_clf.fit(X_train, y_train): 
 LinearSVC(random_state=42)




In [11]:
from sklearn.metrics import accuracy_score

y_pred = lin_clf.predict(X_train)
accuracy_score(y_train,y_pred)
#2017250045 정태환

0.8348666666666666

In [12]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train.astype(np.float32))
X_test_scaled = scaler.fit_transform(X_test.astype(np.float32))
#2017250045 정태환
lin_clf = LinearSVC(random_state=42)
print("\nlin_clf.fit(X_train_scaled, y_train): \n", lin_clf.fit(X_train_scaled, y_train))


lin_clf.fit(X_train_scaled, y_train): 
 LinearSVC(random_state=42)




In [13]:
y_pred = lin_clf.predict(X_train_scaled) 
print("\naccuracy_score(y_train, y_pred): \n", accuracy_score(y_train, y_pred))
#2017250045 정태환


accuracy_score(y_train, y_pred): 
 0.9214


In [14]:
svm_clf = SVC(gamma="scale")
#2017250045 정태환
print("\nsvm_clf.fit(X_train_scaled[:10000], y_train[:10000]): \n", svm_clf.fit(X_train_scaled[:10000], y_train[:10000]))


svm_clf.fit(X_train_scaled[:10000], y_train[:10000]): 
 SVC()


In [15]:
y_pred = svm_clf.predict(X_train_scaled)
print("\naccuracy_score(y_train, y_pred): \n", accuracy_score(y_train, y_pred))
#2017250045 정태환


accuracy_score(y_train, y_pred): 
 0.9455333333333333


In [16]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import reciprocal, uniform

param_dist = {'gamma':reciprocal(0.001,0.1),"C":uniform(1,10)}

rnd_search_cv =  RandomizedSearchCV(svm_clf,param_dist,cv=3,n_iter=10,verbose=2,n_jobs=-1)
rnd_search_cv.fit(X_train_scaled[:1000],y_train[:1000])
#2017250045 정태환
print("\nrnd_search_cv.best_estimator_: \n", rnd_search_cv.best_estimator_)
print("\nrnd_search_cv.best_score_: \n", rnd_search_cv.best_score_)

Fitting 3 folds for each of 10 candidates, totalling 30 fits

rnd_search_cv.best_estimator_: 
 SVC(C=7.116531604882809, gamma=0.0010330768043240405)

rnd_search_cv.best_score_: 
 0.8639897382412353


In [17]:
rnd_search_cv.best_estimator_.fit(X_train_scaled, y_train)

y_pred = rnd_search_cv.best_estimator_.predict(X_train_scaled) 
#2017250045 정태환


In [18]:
accuracy_score(y_train, y_pred)
#2017250045 정태환


0.9972833333333333

In [20]:
y_pred = rnd_search_cv.best_estimator_.predict(X_test_scaled) 
accuracy_score(y_test, y_pred)
#2017250045 정태환

0.9718