In [None]:
# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)

# Scikit-Learn ≥0.20 is required.
# Compare the numeric (major, minor) components: comparing the raw version
# strings is lexicographic, so e.g. "0.9" >= "0.20" would wrongly pass.
import re
import sklearn
assert tuple(int(part) for part in re.findall(r"\d+", sklearn.__version__)[:2]) >= (0, 20)

# Common imports
import numpy as np
import os

# to make this notebook's output stable across runs
np.random.seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

# Where to save the figures
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "MNISTTraining"
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)
os.makedirs(IMAGES_PATH, exist_ok=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    """Save the current matplotlib figure as ``IMAGES_PATH/<fig_id>.<fig_extension>``.

    Args:
        fig_id: Base file name without extension; also echoed in the log line.
        tight_layout: When true, ``plt.tight_layout()`` is applied before saving.
        fig_extension: Used as the file extension and as the ``format``
            passed to ``plt.savefig``.
        resolution: Passed to ``plt.savefig`` as ``dpi``.
    """
    file_name = ".".join((fig_id, fig_extension))
    target = os.path.join(IMAGES_PATH, file_name)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(target, format=fig_extension, dpi=resolution)

# Ignore useless warnings (see SciPy issue #5998)
import warnings
warnings.filterwarnings(action="ignore", message="^internal gelsd")

In [None]:
from sklearn.datasets import fetch_openml

# Fetch version 1 of the OpenML dataset named "mnist_784"
mnist = fetch_openml(name='mnist_784', version=1)

In [None]:
# Depending on the scikit-learn release, fetch_openml hands back either pandas
# objects or plain NumPy arrays. np.asarray accepts both, whereas .to_numpy()
# only exists on the pandas objects.
X, y = np.asarray(mnist["data"]), np.asarray(mnist["target"])
y = y.astype(np.uint8)  # store the labels as unsigned 8-bit integers

In [None]:
from sklearn.model_selection import train_test_split

# Two-stage split: hold out 20% as the test set, then 25% of the remaining 80%
# as the validation set, i.e. train : valid : test = 6 : 2 : 2.
X_train_, X_test, y_train_, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_, y_train_, test_size=0.25, random_state=42
)

In [None]:
from sklearn.svm import LinearSVC

# Baseline: linear SVM classifier trained on the unscaled training features
lin_clf = LinearSVC(max_iter=10000, random_state=42).fit(X_train, y_train)
lin_clf  # fit() returns the estimator itself; echo it as the cell output

In [None]:
# Score of the unscaled LinearSVC baseline on the validation split
lin_clf.score(X=X_valid, y=y_valid)

In [None]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training set only, then apply those same statistics to
# the validation set. Re-fitting on the validation data would scale it with
# different means/variances than the ones the model was trained on.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train.astype(np.float32))
X_valid_scaled = scaler.transform(X_valid.astype(np.float32))

# Re-train the linear SVM on the standardized features and score it on the
# identically transformed validation split.
lin_clf.fit(X_train_scaled, y_train)
lin_clf.score(X_valid_scaled, y_valid)

In [None]:
from sklearn.svm import SVC

# SVC (kernel left at its default) trained on the standardized training features
svm_clf = SVC(gamma="auto", decision_function_shape="ovr").fit(X_train_scaled, y_train)
svm_clf  # fit() returns the estimator itself; echo it as the cell output

In [None]:
# Score of the SVC on the standardized validation split
svm_clf.score(X=X_valid_scaled, y=y_valid)

In [None]:
from sklearn.model_selection import GridSearchCV
import math
import time  # used by the timing cell that runs the search

svm_clf = SVC(gamma='auto')
param_grid = [
    # 20 hyperparameter combinations in total:
    # 4 values of C with the linear kernel ...
    {'C': [1, 10, 100, 1000], 'kernel': ['linear']},
    # ... plus 4 x 4 (C, gamma) pairs with the RBF kernel
    {'C': [1, 10, 100, 1000], 'kernel': ['rbf'], 'gamma': [0.5, 0.1, 0.01, 0.001]},
]
# Rank candidates by classification accuracy. A squared-error score would treat
# the digit labels as numeric quantities (confusing 1 with 9 would count as far
# worse than confusing 1 with 2), which is not a meaningful way to pick a classifier.
grid_search = GridSearchCV(svm_clf, param_grid,
                           n_jobs=-1,
                           scoring='accuracy',
                           return_train_score=True)


In [None]:
# Run the grid search on the standardized training data and measure how long
# the whole fit takes using wall-clock timestamps taken before and after.
start = time.time()
grid_search.fit(X_train_scaled, y_train)
end = time.time()

# The message reads "Grid Search training time: <seconds> sec" (Korean text).
print(f"Grid Search 학습 시간: {end - start:.3f} sec")

In [None]:
# Display the best estimator selected by the grid search
grid_search.best_estimator_

In [None]:
# Fresh, unfitted SVC carrying the hyperparameters selected by the grid search.
# (The previous placeholder left C and kernel blank, which is a SyntaxError.)
from sklearn.base import clone
svm_clf_1 = clone(grid_search.best_estimator_)

In [None]:
from sklearn.metrics import mean_squared_error as mse
from sklearn.model_selection import train_test_split
import math
import time

def plot_learning_curves(model, X_t, X_v, y_t, y_v, train_sizes=None):
    """Plot train/validation RMSE of ``model`` for growing training subsets.

    For every requested size ``m`` the model is re-fit on the first ``m``
    training samples; RMSE is then measured on that same subset and on the
    full validation set. Both curves are drawn with ``plt.plot`` on the
    current matplotlib figure (the caller is responsible for ``plt.show()``).

    Args:
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``. It is
            fitted in place, so after the call it is left fit on the last subset.
        X_t: Training features (sliceable with ``[:m]``).
        X_v: Validation features.
        y_t: Training targets (sliceable with ``[:m]``).
        y_v: Validation targets.
        train_sizes: Iterable of training-subset sizes. Defaults to
            ``range(6000, 42001, 6000)``, the sizes that were previously
            hard-coded.

    Note:
        The metric is RMSE computed directly on the target values; when used
        with a classifier this treats the class labels as numbers.
    """
    if train_sizes is None:
        train_sizes = range(6000, 42001, 6000)

    used_sizes, train_errors, valid_errors = [], [], []
    for m in train_sizes:
        X_subset, y_subset = X_t[:m], y_t[:m]
        model.fit(X_subset, y_subset)
        y_train_predict = model.predict(X_subset)
        y_valid_predict = model.predict(X_v)
        train_errors.append(mse(y_subset, y_train_predict))
        valid_errors.append(mse(y_v, y_valid_predict))
        # Record the number of samples actually used: slicing silently
        # truncates when m exceeds the available training data.
        used_sizes.append(len(y_subset))

    # Plot against the real subset sizes so the x-axis matches its label
    # (previously the curves were drawn against the list index 0, 1, 2, ...).
    plt.plot(used_sizes, np.sqrt(train_errors), "r-+", linewidth=2, label="train_set")
    plt.plot(used_sizes, np.sqrt(valid_errors), "b-", linewidth=3, label="valid_set")
    plt.legend(loc="upper right", fontsize=14)
    plt.xlabel("train_set_size", fontsize=14)
    plt.ylabel("RMSE", fontsize=14)

In [None]:
# Draw the learning curves for the linear SVM and measure how long the
# repeated re-fitting takes using wall-clock timestamps.
start = time.time()
# NOTE(review): this passes the unscaled X_train / X_valid, although lin_clf was
# last fit on the standardized features in an earlier cell — confirm this is intended.
plot_learning_curves(lin_clf, X_train, X_valid, y_train, y_valid)
end = time.time()

# The message reads "LinearSVC Classifier training time: <seconds> sec" (Korean text).
print(f"LinearSVC Classifier학습 시간: {end - start:.3f} sec")

plt.show()