In [1]:
# Python ≥3.6 is required (later cells use f-strings, which 3.5 cannot parse)
import sys
assert sys.version_info >= (3, 6)

# Scikit-Learn ≥0.20 is required
import re
import sklearn
# Compare (major, minor) numerically. A plain string comparison is lexicographic:
# "0.3" >= "0.20" is True and "0.100" >= "0.20" is False, both of which are wrong.
_sklearn_version = re.match(r"(\d+)\.(\d+)", sklearn.__version__)
assert _sklearn_version is not None, "unrecognised scikit-learn version string"
assert tuple(int(part) for part in _sklearn_version.groups()) >= (0, 20)

# Common imports
import numpy as np
import os

# Seed NumPy's global random generator so this notebook's output is stable across runs
np.random.seed(42)

# Render figures inline and set the default font sizes for axis and tick labels
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

# Figures are saved under <PROJECT_ROOT_DIR>/images/<CHAPTER_ID>;
# the directory is created here if it does not exist yet
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "MNISTTraining"
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)
os.makedirs(IMAGES_PATH, exist_ok=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    """Save the current Matplotlib figure inside IMAGES_PATH.

    The file is named ``<fig_id>.<fig_extension>`` and written at
    ``resolution`` dpi. When ``tight_layout`` is true, ``plt.tight_layout()``
    is applied before the figure is saved.
    """
    target = os.path.join(IMAGES_PATH, ".".join((fig_id, fig_extension)))
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(target, dpi=resolution, format=fig_extension)

# Ignore useless warnings (see SciPy issue #5998)
import warnings
warnings.filterwarnings(action="ignore", message="^internal gelsd")

In [2]:
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import fetch_openml

# Fetch version 1 of the 'mnist_784' dataset from OpenML
mnist = fetch_openml(name='mnist_784', version=1)
# Store the labels as 8-bit unsigned integers
mnist["target"] = mnist["target"].astype(np.uint8)

In [13]:
from sklearn.model_selection import train_test_split
# First hold out 20% of the data as the test set ...
X_train_, X_test, y_train_, y_test = train_test_split(
    mnist["data"], mnist["target"], test_size=0.2, random_state=42
)
# ... then 25% of the remainder (20% of the total) as the validation set
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_, y_train_, test_size=0.25, random_state=42
)
print(len(X_train), len(y_valid), len(y_test))

42000 14000 14000


In [18]:
# Multinomial (softmax) logistic regression with C=10 and up to 1000 solver iterations.
# NOTE(review): the recorded run still stopped at the iteration limit; the
# warning it printed suggests scaling the data or raising max_iter.
softmax_reg = LogisticRegression(
    multi_class="multinomial",
    C=10,
    max_iter=1000,
    random_state=42,
)
softmax_reg.fit(X_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


LogisticRegression(C=10, max_iter=1000, multi_class='multinomial',
                   random_state=42)

In [5]:
# Score the fitted softmax model on the held-out validation split
softmax_reg.score(X_valid, y_valid)

0.9173571428571429

In [None]:
from sklearn.metrics import mean_squared_error as mse
from sklearn.model_selection import train_test_split
import math
import time

def plot_learning_curves(model, X_t, X_v, y_t, y_v, train_sizes=None):
    """Plot training and validation RMSE against the training-set size.

    For every size ``m`` in ``train_sizes`` the model is re-fitted in place on
    the first ``m`` training samples, then evaluated on that same subset and on
    the full validation set. Both curves are drawn on the current Matplotlib
    axes; the caller is responsible for calling ``plt.show()``.

    Parameters
    ----------
    model : object with ``fit(X, y)`` and ``predict(X)``
        Re-fitted on each subset; after the call it is left fitted on the
        last subset.
    X_t, y_t : training features and targets (must support ``[:m]`` slicing)
    X_v, y_v : validation features and targets
    train_sizes : iterable of int, optional
        Training-subset sizes to evaluate. Defaults to
        ``range(6000, 42001, 6000)``, the sizes that were previously hard-coded.

    NOTE(review): RMSE treats the targets as numbers. When the targets are
    class labels (as in this notebook) the magnitude of an error depends on
    the arbitrary label values, so an error-rate metric may be more meaningful.
    """
    if train_sizes is None:
        train_sizes = range(6000, 42001, 6000)

    sizes_used, train_errors, valid_errors = [], [], []
    for m in train_sizes:
        X_subset, y_subset = X_t[:m], y_t[:m]
        model.fit(X_subset, y_subset)
        train_errors.append(mse(y_subset, model.predict(X_subset)))
        valid_errors.append(mse(y_v, model.predict(X_v)))
        # Record how many samples were really used (slicing clips m to the data length)
        sizes_used.append(len(y_subset))

    # Plot against the real subset sizes so the x-axis matches its label;
    # previously the curves were drawn against the list indices 0, 1, 2, ...
    plt.plot(sizes_used, np.sqrt(train_errors), "r-+", linewidth=2, label="train_set")
    plt.plot(sizes_used, np.sqrt(valid_errors), "b-", linewidth=3, label="valid_set")
    plt.legend(loc="lower right", fontsize=14)
    plt.xlabel("train_set_size", fontsize=14)
    plt.ylabel("RMSE", fontsize=14)
    


In [None]:
# Time the learning-curve run with perf_counter(): unlike the wall clock
# returned by time.time(), it cannot jump if the system time is adjusted mid-run.
start = time.perf_counter()
plot_learning_curves(softmax_reg, X_train, X_valid, y_train, y_valid)
end = time.perf_counter()

print(f"Softmax 학습 시간: {end - start:.3f} sec")

plt.show()

In [23]:
from sklearn.model_selection import GridSearchCV
import math
import time

# Base estimator for the search; each candidate overrides only the parameter
# named in its grid and keeps the remaining settings given here.
softmax_reg = LogisticRegression(multi_class='multinomial', solver='lbfgs', C=10, random_state=42)
param_grid = [
    # Two independent one-dimensional searches (7 + 6 = 13 candidates), not a cross product.
    # 1) vary C on a log scale; max_iter stays at the estimator default.
    #    (The list previously read "0.1,0,1,10", which added the invalid
    #    candidate C=0 -- C must be positive.)
    {'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000]},
    # 2) vary max_iter; C stays at the base estimator's value of 10.
    {'max_iter': [100, 200, 300, 400, 500, 600]},
]
# Rank candidates by accuracy: this is a classification task, and a squared-error
# scorer would treat the digit labels as quantities (confusing 1 with 9 would
# count far more than confusing 1 with 2).
grid_search = GridSearchCV(softmax_reg, param_grid,
                           n_jobs=4,
                           scoring='accuracy',
                           return_train_score=True)


In [None]:
# Time the grid search with perf_counter(): unlike the wall clock returned by
# time.time(), it cannot jump if the system time is adjusted mid-run.
start = time.perf_counter()
grid_search.fit(X_train, y_train)
end = time.perf_counter()

print(f"Grid Search 학습 시간: {end - start:.3f} sec")
# No plt.show() here: this cell does not draw a figure.

In [None]:
# Display the estimator selected by the grid search (requires grid_search.fit to have run)
grid_search.best_estimator_

In [22]:
# Display the parameter setting selected by the grid search
grid_search.best_params_

{'C': 0.001}

In [10]:
# Softmax model using C=0.001; max_iter is left at the estimator default.
# NOTE(review): the recorded run stopped at the iteration limit without converging.
softmax_reg_1 = LogisticRegression(
    multi_class="multinomial",
    C=0.001,
    random_state=42,
)
softmax_reg_1.fit(X_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


LogisticRegression(C=0.001, multi_class='multinomial', random_state=42)

In [11]:
# Score the C=0.001 softmax model on the held-out validation split
softmax_reg_1.score(X_valid, y_valid)

0.9165714285714286

In [None]:
# Time the learning-curve run with perf_counter(): unlike the wall clock
# returned by time.time(), it cannot jump if the system time is adjusted mid-run.
start = time.perf_counter()
plot_learning_curves(softmax_reg_1, X_train, X_valid, y_train, y_valid)
end = time.perf_counter()

print(f"Grid Search를 이용한 Hyperparameter를 적용한 Softmax 학습 시간: {end - start:.3f} sec")

plt.show()