In [1]:
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import os

# to make this notebook's output stable across runs
np.random.seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
# Apply the notebook-wide font sizes for axis labels and tick labels in one call.
plt.rcParams.update({
    'axes.labelsize': 14,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
})

# Where to save the figures
# Figures are written to <PROJECT_ROOT_DIR>/images/<CHAPTER_ID>/.
PROJECT_ROOT_DIR = "."
# NOTE(review): the cells below train SVM regressors, so this chapter id looks
# like it was carried over from another notebook -- confirm the intended folder.
CHAPTER_ID = "training_linear_models"
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)
# Create the output directory up front; exist_ok=True makes re-running this cell harmless.
os.makedirs(IMAGES_PATH, exist_ok=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    """Write the current pyplot figure to ``IMAGES_PATH``.

    Args:
        fig_id: Base name of the output file (without extension); also echoed
            in the progress message.
        tight_layout: When true, call ``plt.tight_layout()`` before saving.
        fig_extension: File extension, also passed to ``savefig`` as the format.
        resolution: Value passed to ``savefig`` as ``dpi``.
    """
    # File name is "<fig_id>.<fig_extension>" inside the images directory.
    target = os.path.join(IMAGES_PATH, ".".join((fig_id, fig_extension)))
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(target, format=fig_extension, dpi=resolution)

In [3]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVR

# Load the California housing dataset and pull out features and regression target.
housing = fetch_california_housing()
X, y = housing["data"], housing["target"]

# Hold out 20% of the samples as a test set; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training split only, then reuse it unchanged on the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Baseline: linear SVM regressor on the scaled features.
# fit() is the last expression so the notebook displays the fitted estimator.
lin_svr = LinearSVR(random_state=42)
lin_svr.fit(X_train_scaled, y_train)
# 2017250045 Jeong Tae-hwan (author tag, romanized from Korean)



LinearSVR(random_state=42)

In [4]:
from sklearn.metrics import mean_squared_error

# Score the linear SVR on the same data it was fitted on (training error, not test error).
y_pred = lin_svr.predict(X_train_scaled)
mse = mean_squared_error(y_train, y_pred)
# Bare expression so the notebook displays the MSE; `mse` is read again by the next cell.
mse
# 2017250045 Jeong Tae-hwan (author tag, romanized from Korean)

0.9641780189948642

In [5]:
# RMSE of the linear SVR on the training set: square root of the MSE computed in the previous cell.
np.sqrt(mse)
# 2017250045 Jeong Tae-hwan (author tag, romanized from Korean)

0.9819256687727764

In [6]:
from scipy.stats import reciprocal, uniform
from sklearn.model_selection import RandomizedSearchCV
from sklearn.svm import SVR

# Search space for the SVR hyperparameters:
#   gamma ~ log-uniform over [0.001, 0.1]
#   C     ~ uniform over [1, 11] (scipy's uniform takes loc and scale, not low and high)
param_distributions = {
    "gamma": reciprocal(0.001, 0.1),
    "C": uniform(1, 10),
}

# 10 sampled candidates x 3-fold cross-validation = 30 fits; seeded for repeatable sampling.
rnd_search_cv = RandomizedSearchCV(
    estimator=SVR(),
    param_distributions=param_distributions,
    n_iter=10,
    cv=3,
    verbose=2,
    random_state=42,
)
# fit() is the last expression so the notebook displays the fitted search object.
rnd_search_cv.fit(X_train_scaled, y_train)
# 2017250045 Jeong Tae-hwan (author tag, romanized from Korean)

Fitting 3 folds for each of 10 candidates, totalling 30 fits
[CV] END .....C=4.745401188473625, gamma=0.07969454818643928; total time=  12.8s
[CV] END .....C=4.745401188473625, gamma=0.07969454818643928; total time=  12.4s
[CV] END .....C=4.745401188473625, gamma=0.07969454818643928; total time=  13.6s
[CV] END .....C=8.31993941811405, gamma=0.015751320499779724; total time=  14.0s
[CV] END .....C=8.31993941811405, gamma=0.015751320499779724; total time=  12.8s
[CV] END .....C=8.31993941811405, gamma=0.015751320499779724; total time=  13.0s
[CV] END ....C=2.560186404424365, gamma=0.002051110418843397; total time=  13.0s
[CV] END ....C=2.560186404424365, gamma=0.002051110418843397; total time=  12.3s
[CV] END ....C=2.560186404424365, gamma=0.002051110418843397; total time=  12.5s
[CV] END ....C=1.5808361216819946, gamma=0.05399484409787431; total time=  12.9s
[CV] END ....C=1.5808361216819946, gamma=0.05399484409787431; total time=  12.2s
[CV] END ....C=1.5808361216819946, gamma=0.05399

RandomizedSearchCV(cv=3, estimator=SVR(),
                   param_distributions={'C': <scipy.stats._distn_infrastructure.rv_frozen object at 0x0000021D6B181FD0>,
                                        'gamma': <scipy.stats._distn_infrastructure.rv_frozen object at 0x0000021D6B298820>},
                   random_state=42, verbose=2)

In [7]:
# Display the best SVR found by the randomized search, with its chosen C and gamma.
rnd_search_cv.best_estimator_
# 2017250045 Jeong Tae-hwan (author tag, romanized from Korean)

SVR(C=4.745401188473625, gamma=0.07969454818643928)

In [8]:
# RMSE of the best SVR on the training set.
# Note: this rebinds the notebook-level `y_pred` and `mse` set by earlier cells.
y_pred = rnd_search_cv.best_estimator_.predict(X_train_scaled)
mse = mean_squared_error(y_train, y_pred)
np.sqrt(mse)
# 2017250045 Jeong Tae-hwan (author tag, romanized from Korean)

0.5727524770785369

In [9]:
# RMSE of the best SVR on the held-out test set (scaled with the scaler fitted on the training split).
# Note: this rebinds the notebook-level `y_pred` and `mse` set by earlier cells.
y_pred = rnd_search_cv.best_estimator_.predict(X_test_scaled)
mse = mean_squared_error(y_test, y_pred)
np.sqrt(mse)
# 2017250045 Jeong Tae-hwan (author tag, romanized from Korean)

0.5929168385528745