# Ridge 회귀 모델

출처

<img src="http://www.databaser.net/moniwiki/pds/Python_2dLinearRegression/lm_compare.png">

## data/library 불러오기

In [1]:
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
data=pd.read_csv("house_price.csv")
x=data[data.columns[1:5]]
y=data[['house_value']]

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
scaler=MinMaxScaler()
x_train,x_test,y_train,y_test= train_test_split(x,y,random_state=42)
scaler.fit(x_train)
x_scaled_train=scaler.transform(x_train)
x_scaled_test=scaler.transform(x_test)

In [2]:
from sklearn.linear_model import Ridge

## model 학습하기

In [6]:
model=Ridge()
model.fit(x_scaled_train,y_train)
pred_train=model.predict(x_scaled_train)
pred_test=model.predict(x_scaled_test)

## 결과 확인하기

In [10]:
model.score(x_scaled_train,y_train)

0.5455487773718164

In [12]:
model.score(x_scaled_test,y_test)

0.5626954941458684

## RMSE 확인하기

In [14]:
import numpy as np
from sklearn.metrics import mean_squared_error
MSE_train=mean_squared_error(y_train,pred_train)
MSE_test=mean_squared_error(y_test,pred_test)
print(np.sqrt(MSE_train))
print(np.sqrt(MSE_test))

64342.018619526265
63219.99395904853


## hyper parameter 최적화

### GridSearch

In [16]:
param_grid={"alpha":[1e-4,1e-3,1e-2,0.1,0.5,1.0,5,10.0]}

In [20]:
from sklearn.model_selection import GridSearchCV
grid_search=GridSearchCV(Ridge(),param_grid,cv=5)
grid_search.fit(x_scaled_train,y_train)

GridSearchCV(cv=5, estimator=Ridge(),
             param_grid={'alpha': [0.0001, 0.001, 0.01, 0.1, 0.5, 1.0, 5,
                                   10.0]})

#### GridSearch 속성 확인

In [25]:
for t in dir(grid_search):
    print(t)

__abstractmethods__
__class__
__delattr__
__dict__
__dir__
__doc__
__eq__
__format__
__ge__
__getattribute__
__getstate__
__gt__
__hash__
__init__
__init_subclass__
__le__
__lt__
__module__
__ne__
__new__
__reduce__
__reduce_ex__
__repr__
__setattr__
__setstate__
__sizeof__
__str__
__subclasshook__
__weakref__
_abc_impl
_check_is_fitted
_check_n_features
_estimator_type
_format_results
_get_param_names
_get_tags
_more_tags
_pairwise
_repr_html_
_repr_html_inner
_repr_mimebundle_
_required_parameters
_run_search
_validate_data
best_estimator_
best_index_
best_params_
best_score_
classes_
cv
cv_results_
decision_function
error_score
estimator
fit
get_params
iid
inverse_transform
multimetric_
n_features_in_
n_jobs
n_splits_
param_grid
pre_dispatch
predict
predict_log_proba
predict_proba
refit
refit_time_
return_train_score
score
scorer_
scoring
set_params
transform
verbose


#### 결과 확인

In [26]:
print("Best Parameter : {}".format(grid_search.best_params_))
print("Best Cross-validity Score : {:.4f}".format(grid_search.best_score_))
print('Test set Score : {:.4f}'.format(grid_search.score(x_scaled_test,y_test)))

Best Parameter : {'alpha': 0.1}
Best Cross-validity Score : 0.5452
Test set Score : 0.5627


### RandomSerach

In [30]:
from scipy.stats import randint
param_distribs={'alpha':randint(low=0.0001,high=100)}
from sklearn.model_selection import RandomizedSearchCV
random_search=RandomizedSearchCV(Ridge(),param_distributions=param_distribs,n_iter=100,cv=5)
random_search.fit(x_scaled_train,y_train)

RandomizedSearchCV(cv=5, estimator=Ridge(), n_iter=100,
                   param_distributions={'alpha': <scipy.stats._distn_infrastructure.rv_frozen object at 0x0000027A65CC4100>})

#### RandomSearch 속성 확인

In [32]:
for t in dir(random_search):
    print(t)

__abstractmethods__
__class__
__delattr__
__dict__
__dir__
__doc__
__eq__
__format__
__ge__
__getattribute__
__getstate__
__gt__
__hash__
__init__
__init_subclass__
__le__
__lt__
__module__
__ne__
__new__
__reduce__
__reduce_ex__
__repr__
__setattr__
__setstate__
__sizeof__
__str__
__subclasshook__
__weakref__
_abc_impl
_check_is_fitted
_check_n_features
_estimator_type
_format_results
_get_param_names
_get_tags
_more_tags
_pairwise
_repr_html_
_repr_html_inner
_repr_mimebundle_
_required_parameters
_run_search
_validate_data
best_estimator_
best_index_
best_params_
best_score_
classes_
cv
cv_results_
decision_function
error_score
estimator
fit
get_params
iid
inverse_transform
multimetric_
n_features_in_
n_iter
n_jobs
n_splits_
param_distributions
pre_dispatch
predict
predict_log_proba
predict_proba
random_state
refit
refit_time_
return_train_score
score
scorer_
scoring
set_params
transform
verbose


In [33]:
#### 결과 확인

print("Best Parameter : {}".format(random_search.best_params_))
print("Best Cross-validity Score : {:.4f}".format(random_search.best_score_))
print('Test set Score : {:.4f}'.format(random_search.score(x_scaled_test,y_test)))

Best Parameter : {'alpha': 0}
Best Cross-validity Score : 0.5452
Test set Score : 0.5627
