# 1. 분석 데이터 준비

In [1]:
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
data = pd.read_csv('house_price.csv',encoding='utf-8')
X = data[data.columns[1:5]]
y = data[['house_value']]

from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,random_state=42)

from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
scaler.fit(X_train)
X_scaled_train = scaler.transform(X_train)
X_scaled_test = scaler.transform(X_test)

# 2. 기본 모델 적용

In [4]:
from sklearn.linear_model import Lasso
model = Lasso()
model.fit(X_scaled_train,y_train)
pred_train=model.predict(X_scaled_train)
pred_test=model.predict(X_scaled_test)
model.score(X_scaled_train,y_train)

0.5455724679313863

In [5]:
model.score(X_scaled_test,y_test)

0.5626850497564577

In [6]:
import numpy as np
from sklearn.metrics import mean_squared_error

MSE_train = mean_squared_error(y_train,pred_train)
MSE_test = mean_squared_error(y_test,pred_test)
print("훈련 데이터 RMSE : ",np.sqrt(MSE_train))
print("테스트 데이터 RMSE : ",np.sqrt(MSE_test))

훈련 데이터 RMSE :  64340.34152172676
테스트 데이터 RMSE :  63220.748913873045


# 3. Grid Search

In [7]:
from sklearn.model_selection import GridSearchCV
param_grid = {'alpha' : [0.0,1e-6,1e-5,1e-4,1e-3,1e-2,0.1,0.5,1.0,2.0,3.0]}
grid_search = GridSearchCV(Lasso(),param_grid,cv=5)
grid_search.fit(X_scaled_train,y_train)

GridSearchCV(cv=5, estimator=Lasso(),
             param_grid={'alpha': [0.0, 1e-06, 1e-05, 0.0001, 0.001, 0.01, 0.1,
                                   0.5, 1.0, 2.0, 3.0]})

In [8]:
print("Best Parameter : ", grid_search.best_params_)
print("Best Score : ", grid_search.best_score_)
print("Testset Score : ", grid_search.score(X_scaled_test,y_test))

Best Parameter :  {'alpha': 0.5}
Best Score :  0.5451645008645551
Testset Score :  0.5626846805056158


# 4. Random Search

In [9]:
from scipy.stats import randint
from sklearn.model_selection import RandomizedSearchCV
param_distribs = {'alpha' : randint(low=0.00001,high=10)}
random_search = RandomizedSearchCV(Lasso(),
                                   param_distributions=param_distribs,
                                   n_iter=100,cv=5)
random_search.fit(X_scaled_train,y_train)

RandomizedSearchCV(cv=5, estimator=Lasso(), n_iter=100,
                   param_distributions={'alpha': <scipy.stats._distn_infrastructure.rv_frozen object at 0x0000024AF23221C0>})

In [10]:
print("Best Parameter : ", random_search.best_params_)
print("Best Score : ", random_search.best_score_)
print("Testset Score : ", random_search.score(X_scaled_test,y_test))

Best Parameter :  {'alpha': 1}
Best Score :  0.54516447554499
Testset Score :  0.5626850497564577
