In [1]:
import numpy as np
import pandas as pd
from lightgbm import LGBMRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.neural_network import MLPRegressor
from xgboost import XGBRegressor

In [2]:
# Numeric IDs considered in this notebook, in descending order. Two of them
# (1040011100 and 1040011000) match the final_<id>.csv files loaded further down.
id_list = [
    1040012100,
    1040012000,
    1040011100,
    1040011000,
    1040010100,
    1040010000,
    1040009100,
    1040009000,
]

In [35]:
#def get_avg_rmse_cv(models):
#    
#    for model in models:
#        rmse_list = np.sqrt(-cross_val_score(model, X, y, scoring= "neg_mean_squared_error", cv = 5))
#        rmse_avg = np.mean(rmse_list)
#        print('\n{0} CV RMSE list: {1}'.format(model.__class__.__name__, np.round(rmse_list,3)))
#        print('\n{0} CV 평균 RMSE: {1}'.format(model.__class__.__name__, rmse_avg))


In [3]:
# Candidate regressors compared by the grid searches in this notebook.
# Every estimator with a random component is seeded so that a fresh
# "Restart & Run All" reproduces the same cross-validation scores.
model_xgb = XGBRegressor(random_state=0)
model_lgbm = LGBMRegressor(random_state=0)
model_rf = RandomForestRegressor(random_state=0)
# MLPRegressor initialises its weights and samples mini-batches at random;
# it was the only stochastic model left unseeded, so its score changed
# from run to run. It now uses the same seed as the other models.
model_mlp = MLPRegressor(
    activation='relu',
    hidden_layer_sizes=(32, 64, 32),
    max_iter=1000,
    random_state=0,
)
# Ordinary least squares has no random component, so no seed is needed.
model_lr = LinearRegression()

In [10]:
# Hyperparameter search spaces: one grid per model, handed to grid_best as
# its `params` argument (which forwards it to GridSearchCV's param_grid).
xgb_parameters = dict(
    n_estimators=[50, 100, 200],
    max_depth=[3, 5, 7],
    learning_rate=[0.01, 0.1],
)
lgbm_parameters = dict(
    n_estimators=[50, 100],
    learning_rate=[0.01, 0.1],
    num_leaves=[20, 31],
)
rf_parameters = dict(
    n_estimators=[50, 100, 200],
    max_depth=[3, 10],
    min_samples_split=[5, 10],
    min_samples_leaf=[1, 2, 4],
)
mlp_parameters = dict(alpha=[0.01, 0.1])
# Single-candidate grid: lets LinearRegression go through the same code path.
lr_parameters = dict(fit_intercept=[True])


In [11]:
def grid_best(model, params, features=None, target=None):
    """Grid-search ``model`` over ``params`` and print the best 5-fold CV RMSE.

    Parameters
    ----------
    model : estimator
        Regressor handed to ``GridSearchCV``.
    params : dict
        Hyperparameter grid passed to ``GridSearchCV`` as ``param_grid``.
    features, target : optional
        Training inputs and labels. When omitted, the notebook-level ``X``
        and ``y`` set by the most recently executed data-loading cell are
        used, which is how the existing two-argument calls work. Pass them
        explicitly to avoid depending on cell execution order.

    Returns
    -------
    None
        The best score and parameters are printed, not returned.
    """
    # Fall back to the notebook globals so existing calls keep their behaviour.
    if features is None:
        features = X
    if target is None:
        target = y

    grid_model = GridSearchCV(
        model,
        param_grid=params,
        scoring='neg_mean_squared_error',
        cv=5,
    )
    grid_model.fit(features, target)

    # best_score_ is the mean *negated* MSE of the best candidate, so flip the
    # sign before taking the root. Note that this is sqrt(mean fold MSE), not
    # the mean of the per-fold RMSE values.
    rmse = np.sqrt(-grid_model.best_score_)
    print('{0} 5 CV시 최적 평균 RMSE 값: {1}, 최적 parameter:{2}'.format(model.__class__.__name__, rmse, grid_model.best_params_))

### 1040011100

In [12]:
# Read the table for ID 1040011100 (cp949-encoded CSV, first column used as the index).
df = pd.read_csv("final_1040011100.csv", index_col=0, encoding="cp949")
# Features: every column except the target 'value' and the 'time' column.
X = df.drop(columns=["value", "time"])
# Regression target.
y = df["value"]

In [13]:
# Run the grid search for each model on the currently loaded X / y,
# in the same order as before: XGBoost, LightGBM, random forest, MLP, linear.
for estimator, search_space in (
    (model_xgb, xgb_parameters),
    (model_lgbm, lgbm_parameters),
    (model_rf, rf_parameters),
    (model_mlp, mlp_parameters),
    (model_lr, lr_parameters),
):
    grid_best(estimator, search_space)

XGBRegressor 5 CV시 최적 평균 RMSE 값: 2.0656234442829313, 최적 parameter:{'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 100}
LGBMRegressor 5 CV시 최적 평균 RMSE 값: 2.0304509379668505, 최적 parameter:{'learning_rate': 0.1, 'n_estimators': 100, 'num_leaves': 31}
RandomForestRegressor 5 CV시 최적 평균 RMSE 값: 2.0101533658233675, 최적 parameter:{'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 200}
MLPRegressor 5 CV시 최적 평균 RMSE 값: 5.495583429647198, 최적 parameter:{'alpha': 0.01}
LinearRegression 5 CV시 최적 평균 RMSE 값: 2.579419322793655, 최적 parameter:{'fit_intercept': True}


### 1040011000

In [14]:
# Read the table for ID 1040011000 (cp949-encoded CSV, first column used as the index).
# This rebinds df, X and y, replacing the data of the previous section.
df = pd.read_csv("final_1040011000.csv", index_col=0, encoding="cp949")
# Features: every column except the target 'value' and the 'time' column.
X = df.drop(columns=["value", "time"])
# Regression target.
y = df["value"]

In [15]:
# Run the grid search for each model on the currently loaded X / y,
# in the same order as before: XGBoost, LightGBM, random forest, MLP, linear.
for estimator, search_space in (
    (model_xgb, xgb_parameters),
    (model_lgbm, lgbm_parameters),
    (model_rf, rf_parameters),
    (model_mlp, mlp_parameters),
    (model_lr, lr_parameters),
):
    grid_best(estimator, search_space)

XGBRegressor 5 CV시 최적 평균 RMSE 값: 2.4551515960881387, 최적 parameter:{'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 200}
LGBMRegressor 5 CV시 최적 평균 RMSE 값: 2.3982692760748954, 최적 parameter:{'learning_rate': 0.1, 'n_estimators': 100, 'num_leaves': 20}
RandomForestRegressor 5 CV시 최적 평균 RMSE 값: 2.368513304168615, 최적 parameter:{'max_depth': 10, 'min_samples_leaf': 4, 'min_samples_split': 10, 'n_estimators': 200}
MLPRegressor 5 CV시 최적 평균 RMSE 값: 5.724887359803671, 최적 parameter:{'alpha': 0.01}
LinearRegression 5 CV시 최적 평균 RMSE 값: 2.6407353815970454, 최적 parameter:{'fit_intercept': True}
