In [9]:
from pathlib import Path

import numpy as np
import pandas as pd
from lightgbm import LGBMRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_squared_log_error
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold, train_test_split
from xgboost import XGBRegressor

In [2]:
# Load the preprocessed training and test tables (one CSV each).
train, test = (
    pd.read_csv(csv_path)
    for csv_path in ('data/preprocess/train_0910.csv',
                     'data/preprocess/test_0910.csv')
)

# GCV_RF: grid-search cross-validation for RandomForestRegressor

In [3]:
# Random-forest search grid: 3 x 3 x 3 = 27 candidates, all sharing one seed.
params = dict(
    n_estimators=[100, 300, 500],
    max_features=[3, 4, 5],
    max_depth=[3, 4, 5],
    random_state=[2045],
)

In [4]:
# Base estimator; every tuned hyperparameter (including the seed) comes from `params`.
Model_rf = RandomForestRegressor()

# Exhaustive search over `params`, scored on shuffled 5-fold CV with a fixed
# seed; refit=True retrains the best candidate on the full data afterwards.
# NOTE(review): later cells pass predictions through np.expm1, which suggests
# SalePrice is already log1p-scaled. If so, 'neg_mean_squared_log_error' takes
# a second log and 'neg_mean_squared_error' may be the intended metric - confirm.
gcv_RF = GridSearchCV(
    estimator=Model_rf,
    param_grid=params,
    scoring='neg_mean_squared_log_error',
    cv=KFold(5, shuffle=True, random_state=2045),
    refit=True,
)

In [5]:
# Split the target column from the feature matrix, then run the whole grid
# search; the fitted search object is the cell's displayed value.
y = train.loc[:, 'SalePrice']
X = train.drop('SalePrice', axis=1)
gcv_RF.fit(X, y)

GridSearchCV(cv=KFold(n_splits=5, random_state=2045, shuffle=True),
             estimator=RandomForestRegressor(),
             param_grid={'max_depth': [3, 4, 5], 'max_features': [3, 4, 5],
                         'n_estimators': [100, 300, 500],
                         'random_state': [2045]},
             scoring='neg_mean_squared_log_error')

In [6]:
# Hyperparameter combination that achieved the best mean cross-validated score.
gcv_RF.best_params_

{'max_depth': 5, 'max_features': 5, 'n_estimators': 100, 'random_state': 2045}

In [7]:
# Mean cross-validated score of the best candidate. It is negative because the
# scorer is 'neg_mean_squared_log_error', so values closer to 0 are better.
gcv_RF.best_score_

-0.00023696432504585933

# GCV_XGB: grid-search cross-validation for XGBRegressor

In [14]:
# XGBoost search grid: 3 x 3 x 3 = 27 candidates, all sharing one seed.
# This rebinds `params`, replacing the random-forest grid defined earlier.
params = dict(
    n_estimators=[50, 100, 200],
    max_depth=[5, 6, 7],
    gamma=[0, 1, 2],
    random_state=[2045],
)

In [15]:
# Base estimator; every tuned hyperparameter (including the seed) comes from `params`.
Model_xgb = XGBRegressor()

# Exhaustive search over `params`, scored on shuffled 5-fold CV with a fixed
# seed; refit=True retrains the best candidate on the full data afterwards.
# NOTE(review): predictions are later passed through np.expm1, which suggests
# SalePrice is already log1p-scaled. If so, 'neg_mean_squared_log_error' takes
# a second log and 'neg_mean_squared_error' may be the intended metric - confirm.
gcv_xgb = GridSearchCV(
    estimator=Model_xgb,
    param_grid=params,
    scoring='neg_mean_squared_log_error',
    cv=KFold(5, shuffle=True, random_state=2045),
    refit=True,
)

In [16]:
# Split the target column from the feature matrix, then run the whole grid
# search; the fitted search object is the cell's displayed value.
y = train.loc[:, 'SalePrice']
X = train.drop('SalePrice', axis=1)
gcv_xgb.fit(X, y)

GridSearchCV(cv=KFold(n_splits=5, random_state=2045, shuffle=True),
             estimator=XGBRegressor(base_score=None, booster=None,
                                    colsample_bylevel=None,
                                    colsample_bynode=None,
                                    colsample_bytree=None, gamma=None,
                                    gpu_id=None, importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=None, max_delta_step=None,
                                    max_depth=None, min_child_weight=None,
                                    missing=nan, mon..._constraints=None,
                                    n_estimators=100, n_jobs=None,
                                    num_parallel_tree=None, random_state=None,
                                    reg_alpha=None, reg_lambda=None,
                                    scale_pos_weight=None, subsample=None,
                    

In [17]:
# Hyperparameter combination that achieved the best mean cross-validated score.
gcv_xgb.best_params_

{'gamma': 0, 'max_depth': 5, 'n_estimators': 100, 'random_state': 2045}

In [19]:
# Mean cross-validated score of the best candidate. It is negative because the
# scorer is 'neg_mean_squared_log_error', so values closer to 0 are better.
gcv_xgb.best_score_

-0.00011392367668177706

In [20]:
# Predict on the test table with the searched model (the search was built with
# refit=True, so predict() uses the refit best candidate).
y_hat = gcv_xgb.predict(test)
# expm1 inverts a log1p transform - presumably SalePrice was log1p-scaled
# during preprocessing; confirm against the preprocessing notebook.
pred = np.expm1(y_hat)

In [24]:
# Fill the sample-submission template with the predictions and save it.
sub = pd.read_csv('data/sample_submission.csv')
sub['SalePrice'] = pred
# to_csv does not create missing parent directories, so make sure the output
# folder exists; otherwise a fresh checkout fails here after the long search.
Path('./submission').mkdir(parents=True, exist_ok=True)
sub.to_csv('./submission/GCV_XGB_1.csv', index=False)

In [25]:
# Second XGBoost search: shallower trees and a wider gamma range than the
# first grid. n_estimators is not searched, so the estimator default applies.
params = dict(
    max_depth=[3, 4, 5],
    gamma=[0, 3, 6],
    random_state=[2045],
)

Model_xgb = XGBRegressor()

# Same CV protocol as the first search (shuffled 5-fold, fixed seed, refit on
# the full data). This rebinds `gcv_xgb`, discarding the first search object.
gcv_xgb = GridSearchCV(
    estimator=Model_xgb,
    param_grid=params,
    scoring='neg_mean_squared_log_error',
    cv=KFold(5, shuffle=True, random_state=2045),
    refit=True,
)

In [26]:
# Split the target column from the feature matrix, then run the whole grid
# search; the fitted search object is the cell's displayed value.
y = train.loc[:, 'SalePrice']
X = train.drop('SalePrice', axis=1)
gcv_xgb.fit(X, y)

GridSearchCV(cv=KFold(n_splits=5, random_state=2045, shuffle=True),
             estimator=XGBRegressor(base_score=None, booster=None,
                                    colsample_bylevel=None,
                                    colsample_bynode=None,
                                    colsample_bytree=None, gamma=None,
                                    gpu_id=None, importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=None, max_delta_step=None,
                                    max_depth=None, min_child_weight=None,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=100, n_jobs=None,
                                    num_parallel_tree=None, random_state=None,
                                    reg_alpha=None, reg_lambda=None,
                                    scale_pos_weight=None, subsample=None,
                  

In [27]:
# Hyperparameter combination that achieved the best mean cross-validated score
# in the second XGBoost search.
gcv_xgb.best_params_

{'gamma': 0, 'max_depth': 4, 'random_state': 2045}

In [28]:
# Mean cross-validated score of the best candidate. It is negative because the
# scorer is 'neg_mean_squared_log_error', so values closer to 0 are better.
gcv_xgb.best_score_

-0.00011031070416179828

In [30]:
# Predict on the test table with the searched model (the search was built with
# refit=True, so predict() uses the refit best candidate).
y_hat = gcv_xgb.predict(test)
# expm1 inverts a log1p transform - presumably SalePrice was log1p-scaled
# during preprocessing; confirm against the preprocessing notebook.
pred = np.expm1(y_hat)

In [31]:
# Fill the sample-submission template with the predictions and save it.
sub = pd.read_csv('data/sample_submission.csv')
sub['SalePrice'] = pred
# to_csv does not create missing parent directories, so make sure the output
# folder exists; otherwise a fresh checkout fails here after the long search.
Path('./submission').mkdir(parents=True, exist_ok=True)
sub.to_csv('./submission/GCV_XGB_2.csv', index=False)

#The End#