In [None]:
from google.colab import files
uploaded =files.upload()

In [8]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [10]:
######## Importing Libraries ########
import pandas as pd
import numpy as np

######## Settings to display rows & columns ########
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

usecols = ['date', 'month', 'hour', 'season', 'weekday', 'is_holiday','working_day',
           'total_count', 'temp', 'r_temp', 'wind', 'humidity', 'weather_sit', 'is_covid']
######## Importing the dataset ########
df = pd.read_csv("/content/data.csv", usecols=usecols)
######## Changing the datatype of date column from object to proper pandas datetime64 ########
df["date"] = pd.to_datetime(df["date"])
######## Sorting the data by date and hour ########
df.sort_values(by=['date', "hour"], ascending=True, inplace=True)
df.head()

Unnamed: 0,date,month,hour,season,weekday,is_holiday,working_day,total_count,temp,r_temp,wind,humidity,weather_sit,is_covid
0,2018-01-01,1,0,winter,0,1,0,34,-9.0,-15.0,13.0,53.0,1.0,0
1,2018-01-01,1,1,winter,0,1,0,49,-9.0,-15.0,13.0,57.0,1.0,0
2,2018-01-01,1,2,winter,0,1,0,37,-9.0,-16.0,17.0,57.0,1.0,0
3,2018-01-01,1,3,winter,0,1,0,9,-10.0,-15.0,9.0,62.0,1.0,0
4,2018-01-01,1,4,winter,0,1,0,12,-10.0,-17.0,17.0,62.0,1.0,0


In [11]:
####### Taking all the data before 1st feb of 2022 as training data #######
train_df = df.loc[df["date"] <= '2022-01-31']
####### Taking all the data from 1st feb of 2022 but before 31st March 2022 as validation data #######
valid_df = df.loc[(df["date"] >= '2022-02-01') & (df["date"] <= '2022-03-31')]
####### Taking all the data after 31st March 2022 as test data #######
test_df = df.loc[df["date"] >= '2022-04-01']
print(f"Number of Observations & Features in train_df are - {train_df.shape}")
print(f"Number of Observations & Features in valid_df are - {valid_df.shape}")
print(f"Number of Observations & Features in test_df are - {test_df.shape}")

Number of Observations & Features in train_df are - (38174, 14)
Number of Observations & Features in valid_df are - (1763, 14)
Number of Observations & Features in test_df are - (840, 14)


In [12]:
### This is done to drop observations which contains duplicate observations having different temp, r_temp, wind etc. 
### at the same hour, date 
train_df = train_df[~train_df.duplicated(subset=["date","month",
        "hour","season","weekday","is_holiday","working_day","total_count"],keep="last")].reset_index(drop=True)
print(f"Number of Observations left after removing duplicate observations in train_df are - {train_df.shape}")

#### Propagating the same thing on validation & test sets ####
valid_df = valid_df[~valid_df.duplicated(subset=["date","month",
        "hour","season","weekday","is_holiday","working_day","total_count"],keep="last")].reset_index(drop=True)
print(f"Number of Observations left after removing duplicate observations in valid_df are - {valid_df.shape}")

test_df = test_df[~test_df.duplicated(subset=["date","month",
        "hour","season","weekday","is_holiday","working_day","total_count"],keep="last")].reset_index(drop=True)
print(f"Number of Observations left after removing duplicate observations in test_df are - {test_df.shape}")

Number of Observations left after removing duplicate observations in train_df are - (30876, 14)
Number of Observations left after removing duplicate observations in valid_df are - (1413, 14)
Number of Observations left after removing duplicate observations in test_df are - (720, 14)


In [13]:
####### Changing the datatype of weather_sit column from float64 to int64 #######
train_df["weather_sit"] = train_df["weather_sit"].astype("int64")
valid_df["weather_sit"] = valid_df["weather_sit"].astype("int64")
test_df["weather_sit"] = test_df["weather_sit"].astype("int64")
train_df.head()

Unnamed: 0,date,month,hour,season,weekday,is_holiday,working_day,total_count,temp,r_temp,wind,humidity,weather_sit,is_covid
0,2018-01-01,1,0,winter,0,1,0,34,-9.0,-15.0,13.0,53.0,1,0
1,2018-01-01,1,1,winter,0,1,0,49,-9.0,-15.0,13.0,57.0,1,0
2,2018-01-01,1,2,winter,0,1,0,37,-9.0,-16.0,17.0,57.0,1,0
3,2018-01-01,1,3,winter,0,1,0,9,-10.0,-15.0,9.0,62.0,1,0
4,2018-01-01,1,4,winter,0,1,0,12,-10.0,-17.0,17.0,62.0,1,0


In [14]:
####### Applying Label Encoding on season_dict feature #######
season_dict = {"winter":1, "spring":2, "summer":3, "fall":4}
####### Applying the mapping on Each dataset #######
train_df["season"] = train_df["season"].map(season_dict)
valid_df["season"] = valid_df["season"].map(season_dict)
test_df["season"] = test_df["season"].map(season_dict)

print(train_df.shape)
print(valid_df.shape)
print(test_df.shape)

####### Extracting x_train & y_train #######
y_train = train_df["total_count"]
x_train = train_df.drop(["total_count"], axis=1)
####### Extracting x_valid & y_valid #######
y_valid = valid_df["total_count"]
x_valid = valid_df.drop(["total_count"], axis=1)
####### Extracting x_train & y_train #######
y_test = test_df["total_count"]
x_test = test_df.drop(["total_count"], axis=1)

x_train.head()

(30876, 14)
(1413, 14)
(720, 14)


Unnamed: 0,date,month,hour,season,weekday,is_holiday,working_day,temp,r_temp,wind,humidity,weather_sit,is_covid
0,2018-01-01,1,0,1,0,1,0,-9.0,-15.0,13.0,53.0,1,0
1,2018-01-01,1,1,1,0,1,0,-9.0,-15.0,13.0,57.0,1,0
2,2018-01-01,1,2,1,0,1,0,-9.0,-16.0,17.0,57.0,1,0
3,2018-01-01,1,3,1,0,1,0,-10.0,-15.0,9.0,62.0,1,0
4,2018-01-01,1,4,1,0,1,0,-10.0,-17.0,17.0,62.0,1,0


In [2]:
def root_mean_squared_error(y_true, y_pred):
    return np.sqrt(np.mean(((y_true)-(y_pred))**2))

def mean_squared_error(y_true, y_pred):
    return np.mean(((y_true)-(y_pred))**2)

from sklearn.metrics import r2_score

In [16]:
############# Base model ############
###### Predicting the average y_train value for both train, valid & test datasets ######
y_random_train = np.zeros(len(y_train))
y_random_train = y_random_train + np.mean(y_train)
y_random_valid = np.zeros(len(y_valid))
y_random_valid = y_random_valid + np.mean(y_train)
y_random_test = np.zeros(len(y_test))
y_random_test = y_random_test + np.mean(y_train)

print(f"RMSE on train data when predicting average value of y_train -> {root_mean_squared_error(y_train, y_random_train)}")
print(f"RMSE on validation data when predicting average value of y_train -> {root_mean_squared_error(y_valid,y_random_valid)}")
print(f"RMSE on test data when predicting average value of y_train -> {root_mean_squared_error(y_test, y_random_test)}")
print(" ")
print(f"R2 on train data when predicting average value of y_train -> {r2_score(y_train, y_random_train)}")
print(f"R2 on validation data when predicting average value of y_train -> {r2_score(y_valid, y_random_valid)}")
print(f"R2 on test data when predicting average value of y_train -> {r2_score(y_test, y_random_test)}")

RMSE on train data when predicting average value of y_train -> 322.01002670721624
RMSE on validation data when predicting average value of y_train -> 273.51072220873937
RMSE on test data when predicting average value of y_train -> 379.7046476518417
 
R2 on train data when predicting average value of y_train -> 0.0
R2 on validation data when predicting average value of y_train -> -0.01463034094688509
R2 on test data when predicting average value of y_train -> -0.09061900546013213


In [1]:
######## Now, we want a model which does better than this model which always predict average y_train value ########

######## Setting date column as index for all the datasets ########
x_train.set_index("date", inplace=True)
x_valid.set_index("date", inplace=True)
x_test.set_index("date", inplace=True)

######################################## LINEAR REGRESSION MODEL ######################################## 
###### No scaling was done on continous features ######
from sklearn.linear_model import LinearRegression

def run_model(model, x_train, y_train, x_valid, y_valid, x_test, y_test):
    model_ = model.fit(x_train, y_train)
    train_pred = model_.predict(x_train)
    valid_pred = model_.predict(x_valid)
    test_pred = model_.predict(x_test)
    print(f"RMSE on train data using {model} -> {root_mean_squared_error(y_train, train_pred)}")
    print(f"RMSE on valid data using {model} -> {root_mean_squared_error(y_valid,valid_pred)}")
    print(f"RMSE on test data using {model} -> {root_mean_squared_error(y_test, test_pred)}")
    print(" ")
    print(f"R2 score on train data using {model} -> {round(r2_score(y_train, train_pred),2)}")
    print(f"R2 score on validation data using {model} -> {round(r2_score(y_valid, valid_pred),2)}")
    print(f"R2 score on test data using {model} -> {round(r2_score(y_test, test_pred),2)}")
    
#run_model(LinearRegression(),x_train, y_train, x_valid, y_valid, x_test, y_test)

In [None]:
########## Our Linear Regression Model is doing much better than our simple average base model ##########
from sklearn.linear_model import Lasso
######################### Hyperparameter tuning did not help. Hence, default values are chosen #########################
run_model(Lasso(), x_train, y_train, x_valid, y_valid, x_test, y_test)

RMSE on train data using Lasso() -> 249.78052781001804
RMSE on valid data using Lasso() -> 202.26014038650243
RMSE on test data using Lasso() -> 258.27531667116784
 
R2 score on train data using Lasso() -> 0.4
R2 score on train data using Lasso() -> 0.45
R2 score on train data using Lasso() -> 0.5


In [None]:
######################################## KNN MODEL ######################################## 
###### No scaling was done on continous features ######
from sklearn.neighbors import KNeighborsRegressor
run_model(KNeighborsRegressor(metric="braycurtis", n_neighbors=9), x_train, y_train, x_valid, y_valid, x_test, y_test)

RMSE on train data using KNeighborsRegressor(metric='braycurtis', n_neighbors=9) -> 160.28116930957418
RMSE on valid data using KNeighborsRegressor(metric='braycurtis', n_neighbors=9) -> 165.62172499844414
RMSE on test data using KNeighborsRegressor(metric='braycurtis', n_neighbors=9) -> 219.634027538567
 
R2 score on train data using KNeighborsRegressor(metric='braycurtis', n_neighbors=9) -> 0.75
R2 score on train data using KNeighborsRegressor(metric='braycurtis', n_neighbors=9) -> 0.63
R2 score on train data using KNeighborsRegressor(metric='braycurtis', n_neighbors=9) -> 0.64


In [None]:
############## DO NOT RUN THIS CELL ###############
from sklearn.model_selection import GridSearchCV, KFold
knn = KNeighborsRegressor(n_neighbors=5, n_jobs=-1)
param_grid = {"n_neighbors" : [3,5,7,9,11,13,15,17],
              "weights" : ["uniform", "distance"],
              "algorithm" : ['auto', 'ball_tree', 'kd_tree', 'brute'],
              "metric" : ["minkowski", "manhattan", "euclidean"]}
kf = KFold(n_splits=5, random_state=786, shuffle=True)
gs = GridSearchCV(estimator=knn, param_grid=param_grid, cv=kf, scoring="r2") 
gs.fit(x_train, y_train)
print(gs.best_score_)
print(gs.best_params_)

0.6968560963695112
{'algorithm': 'brute', 'metric': 'manhattan', 'n_neighbors': 9, 'weights': 'distance'}


In [None]:
################### KNN after HyperParameter tuning ###################
knn = KNeighborsRegressor(**gs.best_params_).fit(x_train, y_train)

knn_train_pred = knn.predict(x_train)
knn_valid_pred = knn.predict(x_valid)
knn_test_pred = knn.predict(x_test)
print(f"RMSE on train data using KNN -> {root_mean_squared_error(y_train, knn_train_pred)}")
print(f"RMSE on valid data using KNN -> {root_mean_squared_error(y_valid,knn_valid_pred)}")
print(f"RMSE on test data using KNN -> {root_mean_squared_error(y_test, knn_test_pred)}")
print(" ")
print(f"R2 score on train data using KNN -> {round(r2_score(y_train, knn_train_pred),2)}")
print(f"R2 score on valid data using KNN -> {round(r2_score(y_valid, knn_valid_pred),2)}")
print(f"R2 score on test data using KNN -> {round(r2_score(y_test, knn_test_pred),2)}")

RMSE on train data using KNN -> 4.21641566158054
RMSE on valid data using KNN -> 164.16729527665638
RMSE on test data using KNN -> 217.54628729991094
 
R2 score on train data using KNN -> 1.0
R2 score on valid data using KNN -> 0.63
R2 score on test data using KNN -> 0.64


In [None]:
################### Default HyperParameter Settings ###################
from sklearn.svm import SVR
run_model(SVR(), x_train, y_train, x_valid, y_valid, x_test, y_test)

RMSE on train data using SVR() -> 261.2415171844519
RMSE on valid data using SVR() -> 212.00927880269313
RMSE on test data using SVR() -> 296.74798189706576
 
R2 score on train data using SVR() -> 0.34
R2 score on train data using SVR() -> 0.39
R2 score on train data using SVR() -> 0.33


In [None]:
from sklearn.ensemble import RandomForestRegressor
rf = RandomForestRegressor(n_estimators=100, criterion="mse",random_state=786, n_jobs=-1)
param_grid = {"n_estimators" : [50,100,150,200],
              "max_depth" : [2,3,4,5,6,7,8,9],
              "min_samples_split" : [2,4,5,6,10]}
kf = KFold(n_splits=5, shuffle=True, random_state=786)
gs = GridSearchCV(estimator=rf, param_grid=param_grid, cv=kf, scoring="r2")
gs.fit(x_train, y_train)
print(gs.best_score_)
print(gs.best_params_)

0.8432878789768823
{'max_depth': 9, 'min_samples_split': 5, 'n_estimators': 200}


In [None]:
run_model(RandomForestRegressor(**gs.best_params_), x_train, y_train, x_valid, y_valid, x_test, y_test)

RMSE on train data using RandomForestRegressor(max_depth=9, min_samples_split=5, n_estimators=200) -> 123.54486687992636
RMSE on valid data using RandomForestRegressor(max_depth=9, min_samples_split=5, n_estimators=200) -> 145.53822951279275
RMSE on test data using RandomForestRegressor(max_depth=9, min_samples_split=5, n_estimators=200) -> 166.22700249570048
 
R2 score on train data using RandomForestRegressor(max_depth=9, min_samples_split=5, n_estimators=200) -> 0.85
R2 score on train data using RandomForestRegressor(max_depth=9, min_samples_split=5, n_estimators=200) -> 0.71
R2 score on train data using RandomForestRegressor(max_depth=9, min_samples_split=5, n_estimators=200) -> 0.79


In [76]:
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.ensemble import ExtraTreesRegressor
et = ExtraTreesRegressor(random_state=786, n_jobs=-1, n_estimators=100)
param_grid = {"n_estimators" : [50,100,150,200],
              "max_depth" : [3,5,7,9,11],
              "min_samples_split" : [2,4,6,8,10,12],
              "max_features" : [3,5,7]}
kf = KFold(n_splits=5, shuffle=False)
gs = GridSearchCV(estimator=et, param_grid=param_grid, cv=kf, scoring="r2")
gs.fit(x_train, y_train)
print(gs.best_score_)
print(gs.best_params_)

KeyboardInterrupt: 

## Best Model so far 

In [None]:
run_model(ExtraTreesRegressor(**gs.best_params_), x_train, y_train, x_test, y_test, x_test, y_test)

RMSE on train data using ExtraTreesRegressor(max_depth=11, max_features=7, n_estimators=50) -> 126.26126184050108
RMSE on valid data using ExtraTreesRegressor(max_depth=11, max_features=7, n_estimators=50) -> 117.45062650617486
RMSE on test data using ExtraTreesRegressor(max_depth=11, max_features=7, n_estimators=50) -> 133.80821050591942
 
R2 score on train data using ExtraTreesRegressor(max_depth=11, max_features=7, n_estimators=50) -> 0.85
R2 score on train data using ExtraTreesRegressor(max_depth=11, max_features=7, n_estimators=50) -> 0.81
R2 score on train data using ExtraTreesRegressor(max_depth=11, max_features=7, n_estimators=50) -> 0.86


In [None]:
from xgboost import plot_importance
import xgboost as xgb
import matplotlib.pyplot as plt
run_model(xgb.XGBRegressor(objective="reg:squarederror",max_depth=4, n_estimators=130,
          random_state=786, n_jobs=-1, learning_rate=0.05),
          x_train, y_train, x_valid, y_valid, x_test, y_test)

RMSE on train data using XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
             gamma=0, gpu_id=-1, importance_type=None,
             interaction_constraints='', learning_rate=0.05, max_delta_step=0,
             max_depth=4, min_child_weight=1, missing=nan,
             monotone_constraints='()', n_estimators=130, n_jobs=-1,
             num_parallel_tree=1, predictor='auto', random_state=786,
             reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
             tree_method='exact', validate_parameters=1, verbosity=None) -> 136.02744644793677
RMSE on valid data using XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
             gamma=0, gpu_id=-1, importance_type=None,
             interaction_constraints='', learning_rate=0.05, max_delta_step=0,
             max_depth

In [None]:
xgbclassifier = xgb.XGBRegressor(objective="reg:squarederror",max_depth=4, n_estimators=130,
                                random_state=786, n_jobs=-1, learning_rate=0.05).fit(x_train, y_train)
xgb_train_pred = xgbclassifier.predict(x_train)
xgb_valid_pred = xgbclassifier.predict(x_valid)
xgb_test_pred = xgbclassifier.predict(x_test)

print(f"TRAIN RMSE : {root_mean_squared_error(y_train, xgb_train_pred)}")
print(f"VALIDATION RMSE : {root_mean_squared_error(y_valid, xgb_valid_pred)}")
print(f"TEST RMSE : {root_mean_squared_error(y_test, xgb_test_pred)}")
print(" ")
print(f"TRAIN R2 : {r2_score(y_train, xgb_train_pred)}")
print(f"VALIDATION R2 : {r2_score(y_valid, xgb_valid_pred)}")
print(f"TEST R2 : {r2_score(y_test, xgb_test_pred)}")

TRAIN RMSE : 136.02744644793677
VALIDATION RMSE : 114.971414438052
TEST RMSE : 128.37018444841456
 
TRAIN R2 : 0.8215509250420785
VALIDATION R2 : 0.8207170947989331
TEST R2 : 0.8753450393432096


In [None]:
xgbclassifier = xgb.XGBRegressor(objective="reg:squarederror",max_depth=4, n_estimators=130,
                                random_state=786, n_jobs=-1, learning_rate=0.09).fit(x_train, y_train)
xgb_train_pred = xgbclassifier.predict(x_train)
xgb_valid_pred = xgbclassifier.predict(x_valid)
xgb_test_pred = xgbclassifier.predict(x_test)

print(f"TRAIN RMSE : {root_mean_squared_error(y_train, xgb_train_pred)}")
print(f"VALIDATION RMSE : {root_mean_squared_error(y_valid, xgb_valid_pred)}")
print(f"TEST RMSE : {root_mean_squared_error(y_test, xgb_test_pred)}")
print(" ")
print(f"TRAIN R2 : {r2_score(y_train, xgb_train_pred)}")
print(f"VALIDATION R2 : {r2_score(y_valid, xgb_valid_pred)}")
print(f"TEST R2 : {r2_score(y_test, xgb_test_pred)}")

TRAIN RMSE : 121.29312684517245
VALIDATION RMSE : 112.56764072896429
TEST RMSE : 118.20287239806034
 
TRAIN R2 : 0.85811594429258
VALIDATION R2 : 0.8281354677583918
TEST R2 : 0.8943091730105952


In [None]:
################# Do not run this cell #################
xgbregressor = xgb.XGBRegressor(objective="reg:squarederror",max_depth=4, n_estimators=130,colsample_bytree=0.5, subsample=0.6,random_state=786, n_jobs=-1)
param_grid = {"n_estimators" : [100,150],
              "max_depth" : [2,3,4,5],
              "learning_rate" : [0.10, 0.15],
              "booster" : ["gbtree", "gblinear" ,"dart"],
              "reg_lambda" : [0.01, 0.05, 0.10],
              "reg_alpha" : [0.01, 0.05, 0.10]}
kf = KFold(n_splits=5, shuffle=False)
gs = GridSearchCV(estimator=xgbregressor, param_grid=param_grid, cv=kf, scoring="r2")
gs.fit(x_train, y_train) 
print(gs.best_score_)
print(gs.best_params_)

0.7478982064168875
{'booster': 'dart', 'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 150, 'reg_alpha': 0.1, 'reg_lambda': 0.1}


In [None]:
run_model(xgb.XGBRegressor(objective="reg:squarederror",**gs.best_params_, random_state=786, n_jobs=-1),
         x_train,y_train, x_valid, y_valid, x_test, y_test)

RMSE on train data using XGBRegressor(booster='dart', max_depth=5, n_estimators=150, n_jobs=-1,
             objective='reg:squarederror', random_state=786, reg_alpha=0.1,
             reg_lambda=0.1) -> 103.25845074166016
RMSE on valid data using XGBRegressor(booster='dart', max_depth=5, n_estimators=150, n_jobs=-1,
             objective='reg:squarederror', random_state=786, reg_alpha=0.1,
             reg_lambda=0.1) -> 121.48452153157334
RMSE on test data using XGBRegressor(booster='dart', max_depth=5, n_estimators=150, n_jobs=-1,
             objective='reg:squarederror', random_state=786, reg_alpha=0.1,
             reg_lambda=0.1) -> 128.9735964903564
 
R2 score on train data using XGBRegressor(booster='dart', max_depth=5, n_estimators=150, n_jobs=-1,
             objective='reg:squarederror', random_state=786, reg_alpha=0.1,
             reg_lambda=0.1) -> 0.9
R2 score on train data using XGBRegressor(booster='dart', max_depth=5, n_estimators=150, n_jobs=-1,
             object

In [6]:
!pip install xgboost

Collecting xgboost
  Downloading xgboost-1.6.1-py3-none-win_amd64.whl (125.4 MB)
Installing collected packages: xgboost
Successfully installed xgboost-1.6.1


In [34]:
import optuna
import xgboost as xgb
from sklearn.metrics import mean_squared_error
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")

In [82]:
train = r"G:\Shivansh\iNeuron\Internship\Rental Bike Share Prediction\Rental Bike Demand Prediction\rental_bike_demand\artifact\data_transformation\2022-07-11-23-52-26\transformed_data\train\transformed_train.csv"
test = r"G:\Shivansh\iNeuron\Internship\Rental Bike Share Prediction\Rental Bike Demand Prediction\rental_bike_demand\artifact\data_transformation\2022-07-11-23-52-26\transformed_data\test\transformed_test.csv"

In [84]:
x_train = pd.read_csv(train).drop(columns = ["year","casual","member","total_count"],axis = 1)
x_train.set_index("date", inplace=True)
y_train = pd.read_csv(train).iloc[:,-1]

In [85]:
x_test = pd.read_csv(test).drop(columns = ["year","casual","member","total_count"],axis = 1)
x_test.set_index("date", inplace=True)
y_test = pd.read_csv(test).iloc[:,-1]

In [86]:
x_train

Unnamed: 0_level_0,month,hour,season,weekday,is_holiday,working_day,weather_sit,is_covid,temp,r_temp,wind,humidity
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2018-01-01,1,0,1,0,1,0,1.0,0,-2.456119,-2.507362,-0.091120,-0.593651
2018-01-01,1,1,1,0,1,0,1.0,0,-2.456119,-2.507362,-0.091120,-0.381186
2018-01-01,1,2,1,0,1,0,1.0,0,-2.456119,-2.593935,0.402398,-0.381186
2018-01-01,1,3,1,0,1,0,1.0,0,-2.559649,-2.507362,-0.584638,-0.115605
2018-01-01,1,4,1,0,1,0,1.0,0,-2.559649,-2.680508,0.402398,-0.115605
...,...,...,...,...,...,...,...,...,...,...,...,...
2022-01-31,1,19,1,0,0,1,1.0,0,-1.627879,-1.641629,-0.091120,-0.487419
2022-01-31,1,20,1,0,0,1,3.0,0,-1.627879,-1.555056,-0.831397,-0.487419
2022-01-31,1,21,1,0,0,1,3.0,0,-1.627879,-1.555056,-0.831397,-0.274954
2022-01-31,1,22,1,0,0,1,3.0,0,-1.731409,-1.641629,-0.831397,-0.009372


In [87]:
def objective(trail,data = x_train, target = y_train):
  
    param = {
      #'tree_method' : 'gpu_hist',
      'lambda' : trail.suggest_loguniform('lambda', 1e-4, 10.0),
      'alpha' :  trail.suggest_loguniform('alpha', 1e-4, 10.0),
      'colsample_bytree' : trail.suggest_categorical('colsample_bytree', [.1,.2,.3,.4,.5,.6,.7,.8,.9,1]),
      'subsample' : trail.suggest_categorical('subsample', [.1,.2,.3,.4,.5,.6,.7,.8,.9,1]),
      'learning_rate' : trail.suggest_categorical('learning_rate',[.00001,.0003,.008,.02,.01,0.10,0.15,0.2,1,10,20]),
      'n_estimator' : 130,
      'max_depth' : trail.suggest_categorical('max_depth', [3,4,5,6,7,8,9,10,11,12]),
      'random_state' : 786,
      'min_child_weight' : trail.suggest_int('min_child_weight',1,200),
      'booster' : trail.suggest_categorical('booster',["gblinear","gbtree","dart"]),
      "reg_lambda" : trail.suggest_categorical("reg_lambda",[0.01, 0.05, 0.10]),
      "reg_alpha" : trail.suggest_categorical("reg_alpha",[0.01, 0.05, 0.10]),
      'verbosity' : 2
    }
    if param["booster"] in ['gbtree', 'dart']:
        param['gamma'] : trial.suggest_float('gamma', 1e-3, 4)
        param['eta'] : trial.suggest_float('eta', .001, 5)

    xgb_reg_model = xgb.XGBRegressor(**param)
    xgb_reg_model.fit(x_train,y_train, eval_set = [(x_test,y_test)], verbose = True)
    pred_xgb = xgb_reg_model.predict(x_test)
    rmse = mean_squared_error(y_test, pred_xgb)
    return rmse

In [91]:
find_param = optuna.create_study(direction='minimize')
find_param.optimize(objective, n_trials = 10)
find_param.best_trial.params

[32m[I 2022-07-12 00:01:22,123][0m A new study created in memory with name: no-name-626910d5-206e-4e91-8b4f-dd40ec71998e[0m


Parameters: { "n_estimator" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[00:01:22] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 30 extra nodes, 0 pruned nodes, max_depth=4
[0]	validation_0-rmse:207.36995
[00:01:22] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 28 extra nodes, 0 pruned nodes, max_depth=4
[1]	validation_0-rmse:208.00866
[00:01:22] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 30 extra nodes, 0 pruned nodes, max_depth=4
[2]	validation_0-rmse:210.60381
[00:01:22] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1

[38]	validation_0-rmse:129.60944
[00:01:23] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 18 extra nodes, 0 pruned nodes, max_depth=4
[39]	validation_0-rmse:130.00837
[00:01:23] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 16 extra nodes, 0 pruned nodes, max_depth=4
[40]	validation_0-rmse:130.07360
[00:01:23] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 26 extra nodes, 0 pruned nodes, max_depth=4
[41]	validation_0-rmse:131.52223
[00:01:23] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 26 extra nodes, 0 pruned nodes, max_depth=4
[42]	validation_0-rmse:131.12592
[00:01:23] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 22 extra nodes, 0 pruned

[00:01:24] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 18 extra nodes, 0 pruned nodes, max_depth=4
[79]	validation_0-rmse:136.95844
[00:01:24] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 26 extra nodes, 0 pruned nodes, max_depth=4
[80]	validation_0-rmse:137.85919
[00:01:24] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 30 extra nodes, 0 pruned nodes, max_depth=4
[81]	validation_0-rmse:137.67556
[00:01:24] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 22 extra nodes, 0 pruned nodes, max_depth=4
[82]	validation_0-rmse:137.23736
[00:01:24] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 24 extra nodes, 0 pruned nodes, max_depth=4
[83]	validati

[32m[I 2022-07-12 00:01:24,723][0m Trial 0 finished with value: 19425.376663438874 and parameters: {'lambda': 0.0002424107916855603, 'alpha': 0.040285415484144195, 'colsample_bytree': 0.7, 'subsample': 0.4, 'learning_rate': 1, 'max_depth': 4, 'random_state': 100, 'min_child_weight': 152, 'booster': 'gbtree', 'reg_lambda': 0.05, 'reg_alpha': 0.1}. Best is trial 0 with value: 19425.376663438874.[0m


Parameters: { "colsample_bytree", "max_depth", "min_child_weight", "n_estimator", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[0]	validation_0-rmse:373.88828
[1]	validation_0-rmse:354.96236
[2]	validation_0-rmse:338.75379
[3]	validation_0-rmse:324.82007
[4]	validation_0-rmse:312.83539
[5]	validation_0-rmse:302.56288
[6]	validation_0-rmse:293.63210
[7]	validation_0-rmse:285.88882
[8]	validation_0-rmse:279.13967
[9]	validation_0-rmse:273.26054
[10]	validation_0-rmse:268.10074
[11]	validation_0-rmse:263.54528
[12]	validation_0-rmse:259.49206
[13]	validation_0-rmse:255.91070
[14]	validation_0-rmse:252.69081
[15]	validation_0-rmse:249.80631
[16]	validation_0-rmse:247.21422
[17]	validation_0-rmse:244.86531
[18]	validation_0-rmse:242.72451

[32m[I 2022-07-12 00:01:25,663][0m Trial 1 finished with value: 42580.90471131602 and parameters: {'lambda': 0.4920411544585592, 'alpha': 0.0008558137294025118, 'colsample_bytree': 0.4, 'subsample': 0.3, 'learning_rate': 0.02, 'max_depth': 5, 'random_state': 2000, 'min_child_weight': 21, 'booster': 'gblinear', 'reg_lambda': 0.05, 'reg_alpha': 0.05}. Best is trial 0 with value: 19425.376663438874.[0m


Parameters: { "n_estimator" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[00:01:25] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 318 extra nodes, 0 pruned nodes, max_depth=10
[00:01:25] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/gbm/gbtree.cc:909: drop 0 trees, weight = 1
[0]	validation_0-rmse:362.72950
[00:01:25] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 320 extra nodes, 0 pruned nodes, max_depth=10
[00:01:25] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/gbm/gbtree.cc:909: drop 0 trees, weight = 1
[1]	validation_0-rmse:344.88968
[00:01:25] INFO: C:/

[00:01:27] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 304 extra nodes, 0 pruned nodes, max_depth=10
[00:01:27] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/gbm/gbtree.cc:909: drop 0 trees, weight = 1
[24]	validation_0-rmse:118.29146
[00:01:27] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 332 extra nodes, 0 pruned nodes, max_depth=10
[00:01:27] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/gbm/gbtree.cc:909: drop 0 trees, weight = 1
[25]	validation_0-rmse:116.78595
[00:01:27] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 338 extra nodes, 0 pruned nodes, max_depth=10
[00:01:27] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/gbm/gbtree.cc:909: drop 0 trees, weight = 1
[26]	validation_0-rmse:115.65859
[00:

[00:01:30] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 322 extra nodes, 0 pruned nodes, max_depth=10
[00:01:30] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/gbm/gbtree.cc:909: drop 0 trees, weight = 1
[49]	validation_0-rmse:113.90713
[00:01:31] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 324 extra nodes, 0 pruned nodes, max_depth=10
[00:01:31] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/gbm/gbtree.cc:909: drop 0 trees, weight = 1
[50]	validation_0-rmse:114.84144
[00:01:31] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 310 extra nodes, 0 pruned nodes, max_depth=10
[00:01:31] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/gbm/gbtree.cc:909: drop 0 trees, weight = 1
[51]	validation_0-rmse:115.35344
[00:

[00:01:35] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 102 extra nodes, 0 pruned nodes, max_depth=10
[00:01:35] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/gbm/gbtree.cc:909: drop 0 trees, weight = 1
[74]	validation_0-rmse:116.88959
[00:01:35] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 168 extra nodes, 0 pruned nodes, max_depth=10
[00:01:35] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/gbm/gbtree.cc:909: drop 0 trees, weight = 1
[75]	validation_0-rmse:116.82736
[00:01:35] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 146 extra nodes, 0 pruned nodes, max_depth=10
[00:01:35] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/gbm/gbtree.cc:909: drop 0 trees, weight = 1
[76]	validation_0-rmse:116.93773
[00:

[00:01:41] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 194 extra nodes, 0 pruned nodes, max_depth=10
[00:01:41] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/gbm/gbtree.cc:909: drop 0 trees, weight = 1
[99]	validation_0-rmse:121.98120


[32m[I 2022-07-12 00:01:41,975][0m Trial 2 finished with value: 14879.413574106557 and parameters: {'lambda': 0.0424721124489363, 'alpha': 0.007145020478167854, 'colsample_bytree': 0.5, 'subsample': 0.8, 'learning_rate': 0.1, 'max_depth': 10, 'random_state': 10, 'min_child_weight': 101, 'booster': 'dart', 'reg_lambda': 0.01, 'reg_alpha': 0.05}. Best is trial 2 with value: 14879.413574106557.[0m


Parameters: { "colsample_bytree", "max_depth", "min_child_weight", "n_estimator", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[0]	validation_0-rmse:396.07445
[1]	validation_0-rmse:396.06291
[2]	validation_0-rmse:396.05137
[3]	validation_0-rmse:396.03983
[4]	validation_0-rmse:396.02830
[5]	validation_0-rmse:396.01676
[6]	validation_0-rmse:396.00523
[7]	validation_0-rmse:395.99369
[8]	validation_0-rmse:395.98216
[9]	validation_0-rmse:395.97063
[10]	validation_0-rmse:395.95910
[11]	validation_0-rmse:395.94757
[12]	validation_0-rmse:395.93604
[13]	validation_0-rmse:395.92451
[14]	validation_0-rmse:395.91298
[15]	validation_0-rmse:395.90145
[16]	validation_0-rmse:395.88993
[17]	validation_0-rmse:395.87840
[18]	validation_0-rmse:395.86688

[32m[I 2022-07-12 00:01:42,966][0m Trial 3 finished with value: 155974.63826121582 and parameters: {'lambda': 0.2862637351331256, 'alpha': 6.0173398690359345, 'colsample_bytree': 0.9, 'subsample': 0.2, 'learning_rate': 1e-05, 'max_depth': 6, 'random_state': 2000, 'min_child_weight': 154, 'booster': 'gblinear', 'reg_lambda': 0.1, 'reg_alpha': 0.01}. Best is trial 2 with value: 14879.413574106557.[0m


Parameters: { "colsample_bytree", "max_depth", "min_child_weight", "n_estimator", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[0]	validation_0-rmse:211.55302
[1]	validation_0-rmse:206.48004
[2]	validation_0-rmse:209.44120
[3]	validation_0-rmse:208.36315
[4]	validation_0-rmse:208.82011
[5]	validation_0-rmse:207.45814
[6]	validation_0-rmse:207.83207
[7]	validation_0-rmse:208.32762
[8]	validation_0-rmse:207.94487
[9]	validation_0-rmse:208.09307
[10]	validation_0-rmse:208.04531
[11]	validation_0-rmse:207.66747
[12]	validation_0-rmse:207.23115
[13]	validation_0-rmse:207.32053
[14]	validation_0-rmse:206.93800
[15]	validation_0-rmse:207.70503
[16]	validation_0-rmse:206.84177
[17]	validation_0-rmse:206.76208
[18]	validation_0-rmse:207.22710

[32m[I 2022-07-12 00:01:43,917][0m Trial 4 finished with value: 41333.791955121436 and parameters: {'lambda': 0.005465037846837513, 'alpha': 0.08277678506738684, 'colsample_bytree': 0.2, 'subsample': 0.9, 'learning_rate': 1, 'max_depth': 3, 'random_state': 20, 'min_child_weight': 93, 'booster': 'gblinear', 'reg_lambda': 0.01, 'reg_alpha': 0.1}. Best is trial 2 with value: 14879.413574106557.[0m


Parameters: { "colsample_bytree", "max_depth", "min_child_weight", "n_estimator", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[0]	validation_0-rmse:278.31276
[1]	validation_0-rmse:245.64902
[2]	validation_0-rmse:231.34121
[3]	validation_0-rmse:224.12445
[4]	validation_0-rmse:218.77775
[5]	validation_0-rmse:215.29968
[6]	validation_0-rmse:212.87269
[7]	validation_0-rmse:210.81754
[8]	validation_0-rmse:208.87555
[9]	validation_0-rmse:207.42774
[10]	validation_0-rmse:206.29438
[11]	validation_0-rmse:205.48751
[12]	validation_0-rmse:204.71434
[13]	validation_0-rmse:203.90622
[14]	validation_0-rmse:203.40159
[15]	validation_0-rmse:202.92179
[16]	validation_0-rmse:202.40437
[17]	validation_0-rmse:202.10320
[18]	validation_0-rmse:201.74274

[32m[I 2022-07-12 00:01:44,867][0m Trial 5 finished with value: 40398.85207408425 and parameters: {'lambda': 0.002815087660711913, 'alpha': 1.8830324702744912, 'colsample_bytree': 1, 'subsample': 0.4, 'learning_rate': 0.15, 'max_depth': 3, 'random_state': 100, 'min_child_weight': 79, 'booster': 'gblinear', 'reg_lambda': 0.01, 'reg_alpha': 0.1}. Best is trial 2 with value: 14879.413574106557.[0m


Parameters: { "n_estimator" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[00:01:44] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 30 extra nodes, 0 pruned nodes, max_depth=4
[0]	validation_0-rmse:392.43878
[00:01:44] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 30 extra nodes, 0 pruned nodes, max_depth=4
[1]	validation_0-rmse:387.46503
[00:01:44] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 30 extra nodes, 0 pruned nodes, max_depth=4
[2]	validation_0-rmse:380.71420
[00:01:44] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1

[38]	validation_0-rmse:270.92228
[00:01:45] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 30 extra nodes, 0 pruned nodes, max_depth=4
[39]	validation_0-rmse:268.54444
[00:01:45] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 30 extra nodes, 0 pruned nodes, max_depth=4
[40]	validation_0-rmse:266.26701
[00:01:45] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 28 extra nodes, 0 pruned nodes, max_depth=4
[41]	validation_0-rmse:265.35281
[00:01:45] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 30 extra nodes, 0 pruned nodes, max_depth=4
[42]	validation_0-rmse:262.72144
[00:01:45] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 30 extra nodes, 0 pruned

[00:01:46] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 30 extra nodes, 0 pruned nodes, max_depth=4
[79]	validation_0-rmse:212.31650
[00:01:46] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 30 extra nodes, 0 pruned nodes, max_depth=4
[80]	validation_0-rmse:211.53128
[00:01:46] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 30 extra nodes, 0 pruned nodes, max_depth=4
[81]	validation_0-rmse:210.76117
[00:01:46] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 30 extra nodes, 0 pruned nodes, max_depth=4
[82]	validation_0-rmse:209.15321
[00:01:46] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 30 extra nodes, 0 pruned nodes, max_depth=4
[83]	validati

[32m[I 2022-07-12 00:01:46,566][0m Trial 6 finished with value: 39231.04391482656 and parameters: {'lambda': 0.000241330480355147, 'alpha': 0.15586128568740473, 'colsample_bytree': 0.3, 'subsample': 1, 'learning_rate': 0.02, 'max_depth': 4, 'random_state': 2000, 'min_child_weight': 13, 'booster': 'gbtree', 'reg_lambda': 0.01, 'reg_alpha': 0.01}. Best is trial 2 with value: 14879.413574106557.[0m


Parameters: { "n_estimator" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[00:01:46] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 58 extra nodes, 0 pruned nodes, max_depth=5
[00:01:46] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/gbm/gbtree.cc:909: drop 0 trees, weight = 1
[0]	validation_0-rmse:394.04392
[00:01:46] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 62 extra nodes, 0 pruned nodes, max_depth=5
[00:01:46] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/gbm/gbtree.cc:909: drop 0 trees, weight = 1
[1]	validation_0-rmse:392.45893
[00:01:46] INFO: C:/User

[00:01:47] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 46 extra nodes, 0 pruned nodes, max_depth=5
[00:01:47] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/gbm/gbtree.cc:909: drop 0 trees, weight = 1
[24]	validation_0-rmse:349.88693
[00:01:47] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 62 extra nodes, 0 pruned nodes, max_depth=5
[00:01:47] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/gbm/gbtree.cc:909: drop 0 trees, weight = 1
[25]	validation_0-rmse:348.69516
[00:01:47] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 46 extra nodes, 0 pruned nodes, max_depth=5
[00:01:47] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/gbm/gbtree.cc:909: drop 0 trees, weight = 1
[26]	validation_0-rmse:346.03289
[00:01:48]

[00:01:50] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 38 extra nodes, 0 pruned nodes, max_depth=5
[00:01:50] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/gbm/gbtree.cc:909: drop 0 trees, weight = 1
[49]	validation_0-rmse:310.72219
[00:01:50] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 52 extra nodes, 0 pruned nodes, max_depth=5
[00:01:50] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/gbm/gbtree.cc:909: drop 0 trees, weight = 1
[50]	validation_0-rmse:309.81835
[00:01:50] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 58 extra nodes, 0 pruned nodes, max_depth=5
[00:01:50] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/gbm/gbtree.cc:909: drop 0 trees, weight = 1
[51]	validation_0-rmse:308.64100
[00:01:50]

[00:01:53] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 56 extra nodes, 0 pruned nodes, max_depth=5
[00:01:53] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/gbm/gbtree.cc:909: drop 0 trees, weight = 1
[74]	validation_0-rmse:285.11957
[00:01:53] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 38 extra nodes, 0 pruned nodes, max_depth=5
[00:01:53] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/gbm/gbtree.cc:909: drop 0 trees, weight = 1
[75]	validation_0-rmse:284.60377
[00:01:54] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 58 extra nodes, 0 pruned nodes, max_depth=5
[00:01:54] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/gbm/gbtree.cc:909: drop 0 trees, weight = 1
[76]	validation_0-rmse:283.16554
[00:01:54]

[00:01:58] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 60 extra nodes, 0 pruned nodes, max_depth=5
[00:01:58] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/gbm/gbtree.cc:909: drop 0 trees, weight = 1
[99]	validation_0-rmse:262.77235


[32m[I 2022-07-12 00:01:58,160][0m Trial 7 finished with value: 69049.30746570409 and parameters: {'lambda': 7.511968995053867, 'alpha': 0.02856418931149955, 'colsample_bytree': 0.3, 'subsample': 0.8, 'learning_rate': 0.008, 'max_depth': 5, 'random_state': 30, 'min_child_weight': 84, 'booster': 'dart', 'reg_lambda': 0.01, 'reg_alpha': 0.05}. Best is trial 2 with value: 14879.413574106557.[0m


Parameters: { "n_estimator" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[00:01:58] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 164 extra nodes, 0 pruned nodes, max_depth=11
[0]	validation_0-rmse:354.92166
[00:01:58] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 162 extra nodes, 0 pruned nodes, max_depth=11
[1]	validation_0-rmse:332.62707
[00:01:58] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 96 extra nodes, 0 pruned nodes, max_depth=11
[2]	validation_0-rmse:318.72610
[00:01:58] INFO: C:/Users/Administrator/workspace/xgboost-win64_rele

[00:01:59] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 160 extra nodes, 0 pruned nodes, max_depth=11
[38]	validation_0-rmse:144.99280
[00:01:59] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 12 extra nodes, 0 pruned nodes, max_depth=4
[39]	validation_0-rmse:144.98794
[00:01:59] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 140 extra nodes, 0 pruned nodes, max_depth=11
[40]	validation_0-rmse:144.81006
[00:01:59] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 16 extra nodes, 0 pruned nodes, max_depth=5
[41]	validation_0-rmse:144.58448
[00:01:59] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 10 extra nodes, 0 pruned nodes, max_depth=3
[42]	vali

[00:02:00] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 168 extra nodes, 0 pruned nodes, max_depth=11
[78]	validation_0-rmse:136.72719
[00:02:00] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 162 extra nodes, 0 pruned nodes, max_depth=11
[79]	validation_0-rmse:136.60854
[00:02:00] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 6 extra nodes, 0 pruned nodes, max_depth=2
[80]	validation_0-rmse:136.70704
[00:02:00] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 60 extra nodes, 0 pruned nodes, max_depth=8
[81]	validation_0-rmse:136.66586
[00:02:00] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 178 extra nodes, 0 pruned nodes, max_depth=11
[82]	val

[32m[I 2022-07-12 00:02:01,058][0m Trial 8 finished with value: 17310.412330024814 and parameters: {'lambda': 0.4143987468155856, 'alpha': 0.00523751800827548, 'colsample_bytree': 0.2, 'subsample': 0.7, 'learning_rate': 0.15, 'max_depth': 11, 'random_state': 20, 'min_child_weight': 165, 'booster': 'gbtree', 'reg_lambda': 0.1, 'reg_alpha': 0.05}. Best is trial 2 with value: 14879.413574106557.[0m


Parameters: { "n_estimator" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[00:02:01] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 118 extra nodes, 0 pruned nodes, max_depth=11
[0]	validation_0-rmse:396.08315
[00:02:01] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 114 extra nodes, 0 pruned nodes, max_depth=10
[1]	validation_0-rmse:396.08122
[00:02:01] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 76 extra nodes, 0 pruned nodes, max_depth=9
[2]	validation_0-rmse:396.07960
[00:02:01] INFO: C:/Users/Administrator/workspace/xgboost-win64_relea

[00:02:02] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 10 extra nodes, 0 pruned nodes, max_depth=3
[39]	validation_0-rmse:395.99049
[00:02:02] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 110 extra nodes, 0 pruned nodes, max_depth=7
[40]	validation_0-rmse:395.98854
[00:02:02] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 12 extra nodes, 0 pruned nodes, max_depth=4
[41]	validation_0-rmse:395.98618
[00:02:02] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 10 extra nodes, 0 pruned nodes, max_depth=3
[42]	validation_0-rmse:395.98385
[00:02:02] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 90 extra nodes, 0 pruned nodes, max_depth=9
[43]	validat

[32m[I 2022-07-12 00:02:03,805][0m Trial 9 finished with value: 156696.72405964395 and parameters: {'lambda': 0.0010564522226642384, 'alpha': 1.18024536860095, 'colsample_bytree': 0.2, 'subsample': 0.3, 'learning_rate': 1e-05, 'max_depth': 11, 'random_state': 20, 'min_child_weight': 102, 'booster': 'gbtree', 'reg_lambda': 0.1, 'reg_alpha': 0.01}. Best is trial 2 with value: 14879.413574106557.[0m


{'lambda': 0.0424721124489363,
 'alpha': 0.007145020478167854,
 'colsample_bytree': 0.5,
 'subsample': 0.8,
 'learning_rate': 0.1,
 'max_depth': 10,
 'random_state': 10,
 'min_child_weight': 101,
 'booster': 'dart',
 'reg_lambda': 0.01,
 'reg_alpha': 0.05}

In [72]:
find_param.best_params

{'lambda': 0.0004236644769347804,
 'alpha': 2.829802308317586,
 'colsample_bytree': 0.6,
 'subsample': 0.6,
 'learning_rate': 0.2,
 'max_depth': 5,
 'random_state': 30,
 'min_child_weight': 112,
 'booster': 'gblinear',
 'reg_lambda': 0.01,
 'reg_alpha': 0.1}

In [93]:
run_model(xgb.XGBRegressor(objective="reg:squarederror",**find_param.best_params),
         x_train,y_train, x_test, y_test, x_test, y_test)

[00:11:56] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 318 extra nodes, 0 pruned nodes, max_depth=10
[00:11:56] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/gbm/gbtree.cc:909: drop 0 trees, weight = 1
[00:11:56] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 320 extra nodes, 0 pruned nodes, max_depth=10
[00:11:56] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/gbm/gbtree.cc:909: drop 0 trees, weight = 1
[00:11:56] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 314 extra nodes, 0 pruned nodes, max_depth=10
[00:11:56] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/gbm/gbtree.cc:909: drop 0 trees, weight = 1
[00:11:56] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc

[00:11:58] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/gbm/gbtree.cc:909: drop 0 trees, weight = 1
[00:11:58] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 324 extra nodes, 0 pruned nodes, max_depth=10
[00:11:58] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/gbm/gbtree.cc:909: drop 0 trees, weight = 1
[00:11:58] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 316 extra nodes, 0 pruned nodes, max_depth=10
[00:11:58] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/gbm/gbtree.cc:909: drop 0 trees, weight = 1
[00:11:58] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 318 extra nodes, 0 pruned nodes, max_depth=10
[00:11:58] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/gbm/gbtree.cc:909: dr

[00:12:01] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 288 extra nodes, 0 pruned nodes, max_depth=10
[00:12:01] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/gbm/gbtree.cc:909: drop 0 trees, weight = 1
[00:12:01] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 318 extra nodes, 0 pruned nodes, max_depth=10
[00:12:01] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/gbm/gbtree.cc:909: drop 0 trees, weight = 1
[00:12:02] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 268 extra nodes, 0 pruned nodes, max_depth=10
[00:12:02] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/gbm/gbtree.cc:909: drop 0 trees, weight = 1
[00:12:02] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc

[00:12:06] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/gbm/gbtree.cc:909: drop 0 trees, weight = 1
[00:12:06] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 216 extra nodes, 0 pruned nodes, max_depth=10
[00:12:06] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/gbm/gbtree.cc:909: drop 0 trees, weight = 1
[00:12:06] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 106 extra nodes, 0 pruned nodes, max_depth=10
[00:12:06] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/gbm/gbtree.cc:909: drop 0 trees, weight = 1
[00:12:06] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/tree/updater_prune.cc:101: tree pruning end, 160 extra nodes, 0 pruned nodes, max_depth=10
[00:12:06] INFO: C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/gbm/gbtree.cc:909: dr

In [45]:
a = {'lambda': 0.9838106234595098,
 'alpha': 7.079730820220626,
 'colsample_bytree': 0.7,
 'subsample': 0.5,
 'learning_rate': 0.1,
 'max_depth': 3,
 'random_state': 100,
 'min_child_weight': 48,
 'booster': 'dart',
 'reg_lambda': 0.1,
 'reg_alpha': 0.05}

In [48]:
best_params = {'alpha': 0.15293293099113212,
 'booster': 'dart',
 'colsample_bytree': 0.4,
 'eta': 0.09995384234477997,
 'gamma': 3.7988906375650893,
 'lambda': 9.97541355381705,
 'learning_rate': 0.2,
 'max_depth': 6,
 'min_child_weight': 94,
 'reg_alpha': 0.05,
 'reg_lambda': 0.1,
 'subsample': 0.1}