In [4]:
import numpy as np
import pandas as pd 
import os
from pandas.tseries.holiday import USFederalHolidayCalendar

import utils
# from utils import load_data, get_train_val_split, get_stratified_splitter
from sklearn.model_selection import StratifiedShuffleSplit, StratifiedGroupKFold, train_test_split, TimeSeriesSplit
from sklearn.metrics import mean_squared_error, make_scorer

from lightgbm import LGBMRegressor
from scipy.stats import kstest, kruskal, mannwhitneyu
from itertools import combinations
from collections import defaultdict
from tqdm import tqdm

import optuna
import pickle

In [2]:
import importlib
# Re-import the local `utils` module so that edits made to utils.py are picked
# up without restarting the kernel.
importlib.reload(utils)

<module 'utils' from 'C:\\Users\\johns\\Desktop\\probstats2\\EnergyPrediction-ASHRAE\\code\\utils.py'>

In [3]:
# Load the ASHRAE energy-prediction datasets through the project helper.
# NOTE(review): the output of this cell contains a pandas SettingWithCopyWarning
# for a chained assignment of the form
#   train[mask]['meter_reading'] = 0.2931 * train[mask]['meter_reading']
# (site 0, meter 0). A chained assignment like that typically writes to a
# temporary copy, so the scaling is probably NOT applied -- verify in utils.py
# and switch to train.loc[mask, 'meter_reading'] if so.
data_dict = utils.load_data('ashrae-energy-prediction')

Memory usage of dataframe is 0.07 MB
Memory usage after optimization is: 0.02 MB
Decreased by 73.88%


  weather_train['timestamp'] = pd.to_datetime(weather_train['timestamp'], infer_datetime_format = True, utc = True).astype('datetime64[ns]')


Memory usage of dataframe is 9.60 MB
Memory usage after optimization is: 3.07 MB
Decreased by 68.05%
Memory usage of dataframe is 19.04 MB
Memory usage after optimization is: 5.13 MB
Decreased by 73.04%


  train['timestamp'] = pd.to_datetime(train['timestamp'], infer_datetime_format = True, utc = True).astype('datetime64[ns]')


Memory usage of dataframe is 616.95 MB
Memory usage after optimization is: 289.19 MB
Decreased by 53.12%
Memory usage of dataframe is 1272.51 MB
Memory usage after optimization is: 358.53 MB
Decreased by 71.82%


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train[(train['site_id'] == 0) & (train['meter'] == 0)]['meter_reading'] = 0.2931 * train[(train['site_id'] == 0) & (train['meter'] == 0)]['meter_reading']


In [4]:
# Add weather features
#
# For every weather column this cell
#   1. computes its mean per (hour, month, site_id),
#   2. imputes missing readings with that group mean, then with the column median,
#   3. adds "<feature>_diff_hourly_from_mean" = reading - group mean.
weather_features = ['cloud_coverage', 'dew_temperature', 'air_temperature',
                    'sea_level_pressure', 'wind_direction', 'wind_speed', 'precip_depth_1_hr',]

hourly_by_site = data_dict["X_train"].groupby(['hour', 'month', 'site_id'])[weather_features].mean().reset_index()

# The left frame keeps its column names; the group means get the suffix.
data_dict["X_train"] = data_dict["X_train"].merge(
    hourly_by_site,
    on=['hour', 'month', 'site_id'],
    how='left',
    suffixes=(None, '_hourly_by_site')
)

del hourly_by_site

for feature in weather_features:
    hourly_col = feature + "_hourly_by_site"

    # Assign the filled Series back to the frame. Calling
    # `df[col].fillna(..., inplace=True)` operates on a column selection
    # (chained assignment): pandas warns about it and, with Copy-on-Write
    # enabled, it silently leaves the frame unchanged.

    # Fill in NA values from weather with hourly by site columns
    filled = data_dict["X_train"][feature].fillna(data_dict["X_train"][hourly_col])

    # Fill in the rest with the median
    filled = filled.fillna(filled.median())

    data_dict["X_train"][feature] = filled

    # Deviation of the (imputed) reading from its hourly/monthly site mean.
    # This is NaN wherever the group mean itself is NaN.
    data_dict["X_train"][feature + "_diff_hourly_from_mean"] = filled - data_dict["X_train"][hourly_col]

data_dict["X_train"] = data_dict["X_train"].drop(columns = [feat + "_hourly_by_site" for feat in weather_features])

In [5]:
# Fill in NA with median values for floor count and year_built
for feature in ['year_built', 'floor_count']:
    # Assign the result back instead of using `fillna(inplace=True)` on a column
    # selection: that is a chained assignment, which pandas warns about and which
    # does not modify the frame when Copy-on-Write is enabled.
    data_dict["X_train"][feature] = data_dict["X_train"][feature].fillna(
        data_dict["X_train"][feature].median()
    )

In [6]:
# Show which datasets are held in data_dict.
data_dict.keys()

dict_keys(['weather_test', 'X_train', 'X_test', 'y_train'])

## Examine Differences (Non-Parametric)
Using Bonferroni's Correction

### Milestone 2. 
- Show difference in sites across meter readings 
- get average meter reading per day per site 
- conduct a non-parametric ANOVA-style test (KS) or pairwise tests (Mann-Whitney) to show that the sites are different 
- train a model per site id (with rudimentary hyperparameter tuning) 
- John sites 0-7, Sharad sites 8-15 

### Milestone 3. Determine, per site, which primary uses are similar (if they have only a few buildings), which are diff
- for a given primary use, if diff, identify "clusters" of buildings that are similar 

In [2]:
# Columns fed to the per-site models: building metadata, raw weather readings,
# the `<column>_<stat>_lag7` weather aggregates, and calendar / encoded columns.
_WEATHER_COLUMNS = (
    'air_temperature',
    'cloud_coverage',
    'dew_temperature',
    'precip_depth_1_hr',
    'sea_level_pressure',
    'wind_direction',
    'wind_speed',
)
_LAG7_STATS = ('mean', 'max', 'min', 'std')

features = [
    'year_built',
    'floor_count',
    *_WEATHER_COLUMNS,
    # One name per (weather column, statistic), grouped by weather column.
    *(
        f"{column}_{stat}_lag7"
        for column in _WEATHER_COLUMNS
        for stat in _LAG7_STATS
    ),
    'log_square_feet',
    'weekday',
    'hour',
    'day',
    'weekend',
    'month',
    'primary_use_enc',
]

In [3]:
# Sanity check on the number of model input columns.
len(features)

44

In [8]:
def run_optuna_search_cv(
    site: int,
    meter: int,
    X_train: pd.DataFrame,
    y_train: pd.DataFrame,
    features: list,
    n_trials: int = 50,
    n_jobs: int = 4,
):
    """
    Runs Optuna Search for LGBMRegressor on the rows of one site/meter pair.

    Parameters
    ----------
    site : int
        Value of ``X_train["site_id"]`` to select.
    meter : int
        Value of ``X_train["meter"]`` to select.
    X_train : pd.DataFrame
        Training frame; must contain ``site_id``, ``meter`` and every column
        named in ``features``.
    y_train : pd.DataFrame
        Targets, row-aligned with ``X_train`` (filtered with the same boolean mask).
    features : list
        Column names used as model inputs.
    n_trials : int, default 50
        Number of Optuna trials.
    n_jobs : int, default 4
        Number of parallel jobs handed to ``OptunaSearchCV``.

    Returns
    -------
    optuna.integration.OptunaSearchCV or None
        The fitted search object (``refit=True``, so it can predict with the
        best parameters), or ``None`` when no rows match ``site``/``meter``.
    """
    # Compute the row selection once and reuse it everywhere below.
    site_meter_mask = (X_train["site_id"] == site) & (X_train["meter"] == meter)

    X, y = X_train.loc[site_meter_mask, features], y_train[site_meter_mask]
    if X.shape[0] == 0:
        return None

    # CV splits come from the project helper; it receives the full (unprojected)
    # rows of this site/meter so it can use columns outside `features`.
    splitter_gen = utils.get_stratified_splitter(X_train[site_meter_mask], y)

    regressor = LGBMRegressor()

    param_distributions = {
        # Upper bound is derived from the frame passed in, not from a notebook
        # global, so the function only depends on its own arguments.
        "max_depth": optuna.distributions.IntDistribution(-1, len(X_train.columns)),
        "num_leaves": optuna.distributions.IntDistribution(5, 50),
        "learning_rate": optuna.distributions.FloatDistribution(1e-7, 1, log=True),
        "n_estimators": optuna.distributions.IntDistribution(1, 300),
        "reg_alpha": optuna.distributions.FloatDistribution(1e-7, 1e7, log=True),
        "reg_lambda": optuna.distributions.FloatDistribution(1e-7, 1e7, log=True),
    }

    # Parameters not searched over (LGBMRegressor defaults):
    #   subsample_for_bin: int = 200000,
    #   min_split_gain: float = 0.0,
    #   min_child_weight: float = 0.001,
    #   min_child_samples: int = 20,
    #   subsample: float = 1.0,
    #   subsample_freq: int = 0,
    #   colsample_bytree: float = 1.0,
    #   random_state: Union[int, numpy.random.mtrand.RandomState, NoneType] = None,
    #   n_jobs: int = -1,

    def rmse(estimator, X_test, y_test):
        """Scorer returning negative RMSE (the search maximises the score)."""
        y_pred = estimator.predict(X_test)
        # sqrt(MSE) rather than `squared=False`, which is deprecated/removed in
        # recent scikit-learn releases.
        return -1 * float(np.sqrt(mean_squared_error(y_test, y_pred)))

    optuna_search = optuna.integration.OptunaSearchCV(
        regressor,
        param_distributions,
        n_trials=n_trials,
        cv = splitter_gen,
        random_state=0, # IMPORTANT,
        refit=True,
        n_jobs=n_jobs,
        scoring = rmse,
        verbose=0
    )

    optuna_search.fit(X, y)

    return optuna_search

In [9]:
# Tune one model per (site, meter) pair for sites 0-7 and meters 0-3.
N_TRIALS = 50
models = defaultdict(dict)

for site in tqdm(range(8)):
    for meter in range(4):
        # The result is None when the pair has no training rows.
        optuna_search = run_optuna_search_cv(
            site=site,
            meter=meter,
            X_train=data_dict["X_train"],
            y_train=data_dict["y_train"],
            features=features,
            n_trials=N_TRIALS,
        )

        # Models are keyed as "<site>_<meter>".
        model_identifier = f"{site}_{meter}"
        models[model_identifier] = optuna_search

  optuna_search = optuna.integration.OptunaSearchCV(
[32m[I 2023-04-08 21:44:51,324][0m A new study created in memory with name: no-name-490eba39-57bd-4d85-b81c-b45735de58b6[0m
[32m[I 2023-04-08 21:45:32,982][0m Trial 2 finished with value: -1.345808815239991 and parameters: {'max_depth': 39, 'num_leaves': 29, 'learning_rate': 1.8035122710050434e-06, 'n_estimators': 43, 'reg_alpha': 1310.7119392056818, 'reg_lambda': 2068.2275095410464}. Best is trial 2 with value: -1.345808815239991.[0m
[32m[I 2023-04-08 21:45:38,345][0m Trial 0 finished with value: -1.3457782840613832 and parameters: {'max_depth': 5, 'num_leaves': 23, 'learning_rate': 1.709952005735926e-06, 'n_estimators': 61, 'reg_alpha': 0.24097218382143726, 'reg_lambda': 1.1943322415318528e-05}. Best is trial 0 with value: -1.3457782840613832.[0m
[32m[I 2023-04-08 21:46:23,612][0m Trial 4 finished with value: -1.3456832680872148 and parameters: {'max_depth': 30, 'num_leaves': 18, 'learning_rate': 3.038196327328892e-06, '

[32m[I 2023-04-08 21:54:36,752][0m Trial 25 finished with value: -0.982925132947899 and parameters: {'max_depth': 46, 'num_leaves': 38, 'learning_rate': 0.23765322590623222, 'n_estimators': 106, 'reg_alpha': 0.01795350058962178, 'reg_lambda': 3332.515910947464}. Best is trial 21 with value: -0.9800276924140099.[0m
[32m[I 2023-04-08 21:54:52,241][0m Trial 24 finished with value: -0.9644114791203054 and parameters: {'max_depth': 43, 'num_leaves': 39, 'learning_rate': 0.15639877119149315, 'n_estimators': 162, 'reg_alpha': 0.03812814519574558, 'reg_lambda': 8132.218437949613}. Best is trial 24 with value: -0.9644114791203054.[0m
[32m[I 2023-04-08 21:54:57,565][0m Trial 27 finished with value: -1.130525576038727 and parameters: {'max_depth': 46, 'num_leaves': 26, 'learning_rate': 0.21327319860378344, 'n_estimators': 124, 'reg_alpha': 6.240270788079808, 'reg_lambda': 3.1759107882489057}. Best is trial 24 with value: -0.9644114791203054.[0m
[32m[I 2023-04-08 21:55:04,826][0m Trial 

  optuna_search = optuna.integration.OptunaSearchCV(
[32m[I 2023-04-08 22:02:22,026][0m A new study created in memory with name: no-name-43a5b610-e62f-4ed7-816a-58db6bc93c55[0m
[32m[I 2023-04-08 22:02:33,803][0m Trial 2 finished with value: -2.4061070980620327 and parameters: {'max_depth': 5, 'num_leaves': 31, 'learning_rate': 3.677419203827511e-05, 'n_estimators': 286, 'reg_alpha': 235116.29255904845, 'reg_lambda': 2.2165242507340274e-05}. Best is trial 2 with value: -2.4061070980620327.[0m
[32m[I 2023-04-08 22:02:42,543][0m Trial 3 finished with value: -2.3951750272953136 and parameters: {'max_depth': 29, 'num_leaves': 36, 'learning_rate': 0.0006683451391562364, 'n_estimators': 80, 'reg_alpha': 2370.962307553984, 'reg_lambda': 15233.351750751626}. Best is trial 3 with value: -2.3951750272953136.[0m
[32m[I 2023-04-08 22:02:56,597][0m Trial 5 finished with value: -2.3985710484562524 and parameters: {'max_depth': 55, 'num_leaves': 8, 'learning_rate': 0.0002822612545700129, 'n

[32m[I 2023-04-08 22:05:26,982][0m Trial 25 finished with value: -2.257257912098953 and parameters: {'max_depth': 50, 'num_leaves': 28, 'learning_rate': 0.1984956225296615, 'n_estimators': 38, 'reg_alpha': 10.707248018317197, 'reg_lambda': 66325.51596097056}. Best is trial 8 with value: -2.239951599133442.[0m
[32m[I 2023-04-08 22:05:31,398][0m Trial 26 finished with value: -2.2660946816755443 and parameters: {'max_depth': 51, 'num_leaves': 26, 'learning_rate': 0.2088108478679834, 'n_estimators': 51, 'reg_alpha': 30.133900869767793, 'reg_lambda': 68545.44016655705}. Best is trial 8 with value: -2.239951599133442.[0m
[32m[I 2023-04-08 22:05:35,925][0m Trial 27 finished with value: -2.272107926457301 and parameters: {'max_depth': 50, 'num_leaves': 26, 'learning_rate': 0.2792115195236119, 'n_estimators': 49, 'reg_alpha': 95.9448868036309, 'reg_lambda': 88242.36182308513}. Best is trial 8 with value: -2.239951599133442.[0m
[32m[I 2023-04-08 22:05:37,491][0m Trial 28 finished with

  optuna_search = optuna.integration.OptunaSearchCV(
[32m[I 2023-04-08 22:07:10,915][0m A new study created in memory with name: no-name-4f3054bd-10d4-431f-b278-8dfd17c65bd3[0m
[32m[I 2023-04-08 22:07:26,578][0m Trial 1 finished with value: -1.2896334182849347 and parameters: {'max_depth': 33, 'num_leaves': 9, 'learning_rate': 0.002836852190209392, 'n_estimators': 21, 'reg_alpha': 0.0001349136291226134, 'reg_lambda': 242.5609572735276}. Best is trial 1 with value: -1.2896334182849347.[0m
[32m[I 2023-04-08 22:07:27,006][0m Trial 0 finished with value: -1.0989454438904547 and parameters: {'max_depth': 53, 'num_leaves': 8, 'learning_rate': 0.2969886917659522, 'n_estimators': 26, 'reg_alpha': 5776.264399817051, 'reg_lambda': 18440.714578818144}. Best is trial 0 with value: -1.0989454438904547.[0m
[32m[I 2023-04-08 22:07:37,818][0m Trial 2 finished with value: -1.3154463087318593 and parameters: {'max_depth': 1, 'num_leaves': 17, 'learning_rate': 5.210847102294211e-06, 'n_estimat

[32m[I 2023-04-08 22:12:57,858][0m Trial 26 finished with value: -1.1205662553879248 and parameters: {'max_depth': 29, 'num_leaves': 6, 'learning_rate': 0.3649482650172662, 'n_estimators': 44, 'reg_alpha': 2098.813004017241, 'reg_lambda': 26713.35507523783}. Best is trial 14 with value: -1.076721513092178.[0m
[32m[I 2023-04-08 22:13:09,655][0m Trial 25 finished with value: -1.1094280780478656 and parameters: {'max_depth': 19, 'num_leaves': 22, 'learning_rate': 0.21015825416900705, 'n_estimators': 192, 'reg_alpha': 4954.423767857461, 'reg_lambda': 347562.209379484}. Best is trial 14 with value: -1.076721513092178.[0m
[32m[I 2023-04-08 22:13:40,935][0m Trial 23 finished with value: -1.3154909875388452 and parameters: {'max_depth': 7, 'num_leaves': 40, 'learning_rate': 1.684851349801689e-07, 'n_estimators': 185, 'reg_alpha': 0.3787013111486892, 'reg_lambda': 33.03847150274547}. Best is trial 14 with value: -1.076721513092178.[0m
[32m[I 2023-04-08 22:14:03,639][0m Trial 29 finis

  optuna_search = optuna.integration.OptunaSearchCV(
[32m[I 2023-04-08 22:21:01,349][0m A new study created in memory with name: no-name-2bbfe204-067d-47b8-a34c-8cddb378e859[0m
[32m[I 2023-04-08 22:21:04,611][0m Trial 3 finished with value: -2.361166370518383 and parameters: {'max_depth': 39, 'num_leaves': 48, 'learning_rate': 0.2649400348654928, 'n_estimators': 76, 'reg_alpha': 519372.2982678234, 'reg_lambda': 6.744711808859547e-07}. Best is trial 3 with value: -2.361166370518383.[0m
[32m[I 2023-04-08 22:21:04,809][0m Trial 1 finished with value: -2.356296980692046 and parameters: {'max_depth': 44, 'num_leaves': 28, 'learning_rate': 0.006468476401421289, 'n_estimators': 16, 'reg_alpha': 1.933212391575136, 'reg_lambda': 283171.1927589527}. Best is trial 1 with value: -2.356296980692046.[0m
[32m[I 2023-04-08 22:21:07,428][0m Trial 2 finished with value: -2.361166370518383 and parameters: {'max_depth': 40, 'num_leaves': 31, 'learning_rate': 0.1400228125085338, 'n_estimators': 

[32m[I 2023-04-08 22:22:21,654][0m Trial 26 finished with value: -2.175055230344408 and parameters: {'max_depth': 35, 'num_leaves': 22, 'learning_rate': 0.00349619535559888, 'n_estimators': 206, 'reg_alpha': 8883.573720963488, 'reg_lambda': 381.6255254812323}. Best is trial 20 with value: -2.023641790110549.[0m
[32m[I 2023-04-08 22:22:23,014][0m Trial 27 finished with value: -2.363251923938292 and parameters: {'max_depth': 33, 'num_leaves': 21, 'learning_rate': 0.013002789811541847, 'n_estimators': 204, 'reg_alpha': 89508.06049574724, 'reg_lambda': 8367165.65870312}. Best is trial 20 with value: -2.023641790110549.[0m
[32m[I 2023-04-08 22:22:26,644][0m Trial 28 finished with value: -2.4772548725868804 and parameters: {'max_depth': 33, 'num_leaves': 11, 'learning_rate': 0.012078211129315666, 'n_estimators': 152, 'reg_alpha': 118424.05796894839, 'reg_lambda': 4.6210106153117705}. Best is trial 20 with value: -2.023641790110549.[0m
[32m[I 2023-04-08 22:22:27,673][0m Trial 29 fi

  optuna_search = optuna.integration.OptunaSearchCV(
[32m[I 2023-04-08 22:23:27,842][0m A new study created in memory with name: no-name-43cec5b5-d9e3-4595-acb5-c23976b91c0e[0m
[32m[I 2023-04-08 22:24:18,528][0m Trial 0 finished with value: -1.3434301816758472 and parameters: {'max_depth': 15, 'num_leaves': 18, 'learning_rate': 3.124529027029532e-05, 'n_estimators': 98, 'reg_alpha': 0.06619283950786217, 'reg_lambda': 1919590.455162878}. Best is trial 0 with value: -1.3434301816758472.[0m
[32m[I 2023-04-08 22:24:37,414][0m Trial 4 finished with value: -1.3437055296702973 and parameters: {'max_depth': 26, 'num_leaves': 14, 'learning_rate': 1.8346819863757289e-07, 'n_estimators': 7, 'reg_alpha': 17385.870758429264, 'reg_lambda': 7392751.020609938}. Best is trial 0 with value: -1.3434301816758472.[0m
[32m[I 2023-04-08 22:26:43,323][0m Trial 2 finished with value: -1.3323974857254228 and parameters: {'max_depth': 43, 'num_leaves': 14, 'learning_rate': 5.655755874441142e-05, 'n_es

[32m[I 2023-04-08 22:35:04,140][0m Trial 25 finished with value: -1.123887655197308 and parameters: {'max_depth': 35, 'num_leaves': 31, 'learning_rate': 0.015508600670118371, 'n_estimators': 37, 'reg_alpha': 1.4866774717423842e-06, 'reg_lambda': 8.161421458273388}. Best is trial 11 with value: -1.0653243809458708.[0m
[32m[I 2023-04-08 22:35:07,643][0m Trial 26 finished with value: -1.131044672948757 and parameters: {'max_depth': 33, 'num_leaves': 24, 'learning_rate': 0.012411339865031425, 'n_estimators': 40, 'reg_alpha': 5.99434156730114e-06, 'reg_lambda': 8.521952457236187}. Best is trial 11 with value: -1.0653243809458708.[0m
[32m[I 2023-04-08 22:36:29,555][0m Trial 27 finished with value: -1.0930776136771883 and parameters: {'max_depth': 47, 'num_leaves': 24, 'learning_rate': 0.016530802172526927, 'n_estimators': 131, 'reg_alpha': 0.012353146796672118, 'reg_lambda': 11.476031879818636}. Best is trial 11 with value: -1.0653243809458708.[0m
[32m[I 2023-04-08 22:36:43,455][0

  optuna_search = optuna.integration.OptunaSearchCV(
[32m[I 2023-04-08 22:45:21,911][0m A new study created in memory with name: no-name-fafb7688-d720-454e-99b8-f2f1b8b27569[0m
[32m[I 2023-04-08 22:45:38,293][0m Trial 1 finished with value: -2.045603532052734 and parameters: {'max_depth': 4, 'num_leaves': 29, 'learning_rate': 6.481381442822796e-06, 'n_estimators': 1, 'reg_alpha': 0.004940534216899258, 'reg_lambda': 0.11086182708987587}. Best is trial 1 with value: -2.045603532052734.[0m
[32m[I 2023-04-08 22:45:48,614][0m Trial 0 finished with value: -2.045597793950084 and parameters: {'max_depth': 22, 'num_leaves': 8, 'learning_rate': 8.771456074840415e-07, 'n_estimators': 20, 'reg_alpha': 0.0006047057797180549, 'reg_lambda': 0.27924337236749963}. Best is trial 0 with value: -2.045597793950084.[0m
[32m[I 2023-04-08 22:46:14,552][0m Trial 5 finished with value: -1.8792345461887665 and parameters: {'max_depth': 10, 'num_leaves': 40, 'learning_rate': 0.03524130772019482, 'n_est

[32m[I 2023-04-08 22:54:59,284][0m Trial 23 finished with value: -1.754479254476589 and parameters: {'max_depth': 7, 'num_leaves': 45, 'learning_rate': 0.02908555872039845, 'n_estimators': 138, 'reg_alpha': 6.51851487594305, 'reg_lambda': 5122.331203866735}. Best is trial 17 with value: -1.7503538772282436.[0m
[32m[I 2023-04-08 22:55:09,555][0m Trial 24 finished with value: -1.7316270144354693 and parameters: {'max_depth': 5, 'num_leaves': 45, 'learning_rate': 0.01635422350734579, 'n_estimators': 212, 'reg_alpha': 21.696946490972078, 'reg_lambda': 2.5520932957813898}. Best is trial 24 with value: -1.7316270144354693.[0m
[32m[I 2023-04-08 22:55:25,824][0m Trial 27 finished with value: -1.7538714723252764 and parameters: {'max_depth': 4, 'num_leaves': 35, 'learning_rate': 0.013140697961083254, 'n_estimators': 162, 'reg_alpha': 16216.761740516331, 'reg_lambda': 1018.0304823967143}. Best is trial 24 with value: -1.7316270144354693.[0m
[32m[I 2023-04-08 22:55:59,572][0m Trial 26 

  optuna_search = optuna.integration.OptunaSearchCV(
[32m[I 2023-04-08 23:04:53,002][0m A new study created in memory with name: no-name-70a47be8-8c0b-4aa1-8f06-99935390c6db[0m
[32m[I 2023-04-08 23:05:51,439][0m Trial 2 finished with value: -1.9420095736494112 and parameters: {'max_depth': 18, 'num_leaves': 40, 'learning_rate': 0.00030607435978345193, 'n_estimators': 205, 'reg_alpha': 252.37182371038938, 'reg_lambda': 502811.94974565174}. Best is trial 2 with value: -1.9420095736494112.[0m
[32m[I 2023-04-08 23:05:59,483][0m Trial 0 finished with value: -1.9468989730480646 and parameters: {'max_depth': 27, 'num_leaves': 33, 'learning_rate': 2.827134732056512e-07, 'n_estimators': 133, 'reg_alpha': 0.012674348250325875, 'reg_lambda': 5.62985253783601e-07}. Best is trial 2 with value: -1.9420095736494112.[0m
[32m[I 2023-04-08 23:06:11,408][0m Trial 3 finished with value: -1.9451007223906056 and parameters: {'max_depth': 42, 'num_leaves': 46, 'learning_rate': 2.2731392377977267e-

[32m[I 2023-04-08 23:11:30,118][0m Trial 24 finished with value: -1.881640653057405 and parameters: {'max_depth': 23, 'num_leaves': 13, 'learning_rate': 0.47770076435419956, 'n_estimators': 234, 'reg_alpha': 0.1026326235127053, 'reg_lambda': 2.7611485241010537e-05}. Best is trial 7 with value: -1.690452929771825.[0m
[32m[I 2023-04-08 23:12:12,345][0m Trial 25 finished with value: -1.6908071457618814 and parameters: {'max_depth': 9, 'num_leaves': 13, 'learning_rate': 0.008200146383214105, 'n_estimators': 237, 'reg_alpha': 1.1723413007667878e-05, 'reg_lambda': 4.237991691761584e-05}. Best is trial 7 with value: -1.690452929771825.[0m
[32m[I 2023-04-08 23:12:56,444][0m Trial 27 finished with value: -1.7238592516443725 and parameters: {'max_depth': 7, 'num_leaves': 26, 'learning_rate': 0.007770913022823087, 'n_estimators': 264, 'reg_alpha': 3.72633748398559e-06, 'reg_lambda': 1.1089890548098028e-05}. Best is trial 7 with value: -1.690452929771825.[0m
[32m[I 2023-04-08 23:13:03,67

  optuna_search = optuna.integration.OptunaSearchCV(
[32m[I 2023-04-08 23:18:41,531][0m A new study created in memory with name: no-name-81a6a668-5950-4bd0-9f19-d032ce2d4352[0m
[32m[I 2023-04-08 23:20:10,197][0m Trial 2 finished with value: -1.4042517809213173 and parameters: {'max_depth': 24, 'num_leaves': 16, 'learning_rate': 1.0342425555781993e-05, 'n_estimators': 80, 'reg_alpha': 2985672.7079586578, 'reg_lambda': 1192719.2213240673}. Best is trial 2 with value: -1.4042517809213173.[0m
[32m[I 2023-04-08 23:21:28,777][0m Trial 1 finished with value: -1.2957117470995423 and parameters: {'max_depth': 49, 'num_leaves': 15, 'learning_rate': 0.0021141578575491256, 'n_estimators': 75, 'reg_alpha': 35.475321908552864, 'reg_lambda': 927.7016755034615}. Best is trial 1 with value: -1.2957117470995423.[0m
[32m[I 2023-04-08 23:22:19,778][0m Trial 3 finished with value: -1.2798946871789203 and parameters: {'max_depth': 42, 'num_leaves': 20, 'learning_rate': 0.018298663609088158, 'n_es

[32m[I 2023-04-08 23:59:42,239][0m Trial 24 finished with value: -1.0233243714255873 and parameters: {'max_depth': 17, 'num_leaves': 11, 'learning_rate': 0.007934896451059329, 'n_estimators': 213, 'reg_alpha': 1.023245511030383e-07, 'reg_lambda': 0.00513969827690218}. Best is trial 19 with value: -0.9762500530064298.[0m
[32m[I 2023-04-09 00:00:31,323][0m Trial 26 finished with value: -1.0330258870780966 and parameters: {'max_depth': 19, 'num_leaves': 5, 'learning_rate': 0.13586956134792477, 'n_estimators': 293, 'reg_alpha': 3.0882880367587654e-07, 'reg_lambda': 14.524900635113747}. Best is trial 19 with value: -0.9762500530064298.[0m
[32m[I 2023-04-09 00:01:49,850][0m Trial 23 finished with value: -1.0474637694392404 and parameters: {'max_depth': 17, 'num_leaves': 11, 'learning_rate': 0.003744613396976263, 'n_estimators': 298, 'reg_alpha': 2.2242004378624557e-07, 'reg_lambda': 0.0026698890554881804}. Best is trial 19 with value: -0.9762500530064298.[0m
[32m[I 2023-04-09 00:05

  optuna_search = optuna.integration.OptunaSearchCV(
[32m[I 2023-04-09 00:38:37,605][0m A new study created in memory with name: no-name-b8f5ae54-fd54-450f-9d7a-92f18a6d8453[0m
[32m[I 2023-04-09 00:38:56,057][0m Trial 1 finished with value: -1.5080259500550492 and parameters: {'max_depth': 51, 'num_leaves': 7, 'learning_rate': 0.013820308336315215, 'n_estimators': 14, 'reg_alpha': 6.852423249437741e-07, 'reg_lambda': 2.1356950157428385e-06}. Best is trial 1 with value: -1.5080259500550492.[0m
[32m[I 2023-04-09 00:39:18,595][0m Trial 2 finished with value: -1.7414342182100844 and parameters: {'max_depth': 43, 'num_leaves': 32, 'learning_rate': 6.357097410239457e-05, 'n_estimators': 206, 'reg_alpha': 455003.9567070654, 'reg_lambda': 3.923639225841054e-06}. Best is trial 1 with value: -1.5080259500550492.[0m
[32m[I 2023-04-09 00:39:33,534][0m Trial 5 finished with value: -1.7157904979253076 and parameters: {'max_depth': 1, 'num_leaves': 23, 'learning_rate': 0.000696010764809821

[32m[I 2023-04-09 00:49:42,962][0m Trial 26 finished with value: -1.6407972435886307 and parameters: {'max_depth': 53, 'num_leaves': 40, 'learning_rate': 0.0011523852676946181, 'n_estimators': 68, 'reg_alpha': 2.3686123365214694e-06, 'reg_lambda': 3591.9036583083625}. Best is trial 24 with value: -0.8688200016579419.[0m
[32m[I 2023-04-09 00:50:28,051][0m Trial 27 finished with value: -1.6004173825108678 and parameters: {'max_depth': 48, 'num_leaves': 40, 'learning_rate': 0.0018393022980724857, 'n_estimators': 63, 'reg_alpha': 5.077914091068042e-06, 'reg_lambda': 5282.805053321294}. Best is trial 24 with value: -0.8688200016579419.[0m
[32m[I 2023-04-09 00:51:42,993][0m Trial 23 finished with value: -0.8758097888971675 and parameters: {'max_depth': 57, 'num_leaves': 40, 'learning_rate': 0.06639294272462447, 'n_estimators': 299, 'reg_alpha': 1.2505766665385067e-06, 'reg_lambda': 3692.7019785453426}. Best is trial 24 with value: -0.8688200016579419.[0m
[32m[I 2023-04-09 00:52:29,

  optuna_search = optuna.integration.OptunaSearchCV(
[32m[I 2023-04-09 01:11:11,682][0m A new study created in memory with name: no-name-5038ccba-1b93-40f6-8fb3-fccfff1711d7[0m
[32m[I 2023-04-09 01:11:25,550][0m Trial 1 finished with value: -1.2963486962705975 and parameters: {'max_depth': 46, 'num_leaves': 6, 'learning_rate': 4.698691815820031e-05, 'n_estimators': 36, 'reg_alpha': 7053221.561484169, 'reg_lambda': 1.6105649509257753e-07}. Best is trial 1 with value: -1.2963486962705975.[0m
[32m[I 2023-04-09 01:11:41,785][0m Trial 0 finished with value: -1.230563242741994 and parameters: {'max_depth': 43, 'num_leaves': 31, 'learning_rate': 0.005592570371929066, 'n_estimators': 25, 'reg_alpha': 3.5389923448129997e-06, 'reg_lambda': 0.0008422850980542025}. Best is trial 0 with value: -1.230563242741994.[0m
[32m[I 2023-04-09 01:11:45,722][0m Trial 3 finished with value: -1.29634339340013 and parameters: {'max_depth': 13, 'num_leaves': 38, 'learning_rate': 1.9860222511225629e-07,

[32m[I 2023-04-09 01:17:13,137][0m Trial 24 finished with value: -1.0902564657627987 and parameters: {'max_depth': 30, 'num_leaves': 14, 'learning_rate': 0.1346417034498625, 'n_estimators': 286, 'reg_alpha': 1.0569156011353886, 'reg_lambda': 19.598744122218154}. Best is trial 7 with value: -1.0353575410596239.[0m
[32m[I 2023-04-09 01:17:14,477][0m Trial 26 finished with value: -1.0853325515956465 and parameters: {'max_depth': 32, 'num_leaves': 13, 'learning_rate': 0.1421280179832608, 'n_estimators': 235, 'reg_alpha': 0.00014741837029870865, 'reg_lambda': 74.94771327744414}. Best is trial 7 with value: -1.0353575410596239.[0m
[32m[I 2023-04-09 01:17:20,078][0m Trial 25 finished with value: -1.0877018472676534 and parameters: {'max_depth': 30, 'num_leaves': 13, 'learning_rate': 0.11639310443059013, 'n_estimators': 281, 'reg_alpha': 1.5160800962987024, 'reg_lambda': 60.943106648625836}. Best is trial 7 with value: -1.0353575410596239.[0m
[32m[I 2023-04-09 01:17:25,082][0m Trial

  optuna_search = optuna.integration.OptunaSearchCV(
[32m[I 2023-04-09 01:24:06,293][0m A new study created in memory with name: no-name-86c246c7-289d-4d40-97cb-ebf67d7c5cdb[0m
[32m[I 2023-04-09 01:24:14,583][0m Trial 1 finished with value: -1.1538488016282629 and parameters: {'max_depth': 27, 'num_leaves': 7, 'learning_rate': 8.515867448804224e-06, 'n_estimators': 27, 'reg_alpha': 297.1778308861434, 'reg_lambda': 66.41885712517791}. Best is trial 1 with value: -1.1538488016282629.[0m
[32m[I 2023-04-09 01:24:18,738][0m Trial 3 finished with value: -1.153641563640911 and parameters: {'max_depth': 3, 'num_leaves': 31, 'learning_rate': 1.0910320078274299e-05, 'n_estimators': 72, 'reg_alpha': 2.9495776249751525e-07, 'reg_lambda': 8.620958757979907e-05}. Best is trial 3 with value: -1.153641563640911.[0m
[32m[I 2023-04-09 01:24:33,484][0m Trial 5 finished with value: -1.1115969479832073 and parameters: {'max_depth': 9, 'num_leaves': 10, 'learning_rate': 0.38424332729106725, 'n_es

[32m[I 2023-04-09 01:29:25,310][0m Trial 24 finished with value: -1.014225840542691 and parameters: {'max_depth': 44, 'num_leaves': 6, 'learning_rate': 0.002061754097140268, 'n_estimators': 255, 'reg_alpha': 0.6367812766662428, 'reg_lambda': 0.06129001040458121}. Best is trial 21 with value: -0.9549624095987927.[0m
[32m[I 2023-04-09 01:29:49,409][0m Trial 26 finished with value: -1.0651106581842664 and parameters: {'max_depth': 30, 'num_leaves': 11, 'learning_rate': 0.07470461638080468, 'n_estimators': 225, 'reg_alpha': 0.13263591766062688, 'reg_lambda': 0.03864890810955911}. Best is trial 21 with value: -0.9549624095987927.[0m
[32m[I 2023-04-09 01:29:59,286][0m Trial 27 finished with value: -1.0756491973017765 and parameters: {'max_depth': 29, 'num_leaves': 11, 'learning_rate': 0.06483185232855289, 'n_estimators': 220, 'reg_alpha': 0.06958161571490336, 'reg_lambda': 0.000601666900272773}. Best is trial 21 with value: -0.9549624095987927.[0m
[32m[I 2023-04-09 01:30:03,462][0

  optuna_search = optuna.integration.OptunaSearchCV(
[32m[I 2023-04-09 01:34:50,155][0m A new study created in memory with name: no-name-bffc3a8f-1719-4851-8d28-1716d0abb733[0m
[32m[I 2023-04-09 01:34:54,610][0m Trial 3 finished with value: -2.758890702266378 and parameters: {'max_depth': 54, 'num_leaves': 46, 'learning_rate': 1.0354179169199244e-06, 'n_estimators': 7, 'reg_alpha': 0.0001550268480156383, 'reg_lambda': 0.0027200303285281743}. Best is trial 3 with value: -2.758890702266378.[0m
[32m[I 2023-04-09 01:34:57,235][0m Trial 1 finished with value: -2.644657082016932 and parameters: {'max_depth': 41, 'num_leaves': 23, 'learning_rate': 0.05342041743040408, 'n_estimators': 31, 'reg_alpha': 236.06286473643206, 'reg_lambda': 1.2121066850512157}. Best is trial 1 with value: -2.644657082016932.[0m
[32m[I 2023-04-09 01:35:00,900][0m Trial 4 finished with value: -2.7464251071316266 and parameters: {'max_depth': 2, 'num_leaves': 33, 'learning_rate': 0.00027704420130209413, 'n_e

[32m[I 2023-04-09 01:37:20,303][0m Trial 23 finished with value: -2.58407580312242 and parameters: {'max_depth': -1, 'num_leaves': 15, 'learning_rate': 0.028134039572269537, 'n_estimators': 212, 'reg_alpha': 3.857260195586644e-06, 'reg_lambda': 57762.21977987297}. Best is trial 13 with value: -2.5347844281495617.[0m
[32m[I 2023-04-09 01:37:46,617][0m Trial 28 finished with value: -2.591644713386243 and parameters: {'max_depth': 17, 'num_leaves': 9, 'learning_rate': 0.23047896741888502, 'n_estimators': 257, 'reg_alpha': 1.1567924554700002e-07, 'reg_lambda': 6222446.291972189}. Best is trial 13 with value: -2.5347844281495617.[0m
[32m[I 2023-04-09 01:37:48,120][0m Trial 27 finished with value: -2.583144000135388 and parameters: {'max_depth': 17, 'num_leaves': 10, 'learning_rate': 0.23209512880013752, 'n_estimators': 255, 'reg_alpha': 2.4014730322842647e-07, 'reg_lambda': 5554632.749133326}. Best is trial 13 with value: -2.5347844281495617.[0m
[32m[I 2023-04-09 01:37:48,900][0m

  optuna_search = optuna.integration.OptunaSearchCV(
[32m[I 2023-04-09 01:40:00,888][0m A new study created in memory with name: no-name-7166a6fa-df40-4b07-aaea-1b28d3a9e307[0m
[32m[I 2023-04-09 01:40:07,166][0m Trial 0 finished with value: -3.1172357222671105 and parameters: {'max_depth': 0, 'num_leaves': 34, 'learning_rate': 0.0019855169952365764, 'n_estimators': 192, 'reg_alpha': 1187494.316518869, 'reg_lambda': 1.9951924627751157e-07}. Best is trial 0 with value: -3.1172357222671105.[0m
[32m[I 2023-04-09 01:40:18,773][0m Trial 2 finished with value: -3.11672773328156 and parameters: {'max_depth': 26, 'num_leaves': 44, 'learning_rate': 5.9915639187970536e-05, 'n_estimators': 142, 'reg_alpha': 5678.432263719489, 'reg_lambda': 13205.334755296022}. Best is trial 2 with value: -3.11672773328156.[0m
[32m[I 2023-04-09 01:40:24,505][0m Trial 1 finished with value: -3.117680511302352 and parameters: {'max_depth': 50, 'num_leaves': 33, 'learning_rate': 0.0005743393910840358, 'n_es

[32m[I 2023-04-09 01:42:40,922][0m Trial 12 finished with value: -3.117222236761987 and parameters: {'max_depth': 56, 'num_leaves': 32, 'learning_rate': 1.2577664279660702e-06, 'n_estimators': 254, 'reg_alpha': 3.1794701546643804e-05, 'reg_lambda': 1.0020858524842029e-05}. Best is trial 22 with value: -3.10650365224238.[0m
[32m[I 2023-04-09 01:42:41,256][0m Trial 23 finished with value: -3.8216792125374326 and parameters: {'max_depth': 57, 'num_leaves': 13, 'learning_rate': 0.03628889232297975, 'n_estimators': 179, 'reg_alpha': 0.22866706058414804, 'reg_lambda': 0.0828822228053984}. Best is trial 22 with value: -3.10650365224238.[0m
[32m[I 2023-04-09 01:42:50,238][0m Trial 29 finished with value: -3.1119163502038236 and parameters: {'max_depth': 4, 'num_leaves': 21, 'learning_rate': 0.00155953420759869, 'n_estimators': 40, 'reg_alpha': 50.654359962984394, 'reg_lambda': 0.002031997855578815}. Best is trial 22 with value: -3.10650365224238.[0m
[32m[I 2023-04-09 01:43:06,543][0

  optuna_search = optuna.integration.OptunaSearchCV(
[32m[I 2023-04-09 01:45:27,380][0m A new study created in memory with name: no-name-e4d196aa-afae-4523-aa2b-e8b43468d81b[0m
[32m[I 2023-04-09 01:45:30,955][0m Trial 2 finished with value: -2.2829398493129194 and parameters: {'max_depth': 35, 'num_leaves': 34, 'learning_rate': 3.572948345255681e-07, 'n_estimators': 141, 'reg_alpha': 3326261.8595324517, 'reg_lambda': 441.8638929699737}. Best is trial 2 with value: -2.2829398493129194.[0m
[32m[I 2023-04-09 01:45:31,527][0m Trial 0 finished with value: -2.2829398493129194 and parameters: {'max_depth': 14, 'num_leaves': 16, 'learning_rate': 0.0017928807702305598, 'n_estimators': 185, 'reg_alpha': 7665791.373242302, 'reg_lambda': 3439.3107021626847}. Best is trial 2 with value: -2.2829398493129194.[0m
[32m[I 2023-04-09 01:45:33,365][0m Trial 4 finished with value: -2.098941265517958 and parameters: {'max_depth': 35, 'num_leaves': 45, 'learning_rate': 0.8682857937283799, 'n_estim

[32m[I 2023-04-09 01:46:49,002][0m Trial 27 finished with value: -2.192501739504079 and parameters: {'max_depth': 19, 'num_leaves': 50, 'learning_rate': 0.2102235476861367, 'n_estimators': 4, 'reg_alpha': 38555.07770362419, 'reg_lambda': 2690.8948863592404}. Best is trial 24 with value: -2.050066691012388.[0m
[32m[I 2023-04-09 01:46:51,541][0m Trial 25 finished with value: -2.071278794666407 and parameters: {'max_depth': 18, 'num_leaves': 50, 'learning_rate': 0.09494956397246149, 'n_estimators': 83, 'reg_alpha': 17281.564780740984, 'reg_lambda': 2.094029467631736}. Best is trial 24 with value: -2.050066691012388.[0m
[32m[I 2023-04-09 01:46:53,128][0m Trial 28 finished with value: -2.2829398493129194 and parameters: {'max_depth': 5, 'num_leaves': 46, 'learning_rate': 0.14003005205671476, 'n_estimators': 72, 'reg_alpha': 127859.29626902472, 'reg_lambda': 76553.22051844416}. Best is trial 24 with value: -2.050066691012388.[0m
[32m[I 2023-04-09 01:46:53,867][0m Trial 26 finished

  optuna_search = optuna.integration.OptunaSearchCV(
[32m[I 2023-04-09 01:47:45,009][0m A new study created in memory with name: no-name-6678854e-79af-4f70-842d-17579b21af95[0m
[32m[I 2023-04-09 01:47:53,279][0m Trial 1 finished with value: -3.063744609037508 and parameters: {'max_depth': 6, 'num_leaves': 42, 'learning_rate': 1.9999378887323218e-05, 'n_estimators': 240, 'reg_alpha': 150836.47271082964, 'reg_lambda': 26998.396253316812}. Best is trial 1 with value: -3.063744609037508.[0m
[32m[I 2023-04-09 01:47:56,493][0m Trial 4 finished with value: -3.063744609037508 and parameters: {'max_depth': 57, 'num_leaves': 35, 'learning_rate': 0.0002885255409677681, 'n_estimators': 37, 'reg_alpha': 1811708.3823371648, 'reg_lambda': 0.018665375153013822}. Best is trial 1 with value: -3.063744609037508.[0m
[32m[I 2023-04-09 01:48:02,671][0m Trial 5 finished with value: -3.063744609037508 and parameters: {'max_depth': 26, 'num_leaves': 44, 'learning_rate': 5.2230115977774205e-05, 'n_es

[32m[I 2023-04-09 01:50:24,087][0m Trial 25 finished with value: -2.4441386885758476 and parameters: {'max_depth': 19, 'num_leaves': 14, 'learning_rate': 0.004262054470770819, 'n_estimators': 202, 'reg_alpha': 7.560093437768215e-06, 'reg_lambda': 802.6369080297256}. Best is trial 13 with value: -2.37771853020383.[0m
[32m[I 2023-04-09 01:50:31,088][0m Trial 23 finished with value: -2.3853932488483776 and parameters: {'max_depth': 50, 'num_leaves': 27, 'learning_rate': 0.004095828919829719, 'n_estimators': 286, 'reg_alpha': 1.1002986696211768e-05, 'reg_lambda': 774.3896114027214}. Best is trial 13 with value: -2.37771853020383.[0m
[32m[I 2023-04-09 01:50:36,491][0m Trial 26 finished with value: -2.4611783641257863 and parameters: {'max_depth': 17, 'num_leaves': 13, 'learning_rate': 0.003036916286102049, 'n_estimators': 276, 'reg_alpha': 1.0421471123077446e-05, 'reg_lambda': 1212.0718969489333}. Best is trial 13 with value: -2.37771853020383.[0m
[32m[I 2023-04-09 01:50:50,057][

  optuna_search = optuna.integration.OptunaSearchCV(
[32m[I 2023-04-09 01:53:09,138][0m A new study created in memory with name: no-name-19429ff2-9b7d-4bb5-8cf3-1566a07fc4d6[0m
[32m[I 2023-04-09 01:53:13,623][0m Trial 0 finished with value: -3.434216437894476 and parameters: {'max_depth': 33, 'num_leaves': 50, 'learning_rate': 4.1907769138965713e-07, 'n_estimators': 181, 'reg_alpha': 922721.0599922147, 'reg_lambda': 1.1367691031438167}. Best is trial 0 with value: -3.434216437894476.[0m
[32m[I 2023-04-09 01:53:15,308][0m Trial 2 finished with value: -3.4224993755225848 and parameters: {'max_depth': 35, 'num_leaves': 19, 'learning_rate': 0.00014040208316199747, 'n_estimators': 42, 'reg_alpha': 0.042185155661792643, 'reg_lambda': 0.013462668608606898}. Best is trial 2 with value: -3.4224993755225848.[0m
[32m[I 2023-04-09 01:53:18,981][0m Trial 5 finished with value: -3.434216437894476 and parameters: {'max_depth': 42, 'num_leaves': 29, 'learning_rate': 0.012417871606563817, 'n

[32m[I 2023-04-09 01:54:18,669][0m Trial 24 finished with value: -2.4689030359421196 and parameters: {'max_depth': 10, 'num_leaves': 16, 'learning_rate': 0.1280523048002569, 'n_estimators': 88, 'reg_alpha': 1.728816294439611e-07, 'reg_lambda': 3.737443195145088e-05}. Best is trial 22 with value: -2.430646530561046.[0m
[32m[I 2023-04-09 01:54:20,642][0m Trial 25 finished with value: -2.446173283821682 and parameters: {'max_depth': 12, 'num_leaves': 17, 'learning_rate': 0.08108897016379833, 'n_estimators': 90, 'reg_alpha': 1.1342618036466432e-07, 'reg_lambda': 1.4915600377991973e-05}. Best is trial 22 with value: -2.430646530561046.[0m
[32m[I 2023-04-09 01:54:20,860][0m Trial 26 finished with value: -2.4809954476551 and parameters: {'max_depth': 28, 'num_leaves': 19, 'learning_rate': 0.1418485129496149, 'n_estimators': 60, 'reg_alpha': 5.556195076854146e-06, 'reg_lambda': 1.8144415528308242e-05}. Best is trial 22 with value: -2.430646530561046.[0m
[32m[I 2023-04-09 01:54:28,293

  optuna_search = optuna.integration.OptunaSearchCV(
[32m[I 2023-04-09 01:55:26,581][0m A new study created in memory with name: no-name-2900b5ab-9417-4d77-b1b9-0c60f79237cd[0m
Traceback (most recent call last):
  File "C:\Users\johns\anaconda3\envs\probstats\lib\site-packages\sklearn\model_selection\_validation.py", line 767, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:\Users\johns\AppData\Local\Temp\ipykernel_5792\659673755.py", line 48, in rmse
    y_pred = estimator.predict(X_test)
  File "C:\Users\johns\anaconda3\envs\probstats\lib\site-packages\lightgbm\sklearn.py", line 803, in predict
    return self._Booster.predict(X, raw_score=raw_score, start_iteration=start_iteration, num_iteration=num_iteration,
  File "C:\Users\johns\anaconda3\envs\probstats\lib\site-packages\lightgbm\basic.py", line 3538, in predict
    return predictor.predict(data, start_iteration, num_iteration,
  File "C:\Users\johns\anaconda3\envs\probstats\lib\site-packages\lightgbm\basic

Traceback (most recent call last):
  File "C:\Users\johns\anaconda3\envs\probstats\lib\site-packages\sklearn\model_selection\_validation.py", line 767, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:\Users\johns\AppData\Local\Temp\ipykernel_5792\659673755.py", line 48, in rmse
    y_pred = estimator.predict(X_test)
  File "C:\Users\johns\anaconda3\envs\probstats\lib\site-packages\lightgbm\sklearn.py", line 803, in predict
    return self._Booster.predict(X, raw_score=raw_score, start_iteration=start_iteration, num_iteration=num_iteration,
  File "C:\Users\johns\anaconda3\envs\probstats\lib\site-packages\lightgbm\basic.py", line 3538, in predict
    return predictor.predict(data, start_iteration, num_iteration,
  File "C:\Users\johns\anaconda3\envs\probstats\lib\site-packages\lightgbm\basic.py", line 820, in predict
    data = _data_from_pandas(data, None, None, self.pandas_categorical)[0]
  File "C:\Users\johns\anaconda3\envs\probstats\lib\site-packages\lightgbm\bas

Traceback (most recent call last):
  File "C:\Users\johns\anaconda3\envs\probstats\lib\site-packages\sklearn\model_selection\_validation.py", line 767, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:\Users\johns\AppData\Local\Temp\ipykernel_5792\659673755.py", line 48, in rmse
    y_pred = estimator.predict(X_test)
  File "C:\Users\johns\anaconda3\envs\probstats\lib\site-packages\lightgbm\sklearn.py", line 803, in predict
    return self._Booster.predict(X, raw_score=raw_score, start_iteration=start_iteration, num_iteration=num_iteration,
  File "C:\Users\johns\anaconda3\envs\probstats\lib\site-packages\lightgbm\basic.py", line 3538, in predict
    return predictor.predict(data, start_iteration, num_iteration,
  File "C:\Users\johns\anaconda3\envs\probstats\lib\site-packages\lightgbm\basic.py", line 820, in predict
    data = _data_from_pandas(data, None, None, self.pandas_categorical)[0]
  File "C:\Users\johns\anaconda3\envs\probstats\lib\site-packages\lightgbm\bas

Traceback (most recent call last):
  File "C:\Users\johns\anaconda3\envs\probstats\lib\site-packages\sklearn\model_selection\_validation.py", line 767, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:\Users\johns\AppData\Local\Temp\ipykernel_5792\659673755.py", line 48, in rmse
    y_pred = estimator.predict(X_test)
  File "C:\Users\johns\anaconda3\envs\probstats\lib\site-packages\lightgbm\sklearn.py", line 803, in predict
    return self._Booster.predict(X, raw_score=raw_score, start_iteration=start_iteration, num_iteration=num_iteration,
  File "C:\Users\johns\anaconda3\envs\probstats\lib\site-packages\lightgbm\basic.py", line 3538, in predict
    return predictor.predict(data, start_iteration, num_iteration,
  File "C:\Users\johns\anaconda3\envs\probstats\lib\site-packages\lightgbm\basic.py", line 820, in predict
    data = _data_from_pandas(data, None, None, self.pandas_categorical)[0]
  File "C:\Users\johns\anaconda3\envs\probstats\lib\site-packages\lightgbm\bas

Traceback (most recent call last):
  File "C:\Users\johns\anaconda3\envs\probstats\lib\site-packages\sklearn\model_selection\_validation.py", line 767, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:\Users\johns\AppData\Local\Temp\ipykernel_5792\659673755.py", line 48, in rmse
    y_pred = estimator.predict(X_test)
  File "C:\Users\johns\anaconda3\envs\probstats\lib\site-packages\lightgbm\sklearn.py", line 803, in predict
    return self._Booster.predict(X, raw_score=raw_score, start_iteration=start_iteration, num_iteration=num_iteration,
  File "C:\Users\johns\anaconda3\envs\probstats\lib\site-packages\lightgbm\basic.py", line 3538, in predict
    return predictor.predict(data, start_iteration, num_iteration,
  File "C:\Users\johns\anaconda3\envs\probstats\lib\site-packages\lightgbm\basic.py", line 820, in predict
    data = _data_from_pandas(data, None, None, self.pandas_categorical)[0]
  File "C:\Users\johns\anaconda3\envs\probstats\lib\site-packages\lightgbm\bas

Traceback (most recent call last):
  File "C:\Users\johns\anaconda3\envs\probstats\lib\site-packages\sklearn\model_selection\_validation.py", line 767, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:\Users\johns\AppData\Local\Temp\ipykernel_5792\659673755.py", line 48, in rmse
    y_pred = estimator.predict(X_test)
  File "C:\Users\johns\anaconda3\envs\probstats\lib\site-packages\lightgbm\sklearn.py", line 803, in predict
    return self._Booster.predict(X, raw_score=raw_score, start_iteration=start_iteration, num_iteration=num_iteration,
  File "C:\Users\johns\anaconda3\envs\probstats\lib\site-packages\lightgbm\basic.py", line 3538, in predict
    return predictor.predict(data, start_iteration, num_iteration,
  File "C:\Users\johns\anaconda3\envs\probstats\lib\site-packages\lightgbm\basic.py", line 820, in predict
    data = _data_from_pandas(data, None, None, self.pandas_categorical)[0]
  File "C:\Users\johns\anaconda3\envs\probstats\lib\site-packages\lightgbm\bas

Traceback (most recent call last):
  File "C:\Users\johns\anaconda3\envs\probstats\lib\site-packages\sklearn\model_selection\_validation.py", line 767, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:\Users\johns\AppData\Local\Temp\ipykernel_5792\659673755.py", line 48, in rmse
    y_pred = estimator.predict(X_test)
  File "C:\Users\johns\anaconda3\envs\probstats\lib\site-packages\lightgbm\sklearn.py", line 803, in predict
    return self._Booster.predict(X, raw_score=raw_score, start_iteration=start_iteration, num_iteration=num_iteration,
  File "C:\Users\johns\anaconda3\envs\probstats\lib\site-packages\lightgbm\basic.py", line 3538, in predict
    return predictor.predict(data, start_iteration, num_iteration,
  File "C:\Users\johns\anaconda3\envs\probstats\lib\site-packages\lightgbm\basic.py", line 820, in predict
    data = _data_from_pandas(data, None, None, self.pandas_categorical)[0]
  File "C:\Users\johns\anaconda3\envs\probstats\lib\site-packages\lightgbm\bas

Traceback (most recent call last):
  File "C:\Users\johns\anaconda3\envs\probstats\lib\site-packages\sklearn\model_selection\_validation.py", line 767, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:\Users\johns\AppData\Local\Temp\ipykernel_5792\659673755.py", line 48, in rmse
    y_pred = estimator.predict(X_test)
  File "C:\Users\johns\anaconda3\envs\probstats\lib\site-packages\lightgbm\sklearn.py", line 803, in predict
    return self._Booster.predict(X, raw_score=raw_score, start_iteration=start_iteration, num_iteration=num_iteration,
  File "C:\Users\johns\anaconda3\envs\probstats\lib\site-packages\lightgbm\basic.py", line 3538, in predict
    return predictor.predict(data, start_iteration, num_iteration,
  File "C:\Users\johns\anaconda3\envs\probstats\lib\site-packages\lightgbm\basic.py", line 820, in predict
    data = _data_from_pandas(data, None, None, self.pandas_categorical)[0]
  File "C:\Users\johns\anaconda3\envs\probstats\lib\site-packages\lightgbm\bas

Traceback (most recent call last):
  File "C:\Users\johns\anaconda3\envs\probstats\lib\site-packages\sklearn\model_selection\_validation.py", line 767, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:\Users\johns\AppData\Local\Temp\ipykernel_5792\659673755.py", line 48, in rmse
    y_pred = estimator.predict(X_test)
  File "C:\Users\johns\anaconda3\envs\probstats\lib\site-packages\lightgbm\sklearn.py", line 803, in predict
    return self._Booster.predict(X, raw_score=raw_score, start_iteration=start_iteration, num_iteration=num_iteration,
  File "C:\Users\johns\anaconda3\envs\probstats\lib\site-packages\lightgbm\basic.py", line 3538, in predict
    return predictor.predict(data, start_iteration, num_iteration,
  File "C:\Users\johns\anaconda3\envs\probstats\lib\site-packages\lightgbm\basic.py", line 820, in predict
    data = _data_from_pandas(data, None, None, self.pandas_categorical)[0]
  File "C:\Users\johns\anaconda3\envs\probstats\lib\site-packages\lightgbm\bas

Traceback (most recent call last):
  File "C:\Users\johns\anaconda3\envs\probstats\lib\site-packages\sklearn\model_selection\_validation.py", line 767, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:\Users\johns\AppData\Local\Temp\ipykernel_5792\659673755.py", line 48, in rmse
    y_pred = estimator.predict(X_test)
  File "C:\Users\johns\anaconda3\envs\probstats\lib\site-packages\lightgbm\sklearn.py", line 803, in predict
    return self._Booster.predict(X, raw_score=raw_score, start_iteration=start_iteration, num_iteration=num_iteration,
  File "C:\Users\johns\anaconda3\envs\probstats\lib\site-packages\lightgbm\basic.py", line 3538, in predict
    return predictor.predict(data, start_iteration, num_iteration,
  File "C:\Users\johns\anaconda3\envs\probstats\lib\site-packages\lightgbm\basic.py", line 820, in predict
    data = _data_from_pandas(data, None, None, self.pandas_categorical)[0]
  File "C:\Users\johns\anaconda3\envs\probstats\lib\site-packages\lightgbm\bas

In [24]:
# Persist the tuned estimator of every search in `models` to ./models/<key>.pkl.
# open() does not create missing parent directories, so make sure the output
# folder exists first; otherwise a fresh checkout fails with FileNotFoundError
# only after the expensive searches have already finished.
model_dir = "./models"
os.makedirs(model_dir, exist_ok=True)

for key, search in models.items():
    # Skip falsy entries (presumably placeholders for searches that were not
    # run or did not produce a result — confirm against the cell filling `models`).
    if not search:
        continue
    # Only the refit best estimator is stored, not the whole search object.
    with open(f"{model_dir}/{key}.pkl", "wb") as f:
        pickle.dump(search.best_estimator_, f)