In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/playground-series-s5e4/sample_submission.csv
/kaggle/input/playground-series-s5e4/train.csv
/kaggle/input/playground-series-s5e4/test.csv


In [2]:
import warnings 
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats

from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import ExtraTreesRegressor, BaggingRegressor, RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import make_scorer, mean_squared_error

import optuna

In [3]:
# Data reading 
train_data = pd.read_csv("/kaggle/input/playground-series-s5e4/train.csv")
test_data = pd.read_csv("/kaggle/input/playground-series-s5e4/test.csv")
submission = pd.read_csv("/kaggle/input/playground-series-s5e4/sample_submission.csv")

In [4]:
# Configuration 
numerical_variables = ['Episode_Length_minutes', 'Host_Popularity_percentage', 'Guest_Popularity_percentage', 'Number_of_Ads']
categorial_valiable = ['Podcast_Name', 'Episode_Title', 'Genre', 'Publication_Day', 'Publication_Time', 'Episode_Sentiment'] 
target_variable = 'Listening_Time_minutes'

In [5]:
# Utility function 

def noise_removal(df):
    df["Number_of_Ads"] = df['Number_of_Ads'].apply(lambda x: x if x < 4 else 4)
    df["Episode_Length_minutes"] = df['Episode_Length_minutes'].apply(lambda x: x if x < 150 else 150)
    return df

def missing_value_imputation(df):
    df["Episode_Length_minutes"] = df.groupby("Podcast_Name")["Episode_Length_minutes"].transform(lambda x: x.fillna(x.mean()))
    df["Guest_Popularity_percentage"] = df.groupby("Podcast_Name")["Guest_Popularity_percentage"].transform(lambda x: x.fillna(x.mean()))
    df["Number_of_Ads"] = df["Number_of_Ads"].fillna(2)
    return df

def bin_popularity(value):
    if value < 20:
        return 'Low'
    elif value < 60:
        return 'Medium'
    else:
        return 'High'

INTERACT = []
def feature_engineering(train, test):
    global numerical_variables
    # Numerical columns 
    ## Interaction features
    if len(INTERACT) > 0:
        INTERACT.clear()
    for i,c1 in enumerate(numerical_variables):
        for j,c2 in enumerate(numerical_variables[i+1:]):
            feature = f"{c1}_{c2}"
            train[feature] = train[c1] * train[c2]
            test[feature] = test[c1] * test[c2]
            INTERACT.append(feature)
    numerical_variables += INTERACT
    print(f"There are {len(INTERACT)} interaction features:")
    print(INTERACT)
    
    train['Host_Guest_Avg_Popularity'] = (train['Host_Popularity_percentage'] + train['Guest_Popularity_percentage']) / 2
    test['Host_Guest_Avg_Popularity'] = (test['Host_Popularity_percentage'] + test['Guest_Popularity_percentage']) / 2
    numerical_variables.append("Host_Guest_Avg_Popularity")
    
    train["Host_Popularity_Level"] = train["Host_Popularity_percentage"].apply(bin_popularity)
    test["Host_Popularity_Level"] = test["Host_Popularity_percentage"].apply(bin_popularity)
    categorial_valiable.append("Host_Popularity_Level")
    train["Guest_Popularity_Level"] = train["Guest_Popularity_percentage"].apply(bin_popularity)
    test["Guest_Popularity_Level"] = test["Guest_Popularity_percentage"].apply(bin_popularity)
    categorial_valiable.append("Guest_Popularity_Level")
    
    train['Has_Ads'] = train['Number_of_Ads'].apply(lambda x: 1 if x > 0 else 0)
    test['Has_Ads'] = test['Number_of_Ads'].apply(lambda x: 1 if x > 0 else 0)
    
    train["Ads_Per_Minutes"] = train["Episode_Length_minutes"] / (train["Number_of_Ads"]+1)
    test["Ads_Per_Minutes"] = test["Episode_Length_minutes"] / (test["Number_of_Ads"]+1)
    numerical_variables.append("Ads_Per_Minutes")

    # Categorical features
    train['Is_Weekend'] = train['Publication_Day'].apply(lambda x: 1 if x == "Saturday" or x == "Sunday" else 0)
    test['Is_Weekend'] = test['Publication_Day'].apply(lambda x: 1 if x == "Saturday" or x == "Sunday" else 0)

    return train, test

In [6]:
def one_hot_encode_features(train, test, categorical_features):
    # Initialize the OneHotEncoder with drop='first'
    encoder = OneHotEncoder(sparse=False)
    
    # Fit and transform the train set
    train_encoded = encoder.fit_transform(train[categorical_features])
    
    # Transform the test set
    test_encoded = encoder.transform(test[categorical_features])
    
    # Convert the encoded arrays to DataFrame
    train_encoded_df = pd.DataFrame(train_encoded, columns=encoder.get_feature_names_out(categorical_features))
    test_encoded_df = pd.DataFrame(test_encoded, columns=encoder.get_feature_names_out(categorical_features))
    
    # Drop the original categorical columns from train and test DataFrames
    train.drop(columns=categorical_features, inplace=True)
    test.drop(columns=categorical_features, inplace=True)
    
    # Concatenate the one-hot encoded features to the original DataFrames
    train = pd.concat([train, train_encoded_df], axis=1)
    test = pd.concat([test, test_encoded_df], axis=1)
    
    return train, test


def standardize_features(train, test, numerical_features):
    # Initialize the StandardScaler
    scaler = StandardScaler()
    
    # Fit and transform the training data
    train_scaled = scaler.fit_transform(train[numerical_features])
    
    # Transform the test data
    test_scaled = scaler.transform(test[numerical_features])
    
    # Convert the scaled arrays back to DataFrames
    train_scaled_df = pd.DataFrame(train_scaled, columns=numerical_features)
    test_scaled_df = pd.DataFrame(test_scaled, columns=numerical_features)
    
    # Replace the original columns with the standardized columns
    train[numerical_features] = train_scaled_df
    test[numerical_features] = test_scaled_df
    
    return train, test

In [7]:
# Data Processing 
train_data = noise_removal(train_data)
test_data = noise_removal(test_data)
train_data = missing_value_imputation(train_data)
test_data = missing_value_imputation(test_data)
train_data, test_data = feature_engineering(train_data, test_data)

There are 6 interaction features:
['Episode_Length_minutes_Host_Popularity_percentage', 'Episode_Length_minutes_Guest_Popularity_percentage', 'Episode_Length_minutes_Number_of_Ads', 'Host_Popularity_percentage_Guest_Popularity_percentage', 'Host_Popularity_percentage_Number_of_Ads', 'Guest_Popularity_percentage_Number_of_Ads']


In [8]:
print(INTERACT)
print(train_data.shape)
print(test_data.shape)
print(categorial_valiable)
print(numerical_variables)

['Episode_Length_minutes_Host_Popularity_percentage', 'Episode_Length_minutes_Guest_Popularity_percentage', 'Episode_Length_minutes_Number_of_Ads', 'Host_Popularity_percentage_Guest_Popularity_percentage', 'Host_Popularity_percentage_Number_of_Ads', 'Guest_Popularity_percentage_Number_of_Ads']
(750000, 24)
(250000, 23)
['Podcast_Name', 'Episode_Title', 'Genre', 'Publication_Day', 'Publication_Time', 'Episode_Sentiment', 'Host_Popularity_Level', 'Guest_Popularity_Level']
['Episode_Length_minutes', 'Host_Popularity_percentage', 'Guest_Popularity_percentage', 'Number_of_Ads', 'Episode_Length_minutes_Host_Popularity_percentage', 'Episode_Length_minutes_Guest_Popularity_percentage', 'Episode_Length_minutes_Number_of_Ads', 'Host_Popularity_percentage_Guest_Popularity_percentage', 'Host_Popularity_percentage_Number_of_Ads', 'Guest_Popularity_percentage_Number_of_Ads', 'Host_Guest_Avg_Popularity', 'Ads_Per_Minutes']


In [9]:
train_data, test_data = one_hot_encode_features(train_data, test_data, categorial_valiable)

#train_data, test_data = standardize_features(train_data, test_data, numerical_variables)

In [10]:
print(train_data.shape)
print(test_data.shape)

(750000, 194)
(250000, 193)


In [11]:
X = train_data.drop(['Listening_Time_minutes', 'id'], axis=1)
y = train_data['Listening_Time_minutes']

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
print(len(y_train), len(y_val))

600000 150000


In [12]:
cv = KFold(n_splits=5, shuffle=True, random_state=42)

In [17]:
def objective_random_forest(trial):
    # Define the hyperparameters to optimize
    et_params = {
        "n_estimators": trial.suggest_int("n_estimators", 100, 500),
        "max_depth": trial.suggest_int("max_depth", 5, 15),
        "min_samples_split": trial.suggest_int("min_samples_split", 2, 10),
        "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 10),
        "max_features": trial.suggest_categorical("max_features", ["auto", "sqrt", "log2"]),
        "n_jobs": -1,
        "random_state": 42
    }
    clf = ExtraTreesRegressor(**et_params)
    score = cross_val_score(clf, X_train, y_train, cv=cv, scoring='neg_mean_squared_error').mean()
    rmse_score = np.sqrt(-score)
    
    return rmse_score

study_rf = optuna.create_study(direction="minimize")
study_rf.optimize(objective_random_forest, n_trials=30)
best_param_rf = study_rf.best_trial.params
print("Best params for ExtraTreesRegressor:", best_param_rf)

[I 2025-04-04 15:21:52,942] A new study created in memory with name: no-name-e329c089-c5b8-424b-9a26-58434ea80b02
[W 2025-04-04 15:32:46,008] Trial 0 failed with parameters: {'n_estimators': 406, 'max_depth': 12, 'min_samples_split': 8, 'min_samples_leaf': 3, 'max_features': 'sqrt'} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/optuna/study/_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
  File "<ipython-input-17-ff4f712d834f>", line 13, in objective_random_forest
    score = cross_val_score(clf, X_train, y_train, cv=cv, scoring='neg_mean_squared_error').mean()
  File "/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_validation.py", line 515, in cross_val_score
    cv_results = cross_validate(
  File "/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_validation.py", line 266, in cross_validate
    results = parallel(
  File "/usr/local/lib/py

KeyboardInterrupt: 

In [26]:
base_estimator = DecisionTreeRegressor(max_depth=5, random_state=42)
bagging_model = BaggingRegressor(
    base_estimator=base_estimator,  # Use our shallow decision tree
    n_estimators=100,             # Number of trees in the ensemble
    random_state=42,               # For reproducibility
    n_jobs=-1,                     # Use all available CPU cores for parallel training
    verbose=1000
)
bagging_model.fit(X_train, y_train)

#score = cross_val_score(bagging_model, X_train, y_train, cv=cv, scoring='neg_mean_squared_error').mean()
#root_mean_square_error = np.sqrt(-score)
#print("DecisionTreeRegressor CV Root mean square error:", root_mean_square_error)

[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   1 tasks      | elapsed:  4.9min
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:  4.9min remaining:  4.9min
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:  5.0min finished


In [27]:
#bagging_model.fit(X_train, y_train)
y_val_pred = bagging_model.predict(X_val)
print('DecisionTreeRegressor validation Room mean square error:', np.sqrt(mean_squared_error(y_val, y_val_pred)))

[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   1 tasks      | elapsed:    6.4s
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:    6.4s remaining:    6.4s
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:    6.4s finished
DecisionTreeRegressor validation Room mean square error: 13.350951179147103


In [28]:
base_estimator_2 = DecisionTreeRegressor(max_depth=8, random_state=42)
bagging_model_2 = BaggingRegressor(
    base_estimator=base_estimator_2,  # Use our shallow decision tree
    n_estimators=200,             # Number of trees in the ensemble
    random_state=42,               # For reproducibility
    n_jobs=-1,                     # Use all available CPU cores for parallel training
    verbose=10000
)
bagging_model_2.fit(X_train, y_train)

[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   1 tasks      | elapsed: 15.9min
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed: 16.0min remaining: 16.0min
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed: 16.1min finished


In [29]:
y_val_pred = bagging_model_2.predict(X_val)
print('DecisionTreeRegressor validation Room mean square error:', np.sqrt(mean_squared_error(y_val, y_val_pred)))

[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   1 tasks      | elapsed:   13.6s
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:   13.9s remaining:   13.9s
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:   13.9s finished
DecisionTreeRegressor validation Room mean square error: 13.12998959187659


In [31]:
base_estimator_3 = DecisionTreeRegressor(max_depth=8, random_state=42)
bagging_model_3 = BaggingRegressor(
    base_estimator=base_estimator_3,  # Use our shallow decision tree
    n_estimators=300,             # Number of trees in the ensemble
    random_state=42,               # For reproducibility
    n_jobs=-1,                     # Use all available CPU cores for parallel training
    verbose=10000
)
bagging_model_3.fit(X_train, y_train)

[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   1 tasks      | elapsed: 23.8min
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed: 23.8min remaining: 23.8min
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed: 23.9min finished


In [32]:
y_val_pred = bagging_model_3.predict(X_val)
print('BaggingRegressor validation Room mean square error:', np.sqrt(mean_squared_error(y_val, y_val_pred)))

[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   1 tasks      | elapsed:   18.7s
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:   18.8s remaining:   18.8s
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:   19.5s finished
BaggingRegressor validation Room mean square error: 13.129694708714666


In [38]:
model = RandomForestRegressor(
    max_depth = 8,
    n_estimators=300,             # Number of trees in the ensemble
    random_state=42,               # For reproducibility
    n_jobs=-1,                     # Use all available CPU cores for parallel training
    verbose=10
)
model.fit(X_train, y_train)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 4 concurrent workers.


building tree 1 of 300building tree 2 of 300building tree 3 of 300


building tree 4 of 300
building tree 5 of 300
building tree 6 of 300
building tree 7 of 300
building tree 8 of 300
building tree 9 of 300


[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:   48.8s


building tree 10 of 300
building tree 11 of 300
building tree 12 of 300
building tree 13 of 300
building tree 14 of 300


[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:  1.2min


building tree 15 of 300
building tree 16 of 300
building tree 17 of 300
building tree 18 of 300
building tree 19 of 300
building tree 20 of 300


[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:  1.8min


building tree 21 of 300
building tree 22 of 300
building tree 23 of 300
building tree 24 of 300
building tree 25 of 300
building tree 26 of 300
building tree 27 of 300


[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:  2.2min


building tree 28 of 300
building tree 29 of 300
building tree 30 of 300
building tree 31 of 300
building tree 32 of 300
building tree 33 of 300
building tree 34 of 300
building tree 35 of 300
building tree 36 of 300


[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed:  3.0min


building tree 37 of 300
building tree 38 of 300
building tree 39 of 300
building tree 40 of 300
building tree 41 of 300
building tree 42 of 300
building tree 43 of 300
building tree 44 of 300
building tree 45 of 300


[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:  3.7min


building tree 46 of 300
building tree 47 of 300
building tree 48 of 300
building tree 49 of 300
building tree 50 of 300
building tree 51 of 300
building tree 52 of 300
building tree 53 of 300
building tree 54 of 300
building tree 55 of 300
building tree 56 of 300
building tree 57 of 300
building tree 58 of 300


[Parallel(n_jobs=-1)]: Done  53 tasks      | elapsed:  4.6min


building tree 59 of 300
building tree 60 of 300
building tree 61 of 300
building tree 62 of 300
building tree 63 of 300
building tree 64 of 300
building tree 65 of 300
building tree 66 of 300
building tree 67 of 300


[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:  5.3min


building tree 68 of 300
building tree 69 of 300
building tree 70 of 300
building tree 71 of 300
building tree 72 of 300
building tree 73 of 300
building tree 74 of 300
building tree 75 of 300
building tree 76 of 300
building tree 77 of 300
building tree 78 of 300
building tree 79 of 300
building tree 80 of 300


[Parallel(n_jobs=-1)]: Done  77 tasks      | elapsed:  6.5min


building tree 81 of 300
building tree 82 of 300
building tree 83 of 300
building tree 84 of 300
building tree 85 of 300
building tree 86 of 300
building tree 87 of 300
building tree 88 of 300
building tree 89 of 300
building tree 90 of 300
building tree 91 of 300
building tree 92 of 300
building tree 93 of 300


[Parallel(n_jobs=-1)]: Done  90 tasks      | elapsed:  7.5min


building tree 94 of 300
building tree 95 of 300
building tree 96 of 300
building tree 97 of 300
building tree 98 of 300
building tree 99 of 300
building tree 100 of 300
building tree 101 of 300
building tree 102 of 300
building tree 103 of 300
building tree 104 of 300
building tree 105 of 300
building tree 106 of 300
building tree 107 of 300
building tree 108 of 300
building tree 109 of 300


[Parallel(n_jobs=-1)]: Done 105 tasks      | elapsed:  8.7min


building tree 110 of 300
building tree 111 of 300
building tree 112 of 300
building tree 113 of 300
building tree 114 of 300
building tree 115 of 300
building tree 116 of 300
building tree 117 of 300
building tree 118 of 300
building tree 119 of 300
building tree 120 of 300
building tree 121 of 300
building tree 122 of 300
building tree 123 of 300
building tree 124 of 300


[Parallel(n_jobs=-1)]: Done 120 tasks      | elapsed:  9.8min


building tree 125 of 300
building tree 126 of 300
building tree 127 of 300
building tree 128 of 300
building tree 129 of 300
building tree 130 of 300
building tree 131 of 300
building tree 132 of 300
building tree 133 of 300
building tree 134 of 300
building tree 135 of 300
building tree 136 of 300
building tree 137 of 300
building tree 138 of 300
building tree 139 of 300
building tree 140 of 300


[Parallel(n_jobs=-1)]: Done 137 tasks      | elapsed: 11.2min


building tree 141 of 300
building tree 142 of 300
building tree 143 of 300
building tree 144 of 300
building tree 145 of 300
building tree 146 of 300
building tree 147 of 300
building tree 148 of 300
building tree 149 of 300
building tree 150 of 300
building tree 151 of 300
building tree 152 of 300
building tree 153 of 300
building tree 154 of 300
building tree 155 of 300
building tree 156 of 300
building tree 157 of 300


[Parallel(n_jobs=-1)]: Done 154 tasks      | elapsed: 12.5min


building tree 158 of 300
building tree 159 of 300
building tree 160 of 300
building tree 161 of 300
building tree 162 of 300
building tree 163 of 300
building tree 164 of 300
building tree 165 of 300
building tree 166 of 300
building tree 167 of 300
building tree 168 of 300
building tree 169 of 300
building tree 170 of 300
building tree 171 of 300
building tree 172 of 300
building tree 173 of 300
building tree 174 of 300
building tree 175 of 300
building tree 176 of 300


[Parallel(n_jobs=-1)]: Done 173 tasks      | elapsed: 14.1min


building tree 177 of 300
building tree 178 of 300
building tree 179 of 300
building tree 180 of 300
building tree 181 of 300
building tree 182 of 300
building tree 183 of 300
building tree 184 of 300
building tree 185 of 300
building tree 186 of 300
building tree 187 of 300
building tree 188 of 300
building tree 189 of 300
building tree 190 of 300
building tree 191 of 300
building tree 192 of 300
building tree 193 of 300
building tree 194 of 300
building tree 195 of 300


[Parallel(n_jobs=-1)]: Done 192 tasks      | elapsed: 15.5min


building tree 196 of 300
building tree 197 of 300
building tree 198 of 300
building tree 199 of 300
building tree 200 of 300
building tree 201 of 300
building tree 202 of 300
building tree 203 of 300
building tree 204 of 300
building tree 205 of 300
building tree 206 of 300
building tree 207 of 300
building tree 208 of 300
building tree 209 of 300
building tree 210 of 300
building tree 211 of 300
building tree 212 of 300
building tree 213 of 300
building tree 214 of 300
building tree 215 of 300
building tree 216 of 300


[Parallel(n_jobs=-1)]: Done 213 tasks      | elapsed: 17.3min


building tree 217 of 300
building tree 218 of 300
building tree 219 of 300
building tree 220 of 300
building tree 221 of 300
building tree 222 of 300
building tree 223 of 300
building tree 224 of 300
building tree 225 of 300
building tree 226 of 300
building tree 227 of 300
building tree 228 of 300
building tree 229 of 300
building tree 230 of 300
building tree 231 of 300
building tree 232 of 300
building tree 233 of 300
building tree 234 of 300
building tree 235 of 300
building tree 236 of 300
building tree 237 of 300
building tree 238 of 300


[Parallel(n_jobs=-1)]: Done 234 tasks      | elapsed: 18.8min


building tree 239 of 300
building tree 240 of 300
building tree 241 of 300
building tree 242 of 300
building tree 243 of 300
building tree 244 of 300
building tree 245 of 300
building tree 246 of 300
building tree 247 of 300
building tree 248 of 300
building tree 249 of 300
building tree 250 of 300
building tree 251 of 300
building tree 252 of 300
building tree 253 of 300
building tree 254 of 300
building tree 255 of 300
building tree 256 of 300
building tree 257 of 300
building tree 258 of 300
building tree 259 of 300
building tree 260 of 300


[Parallel(n_jobs=-1)]: Done 257 tasks      | elapsed: 20.7min


building tree 261 of 300
building tree 262 of 300
building tree 263 of 300
building tree 264 of 300
building tree 265 of 300
building tree 266 of 300
building tree 267 of 300
building tree 268 of 300
building tree 269 of 300
building tree 270 of 300
building tree 271 of 300
building tree 272 of 300
building tree 273 of 300
building tree 274 of 300
building tree 275 of 300
building tree 276 of 300
building tree 277 of 300
building tree 278 of 300
building tree 279 of 300
building tree 280 of 300
building tree 281 of 300
building tree 282 of 300
building tree 283 of 300
building tree 284 of 300


[Parallel(n_jobs=-1)]: Done 280 tasks      | elapsed: 22.4min


building tree 285 of 300
building tree 286 of 300
building tree 287 of 300
building tree 288 of 300
building tree 289 of 300
building tree 290 of 300
building tree 291 of 300
building tree 292 of 300
building tree 293 of 300
building tree 294 of 300
building tree 295 of 300
building tree 296 of 300
building tree 297 of 300
building tree 298 of 300
building tree 299 of 300
building tree 300 of 300


[Parallel(n_jobs=-1)]: Done 300 out of 300 | elapsed: 23.9min finished


In [39]:
y_val_pred = model.predict(X_val)
print('BaggingRegressor validation Room mean square error:', np.sqrt(mean_squared_error(y_val, y_val_pred)))

[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   5 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done  10 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done  17 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done  24 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done  33 tasks      | elapsed:    0.2s
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.2s
[Parallel(n_jobs=4)]: Done  53 tasks      | elapsed:    0.3s
[Parallel(n_jobs=4)]: Done  64 tasks      | elapsed:    0.3s
[Parallel(n_jobs=4)]: Done  77 tasks      | elapsed:    0.4s
[Parallel(n_jobs=4)]: Done  90 tasks      | elapsed:    0.4s
[Parallel(n_jobs=4)]: Done 105 tasks      | elapsed:    0.5s
[Parallel(n_jobs=4)]: Done 120 tasks      | elapsed:    0.6s
[Parallel(n_jobs=4)]: Done 137 tasks      | elapsed:    0.7s
[Parallel(n_jobs=4)]: Done 154 tasks      | elapsed:    0.7s
[Parallel(n_jobs=4)]: Done 173 tasks      | elapsed:    0.9s
[Para

BaggingRegressor validation Room mean square error: 13.129658403711158


[Parallel(n_jobs=4)]: Done 280 tasks      | elapsed:    1.5s
[Parallel(n_jobs=4)]: Done 300 out of 300 | elapsed:    1.7s finished


In [40]:
model = RandomForestRegressor(
    max_depth = 9,
    min_samples_split = 8,
    min_samples_leaf = 9,
    n_estimators=300,             # Number of trees in the ensemble
    random_state=42,               # For reproducibility
    n_jobs=-1,                     # Use all available CPU cores for parallel training
    verbose=1
)
model.fit(X_train, y_train)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:  3.8min
[Parallel(n_jobs=-1)]: Done 192 tasks      | elapsed: 16.8min
[Parallel(n_jobs=-1)]: Done 300 out of 300 | elapsed: 26.2min finished


In [41]:
y_val_pred = model.predict(X_val)
print('RandomForestRegressor validation Room mean square error:', np.sqrt(mean_squared_error(y_val, y_val_pred)))

[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.3s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    1.2s


RandomForestRegressor validation Room mean square error: 13.105816636978151


[Parallel(n_jobs=4)]: Done 300 out of 300 | elapsed:    1.9s finished


In [43]:
model = RandomForestRegressor(
    max_depth = 9,
    min_samples_split = 8,
    min_samples_leaf = 9,
    n_estimators=400,             # Number of trees in the ensemble
    random_state=42,               # For reproducibility
    n_jobs=-1,                     # Use all available CPU cores for parallel training
    verbose=2
)
model.fit(X_train, y_train)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 4 concurrent workers.


building tree 1 of 400building tree 2 of 400
building tree 3 of 400building tree 4 of 400


building tree 5 of 400
building tree 6 of 400
building tree 7 of 400
building tree 8 of 400
building tree 9 of 400
building tree 10 of 400
building tree 11 of 400
building tree 12 of 400
building tree 13 of 400
building tree 14 of 400
building tree 15 of 400
building tree 16 of 400
building tree 17 of 400
building tree 18 of 400
building tree 19 of 400
building tree 20 of 400
building tree 21 of 400
building tree 22 of 400
building tree 23 of 400
building tree 24 of 400
building tree 25 of 400
building tree 26 of 400
building tree 27 of 400
building tree 28 of 400
building tree 29 of 400
building tree 30 of 400
building tree 31 of 400
building tree 32 of 400
building tree 33 of 400
building tree 34 of 400
building tree 35 of 400
building tree 36 of 400


[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed:  3.1min


building tree 37 of 400
building tree 38 of 400
building tree 39 of 400
building tree 40 of 400
building tree 41 of 400
building tree 42 of 400
building tree 43 of 400
building tree 44 of 400
building tree 45 of 400
building tree 46 of 400
building tree 47 of 400
building tree 48 of 400
building tree 49 of 400
building tree 50 of 400
building tree 51 of 400
building tree 52 of 400
building tree 53 of 400
building tree 54 of 400
building tree 55 of 400
building tree 56 of 400
building tree 57 of 400
building tree 58 of 400
building tree 59 of 400
building tree 60 of 400
building tree 61 of 400
building tree 62 of 400
building tree 63 of 400
building tree 64 of 400
building tree 65 of 400
building tree 66 of 400
building tree 67 of 400
building tree 68 of 400
building tree 69 of 400
building tree 70 of 400
building tree 71 of 400
building tree 72 of 400
building tree 73 of 400
building tree 74 of 400
building tree 75 of 400
building tree 76 of 400
building tree 77 of 400
building tree 78

[Parallel(n_jobs=-1)]: Done 154 tasks      | elapsed: 13.7min


building tree 158 of 400
building tree 159 of 400
building tree 160 of 400
building tree 161 of 400
building tree 162 of 400
building tree 163 of 400
building tree 164 of 400
building tree 165 of 400
building tree 166 of 400
building tree 167 of 400
building tree 168 of 400
building tree 169 of 400
building tree 170 of 400
building tree 171 of 400
building tree 172 of 400
building tree 173 of 400
building tree 174 of 400
building tree 175 of 400
building tree 176 of 400
building tree 177 of 400
building tree 178 of 400
building tree 179 of 400
building tree 180 of 400
building tree 181 of 400
building tree 182 of 400
building tree 183 of 400
building tree 184 of 400
building tree 185 of 400
building tree 186 of 400
building tree 187 of 400
building tree 188 of 400
building tree 189 of 400
building tree 190 of 400
building tree 191 of 400
building tree 192 of 400
building tree 193 of 400
building tree 194 of 400
building tree 195 of 400
building tree 196 of 400
building tree 197 of 400


[Parallel(n_jobs=-1)]: Done 357 tasks      | elapsed: 31.5min


building tree 361 of 400
building tree 362 of 400
building tree 363 of 400
building tree 364 of 400
building tree 365 of 400
building tree 366 of 400
building tree 367 of 400
building tree 368 of 400
building tree 369 of 400
building tree 370 of 400
building tree 371 of 400
building tree 372 of 400
building tree 373 of 400
building tree 374 of 400
building tree 375 of 400
building tree 376 of 400
building tree 377 of 400
building tree 378 of 400
building tree 379 of 400
building tree 380 of 400
building tree 381 of 400
building tree 382 of 400
building tree 383 of 400
building tree 384 of 400
building tree 385 of 400
building tree 386 of 400
building tree 387 of 400
building tree 388 of 400
building tree 389 of 400
building tree 390 of 400
building tree 391 of 400
building tree 392 of 400
building tree 393 of 400
building tree 394 of 400
building tree 395 of 400
building tree 396 of 400
building tree 397 of 400
building tree 398 of 400
building tree 399 of 400
building tree 400 of 400


[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed: 35.2min finished


In [44]:
y_val_pred = model.predict(X_val)
print('RandomForestRegressor validation Room mean square error:', np.sqrt(mean_squared_error(y_val, y_val_pred)))

[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  33 tasks      | elapsed:    0.2s
[Parallel(n_jobs=4)]: Done 154 tasks      | elapsed:    0.9s
[Parallel(n_jobs=4)]: Done 357 tasks      | elapsed:    2.1s


RandomForestRegressor validation Room mean square error: 13.10612887822906


[Parallel(n_jobs=4)]: Done 400 out of 400 | elapsed:    2.3s finished


In [13]:
model = RandomForestRegressor(
    max_depth = 8,
    min_samples_split = 4,
    min_samples_leaf = 4,
    n_estimators=400,             # Number of trees in the ensemble
    random_state=42,               # For reproducibility
    n_jobs=-1,                     # Use all available CPU cores for parallel training
    verbose=10
)
model.fit(X_train, y_train)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 4 concurrent workers.


building tree 1 of 400building tree 2 of 400

building tree 3 of 400
building tree 4 of 400
building tree 5 of 400
building tree 6 of 400
building tree 7 of 400
building tree 8 of 400


[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:   45.8s


building tree 9 of 400
building tree 10 of 400
building tree 11 of 400
building tree 12 of 400
building tree 13 of 400


[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:  1.1min


building tree 14 of 400
building tree 15 of 400
building tree 16 of 400
building tree 17 of 400
building tree 18 of 400
building tree 19 of 400
building tree 20 of 400


[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:  1.7min


building tree 21 of 400
building tree 22 of 400
building tree 23 of 400
building tree 24 of 400
building tree 25 of 400
building tree 26 of 400
building tree 27 of 400


[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:  2.0min


building tree 28 of 400
building tree 29 of 400
building tree 30 of 400
building tree 31 of 400
building tree 32 of 400
building tree 33 of 400
building tree 34 of 400
building tree 35 of 400
building tree 36 of 400


[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed:  2.9min


building tree 37 of 400
building tree 38 of 400
building tree 39 of 400
building tree 40 of 400
building tree 41 of 400
building tree 42 of 400
building tree 43 of 400
building tree 44 of 400
building tree 45 of 400
building tree 46 of 400


[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:  3.5min


building tree 47 of 400
building tree 48 of 400
building tree 49 of 400
building tree 50 of 400
building tree 51 of 400
building tree 52 of 400
building tree 53 of 400
building tree 54 of 400
building tree 55 of 400
building tree 56 of 400


[Parallel(n_jobs=-1)]: Done  53 tasks      | elapsed:  4.4min


building tree 57 of 400
building tree 58 of 400
building tree 59 of 400
building tree 60 of 400
building tree 61 of 400
building tree 62 of 400
building tree 63 of 400
building tree 64 of 400
building tree 65 of 400
building tree 66 of 400
building tree 67 of 400


[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:  5.0min


building tree 68 of 400
building tree 69 of 400
building tree 70 of 400
building tree 71 of 400
building tree 72 of 400
building tree 73 of 400
building tree 74 of 400
building tree 75 of 400
building tree 76 of 400
building tree 77 of 400
building tree 78 of 400
building tree 79 of 400
building tree 80 of 400


[Parallel(n_jobs=-1)]: Done  77 tasks      | elapsed:  6.2min


building tree 81 of 400
building tree 82 of 400
building tree 83 of 400
building tree 84 of 400
building tree 85 of 400
building tree 86 of 400
building tree 87 of 400
building tree 88 of 400
building tree 89 of 400
building tree 90 of 400
building tree 91 of 400
building tree 92 of 400
building tree 93 of 400
building tree 94 of 400


[Parallel(n_jobs=-1)]: Done  90 tasks      | elapsed:  7.1min


building tree 95 of 400
building tree 96 of 400
building tree 97 of 400
building tree 98 of 400
building tree 99 of 400
building tree 100 of 400
building tree 101 of 400
building tree 102 of 400
building tree 103 of 400
building tree 104 of 400
building tree 105 of 400
building tree 106 of 400
building tree 107 of 400
building tree 108 of 400
building tree 109 of 400
building tree 110 of 400


[Parallel(n_jobs=-1)]: Done 105 tasks      | elapsed:  8.3min


building tree 111 of 400
building tree 112 of 400
building tree 113 of 400
building tree 114 of 400
building tree 115 of 400
building tree 116 of 400
building tree 117 of 400
building tree 118 of 400
building tree 119 of 400
building tree 120 of 400
building tree 121 of 400
building tree 122 of 400
building tree 123 of 400
building tree 124 of 400


[Parallel(n_jobs=-1)]: Done 120 tasks      | elapsed:  9.3min


building tree 125 of 400
building tree 126 of 400
building tree 127 of 400
building tree 128 of 400
building tree 129 of 400
building tree 130 of 400
building tree 131 of 400
building tree 132 of 400
building tree 133 of 400
building tree 134 of 400
building tree 135 of 400
building tree 136 of 400
building tree 137 of 400
building tree 138 of 400
building tree 139 of 400
building tree 140 of 400


[Parallel(n_jobs=-1)]: Done 137 tasks      | elapsed: 10.7min


building tree 141 of 400
building tree 142 of 400
building tree 143 of 400
building tree 144 of 400
building tree 145 of 400
building tree 146 of 400
building tree 147 of 400
building tree 148 of 400
building tree 149 of 400
building tree 150 of 400
building tree 151 of 400
building tree 152 of 400
building tree 153 of 400
building tree 154 of 400
building tree 155 of 400
building tree 156 of 400
building tree 157 of 400


[Parallel(n_jobs=-1)]: Done 154 tasks      | elapsed: 11.9min


building tree 158 of 400
building tree 159 of 400
building tree 160 of 400
building tree 161 of 400
building tree 162 of 400
building tree 163 of 400
building tree 164 of 400
building tree 165 of 400
building tree 166 of 400
building tree 167 of 400
building tree 168 of 400
building tree 169 of 400
building tree 170 of 400
building tree 171 of 400
building tree 172 of 400
building tree 173 of 400
building tree 174 of 400
building tree 175 of 400
building tree 176 of 400


[Parallel(n_jobs=-1)]: Done 173 tasks      | elapsed: 13.3min


building tree 177 of 400
building tree 178 of 400
building tree 179 of 400
building tree 180 of 400
building tree 181 of 400
building tree 182 of 400
building tree 183 of 400
building tree 184 of 400
building tree 185 of 400
building tree 186 of 400
building tree 187 of 400
building tree 188 of 400
building tree 189 of 400
building tree 190 of 400
building tree 191 of 400
building tree 192 of 400
building tree 193 of 400
building tree 194 of 400
building tree 195 of 400


[Parallel(n_jobs=-1)]: Done 192 tasks      | elapsed: 14.7min


building tree 196 of 400
building tree 197 of 400
building tree 198 of 400
building tree 199 of 400
building tree 200 of 400
building tree 201 of 400
building tree 202 of 400
building tree 203 of 400
building tree 204 of 400
building tree 205 of 400
building tree 206 of 400
building tree 207 of 400
building tree 208 of 400
building tree 209 of 400
building tree 210 of 400
building tree 211 of 400
building tree 212 of 400
building tree 213 of 400
building tree 214 of 400
building tree 215 of 400
building tree 216 of 400


[Parallel(n_jobs=-1)]: Done 213 tasks      | elapsed: 16.3min


building tree 217 of 400
building tree 218 of 400
building tree 219 of 400
building tree 220 of 400
building tree 221 of 400
building tree 222 of 400
building tree 223 of 400
building tree 224 of 400
building tree 225 of 400
building tree 226 of 400
building tree 227 of 400
building tree 228 of 400
building tree 229 of 400
building tree 230 of 400
building tree 231 of 400
building tree 232 of 400
building tree 233 of 400
building tree 234 of 400
building tree 235 of 400
building tree 236 of 400
building tree 237 of 400


[Parallel(n_jobs=-1)]: Done 234 tasks      | elapsed: 17.9min


building tree 238 of 400
building tree 239 of 400
building tree 240 of 400
building tree 241 of 400
building tree 242 of 400
building tree 243 of 400
building tree 244 of 400
building tree 245 of 400
building tree 246 of 400
building tree 247 of 400
building tree 248 of 400
building tree 249 of 400
building tree 250 of 400
building tree 251 of 400
building tree 252 of 400
building tree 253 of 400
building tree 254 of 400
building tree 255 of 400
building tree 256 of 400
building tree 257 of 400
building tree 258 of 400
building tree 259 of 400
building tree 260 of 400


[Parallel(n_jobs=-1)]: Done 257 tasks      | elapsed: 19.6min


building tree 261 of 400
building tree 262 of 400
building tree 263 of 400
building tree 264 of 400
building tree 265 of 400
building tree 266 of 400
building tree 267 of 400
building tree 268 of 400
building tree 269 of 400
building tree 270 of 400
building tree 271 of 400
building tree 272 of 400
building tree 273 of 400
building tree 274 of 400
building tree 275 of 400
building tree 276 of 400
building tree 277 of 400
building tree 278 of 400
building tree 279 of 400
building tree 280 of 400
building tree 281 of 400
building tree 282 of 400
building tree 283 of 400
building tree 284 of 400


[Parallel(n_jobs=-1)]: Done 280 tasks      | elapsed: 21.2min


building tree 285 of 400
building tree 286 of 400
building tree 287 of 400
building tree 288 of 400
building tree 289 of 400
building tree 290 of 400
building tree 291 of 400
building tree 292 of 400
building tree 293 of 400
building tree 294 of 400
building tree 295 of 400
building tree 296 of 400
building tree 297 of 400
building tree 298 of 400
building tree 299 of 400
building tree 300 of 400
building tree 301 of 400
building tree 302 of 400
building tree 303 of 400
building tree 304 of 400
building tree 305 of 400
building tree 306 of 400
building tree 307 of 400
building tree 308 of 400


[Parallel(n_jobs=-1)]: Done 305 tasks      | elapsed: 23.2min


building tree 309 of 400
building tree 310 of 400
building tree 311 of 400
building tree 312 of 400
building tree 313 of 400
building tree 314 of 400
building tree 315 of 400
building tree 316 of 400
building tree 317 of 400
building tree 318 of 400
building tree 319 of 400
building tree 320 of 400
building tree 321 of 400
building tree 322 of 400
building tree 323 of 400
building tree 324 of 400
building tree 325 of 400
building tree 326 of 400
building tree 327 of 400
building tree 328 of 400
building tree 329 of 400
building tree 330 of 400
building tree 331 of 400
building tree 332 of 400
building tree 333 of 400


[Parallel(n_jobs=-1)]: Done 330 tasks      | elapsed: 25.0min


building tree 334 of 400
building tree 335 of 400
building tree 336 of 400
building tree 337 of 400
building tree 338 of 400
building tree 339 of 400
building tree 340 of 400
building tree 341 of 400
building tree 342 of 400
building tree 343 of 400
building tree 344 of 400
building tree 345 of 400
building tree 346 of 400
building tree 347 of 400
building tree 348 of 400
building tree 349 of 400
building tree 350 of 400
building tree 351 of 400
building tree 352 of 400
building tree 353 of 400
building tree 354 of 400
building tree 355 of 400
building tree 356 of 400
building tree 357 of 400
building tree 358 of 400
building tree 359 of 400
building tree 360 of 400


[Parallel(n_jobs=-1)]: Done 357 tasks      | elapsed: 27.0min


building tree 361 of 400
building tree 362 of 400
building tree 363 of 400
building tree 364 of 400
building tree 365 of 400
building tree 366 of 400
building tree 367 of 400
building tree 368 of 400
building tree 369 of 400
building tree 370 of 400
building tree 371 of 400
building tree 372 of 400
building tree 373 of 400
building tree 374 of 400
building tree 375 of 400
building tree 376 of 400
building tree 377 of 400
building tree 378 of 400
building tree 379 of 400
building tree 380 of 400
building tree 381 of 400
building tree 382 of 400
building tree 383 of 400
building tree 384 of 400
building tree 385 of 400
building tree 386 of 400
building tree 387 of 400


[Parallel(n_jobs=-1)]: Done 384 tasks      | elapsed: 28.9min


building tree 388 of 400
building tree 389 of 400
building tree 390 of 400
building tree 391 of 400
building tree 392 of 400
building tree 393 of 400
building tree 394 of 400
building tree 395 of 400
building tree 396 of 400
building tree 397 of 400
building tree 398 of 400
building tree 399 of 400
building tree 400 of 400


[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed: 30.0min finished


In [14]:
y_val_pred = model.predict(X_val)
print('RandomForestRegressor validation Room mean square error:', np.sqrt(mean_squared_error(y_val, y_val_pred)))

[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   5 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done  10 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done  17 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done  24 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done  33 tasks      | elapsed:    0.2s
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.2s
[Parallel(n_jobs=4)]: Done  53 tasks      | elapsed:    0.3s
[Parallel(n_jobs=4)]: Done  64 tasks      | elapsed:    0.3s
[Parallel(n_jobs=4)]: Done  77 tasks      | elapsed:    0.4s
[Parallel(n_jobs=4)]: Done  90 tasks      | elapsed:    0.5s
[Parallel(n_jobs=4)]: Done 105 tasks      | elapsed:    0.5s
[Parallel(n_jobs=4)]: Done 120 tasks      | elapsed:    0.6s
[Parallel(n_jobs=4)]: Done 137 tasks      | elapsed:    0.7s
[Parallel(n_jobs=4)]: Done 154 tasks      | elapsed:    0.8s
[Parallel(n_jobs=4)]: Done 173 tasks      | elapsed:    0.9s
[Para

RandomForestRegressor validation Room mean square error: 13.129937770807233


[Parallel(n_jobs=4)]: Done 400 out of 400 | elapsed:    1.9s finished


In [None]:
a