*For the final submission, the hyperparameter tuning of the best performing models (which were determined in initial training) is done with GridSearchCV, which fits all possible candidates in a search space. Thus giving the most optimal parameters. The model are then trained with these parameters on the entire dataset for each of the camera id* \
For training the models, the same apporach explained in `train.ipynb` is used here.

### Importing Dependencies

In [1]:
import os
import numpy as np
import pandas as pd
from pathlib import Path
from typing import Tuple

from sklearn.preprocessing import StandardScaler
from joblib import dump, load
from sklearn.model_selection import train_test_split

from cuml.linear_model import LinearRegression, Lasso, ElasticNet
from cuml.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor, AdaBoostRegressor
from catboost import CatBoostRegressor
from xgboost import XGBRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import r2_score
from joblib import parallel_backend
from ray.util.joblib import register_ray
register_ray()
import ray
from cuml.common.device_selection import set_global_device_type

np.random.seed(42)

### Connect to the ray cluster

In [2]:
set_global_device_type("GPU")
ray.init("auto")

2024-08-25 10:55:37,428	INFO worker.py:1596 -- Connecting to existing Ray cluster at address: 10.19.3.211:6379...
2024-08-25 10:55:37,444	INFO worker.py:1772 -- Connected to Ray cluster. View the dashboard at [1m[32m127.0.0.1:8265 [39m[22m


0,1
Python version:,3.10.12
Ray version:,2.34.0
Dashboard:,http://127.0.0.1:8265


In [3]:
dataset_dir = Path(r"/home/user1/codes/bangaluru_mobility_codes/submission/future_counts/data")
save_dir = Path(r"/home/user1/codes/bangaluru_mobility_codes/submission/future_counts/model_training/results_entire_dataset_each_cam_id")

In [4]:
acc_models = {"RandomForest","LinearRegression","Lasso","ElasticNet","XGBRegressor","CatBoostingRegressor"}
target_column = "count"

In [5]:
# Specify parameters and distributions to sample from for hyper-parameter tuning

params={
        "RandomForest":{
            'criterion':['squared_error', 'friedman_mse', 'absolute_error', 'poisson'],
            'max_features':['sqrt','log2'],
            'n_estimators': [8,16,32,64,128,256,512,1024],
            'bootstrap': [False, True]
        },
        "GradientBoosting":{
            'loss':['squared_error', 'huber', 'absolute_error', 'quantile'],
            'learning_rate':[.1,.01,.05,.001],
            'subsample':[0.6,0.7,0.75,0.8,0.85,0.9],
            'criterion':['squared_error', 'friedman_mse'],
            'max_features':['sqrt','log2'],
            'n_estimators': [8,16,32,64,128,256]
        },
        "LinearRegression":{
            'algorithm': ['svd', 'eig', 'qr', 'svd-qr', 'svd-jacobi']
        },
        "XGBRegressor":{
            'learning_rate': np.linspace(0.001,0.2,8),
            'n_estimators': np.arange(8,512,50),
            'gamma': np.linspace(0,5,10),
            'reg_alpha': np.logspace(-3,1,10),
            'reg_lambda': np.logspace(-3,1,10)
        },
        "CatBoostingRegressor":{
            'depth': [4,6,8,10],
            'l2_leaf_reg': np.logspace(-2,1,10),
            'learning_rate': np.linspace(0.001,0.2,8),
            'iterations': np.arange(10,500,50)
        },
        "AdaBoostRegressor":{
            'learning_rate': np.linspace(0.001,0.2,8),
            'loss':['linear','square','exponential'],
            'n_estimators': np.arange(8,512,50),
        },
        "Lasso":{
            'alpha':  (np.logspace(-8, 8, 100)),
        },
        "ElasticNet": {
            'alpha': np.logspace(-4,2,30),
            'l1_ratio': np.linspace(0,1,30),
            'fit_intercept': [True, False]
        }
 }

In [6]:
def load_dateset(path: str) -> pd.DataFrame:
    """
    Load and preprocess a dataset from a CSV file.

    Removes any rows containing NaN values, and converts certain columns to more memory-efficient data types.
    The columns are then reordered for easier access.

    Parameters:
    path (str): The file path to the CSV file containing the dataset.

    Returns:
    pd.DataFrame
    """
    df = pd.read_csv(path)
    df = df.dropna()
    df["class"] = df["class"].astype(np.int8)
    df["zone_in"] = df["zone_in"].astype(np.int8)
    df["zone_out"] = df["zone_out"].astype(np.int8)
    df["count"] = df["count"].astype(np.int16)
    df["last_15_min_count"] = df["last_15_min_count"].astype(np.int16)
    df["last_30_min_count"] = df["last_30_min_count"].astype(np.int16)
    df = df.iloc[:,[0,1,2,4,5,3]]

    return df


In [7]:
def load_models(model_name: str):
    """
    Initializes and returns a machine learning models. 
    """
    models = {
        "ElasticNet": ElasticNet(copy_X=False),
        "XGBRegressor":XGBRegressor(),
        "LinearRegression":LinearRegression(copy_X=False),
        "RandomForest":RandomForestRegressor(),
        "GradientBoosting":GradientBoostingRegressor(),
        "CatBoostingRegressor":CatBoostRegressor(verbose=False),
        "AdaBoostRegressor":AdaBoostRegressor(),
        "Lasso": Lasso(copy_X=False),
    }

    return models[model_name]

In [8]:
def split_dataset(df: pd.DataFrame, cam_id: str) -> Tuple[np.ndarray, np.ndarray]:
    """
    Splits the dataset into matrix of features and dependent variable, scales the input features, and saves the scaler into a pickled file. 

    Parameters:
    -----------
    df (pd.DataFrame): The DataFrame containing the dataset to be split and scaled.
    
    cam_id (str): A unique identifier for the camera or dataset being processed. This ID is used to save the scaler with a specific name.

    Returns:
    --------
    Tuple[np.ndarray, np.ndarray]
    """
    scaler = StandardScaler()
    
    input_feature_train_df = df.drop([target_column], axis=1)
    target_feature_train_df = pd.DataFrame(df[target_column])

    input_feature_train_arr = scaler.fit_transform(input_feature_train_df)

    train_arr = np.c_[
        input_feature_train_arr, np.array(target_feature_train_df)
    ]

    dump(scaler, save_dir / "scalers" / f"{cam_id}.pkl")
    print(f"SC saved to {save_dir / 'scalers' / f'{cam_id}.pkl'}")

    X_train, y_train = train_arr[:, :-1], train_arr[:, -1]

    return X_train, y_train


In [9]:
model_for_location = {
    "18th_Crs_BsStp_JN_FIX_2": "GradientBoosting",
    "Ayyappa_Temple_FIX_1": "CatBoostingRegressor",
    "Stn_HD_1": "CatBoostingRegressor",
    "SBI_Bnk_JN_FIX_3": "CatBoostingRegressor",
    "Ramaiah_BsStp_JN_FIX_2": "CatBoostingRegressor",
    "Mattikere_JN_FIX_1": "GradientBoosting",
    "Buddha_Vihara_Temple": "XGBRegressor",
    "HP_Ptrl_Bnk_BEL_Rd_FIX_2": "CatBoostingRegressor",
    "Sundaranagar_Entrance": "GradientBoosting",
    "80ft_Road": "GradientBoosting",
    "Devasandra_Sgnl_JN_FIX_1": "CatBoostingRegressor",
    "Devasandra_Sgnl_JN_FIX_3": "CatBoostingRegressor",
    "Sty_Wll_Ldge_FIX_3": "GradientBoosting",
    "Ramaiah_BsStp_JN_FIX_1": "CatBoostingRegressor",
    "MS_Ramaiah_JN_FIX_1": "RandomForest",
    "MS_Ramaiah_JN_FIX_2": "CatBoostingRegressor",
    "18th_Crs_Bus_Stop_FIX_2": "CatBoostingRegressor",
    "SBI_Bnk_JN_FIX_1": "CatBoostingRegressor",
    "Mattikere_JN_FIX_3": "CatBoostingRegressor",
    "Mattikere_JN_HD_1": "Lasso",
    "Kuvempu_Circle_FIX_2": "RandomForest",
    "Kuvempu_Circle_FIX_1": "GradientBoosting",
    "Mattikere_JN_FIX_2": "GradientBoosting"
}

### Train models

In [10]:
models_report = dict() # cam_id -> (model_score, model_name, parameters)

# Iterate over each dataset in the dataset directory
for file in os.listdir(dataset_dir):
    if not file.endswith(".csv"): continue

    cam_id = file.removesuffix(".csv")
    print(f"Training for {cam_id} ...")
    models_report[cam_id] = dict()
    
    # Load the dataset
    df = load_dateset(dataset_dir / file)

    # Scaler features and target variable
    X_train, y_train = split_dataset(df, cam_id)

    model_name = model_for_location[cam_id]
    
    # Initialize previous best performing model
    model = load_models(model_name)
    
    os.makedirs(save_dir / "models" / cam_id, exist_ok=True)
    print(f"Now evaluating: {model_name}\n\n")

    para = params[model_name]
    
    # Hyperparameter tuning for GPU supported models
    if model_name in acc_models:
        gs = GridSearchCV(model, para, cv=5, verbose=4, n_jobs=10)
        with parallel_backend('ray', ray_remote_args=dict(num_gpus=0.01)):
            gs.fit(X_train, y_train)
    else:
        # Hyperparameter tuning for non-GPU supported models
        gs = GridSearchCV(model, para, cv=5, verbose=4, n_jobs=-1)
        with parallel_backend('ray'):
            gs.fit(X_train, y_train)

        
    model.set_params(**gs.best_params_)
    # Fit the model with best parameters
    model.fit(X_train, y_train)

    model_score = gs.best_score_
    parameters = gs.best_params_
    
    # Save a pickled copy of the model
    dump(model, save_dir / "models" / cam_id / f"{model_name}.pkl")
    print(f"{model_name} saved to {save_dir / 'models' / cam_id}") 


    models_report[cam_id] = (model_score, model_name, parameters)
    
    print("Best model score and parameters:\n")
    print(models_report[cam_id])
    print("\n\n\n")       



Training for 18th_Crs_BsStp_JN_FIX_2 ...
SC saved to /home/user1/codes/bangaluru_mobility_codes/submission/future_counts/model_training/results_entire_dataset_each_cam_id/scalers/18th_Crs_BsStp_JN_FIX_2.pkl
[I] [10:56:03.213188] Unused keyword parameter: copy_X during cuML estimator initialization
[I] [10:56:03.281393] Unused keyword parameter: copy_X during cuML estimator initialization
Now evaluating: GradientBoosting


Fitting 5 folds for each of 2304 candidates, totalling 11520 fits
GradientBoosting saved to /home/user1/codes/bangaluru_mobility_codes/submission/future_counts/model_training/results_entire_dataset_each_cam_id/models/18th_Crs_BsStp_JN_FIX_2
Best model score and parameters:

(0.9375828231461629, 'GradientBoosting', {'criterion': 'friedman_mse', 'learning_rate': 0.1, 'loss': 'huber', 'max_features': 'sqrt', 'n_estimators': 64, 'subsample': 0.8})




Training for Ayyappa_Temple_FIX_1 ...
SC saved to /home/user1/codes/bangaluru_mobility_codes/submission/future_counts/mode

[36m(PoolActor pid=2752347)[0m [CV 2/5] END criterion=squared_error, learning_rate=0.1, loss=squared_error, max_features=sqrt, n_estimators=8, subsample=0.6;, score=0.756 total time=   0.0s
[36m(PoolActor pid=2752428)[0m [CV 1/5] END criterion=squared_error, learning_rate=0.1, loss=squared_error, max_features=log2, n_estimators=8, subsample=0.7;, score=0.766 total time=   0.0s
[36m(PoolActor pid=2752389)[0m [CV 3/5] END criterion=squared_error, learning_rate=0.1, loss=huber, max_features=sqrt, n_estimators=8, subsample=0.8;, score=0.613 total time=   0.0s
[36m(PoolActor pid=2753067)[0m [CV 5/5] END criterion=squared_error, learning_rate=0.1, loss=huber, max_features=log2, n_estimators=8, subsample=0.6;, score=0.547 total time=   0.0s
[36m(PoolActor pid=2753067)[0m [CV 1/5] END criterion=squared_error, learning_rate=0.1, loss=huber, max_features=log2, n_estimators=8, subsample=0.7;, score=0.627 total time=   0.0s
[36m(PoolActor pid=2752382)[0m [CV 1/5] END criterion=squared_

  _data = np.array(data, dtype=dtype, copy=copy,


CatBoostingRegressor saved to /home/user1/codes/bangaluru_mobility_codes/submission/future_counts/model_training/results_entire_dataset_each_cam_id/models/Ayyappa_Temple_FIX_1
Best model score and parameters:

(0.9539773126983102, 'CatBoostingRegressor', {'depth': 4, 'iterations': 60, 'l2_leaf_reg': 0.01, 'learning_rate': 0.11471428571428573})




Training for Stn_HD_1 ...
SC saved to /home/user1/codes/bangaluru_mobility_codes/submission/future_counts/model_training/results_entire_dataset_each_cam_id/scalers/Stn_HD_1.pkl
[I] [11:11:42.090120] Unused keyword parameter: copy_X during cuML estimator initialization
[I] [11:11:42.090515] Unused keyword parameter: copy_X during cuML estimator initialization
Now evaluating: CatBoostingRegressor


Fitting 5 folds for each of 3200 candidates, totalling 16000 fits
CatBoostingRegressor saved to /home/user1/codes/bangaluru_mobility_codes/submission/future_counts/model_training/results_entire_dataset_each_cam_id/models/Stn_HD_1
Best model score and

  _data = np.array(data, dtype=dtype, copy=copy,


CatBoostingRegressor saved to /home/user1/codes/bangaluru_mobility_codes/submission/future_counts/model_training/results_entire_dataset_each_cam_id/models/HP_Ptrl_Bnk_BEL_Rd_FIX_2
Best model score and parameters:

(0.940219821195492, 'CatBoostingRegressor', {'depth': 4, 'iterations': 60, 'l2_leaf_reg': 4.641588833612777, 'learning_rate': 0.11471428571428573})




Training for Sundaranagar_Entrance ...
SC saved to /home/user1/codes/bangaluru_mobility_codes/submission/future_counts/model_training/results_entire_dataset_each_cam_id/scalers/Sundaranagar_Entrance.pkl
[I] [13:10:25.044210] Unused keyword parameter: copy_X during cuML estimator initialization
[I] [13:10:25.044554] Unused keyword parameter: copy_X during cuML estimator initialization
Now evaluating: GradientBoosting


Fitting 5 folds for each of 2304 candidates, totalling 11520 fits


  _data = np.array(data, dtype=dtype, copy=copy,


GradientBoosting saved to /home/user1/codes/bangaluru_mobility_codes/submission/future_counts/model_training/results_entire_dataset_each_cam_id/models/Sundaranagar_Entrance
Best model score and parameters:

(0.971410295836016, 'GradientBoosting', {'criterion': 'squared_error', 'learning_rate': 0.05, 'loss': 'squared_error', 'max_features': 'sqrt', 'n_estimators': 128, 'subsample': 0.8})




Training for 80ft_Road ...
SC saved to /home/user1/codes/bangaluru_mobility_codes/submission/future_counts/model_training/results_entire_dataset_each_cam_id/scalers/80ft_Road.pkl
[I] [13:11:19.785214] Unused keyword parameter: copy_X during cuML estimator initialization
[I] [13:11:19.785932] Unused keyword parameter: copy_X during cuML estimator initialization
Now evaluating: GradientBoosting


Fitting 5 folds for each of 2304 candidates, totalling 11520 fits
GradientBoosting saved to /home/user1/codes/bangaluru_mobility_codes/submission/future_counts/model_training/results_entire_dataset_each_cam_i

  _data = np.array(data, dtype=dtype, copy=copy,


CatBoostingRegressor saved to /home/user1/codes/bangaluru_mobility_codes/submission/future_counts/model_training/results_entire_dataset_each_cam_id/models/Devasandra_Sgnl_JN_FIX_1
Best model score and parameters:

(0.9575427696621877, 'CatBoostingRegressor', {'depth': 6, 'iterations': 110, 'l2_leaf_reg': 10.0, 'learning_rate': 0.057857142857142864})




Training for Devasandra_Sgnl_JN_FIX_3 ...
SC saved to /home/user1/codes/bangaluru_mobility_codes/submission/future_counts/model_training/results_entire_dataset_each_cam_id/scalers/Devasandra_Sgnl_JN_FIX_3.pkl
[I] [13:25:45.013684] Unused keyword parameter: copy_X during cuML estimator initialization
[I] [13:25:45.014063] Unused keyword parameter: copy_X during cuML estimator initialization
Now evaluating: CatBoostingRegressor


Fitting 5 folds for each of 3200 candidates, totalling 16000 fits


  _data = np.array(data, dtype=dtype, copy=copy,


CatBoostingRegressor saved to /home/user1/codes/bangaluru_mobility_codes/submission/future_counts/model_training/results_entire_dataset_each_cam_id/models/Devasandra_Sgnl_JN_FIX_3
Best model score and parameters:

(0.9464676834278187, 'CatBoostingRegressor', {'depth': 8, 'iterations': 110, 'l2_leaf_reg': 0.4641588833612777, 'learning_rate': 0.0862857142857143})




Training for Sty_Wll_Ldge_FIX_3 ...
SC saved to /home/user1/codes/bangaluru_mobility_codes/submission/future_counts/model_training/results_entire_dataset_each_cam_id/scalers/Sty_Wll_Ldge_FIX_3.pkl
[I] [13:47:21.149689] Unused keyword parameter: copy_X during cuML estimator initialization
[I] [13:47:21.149960] Unused keyword parameter: copy_X during cuML estimator initialization
Now evaluating: GradientBoosting


Fitting 5 folds for each of 2304 candidates, totalling 11520 fits


  _data = np.array(data, dtype=dtype, copy=copy,


GradientBoosting saved to /home/user1/codes/bangaluru_mobility_codes/submission/future_counts/model_training/results_entire_dataset_each_cam_id/models/Sty_Wll_Ldge_FIX_3
Best model score and parameters:

(0.9315495412846608, 'GradientBoosting', {'criterion': 'friedman_mse', 'learning_rate': 0.05, 'loss': 'huber', 'max_features': 'log2', 'n_estimators': 128, 'subsample': 0.75})




Training for Ramaiah_BsStp_JN_FIX_1 ...
SC saved to /home/user1/codes/bangaluru_mobility_codes/submission/future_counts/model_training/results_entire_dataset_each_cam_id/scalers/Ramaiah_BsStp_JN_FIX_1.pkl
[I] [13:48:24.598676] Unused keyword parameter: copy_X during cuML estimator initialization
[I] [13:48:24.599051] Unused keyword parameter: copy_X during cuML estimator initialization
Now evaluating: CatBoostingRegressor


Fitting 5 folds for each of 3200 candidates, totalling 16000 fits


  _data = np.array(data, dtype=dtype, copy=copy,


CatBoostingRegressor saved to /home/user1/codes/bangaluru_mobility_codes/submission/future_counts/model_training/results_entire_dataset_each_cam_id/models/Ramaiah_BsStp_JN_FIX_1
Best model score and parameters:

(0.956038625421775, 'CatBoostingRegressor', {'depth': 6, 'iterations': 210, 'l2_leaf_reg': 4.641588833612777, 'learning_rate': 0.029428571428571432})




Training for MS_Ramaiah_JN_FIX_1 ...
SC saved to /home/user1/codes/bangaluru_mobility_codes/submission/future_counts/model_training/results_entire_dataset_each_cam_id/scalers/MS_Ramaiah_JN_FIX_1.pkl
[I] [14:02:38.292729] Unused keyword parameter: copy_X during cuML estimator initialization
[I] [14:02:38.293008] Unused keyword parameter: copy_X during cuML estimator initialization
Now evaluating: RandomForest


Fitting 5 folds for each of 128 candidates, totalling 640 fits


[36m(PoolActor pid=3060204)[0m   ret = func(*args, **kwargs)


[36m(PoolActor pid=3060201)[0m [CV 2/5] END bootstrap=False, criterion=squared_error, max_features=sqrt, n_estimators=8;, score=0.936 total time=   0.4s
[36m(PoolActor pid=2548253)[0m [CV 5/5] END depth=10, iterations=460, l2_leaf_reg=10.0, learning_rate=0.2;, score=0.939 total time=   1.1s[32m [repeated 10x across cluster][0m
[36m(PoolActor pid=3060203)[0m [CV 5/5] END bootstrap=False, criterion=squared_error, max_features=sqrt, n_estimators=128;, score=0.921 total time=   4.0s[32m [repeated 20x across cluster][0m
[36m(PoolActor pid=3060205)[0m [CV 3/5] END bootstrap=False, criterion=squared_error, max_features=sqrt, n_estimators=256;, score=0.949 total time=   8.0s[32m [repeated 6x across cluster][0m
[36m(PoolActor pid=3060198)[0m [CV 1/5] END bootstrap=False, criterion=squared_error, max_features=sqrt, n_estimators=512;, score=0.942 total time=  13.5s[32m [repeated 4x across cluster][0m
[36m(PoolActor pid=3060198)[0m [CV 1/5] END bootstrap=False, criterion=square

  ret = func(*args, **kwargs)


[36m(PoolActor pid=3060202)[0m Exception ignored in: <cyfunction RandomForestRegressor.__del__ at 0x7167ea507850>
[36m(PoolActor pid=3060202)[0m Traceback (most recent call last):
[36m(PoolActor pid=3060202)[0m   File "randomforestregressor.pyx", line 321, in cuml.ensemble.randomforestregressor.RandomForestRegressor.__del__
[36m(PoolActor pid=3060202)[0m   File "randomforestregressor.pyx", line 337, in cuml.ensemble.randomforestregressor.RandomForestRegressor._reset_forest_data
[36m(PoolActor pid=3060202)[0m AttributeError: 'NoneType' object has no attribute 'free_treelite_model'
[36m(PoolActor pid=3060206)[0m   ret = func(*args, **kwargs)[32m [repeated 9x across cluster][0m


RandomForest saved to /home/user1/codes/bangaluru_mobility_codes/submission/future_counts/model_training/results_entire_dataset_each_cam_id/models/MS_Ramaiah_JN_FIX_1
Best model score and parameters:

(0.9382061819552459, 'RandomForest', {'bootstrap': True, 'criterion': 'squared_error', 'max_features': 'sqrt', 'n_estimators': 128})




Training for MS_Ramaiah_JN_FIX_2 ...
SC saved to /home/user1/codes/bangaluru_mobility_codes/submission/future_counts/model_training/results_entire_dataset_each_cam_id/scalers/MS_Ramaiah_JN_FIX_2.pkl
[I] [14:10:54.243590] Unused keyword parameter: copy_X during cuML estimator initialization
[I] [14:10:54.243855] Unused keyword parameter: copy_X during cuML estimator initialization
Now evaluating: CatBoostingRegressor


Fitting 5 folds for each of 3200 candidates, totalling 16000 fits


[36m(PoolActor pid=3061896)[0m [CV 1/5] END depth=4, iterations=10, l2_leaf_reg=0.01, learning_rate=0.001;, score=0.016 total time=   0.1s
[36m(PoolActor pid=3060197)[0m [CV 5/5] END bootstrap=True, criterion=poisson, max_features=log2, n_estimators=1024;, score=0.921 total time=   7.3s[32m [repeated 2x across cluster][0m
[36m(PoolActor pid=3061900)[0m [CV 4/5] END depth=4, iterations=10, l2_leaf_reg=10.0, learning_rate=0.14314285714285716;, score=0.851 total time=   0.2s[32m [repeated 386x across cluster][0m
[36m(PoolActor pid=3061900)[0m [CV 4/5] END depth=4, iterations=60, l2_leaf_reg=0.21544346900318834, learning_rate=0.029428571428571432;, score=0.884 total time=   0.3s[32m [repeated 174x across cluster][0m
[36m(PoolActor pid=3061900)[0m [CV 4/5] END depth=4, iterations=60, l2_leaf_reg=4.641588833612777, learning_rate=0.14314285714285716;, score=0.932 total time=   0.3s[32m [repeated 183x across cluster][0m
[36m(PoolActor pid=3061897)[0m [CV 4/5] END depth=4, i

  _data = np.array(data, dtype=dtype, copy=copy,


CatBoostingRegressor saved to /home/user1/codes/bangaluru_mobility_codes/submission/future_counts/model_training/results_entire_dataset_each_cam_id/models/MS_Ramaiah_JN_FIX_2
Best model score and parameters:

(0.913071675646625, 'CatBoostingRegressor', {'depth': 10, 'iterations': 110, 'l2_leaf_reg': 4.641588833612777, 'learning_rate': 0.0862857142857143})




Training for 18th_Crs_Bus_Stop_FIX_2 ...
SC saved to /home/user1/codes/bangaluru_mobility_codes/submission/future_counts/model_training/results_entire_dataset_each_cam_id/scalers/18th_Crs_Bus_Stop_FIX_2.pkl
[I] [14:53:31.763057] Unused keyword parameter: copy_X during cuML estimator initialization
[I] [14:53:31.763318] Unused keyword parameter: copy_X during cuML estimator initialization
Now evaluating: CatBoostingRegressor


Fitting 5 folds for each of 3200 candidates, totalling 16000 fits


  _data = np.array(data, dtype=dtype, copy=copy,


CatBoostingRegressor saved to /home/user1/codes/bangaluru_mobility_codes/submission/future_counts/model_training/results_entire_dataset_each_cam_id/models/18th_Crs_Bus_Stop_FIX_2
Best model score and parameters:

(0.9638750502979606, 'CatBoostingRegressor', {'depth': 6, 'iterations': 210, 'l2_leaf_reg': 10.0, 'learning_rate': 0.029428571428571432})




Training for SBI_Bnk_JN_FIX_1 ...
SC saved to /home/user1/codes/bangaluru_mobility_codes/submission/future_counts/model_training/results_entire_dataset_each_cam_id/scalers/SBI_Bnk_JN_FIX_1.pkl
[I] [15:17:43.354854] Unused keyword parameter: copy_X during cuML estimator initialization
[I] [15:17:43.355128] Unused keyword parameter: copy_X during cuML estimator initialization
Now evaluating: CatBoostingRegressor


Fitting 5 folds for each of 3200 candidates, totalling 16000 fits


  _data = np.array(data, dtype=dtype, copy=copy,


CatBoostingRegressor saved to /home/user1/codes/bangaluru_mobility_codes/submission/future_counts/model_training/results_entire_dataset_each_cam_id/models/SBI_Bnk_JN_FIX_1
Best model score and parameters:

(0.9654133069477758, 'CatBoostingRegressor', {'depth': 4, 'iterations': 210, 'l2_leaf_reg': 10.0, 'learning_rate': 0.11471428571428573})




Training for Mattikere_JN_FIX_3 ...
SC saved to /home/user1/codes/bangaluru_mobility_codes/submission/future_counts/model_training/results_entire_dataset_each_cam_id/scalers/Mattikere_JN_FIX_3.pkl
[I] [15:35:00.189650] Unused keyword parameter: copy_X during cuML estimator initialization
[I] [15:35:00.189978] Unused keyword parameter: copy_X during cuML estimator initialization
Now evaluating: CatBoostingRegressor


Fitting 5 folds for each of 3200 candidates, totalling 16000 fits
CatBoostingRegressor saved to /home/user1/codes/bangaluru_mobility_codes/submission/future_counts/model_training/results_entire_dataset_each_cam_id/models/Mattikere_JN

[36m(PoolActor pid=3060206)[0m Exception ignored in: <cyfunction RandomForestRegressor.__del__ at 0x7ed2f9103850>[32m [repeated 9x across cluster][0m
[36m(PoolActor pid=3060206)[0m Traceback (most recent call last):[32m [repeated 9x across cluster][0m
[36m(PoolActor pid=3060206)[0m   File "randomforestregressor.pyx", line 321, in cuml.ensemble.randomforestregressor.RandomForestRegressor.__del__[32m [repeated 9x across cluster][0m
[36m(PoolActor pid=3060206)[0m   File "randomforestregressor.pyx", line 337, in cuml.ensemble.randomforestregressor.RandomForestRegressor._reset_forest_data[32m [repeated 9x across cluster][0m
[36m(PoolActor pid=3060206)[0m AttributeError: 'NoneType' object has no attribute 'free_treelite_model'[32m [repeated 9x across cluster][0m
[36m(PoolActor pid=1413421)[0m   ret = func(*args, **kwargs)
[36m(PoolActor pid=1413423)[0m   ret = func(*args, **kwargs)


[36m(PoolActor pid=1413419)[0m [CV 4/5] END bootstrap=False, criterion=squared_error, max_features=sqrt, n_estimators=8;, score=0.920 total time=   0.3s
[36m(PoolActor pid=1412568)[0m [CV 5/5] END ..........alpha=68926121.04349709;, score=-0.000 total time=   0.0s[32m [repeated 496x across cluster][0m
[36m(PoolActor pid=1413423)[0m [CV 3/5] END bootstrap=False, criterion=squared_error, max_features=sqrt, n_estimators=256;, score=0.961 total time=   6.4s[32m [repeated 25x across cluster][0m
[36m(PoolActor pid=1413417)[0m [CV 2/5] END bootstrap=False, criterion=squared_error, max_features=sqrt, n_estimators=512;, score=0.950 total time=  10.5s[32m [repeated 5x across cluster][0m
[36m(PoolActor pid=1413417)[0m [CV 2/5] END bootstrap=False, criterion=squared_error, max_features=log2, n_estimators=8;, score=0.911 total time=   0.2s
[36m(PoolActor pid=1413416)[0m [CV 1/5] END bootstrap=False, criterion=squared_error, max_features=sqrt, n_estimators=512;, score=0.946 total t

  _data = np.array(data, dtype=dtype, copy=copy,
  ret = func(*args, **kwargs)


[36m(PoolActor pid=1413417)[0m Exception ignored in: <cyfunction RandomForestRegressor.__del__ at 0x7c1d472a3850>
[36m(PoolActor pid=1413417)[0m Traceback (most recent call last):
[36m(PoolActor pid=1413417)[0m   File "randomforestregressor.pyx", line 321, in cuml.ensemble.randomforestregressor.RandomForestRegressor.__del__
[36m(PoolActor pid=1413417)[0m   File "randomforestregressor.pyx", line 337, in cuml.ensemble.randomforestregressor.RandomForestRegressor._reset_forest_data
[36m(PoolActor pid=1413417)[0m AttributeError: 'NoneType' object has no attribute 'free_treelite_model'
[36m(PoolActor pid=1413422)[0m   ret = func(*args, **kwargs)[32m [repeated 8x across cluster][0m
[36m(PoolActor pid=1413418)[0m Exception ignored in: <cyfunction RandomForestRegressor.__del__ at 0x79e3a52ff850>
[36m(PoolActor pid=1413418)[0m Traceback (most recent call last):
[36m(PoolActor pid=1413418)[0m   File "randomforestregressor.pyx", line 321, in cuml.ensemble.randomforestregressor.

RandomForest saved to /home/user1/codes/bangaluru_mobility_codes/submission/future_counts/model_training/results_entire_dataset_each_cam_id/models/Kuvempu_Circle_FIX_2
Best model score and parameters:

(0.954165213748946, 'RandomForest', {'bootstrap': True, 'criterion': 'squared_error', 'max_features': 'sqrt', 'n_estimators': 1024})




Training for Kuvempu_Circle_FIX_1 ...
SC saved to /home/user1/codes/bangaluru_mobility_codes/submission/future_counts/model_training/results_entire_dataset_each_cam_id/scalers/Kuvempu_Circle_FIX_1.pkl
[I] [15:49:36.646390] Unused keyword parameter: copy_X during cuML estimator initialization
[I] [15:49:36.646633] Unused keyword parameter: copy_X during cuML estimator initialization
Now evaluating: GradientBoosting


Fitting 5 folds for each of 2304 candidates, totalling 11520 fits


[36m(PoolActor pid=1415059)[0m [CV 4/5] END criterion=squared_error, learning_rate=0.1, loss=squared_error, max_features=sqrt, n_estimators=8, subsample=0.6;, score=0.779 total time=   0.0s
[36m(PoolActor pid=1413422)[0m [CV 2/5] END bootstrap=True, criterion=poisson, max_features=log2, n_estimators=1024;, score=0.953 total time=   3.5s
[36m(PoolActor pid=1415076)[0m [CV 2/5] END criterion=squared_error, learning_rate=0.1, loss=squared_error, max_features=log2, n_estimators=8, subsample=0.75;, score=0.772 total time=   0.0s
[36m(PoolActor pid=1415083)[0m [CV 1/5] END criterion=squared_error, learning_rate=0.1, loss=huber, max_features=sqrt, n_estimators=8, subsample=0.75;, score=0.725 total time=   0.0s
[36m(PoolActor pid=1415079)[0m [CV 1/5] END criterion=squared_error, learning_rate=0.1, loss=huber, max_features=log2, n_estimators=8, subsample=0.6;, score=0.688 total time=   0.0s
[36m(PoolActor pid=1415079)[0m [CV 2/5] END criterion=squared_error, learning_rate=0.1, loss=

  _data = np.array(data, dtype=dtype, copy=copy,


GradientBoosting saved to /home/user1/codes/bangaluru_mobility_codes/submission/future_counts/model_training/results_entire_dataset_each_cam_id/models/Kuvempu_Circle_FIX_1
Best model score and parameters:

(0.955481135448275, 'GradientBoosting', {'criterion': 'squared_error', 'learning_rate': 0.1, 'loss': 'absolute_error', 'max_features': 'sqrt', 'n_estimators': 128, 'subsample': 0.75})




Training for Mattikere_JN_FIX_2 ...
SC saved to /home/user1/codes/bangaluru_mobility_codes/submission/future_counts/model_training/results_entire_dataset_each_cam_id/scalers/Mattikere_JN_FIX_2.pkl
[I] [15:50:43.142823] Unused keyword parameter: copy_X during cuML estimator initialization
[I] [15:50:43.143107] Unused keyword parameter: copy_X during cuML estimator initialization
Now evaluating: GradientBoosting


Fitting 5 folds for each of 2304 candidates, totalling 11520 fits


  _data = np.array(data, dtype=dtype, copy=copy,


GradientBoosting saved to /home/user1/codes/bangaluru_mobility_codes/submission/future_counts/model_training/results_entire_dataset_each_cam_id/models/Mattikere_JN_FIX_2
Best model score and parameters:

(0.9745166889694582, 'GradientBoosting', {'criterion': 'squared_error', 'learning_rate': 0.1, 'loss': 'squared_error', 'max_features': 'sqrt', 'n_estimators': 64, 'subsample': 0.7})






In [11]:
models_report

{'18th_Crs_BsStp_JN_FIX_2': (0.9375828231461629,
  'GradientBoosting',
  {'criterion': 'friedman_mse',
   'learning_rate': 0.1,
   'loss': 'huber',
   'max_features': 'sqrt',
   'n_estimators': 64,
   'subsample': 0.8}),
 'Ayyappa_Temple_FIX_1': (0.9539773126983102,
  'CatBoostingRegressor',
  {'depth': 4,
   'iterations': 60,
   'l2_leaf_reg': 0.01,
   'learning_rate': 0.11471428571428573}),
 'Stn_HD_1': (0.9601979753572177,
  'CatBoostingRegressor',
  {'depth': 10,
   'iterations': 160,
   'l2_leaf_reg': 0.09999999999999999,
   'learning_rate': 0.029428571428571432}),
 'SBI_Bnk_JN_FIX_3': (0.9611470844309009,
  'CatBoostingRegressor',
  {'depth': 10,
   'iterations': 160,
   'l2_leaf_reg': 0.09999999999999999,
   'learning_rate': 0.029428571428571432}),
 'Ramaiah_BsStp_JN_FIX_2': (0.9599139671339081,
  'CatBoostingRegressor',
  {'depth': 4,
   'iterations': 210,
   'l2_leaf_reg': 10.0,
   'learning_rate': 0.029428571428571432}),
 'Mattikere_JN_FIX_1': (0.9211983083109135,
  'Gradient