# Imports 

In [None]:
import warnings
warnings.filterwarnings("ignore")

from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import utils
from sklearn.metrics import mean_squared_error
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import FunctionTransformer, OneHotEncoder, StandardScaler, MinMaxScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from catboost import CatBoostRegressor

In [None]:
# Load the training features/target through the project helper, then read the
# final test set from its parquet file under ./data.
X, y = utils.get_train_data()
X_final_test = pd.read_parquet(Path("data", "final_test.parquet"))

## Hyperparameter tuning with GridSearchCV

In [None]:
# Column groups, each routed to its own branch of the ColumnTransformer below.
num_features = ['temp', 'precip', 'windspeed', 'visibility']
cat_features = ['counter_id']
time_features = ['hour', 'month', 'weekday', 'day']

# scikit-learn renamed OneHotEncoder's `sparse` argument to `sparse_output` in 1.2
# and removed the old spelling in 1.4. Prefer the new keyword and fall back to the
# legacy one so the notebook runs on both old and new installations.
try:
    cat_encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    cat_encoder = OneHotEncoder(sparse=False)

col_transformer = ColumnTransformer(
    transformers=[
        # Standardise the numeric columns.
        ('num', StandardScaler(), num_features),
        # Dense one-hot encoding of the counter identifier.
        ('cat', cat_encoder, cat_features),
        # Calendar columns go through the project's cyclical encoder.
        ('time', FunctionTransformer(utils.encode_cyclical_features), time_features)
    ],
    # Columns not listed above are forwarded unchanged.
    remainder='passthrough'
)

# X, y and X_final_test are already loaded by the data-loading cell at the top of
# the notebook; they are not re-read here to avoid redundant I/O.

# Temporal hold-out split. NOTE(review): presumably the last 30 days form the test
# set and the return order is (X_train, y_train, X_test, y_test) -- confirm in utils.
X_train, y_train, X_test, y_test = utils.train_test_split_temporal(X, y, delta_threshold="30 days")

In [None]:
# Preprocessing pipeline: the project-level `utils.prepro` function runs first,
# followed by the per-column transformers defined in `col_transformer`.
prepro_step = FunctionTransformer(utils.prepro)
pipe = Pipeline(
    steps=[
        ('prepro', prepro_step),
        ('col', col_transformer),
    ]
)

# Fit the preprocessing on the training split only and keep the transformed
# training matrix for the hyperparameter search.
X_train_merged = pipe.fit_transform(X_train)

In [None]:
# Search space for CatBoost. np.arange accumulates floating-point error, so the
# fractional values are rounded back to clean decimals.
# Grid size: 9 depths x 5 iteration counts x 4 rsm x 4 subsample = 720 candidates,
# i.e. 3600 fits with 5-fold CV (plus the final refit) -- expect a long run.
param_grid = dict(
    depth=[*range(5, 14)],                                            # 5 .. 13
    iterations=[*range(500, 1501, 250)],                              # 500 .. 1500 in steps of 250
    rsm=[round(v, 2) for v in np.arange(0.25, 0.41, 0.05)],           # 0.25 .. 0.40 in steps of 0.05
    subsample=[round(v, 1) for v in np.arange(0.5, 0.81, 0.1)],       # 0.5 .. 0.8 in steps of 0.1
    verbose=[0],                                                      # single value: silences CatBoost logging
)

catboost_model = CatBoostRegressor()

# Exhaustive search scored by negative MSE, parallelised over all cores.
# NOTE(review): cv=5 is a plain K-fold; if the rows are time-ordered, a
# time-aware splitter may give a more honest estimate -- confirm.
grid_search = GridSearchCV(
    estimator=catboost_model,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,
)
grid_search.fit(X_train_merged, y_train)

# Report the best hyperparameter combination found.
best_params = grid_search.best_params_
print("Best Parameters:", best_params)