In [None]:
import pandas as pd
import numpy as np

In [None]:
# Single seed value used for the notebook's random draws.
RANDOM_SEED = 65

# Seed NumPy's global generator, which the np.random.choice call used for
# sub-sampling later in the notebook draws from.
np.random.seed(RANDOM_SEED)

In [None]:
def get_data_item(name: str) -> pd.DataFrame:
    """Read ``./<name>`` as CSV and return it as a DataFrame.

    When the first column is labelled 'Unnamed: 0' (the label pandas gives a
    header-less leading column, typically a saved index), that column is
    dropped and a notice is printed.
    """
    frame = pd.read_csv(f"./{name}")
    leading_label = frame.columns[0]
    if leading_label != 'Unnamed: 0':
        return frame
    print('Removing first column')
    return frame.drop(columns=leading_label)

In [None]:
# Load both training CSVs through the same helper, X first and then y.
X_train, y_train = map(
    get_data_item,
    ('X_train-holds-binary.csv', 'y_train-holds-binary.csv'),
)

In [None]:
# Peek at the first rows of X_train (DataFrame.head defaults to 5 rows).
X_train.head()

In [None]:
# Peek at the first rows of y_train (DataFrame.head defaults to 5 rows).
y_train.head()

In [None]:
# Pull the raw arrays out of the frames; ravel() flattens the values of
# y_train into a 1-D array.
X, y = X_train.values, y_train.values.ravel()

In [None]:
# Show the shapes of both arrays side by side.
X.shape, y.shape

## Random Forest on the GPU

In [None]:
def get_random_percentage_of_data(X, y, percentage, random_state=None):
    """Return a random, row-aligned subset of ``X`` and ``y``.

    Rows are drawn without replacement and the same indices are applied to
    both inputs, so every sampled row keeps its own target.

    Parameters
    ----------
    X, y : array-like
        Equal-length containers that support integer-array indexing
        (e.g. NumPy arrays).
    percentage : float
        Fraction of rows to keep, between 0 and 1 inclusive. The number of
        rows returned is ``int(len(X) * percentage)``, i.e. rounded down.
    random_state : int, numpy.random.Generator or None, optional
        Seed or generator for a draw that does not touch NumPy's global
        state. ``None`` (the default) keeps the original behaviour of using
        ``np.random.choice``, which follows ``np.random.seed``.

    Returns
    -------
    tuple
        ``(X[indices], y[indices])`` for the sampled indices.

    Raises
    ------
    ValueError
        If ``X`` and ``y`` differ in length, or ``percentage`` is outside
        the range 0 to 1.
    """
    num_rows = len(X)
    # Without this check a longer y would be sampled silently with indices
    # that only cover X, pairing rows with the wrong targets.
    if len(y) != num_rows:
        raise ValueError(
            f"X and y must have the same length, got {num_rows} and {len(y)}"
        )
    # Written as a negated chain so NaN is rejected as well.
    if not 0 <= percentage <= 1:
        raise ValueError(f"percentage must be between 0 and 1, got {percentage}")

    num_samples = int(num_rows * percentage)
    if random_state is None:
        indices = np.random.choice(num_rows, num_samples, replace=False)
    else:
        rng = np.random.default_rng(random_state)
        indices = rng.choice(num_rows, num_samples, replace=False)
    return X[indices], y[indices]

In [None]:
# Fraction of the loaded rows kept for the hyper-parameter search.
SAMPLE_FRACTION = 0.1

# Sample from the loaded frames rather than from X/y themselves: feeding X, y
# back into their own assignment would shrink the data again on every re-run
# of this cell. On a fresh top-to-bottom run the inputs are the same arrays.
X, y = get_random_percentage_of_data(
    X_train.values, y_train.values.ravel(), SAMPLE_FRACTION
)
print(f"X: {X.shape}, y: {y.shape}")

In [None]:
# `cp` must be CuPy: the standard-library `copy` module has no `array`
# function, so the previous `import copy as cp` made this cell fail.
# CuPy is a dependency of cuML, which this notebook already relies on.
import cupy as cp

# cp.array creates the arrays on the current GPU device.
X_gpu = cp.array(X)
y_gpu = cp.array(y)

In [None]:
from cuml.ensemble import RandomForestRegressor

# Seed the forest with the notebook-wide seed; without it each run builds
# different trees even though NumPy itself is seeded.
# NOTE(review): cuML recommends n_streams=1 for repeatable forests and may
# warn when it is left at its default — confirm whether the slower training
# is acceptable before adding it.
rgr = RandomForestRegressor(random_state=RANDOM_SEED)

In [None]:
# Search space for the grid search. Every key has to be a constructor
# argument of cuML's RandomForestRegressor.
param_grid = {
    'n_estimators': [300, 500],
    # 'max_features': ['sqrt', 'log2', None], # was None
    'max_depth': [30, 50],
    # cuML calls the leaf cap `max_leaves`; `max_leaf_nodes` is the
    # scikit-learn spelling and is not a cuML parameter.
    'max_leaves': [21, 40],
}

# Grid size is the product of the number of candidates per parameter.
unique_combinations = np.prod([len(candidates) for candidates in param_grid.values()])
print('Unique combinations: ', unique_combinations)

In [None]:
from sklearn.metrics import make_scorer, mean_squared_error, mean_absolute_error, r2_score

# The two error metrics are "lower is better", so their scorers are built
# with greater_is_better=False; R2 keeps make_scorer's default direction.
mse_scorer, mae_scorer = (
    make_scorer(error_metric, greater_is_better=False)
    for error_metric in (mean_squared_error, mean_absolute_error)
)
r2_scorer = make_scorer(r2_score)

# Names used here become the suffixes of the cv_results_ columns.
scoring = dict(MSE=mse_scorer, MAE=mae_scorer, R2=r2_scorer)

In [None]:
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV

# Exhaustive 5-fold search over param_grid. All three metrics are recorded,
# and the final model is refitted using the 'MSE' scorer's ranking.
grid_search = GridSearchCV(
    estimator=rgr,
    param_grid=param_grid,
    scoring=scoring,
    refit='MSE',
    cv=5,
)
grid_search.fit(X_gpu, y_gpu)

In [None]:
# Report the selected configuration and its mean cross-validated metrics.
print(f"Best params: {grid_search.best_params_}")
# The MSE and MAE scorers were created with greater_is_better=False, so the
# stored values are negated; the leading minus restores the usual sign.
print(f"Best MSE score: {-grid_search.cv_results_['mean_test_MSE'][grid_search.best_index_]}")
print(f"Best MAE score: {-grid_search.cv_results_['mean_test_MAE'][grid_search.best_index_]}")
# The R2 scorer uses make_scorer's default direction, so it is printed as stored.
print(f"Best R2 score: {grid_search.cv_results_['mean_test_R2'][grid_search.best_index_]}")

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=3d14fd08-487f-450a-b532-6cfd474a7f71' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>