# In [None]:
from itertools import product

import numpy as np
import pandas as pd
from imblearn.over_sampling import RandomOverSampler
from imblearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import KFold, cross_validate

# Published Google Sheets CSV export that is read as the dataset.
CSV_PATH = "https://docs.google.com/spreadsheets/d/e/2PACX-1vROupcEcGFWafl16RmdNcSg7J3ZfCyD1socrrhGBwE0JBD_G7GN7r8YvYKSvyQzsxRW19MYpLkRClrU/pub?gid=0&single=true&output=csv"
df = pd.read_csv(CSV_PATH)

# The 'y' column holds the target labels.
y = df['y']
# Every column except 'type' and 'y' is kept as a feature.
X = df.drop(columns=['type', 'y'])

# One seed shared by the sampler, the fold shuffling and the forests, so that
# every hyperparameter combination is scored on identical splits and reruns
# of the script give the same result.
RANDOM_STATE = 42

# Oversampled copy of the full dataset. It is used ONLY for the final refit of
# the selected model; it must not be passed to cross-validation, because the
# duplicated minority rows would then appear in both the training and the
# validation folds and inflate the scores.
ros = RandomOverSampler(random_state=RANDOM_STATE)
X_resampled, y_resampled = ros.fit_resample(X, y)

# A fixed random_state makes every cross_validate call reuse the same folds.
# NOTE(review): if some class is rare, StratifiedKFold may be a safer splitter.
kf = KFold(n_splits=3, shuffle=True, random_state=RANDOM_STATE)
# NOTE(review): for single-label problems micro-averaged F1 equals accuracy;
# confirm this is the metric intended for imbalanced data.
metric = 'f1_micro'

# Define hyperparameters to tune
hyperparameters = {
    'n_estimators': [100, 200, 300, 400, 500],
    'max_depth': [None, 5, 10, 15],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'max_features': ['sqrt', 'log2'],
}

best_f1_score = 0.0
best_model = None

# Perform hyperparameter tuning: exhaustive search over the Cartesian product
# of the grid (same visiting order as nesting one loop per key).
param_names = list(hyperparameters)
for combination in product(*hyperparameters.values()):
    params = dict(zip(param_names, combination))
    model = RandomForestClassifier(random_state=RANDOM_STATE, **params)

    # The imblearn Pipeline applies the sampler only while fitting, so each
    # training fold is oversampled on its own and the validation fold stays
    # untouched.
    pipeline = Pipeline([
        ('oversample', RandomOverSampler(random_state=RANDOM_STATE)),
        ('classifier', model),
    ])

    cv_results = cross_validate(pipeline, X, y, cv=kf, scoring=metric)
    average_f1_score = np.mean(cv_results['test_score'])

    if average_f1_score > best_f1_score:
        best_f1_score = average_f1_score
        # cross_validate fits clones, so `model` itself is still unfitted here.
        best_model = model

# Fit the winning configuration once on all (oversampled) data so that
# best_model can actually be used for prediction afterwards.
if best_model is not None:
    best_model.fit(X_resampled, y_resampled)

# Print the best F1 score and the corresponding model
print('Best F1 Score:', best_f1_score)
print('Best Model:', best_model)
