In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random
from tqdm.notebook import tnrange
from sklearn.model_selection import cross_val_score, train_test_split, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score as f1

In [None]:
def gridSearch(path, random_state = None):
    """Randomized hyper-parameter search for a random forest on one training set.

    Reads '../datasets/<path>_training.csv', separates the 'target' column
    from the features, keeps 75% of the rows via train_test_split and runs a
    100-iteration RandomizedSearchCV (scoring 'f1_weighted') over a
    RandomForestClassifier with max_features='sqrt'.

    Parameters
    ----------
    path : str
        Path fragment inserted between '../datasets/' and '_training.csv'.
    random_state : int, RandomState instance or None, default None
        Forwarded to train_test_split, RandomForestClassifier and
        RandomizedSearchCV. The default None leaves every step unseeded;
        pass an int to make the search repeatable.

    Returns
    -------
    pandas.Series
        The sampled candidate with the highest mean cross-validated score,
        indexed by 'n_estimators', 'min_samples_split', 'min_samples_leaf',
        'max_depth', 'max_samples' and 'f1_scores'.
    """
    # n_estimators and max_depth share one candidate list: 2..10 plus 25, 50, 75, 100.
    units = list(range(2,11))
    tens = list(range(25,101,25))
    param_grid = {
        'n_estimators': units + tens,
        'min_samples_split': list(range(8,51,2)),
        'min_samples_leaf': list(range(1,31,2)),
        'max_depth': units + tens,
        'max_samples': [round(0.1*i,2) for i in range(1,10)]
    }

    training = pd.read_csv('../datasets/' + path + '_training.csv')
    X = training.drop(columns = 'target')
    y = training['target']

    # The search only ever sees the 75% split; the held-out 25% is not used
    # in this function, so it is discarded instead of bound to unused names.
    X, _, y, _ = train_test_split(X, y, test_size = 0.25, random_state = random_state)

    search = RandomizedSearchCV(
        estimator = RandomForestClassifier(max_features = 'sqrt', random_state = random_state),
        param_distributions = param_grid,
        n_iter = 100,
        scoring = 'f1_weighted',
        n_jobs = 4,
        random_state = random_state,
    ).fit(X, y)

    # One row per sampled candidate; the column order is pinned explicitly so
    # the returned Series keeps the same index order regardless of how the
    # parameter dicts are keyed.
    results = pd.DataFrame(
        search.cv_results_['params'],
        columns = ['n_estimators', 'min_samples_split', 'min_samples_leaf', 'max_depth', 'max_samples'],
    )
    results['f1_scores'] = search.cv_results_['mean_test_score']

    # sort_values puts NaN last by default, which would make a candidate whose
    # fit failed (NaN score) look like the winner; push NaNs to the front so
    # the last row is the best *valid* score.
    return results.sort_values(by = 'f1_scores', na_position = 'first').iloc[-1]

In [None]:
datasets = ['s0', 's1', 's2', 's3', 's4', 's5', 's6']
direction = ['horz', 'vert']

# Run the hyper-parameter search once per (dataset, direction) pair and keep
# the best candidate of each run. `columns` holds the matching
# '<dataset>_<direction>' labels in the same order.
columns = []
best_rows = []

for i in tnrange(len(datasets)):
    for d in direction:
        columns.append(datasets[i] + '_' + d)
        grid = gridSearch(datasets[i] + '/' + d + '_' + datasets[i])
        best_rows.append(grid)

# Concatenate once after the loop instead of growing the frame on every
# iteration (avoids repeated copying and concatenation with an empty frame).
df = pd.concat(best_rows, axis = 1, sort = False) if best_rows else pd.DataFrame()
df.columns = columns

In [None]:
# Display the summary table: one column per '<dataset>_<direction>' run,
# one row per hyper-parameter plus the 'f1_scores' row.
df