# Tuning with Grid Search - Random Forest (150 samples)

## 01- Import and Prepare Data

In [25]:
# Load libraries
import numpy as np
from numpy import arange
import pickle
from pandas import read_csv
#
import matplotlib.pyplot as plt

#
import pandas as pd
from pandas import read_csv

from sklearn.metrics import confusion_matrix,  classification_report, f1_score
from sklearn.model_selection import train_test_split, KFold,StratifiedKFold, cross_val_score, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, RobustScaler

import tensorflow
from sklearn.ensemble import RandomForestClassifier

import warnings
import seaborn as sbs
import sys

warnings.filterwarnings('ignore')

RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
tensorflow.random.set_seed(RANDOM_SEED)
np.set_printoptions(threshold=sys.maxsize)

with open('data/X2.pkl', 'rb') as f:
    X = pickle.load(f)

with open('data/y2.pkl', 'rb') as f:
    y = pickle.load(f)
    
labels = 5
samples = 150
X = X[:labels*samples]
y = y[:labels*samples]

classes = np.unique(y).tolist()
for i in range(len(classes)):
    y = np.where(y==classes[i], i, y)
    
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=RANDOM_SEED)

y = np.array(y)

scaler = RobustScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

## 02 - Prepare hyperparameters

In [26]:
# Load the model parameters to be test 
model_params = {
    'n_estimators': [50, 100, 150],
    'max_features':  ['sqrt', 0.3, 0.6, 0.9 , 1.0],
    'min_samples_split': [0.1 0.3, 0.6]
}

## Grid Search SKlearn

In [27]:
rf_model = RandomForestClassifier()
clf = GridSearchCV(rf_model, model_params, cv=10)
model = clf.fit(X_train, y_train)

In [28]:
print(model.best_estimator_.get_params())

{'bootstrap': True, 'class_weight': None, 'criterion': 'gini', 'max_depth': None, 'max_features': 0.3, 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_impurity_split': None, 'min_samples_leaf': 1, 'min_samples_split': 0.1, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 150, 'n_jobs': None, 'oob_score': False, 'random_state': None, 'verbose': 0, 'warm_start': False}


In [30]:
pred = model.predict(X_test)

print(classification_report(pred, y_test))

              precision    recall  f1-score   support

           0       0.50      0.43      0.46        35
           1       0.47      0.78      0.58        18
           2       0.33      0.59      0.43        17
           3       0.67      0.59      0.62        34
           4       0.90      0.59      0.71        46

    accuracy                           0.57       150
   macro avg       0.57      0.59      0.56       150
weighted avg       0.64      0.57      0.59       150

