# **Random Forest**

#### Import libraries

In [2]:
import numpy as np
import matplotlib.pyplot as plt

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score
from statistics import mean

#### General configurations

In [3]:
np.random.seed(1)

#### Initialise k-fold

In [4]:
kf = StratifiedKFold(n_splits=5)

#### Load and prepare data

In [5]:
train_data = np.genfromtxt('../dataset/sign_mnist_train.csv', delimiter=',')
test_data = np.genfromtxt('../dataset/sign_mnist_test.csv', delimiter=',')

In [6]:
X_train = train_data[1:, 1:]
y_train = train_data[1:, 0]

X_test = test_data[1:, 1:]
y_test = test_data[1:, 0]

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(27455, 784) (27455,)
(7172, 784) (7172,)


#### Random Forest

In [9]:
f1_scores = []

for train_index, test_index in kf.split(X_train, y_train):
    X_train_fold, X_test_fold = X_train[train_index], X_train[test_index]
    y_train_fold, y_test_fold = y_train[train_index], y_train[test_index]
    
    # prediction
    model = RandomForestClassifier(max_depth=17, random_state=0)
    model.fit(X_train_fold, y_train_fold.ravel())
    y_predicted = model.predict(X_test_fold)
    
    # calculate score
    score = f1_score(y_test_fold, y_predicted, average='micro')
    f1_scores.append(score)

print(f1_scores)
print('Average F1 Score:', mean(f1_scores))

[0.9737752686213804, 0.9737752686213804, 0.9699508286286651, 0.9750500819522856, 0.978328173374613]
Average F1 Score: 0.9741759242396649


#### Final Result

In [10]:
y_predicted = model.predict(X_test)
score = f1_score(y_test, y_predicted, average='micro')
print('F1 Score:', score)

F1 Score: 0.734941438929169
