# Import Modules

In [25]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import KFold

# Prepare Data

In [26]:
# Load the training features and labels as NumPy arrays; the label table is
# flattened to 1-D so it can be indexed with the same row indices as x.
x = pd.read_csv("./data/1_X_train.csv").to_numpy()
y = pd.read_csv("./data/1_Y_train.csv").to_numpy().reshape(-1)

# NOTE(review): presumably each day contributes `sequence_length` consecutive
# rows -- confirm against the data description.
sequence_length = 16
# Floor division keeps the count in exact integer arithmetic; any trailing
# rows that do not fill a complete sequence are not counted.
num_days = x.shape[0] // sequence_length
feature_size = x.shape[1]

# Set Free Parameters

In [27]:
# Random forest hyperparameters.
n_estimators = 50
max_depth = 10
min_samples_split = 5

# Cross-validation protocol: K-fold with `num_splits` folds, repeated
# `num_repeats` times.
num_repeats = 10
num_splits = 10

# Instantiate Model

In [28]:
# Single classifier object, configured from the free parameters set earlier
# in this file.
rf = RandomForestClassifier(
    n_estimators=n_estimators,
    max_depth=max_depth,
    min_samples_split=min_samples_split,
)

# Training

In [29]:
# Repeated K-fold cross-validation comparing the random forest trained
# without ("-ratio") and with ("+ratio") the energy data ratio features.

# Columns 0, 7 and 8 of x correspond to the energy data ratio features; the
# "-ratio" run keeps only the remaining columns. The list is defined once so
# that fit and predict are guaranteed to use the same feature subset.
non_ratio_cols = [1, 2, 3, 4, 5, 6, 9, 10, 11, 12, 13]

ratio_accs = []  # per-repeat mean accuracy using all features
accs = []        # per-repeat mean accuracy with the ratio features removed

for repeat in range(num_repeats):
    # Seeding with the repeat index gives a different shuffle per repeat.
    kf = KFold(n_splits=num_splits, random_state=repeat, shuffle=True)
    running_acc = 0
    ratio_running_acc = 0

    # NOTE(review): folds are drawn over individual rows. If rows that belong
    # to the same day/sequence must not be split between train and test, a
    # grouped split would be needed -- confirm the intended protocol.
    for train_idx, test_idx in kf.split(x):
        # Split data into train and test partitions for this fold.
        train_x, test_x = x[train_idx], x[test_idx]
        train_y, test_y = y[train_idx], y[test_idx]

        # -ratio: train and evaluate without the ratio columns.
        rf.fit(train_x[:, non_ratio_cols], train_y)
        pred_y = rf.predict(test_x[:, non_ratio_cols])
        # The mean of the boolean match array is the fold accuracy.
        running_acc += np.mean(pred_y == test_y)

        # +ratio: train and evaluate on every column. fit() is called again
        # on the same estimator object before predicting.
        rf.fit(train_x, train_y)
        pred_y = rf.predict(test_x)
        ratio_running_acc += np.mean(pred_y == test_y)

    # Average the fold accuracies to get one score per repeat.
    mean_acc = running_acc / num_splits
    accs.append(mean_acc)

    ratio_mean_acc = ratio_running_acc / num_splits
    ratio_accs.append(ratio_mean_acc)

# Report mean(std) of the per-repeat accuracies.
print(f"-ratio:{np.mean(accs)}({np.std(accs)})")
print(f"+ratio:{np.mean(ratio_accs)}({np.std(ratio_accs)})")

-ratio:0.8092647058823529(0.00474478406026519)
+ratio:0.839485294117647(0.0030325868569560637)
