# Import Object

In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVC

# Prepare Data

In [2]:
train_x = pd.read_csv("./data/2_X_train.csv").values
train_y = pd.read_csv("./data/2_Y_train.csv").values.reshape(-1)

sequence_length = 16
num_days = int(train_x.shape[0] / sequence_length)
feature_size = train_x.shape[1]
train_x = train_x.reshape(num_days, sequence_length, feature_size)
train_y = train_y.reshape(num_days, sequence_length, 1)
train_x, test_x, train_y, test_y = train_test_split(train_x,
                                                    train_y,
                                                    test_size=0.1,
                                                    shuffle=True)

train_x = train_x.reshape(-1, train_x.shape[2])
train_y = train_y.reshape(-1)

test_x = test_x.reshape(-1, test_x.shape[2])
test_y = test_y.reshape(-1)

# Set Free Parameters

In [3]:
max_depth = 10
n_estimators = 50
min_samples_split = 5
gamma = 'auto'


# Instantiate Model, Criterion, Optimizer

In [4]:
rf = RandomForestClassifier(max_depth=max_depth, n_estimators=n_estimators,
                            min_samples_split=min_samples_split)

linear_svm = make_pipeline(StandardScaler(),
                           SVC(gamma=gamma, kernel='linear'))

rbf_svm = make_pipeline(StandardScaler(),
                        SVC(gamma=gamma, kernel='rbf'))

# Training

In [5]:
rf.fit(train_x, train_y)

linear_svm.fit(train_x, train_y)

rbf_svm.fit(train_x, train_y)

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('svc', SVC(gamma='auto'))])

# Accuracy

In [6]:
pred_y = rf.predict(test_x)
acc = sum(pred_y == test_y) / int(test_y.shape[0])

print(classification_report(test_y, pred_y))
print(f"Random Forest Accuracy: {acc}")

              precision    recall  f1-score   support

           0       0.97      0.80      0.88        89
           1       0.79      0.97      0.87        71

    accuracy                           0.88       160
   macro avg       0.88      0.88      0.87       160
weighted avg       0.89      0.88      0.88       160

Random Forest Accuracy: 0.875


In [7]:
pred_y = linear_svm.predict(test_x)
acc = sum(pred_y == test_y) / int(test_y.shape[0])

print(classification_report(test_y, pred_y))
print(f"Linear Support Vector Machine Accuracy: {acc}")

              precision    recall  f1-score   support

           0       0.95      0.78      0.85        89
           1       0.77      0.94      0.85        71

    accuracy                           0.85       160
   macro avg       0.86      0.86      0.85       160
weighted avg       0.87      0.85      0.85       160

Linear Support Vector Machine Accuracy: 0.85


In [8]:
pred_y = rbf_svm.predict(test_x)
acc = sum(pred_y == test_y) / int(test_y.shape[0])

print(classification_report(test_y, pred_y))
print(f"RBF Support Vector Machine Accuracy: {acc}")

              precision    recall  f1-score   support

           0       0.97      0.82      0.89        89
           1       0.81      0.97      0.88        71

    accuracy                           0.89       160
   macro avg       0.89      0.90      0.89       160
weighted avg       0.90      0.89      0.89       160

RBF Support Vector Machine Accuracy: 0.8875
