In [2]:
from data_wrangle import data_wrangle as dw
from datetime import datetime, date, timedelta
import time
import itertools
import pickle

# Basics
import numpy as np
import pandas as pd

#Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Optimization part
from pypfopt import risk_models, expected_returns, plotting, EfficientFrontier

# sklearn RandomForest - Returns, ML Metrics and tools
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import RandomizedSearchCV, cross_val_score, GridSearchCV
from sklearn.pipeline import make_pipeline

# GARCH - Volatility
from arch import arch_model
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

In [3]:
def _load_pickle(path):
    """Open ``path`` in binary mode and return the object unpickled from it."""
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Validation artefacts, all read from ./validation_data.
random_forest_val_results = _load_pickle("./validation_data/random_forest_val_results.pkl")
param_grid_random_forest = _load_pickle("./validation_data/param_grid_random_forest.pkl")
RF_params = _load_pickle("./validation_data/RF_params.pkl")
binary_realized = _load_pickle("./validation_data/binary_realized.pkl")
ohlc_realized = _load_pickle("./validation_data/ohlc_realized.pkl")

# Random Forest

In [4]:
# Align 'binary_realized' with 'random_forest_val_results' so the two can be compared:
# keep only the rows whose labels appear in the index of the first model's results
# (this filters out the training window frame).
validation_index = random_forest_val_results[0].index
binary_actual = binary_realized.loc[validation_index]

In [5]:

# Score every Random Forest configuration against the realized labels.
# Each result dict maps RF_params[model] to a list with one score per column of
# binary_actual, in column order.
# NOTE(review): this presumably relies on every model's predictions sharing the
# row order of binary_actual (which was filtered with model 0's index) — confirm.
f1_scores, recall_scores, precision_scores, accuracy_scores = {}, {}, {}, {}

# Parallel tuples: the i-th scorer fills the i-th store.
score_stores = (f1_scores, recall_scores, precision_scores, accuracy_scores)
scorers = (
    metrics.f1_score,
    metrics.recall_score,
    metrics.precision_score,
    metrics.accuracy_score,
)

for model in range(len(RF_params)):
    params = RF_params[model]
    predicted = random_forest_val_results[model]
    per_scorer = [[] for scorer in scorers]  # one list of company scores per metric

    for company in binary_actual.columns:
        actual = binary_actual[company]
        forecast = predicted[company]
        for bucket, scorer in zip(per_scorer, scorers):
            bucket.append(scorer(actual, forecast))

    for store, bucket in zip(score_stores, per_scorer):
        store[params] = bucket
        
        



In [6]:
# Summary table: one row per entry of RF_params, one column per metric, where each
# cell is the mean of that metric across the columns of binary_actual.
score_tables = {
    'accuracy_scores': accuracy_scores,
    'precision_scores': precision_scores,
    'recall_scores': recall_scores,
    'f1_scores': f1_scores,
}

df = pd.DataFrame(index=RF_params)
for column_name, per_model_scores in score_tables.items():
    per_company = pd.DataFrame.from_dict(
        per_model_scores, orient='index', columns=binary_actual.columns
    )
    df[column_name] = per_company.mean(axis=1)


In [7]:
# Display the summary frame: mean accuracy, precision, recall and F1 per parameter set.
df

Unnamed: 0,accuracy_scores,precision_scores,recall_scores,f1_scores
"(25, 3, 3)",0.511975,0.543496,0.652759,0.592269
"(25, 3, 6)",0.507294,0.544025,0.604316,0.572067
"(25, 6, 3)",0.511975,0.543496,0.652759,0.592269
"(25, 6, 6)",0.506332,0.544638,0.590063,0.565972
"(50, 3, 3)",0.510261,0.542665,0.643152,0.587555
"(50, 3, 6)",0.508046,0.544422,0.606452,0.57308
"(50, 6, 3)",0.510261,0.542665,0.643152,0.587555
"(50, 6, 6)",0.505831,0.543956,0.591044,0.565993
