In [1]:
# Dependencies and kernel-wide setup for this notebook.
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
from sklearn import metrics
# NOTE(review): this binds the top-level matplotlib package to `plt`; the
# conventional plotting alias is `import matplotlib.pyplot as plt`. Confirm
# what later cells expect before changing it.
import matplotlib as plt
import warnings
import os
# Wildcard import; the cells below rely on it to provide `HARModel`.
from HAR_model import *
%matplotlib inline
# Suppresses all warnings from this point on, including pandas/statsmodels
# diagnostics — TODO consider narrowing to specific categories.
warnings.filterwarnings('ignore')

In [2]:

# Inclusive bounds of the intraday window to keep (09:30 through 16:00).
window_start = pd.to_datetime('09:30').time()
window_end = pd.to_datetime('16:00').time()

# Load the CSV and parse the timestamp column once.
bars = pd.read_csv('data/SPY_data_5min.csv')
stamps = pd.to_datetime(bars['time'])
clock = stamps.dt.time
in_window = (clock >= window_start) & (clock <= window_end)

# Keep only in-window rows, drop the unused columns, and store the time of
# day as an 'HH:MM' string in place of the full timestamp. The original row
# index is preserved.
raw_data = (
    bars.loc[in_window]
    .drop(['Unnamed: 0', 'money', 'open', 'high', 'low'], axis=1)
    .assign(time=stamps.loc[in_window].dt.strftime('%H:%M'))
)


In [3]:
# Settings grid for the HAR models: one model is built for every combination
# of the three lists below.

# Values passed as `future=` to HARModel (presumably forecast horizons in
# trading days — confirm against HAR_model).
futures = [1, 5, 20]

# Values passed as `semi_variance=`: build each model with and without it.
semi_variance = [True, False]

# Values passed as `log_transformation=`: build each model with and without it.
log_transformation = [True, False]


In [4]:

# Sample boundaries shared by every model. Timestamps are immutable, so they
# are parsed once here and reused for each instance.
train_start = pd.to_datetime("2006-01-01", format="%Y-%m-%d")
train_end = pd.to_datetime("2015-12-31", format="%Y-%m-%d")
test_start = pd.to_datetime("2016-01-01", format="%Y-%m-%d")
test_end = pd.to_datetime("2018-12-31", format="%Y-%m-%d")

# Fitted models keyed by (future, semi_variance, log_transformation).
models = {}

# Every settings combination, in the order nested loops over the three
# lists would visit them (futures outermost, log_transformation innermost).
grid = [
    (h, sv, lg)
    for h in futures
    for sv in semi_variance
    for lg in log_transformation
]

for future, variance, log_t in grid:
    m = HARModel(
        raw_data=raw_data,
        future=future,
        lags=[4, 20],
        feature='RV',
        semi_variance=variance,
        log_transformation=log_t,
        # Fresh list objects per model so instances never share them.
        period_train=[train_start, train_end],
        period_test=[test_start, test_end],
    )
    # Register the model under its settings before running its pipeline.
    models[(future, variance, log_t)] = m

    # Pipeline steps, run in this fixed order for every model.
    m.data_transformation()
    m.jump_detection()
    m.predict_values()
    m.make_accurate_measures()

In [5]:
output_directory = "../output/HAR_outputdata"

# Create the target folder (and any missing parents); no error if it exists.
os.makedirs(output_directory, exist_ok=True)

# Write one text report per fitted model.
# The per-model settings are bound to loop-local names that deliberately do
# NOT reuse `semi_variance`: that name holds the list of settings the
# model-building cell iterates over, and overwriting it with a bool here
# would make that cell fail when re-run.
for model in models.values():
    future = model.future
    use_semi_variance = model.semi_variance
    use_log = model.log_transformation

    # File name encodes the settings, e.g. HAR_<future>_<semi>_<log>.txt
    output_file_path = os.path.join(
        output_directory,
        f'HAR_{future}_{use_semi_variance}_{use_log}.txt',
    )

    with open(output_file_path, 'w') as output_file:
        # Estimation summary first, then the train and test accuracy measures.
        output_file.write(f"{model.estimation_results} \n")
        output_file.write("Train Accuracy \n")
        output_file.write(f"{model.train_accuracy} \n")
        output_file.write("Test Accuracy \n")
        output_file.write(f"{model.test_accuracy} \n")