In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import os
import datetime

from helper import make_alphas_state_matrix_with_day_step
from helper import instrument_return
from helper import AlphaStats, calc_alphas_corr
from helper import normalize, neutralize, neutralize_with_dropout

import manipulate_alpha as mpa

from test import test1, test2, test3

SLICE_INDEX = 2436

# Prepare Data

In [104]:
dataset_dir = './USA_2010-2014_HLOCV'
os.listdir(dataset_dir)

['Open.csv', 'High.csv', 'Close.csv', 'Low.csv', 'Volume.csv']

In [125]:
open_df = pd.read_csv(dataset_dir + '/Open.csv')
high_df = pd.read_csv(dataset_dir + '/High.csv')
close_df = pd.read_csv(dataset_dir + '/Close.csv')
low_df = pd.read_csv(dataset_dir + '/Low.csv')
volume_df = pd.read_csv(dataset_dir + '/Volume.csv')

In [126]:
open_df.shape

(2436, 1258)

In [127]:
open_df = open_df.drop(open_df.columns[0], axis=1)
close_df = close_df.drop(close_df.columns[0], axis=1)
high_df = high_df.drop(high_df.columns[0], axis=1)
low_df = low_df.drop(low_df.columns[0], axis=1)
volume_df = volume_df.drop(volume_df.columns[0], axis=1)

In [128]:
# Get unique years
dates = list(open_df.columns)
years = [date.split('-')[0] for date in dates]
unique_years = np.sort(np.unique(years))
unique_years

array(['2010', '2011', '2012', '2013', '2014'], dtype='<U4')

In [129]:
train_split_date = f'01-01-{unique_years[-2]}'
val_split_date = f'01-01-{unique_years[-1]}'

print(train_split_date, "\t", val_split_date)

01-01-2013 	 01-01-2014


In [130]:
open_df.shape

(2436, 1257)

In [131]:
def split_samples(train_split, val_split, df):
    _df = df.T.reset_index()
    train_df = _df[pd.to_datetime(_df[_df.columns[0]]) < train_split].drop(_df.columns[0], axis=1).T
    val_df = _df[(pd.to_datetime(_df[_df.columns[0]]) >= train_split)&(
        pd.to_datetime(_df[_df.columns[0]]) < val_split)].drop(_df.columns[0], axis=1).T
    test_df = _df[pd.to_datetime(_df[_df.columns[0]]) >= val_split].drop(_df.columns[0], axis=1).T

    return train_df, val_df, test_df

# Constructing Alphas

## 1. Reverse Alpha

$\frac{close(d-n)}{close(d-1)}$

In [132]:
train_close, val_close, test_close = split_samples(train_split_date, val_split_date, close_df)
train_open, val_open, test_open = split_samples(train_split_date, val_split_date, open_df)
train_high, val_high, test_high = split_samples(train_split_date, val_split_date, high_df)
train_low, val_low, test_low = split_samples(train_split_date, val_split_date, low_df)
train_volume, val_volume, test_volume = split_samples(train_split_date, val_split_date, volume_df)

In [133]:
def make_first_alpha(close_df, window):
    return (close_df.shift(window)/close_df.shift(1)).fillna(0)

In [137]:
window = 6
alpha_matrix1 = normalize(neutralize(make_first_alpha(train_close, window).T.to_numpy()))

In [143]:
alpha = normalize(neutralize(alpha_matrix1))

In [144]:
test1(alpha)
test2(alpha)

Neutrality test passed
Normality test passed


In [139]:
alpha_1_data, alpha_1_cumpnl = AlphaStats(alpha_matrix1, train_close)
alpha_1_data.head()

AttributeError: 'int' object has no attribute 'split'