In [102]:
import pandas as pd
import numpy as np
from os import path
from CSVUtils import *
import ta
import matplotlib.pyplot as plt
import seaborn as sn
from scipy.stats import zscore

In [56]:
def exponential_moving_avg(df: pd.DataFrame, price_col='Price', window_size=15, center=True):
    """Return the exponentially weighted mean of one column of ``df``.

    Args:
        df: Frame that contains the column to smooth.
        price_col: Name of the column to smooth.
        window_size: Passed to ``ewm`` as ``span``. In the centred variant,
            ``int(window_size / 2)`` is also used as the shift distance.
        center: When this compares equal to ``True``, the column is shifted
            forward by half a window before smoothing and the smoothed result
            is then passed through ``_remove_trailing_data``. Otherwise the
            plain EWM mean of the column is returned.

    Returns:
        The smoothed values, indexed like ``df[price_col]``.
    """
    prices = df[price_col]

    # Plain (non-centred) variant: smooth the column as-is.
    if not (center == True):
        return prices.ewm(span=window_size).mean()

    # Centred variant: delay the input by half a window, smooth it, then let
    # the helper re-align the result and blank out the edges.
    half_window = int(window_size / 2)
    shifted_ema = prices.shift(half_window).ewm(span=window_size).mean()
    return _remove_trailing_data(shifted_ema, window_size)
    
def _remove_trailing_data(df: pd.DataFrame, window_size):
    return df.shift(-int(window_size/2)*2).shift(int(window_size/2))

In [61]:
# Directory passed to csv2df, and the names whose "<name>.csv" files are loaded.
DIR = "./input/yahoo"
nameList = ["RSX", "EWH", "IVV"]

# Date boundaries for the train and test periods.
trainStartDate, trainEndDate = pd.to_datetime("2000-01-01"), pd.to_datetime("2014-12-31")
testStartDate, testEndDate = pd.to_datetime("2015-01-01"), pd.to_datetime("2019-12-31")

# One loaded frame per entry of nameList, in the same order.
df_list = []
for name in nameList:
    df = csv2df(csv_path=DIR, csv_name=f"{name}.csv", source="yahoo")
    df_list += [df]

In [169]:
window_size = 15  # span of the EMA feature


def _bars_since_change(change_flags: pd.Series) -> np.ndarray:
    """Count rows elapsed since the last row whose flag was not exactly 0.

    A row whose flag equals 0 continues the running count (previous + 1).
    Any other value (1 or NaN) resets the count to 0, and the first row is
    always 0 regardless of its flag.

    Args:
        change_flags: Series of 0 / 1 / NaN flags.

    Returns:
        int64 array of counts, positionally aligned with ``change_flags``.
    """
    unchanged = change_flags.eq(0)  # NaN compares False, so it resets too
    unchanged.iloc[:1] = False  # the first row always starts a fresh count
    # Every reset row opens a new run; a cumulative sum inside each run gives
    # the number of consecutive unchanged rows seen so far.
    run_id = (~unchanged).cumsum().to_numpy()
    return unchanged.astype('int64').groupby(run_id).cumsum().to_numpy()


# Cleaned, feature-only frames, one per entry of df_list and in the same order.
# Previously the cleaned frame was only rebound to the loop variable, so it
# survived for the last frame alone.
feature_df_list = []

for df in df_list:
    # The feature columns are added in place, so the frames held by df_list
    # gain EMA / MACD_diff / MACD_change / delta_time while keeping their raw
    # columns.
    df['EMA'] = df['Price'].ewm(span=window_size).mean()
    df['MACD_diff'] = ta.trend.macd_diff(df['Price'])

    # +1 / -1 for the sign of MACD_diff; NaN where MACD_diff is NaN or 0 (0/0).
    # Kept as a top-level name because a later scratch cell reads it.
    macd_direction = df['MACD_diff'] / np.abs(df['MACD_diff'])

    # 0 when the sign matches the previous row, 1 when it flipped,
    # NaN when either side is undefined.
    df['MACD_change'] = (-1 * macd_direction * macd_direction.shift(1) + 1) / 2

    df['delta_time'] = _bars_since_change(df['MACD_change'])

    # Feature-only view: drop the raw columns and every row that still holds a
    # NaN. This produces a NEW frame; the one inside df_list is left untouched.
    df = (
        df.drop(['Open', 'High', 'Low', 'Price', 'Vol'], axis=1)
        .dropna()
        .reset_index(drop=True)
    )
    feature_df_list.append(df)

In [158]:
# Side-by-side view of the same 15 row positions (150-164) from the first two
# frames of df_list, with each EMA column rescaled to start at 1.
feature_cols = ['EMA', 'MACD_diff', 'delta_time']
a = df_list[0][feature_cols][150:165]
b = df_list[1][feature_cols][150:165]

result = pd.concat([a, b], axis=1, sort=False)
# The "_h" columns come from the first frame, the "_m" columns from the second.
result.columns = [f"{col}{suffix}" for suffix in ('_h', '_m') for col in feature_cols]
result = result.reset_index(drop=True)

# Divide both EMA columns by their first row so they are directly comparable.
ema_cols = ['EMA_h', 'EMA_m']
result[ema_cols] = result[ema_cols] / result[ema_cols].iloc[0]
result.values

array([[ 1.00000000e+00, -2.26594579e-02,  1.30000000e+01,
         1.00000000e+00,  1.13222550e-02,  2.50000000e+01],
       [ 1.00360157e+00,  3.33217408e-02,  0.00000000e+00,
         1.00314158e+00,  9.84822307e-03,  2.60000000e+01],
       [ 1.00695778e+00,  6.83277296e-02,  1.00000000e+00,
         1.00424060e+00,  1.24558588e-03,  2.70000000e+01],
       [ 1.01488797e+00,  1.72313420e-01,  2.00000000e+00,
         1.00685210e+00,  5.39036706e-04,  2.80000000e+01],
       [ 1.02221099e+00,  2.32722260e-01,  3.00000000e+00,
         1.00858725e+00, -3.12648272e-03,  0.00000000e+00],
       [ 1.02779919e+00,  2.40979053e-01,  4.00000000e+00,
         1.00955548e+00, -8.44773181e-03,  1.00000000e+00],
       [ 1.03706777e+00,  3.06879765e-01,  5.00000000e+00,
         1.00820286e+00, -2.04663831e-02,  2.00000000e+00],
       [ 1.04005625e+00,  2.37872930e-01,  6.00000000e+00,
         1.00646932e+00, -3.00481984e-02,  3.00000000e+00],
       [ 1.04553924e+00,  2.27728309e-01,  7.000

In [162]:
# Scratch check of np.argmax on a small list: the largest value sits at index 1.
np.argmax([0,1,0])

1

In [75]:
# Sign-change flag: 0 where the direction matches the previous row, 1 where it
# flipped, NaN where either value is missing.
# NOTE(review): `macd_direction` is not defined in this cell; presumably it is
# the value left behind by the feature loop's last iteration. Confirm on a
# fresh kernel.
prev_direction = macd_direction.shift(1)
macd_change = (1 - macd_direction * prev_direction) / 2
macd_change

0       NaN
1       NaN
2       NaN
3       NaN
4       NaN
       ... 
4967    0.0
4968    0.0
4969    1.0
4970    0.0
4971    0.0
Name: MACD_diff, Length: 4972, dtype: float64

In [108]:
# Label -> name mapping; print the names in insertion order.
df_namelist = dict(high="RSX", mid="EWH", low="IVV")
for i, ticker in df_namelist.items():
    print (ticker)

RSX
EWH
IVV


In [110]:
# Number of label -> name entries in df_namelist.
len(df_namelist)

3

In [173]:
# Show the six rows just before position 100 (positions 94-99).
# NOTE(review): `df` is not defined in this cell; presumably it is the variable
# left bound by the last `for df in df_list` loop. Confirm on a fresh kernel.
df.iloc[100-6:100]

Unnamed: 0,Date,Change,EMA,MACD_diff,MACD_change,delta_time
94,2000-11-20,-0.015429,95.501575,-0.312889,0.0,5
95,2000-11-21,0.007197,95.244938,-0.316921,0.0,6
96,2000-11-22,-0.024548,94.733632,-0.443702,0.0,7
97,2000-11-24,0.017486,94.485485,-0.391643,0.0,8
98,2000-11-27,0.008244,94.363935,-0.282252,0.0,9
99,2000-11-28,-0.013014,94.105455,-0.26848,0.0,10


In [174]:
# Show the single row at position 100.
# NOTE(review): `df` is not defined in this cell; presumably it is the variable
# left bound by the last `for df in df_list` loop. Confirm on a fresh kernel.
df.iloc[100]

Date           2000-11-29 00:00:00
Change                 -0.00186684
EMA                        93.8577
MACD_diff                -0.248082
MACD_change                      0
delta_time                      11
Name: 100, dtype: object