In [4]:
import math
import os
from datetime import datetime

import pandas as pd
import requests
from ta import add_all_ta_features
# NOTE(security): this literal key is stored in the notebook and is also echoed in
# the saved cell outputs, so it should be treated as leaked: rotate it and then
# remove the fallback. Until that happens, a value supplied through the
# ALPHAVANTAGE_API_KEY environment variable takes precedence over the literal.
AV_KEY = os.environ.get('ALPHAVANTAGE_API_KEY') or '5AK7ZPDAGCNO39B7'

In [7]:
# --- Symbols ---------------------------------------------------------------
STOCKS = [
    '500.PAR',
]  # ['500.PAR','IBM']
CRYPTOS = [
    'BTC',
    'ETH',
]

# --- Label / look-ahead selection ------------------------------------------
LABEL_SYMBOL = '500.PAR'
LABEL = 'close'
LOOK_AHEAD = ['open']

# --- Differencing ----------------------------------------------------------
# NOTE(review): the values here are plain ints, whereas the diff dictionaries
# passed to PivotData/DiffShift map each span to a *list* of shifts — confirm
# how this constant is meant to be consumed.
DIFFS = {
    1: 3,
    5: 3,
    20: 3,
}

# --- Sampling --------------------------------------------------------------
NAN_LIMIT = 100  # remove
NO_SAMPLES = 500
TRAINING_RATIO = 0.5

In [5]:
# Indicator column names that can be selected from the frame produced by
# add_all_ta_features (see CalculateTechnicals). Grouped by name prefix; the
# order is kept because it determines the column order of the selection.
TECHNICAL_COLS = [
    # volume_*
    'volume_adi',
    'volume_obv',
    'volume_cmf',
    'volume_fi',
    'volume_mfi',
    'volume_em',
    'volume_vpt',
    'volume_nvi',
    'volume_vwap',
    # volatility_*
    'volatility_atr',
    'volatility_bbm',
    'volatility_bbh',
    'volatility_bbl',
    'volatility_bbw',
    'volatility_bbp',
    'volatility_bbhi',
    'volatility_bbli',
    'volatility_kcc',
    'volatility_kch',
    'volatility_kcl',
    'volatility_kcw',
    'volatility_kcp',
    'volatility_kchi',
    'volatility_kcli',
    'volatility_dcl',
    'volatility_dch',
    'volatility_dcm',
    'volatility_dcw',
    'volatility_dcp',
    'volatility_ui',
    # trend_*
    'trend_macd',
    'trend_macd_signal',
    'trend_macd_diff',
    'trend_sma_fast',
    'trend_sma_slow',
    'trend_ema_fast',
    'trend_ema_slow',
    'trend_adx',
    'trend_adx_pos',
    'trend_adx_neg',
    'trend_vortex_ind_pos',
    'trend_vortex_ind_neg',
    'trend_vortex_ind_diff',
    'trend_trix',
    'trend_mass_index',
    'trend_cci',
    'trend_dpo',
    'trend_kst',
    'trend_kst_sig',
    'trend_kst_diff',
    'trend_ichimoku_conv',
    'trend_ichimoku_base',
    'trend_ichimoku_a',
    'trend_ichimoku_b',
    'trend_visual_ichimoku_a',
    'trend_visual_ichimoku_b',
    'trend_aroon_up',
    'trend_aroon_down',
    'trend_aroon_ind',
    'trend_psar_up_indicator',
    'trend_psar_down_indicator',
    'trend_stc',
    # momentum_*
    'momentum_rsi',
    'momentum_stoch_rsi',
    'momentum_stoch_rsi_k',
    'momentum_stoch_rsi_d',
    'momentum_tsi',
    'momentum_uo',
    'momentum_stoch',
    'momentum_stoch_signal',
    'momentum_wr',
    'momentum_ao',
    'momentum_kama',
    'momentum_roc',
    'momentum_ppo',
    'momentum_ppo_signal',
    'momentum_ppo_hist',
    # others_*
    'others_dr',
    'others_dlr',
    'others_cr',
]

In [10]:
def _FetchAlphaVantageDaily(params, series_key, column_map):
    """Fetch one daily series from the Alpha Vantage query endpoint as a numeric frame.

    Args:
        params: query parameters WITHOUT the API key (function, symbol, ...).
        series_key: top-level key of the JSON payload that holds the per-date records.
        column_map: mapping from the payload's field names to the output column
            names; only these fields are kept, in this order.

    Returns:
        DataFrame indexed by the payload's date strings (index name 'date'),
        sorted ascending, with every column converted by pd.to_numeric
        (unparseable values become NaN).

    Raises:
        requests.HTTPError: the endpoint answered with an HTTP error status.
        KeyError: the payload does not contain `series_key`; the message carries
            the beginning of the payload so the cause is visible.
    """
    endpoint = 'https://www.alphavantage.co/query'
    # Log the request without the credential so the key is not written into the
    # notebook's saved output.
    print(endpoint + '?' + '&'.join(str(k) + '=' + str(v) for k, v in params.items()))
    r = requests.get(endpoint, params=dict(params, apikey=AV_KEY), timeout=30)
    r.raise_for_status()
    data = r.json()
    if series_key not in data:
        # Keep KeyError (what the plain dict lookup raised) but say what came back.
        raise KeyError(series_key + ' missing from response: ' + str(data)[:300])
    df = pd.DataFrame.from_dict(data[series_key], orient='index')
    df = df[list(column_map)].rename(columns=column_map)
    df.index.name = 'date'
    df = df.sort_index(ascending=True)
    df = df.apply(pd.to_numeric, errors='coerce')
    return df


def GetStockPriceDF(symbol, max_rows=30):
    """Download daily open/high/low/close/volume for a stock symbol.

    Args:
        symbol: ticker passed to the TIME_SERIES_DAILY_ADJUSTED function.
        max_rows: keep only the most recent `max_rows` rows. The default of 30
            preserves the previous hard-coded truncation; pass None to keep the
            full downloaded history.

    Returns:
        DataFrame with columns open, high, low, close, volume, indexed by date
        string in ascending order.
    """
    df = _FetchAlphaVantageDaily(
        {'function': 'TIME_SERIES_DAILY_ADJUSTED', 'symbol': symbol, 'outputsize': 'full'},
        'Time Series (Daily)',
        {'1. open': 'open', '2. high': 'high', '3. low': 'low', '4. close': 'close', '6. volume': 'volume'},
    )
    if max_rows is not None:
        df = df.tail(max_rows)
    return df


def GetCryptoPriceDF(symbol):
    """Download daily open/high/low/close/volume for a crypto symbol, using the CNY fields.

    Args:
        symbol: currency code passed to the DIGITAL_CURRENCY_DAILY function.

    Returns:
        DataFrame with columns open, high, low, close, volume, indexed by date
        string in ascending order.
    """
    return _FetchAlphaVantageDaily(
        {'function': 'DIGITAL_CURRENCY_DAILY', 'symbol': symbol, 'market': 'CNY'},
        'Time Series (Digital Currency Daily)',
        {'1a. open (CNY)': 'open', '2a. high (CNY)': 'high', '3a. low (CNY)': 'low',
         '4a. close (CNY)': 'close', '5. volume': 'volume'},
    )

def CalculateTechnicals(df, technical_cols):
    """Compute technical indicators for a price frame and return the requested ones.

    Args:
        df: frame with 'open', 'high', 'low', 'close' and 'volume' columns.
        technical_cols: list of indicator column names to return.

    Returns:
        DataFrame holding only `technical_cols`, on the same index as `df`.

    Raises:
        KeyError: a requested name is not among the generated columns.
    """
    # add_all_ta_features attaches its indicator columns to the frame it receives;
    # hand it a copy so the caller's price frame is left untouched.
    enriched = add_all_ta_features(
        df.copy(), open="open", high="high", low="low", close="close", volume="volume"
    )
    return enriched[technical_cols]

def DownloadData(stocks, cryptos, technical_cols=None):
    """Download every requested symbol and inner-join the per-symbol frames on their index.

    Args:
        stocks: iterable of stock symbols, fetched with GetStockPriceDF.
        cryptos: iterable of crypto symbols, fetched with GetCryptoPriceDF.
        technical_cols: indicator columns handed to AddTechnicalFeatures. When
            omitted, the notebook-level `technical_cols` variable is used if it
            exists (the behaviour two-argument callers relied on), otherwise
            TECHNICAL_COLS.

    Returns:
        One DataFrame containing the columns of all symbols, restricted to the
        index values common to every symbol.

    Raises:
        ValueError: both `stocks` and `cryptos` are empty.
    """
    if technical_cols is None:
        # Previously this function read a global that is only assigned in a later
        # cell; keep honouring it so existing two-argument calls behave the same.
        namespace = globals()
        technical_cols = namespace['technical_cols'] if 'technical_cols' in namespace else TECHNICAL_COLS

    # NOTE(review): AddTechnicalFeatures is not defined in the visible part of
    # this notebook — confirm it is available before running this function.
    dfs = []
    for symbol in stocks:
        df = GetStockPriceDF(symbol)
        df = AddTechnicalFeatures(df, symbol, technical_cols)
        dfs.append(df)
    for symbol in cryptos:
        df = GetCryptoPriceDF(symbol)
        df = AddTechnicalFeatures(df, symbol, technical_cols)
        dfs.append(df)
    if not dfs:
        raise ValueError('DownloadData needs at least one stock or crypto symbol')
    return pd.concat(dfs, axis=1, join="inner")

def ExtractSampleSet(df, date, no_samples):
    """Return the `no_samples` rows that immediately precede `date`.

    The row at `date` itself is not part of the result.

    Args:
        df: frame whose index contains `date`.
        date: index label marking the (exclusive) end of the window.
        no_samples: number of rows to return.

    Returns:
        The slice of `df` with `no_samples` rows ending just before `date`, or
        None when fewer than `no_samples` rows precede `date`.

    Raises:
        KeyError: `date` is not in the index (raised by Index.get_loc).
    """
    ind = df.index.get_loc(date)
    start = ind - no_samples
    # start == 0 is a complete window (exactly no_samples rows precede `date`);
    # only a negative start means there is not enough history.
    if start < 0:
        return None
    return df.iloc[start:ind, :]

def PivotData(df, diffs, lookahead, label_column, label_symbol):
    """Build a frame of lagged differences plus same-row look-ahead and label columns.

    Column names of `df` are split on '_': the first piece is compared with
    `label_symbol`, the second with `lookahead` / `label_column`.

    Args:
        df: input frame; every column is differenced.
        diffs: mapping {span: iterable of lags}. For each span every column is
            differenced over `span` rows and the result is shifted by each lag.
            Output names are '<col>_diff<span>_shift<lag>'.
        lookahead: second name piece of columns whose 1-row difference is added
            unshifted ('<col>_diff1_shift0').
        label_column: second name piece of the label column.
        label_symbol: first name piece of the label column.

    Returns:
        The inner-joined (axis=1) frame; the label column, when one matches, is
        placed last.
    """
    def _change_over(series, span):
        # Last value of each (span + 1)-row window minus its first value.
        return series.rolling(window=span + 1).apply(lambda w: w.iloc[span] - w.iloc[0])

    blocks = []
    for span, lags in diffs.items():
        deltas = pd.DataFrame()
        for name in df.columns:
            deltas[name + '_diff' + str(span)] = _change_over(df[name], span)
        lagged = [
            deltas.shift(periods=lag).add_suffix('_shift' + str(lag))
            for lag in lags
        ]
        blocks.append(pd.concat(lagged, axis=1, join="inner"))

    # Look-ahead and label columns: 1-row difference, no shift.
    target = None
    span, lag = 1, 0
    for name in df.columns:
        pieces = name.split('_')
        kind_piece = pieces[1]
        symbol_piece = pieces[0]
        is_label = kind_piece == label_column and symbol_piece == label_symbol
        if not (kind_piece == lookahead or is_label):
            continue
        single = pd.DataFrame()
        single[name + '_diff' + str(span)] = _change_over(df[name], span)
        single = single.shift(periods=lag).add_suffix('_shift' + str(lag))
        if is_label:
            target = single
        else:
            blocks.append(single)

    if target is not None:
        blocks.append(target)

    return pd.concat(blocks, axis=1, join="inner")

def DiffShift(df, columns, diffs):
    """Difference selected columns over several spans and shift each result by several lags.

    Args:
        df: input frame.
        columns: names of the columns of `df` to process.
        diffs: mapping {span: iterable of lags}. A span of 0 takes the column
            as-is (name unchanged); any other span produces the value minus the
            value `span` rows earlier, named '<col>_D<span>'. Every lag adds the
            suffix '_S<lag>'.

    Returns:
        All span/lag combinations inner-joined along the columns.
    """
    per_span = []
    for span, lags in diffs.items():
        base = pd.DataFrame()
        for name in columns:
            if span != 0:
                windows = df[name].rolling(window=span + 1)
                base[name + '_D' + str(span)] = windows.apply(lambda w: w.iloc[span] - w.iloc[0])
            else:
                base[name] = df[name]
        lagged = [
            base.shift(periods=lag).add_suffix('_S' + str(lag))
            for lag in lags
        ]
        per_span.append(pd.concat(lagged, axis=1, join="inner"))
    return pd.concat(per_span, axis=1, join="inner")

def CreateLabel(df, symbol, column):
    """Build a one-column 'label' frame from the '<symbol>_<column>' column of `df`.

    The frame is walked with its index in descending order, so each 2-row
    window holds the row that follows in ascending order and then the current
    row; the label is the former minus the latter. The row with the greatest
    index value has no successor and gets NaN.

    Args:
        df: frame containing the column '<symbol>_<column>'.
        symbol: first part of the column name.
        column: second part of the column name.

    Returns:
        DataFrame with the single column 'label', index sorted ascending.
    """
    newest_first = df.sort_index(ascending=False)
    source = newest_first[symbol + '_' + column]
    forward_change = source.rolling(window=2).apply(lambda pair: pair.iloc[0] - pair.iloc[1])
    label_df = pd.DataFrame()
    label_df['label'] = forward_change
    return label_df.sort_index(ascending=True)

In [12]:
# Cell-local settings for the per-symbol feature assembly below.
stocks = ['500.PAR']
pivot_cols = ['open'] # ['open','high','low','close','volume']
diff_shifts = {1:[1]} #{1:[1,2,3,4],5:[5,10,15]}

peak_cols = ['open']
technical_cols = ['volume_adi']

label_sym = '500.PAR'
label_col = 'close'

full_res, label_df = [], None
# Iterate the `stocks` list defined in this cell (it was previously assigned but
# unused while the loop read the global STOCKS).
for symbol in stocks:
    sym_dfs = []
    df = GetStockPriceDF(symbol)
    print(df)
    sym_dfs.append(DiffShift(df, pivot_cols, diff_shifts)) # pivot
    sym_dfs.append(DiffShift(df, peak_cols, {1:[0]})) # peek ahead
    # Pass a copy so the indicator computation cannot add columns to `df`,
    # which is reused for the label below.
    technicals = CalculateTechnicals(df.copy(), technical_cols) # calculate technicals
    print(technicals)
    sym_dfs.append(DiffShift(technicals, technical_cols, {0:[1]})) #shifted technicals
    sym_df = pd.concat(sym_dfs, axis=1, join="inner").add_prefix(symbol + '_') #combined symbol data
    full_res.append(sym_df)

    if symbol == label_sym:
        # Unshifted 1-row difference of the label column.
        label_df = DiffShift(df, [label_col], {1:[0]})

df = pd.concat(full_res, axis=1, join='inner') #combined data
print(df)
    

https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol=500.PAR&outputsize=full&apikey=5AK7ZPDAGCNO39B7
               open     high      low    close  volume
date                                                  
2021-10-04  70.1465  70.3220  69.2680  69.2680   20034
2021-10-05  69.7227  70.6183  69.7000  70.5850    3802
2021-10-06  70.0788  70.4488  69.7600  70.1720   10918
2021-10-07  71.2665  72.0000  71.2584  71.8690   96843
2021-10-08  71.7390  71.7800  71.3970  71.5075   36806
2021-10-11  71.0699  71.6715  71.0000  71.6314    6859
2021-10-12  70.4148  71.1998  70.4000  71.0449   75216
2021-10-13  70.6458  70.9581  70.4289  70.5409  402371
2021-10-14  71.0780  71.8265  71.0000  71.8020    1830
2021-10-15  72.0413  72.5000  72.0328  72.4134    9744
2021-10-18  72.3973  72.6346  72.0375  72.5842  245500
2021-10-19  72.5241  72.9379  72.5014  72.9062    3750
2021-10-20  72.9410  73.2804  72.9410  73.2299  233175
2021-10-21  73.0330  73.3274  73.0330  73.1539    

  dip[i] = 100 * (self._dip[i] / self._trs[i])
  din[i] = 100 * (self._din[i] / self._trs[i])


In [57]:
# Download both symbols and show the last rows of the combined frame.
# NOTE(review): three positional arguments are passed; check that this matches
# DownloadData's current signature (the third is presumably the list of
# technical columns — TODO confirm).
df = DownloadData(['500.PAR','IBM'],[],[])
print(df.tail())

https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol=500.PAR&outputsize=full&apikey=5AK7ZPDAGCNO39B7


  dip[i] = 100 * (self._dip[i] / self._trs[i])
  din[i] = 100 * (self._din[i] / self._trs[i])


https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol=IBM&outputsize=full&apikey=5AK7ZPDAGCNO39B7


  dip[i] = 100 * (self._dip[i] / self._trs[i])
  din[i] = 100 * (self._din[i] / self._trs[i])


            500.PAR_open  500.PAR_high  500.PAR_low  500.PAR_close  \
date                                                                 
2021-11-03       75.2000       75.2309      75.0820        75.1559   
2021-11-04       75.8710       76.2770      75.8530        76.1082   
2021-11-05       76.1528       77.0700      76.1445        76.6530   
2021-11-08       76.3740       76.5929      76.3000        76.3679   
2021-11-09       76.1175       76.4446      75.9287        76.0374   

            500.PAR_volume  IBM_open  IBM_high  IBM_low  IBM_close  IBM_volume  
date                                                                            
2021-11-03            3251   126.230    127.29   125.68     127.13     5421406  
2021-11-04           50139   123.050    123.34   119.90     120.85     7208736  
2021-11-05           11306   121.430    123.77   121.43     123.61     6790478  
2021-11-08            4869   123.985    124.78   123.53     124.54     5625275  
2021-11-09           11

In [58]:
# Take the 200 rows preceding the last index entry. ExtractSampleSet returns
# None when there is not enough history, in which case this prints None.
sam_df = ExtractSampleSet(df, df.index[-1], 200)
print(sam_df)

            500.PAR_open  500.PAR_high  500.PAR_low  500.PAR_close  \
date                                                                 
2021-01-25       58.9393       59.2176      58.3393        58.6950   
2021-01-26       58.9220       59.2030      58.9220        59.0020   
2021-01-27       58.8795       59.0813      58.2557        58.2568   
2021-01-28       57.4600       58.6550      57.0728        58.5005   
2021-01-29       57.5537       57.8840      57.2030        57.2030   
...                  ...           ...          ...            ...   
2021-11-02       74.7440       75.2209      74.7440        75.2209   
2021-11-03       75.2000       75.2309      75.0820        75.1559   
2021-11-04       75.8710       76.2770      75.8530        76.1082   
2021-11-05       76.1528       77.0700      76.1445        76.6530   
2021-11-08       76.3740       76.5929      76.3000        76.3679   

            500.PAR_volume  IBM_open  IBM_high   IBM_low  IBM_close  \
date              

In [59]:
# Keys are difference spans (in rows); values are the shifts applied to each
# differenced column.
daily_diffs = {1:[1,2,3,4],5:[5,10,15]}
lookahead, label_column, label_symbol = 'open', 'close', '500.PAR'
pivot_df = PivotData(sam_df, daily_diffs, lookahead, label_column, label_symbol)
print(pivot_df)

            500.PAR_open_diff1_shift1  500.PAR_high_diff1_shift1  \
date                                                               
2021-01-25                        NaN                        NaN   
2021-01-26                        NaN                        NaN   
2021-01-27                    -0.0173                    -0.0146   
2021-01-28                    -0.0425                    -0.1217   
2021-01-29                    -1.4195                    -0.4263   
...                               ...                        ...   
2021-11-02                     1.3150                     0.4120   
2021-11-03                    -0.2560                    -0.0130   
2021-11-04                     0.4560                     0.0100   
2021-11-05                     0.6710                     1.0461   
2021-11-08                     0.2818                     0.7930   

            500.PAR_low_diff1_shift1  500.PAR_close_diff1_shift1  \
date                                           

In [60]:
# Number of rows that remain once every row containing a NaN is dropped.
len(pivot_df.dropna().index)

180

In [35]:
# NOTE(review): no notebook-level assignment of `sampled_df` is visible in this
# file (the name only appears as a local inside functions); this cell presumably
# relied on state from an earlier session — confirm or remove.
sampled_df

Unnamed: 0_level_0,open,high,low,close,volume,trend_ichimoku_base,trend_ichimoku_a,trend_ichimoku_b
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2021-10-29,73.685,74.8219,73.6486,74.8219,119822,72.02865,72.84515,72.02865
2021-11-01,75.0,75.2339,74.6775,74.8157,34393,72.23465,73.16105,72.23465
2021-11-02,74.744,75.2209,74.744,75.2209,11617,72.23465,73.18405,72.23465
2021-11-03,75.2,75.2309,75.082,75.1559,3251,72.23465,73.25685,72.23465
2021-11-04,75.871,76.277,75.853,76.1082,50139,72.7562,73.78485,72.7562
