In [41]:
import math
import os
from datetime import datetime

import pandas as pd
import requests
from ta import add_all_ta_features
# Alpha Vantage API key used by the price download helpers.
# Supply it through the ALPHAVANTAGE_API_KEY environment variable so the secret
# does not have to live in the notebook.
# NOTE(review): the literal below is kept only as a backward-compatible fallback;
# it is already exposed in this file and should be rotated and then removed.
AV_KEY = os.environ.get('ALPHAVANTAGE_API_KEY', '5AK7ZPDAGCNO39B7')

In [42]:
# --- Symbols -----------------------------------------------------------------
STOCKS = ['500.PAR', 'IBM']
CRYPTOS = ['BTC', 'ETH']
LABEL_SYMBOL = '500.PAR'  # presumably the symbol whose price is the prediction target

# --- Feature / label layout --------------------------------------------------
# Same shape as the `diffs` argument of PivotData: {lag in rows: number of shifted copies}.
# NOTE(review): no visible cell passes DIFFS to PivotData -- confirm it is used that way.
DIFFS = {
    1: 3,
    5: 3,
    20: 3,
}
LABEL = 'close'
LOOK_AHEAD = ['open']

# --- Sampling ----------------------------------------------------------------
NAN_LIMIT = 100  # original author's note: "remove" -- TODO clarify intent
NO_SAMPLES = 500
TRAINING_RATIO = 0.5


In [43]:
# Indicator column names, grouped by their name prefix. The order is significant
# for any consumer that selects columns positionally, so it is kept as-is.
# Presumably these are the columns AddTechnicalFeatures is asked to keep from
# the add_all_ta_features output -- confirm against the caller.
TECHNICAL_COLS = [
    # volume
    'volume_adi', 'volume_obv', 'volume_cmf',
    'volume_fi', 'volume_mfi', 'volume_em',
    'volume_vpt', 'volume_nvi', 'volume_vwap',
    # volatility
    'volatility_atr',
    'volatility_bbm', 'volatility_bbh', 'volatility_bbl', 'volatility_bbw',
    'volatility_bbp', 'volatility_bbhi', 'volatility_bbli',
    'volatility_kcc', 'volatility_kch', 'volatility_kcl', 'volatility_kcw',
    'volatility_kcp', 'volatility_kchi', 'volatility_kcli',
    'volatility_dcl', 'volatility_dch', 'volatility_dcm', 'volatility_dcw',
    'volatility_dcp',
    'volatility_ui',
    # trend
    'trend_macd', 'trend_macd_signal', 'trend_macd_diff',
    'trend_sma_fast', 'trend_sma_slow',
    'trend_ema_fast', 'trend_ema_slow',
    'trend_adx', 'trend_adx_pos', 'trend_adx_neg',
    'trend_vortex_ind_pos', 'trend_vortex_ind_neg', 'trend_vortex_ind_diff',
    'trend_trix',
    'trend_mass_index',
    'trend_cci',
    'trend_dpo',
    'trend_kst', 'trend_kst_sig', 'trend_kst_diff',
    'trend_ichimoku_conv', 'trend_ichimoku_base',
    'trend_ichimoku_a', 'trend_ichimoku_b',
    'trend_visual_ichimoku_a', 'trend_visual_ichimoku_b',
    'trend_aroon_up', 'trend_aroon_down', 'trend_aroon_ind',
    'trend_psar_up_indicator', 'trend_psar_down_indicator',
    'trend_stc',
    # momentum
    'momentum_rsi',
    'momentum_stoch_rsi', 'momentum_stoch_rsi_k', 'momentum_stoch_rsi_d',
    'momentum_tsi',
    'momentum_uo',
    'momentum_stoch', 'momentum_stoch_signal',
    'momentum_wr',
    'momentum_ao',
    'momentum_kama',
    'momentum_roc',
    'momentum_ppo', 'momentum_ppo_signal', 'momentum_ppo_hist',
    # others
    'others_dr', 'others_dlr', 'others_cr',
]

In [86]:
def GetStockPriceDF(symbol):
    """Download the daily price history of a stock symbol from Alpha Vantage.

    Calls the TIME_SERIES_DAILY_ADJUSTED endpoint with outputsize=full and
    returns the open/high/low/close/volume columns as numeric values.

    Args:
        symbol: Ticker symbol understood by Alpha Vantage (e.g. '500.PAR').

    Returns:
        pandas.DataFrame indexed by the date strings returned by the API
        (index name 'date', sorted ascending) with columns
        'open', 'high', 'low', 'close', 'volume'. Values that cannot be
        parsed as numbers become NaN.

    Raises:
        requests.RequestException: on connection failure, timeout or a
            non-2xx HTTP status.
        KeyError: if the response does not contain 'Time Series (Daily)'
            (for example an error or rate-limit payload).
    """
    cols = ['open', 'high', 'low', 'close', 'volume']
    endpoint = 'https://www.alphavantage.co/query'
    params = {
        'function': 'TIME_SERIES_DAILY_ADJUSTED',
        'symbol': symbol,
        'outputsize': 'full',
        'apikey': AV_KEY,
    }
    # Log the request without the API key so the secret is not written into
    # saved notebook output.
    print(endpoint + '?function=TIME_SERIES_DAILY_ADJUSTED&symbol=' + str(symbol) + '&outputsize=full')
    # `params=` lets requests URL-encode the symbol; the timeout stops a stalled
    # connection from hanging the kernel indefinitely.
    r = requests.get(endpoint, params=params, timeout=30)
    r.raise_for_status()
    data = r.json()

    series_key = 'Time Series (Daily)'
    if series_key not in data:
        # Surface what the API actually answered instead of a bare KeyError,
        # making sure the key is not echoed back in the message.
        detail = str(data)[:300]
        if AV_KEY:
            detail = detail.replace(AV_KEY, '<redacted>')
        raise KeyError('Alpha Vantage response for ' + str(symbol) + ' has no "' + series_key + '": ' + detail)

    rename_map = {'1. open': 'open', '2. high': 'high', '3. low': 'low', '4. close': 'close', '6. volume': 'volume'}
    df = pd.DataFrame.from_dict(data[series_key], orient='index')
    df = df[list(rename_map)].rename(columns=rename_map)
    df.index.name = 'date'
    df = df.sort_index(ascending=True)
    # The API delivers every value as a string.
    df[cols] = df[cols].apply(pd.to_numeric, errors='coerce')
    return df

def GetCryptoPriceDF(symbol):
    """Download the daily price history of a cryptocurrency from Alpha Vantage.

    Calls the DIGITAL_CURRENCY_DAILY endpoint with market=CNY and returns the
    CNY-denominated open/high/low/close columns plus volume as numeric values.

    Args:
        symbol: Digital currency symbol understood by Alpha Vantage (e.g. 'BTC').

    Returns:
        pandas.DataFrame indexed by the date strings returned by the API
        (index name 'date', sorted ascending) with columns
        'open', 'high', 'low', 'close', 'volume'. Values that cannot be
        parsed as numbers become NaN.

    Raises:
        requests.RequestException: on connection failure, timeout or a
            non-2xx HTTP status.
        KeyError: if the response does not contain
            'Time Series (Digital Currency Daily)' (for example an error or
            rate-limit payload), or lacks the expected CNY columns.
    """
    cols = ['open', 'high', 'low', 'close', 'volume']
    endpoint = 'https://www.alphavantage.co/query'
    params = {
        'function': 'DIGITAL_CURRENCY_DAILY',
        'symbol': symbol,
        'market': 'CNY',
        'apikey': AV_KEY,
    }
    # Log the request without the API key so the secret is not written into
    # saved notebook output.
    print(endpoint + '?function=DIGITAL_CURRENCY_DAILY&symbol=' + str(symbol) + '&market=CNY')
    # `params=` lets requests URL-encode the symbol; the timeout stops a stalled
    # connection from hanging the kernel indefinitely.
    r = requests.get(endpoint, params=params, timeout=30)
    r.raise_for_status()
    data = r.json()

    series_key = 'Time Series (Digital Currency Daily)'
    if series_key not in data:
        # Surface what the API actually answered instead of a bare KeyError,
        # making sure the key is not echoed back in the message.
        detail = str(data)[:300]
        if AV_KEY:
            detail = detail.replace(AV_KEY, '<redacted>')
        raise KeyError('Alpha Vantage response for ' + str(symbol) + ' has no "' + series_key + '": ' + detail)

    rename_map = {
        '1a. open (CNY)': 'open',
        '2a. high (CNY)': 'high',
        '3a. low (CNY)': 'low',
        '4a. close (CNY)': 'close',
        '5. volume': 'volume',
    }
    df = pd.DataFrame.from_dict(data[series_key], orient='index')
    df = df[list(rename_map)].rename(columns=rename_map)
    df.index.name = 'date'
    df = df.sort_index(ascending=True)
    # The API delivers every value as a string.
    df[cols] = df[cols].apply(pd.to_numeric, errors='coerce')
    return df

def AddTechnicalFeatures(df, symbol, technical_cols):
    """Attach indicator columns to a price frame and namespace them by symbol.

    Runs `add_all_ta_features` over the 'open'/'high'/'low'/'close'/'volume'
    columns of `df`, keeps those five columns followed by `technical_cols`
    (in that order), and prefixes every kept column with '<symbol>_'.

    Args:
        df: Price frame containing the five OHLCV columns.
        symbol: String used as the column prefix.
        technical_cols: List of indicator column names to keep; each must be
            present in the output of `add_all_ta_features`.

    Returns:
        DataFrame with the selected, prefixed columns.
    """
    enriched = add_all_ta_features(df, open="open", high="high", low="low", close="close", volume="volume")
    wanted = ['open', 'high', 'low', 'close', 'volume'] + technical_cols
    return enriched[wanted].add_prefix(symbol + '_')

def DownloadData(stocks, cryptos, technical_cols):
    """Fetch every requested symbol and join the results side by side.

    Stocks are downloaded first (GetStockPriceDF), then cryptos
    (GetCryptoPriceDF). Each frame goes through AddTechnicalFeatures so its
    columns carry the symbol prefix, and the frames are concatenated
    column-wise with an inner join on the index, so only index labels present
    for every symbol survive.

    Args:
        stocks: Iterable of stock symbols.
        cryptos: Iterable of cryptocurrency symbols.
        technical_cols: Indicator column names forwarded to AddTechnicalFeatures.

    Returns:
        One DataFrame holding the columns of all symbols.
    """
    frames = [
        AddTechnicalFeatures(GetStockPriceDF(ticker), ticker, technical_cols)
        for ticker in stocks
    ]
    frames += [
        AddTechnicalFeatures(GetCryptoPriceDF(coin), coin, technical_cols)
        for coin in cryptos
    ]
    return pd.concat(frames, axis=1, join="inner")

def ExtractSampleSet(df, date, no_samples):
    """Return the `no_samples` rows that immediately precede `date`.

    The row at `date` itself is not part of the result.

    Args:
        df: DataFrame whose index contains `date`.
        date: Index label marking the (exclusive) end of the sample window.
        no_samples: Number of rows to take before `date`.

    Returns:
        The slice `df.iloc[ind - no_samples:ind]`, where `ind` is the position
        of `date`, or None when fewer than `no_samples` rows precede `date`.

    Raises:
        KeyError: if `date` is not in the index (raised by `Index.get_loc`).
    """
    ind = df.index.get_loc(date)
    start = ind - no_samples
    # `start == 0` is a valid window (exactly `no_samples` rows are available);
    # the previous strict `> 0` check wrongly rejected it.
    if start < 0:
        return None
    return df.iloc[start:ind, :]

def PivotData(df, diffs):
    """Build lagged-difference features from every column of `df`.

    For each `diff: count` entry of `diffs`:
      * every column `c` is turned into `c_diff<diff>`, the value of `c`
        minus its value `diff` rows earlier;
      * that differenced frame is emitted `count` times, shifted down by
        0, diff, 2*diff, ... rows, each copy suffixed with `_shift<rows>`.

    Args:
        df: DataFrame with numeric columns.
        diffs: Mapping {lag in rows: number of shifted copies}.

    Returns:
        DataFrame on the same index as `df` holding all generated columns,
        ordered by lag, then by shift, then by original column order. Rows
        without enough history contain NaN. If `diffs` is empty, or every
        count is zero, an empty frame on `df`'s index is returned.

    Note:
        Differences are computed with `DataFrame.diff`, which only needs the
        two end points of each window. The earlier rolling-window version
        additionally produced NaN whenever any value *inside* the window was
        NaN.
    """
    frames = []
    for diff, n_shifts in diffs.items():
        # Vectorised x[t] - x[t - diff] for all columns at once.
        diff_df = df.diff(periods=diff).add_suffix('_diff' + str(diff))
        for base_shift in range(n_shifts):
            shift = base_shift * diff
            frames.append(diff_df.shift(periods=shift).add_suffix('_shift' + str(shift)))
    if not frames:
        # pd.concat refuses an empty list; give callers an empty, aligned frame.
        return pd.DataFrame(index=df.index)
    return pd.concat(frames, axis=1, join="inner")

In [68]:
# Smoke test: download only '500.PAR' -- no cryptos, no indicator columns.
df = DownloadData(stocks=['500.PAR'], cryptos=[], technical_cols=[])
print(df.head())

https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol=500.PAR&outputsize=full&apikey=5AK7ZPDAGCNO39B7


  dip[i] = 100 * (self._dip[i] / self._trs[i])
  din[i] = 100 * (self._din[i] / self._trs[i])


            500.PAR_open  500.PAR_high  500.PAR_low  500.PAR_close  \
date                                                                 
2010-06-15         14.01         14.01        13.97          13.97   
2010-06-16         14.07         14.13        14.07          14.12   
2010-06-17         14.16         14.16        14.05          14.05   
2010-06-18         14.04         14.14        14.04          14.14   
2010-06-21         14.23         14.30        14.22          14.22   

            500.PAR_volume  
date                        
2010-06-15               1  
2010-06-16               4  
2010-06-17               1  
2010-06-18           54000  
2010-06-21            2995  


In [93]:
# Take the 15 rows that precede the last date in the frame (the last row itself is excluded).
sam_df = ExtractSampleSet(df, date=df.index[-1], no_samples=15)
print(len(sam_df.index))
print(sam_df)

15
            500.PAR_open  500.PAR_high  500.PAR_low  500.PAR_close  \
date                                                                 
2021-10-15       72.0413       72.5000      72.0328        72.4134   
2021-10-18       72.3973       72.6346      72.0375        72.5842   
2021-10-19       72.5241       72.9379      72.5014        72.9062   
2021-10-20       72.9410       73.2804      72.9410        73.2299   
2021-10-21       73.0330       73.3274      73.0330        73.1539   
2021-10-22       73.4307       73.6349      73.3242        73.3242   
2021-10-25       73.4500       73.8319      73.3500        73.7920   
2021-10-26       74.2680       74.5601      74.1900        74.4580   
2021-10-27       74.2276       74.2785      73.9500        74.2289   
2021-10-28       73.9026       74.1303      73.7259        73.8859   
2021-10-29       73.6850       74.8219      73.6486        74.8219   
2021-11-01       75.0000       75.2339      74.6775        74.8157   
2021-11-02       

In [94]:
# {lag in rows: number of shifted copies} -- 1-row and 5-row differences, two copies each.
diffs = {1: 2, 5: 2}
pivot_df = PivotData(df=sam_df, diffs=diffs)
print(pivot_df)

            500.PAR_open_diff1_shift0  500.PAR_high_diff1_shift0  \
date                                                               
2021-10-15                        NaN                        NaN   
2021-10-18                     0.3560                     0.1346   
2021-10-19                     0.1268                     0.3033   
2021-10-20                     0.4169                     0.3425   
2021-10-21                     0.0920                     0.0470   
2021-10-22                     0.3977                     0.3075   
2021-10-25                     0.0193                     0.1970   
2021-10-26                     0.8180                     0.7282   
2021-10-27                    -0.0404                    -0.2816   
2021-10-28                    -0.3250                    -0.1482   
2021-10-29                    -0.2176                     0.6916   
2021-11-01                     1.3150                     0.4120   
2021-11-02                    -0.2560           

In [34]:
# NOTE(review): no visible cell defines a notebook-level `sampled_df` (the only
# `sampled_df` in view is a local variable inside PivotData), and this cell's
# execution count (34) is lower than the cells above it. It looks like leftover
# kernel state and would likely raise NameError on Restart & Run All -- confirm.
len(sampled_df.index)

5

In [35]:
# NOTE(review): displays `sampled_df`, which no visible cell defines at notebook
# level; the execution count (35) is out of order with the cells above, so this
# looks like stale kernel state -- confirm before relying on the output below.
sampled_df

Unnamed: 0_level_0,open,high,low,close,volume,trend_ichimoku_base,trend_ichimoku_a,trend_ichimoku_b
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2021-10-29,73.685,74.8219,73.6486,74.8219,119822,72.02865,72.84515,72.02865
2021-11-01,75.0,75.2339,74.6775,74.8157,34393,72.23465,73.16105,72.23465
2021-11-02,74.744,75.2209,74.744,75.2209,11617,72.23465,73.18405,72.23465
2021-11-03,75.2,75.2309,75.082,75.1559,3251,72.23465,73.25685,72.23465
2021-11-04,75.871,76.277,75.853,76.1082,50139,72.7562,73.78485,72.7562
