In [49]:
import yfinance as yf
import bs4 as bs
import pickle
import requests
import pandas as pd
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
import numpy as np
import tqdm

# Get data
## Tickers

In [6]:
html = requests.get('http://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
soup = bs.BeautifulSoup(html.text, 'lxml')
table = soup.find('table', {'class': 'wikitable sortable'})

tickers = []
for row in table.findAll('tr')[1:]:
        ticker = row.findAll('td')[0].text
        ticker = ticker[:-1]
        tickers.append(ticker)
pickle.dump(tickers, open('data/sp500_tickers.pkl', 'wb'))

In [7]:
tickers = pickle.load(open('data/sp500_tickers.pkl', 'rb'))

In [101]:
period_start_date = '1990-01-01'
period_end_date = '2019-01-01'

## History data

In [73]:
history_data = dict()
for t in tqdm.tqdm(tickers): 
    ticker = yf.Ticker(t)
    t_h = ticker.history(period="max", auto_adjust=True)
    t_h.reset_index(inplace=True)
    t_h.columns = [x.lower() for x in t_h.columns]
    t_h['ticker'] = t
    t_h = t_h[(t_h['date'] >= period_start_date) & (t_h['date'] < period_end_date)]
    t_h = t_h[t_h['open'] != 0]
    if not t_h.empty:
        history_data[t] = t_h

 13%|█▎        | 65/505 [00:20<02:49,  2.60it/s]

- BRK.B: No data found, symbol may be delisted


 15%|█▌        | 78/505 [00:23<01:52,  3.78it/s]

- BF.B: 1d data not available for startTime=-2208988800 and endTime=1617619855. Only 100 years worth of day granularity data are allowed to be fetched per request.


100%|██████████| 505/505 [06:31<00:00,  1.29it/s]


# features
## Indicators computing random forest

In [75]:
list_m = list(range(1, 20)) + list(range(20, 241, 20))

for t_h in tqdm.tqdm(list(history_data.values())):
    for m in list_m:
        t_h_shift_one = t_h.shift(1)
        t_h_shift_m = t_h.shift(m)
        t_h_shift_m_plus_one = t_h.shift(m + 1)

        t_h[f'i_{m}'] = t_h_shift_m.apply(axis=1, func=lambda x: x['close'] / x['open'] - 1)

        data_for_cr = pd.concat([t_h_shift_one['close'], t_h_shift_m_plus_one['close']], axis=1)
        data_for_cr.columns = ['close_one', 'close_m_plus_one']
        t_h[f'cr_{m}'] = data_for_cr.apply(axis=1, func=lambda x: x['close_one'] / x['close_m_plus_one'] - 1)

        data_for_or = pd.concat([t_h['open'], t_h_shift_m['close']], axis=1)
        t_h[f'or_{m}'] = data_for_or.apply(axis=1, func=lambda x: x['open'] / x['close'] - 1)

100%|██████████| 497/497 [1:14:22<00:00,  8.98s/it]


In [143]:
for t_h in tqdm.tqdm(list(history_data.values())):
    cols_to_process = [x for x in t_h.columns if '_' in x]
    for col in cols_to_process:
        t_h[col] = t_h[col].astype('float32')

100%|██████████| 497/497 [00:48<00:00, 10.20it/s]


In [145]:
pickle.dump(history_data, open('data/history_data_light.pkl', 'wb'))

Unnamed: 0,date,open,high,low,close,volume,dividends,stock splits,ticker,i_1,cr_1,or_1,i_2,cr_2,or_2,i_3,cr_3,or_3,i_4,cr_4,or_4,i_5,cr_5,or_5,i_6,cr_6,or_6,i_7,cr_7,or_7,i_8,cr_8,or_8,i_9,cr_9,or_9,i_10,cr_10,or_10,i_11,cr_11,or_11,i_12,cr_12,or_12,i_13,cr_13,or_13,i_14,cr_14,or_14,i_15,cr_15,or_15,i_16,cr_16,or_16,i_17,cr_17,or_17,i_18,cr_18,or_18,i_19,cr_19,or_19,i_20,cr_20,or_20,i_40,cr_40,or_40,i_60,cr_60,or_60,i_80,cr_80,or_80,i_100,cr_100,or_100,i_120,cr_120,or_120,i_140,cr_140,or_140,i_160,cr_160,or_160,i_180,cr_180,or_180,i_200,cr_200,or_200,i_220,cr_220,or_220,i_240,cr_240,or_240
0,1993-06-03,7.846162,7.912096,7.648360,7.747261,56603000,0.0,0.0,ALL,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,1993-06-04,7.681330,7.747264,7.549461,7.549461,12644400,0.0,0.0,ALL,-0.012605,,-8.510280e-03,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,1993-06-07,7.450561,7.516495,7.384627,7.417594,10043800,0.0,0.0,ALL,-0.017167,-0.025532,-1.310031e-02,-0.012605,,-0.038297,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,1993-06-08,7.417593,7.450560,7.285725,7.318692,6974600,0.0,0.0,ALL,-0.004425,-0.017467,-1.329128e-07,-0.017167,-0.042553,-0.017467,-0.012605,,-0.042553,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,1993-06-09,7.351659,7.516494,7.351659,7.450560,4043200,0.0,0.0,ALL,-0.013333,-0.013333,4.504510e-03,-0.004425,-0.030568,-0.008889,-0.017167,-0.055319,-0.026201,-0.012605,,-0.051063,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6437,2018-12-24,75.575529,76.004128,73.508752,73.594467,1429300,0.0,0.0,ALL,0.000501,0.000501,-6.261508e-03,-0.009310,-0.013954,-0.005764,-0.007476,-0.020366,-0.020128,0.002336,-0.003868,-0.026500,0.012505,0.006936,-0.010105,-0.005518,-0.009182,0.000631,-0.019348,-0.030947,-0.015386,-0.015532,-0.032004,-0.037014,-0.022283,-0.041186,-0.038065,0.003615,-0.047250,-0.047190,-0.013768,-0.061582,-0.053215,-0.006306,-0.075810,-0.067458,-0.026589,-0.100383,-0.081597,-0.010259,-0.104720,-0.106016,0.008024,-0.097536,-0.110326,0.000905,-0.099775,-0.103187,0.015837,-0.082696,-0.105411,-0.006021,-0.092240,-0.088440,0.000000,-0.085101,-0.097924,0.003431,-0.088633,-0.090830,0.003350,-0.128417,-0.140964,-0.006177,-0.188021,-0.187351,0.000199,-0.196724,-0.203341,-0.005255,-0.152304,-0.153429,0.000000,-0.120401,-0.128096,-4.990987e-03,-0.144479,-0.144845,0.005181,-0.142738,-0.152945,-0.007358,-0.155074,-0.159313,0.010842,-0.141012,-0.161501,-0.062168,-0.159334,-0.101428,-0.003769,-0.193231,-0.194211
6438,2018-12-26,73.861145,77.089891,73.337304,76.956551,2452000,0.0,0.0,ALL,-0.026213,-0.032310,3.623611e-03,0.000501,-0.031825,-0.028804,-0.009310,-0.045814,-0.028317,-0.007476,-0.052018,-0.042356,0.002336,-0.036053,-0.048583,0.012505,-0.025599,-0.032560,-0.005518,-0.041196,-0.022068,-0.019348,-0.062257,-0.037721,-0.015532,-0.063280,-0.058859,-0.022283,-0.072166,-0.059886,0.003615,-0.078034,-0.068804,-0.013768,-0.091903,-0.074693,-0.006306,-0.105671,-0.088612,-0.026589,-0.129450,-0.102431,-0.010259,-0.133647,-0.126296,0.008024,-0.126695,-0.130508,0.000905,-0.128861,-0.123531,0.015837,-0.112334,-0.125705,-0.006021,-0.121570,-0.109118,0.000000,-0.114662,-0.118387,0.000433,-0.163482,-0.155722,0.007143,-0.208653,-0.210211,-0.001697,-0.224224,-0.220712,0.017738,-0.175620,-0.197147,-0.004978,-0.150951,-0.148245,-1.137330e-02,-0.167261,-0.158044,0.002514,-0.175149,-0.177782,0.002788,-0.181350,-0.189719,-0.011464,-0.183481,-0.170327,-0.001209,-0.124982,-0.129831,-0.000397,-0.215333,-0.215225
6439,2018-12-27,76.004106,77.966115,75.404069,77.899445,2851800,0.0,0.0,ALL,0.041908,0.045684,-1.237639e-02,-0.026213,0.011897,0.032742,0.000501,0.012405,-0.000626,-0.009310,-0.002223,-0.000125,-0.007476,-0.008711,-0.014571,0.002336,0.007984,-0.020979,0.012505,0.018916,-0.004491,-0.005518,0.002606,0.006305,-0.019348,-0.019417,-0.009803,-0.015532,-0.020487,-0.031553,-0.022283,-0.029779,-0.032610,0.003615,-0.035914,-0.041787,-0.013768,-0.050417,-0.047846,-0.006306,-0.064815,-0.062170,-0.026589,-0.089680,-0.076389,-0.010259,-0.094069,-0.100946,0.008024,-0.086799,-0.105281,0.000905,-0.089064,-0.098101,0.015837,-0.071782,-0.100338,-0.006021,-0.081440,-0.083270,0.002787,-0.120340,-0.142465,-0.006053,-0.177112,-0.185894,0.007715,-0.188053,-0.202408,0.005332,-0.163500,-0.178150,0.003375,-0.112550,-0.125625,5.367195e-04,-0.122759,-0.135382,0.009635,-0.143324,-0.160070,-0.001453,-0.155762,-0.158236,0.010163,-0.135557,-0.160070,-0.009498,-0.093364,-0.103006,0.011992,-0.182336,-0.202974
6440,2018-12-28,78.318525,79.032849,77.747059,78.118507,2236000,0.0,0.0,ALL,0.024937,0.012252,5.379756e-03,0.041908,0.058496,0.017698,-0.026213,0.024296,0.064190,0.000501,0.024809,0.029806,-0.009310,0.010002,0.030322,-0.007476,0.003435,0.015436,0.002336,0.020334,0.008833,0.012505,0.031400,0.025823,-0.005518,0.014890,0.036948,-0.019348,-0.007403,0.020350,-0.015532,-0.008486,-0.002063,-0.022283,-0.017892,-0.003152,0.003615,-0.024102,-0.012608,-0.013768,-0.038783,-0.018852,-0.006306,-0.053357,-0.033612,-0.026589,-0.078526,-0.048264,-0.010259,-0.082969,-0.073569,0.008024,-0.075610,-0.078036,0.000905,-0.077903,-0.070637,0.015837,-0.060410,-0.072943,0.013910,-0.121080,-0.134397,0.002129,-0.165593,-0.163819,0.003579,-0.182518,-0.181214,0.002751,-0.157655,-0.156136,0.009056,-0.103821,-0.112861,3.426488e-03,-0.113821,-0.113902,-0.004661,-0.139125,-0.131701,0.000931,-0.137245,-0.137535,-0.004337,-0.139125,-0.134493,0.021057,-0.080637,-0.089737,-0.000783,-0.183099,-0.178865


## Indicators computing for LSTM

In [197]:
for ticker, hist in tqdm.tqdm(history_data.items()):
    history_data[ticker] = history_data[ticker][['ticker', 'date', 'i_1', 'cr_1', 'or_1']]

100%|██████████| 497/497 [00:00<00:00, 705.82it/s]


## Output computing

In [102]:
all_dates = [str(d)[:10] for d in pd.date_range(start=period_start_date, end=period_end_date).tolist()]

In [105]:
history_data_agg = pd.concat(list(history_data.values()), axis=0)

In [132]:
outputs = list()
for d in tqdm.tqdm(all_dates):
    data_one_day = history_data_agg.loc[history_data_agg['date'] == d, ['ticker', 'date', 'open', 'close']].copy()
    data_one_day['return'] = data_one_day['close'] / data_one_day['open'] - 1
    data_one_day = data_one_day.sort_values('return', ascending=False, inplace=False).reset_index(drop=True, inplace=False)
    data_one_day['class'] = 0
    data_one_day.loc[:len(data_one_day) // 2, 'class'] = 1
    outputs.append(data_one_day)

100%|██████████| 10593/10593 [01:34<00:00, 112.44it/s]


In [133]:
outputs_df = pd.concat(outputs, axis=0)
outputs_df = outputs_df[['ticker', 'date', 'class']]

In [187]:
for ticker, hist in tqdm.tqdm(history_data.items()):
    history_data[ticker] = pd.merge(hist, outputs_df, how='left', on=['date', 'ticker'])
    if history_data[ticker][history_data[ticker].isnull().any(axis=1)].empty:
        print(ticker)

100%|██████████| 497/497 [04:58<00:00,  1.67it/s]


In [189]:
history_data['MMM']

Unnamed: 0,date,open,high,low,close,volume,dividends,stock splits,ticker,i_1,cr_1,or_1,i_2,cr_2,or_2,i_3,cr_3,or_3,i_4,cr_4,or_4,i_5,cr_5,or_5,i_6,cr_6,or_6,i_7,cr_7,or_7,i_8,cr_8,or_8,i_9,cr_9,or_9,i_10,cr_10,or_10,i_11,cr_11,or_11,i_12,cr_12,or_12,i_13,cr_13,or_13,i_14,cr_14,or_14,i_15,cr_15,or_15,i_16,cr_16,or_16,i_17,cr_17,or_17,i_18,cr_18,or_18,i_19,cr_19,or_19,i_20,cr_20,or_20,i_40,cr_40,or_40,i_60,cr_60,or_60,i_80,cr_80,or_80,i_100,cr_100,or_100,i_120,cr_120,or_120,i_140,cr_140,or_140,i_160,cr_160,or_160,i_180,cr_180,or_180,i_200,cr_200,or_200,i_220,cr_220,or_220,i_240,cr_240,or_240,class
0,1990-01-02,8.252539,8.382500,8.226547,8.369504,1496000,0.0,0.0,MMM,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1
1,1990-01-03,8.382499,8.512460,8.369503,8.460476,2631600,0.0,0.0,MMM,0.014173,,0.001553,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1
2,1990-01-04,8.408493,8.629427,8.408493,8.525458,2538400,0.0,0.0,MMM,0.009302,0.010869,-0.006144,0.014173,,0.004658,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1
3,1990-01-05,8.486471,8.525459,8.382502,8.382502,1825600,0.0,0.0,MMM,0.013910,0.007681,-0.004573,0.009302,0.018634,0.003072,0.014173,,0.013975,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0
4,1990-01-08,8.369500,8.616426,8.369500,8.603430,2164400,0.0,0.0,MMM,-0.012251,-0.016768,-0.001551,0.013910,-0.009216,-0.018293,0.009302,0.001553,-0.010753,0.014173,,-5.132749e-07,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7302,2018-12-24,168.257899,169.784231,165.131215,165.232971,2249500,0.0,0.0,MMM,-0.021878,-0.019372,-0.010122,-0.009148,-0.032691,-0.029299,-0.030816,-0.055561,-0.042482,0.006050,-0.047039,-6.512105e-02,-0.015421,-0.062978,-0.056685,-0.020088,-0.090932,-0.072463,0.003475,-0.080376,-0.100134,-0.003938,-0.069432,-0.089685,-0.018881,-0.073467,-0.078851,-0.000202,-0.073093,-0.082846,-0.021617,-0.093398,-0.082476,0.015380,-0.091246,-0.102575,-0.028165,-0.119803,-0.100445,-0.011600,-0.116247,-0.128712,0.017420,-0.101731,-0.125192,0.003385,-0.100763,-0.110823,0.011634,-0.084409,-0.109866,0.007885,-0.080606,-0.093677,0.006496,-0.066311,-0.089913,-0.004854,-0.075192,-0.075762,-0.008469,0.002519,-0.010043,-0.008915,-0.123491,-0.128399,0.006167,-0.116627,-0.129848,-0.018627,-0.122811,-0.109589,-0.003359,-0.047438,-0.058573,0.000597,-0.073685,-0.083700,0.001000,-0.059397,-0.073102,-0.004398,-0.116520,-0.127764,0.014076,-0.206572,-0.230907,-0.044047,-0.191113,-0.162299,0.000581,-0.218234,-0.225698,1
7303,2018-12-26,165.936014,172.365130,163.614122,172.300369,2925500,0.0,0.0,MMM,-0.017978,-0.027918,0.004255,-0.021878,-0.046750,-0.023782,-0.009148,-0.059697,-0.042694,-0.030816,-0.081928,-5.569582e-02,0.006050,-0.073644,-0.078022,-0.015421,-0.089138,-0.069702,-0.020088,-0.116311,-0.085263,0.003475,-0.106051,-0.112551,-0.003938,-0.095412,-0.102247,-0.018881,-0.099334,-0.091563,-0.000202,-0.098971,-0.095502,-0.021617,-0.118709,-0.095137,0.015380,-0.116617,-0.114959,-0.028165,-0.144376,-0.112858,-0.011600,-0.140919,-0.140736,0.017420,-0.126809,-0.137264,0.003385,-0.125868,-0.123093,0.011634,-0.109971,-0.122149,0.007885,-0.106274,-0.106184,0.006496,-0.092378,-0.102472,0.008947,-0.027840,-0.023493,0.005632,-0.144068,-0.142874,0.002432,-0.145492,-0.140876,-0.002039,-0.125596,-0.115382,0.001370,-0.075498,-0.078668,0.010008,-0.100173,-0.108135,0.008700,-0.089766,-0.092599,0.007709,-0.143445,-0.156332,-0.009500,-0.244734,-0.236618,0.002671,-0.177359,-0.182370,0.006522,-0.239619,-0.240071,0
7304,2018-12-27,169.747200,176.407578,169.173671,176.407578,2809000,0.0,0.0,MMM,0.038354,0.042772,-0.014818,-0.017978,0.013660,0.027320,-0.021878,-0.005977,-0.001361,-0.009148,-0.019478,-2.070670e-02,-0.030816,-0.042660,-0.034007,0.006050,-0.034021,-0.056846,-0.015421,-0.050178,-0.048335,-0.020088,-0.078514,-0.064253,0.003475,-0.067814,-0.092169,-0.003938,-0.056720,-0.081628,-0.018881,-0.060811,-0.070698,-0.000202,-0.060432,-0.074728,-0.021617,-0.081014,-0.074354,0.015380,-0.078833,-0.094632,-0.028165,-0.107779,-0.092483,-0.011600,-0.104175,-0.121000,0.017420,-0.089460,-0.117449,0.003385,-0.088480,-0.102953,0.011634,-0.071902,-0.101987,0.007885,-0.068048,-0.085655,-0.010966,0.013960,-0.005581,-0.000989,-0.110000,-0.129303,0.006442,-0.107925,-0.124061,0.006653,-0.081453,-0.102747,0.001877,-0.043331,-0.058080,0.004359,-0.073928,-0.093034,0.006880,-0.057796,-0.079424,0.000607,-0.123974,-0.126284,-0.013823,-0.207339,-0.211754,0.007473,-0.151010,-0.178115,0.005760,-0.210924,-0.229485,1
7305,2018-12-28,177.434393,177.887674,174.437230,175.177261,2311400,0.0,0.0,MMM,0.039237,0.023837,0.005821,0.038354,0.067629,0.029797,-0.017978,0.037823,0.073844,-0.021878,0.017718,4.386383e-02,-0.009148,0.003895,0.023642,-0.030816,-0.019840,0.009739,0.006050,-0.010995,-0.014134,-0.015421,-0.027537,-0.005238,-0.020088,-0.056548,-0.021877,0.003475,-0.045593,-0.051056,-0.003938,-0.034235,-0.040038,-0.018881,-0.038423,-0.028613,-0.000202,-0.038035,-0.032826,-0.021617,-0.059108,-0.032435,0.015380,-0.056874,-0.053631,-0.028165,-0.086511,-0.051385,-0.011600,-0.082820,-0.081194,0.017420,-0.067755,-0.077482,0.003385,-0.066751,-0.062329,0.011634,-0.049778,-0.061319,0.015950,0.033437,0.017438,0.015679,-0.095140,-0.104725,-0.009261,-0.089692,-0.074210,-0.003615,-0.067542,-0.059437,0.012062,-0.021122,-0.030498,0.007854,-0.057447,-0.058937,0.011944,-0.043303,-0.045291,0.003245,-0.092002,-0.095537,-0.018886,-0.180826,-0.164523,0.007183,-0.145867,-0.148953,-0.002283,-0.199252,-0.195480,0


## Split to train / test

In [91]:
history_data = pickle.load(open('data/history_data.pkl', 'rb'))

In [200]:
dataset = dict()
for t, data in tqdm.tqdm(history_data.items()):
    dataset[t] = dict()
    for year in range(1994, 2020):
        dataset[t][year] = dict()
        start_date = str(year - 4) + '-01-01'
        split_date = str(year - 1) + '-01-01'
        end_date = str(year) + '-01-01'
        dataset[t][year]['train'] = data[(data['date'] > start_date) & (data['date'] < split_date)].dropna(axis=0).reset_index(drop=True)
        dataset[t][year]['test'] = data[(data['date'] >= split_date) & (data['date'] < end_date)].reset_index(drop=True)
        if dataset[t][year]['train'].empty or dataset[t][year]['test'].empty:
            del dataset[t][year]

100%|██████████| 497/497 [00:42<00:00, 11.61it/s]


## Normalization

In [81]:
from sklearn.preprocessing import RobustScaler

In [201]:
features_to_scale = [x for x in history_data['AMD'].columns if '_' in x]
scaled_features_name = [x + '_scaled' for x in features_to_scale]

In [202]:
dataset['MMM'].keys()

dict_keys([1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019])

In [203]:
for t, hist in tqdm.tqdm(dataset.items()):
    for y, data in hist.items():
        scaler = RobustScaler().fit(data['train'][features_to_scale])

        scaled_train = scaler.transform(data['train'][features_to_scale]).astype('float32')
        scaled_train = pd.DataFrame(scaled_train, columns=scaled_features_name)
        data['train'] = pd.concat([data['train'][['date', 'ticker', 'class']], scaled_train], axis=1)
        
        scaled_test = scaler.transform(data['test'][features_to_scale]).astype('float32')
        scaled_test = pd.DataFrame(scaled_test, columns=scaled_features_name)
        data['test'] = pd.concat([data['test'][['date', 'ticker', 'class']], scaled_test], axis=1)

        

100%|██████████| 497/497 [01:39<00:00,  5.01it/s]


In [218]:
dataset['AWK'][2013]['test']

Unnamed: 0,date,ticker,class,i_1_scaled,cr_1_scaled,or_1_scaled
0,2012-01-03,AWK,0,-0.835679,-0.685262,0.617648
1,2012-01-04,AWK,0,-1.149249,-0.789391,-0.228518
2,2012-01-05,AWK,0,-0.20894,-0.282806,0.071888
3,2012-01-06,AWK,1,0.523012,0.551494,0.111821
4,2012-01-09,AWK,0,0.289935,0.348565,-0.185675
5,2012-01-10,AWK,0,-0.16495,-0.221301,0.110584
6,2012-01-11,AWK,0,-1.031264,-0.907972,0.973028
7,2012-01-12,AWK,1,0.166201,0.63081,0.068902
8,2012-01-13,AWK,1,0.310258,0.347913,-0.860271
9,2012-01-17,AWK,1,0.289661,-0.103896,0.953352


# Train model

In [270]:
len(data_test_one_year)

992

In [276]:
data_test_one_year[-2:-1]

[    ticker       date  i_1_scaled  i_1_scaled  i_1_scaled
 750    ACN 2008-12-24   -0.450757   -0.450757   -0.450757
 751    ACN 2008-12-26    0.037102    0.037102    0.037102
 752    ACN 2008-12-29    0.600138    0.600138    0.600138
 753    ACN 2008-12-30    0.020065    0.020065    0.020065
 754    ACN 2008-12-31    0.773680    0.773680    0.773680
 755    ACN 2009-01-02    0.859425    0.859425    0.859425
 756    ACN 2009-01-05    1.697288    1.697288    1.697288
 757    ACN 2009-01-06    0.511359    0.511359    0.511359
 758    ACN 2009-01-07    0.703186    0.703186    0.703186
 759    ACN 2009-01-08    1.360084    1.360084    1.360084
 760    ACN 2009-01-09    0.228255    0.228255    0.228255
 761    ACN 2009-01-12    0.301106    0.301106    0.301106
 762    ACN 2009-01-13   -0.966284   -0.966284   -0.966284
 763    ACN 2009-01-14    1.503649    1.503649    1.503649
 764    ACN 2009-01-15   -2.144558   -2.144558   -2.144558
 765    ACN 2009-01-16    0.500070    0.500070    0.5000

In [273]:
y_test_one_year[-2:]

[ticker                    ACN
 date      2009-12-07 00:00:00
 class                       1
 Name: 989, dtype: object,
 ticker                    ACN
 date      2009-12-08 00:00:00
 class                       1
 Name: 990, dtype: object]

In [277]:
data_train_one_year = list()
data_test_one_year = list()
y_train_one_year = list()
y_test_one_year = list()
history_size = 240
year = 2010
for t in tqdm.tqdm(dataset.keys()):
    data_one_year = dataset[t].get(year)
    if not data_one_year:
        continue
        
    train_tmp = data_one_year['train']
    for i in range(history_size, len(train_tmp)):
        one_train_data = train_tmp.loc[i - 240 + 1: i, ['ticker', 'date', 'i_1_scaled', 'i_1_scaled', 'i_1_scaled']]
        y_train = train_tmp.loc[i, ['ticker', 'date', 'class']]
        data_train_one_year.append(one_train_data)        
        y_train_one_year.append(y_train)
    

    test_tmp = data_one_year['test']
    data_tmp = pd.concat([train_tmp, test_tmp], axis=0).reset_index(drop=True)
    for i in range(len(train_tmp), len(data_tmp)):
        one_test_data = data_tmp.loc[i - 240 + 1: i, ['ticker', 'date', 'i_1_scaled', 'i_1_scaled', 'i_1_scaled']]
        y_test = data_tmp.loc[i, ['ticker', 'date', 'class']]
        data_test_one_year.append(one_test_data)        
        y_test_one_year.append(y_test)

100%|██████████| 497/497 [07:36<00:00,  1.09it/s]


In [279]:
dataset_2010 = {'X_train': data_train_one_year, 'X_test': data_test_one_year, 'y_train': y_train_one_year, 'y_test': y_test_one_year}

In [None]:
pickle.dump(dataset_2010, open('data/dataset_2010.pkl', 'wb'))

In [209]:
import tensoflow as tf 

class DataGenerator(tf.keras.utils.Sequence):
    def __init__(self, df, x_col, y_col=None, batch_size=32, num_classes=None, shuffle=True):
        self.batch_size = batch_size
        self.df = dataframe
        self.indices = self.df.index.tolist()
        self.num_classes = num_classes
        self.shuffle = shuffle
        self.x_col = x_col
        self.y_col = y_col
        self.on_epoch_end()

    def __len__(self):
        return len(self.indices) // self.batch_size

    def __getitem__(self, index):
        index = self.index[index * self.batch_size:(index + 1) * self.batch_size]
        batch = [self.indices[k] for k in index]
        
        X, y = self.__get_data(batch)
        return X, y

    def on_epoch_end(self):
        self.index = np.arange(len(self.indices))
        if self.shuffle == True:
            np.random.shuffle(self.index)

    def __get_data(self, batch):
        X = # logic
        y = # logic
        
        for i, id in enumerate(batch):
            X[i,] = # logic
            y[i] = # labels

        return X, y

SyntaxError: invalid syntax (<ipython-input-209-d82caec56986>, line 30)

In [207]:
dataset['MMM'][2017]['test'].tail()

Unnamed: 0,date,ticker,class,i_1_scaled,cr_1_scaled,or_1_scaled
247,2016-12-23,MMM,0,0.06236,0.306043,-0.172378
248,2016-12-27,MMM,1,-0.330719,-0.339286,-0.151296
249,2016-12-28,MMM,1,0.032306,-0.012011,-0.097723
250,2016-12-29,MMM,1,-0.609324,-0.545704,0.170531
251,2016-12-30,MMM,1,-0.052356,0.073295,0.57732


In [166]:
dataset['AXP'][2017]['train']

Unnamed: 0,date,open,high,low,close,volume,dividends,stock splits,ticker,i_1,cr_1,or_1,i_2,cr_2,or_2,i_3,cr_3,or_3,i_4,cr_4,or_4,i_5,cr_5,or_5,i_6,cr_6,or_6,i_7,cr_7,or_7,i_8,cr_8,or_8,i_9,cr_9,or_9,i_10,cr_10,or_10,i_11,cr_11,or_11,i_12,cr_12,or_12,i_13,cr_13,or_13,i_14,cr_14,or_14,i_15,cr_15,or_15,i_16,cr_16,or_16,i_17,cr_17,or_17,i_18,cr_18,or_18,i_19,cr_19,or_19,i_20,cr_20,or_20,i_40,cr_40,or_40,i_60,cr_60,or_60,i_80,cr_80,or_80,i_100,cr_100,or_100,i_120,cr_120,or_120,i_140,cr_140,or_140,i_160,cr_160,or_160,i_180,cr_180,or_180,i_200,cr_200,or_200,i_220,cr_220,or_220,i_240,cr_240,or_240
10480,2013-12-16,74.986048,75.888097,74.986048,75.486191,3599000,0.0,0.0,AXP,0.004321,0.004562,0.003346,-0.005729,-0.002622,0.007923,-0.016182,-0.018877,0.000715,-0.001054,-0.024140,-0.015594,-0.004874,-0.026184,-0.020874,0.003972,-0.010173,-0.022926,0.003204,-0.009704,-0.006861,0.003444,-0.010992,-0.006390,-0.003298,-0.018877,-0.007682,-0.008486,-0.024709,-0.015594,-0.000931,-0.022316,-0.021445,0.003870,-0.016571,-0.019044,0.004012,-0.010758,-0.013280,0.003202,-0.002028,-0.007448,-0.000834,-0.003216,0.001312,0.015975,0.017386,0.000119,-0.008558,0.014795,0.020790,0.000971,0.016274,0.018190,-0.006156,0.010628,0.019675,-0.002770,0.021859,0.014010,0.005118,0.043001,0.042722,-0.003865,0.082045,0.089176,0.002715,0.142896,0.140199,0.009459,0.113777,0.111457,0.003531,0.149668,0.143057,0.000396,0.105439,0.113965,0.007808,0.244865,0.234790,0.003859,0.255425,0.252550,0.010768,0.353854,0.347153,0.009776,0.435809,0.414163,0.003401,0.437275,0.435974
10481,2013-12-17,75.655879,75.664812,75.012834,75.120010,2893700,0.0,0.0,AXP,0.006670,0.010038,0.002248,0.004321,0.014646,0.012309,-0.005729,0.007390,0.016927,-0.016182,-0.009028,0.009654,-0.001054,-0.014344,-0.006800,-0.004874,-0.016409,-0.012128,0.003972,-0.000237,-0.014198,0.003204,0.000237,0.002011,0.003444,-0.001064,0.002485,-0.003298,-0.009028,0.001182,-0.008486,-0.014918,-0.006800,-0.000931,-0.012502,-0.012704,0.003870,-0.006699,-0.010282,0.004012,-0.000828,-0.004466,0.003202,0.007990,0.001418,-0.000834,0.006790,0.010256,0.015975,0.027599,0.009053,-0.008558,0.024982,0.029909,0.000971,0.026476,0.027286,-0.006156,0.020773,0.028783,-0.000870,0.049677,0.053607,-0.010998,0.096441,0.111557,-0.002978,0.147804,0.153664,-0.002119,0.118870,0.127786,0.009949,0.150681,0.134538,0.001579,0.121395,0.119340,0.000440,0.243025,0.248192,-0.020840,0.260904,0.286439,0.013595,0.356139,0.333114,-0.000841,0.423595,0.438803,0.008971,0.445552,0.433975
10482,2013-12-18,75.289695,76.915166,74.619858,76.799057,5332300,0.0,0.0,AXP,-0.007083,-0.004851,0.002259,0.006670,0.005139,-0.002603,0.004321,0.009724,0.007409,-0.005729,0.002503,0.012005,-0.016182,-0.013835,0.004767,-0.001054,-0.019125,-0.011607,-0.004874,-0.021180,-0.016909,0.003972,-0.005086,-0.018969,0.003204,-0.004615,-0.002839,0.003444,-0.005910,-0.002367,-0.003298,-0.013835,-0.003664,-0.008486,-0.019697,-0.011607,-0.000931,-0.017292,-0.017483,0.003870,-0.011517,-0.015072,0.004012,-0.005675,-0.009284,0.003202,0.003101,-0.003428,-0.000834,0.001906,0.005367,0.015975,0.022614,0.004169,-0.008558,0.020009,0.024924,0.000971,0.021496,0.022314,0.001238,0.046144,0.042414,-0.004189,0.103683,0.111557,-0.002034,0.145493,0.148704,-0.001331,0.119798,0.126514,-0.003333,0.126502,0.134484,-0.005517,0.111411,0.120249,0.013883,0.239351,0.222456,0.004667,0.277327,0.271003,0.003726,0.323671,0.315786,0.014721,0.428612,0.402333,0.009784,0.423818,0.421075
10483,2013-12-19,76.754406,77.281341,76.727613,77.174171,3956300,0.0,0.0,AXP,0.020047,0.022352,-0.000581,-0.007083,0.017392,0.021757,0.006670,0.027605,0.016801,0.004321,0.032293,0.027008,-0.005729,0.024910,0.031693,-0.016182,0.008207,0.024315,-0.001054,0.002799,0.007621,-0.004874,0.000698,0.002216,0.003972,0.017151,0.000116,0.003204,0.017633,0.016560,0.003444,0.016310,0.017041,-0.003298,0.008207,0.015719,-0.008486,0.002214,0.007621,-0.000931,0.004673,0.001632,0.003870,0.010577,0.004089,0.004012,0.016550,0.009989,0.003202,0.025521,0.015959,-0.000834,0.024300,0.024925,0.015975,0.045471,0.023704,-0.008558,0.042808,0.044863,0.000869,0.063311,0.065856,-0.001839,0.133840,0.134374,-0.012225,0.171733,0.198736,-0.003842,0.149097,0.146444,0.001456,0.157228,0.143099,0.007643,0.142707,0.130692,0.004434,0.246963,0.231155,-0.007146,0.296483,0.324105,-0.002626,0.342164,0.343253,0.010512,0.430446,0.431975,0.008713,0.449564,0.440539
10484,2013-12-20,77.442124,78.397759,77.200986,78.210205,6294500,0.0,0.0,AXP,0.005469,0.004884,0.003472,0.020047,0.027345,0.008373,-0.007083,0.022361,0.030912,0.006670,0.032624,0.025911,0.004321,0.037335,0.036210,-0.005729,0.029916,0.040937,-0.016182,0.013132,0.033492,-0.001054,0.007697,0.016650,-0.004874,0.005586,0.011196,0.003972,0.022120,0.009077,0.003204,0.022604,0.025668,0.003444,0.021274,0.026154,-0.003298,0.013132,0.024820,-0.008486,0.007110,0.016650,-0.000931,0.009580,0.010606,0.003870,0.015513,0.013086,0.004012,0.021515,0.019039,0.003202,0.030530,0.025062,-0.000834,0.029303,0.034109,0.015975,0.050577,0.032877,0.001610,0.071685,0.071817,0.004211,0.140578,0.139589,0.004324,0.205291,0.207797,-0.018363,0.152713,0.178982,-0.010738,0.149351,0.165552,-0.005752,0.136875,0.146973,-0.002279,0.237888,0.245201,0.000610,0.331346,0.329859,0.000619,0.350599,0.352356,0.026090,0.439807,0.408070,-0.002648,0.448417,0.451999
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10990,2015-12-24,63.838292,64.543135,63.838292,64.222755,1956400,0.0,0.0,AXP,0.013324,0.016267,-0.003287,0.001600,0.022953,0.012927,0.003668,0.030941,0.019591,-0.018226,0.005750,0.027553,-0.017234,-0.010465,0.002444,0.000000,-0.002566,-0.013718,0.005735,0.016563,-0.005845,-0.002319,0.016120,0.013221,-0.002607,-0.001997,0.012780,0.005305,0.001574,-0.005277,0.006048,0.000858,-0.001718,-0.001856,-0.009064,-0.002432,-0.002965,-0.015893,-0.012321,0.008511,-0.006390,-0.019128,-0.013864,-0.017275,-0.009656,-0.013030,-0.030886,-0.020505,0.002778,-0.023311,-0.034072,-0.002923,-0.026165,-0.026521,0.000696,-0.023992,-0.029366,-0.000139,-0.023174,-0.027201,0.001345,-0.054587,-0.063264,0.003248,-0.039195,-0.055659,-0.011356,-0.084530,-0.054000,-0.000660,-0.075614,-0.075489,-0.000129,-0.094795,-0.097305,-0.007806,-0.112009,-0.108414,0.001141,-0.101140,-0.110333,0.002140,-0.109429,-0.117152,0.018227,-0.109714,-0.135597,0.001630,-0.175987,-0.180319,-0.011521,-0.209685,-0.209899
10991,2015-12-28,64.076282,64.167818,63.490443,63.856594,2860700,0.0,0.0,AXP,0.006022,0.002716,-0.002281,0.013324,0.019027,0.000429,0.001600,0.025731,0.016703,0.003668,0.033741,0.023392,-0.018226,0.008481,0.031384,-0.017234,-0.007778,0.006181,0.000000,0.000142,-0.010041,0.005735,0.019323,-0.002139,-0.002319,0.018879,0.016998,-0.002607,0.000713,0.016556,0.005305,0.004294,-0.001569,0.006048,0.003576,0.002004,-0.001856,-0.006373,0.001287,-0.002965,-0.013221,-0.008639,0.008511,-0.003692,-0.015471,-0.013864,-0.014606,-0.005964,-0.013030,-0.028255,-0.016854,0.002778,-0.020659,-0.030471,-0.002923,-0.023521,-0.022892,0.000696,-0.021342,-0.025748,0.000944,-0.057622,-0.056858,-0.003641,-0.049972,-0.049060,0.000534,-0.048303,-0.062384,-0.009954,-0.069921,-0.070446,-0.012485,-0.091869,-0.074609,0.003676,-0.103045,-0.109160,0.008223,-0.104975,-0.115084,-0.005770,-0.111835,-0.110283,-0.011043,-0.130391,-0.122040,0.005623,-0.175383,-0.120731,-0.004232,-0.205140,-0.187280
10992,2015-12-29,64.332580,64.634655,64.186123,64.579735,3838800,0.0,0.0,AXP,-0.003429,-0.005701,0.007454,0.006022,-0.003001,0.001710,0.013324,0.013217,0.004430,0.001600,0.019883,0.020770,0.003668,0.027847,0.027485,-0.018226,0.002731,0.035509,-0.017234,-0.013435,0.010205,0.000000,-0.005560,-0.006081,0.005735,0.013512,0.001853,-0.002319,0.013070,0.021066,-0.002607,-0.004992,0.020622,0.005305,-0.001432,0.002425,0.006048,-0.002146,0.006012,-0.001856,-0.012038,0.005292,-0.002965,-0.018847,-0.004674,0.008511,-0.009372,-0.011533,-0.013864,-0.020225,-0.001988,-0.013030,-0.033795,-0.012921,0.002778,-0.026242,-0.026593,-0.002923,-0.029088,-0.018984,-0.011069,-0.060092,-0.040677,0.023521,-0.052321,-0.051929,-0.001592,-0.065598,-0.062636,-0.008592,-0.073633,-0.059387,-0.003784,-0.077782,-0.075897,0.012500,-0.112214,-0.116973,-0.000884,-0.118118,-0.104579,-0.001382,-0.113334,-0.109197,0.007666,-0.125050,-0.128262,-0.017120,-0.123746,-0.090079,-0.018514,-0.190066,-0.172722
10993,2015-12-30,64.543135,64.762824,64.149524,64.222755,2792700,0.0,0.0,AXP,0.003842,0.011324,-0.000567,-0.003429,0.005558,0.010751,0.006022,0.008289,0.004989,0.013324,0.024691,0.007718,0.001600,0.031433,0.024111,0.003668,0.039487,0.030848,-0.018226,0.014086,0.038898,-0.017234,-0.002263,0.013512,0.000000,0.005702,-0.002828,0.005735,0.024989,0.005132,-0.002319,0.024543,0.024408,-0.002607,0.006276,0.023962,0.005305,0.009877,0.005706,0.006048,0.009154,0.009304,-0.001856,-0.000850,0.008583,-0.002965,-0.007736,-0.001416,0.008511,0.001846,-0.008298,-0.013864,-0.009129,0.001278,-0.013030,-0.022853,-0.009691,0.002778,-0.015215,-0.023407,0.006945,-0.036992,-0.046389,0.029980,-0.048286,-0.084387,-0.002692,-0.059035,-0.044589,0.062933,-0.055773,-0.112182,0.000000,-0.072347,-0.084505,-0.000498,-0.113581,-0.115297,0.004528,-0.101138,-0.110422,0.004029,-0.105775,-0.109195,-0.001724,-0.124913,-0.120662,0.024883,-0.086584,-0.098645,0.005845,-0.169544,-0.171558


In [148]:
dataset['ABBV'][2015]['train'].tail()

Unnamed: 0,date,open,high,low,close,volume,dividends,stock splits,ticker,i_1,cr_1,or_1,i_2,cr_2,or_2,i_3,cr_3,or_3,i_4,cr_4,or_4,i_5,cr_5,or_5,i_6,cr_6,or_6,i_7,cr_7,or_7,i_8,cr_8,or_8,i_9,cr_9,or_9,i_10,cr_10,or_10,i_11,cr_11,or_11,i_12,cr_12,or_12,i_13,cr_13,or_13,i_14,cr_14,or_14,i_15,cr_15,or_15,i_16,cr_16,or_16,i_17,cr_17,or_17,i_18,cr_18,or_18,i_19,cr_19,or_19,i_20,cr_20,or_20,i_40,cr_40,or_40,i_60,cr_60,or_60,i_80,cr_80,or_80,i_100,cr_100,or_100,i_120,cr_120,or_120,i_140,cr_140,or_140,i_160,cr_160,or_160,i_180,cr_180,or_180,i_200,cr_200,or_200,i_220,cr_220,or_220,i_240,cr_240,or_240,i_1_scaled,cr_1_scaled,or_1_scaled,i_2_scaled,cr_2_scaled,or_2_scaled,i_3_scaled,cr_3_scaled,or_3_scaled,i_4_scaled,cr_4_scaled,or_4_scaled,i_5_scaled,cr_5_scaled,or_5_scaled,i_6_scaled,cr_6_scaled,or_6_scaled,i_7_scaled,cr_7_scaled,or_7_scaled,i_8_scaled,cr_8_scaled,or_8_scaled,i_9_scaled,cr_9_scaled,or_9_scaled,i_10_scaled,cr_10_scaled,or_10_scaled,i_11_scaled,cr_11_scaled,or_11_scaled,i_12_scaled,cr_12_scaled,or_12_scaled,i_13_scaled,cr_13_scaled,or_13_scaled,i_14_scaled,cr_14_scaled,or_14_scaled,i_15_scaled,cr_15_scaled,or_15_scaled,i_16_scaled,cr_16_scaled,or_16_scaled,i_17_scaled,cr_17_scaled,or_17_scaled,i_18_scaled,cr_18_scaled,or_18_scaled,i_19_scaled,cr_19_scaled,or_19_scaled,i_20_scaled,cr_20_scaled,or_20_scaled,i_40_scaled,cr_40_scaled,or_40_scaled,i_60_scaled,cr_60_scaled,or_60_scaled,i_80_scaled,cr_80_scaled,or_80_scaled,i_100_scaled,cr_100_scaled,or_100_scaled,i_120_scaled,cr_120_scaled,or_120_scaled,i_140_scaled,cr_140_scaled,or_140_scaled,i_160_scaled,cr_160_scaled,or_160_scaled,i_180_scaled,cr_180_scaled,or_180_scaled,i_200_scaled,cr_200_scaled,or_200_scaled,i_220_scaled,cr_220_scaled,or_220_scaled,i_240_scaled,cr_240_scaled,or_240_scaled
247,2013-12-24,39.233028,39.240462,38.861362,38.92083,1747000.0,0.0,0.0,ABBV,-0.002845,-0.00019,0.003803,-0.003411,-0.00095,0.003613,-0.0109,-0.032032,0.002849773,0.029178,-0.019578,-0.02835,0.003743,-0.014802,-0.015849,-0.000936,0.00401,-0.011055,0.001338,0.003627,0.007829,-0.003234,-0.001709,0.007444,0.0,0.008439,0.002088,0.017366,0.026753,0.012274,-0.004665,0.023954,0.030658,0.02352,0.056672,0.027848,0.00688,0.058587,0.060691,-0.001608,0.052232,0.062613,0.017097,0.086588,0.056234,-0.002062,0.085243,0.090721,0.000413,0.089741,0.08937,-0.011878,0.081893,0.093886,0.004547,0.08614,0.086008,-0.012041,0.074377,0.090271,0.005903,0.066532,0.067989,0.004717,0.196788,0.190596,-0.003508,0.244805,0.249833,0.004655,0.172455,0.175096,0.00636,0.267851,0.258075,0.006478,0.24399,0.235517,-0.03377,0.192192,0.235801,0.022743,0.269962,0.243521,-0.014058,0.434481,0.459306,-0.00775,0.490672,0.513037,0.00774,0.60824,0.602434,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
248,2013-12-26,39.091796,39.441163,38.905963,39.38913,2401500.0,0.0,0.0,ABBV,-0.007958,-0.004184,0.004393,-0.002845,-0.004374,0.00019,-0.003411,-0.00513,-1.209058e-07,-0.0109,-0.036082,-0.00076,0.029178,-0.023681,-0.031848,0.003743,-0.018925,-0.019392,-0.000936,-0.000191,-0.014615,0.001338,-0.000573,0.004201,-0.003234,-0.005886,0.003817,0.0,0.004219,-0.001519,0.017366,0.022456,0.00863,-0.004665,0.019669,0.026948,0.02352,0.052251,0.024148,0.00688,0.054157,0.056873,-0.001608,0.047829,0.058788,0.017097,0.082042,0.052431,-0.002062,0.080702,0.086795,0.000413,0.085181,0.085449,-0.011878,0.077366,0.089948,0.004547,0.081595,0.082099,0.006265,0.05949,0.056236,0.022093,0.181122,0.158595,-0.009535,0.239887,0.245918,-0.007547,0.165746,0.186841,0.018182,0.248064,0.225996,0.006404,0.225685,0.21708,0.020766,0.225967,0.210477,-0.005067,0.233626,0.239618,-0.000807,0.447693,0.454053,-0.009785,0.500997,0.525893,0.002352,0.589682,0.585424,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
249,2013-12-27,39.337105,39.433739,38.935708,39.062073,2354100.0,0.0,0.0,ABBV,0.007606,0.012032,-0.001321,-0.007958,0.007797,0.010695,-0.002845,0.007606,0.006466199,-0.003411,0.00684,0.006275,-0.0109,-0.024484,0.00551,0.029178,-0.011933,-0.025773,0.003743,-0.00712,-0.013239,-0.000936,0.011839,-0.008432,0.001338,0.011452,0.010502,-0.003234,0.006075,0.010116,0.0,0.016302,0.004747,0.017366,0.034759,0.01496,-0.004665,0.031938,0.033392,0.02352,0.064912,0.030575,0.00688,0.066841,0.063505,-0.001608,0.060436,0.065432,0.017097,0.095061,0.059036,-0.002062,0.093705,0.093615,0.000413,0.098238,0.09226,-0.011878,0.090329,0.096787,-0.015534,0.06427,0.084426,0.013454,0.167408,0.16206,0.012441,0.255395,0.238039,0.010356,0.195869,0.189765,-0.015455,0.23532,0.243942,-0.009249,0.226337,0.226948,0.018095,0.219684,0.197235,0.002793,0.249047,0.250581,0.003473,0.465112,0.447984,-0.004235,0.537499,0.541998,0.026402,0.597483,0.571857,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
250,2013-12-30,39.195871,39.41887,39.099237,39.404003,2898800.0,0.0,0.0,ABBV,-0.006992,-0.008303,0.003425,0.007606,0.003629,-0.004906,-0.007958,-0.000571,0.00706669,-0.002845,-0.00076,0.002853,-0.003411,-0.00152,0.002662,-0.0109,-0.032584,0.0019,0.029178,-0.020138,-0.029271,0.003743,-0.015364,-0.016781,-0.000936,0.003437,-0.011992,0.001338,0.003054,0.006874,-0.003234,-0.002278,0.00649,0.0,0.007864,0.001139,0.017366,0.026167,0.011316,-0.004665,0.023369,0.029682,0.02352,0.056069,0.026875,0.00688,0.057983,0.059687,-0.001608,0.051631,0.061607,0.017097,0.085968,0.055233,-0.002062,0.084623,0.089688,0.000413,0.089119,0.088339,-0.004725,0.076845,0.088339,0.008991,0.153935,0.156378,0.011155,0.229383,0.222821,-0.002908,0.181446,0.193739,0.012684,0.235244,0.222821,0.006465,0.218369,0.231798,-0.005582,0.188865,0.205525,-0.033357,0.241838,0.295706,0.028312,0.43786,0.407559,0.043367,0.531217,0.481843,0.031041,0.560868,0.524789,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
251,2013-12-31,39.389129,39.441162,38.920829,39.255329,3019700.0,0.0,0.0,ABBV,0.00531,0.008754,-0.000377,-0.006992,0.000378,0.008373,0.007606,0.012414,-1.620723e-08,-0.007958,0.008178,0.012032,-0.002845,0.007986,0.007797,-0.003411,0.00722,0.007606,-0.0109,-0.024116,0.00684,0.029178,-0.01156,-0.024484,0.003743,-0.006745,-0.011934,-0.000936,0.012221,-0.00712,0.001338,0.011834,0.011839,-0.003234,0.006455,0.011452,0.0,0.016686,0.006075,0.017366,0.035149,0.016302,-0.004665,0.032328,0.034759,0.02352,0.065314,0.031938,0.00688,0.067244,0.064912,-0.001608,0.060837,0.066841,0.017097,0.095474,0.060436,-0.002062,0.094118,0.095061,0.006579,0.094118,0.082312,0.013426,0.162518,0.142463,0.00827,0.229314,0.218211,0.004458,0.200077,0.186579,-0.003641,0.229314,0.221271,-0.025962,0.238339,0.261604,0.028578,0.211927,0.189831,0.018719,0.302587,0.271115,0.002891,0.415033,0.427102,0.017601,0.489712,0.449127,0.018741,0.532886,0.495283,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [96]:
pickle.dump(dataset, open('data/dataset.pkl', 'wb'))

KeyboardInterrupt: 

## Output

In [79]:
history_data['ABT'][240:250]

Unnamed: 0,date,open,high,low,close,volume,dividends,stock splits,ticker,i_1,cr_1,or_1,i_2,cr_2,or_2,i_3,cr_3,or_3,i_4,cr_4,or_4,i_5,cr_5,or_5,i_6,cr_6,or_6,i_7,cr_7,or_7,i_8,cr_8,or_8,i_9,cr_9,or_9,i_10,cr_10,or_10,i_11,cr_11,or_11,i_12,cr_12,or_12,i_13,cr_13,or_13,i_14,cr_14,or_14,i_15,cr_15,or_15,i_16,cr_16,or_16,i_17,cr_17,or_17,i_18,cr_18,or_18,i_19,cr_19,or_19,i_20,cr_20,or_20,i_40,cr_40,or_40,i_60,cr_60,or_60,i_80,cr_80,or_80,i_100,cr_100,or_100,i_120,cr_120,or_120,i_140,cr_140,or_140,i_160,cr_160,or_160,i_180,cr_180,or_180,i_200,cr_200,or_200,i_220,cr_220,or_220,i_240,cr_240,or_240
2716,1990-12-12,2.349335,2.401982,2.342754,2.395401,7201590,0.0,0.0,ABT,-0.00554,-0.011019,-0.005570478,0.013966,0.0,-0.01652858,0.002793,-0.002778,-0.005570478,0.002785,0.008427,-0.008333,0.011363,0.002793,0.002809,0.017045,0.019886,-0.002793019,-0.00565,0.028653,0.014205,0.035608,0.06213,0.02292285,-0.008798,0.049707,0.056214,0.0,0.055882,0.04386006,0.005917,0.065281,0.05,0.0,0.058996,0.059347,0.005935,0.058996,0.053097,0.01497,0.07485,0.053097,-0.014749,0.06213,0.068862,-0.011696,0.06213,0.056214,-0.014577,0.043605,0.05621355,0.011765,0.046647,0.037791,0.020833,0.065281,0.040817,-0.002959,0.06213,0.059347,-0.009146,0.091186,0.098462,0.034162,0.117124,0.077542,-0.021807,0.124086,0.142743,-0.014124,0.013571,0.028141,-0.015337,0.112457,0.123491,0.006849,0.24199,0.226669,0.009294,0.353213,0.328327,0.009634,0.40632,0.385143,0.011834,0.436771,0.414843,0.001945,0.414497,0.409348,0.014652,,0.316851
2717,1990-12-13,2.408561,2.441465,2.369076,2.375657,3946528,0.0,0.0,ABT,0.019608,0.013928,0.005493841,-0.00554,0.002755,0.01949868,0.013966,0.013928,0.008264324,0.002793,0.011111,0.019499,0.002785,0.022472,0.016666,0.011363,0.01676,0.02808962,0.017045,0.034092,0.022346,-0.00565,0.04298,0.03977278,0.035608,0.076924,0.04871,-0.008798,0.064328,0.08284025,0.0,0.070588,0.070175,0.005917,0.080119,0.07647,0.0,0.073746,0.086053,0.005935,0.073746,0.079645,0.01497,0.089821,0.079645,-0.014749,0.076924,0.095808,-0.011696,0.076924,0.08284025,-0.014577,0.05814,0.08284,0.011765,0.061225,0.063953,0.020833,0.080119,0.067055,0.012384,0.120001,0.119265,-0.020833,0.09867,0.118137,-0.003268,0.16515,0.206122,-0.025641,0.048301,0.075635,0.0,0.145521,0.155413,0.001712,0.250722,0.264042,-0.01105,0.354372,0.377029,-0.003788,0.412302,0.414661,0.007812,0.442585,0.442078,0.011673,0.436983,0.430984,0.001802,0.342672,0.345192
2718,1990-12-14,2.382238,2.3954,2.336173,2.355915,4375131,0.0,0.0,ABT,-0.013661,-0.008242,0.00277015,0.019608,0.005571,-0.00549511,-0.00554,-0.00551,0.008356666,0.013966,0.005571,-0.002755,0.002793,0.002777,0.008357,0.002785,0.014045,0.005555042,0.011363,0.00838,0.016854,0.017045,0.025568,0.01117302,-0.00565,0.034384,0.028409,0.035608,0.068047,0.03724904,-0.008798,0.055555,0.071006,0.0,0.061764,0.058479,0.005917,0.071216,0.064705,0.0,0.064896,0.074184,0.005935,0.064896,0.067846,0.01497,0.080838,0.067846,-0.014749,0.068047,0.08383194,-0.011696,0.068047,0.071006,-0.014577,0.049419,0.071006,0.011765,0.052478,0.052326,0.042683,0.103975,0.058479,-0.009375,0.102862,0.147781,-0.04902,0.189645,0.250332,0.023529,0.06094,0.045537,-0.006173,0.139629,0.135689,0.011905,0.246773,0.229216,0.007547,0.358217,0.369631,-0.001898,0.395335,0.399201,0.003861,0.422377,0.415345,0.003846,0.411435,0.409923,-0.003591,0.326815,0.332887
2719,1990-12-17,2.336173,2.349335,2.323012,2.336173,4677202,0.0,0.0,ABT,-0.01105,-0.00831,-0.008379763,-0.013661,-0.016484,-0.01662015,0.019608,-0.002785,-0.02472559,-0.00554,-0.013774,-0.011142,0.013966,-0.002785,-0.022038,0.002793,-0.005556,-0.01114166,0.002785,0.005618,-0.013889,0.011363,0.0,-0.002808913,0.017045,0.017046,-0.00838,-0.00565,0.025788,0.008523117,0.035608,0.059172,0.017192,-0.008798,0.046784,0.050296,0.0,0.052941,0.038012,0.005917,0.062314,0.044118,0.0,0.056047,0.053412,0.005935,0.056047,0.047197,0.01497,0.071856,0.04719719,-0.014749,0.059172,0.062874,-0.011696,0.059172,0.050296,-0.014577,0.040698,0.050296,-0.005747,0.046784,0.026012,-0.009585,0.135099,0.151003,0.021429,0.236516,0.247591,0.0,0.033984,0.037242,0.018692,0.12314,0.096698,-0.005033,0.215634,0.209512,0.009328,0.354497,0.325769,-0.001908,0.38374,0.380016,-0.003868,0.399706,0.401452,0.011538,0.394344,0.372145,-0.007233,0.318159,0.321399
2720,1990-12-18,2.336172,2.362495,2.329592,2.362495,3968805,0.0,0.0,ABT,0.0,-0.00838,-5.298107e-07,-0.01105,-0.01662,-0.008380288,-0.013661,-0.024726,-0.01662067,0.019608,-0.011142,-0.024726,-0.00554,-0.022038,-0.011142,0.013966,-0.011142,-0.02203889,0.002793,-0.013889,-0.011142,0.002785,-0.002809,-0.01388963,0.011363,-0.00838,-0.002809,0.017045,0.008523,-0.008380288,-0.00565,0.017192,0.008523,0.035608,0.050296,0.017191,-0.008798,0.038012,0.050296,0.0,0.044118,0.038011,0.005917,0.053412,0.044117,0.0,0.047197,0.053412,0.005935,0.047197,0.04719664,0.01497,0.062874,0.047197,-0.014749,0.050296,0.062874,-0.011696,0.050296,0.050296,0.037791,0.026012,-0.005602,0.013158,0.151003,0.158477,0.013605,0.247591,0.197352,-0.008696,0.037242,0.043307,0.021277,0.096698,0.067322,0.013537,0.209512,0.197396,0.001845,0.325769,0.320884,0.019011,0.380016,0.346545,0.007752,0.401452,0.387976,-0.003802,0.372145,0.377382,0.009191,0.321399,0.314659
2721,1990-12-19,2.362497,2.382239,2.336174,2.369078,6101124,0.0,0.0,ABT,0.011268,0.011267,6.462212e-07,0.0,0.002793,0.01126785,-0.01105,-0.00554,0.002793666,-0.013661,-0.013737,-0.00554,0.019608,0.0,-0.013736,-0.00554,-0.011019,6.462212e-07,0.013966,0.0,-0.011019,0.002793,-0.002778,6.462212e-07,0.002785,0.008427,-0.002778,0.011363,0.002793,0.008427288,0.017045,0.019886,0.002794,-0.00565,0.028653,0.019887,0.035608,0.06213,0.028654,-0.008798,0.049707,0.062131,0.0,0.055882,0.049708,0.005917,0.065281,0.055882,0.0,0.058996,0.0652821,0.005935,0.058996,0.058997,0.01497,0.07485,0.058997,-0.014749,0.06213,0.07485,-0.022409,0.005602,0.028654,0.029221,0.17153,0.13827,0.012903,0.210844,0.149145,0.002968,0.055062,0.067549,-0.023809,0.079348,0.105674,0.0,0.210888,0.192964,-0.00734,0.335767,0.340707,-0.003731,0.361717,0.366818,-0.005758,0.403616,0.409036,-0.007663,0.392902,0.409036,-0.001825,0.329472,0.334334
2722,1990-12-20,2.329593,2.349335,2.30985,2.323012,4878583,0.0,0.0,ABT,0.002785,0.002786,-0.01666686,0.011268,0.014085,-0.01392714,0.0,0.005587,-0.002816867,-0.01105,-0.00277,-0.011173,-0.013661,-0.010989,-0.01939,0.019608,0.002786,-0.02747281,-0.00554,-0.008264,-0.013927,0.013966,0.002786,-0.02479316,0.002793,0.0,-0.013927,0.002785,0.011236,-0.01666686,0.011363,0.005587,-0.005618,0.017045,0.022728,-0.011173,-0.00565,0.031519,0.005682,0.035608,0.065089,0.014327,-0.008798,0.052632,0.047338,0.0,0.058824,0.035088,0.005917,0.068249,0.04117637,0.0,0.061947,0.050445,0.005935,0.061947,0.044247,0.01497,0.077844,0.044247,0.00867,0.031519,0.014327,-0.022013,0.14144,0.144071,0.018987,0.152346,0.104988,0.023529,0.070523,0.022431,0.0,0.108754,0.090275,-0.003268,0.196287,0.172492,0.001838,0.344442,0.31233,0.007491,0.370625,0.337761,0.003876,0.412961,0.389411,0.003876,0.412961,0.389411,-0.007353,0.338051,0.332806
2723,1990-12-21,2.362497,2.362497,2.283527,2.296689,8954315,0.0,0.0,ABT,-0.002825,-0.019445,0.01699719,0.002785,-0.016713,-0.00277793,0.011268,-0.005634,4.777418e-07,0.0,-0.013966,0.011268,-0.01105,-0.02216,0.002793,-0.013661,-0.03022,-0.005539743,0.019608,-0.016713,-0.013737,-0.00554,-0.027548,4.777418e-07,0.013966,-0.016713,-0.011019,0.002793,-0.019445,4.777418e-07,0.002785,-0.008427,-0.002778,0.011363,-0.013966,0.008427,0.017045,0.002841,0.002793,-0.00565,0.011461,0.019887,0.035608,0.044379,0.028653,-0.008798,0.032164,0.062131,0.0,0.038235,0.04970793,0.005917,0.047478,0.055882,0.0,0.041298,0.065282,0.005935,0.041298,0.058997,0.0,0.011461,0.028653,-0.034921,0.140839,0.186946,0.031646,0.101867,0.106845,-0.005781,0.019543,0.048929,0.015198,0.087195,0.085811,-0.003289,0.16918,0.196901,0.005505,0.308623,0.32358,0.007449,0.333982,0.340707,0.001938,0.385487,0.411761,-0.003861,0.385487,0.414498,0.007394,0.329041,0.339229
2724,1990-12-24,2.283527,2.309851,2.283527,2.30327,1104921,0.0,0.0,ABT,-0.027855,-0.011332,-0.005730482,-0.002825,-0.030556,-0.01699709,0.002785,-0.027855,-0.03611121,0.011268,-0.016901,-0.033426,0.0,-0.02514,-0.022535,-0.01105,-0.033241,-0.03072601,-0.013661,-0.041209,-0.038781,0.019608,-0.027855,-0.04670349,-0.00554,-0.038567,-0.033426,0.013966,-0.027855,-0.04407682,0.002793,-0.030556,-0.033426,0.002785,-0.019663,-0.036111,0.011363,-0.02514,-0.025281,0.017045,-0.008522,-0.030726,-0.00565,0.0,-0.014204,0.035608,0.032545,-0.00573,-0.008798,0.020468,0.02662771,0.0,0.02647,0.01462,0.005917,0.035608,0.020588,0.0,0.029498,0.029674,-0.002907,0.0,0.011662,0.050336,0.153883,0.114282,-0.018349,0.076014,0.086512,-0.020649,0.019711,0.050513,-0.006024,0.055566,0.062238,0.019737,0.163561,0.13077,0.005464,0.286711,0.270068,0.005535,0.303361,0.28638,0.001942,0.372436,0.367217,-0.005814,0.375097,0.375211,-0.012939,0.301925,0.32113
2725,1990-12-26,2.296689,2.30327,2.283527,2.30327,2569832,0.0,0.0,ABT,0.008645,0.002865,-0.002857127,-0.027855,-0.008499,1.457539e-07,-0.002825,-0.027778,-0.0113314,0.002785,-0.025069,-0.030556,0.011268,-0.014084,-0.027855,0.0,-0.022346,-0.01690132,-0.01105,-0.03047,-0.025139,-0.013661,-0.038462,-0.03324057,0.019608,-0.025069,-0.041209,-0.00554,-0.035812,-0.02785467,0.013966,-0.025069,-0.038567,0.002793,-0.027778,-0.027855,0.002785,-0.016854,-0.030556,0.011363,-0.022346,-0.019663,0.017045,-0.005681,-0.025139,-0.00565,0.002865,-0.008522,0.035608,0.035503,1.457539e-07,-0.008798,0.023392,0.032545,0.0,0.029412,0.020468,0.005917,0.038576,0.026471,-0.035088,0.020408,0.057577,0.0375,0.123915,0.056567,-0.006211,0.095906,0.096189,-0.006135,0.059595,0.082655,0.015244,0.071422,0.058737,-0.022436,0.140546,0.155931,0.007273,0.281049,0.272777,0.01105,0.297501,0.284368,-0.009747,0.379037,0.39675,-0.003914,0.3871,0.394007,-0.009399,0.332552,0.346394
