In [4]:
import numpy as np
import pandas_datareader as pdr
import datetime
import pandas as pd
from datetime import datetime

In [19]:
def get(tickers, startdate, enddate):
    def data(ticker):
        return pdr.get_data_google(ticker, start=startdate, end=enddate)

    datas = map(data, tickers)
    return pd.concat(datas, keys=tickers, names=['Ticker', 'Date'])

all_data = get(['AAPL', 'IBM', 'GOOG'], datetime(2007, 1, 1), datetime(2017, 1, 1))

In [41]:
all_data.head(2)

Unnamed: 0_level_0,Unnamed: 1_level_0,Open,High,Low,Close,Volume
Ticker,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
AAPL,2007-01-03,12.33,12.37,11.7,11.97,311433248.0
AAPL,2007-01-04,12.01,12.28,11.97,12.24,214031636.0


In [23]:
all_data_flat = all_data.reset_index()

In [40]:
all_data_flat.head(2)

Unnamed: 0,Ticker,Date,Open,High,Low,Close,Volume
0,AAPL,2007-01-03,12.33,12.37,11.7,11.97,311433248.0
1,AAPL,2007-01-04,12.01,12.28,11.97,12.24,214031636.0


In [39]:
all_data_flat.pivot(index='Date', columns='Ticker', values='Close').head(2)

Ticker,AAPL,GOOG,IBM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2007-01-03,11.97,233.56,97.27
2007-01-04,12.24,241.39,98.31


In [44]:
all_data_unstacked = all_data.unstack(level=0)
all_data_unstacked.head(2)

Unnamed: 0_level_0,Open,Open,Open,High,High,High,Low,Low,Low,Close,Close,Close,Volume,Volume,Volume
Ticker,AAPL,IBM,GOOG,AAPL,IBM,GOOG,AAPL,IBM,GOOG,AAPL,IBM,GOOG,AAPL,IBM,GOOG
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
2007-01-03,12.33,97.18,232.77,12.37,98.4,238.09,11.7,96.26,230.32,11.97,97.27,233.56,311433248.0,9199500.0,
2007-01-04,12.01,97.25,234.27,12.28,98.79,241.73,11.97,96.88,233.94,12.24,98.31,241.39,214031636.0,10557200.0,


In [54]:
all_data_unstacked['Close', 'AAPL'].head(2)

Date
2007-01-03    11.97
2007-01-04    12.24
Name: (Close, AAPL), dtype: float64

In [63]:
all_data_unstacked_pct = all_data_unstacked.pct_change().fillna(0)
all_data_unstacked_pct.head(2)

Unnamed: 0_level_0,Open,Open,Open,High,High,High,Low,Low,Low,Close,Close,Close,Volume,Volume,Volume
Ticker,AAPL,IBM,GOOG,AAPL,IBM,GOOG,AAPL,IBM,GOOG,AAPL,IBM,GOOG,AAPL,IBM,GOOG
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
2007-01-03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2007-01-04,-0.025953,0.00072,0.006444,-0.007276,0.003963,0.015288,0.023077,0.006441,0.015717,0.022556,0.010692,0.033525,-0.312753,0.147584,0.0


In [65]:
from sklearn.model_selection import train_test_split

In [88]:
rows = all_data_unstacked_pct.shape[0]
train_size = int(rows*0.75)
test_size = rows-train_size
print('Train: %d, test: %d' % (train_size, test_size))

Train: 1888, test: 630


In [89]:
train_X = all_data_unstacked_pct.iloc[0:train_size]
test_X = all_data_unstacked_pct.iloc[train_size:]
assert train_X.shape[0] + test_X.shape[0] == rows

In [90]:
train_X.head()

Unnamed: 0_level_0,Open,Open,Open,High,High,High,Low,Low,Low,Close,Close,Close,Volume,Volume,Volume
Ticker,AAPL,IBM,GOOG,AAPL,IBM,GOOG,AAPL,IBM,GOOG,AAPL,IBM,GOOG,AAPL,IBM,GOOG
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
2007-01-03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2007-01-04,-0.025953,0.00072,0.006444,-0.007276,0.003963,0.015288,0.023077,0.006441,0.015717,0.022556,0.010692,0.033525,-0.312753,0.147584,0.0
2007-01-05,0.019983,0.003599,0.02877,0.002443,-0.008503,0.007364,0.007519,0.00031,0.02086,-0.007353,-0.009053,0.00812,-0.024363,-0.315832,0.0
2007-01-08,0.002449,0.009221,0.010746,0.004062,0.015824,0.004846,0.00995,0.014859,0.008542,0.004938,0.015192,-0.007397,-0.044947,0.431572,0.0
2007-01-09,0.0057,0.005888,-0.004598,0.074434,0.008342,-0.00331,-0.001642,0.007321,-0.002076,0.082719,0.01183,0.003974,3.202131,0.074351,0.0


In [91]:
from sklearn.preprocessing import StandardScaler

In [93]:
scaler = StandardScaler()
train_X_scaled = scaler.fit_transform(train_X)
test_X_scaled = scaler.transform(test_X)

In [101]:
train_X_scaled = pd.DataFrame(data=train_X_scaled, columns=train_X.columns)
test_X_scaled = pd.DataFrame(data=test_X_scaled, columns=test_X.columns)
train_X_scaled.head()

Unnamed: 0_level_0,Open,Open,Open,High,High,High,Low,Low,Low,Close,Close,Close,Volume,Volume,Volume
Ticker,AAPL,IBM,GOOG,AAPL,IBM,GOOG,AAPL,IBM,GOOG,AAPL,IBM,GOOG,AAPL,IBM,GOOG
0,-0.056854,-0.031304,-0.028985,-0.067191,-0.034376,-0.036396,-0.05979,-0.032351,-0.035378,-0.060116,-0.0312,-0.034167,-0.136343,-0.146259,-0.021249
1,-1.141226,0.019096,0.169111,-0.459282,0.289591,0.860756,0.95535,0.436986,0.80088,0.953554,0.697625,1.648268,-0.98594,0.238283,-0.021249
2,0.778095,0.220515,0.855429,0.064464,-0.729396,0.395715,0.270957,-0.009787,1.074509,-0.390553,-0.648307,0.373318,-0.202526,-0.969182,-0.021249
3,0.04547,0.61391,0.301367,0.151699,1.2591,0.247966,0.377915,1.050411,0.41911,0.161807,1.004377,-0.405374,-0.258443,0.978234,-0.021249
4,0.181319,0.380701,-0.170321,3.944098,0.64747,-0.230652,-0.132023,0.501103,-0.145829,3.65723,0.775215,0.165285,8.562293,0.047469,-0.021249


In [107]:
train_X_scaled.describe()

Unnamed: 0_level_0,Open,Open,Open,High,High,High,Low,Low,Low,Close,Close,Close,Volume,Volume,Volume
Ticker,AAPL,IBM,GOOG,AAPL,IBM,GOOG,AAPL,IBM,GOOG,AAPL,IBM,GOOG,AAPL,IBM,GOOG
count,1888.0,1888.0,1888.0,1888.0,1888.0,1888.0,1888.0,1888.0,1888.0,1888.0,1888.0,1888.0,1888.0,1888.0,1888.0
mean,6.615471e-18,4.586726e-18,1.7097320000000002e-17,1.308393e-18,1.5112680000000002e-17,-4.77784e-18,-3.733331e-17,-4.116293e-17,1.0173120000000001e-17,3.969283e-19,2.52858e-18,-1.4113e-18,-2.469776e-18,-2.128712e-17,6.38143e-16
std,1.000265,1.000265,1.000265,1.000265,1.000265,1.000265,1.000265,1.000265,1.000265,1.000265,1.000265,1.000265,1.000265,1.000265,1.000265
min,-6.113093,-6.917997,-15.25398,-6.788799,-5.891316,-5.316449,-8.813099,-6.424892,-5.130838,-8.106035,-5.674692,-5.859183,-2.05251,-2.291763,-9.532627
25%,-0.4933883,-0.4569576,-0.2836463,-0.4923048,-0.4557095,-0.4393732,-0.4507512,-0.4166981,-0.4511574,-0.4878459,-0.4717483,-0.4416886,-0.6343269,-0.6055594,-0.02124889
50%,0.01381718,0.01822354,-0.00820251,-0.006229087,-3.007348e-05,-0.0220325,0.03089916,0.01859382,0.02704752,0.0004863213,-0.00829598,-0.02090848,-0.1772898,-0.1761633,-0.02124889
75%,0.5054681,0.5256086,0.2860741,0.4807749,0.4355084,0.4511142,0.4517891,0.4595603,0.466788,0.5236372,0.5094054,0.458216,0.414868,0.3724554,-0.02124889
max,9.327024,5.41844,31.25474,6.747618,6.395425,11.24686,8.410895,6.179507,9.287179,6.290016,7.818135,9.998386,13.6921,12.53017,20.70746


In [109]:
train_X_scaled.max()

        Ticker
Open    AAPL       9.327024
        IBM        5.418440
        GOOG      31.254744
High    AAPL       6.747618
        IBM        6.395425
        GOOG      11.246862
Low     AAPL       8.410895
        IBM        6.179507
        GOOG       9.287179
Close   AAPL       6.290016
        IBM        7.818135
        GOOG       9.998386
Volume  AAPL      13.692099
        IBM       12.530170
        GOOG      20.707462
dtype: float64