In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
# imports 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt  
from talib import RSI
from datetime import datetime as dt 
idx = pd.IndexSlice

### Get data 

In [4]:
raw = pd.read_csv('http://hilpisch.com/pyalgo_eikon_eod_data.csv', parse_dates=True).dropna()
raw.head()

Unnamed: 0,Date,AAPL.O,MSFT.O,INTC.O,AMZN.O,GS.N,SPY,.SPX,.VIX,EUR=,XAU=,GDX,GLD
1,2010-01-04,30.572827,30.95,20.88,133.9,173.08,113.33,1132.99,20.04,1.4411,1120.0,47.71,109.8
2,2010-01-05,30.625684,30.96,20.87,134.69,176.14,113.63,1136.52,19.35,1.4368,1118.65,48.17,109.7
3,2010-01-06,30.138541,30.77,20.8,132.25,174.26,113.71,1137.14,19.16,1.4412,1138.5,49.34,111.51
4,2010-01-07,30.082827,30.452,20.6,130.0,177.67,114.19,1141.69,19.06,1.4318,1131.9,49.1,110.82
5,2010-01-08,30.282827,30.66,20.83,133.52,174.31,114.57,1144.98,18.13,1.4412,1136.1,49.84,111.37


In [18]:
# reshape table 
prices = pd.melt(raw,  id_vars = ['Date'], value_vars = raw.columns,
                 var_name = 'Ticker', 
                 value_name = 'Close') 
# set index by ticker and Data 
prices.columns = ['ticker' 'data', 'close'] 
prices.set_index(['ticker', 'date'], inplace=True) 
prices.sort_index(level = ['ticker', 'date'], inplace=True)

# select timeframe
start = '2012-01-01'
end = '2013-12-31'
prices = prices.loc[idx[:, start:end], :]
prices.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Close
Ticker,Date,Unnamed: 2_level_1
.SPX,2012-01-03,1277.06
.SPX,2012-01-04,1277.3
.SPX,2012-01-05,1281.06
.SPX,2012-01-06,1277.81
.SPX,2012-01-09,1280.7


### Add signals 

In [14]:
rolling_1m = prices.Close.groupby('Ticker').rolling(window=21).mean().values
prices['rolling_1m'] = rolling_1m

rolling_rank = prices.rolling_1m.groupby('Ticker').rank(ascending=False).values
prices['rolling_rank'] = rolling_rank

prices['rsi'] = prices.Close.groupby(level='Ticker').apply(RSI)

### Calculate lagged returns

In [16]:
lags = [1, 2, 5, 10, 21, 63] 

In [17]:
for lag in lags: 
    prices['returns_{lag}d'] = prices.close.prc_change(lag) 
prices.head() 

AttributeError: 'DataFrame' object has no attribute 'close'

### Linear Regression 

In [None]:
class MultipleTimeSeriesCV:
    """Generates tuples of train_idx, test_idx pairs
    Assumes the MultiIndex contains levels 'symbol' and 'date'
    purges overlapping outcomes"""

    def __init__(self,
                 n_splits=3,
                 train_period_length=126,
                 test_period_length=21,
                 lookahead=None,
                 shuffle=False):
        self.n_splits = n_splits
        self.lookahead = lookahead
        self.test_length = test_period_length
        self.train_length = train_period_length
        self.shuffle = shuffle

    def split(self, X, y=None, groups=None):
        unique_dates = X.index.get_level_values('date').unique()
        days = sorted(unique_dates, reverse=True)

        split_idx = []
        for i in range(self.n_splits):
            test_end_idx = i * self.test_length
            test_start_idx = test_end_idx + self.test_length
            train_end_idx = test_start_idx + self.lookahead - 1
            train_start_idx = train_end_idx + self.train_length + self.lookahead - 1
            split_idx.append([train_start_idx, train_end_idx,
                              test_start_idx, test_end_idx])

        dates = X.reset_index()[['date']]
        print(dates)
        for train_start, train_end, test_start, test_end in split_idx:
            train_idx = dates[(dates.date > days[train_start])
                              & (dates.date <= days[train_end])].index
            test_idx = dates[(dates.date > days[test_start])
                             & (dates.date <= days[test_end])].index
            if self.shuffle:
                np.random.shuffle(list(train_idx))
            yield train_idx, test_idx

    def get_n_splits(self, X, y, groups=None):
        return self.n_splits