# Imports

In [1]:
import os
import numpy as np
import pandas as pd
import pandas_datareader.data as web
from pathlib import Path

In [2]:
import pandas_ta as ta

In [3]:
import json

# Parameters

In [4]:
# Exact number of rows a ticker must have inside the selected date window to be
# kept by the completeness filter (tickers with any other row count are dropped).
# NOTE(review): 21*12*8 presumably stands for 21 trading days x 12 months x 8
# years; the "-4" correction is not explained in the notebook -- confirm.
total_obs = (21 * 12 * 8) - 4

In [5]:
# Back-test window boundaries.
backtest_start = pd.Timestamp("2010-01-01")
backtest_end = pd.Timestamp("2016-01-01")

In [6]:
# Forward-test window boundaries; fwd_test_end is also the upper bound used when
# the price and index histories are cut to the study period.
fwd_test_start = pd.Timestamp("2016-01-01")
fwd_test_end = pd.Timestamp("2018-01-01")

# List of SP500 Companies

In [7]:
# Wikipedia page whose first table lists the S&P 500 constituents.
# NOTE(review): this is the constituent list at download time, not the
# membership during 2010-2017 -- be aware of survivorship bias.
url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"

In [8]:
# read_html returns one DataFrame per HTML table on the page; keep the first.
# header=0 takes the column labels from the table's first row.
df = pd.read_html(url, header=0)[0]

In [9]:
# Ticker symbols from the "Symbol" column, as a plain Python list.
sp500_comp = df["Symbol"].tolist()

In [10]:
sp500_comp[:10]

['MMM', 'AOS', 'ABT', 'ABBV', 'ACN', 'ADBE', 'AMD', 'AES', 'AFL', 'A']

# Data of Companies in SP500

In [11]:
# Location of the raw daily price file, relative to the notebook's working directory.
equity_csv_path = Path("..") / "Data_Store" / "wiki_prices.csv"

In [12]:
# Load the price history indexed by (date, ticker); "date" is parsed to
# datetimes and the frame is sorted on that MultiIndex.
equity_data = pd.read_csv(
    equity_csv_path, parse_dates=["date"], index_col=["date", "ticker"]
)
equity_data = equity_data.sort_index()

In [13]:
equity_data.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 15389314 entries, (Timestamp('1962-01-02 00:00:00'), 'ARNC') to (Timestamp('2018-03-27 00:00:00'), 'ZUMZ')
Data columns (total 12 columns):
 #   Column       Dtype  
---  ------       -----  
 0   open         float64
 1   high         float64
 2   low          float64
 3   close        float64
 4   volume       float64
 5   ex-dividend  float64
 6   split_ratio  float64
 7   adj_open     float64
 8   adj_high     float64
 9   adj_low      float64
 10  adj_close    float64
 11  adj_volume   float64
dtypes: float64(12)
memory usage: 1.4+ GB


In [14]:
# Only the adjusted OHLCV columns are carried forward.
cols_to_include = [
    f"adj_{field}" for field in ("open", "high", "low", "close", "volume")
]

In [15]:
# Drop every column except the adjusted OHLCV fields.
equity_data = equity_data.loc[:, cols_to_include]

In [16]:
# Ticker label of every row (one entry per row, so symbols repeat).
available_ticker = equity_data.index.get_level_values(level="ticker")

In [17]:
available_ticker

Index(['ARNC', 'BA', 'CAT', 'DD', 'DIS', 'GE', 'HPQ', 'IBM', 'KO', 'ARNC',
       ...
       'ZEN', 'ZEUS', 'ZGNX', 'ZION', 'ZIOP', 'ZIXI', 'ZNGA', 'ZOES', 'ZTS',
       'ZUMZ'],
      dtype='object', name='ticker', length=15389314)

In [18]:
# Keep only rows whose ticker appears in the scraped S&P 500 list.
# The mask is built from equity_data's own index at execution time instead of
# the `available_ticker` snapshot taken in an earlier cell: once this cell has
# run, that snapshot is longer than the filtered frame, so re-running the cell
# with it fails on the boolean-mask length mismatch.
equity_data = equity_data[
    equity_data.index.get_level_values("ticker").isin(sp500_comp)
]

In [19]:
# Restrict the history to [backtest_start, fwd_test_end], both ends inclusive.
data = equity_data.loc[
    lambda frame: (frame.index.get_level_values("date") >= backtest_start)
    & (frame.index.get_level_values("date") <= fwd_test_end)
]

In [20]:
# Rows available per ticker inside the window.
nobs_per_group = data.groupby(level="ticker").size()
# Tickers whose row count equals total_obs exactly; all others are discarded.
keep_ticker = nobs_per_group.loc[nobs_per_group.eq(total_obs)].index

In [21]:
# Shorthand for building MultiIndex slicers, e.g. idx[:, tickers].
idx = pd.IndexSlice

In [22]:
# All dates, but only the tickers that passed the completeness filter.
# (slice(None), keep_ticker) is the tuple that idx[:, keep_ticker] expands to.
data = data.loc[(slice(None), keep_ticker), :]

In [23]:
data.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 752488 entries, (Timestamp('2010-01-04 00:00:00'), 'A') to (Timestamp('2017-12-29 00:00:00'), 'ZBRA')
Data columns (total 5 columns):
 #   Column      Non-Null Count   Dtype  
---  ------      --------------   -----  
 0   adj_open    752488 non-null  float64
 1   adj_high    752488 non-null  float64
 2   adj_low     752488 non-null  float64
 3   adj_close   752488 non-null  float64
 4   adj_volume  752488 non-null  float64
dtypes: float64(5)
memory usage: 32.3+ MB


In [24]:
data.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,adj_open,adj_high,adj_low,adj_close,adj_volume
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,A,21.415535,21.579273,21.239108,21.354133,2729300.0
2010-01-04,ACGL,23.936667,24.066667,23.916667,23.983333,1604400.0
2010-01-04,ACN,35.0205,35.594053,35.00363,35.484403,3650100.0
2010-01-04,ADI,25.674932,25.997988,25.529556,25.578015,2102700.0
2010-01-04,ADM,25.917474,26.213862,25.793979,25.909241,3472500.0


In [25]:
data.tail()

Unnamed: 0_level_0,Unnamed: 1_level_0,adj_open,adj_high,adj_low,adj_close,adj_volume
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-12-29,XEL,48.11,48.29,48.07,48.11,2765244.0
2017-12-29,XOM,84.0,84.2,83.64,83.64,8463522.0
2017-12-29,YUM,82.64,82.71,81.59,81.61,1343728.0
2017-12-29,ZBH,121.75,121.95,120.62,120.67,1021850.0
2017-12-29,ZBRA,104.33,104.94,103.28,103.8,158534.0


In [26]:
data

Unnamed: 0_level_0,Unnamed: 1_level_0,adj_open,adj_high,adj_low,adj_close,adj_volume
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,A,21.415535,21.579273,21.239108,21.354133,2729300.0
2010-01-04,ACGL,23.936667,24.066667,23.916667,23.983333,1604400.0
2010-01-04,ACN,35.020500,35.594053,35.003630,35.484403,3650100.0
2010-01-04,ADI,25.674932,25.997988,25.529556,25.578015,2102700.0
2010-01-04,ADM,25.917474,26.213862,25.793979,25.909241,3472500.0
...,...,...,...,...,...,...
2017-12-29,XEL,48.110000,48.290000,48.070000,48.110000,2765244.0
2017-12-29,XOM,84.000000,84.200000,83.640000,83.640000,8463522.0
2017-12-29,YUM,82.640000,82.710000,81.590000,81.610000,1343728.0
2017-12-29,ZBH,121.750000,121.950000,120.620000,120.670000,1021850.0


# Fama-French Factor Data

In [27]:
# Date component of the first row's (date, ticker) index entry.
start = data.index[0][0]

In [28]:
# Date component of the last row's (date, ticker) index entry.
end = data.index[-1][0]

In [29]:
# Dataset name passed to the 'famafrench' reader.
# Note: this rebinds `url`, which previously held the Wikipedia address.
url = "F-F_Research_Data_5_Factors_2x3_daily"

In [30]:
# Download the factor dataset for the price window; the reader's result is
# indexed by key and entry 0 is the table used here.
# NOTE(review): the displayed values (e.g. Mkt-RF 1.69) look like percentages,
# while the stock returns computed later are fractions -- confirm units before
# combining them.
factor_data = web.DataReader(
    name=url, data_source="famafrench", start=start, end=end
)[0]

  factor_data=web.DataReader(url,'famafrench',start=start,end=end)[0]


In [31]:
factor_data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2013 entries, 2010-01-04 to 2017-12-29
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Mkt-RF  2013 non-null   float64
 1   SMB     2013 non-null   float64
 2   HML     2013 non-null   float64
 3   RMW     2013 non-null   float64
 4   CMA     2013 non-null   float64
 5   RF      2013 non-null   float64
dtypes: float64(6)
memory usage: 110.1 KB


In [32]:
factor_data.head()

Unnamed: 0_level_0,Mkt-RF,SMB,HML,RMW,CMA,RF
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,1.69,0.79,1.14,-0.17,0.21,0.0
2010-01-05,0.31,-0.42,1.22,-0.18,0.19,0.0
2010-01-06,0.13,-0.14,0.55,-0.05,0.2,0.0
2010-01-07,0.4,0.25,0.96,-0.66,0.22,0.0
2010-01-08,0.33,0.31,0.02,0.23,-0.4,0.0


In [33]:
factor_data.tail()

Unnamed: 0_level_0,Mkt-RF,SMB,HML,RMW,CMA,RF
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-12-22,-0.07,-0.19,-0.21,-0.1,0.22,0.004
2017-12-26,-0.07,0.31,-0.07,-0.13,0.61,0.004
2017-12-27,0.05,-0.16,-0.2,0.07,-0.12,0.004
2017-12-28,0.22,0.1,0.05,-0.13,-0.19,0.004
2017-12-29,-0.57,-0.31,0.01,0.16,0.13,0.004


In [34]:
# Discard any factor rows that contain a missing value.
factor_data = factor_data.dropna()

# SP500 Data

In [35]:
# Location of the raw S&P 500 index history.
data_path = Path("..") / "Data_Store" / "sp500_data.csv"

In [36]:
# Load the index history with "Date" parsed as the (sorted) datetime index.
data_sp500 = pd.read_csv(data_path, parse_dates=["Date"], index_col=["Date"])
data_sp500 = data_sp500.sort_index()

In [37]:
# Keep only the closing level, as a one-column frame still named "Close".
# The previous `.rename({"Close": "sp500_close"})` was applied to a Series,
# where a dict relabels *index values* rather than the Series name, so it
# never renamed anything. It is removed to avoid suggesting otherwise; the
# column is renamed to "sp500_close" in a later cell, after the returns are
# computed from `.Close`.
data_sp500 = data_sp500[["Close"]]

In [38]:
# Cut the index history to [backtest_start, fwd_test_end], inclusive.
data_sp500 = data_sp500.loc[
    lambda frame: (frame.index >= backtest_start) & (frame.index <= fwd_test_end)
]

In [39]:
# One-row percentage change of the close; the first row has no predecessor
# and is therefore NaN.
data_sp500["sp500_returns"] = data_sp500["Close"].pct_change(periods=1)

In [40]:
# Remove rows with missing values (at least the first row, whose return is NaN).
data_sp500 = data_sp500.dropna()

In [41]:
# Give the close column its final name.
data_sp500 = data_sp500.rename(columns={"Close": "sp500_close"})

In [42]:
data_sp500.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2012 entries, 2010-01-05 to 2017-12-29
Data columns (total 2 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   sp500_close    2012 non-null   float64
 1   sp500_returns  2012 non-null   float64
dtypes: float64(2)
memory usage: 47.2 KB


In [43]:
data_sp500.head()

Unnamed: 0_level_0,sp500_close,sp500_returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2010-01-05,1136.52,0.003116
2010-01-06,1137.14,0.000546
2010-01-07,1141.69,0.004001
2010-01-08,1144.98,0.002882
2010-01-11,1146.98,0.001747


In [44]:
data_sp500.tail()

Unnamed: 0_level_0,sp500_close,sp500_returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-12-22,2683.34,-0.000458
2017-12-26,2680.5,-0.001058
2017-12-27,2682.62,0.000791
2017-12-28,2687.54,0.001834
2017-12-29,2673.61,-0.005183


# Feature Engineering

In [45]:
data.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 752488 entries, (Timestamp('2010-01-04 00:00:00'), 'A') to (Timestamp('2017-12-29 00:00:00'), 'ZBRA')
Data columns (total 5 columns):
 #   Column      Non-Null Count   Dtype  
---  ------      --------------   -----  
 0   adj_open    752488 non-null  float64
 1   adj_high    752488 non-null  float64
 2   adj_low     752488 non-null  float64
 3   adj_close   752488 non-null  float64
 4   adj_volume  752488 non-null  float64
dtypes: float64(5)
memory usage: 32.3+ MB


In [46]:
# Strip the "adj_" marker from the column names (adj_close -> close, ...).
data = data.rename(
    columns={label: label.replace("adj_", "") for label in data.columns}
)

## Dollar Volume

In [47]:
# Traded value per row: close multiplied by volume.
data["dollar_volume"] = data.loc[:, ["close", "volume"]].prod(axis="columns")

In [48]:
# Trailing mean of dollar volume within each ticker over 7, 15 and 21 rows.
# rolling(window=t) leaves the first t-1 rows of every ticker as NaN.
for t in (7, 15, 21):
    data[f"dollar_volume_{t}d"] = (
        data.groupby("ticker")["dollar_volume"]
        .transform(lambda series: series.rolling(window=t).mean())
    )
                                 

In [49]:
# Cross-sectional rank on each date by 21-row average dollar volume;
# ascending=False assigns rank 1 to the largest value.
data["dollar_volume_rank"] = (
    data["dollar_volume_21d"].groupby(level="date").rank(ascending=False)
)

## Returns

In [50]:
# Tail probability used to clip extreme returns at the q and 1-q quantiles.
q = 1e-4

In [51]:
# t-row percentage change of close within each ticker, clipped to the
# [q, 1-q] quantiles of the pooled series (all tickers and dates together),
# then converted to a per-row geometric average: (1 + r) ** (1/t) - 1.
# NOTE(review): the clipping quantiles are computed over the whole sample,
# forward-test period included -- confirm this look-ahead is acceptable.
for t in (1, 7, 15, 21):
    data[f"returns_{t}d"] = (
        data.groupby("ticker")["close"]
        .pct_change(t)
        .pipe(lambda r: r.clip(lower=r.quantile(q), upper=r.quantile(1 - q)))
        .pipe(lambda r: r.add(1).pow(1 / t).sub(1))
    )

## Lagged Returns

In [52]:
# For every return horizon, add copies shifted back by 1, 3, 5 and 7 rows
# within the same ticker (the value observed `lag` rows earlier).
for t in (1, 7, 15, 21):
    for lag in (1, 3, 5, 7):
        data[f"returns_{t}d_lag{lag}"] = (
            data[f"returns_{t}d"].groupby(level="ticker").shift(lag)
        )

## Momentum

In [53]:
# "Momentum" here is the trailing mean of the 1-row return within each ticker.
for t in (7, 15, 21):
    data[f"momentum_{t}d"] = (
        data.groupby("ticker")["returns_1d"]
        .transform(lambda series: series.rolling(window=t).mean())
    )

## Lagged Momentum

In [54]:
# Momentum values shifted back by 1, 3, 5 and 7 rows within each ticker.
for t in (7, 15, 21):
    for lag in (1, 3, 5, 7):
        data[f"momentum_{t}d_lag{lag}"] = (
            data[f"momentum_{t}d"].groupby(level="ticker").shift(lag)
        )

## Volatility

In [55]:
# Trailing standard deviation of the 1-row return within each ticker.
for t in (7, 15, 21):
    data[f"volatility_{t}d"] = (
        data.groupby("ticker")["returns_1d"]
        .transform(lambda series: series.rolling(window=t).std())
    )

## Lagged Volatility

In [56]:
# Volatility values shifted back by 1, 3, 5 and 7 rows within each ticker.
for t in (7, 15, 21):
    for lag in (1, 3, 5, 7):
        data[f"volatility_{t}d_lag{lag}"] = (
            data[f"volatility_{t}d"].groupby(level="ticker").shift(lag)
        )

## Simple Moving Average (SMA)

In [57]:
# Trailing mean of the close within each ticker.
for t in (7, 15, 21):
    data[f"sma_{t}d"] = (
        data.groupby("ticker")["close"]
        .transform(lambda series: series.rolling(window=t).mean())
    )

## Min Price

In [58]:
# Lowest close over the trailing window within each ticker.
for t in (7, 15, 21):
    data[f"min_price_{t}d"] = (
        data.groupby("ticker")["close"]
        .transform(lambda series: series.rolling(window=t).min())
    )

## Max Price

In [59]:
# Highest close over the trailing window within each ticker.
for t in (7, 15, 21):
    data[f"max_price_{t}d"] = (
        data.groupby("ticker")["close"]
        .transform(lambda series: series.rolling(window=t).max())
    )

## Relative Strength Index (RSI)

In [60]:
# RSI of the close with length 14, computed separately for each ticker.
data["rsi"] = data.groupby("ticker")["close"].transform(
    lambda series: ta.rsi(series, length=14)
)

## Bollinger Bands

In [61]:
def compute_bollinger_bands(group):
    """Append lower and upper Bollinger band columns to one ticker's rows.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows of a single group; must contain a ``close`` column.

    Returns
    -------
    pandas.DataFrame
        ``group`` joined on its index with the columns ``bb_lower`` and
        ``bb_upper``.

    Raises
    ------
    KeyError
        If the indicator output has no column starting with ``BBL_`` or
        ``BBU_``.
    """
    bb = ta.bbands(group["close"], length=20)
    # Find the band columns by prefix rather than by the full label
    # ("BBL_20_2.0" / "BBU_20_2.0"): the suffix encodes the indicator settings,
    # so an exact match breaks whenever that naming differs.
    # NOTE(review): newer pandas_ta releases reportedly use a longer suffix
    # (e.g. "BBL_20_2.0_2.0") -- confirm against the installed version.
    try:
        lower_col = next(c for c in bb.columns if c.startswith("BBL_"))
        upper_col = next(c for c in bb.columns if c.startswith("BBU_"))
    except StopIteration:
        raise KeyError(
            f"Bollinger band columns not found in {list(bb.columns)}"
        ) from None
    bb = bb[[lower_col, upper_col]].rename(
        columns={lower_col: "bb_lower", upper_col: "bb_upper"}
    )
    return group.join(bb)

In [62]:
# Compute the bands ticker by ticker; group_keys=False keeps the original
# (date, ticker) index instead of prepending the group key.
data = data.groupby(by="ticker", group_keys=False).apply(compute_bollinger_bands)

In [63]:
# Replace the raw upper band with log1p((upper - close) / upper).
# Caution: this overwrites bb_upper in place, so running the cell a second
# time transforms the already-transformed values.
data["bb_upper"] = np.log1p((data["bb_upper"] - data["close"]) / data["bb_upper"])

In [64]:
# Replace the raw lower band with log1p((close - lower) / close).
# Caution: this overwrites bb_lower in place, so running the cell a second
# time transforms the already-transformed values.
data["bb_lower"] = np.log1p((data["close"] - data["bb_lower"]) / data["close"])

## Average True Range (ATR)

In [65]:
def compute_avg_true_range(group):
    """Append an ``avg_true_range`` column (ATR, length 14) to one group's rows.

    ``group`` must provide ``high``, ``low`` and ``close`` columns; the result
    is ``group`` joined on its index with the new column.
    """
    atr = ta.atr(
        high=group["high"], low=group["low"], close=group["close"], length=14
    )
    return group.join(atr.rename("avg_true_range"))

In [66]:
# Compute ATR ticker by ticker, keeping the original (date, ticker) index.
data = data.groupby(by="ticker", group_keys=False).apply(compute_avg_true_range)

## Moving Average Convergence Divergence (MACD)

In [67]:
def compute_macd(group):
    """Append MACD columns (fast=12, slow=26, signal=9) to one group's rows.

    The indicator's ``MACD_12_26_9``, ``MACDs_12_26_9`` and ``MACDh_12_26_9``
    columns are renamed to ``macd``, ``macd_signal`` and ``macd_hist`` before
    being joined onto ``group`` by index.
    """
    friendly_names = {
        "MACD_12_26_9": "macd",
        "MACDs_12_26_9": "macd_signal",
        "MACDh_12_26_9": "macd_hist",
    }
    raw = ta.macd(group["close"], fast=12, slow=26, signal=9)
    return group.join(raw.rename(columns=friendly_names))

In [68]:
# Compute MACD ticker by ticker, keeping the original (date, ticker) index.
data = data.groupby(by="ticker", group_keys=False).apply(compute_macd)

## Forward Returns

In [69]:
# Targets: for each horizon h, the ticker's returns_1d value found h rows
# later (shift(-h) within the ticker); the last h rows of each ticker are NaN.
# NOTE(review): every horizon shifts the *1-row* return, so forward_returns_5d
# is "the one-day return five rows ahead", not a cumulative five-day return.
# Confirm this is intended before changing it; the saved stocks_data.csv
# carries these columns.
for fwd_steps in (1, 5, 7, 15, 21):
    data[f"forward_returns_{fwd_steps}d"] = (
        data["returns_1d"].groupby(level="ticker").shift(-fwd_steps)
    )

In [70]:
data

Unnamed: 0_level_0,Unnamed: 1_level_0,open,high,low,close,volume,dollar_volume,dollar_volume_7d,dollar_volume_15d,dollar_volume_21d,dollar_volume_rank,...,bb_upper,avg_true_range,macd,macd_hist,macd_signal,forward_returns_1d,forward_returns_5d,forward_returns_7d,forward_returns_15d,forward_returns_21d
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
2010-01-04,A,21.415535,21.579273,21.239108,21.354133,2729300.0,5.828184e+07,,,,,...,,,,,,-0.010863,0.000649,0.007882,-0.005433,0.001356
2010-01-04,ACGL,23.936667,24.066667,23.916667,23.983333,1604400.0,3.847886e+07,,,,,...,,,,,,-0.003336,-0.003106,0.005366,-0.005719,0.000569
2010-01-04,ACN,35.020500,35.594053,35.003630,35.484403,3650100.0,1.295216e+08,,,,,...,,,,,,0.006180,-0.000940,0.011358,-0.001194,-0.000481
2010-01-04,ADI,25.674932,25.997988,25.529556,25.578015,2102700.0,5.378289e+07,,,,,...,,,,,,-0.001579,-0.005716,-0.000333,-0.014638,-0.004680
2010-01-04,ADM,25.917474,26.213862,25.793979,25.909241,3472500.0,8.996984e+07,,,,,...,,,,,,0.005402,0.000649,0.004258,-0.004703,-0.014227
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017-12-29,XEL,48.110000,48.290000,48.070000,48.110000,2765244.0,1.330359e+08,1.486776e+08,1.582446e+08,1.542423e+08,190.0,...,0.080295,0.579526,-0.675182,-0.288591,-0.386591,,,,,
2017-12-29,XOM,84.000000,84.200000,83.640000,83.640000,8463522.0,7.078890e+08,7.328977e+08,8.678067e+08,9.217507e+08,16.0,...,0.007602,0.684221,0.396903,0.083335,0.313568,,,,,
2017-12-29,YUM,82.640000,82.710000,81.590000,81.610000,1343728.0,1.096616e+08,8.422477e+07,1.217347e+08,1.376934e+08,216.0,...,0.023987,0.937312,0.560779,-0.258010,0.818789,,,,,
2017-12-29,ZBH,121.750000,121.950000,120.620000,120.670000,1021850.0,1.233066e+08,1.119114e+08,1.669807e+08,1.617899e+08,179.0,...,0.020777,2.044254,1.691002,0.575195,1.115808,,,,,


In [71]:
data.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 752488 entries, (Timestamp('2010-01-04 00:00:00'), 'A') to (Timestamp('2017-12-29 00:00:00'), 'ZBRA')
Data columns (total 81 columns):
 #   Column               Non-Null Count   Dtype  
---  ------               --------------   -----  
 0   open                 752488 non-null  float64
 1   high                 752488 non-null  float64
 2   low                  752488 non-null  float64
 3   close                752488 non-null  float64
 4   volume               752488 non-null  float64
 5   dollar_volume        752488 non-null  float64
 6   dollar_volume_7d     750244 non-null  float64
 7   dollar_volume_15d    747252 non-null  float64
 8   dollar_volume_21d    745008 non-null  float64
 9   dollar_volume_rank   745008 non-null  float64
 10  returns_1d           752114 non-null  float64
 11  returns_7d           749870 non-null  float64
 12  returns_15d          746878 non-null  float64
 13  returns_21d          744634 non-null  float64

In [72]:
# Keep only rows where every feature and every forward-return target is
# present; this removes the warm-up rows at the start and the rows at the end
# whose forward targets are undefined.
data = data.dropna()

In [73]:
data

Unnamed: 0_level_0,Unnamed: 1_level_0,open,high,low,close,volume,dollar_volume,dollar_volume_7d,dollar_volume_15d,dollar_volume_21d,dollar_volume_rank,...,bb_upper,avg_true_range,macd,macd_hist,macd_signal,forward_returns_1d,forward_returns_5d,forward_returns_7d,forward_returns_15d,forward_returns_21d
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
2010-02-22,A,21.347311,21.347311,21.047125,21.251797,2888500.0,6.138582e+07,8.755057e+07,8.758384e+07,7.986180e+07,197.0,...,0.003026,0.473915,0.056859,0.260081,-0.203223,-0.009631,0.020025,0.002472,-0.003291,0.011566
2010-02-22,ACGL,24.070000,24.306667,24.070000,24.230000,2048400.0,4.963273e+07,4.717724e+07,4.194648e+07,3.907178e+07,268.0,...,0.008969,0.346561,0.061855,0.111841,-0.049985,-0.000275,0.013788,-0.005998,-0.002398,0.001330
2010-02-22,ACN,34.573465,34.657811,34.202342,34.328862,2421400.0,8.312391e+07,1.116830e+08,1.292910e+08,1.358379e+08,119.0,...,0.033302,0.674139,-0.479265,0.174337,-0.653602,-0.010811,0.003002,0.000985,0.009022,-0.000707
2010-02-22,ADI,24.188871,24.261559,23.898120,24.140412,4298200.0,1.037603e+08,1.565638e+08,1.256450e+08,1.216604e+08,133.0,...,-0.005210,0.650486,-0.198539,0.494893,-0.693432,-0.024757,0.019836,-0.007765,-0.002070,0.011972
2010-02-22,ADM,24.683991,24.866038,24.535043,24.667441,3619800.0,8.929120e+07,1.197275e+08,1.496057e+08,1.499746e+08,108.0,...,0.035304,0.516744,-0.184526,0.023590,-0.208116,-0.006374,0.010218,-0.002345,-0.001745,-0.000341
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017-11-29,XEL,51.180000,51.480000,50.850000,51.260000,2311537.0,1.184894e+08,1.072401e+08,1.381043e+08,1.471875e+08,187.0,...,0.012771,0.538257,0.529041,-0.011455,0.540496,0.006828,0.005709,0.007081,-0.017028,0.000624
2017-11-29,XOM,81.650000,82.310000,81.480000,82.270000,9475070.0,7.795140e+08,6.577386e+08,7.044767e+08,6.980541e+08,19.0,...,0.028160,0.746925,-0.283034,-0.052811,-0.230222,0.012398,-0.007359,0.001333,0.005216,-0.004523
2017-11-29,YUM,81.290000,82.120000,81.200000,81.810000,1658544.0,1.356855e+08,1.058911e+08,1.232016e+08,1.427005e+08,192.0,...,0.009228,1.033082,1.068747,0.145776,0.922972,0.020291,0.001685,0.003251,-0.008810,-0.012822
2017-11-29,ZBH,115.500000,116.740000,115.000000,116.520000,1749712.0,2.038764e+08,1.181175e+08,1.283747e+08,1.856724e+08,151.0,...,0.015251,2.028270,-0.638798,0.623590,-1.262388,0.004978,0.002511,-0.012197,-0.001648,-0.008871


# Saving Data

In [74]:
# Output directory for the prepared datasets.
# NOTE(review): nothing in this notebook creates the directory; the save
# cells assume it already exists.
prepared_data_store_path = Path("..") / "Prepared_Data_Store"

## Align the various datasets

In [75]:
# First and last dates that remain in the prepared stock data. min()/max()
# are used instead of positional [0]/[-1] so the bounds do not depend on the
# rows still being date-sorted after the groupby/apply feature steps.
start_date = pd.to_datetime(data.index.get_level_values("date").min())
end_date = pd.to_datetime(data.index.get_level_values("date").max())

In [76]:
# Trim the factor data to the stock data's date span (both ends inclusive).
factor_data = factor_data.loc[
    lambda frame: (frame.index >= start_date) & (frame.index <= end_date)
]

In [77]:
# Trim the index data to the stock data's date span (both ends inclusive).
data_sp500 = data_sp500.loc[
    lambda frame: (frame.index >= start_date) & (frame.index <= end_date)
]

## Saving Stocks Data

In [78]:
# Destination file for the prepared per-stock features (string path).
path = os.path.join(prepared_data_store_path, "stocks_data.csv")

In [79]:
# Write the stock features; the (date, ticker) index is written as columns.
data.to_csv(path_or_buf=path, index=True)

## Saving Factor Data

In [80]:
# Rename the index level "Date" to "date" to match the stock data's naming.
factor_data = factor_data.rename_axis(index={"Date": "date"})

In [81]:
# Destination file for the aligned factor data (string path).
path = os.path.join(prepared_data_store_path, "factor_data.csv")

In [82]:
# Write the factor data, including its date index.
factor_data.to_csv(path_or_buf=path, index=True)

## Saving SP500 Data

In [83]:
# Rename the index level "Date" to "date" to match the stock data's naming.
data_sp500 = data_sp500.rename_axis(index={"Date": "date"})

In [84]:
# Destination file for the aligned index data (string path).
path = os.path.join(prepared_data_store_path, "sp500_data.csv")

In [85]:
# Write the index data, including its date index.
data_sp500.to_csv(path_or_buf=path, index=True)

## Saving Stocks to be Traded

In [86]:
# Distinct tickers present in the prepared data, in order of first appearance.
# get_level_values returns one label per *row*, so without unique() every
# symbol would be written once per date and the JSON file would hold hundreds
# of thousands of repeated entries instead of the list of tradable stocks.
stocks = data.index.get_level_values("ticker").unique().tolist()

In [87]:
# Destination file for the ticker list (string path).
path = os.path.join(prepared_data_store_path, "stocks_to_traded.json")

In [88]:
# Serialise the ticker list as JSON text and write it to `path`.
Path(path).write_text(json.dumps(stocks))