### 1. Function definitions

#### 1-1. Import packages

In [1]:
import os
from pathlib import Path
import functools

import quandl
import json
import pandas as pd
pd.set_option("display.precision", 4)
pd.set_option('display.float_format', lambda x: '%.4f' % x)
# import pandas_datareader.data as pdr

# import math
import numpy as np
import datetime as dt
from dateutil.relativedelta import relativedelta

# plotting packages
from matplotlib import pyplot as plt
plt.rcParams['figure.figsize'] = [21, 8]

#### 1-2. Functions to fetch data

**1-2-1. Get my personal keys** from ../data/APIs.json

In [2]:
# Load personal API keys from ../data/APIs.json.
# The context manager closes the file even when json.load raises on malformed JSON.
with open('../data/APIs.json', encoding='utf-8') as f:
    APIs = json.load(f)

**1-2-2. Define helper functions**

In [3]:
def assertCorrectDateFormat(date_text):
    """Check that ``date_text`` parses as a ``YYYY-MM-DD`` date.

    Parameters
    ----------
    date_text : str
        Candidate date string.

    Returns
    -------
    None
        The function only validates; it returns nothing on success.

    Raises
    ------
    ValueError
        If ``date_text`` cannot be parsed with the ``%Y-%m-%d`` format.
    """
    try:
        dt.datetime.strptime(date_text, '%Y-%m-%d')
    except ValueError as err:
        # Chain the parser's own error so the traceback still shows what failed.
        raise ValueError("Incorrect date format, should be YYYY-MM-DD") from err

**1-2-3. Define function** to retrieve raw data from Quandl

**Documentation**
```
Continuous Futures (SCF)
https://data.nasdaq.com/databases/SCF/documentation
```

In [66]:
def getQuandlSCFData(secs,start_date,end_date,columns):
    """Retrieve rows of the Quandl ``SCF/PRICES`` table, indexed by (date, symbol).

    Parameters
    ----------
    secs : str or list of str
        Value(s) passed to the table query as the ``quandl_code`` filter.
    start_date, end_date : str
        Bounds passed as the ``gte`` / ``lte`` date filter.
    columns : str or iterable of str
        Extra column(s) to request. ``date`` and ``symbol`` are always requested.

    Returns
    -------
    pandas.DataFrame
        The retrieved rows with a sorted ``(date, symbol)`` MultiIndex.
    """
    # Retrieve data using quandl.get_table
    quandl.ApiConfig.api_key = APIs['Quandl']

    # A bare string would otherwise be split into single characters by list().
    if isinstance(columns, str):
        columns = [columns]
    # dict.fromkeys de-duplicates while keeping a deterministic order (set() does not).
    requested = list(dict.fromkeys(['date', 'symbol', *columns]))

    data = quandl.get_table('SCF/PRICES',
                            quandl_code = secs,
                            date = {'gte':start_date, 'lte':end_date},
                            qopts = {'columns':requested},
                            paginate=True
                            )

    data['date'] = pd.to_datetime(data['date'], unit='D')
    if data.empty:
        # min()/max() of an empty date column is NaT, which cannot be formatted with %Y-%m-%d.
        print(f"Data   | Quandl | Retrieved no {secs} data for dates from {start_date} to {end_date}.")
    else:
        print(f"Data   | Quandl | Retrieved {secs} data for dates from {data.date.min():%Y-%m-%d} to {data.date.max():%Y-%m-%d}.")

    # Build the indexed, sorted frame without mutating `data` in place.
    return data.set_index(['date','symbol']).sort_index()

**Documentation**
```
End of Day US Stock Prices (EOD)
https://data.nasdaq.com/databases/EOD/documentation
https://data.nasdaq.com/databases/EOD/usage/quickstart/python
```

In [67]:
# Function that retrieves EOD data from Quandl
def getQuandlEODData(secs,start_date,end_date,columns):
    """Retrieve rows of the Quandl ``QUOTEMEDIA/PRICES`` table, indexed by (date, ticker).

    NOTE (from the original author): missing data for a requested date will
    NOT return a row.

    Parameters
    ----------
    secs : str or list of str
        Security ticker(s), passed to the table query as the ``ticker`` filter.
    start_date : str (YYYY-MM-DD)
        Start date of data.
    end_date : str (YYYY-MM-DD)
        End date of data (same as or after ``start_date``).
    columns : str or iterable of str
        Column(s) to return. ``date`` and ``ticker`` are always requested.

    Returns
    -------
    pandas.DataFrame
        The retrieved rows with a sorted ``(date, ticker)`` MultiIndex.
    """
    # Retrieve data using quandl.get_table
    quandl.ApiConfig.api_key = APIs['Quandl']

    # `columns` is documented as "string / list of string"; a bare string would
    # otherwise be split into single characters by list().
    if isinstance(columns, str):
        columns = [columns]
    # dict.fromkeys de-duplicates while keeping a deterministic order (set() does not).
    requested = list(dict.fromkeys(['date', 'ticker', *columns]))

    data = quandl.get_table('QUOTEMEDIA/PRICES',
                            ticker = secs,
                            date = {'gte':start_date, 'lte':end_date},
                            qopts = {'columns':requested},
                            paginate=True
                            )

    data['date'] = pd.to_datetime(data['date'], unit='D')
    if data.empty:
        # min()/max() of an empty date column is NaT, which cannot be formatted with %Y-%m-%d.
        print(f"Data   | Quandl | Retrieved no {secs} data for dates from {start_date} to {end_date}.")
    else:
        print(f"Data   | Quandl | Retrieved {secs} data for dates from {data.date.min():%Y-%m-%d} to {data.date.max():%Y-%m-%d}.")

    # Build the indexed, sorted frame without mutating `data` in place.
    return data.set_index(['date','ticker']).sort_index()

In [72]:
def getPriceAndVolume(secs,start_date='2008-01-01',end_date='2022-01-31'):
    """Download one price column and one volume column per security.

    ``'CBOE_VX1_EB'`` is fetched through ``getQuandlSCFData`` (``settle`` /
    ``volume``); every other security goes through ``getQuandlEODData``
    (``adj_close + dividend`` / ``adj_volume``). Rows containing any missing
    value are dropped per security before the columns are combined.

    The per-security series are combined on the union of their dates, so the
    result does not depend on which security is listed first; dates a security
    has no row for are NaN in that security's column.

    Parameters
    ----------
    secs : str or iterable of str
        Security identifier(s). A single string is treated as one security.
    start_date, end_date : str (YYYY-MM-DD)
        Date range forwarded to the Quandl helpers.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        ``prices`` and ``volumes``, indexed by date with one column per security.

    Raises
    ------
    ValueError
        If a date is not ``YYYY-MM-DD`` or ``start_date`` is after ``end_date``.
    """
    assertCorrectDateFormat(start_date)
    assertCorrectDateFormat(end_date)
    # Compare parsed dates: strptime also accepts non-zero-padded strings, for
    # which plain string comparison would not be chronological.
    if dt.datetime.strptime(start_date, '%Y-%m-%d') > dt.datetime.strptime(end_date, '%Y-%m-%d'):
        raise ValueError("start_date must be the same as or before end_date")

    # Iterating a bare string would yield single characters.
    if isinstance(secs, str):
        secs = [secs]

    priceBySec = {}
    volumeBySec = {}
    for sec in secs:
        if sec=='CBOE_VX1_EB':
            sec_data = getQuandlSCFData(sec,start_date,end_date,('settle','volume')).dropna()
            sec_data = sec_data.droplevel('symbol') # remove this level of index
            priceBySec[sec] = sec_data.settle
            volumeBySec[sec] = sec_data.volume.astype(int)
        else:
            sec_data = getQuandlEODData(sec,start_date,end_date,('adj_close','dividend','adj_volume')).dropna()
            sec_data = sec_data.droplevel('ticker') # remove this level of index
            # NOTE(review): adj_close may already be dividend-adjusted by the data
            # vendor - confirm that adding `dividend` does not double-count it.
            priceBySec[sec] = sec_data.adj_close + sec_data.dividend # add back the dividend
            volumeBySec[sec] = sec_data.adj_volume.astype(int)

    if not priceBySec:
        print("Data   | DONE   | No securities requested; returning empty frames.")
        return pd.DataFrame(), pd.DataFrame()

    # One concat instead of growing a frame column by column: assigning into an
    # initially empty frame pins the index to the first security's dates.
    prices = pd.concat(priceBySec, axis=1).sort_index()
    volumes = pd.concat(volumeBySec, axis=1).sort_index()

    if prices.empty:
        # An empty index has no min/max that can be formatted as a date.
        print("Data   | DONE   | Returning 0 rows of data.")
    else:
        print(f"Data   | DONE   | Returning {len(prices):d} rows of data from {prices.index.min():%Y-%m-%d} to {prices.index.max():%Y-%m-%d}.")

    return prices, volumes
    

In [71]:
# Universe of securities to download; 'CBOE_VX1_EB' is the only identifier that
# getPriceAndVolume routes through the SCF (continuous futures) helper.
secs = (
    'BIL', 'DBA', 'USO', 'GLD', 'SLX',
    'UNG', 'SLV', 'VEA', 'VWO', 'CBOE_VX1_EB',
    'ITOT', 'EMB', 'BWX', 'LQD', 'HYG',
    'MBB', 'TIP', 'SPTL', 'PSP', 'VNQ',
)

# Download with the function's default date range, then display the prices.
prices, volumes = getPriceAndVolume(secs)
prices

Data   | Quandl | Retrieved BIL data for dates from 2008-01-02 to 2022-01-31.
Data   | Quandl | Retrieved DBA data for dates from 2008-01-02 to 2022-01-31.
Data   | Quandl | Retrieved USO data for dates from 2008-01-02 to 2022-01-31.
Data   | Quandl | Retrieved GLD data for dates from 2008-01-02 to 2022-01-31.
Data   | Quandl | Retrieved SLX data for dates from 2008-01-02 to 2022-01-31.
Data   | Quandl | Retrieved UNG data for dates from 2008-01-02 to 2022-01-31.
Data   | Quandl | Retrieved SLV data for dates from 2008-01-02 to 2022-01-31.
Data   | Quandl | Retrieved VEA data for dates from 2008-01-02 to 2022-01-31.
Data   | Quandl | Retrieved VWO data for dates from 2008-01-02 to 2022-01-31.
Data   | Quandl | Retrieved CBOE_VX1_EB data for dates from 2008-01-02 to 2022-01-31.
Data   | Quandl | Retrieved ITOT data for dates from 2008-01-02 to 2022-01-31.
Data   | Quandl | Retrieved EMB data for dates from 2008-01-02 to 2022-01-31.
Data   | Quandl | Retrieved BWX data for dates from 200

Unnamed: 0_level_0,BIL,DBA,USO,GLD,SLX,UNG,SLV,VEA,VWO,CBOE_VX1_EB,ITOT,EMB,BWX,LQD,HYG,MBB,TIP,SPTL,PSP,VNQ
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2008-01-02,85.2938,32.5637,627.76,84.86,58.9571,1224.0,15.18,32.4341,36.6387,189.6457,24.8569,53.1117,22.6941,62.2834,40.6184,72.2863,74.3347,18.6102,9.379,34.0823
2008-01-03,85.238,33.3332,626.96,85.57,59.422,1194.24,15.285,32.4884,36.991,189.2057,24.8511,53.2059,22.7612,62.0779,40.5409,72.3216,74.5857,18.5758,9.3146,33.0198
2008-01-04,85.2201,33.6839,618.48,85.13,57.0485,1208.0,15.167,31.7761,35.7685,190.4657,24.22,53.2321,22.8157,62.5887,40.4959,72.3851,74.5647,18.6756,9.0442,31.6792
2008-01-07,85.2936,33.2358,604.0,84.77,55.5694,1217.6,15.053,31.8168,36.0503,189.9057,24.311,53.3525,22.7654,62.5124,40.2551,72.6393,74.5508,18.7135,9.1086,31.974
2008-01-08,85.2342,33.606,608.48,86.78,54.6257,1228.8,15.59,31.559,36.0222,191.3257,23.8212,53.4816,22.7402,62.395,40.0224,72.5122,74.6066,18.6549,8.7695,30.8671


**1-2-4. Define function** to get factor data

In [92]:
def calcRollingCumRet(timeSeriesPrices,window=20):
    """Rolling compounded return over ``window`` consecutive rows.

    Parameters
    ----------
    timeSeriesPrices : pandas.Series or pandas.DataFrame
        Price levels, one row per period.
    window : int, default 20
        Number of single-period returns compounded into each output value.

    Returns
    -------
    Same type as ``timeSeriesPrices``
        Compounded return per row, with the first ``window`` rows removed.
    """
    # Missing single-period returns (including the first row, which has no
    # prior price) are treated as 0%.
    ret = timeSeriesPrices.pct_change().fillna(0)
    growth = 1 + ret

    # raw=True hands each window to np.prod as a plain ndarray instead of
    # building a pandas Series per window, which is considerably faster.
    rollingGrowth = growth.rolling(window).apply(np.prod, raw=True)

    # Position window-1 is the first non-NaN rolling value, but it still contains
    # the filled-in 0% of row 0, so the first full window ends at position `window`.
    return (rollingGrowth - 1).iloc[window:]



In [94]:
# Rolling cumulative return of every column in `prices`, using the function's
# default window; the bare last line displays the result.
cumRet = calcRollingCumRet(prices)
cumRet

Unnamed: 0_level_0,BIL,DBA,USO,GLD,SLX,UNG,SLV,VEA,VWO,CBOE_VX1_EB,ITOT,EMB,BWX,LQD,HYG,MBB,TIP,SPTL,PSP,VNQ
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2008-01-31,0.0031,0.1065,-0.0780,0.0771,-0.0723,0.0269,0.1079,-0.0717,-0.0876,0.0051,-0.0474,0.0081,0.0148,0.0137,-0.0062,0.0115,0.0276,0.0165,-0.0224,-0.0047
2008-02-01,0.0036,0.0885,-0.1008,0.0442,-0.0308,0.0096,0.0873,-0.0603,-0.0744,0.0038,-0.0337,0.0017,0.0170,0.0213,-0.0038,0.0113,0.0247,0.0217,0.0111,0.0702
2008-02-04,0.0040,0.0989,-0.0768,0.0466,-0.0032,0.0167,0.0889,-0.0491,-0.0439,0.0017,-0.0185,0.0016,0.0131,0.0102,-0.0024,0.0085,0.0190,0.0110,0.0413,0.1040
2008-02-05,0.0025,0.1102,-0.0709,0.0343,-0.0214,0.0216,0.0740,-0.0883,-0.0909,0.0130,-0.0510,-0.0029,0.0081,0.0150,-0.0046,0.0079,0.0241,0.0158,0.0005,0.0551
2008-02-06,0.0041,0.1041,-0.0926,0.0250,-0.0113,0.0146,0.0504,-0.0832,-0.1042,0.0096,-0.0414,-0.0050,0.0096,0.0114,-0.0008,0.0082,0.0221,0.0129,0.0235,0.0751
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-01-25,-0.0002,0.0125,0.1183,0.0190,-0.0182,0.0406,0.0323,-0.0500,-0.0071,0.2885,-0.0970,-0.0354,-0.0152,-0.0372,-0.0246,-0.0139,-0.0203,-0.0434,-0.1266,-0.0789
2022-01-26,0.0000,0.0202,0.1282,0.0068,-0.0127,0.0804,0.0212,-0.0545,-0.0140,0.3427,-0.0992,-0.0396,-0.0227,-0.0425,-0.0275,-0.0198,-0.0259,-0.0505,-0.1252,-0.0966
2022-01-27,0.0001,-0.0010,0.1231,-0.0059,-0.0108,0.1931,-0.0043,-0.0605,-0.0186,0.3889,-0.1059,-0.0332,-0.0217,-0.0350,-0.0302,-0.0157,-0.0228,-0.0247,-0.1200,-0.1175
2022-01-28,-0.0002,0.0182,0.1247,-0.0159,-0.0115,0.3649,-0.0295,-0.0540,-0.0294,0.2924,-0.0822,-0.0347,-0.0256,-0.0378,-0.0270,-0.0138,-0.0244,-0.0316,-0.1162,-0.0928


In [13]:
def calcRollingVolatility(timeSeriesPrices,window=20,scalePeriods=225):
    """Rolling standard deviation rescaled by ``sqrt(scalePeriods / window)``.

    Parameters
    ----------
    timeSeriesPrices : pandas.Series or pandas.DataFrame
        Input values, one row per period.
    window : int, default 20
        Number of rows in each rolling window.
    scalePeriods : int or float, default 225
        Numerator of the rescaling factor; the default reproduces the constant
        that was previously hard-coded.
        NOTE(review): presumably the number of trading days per year - confirm
        (252 is the more common convention).

    Returns
    -------
    Same type as ``timeSeriesPrices``
        Rescaled rolling standard deviation with the first ``window`` rows removed.
    """
    # NOTE(review): the standard deviation is taken over the values passed in
    # (price levels when called with prices), not over returns - confirm intended.
    rollingVol = timeSeriesPrices.rolling(window).std()/np.sqrt(window/scalePeriods)

    # NOTE(review): the first non-NaN rolling value sits at position window-1, so
    # this slice also drops one valid row; it does make the output start on the
    # same row as calcRollingCumRet - confirm that alignment is the intent.
    return rollingVol.iloc[window:]

In [14]:
# Display the rolling volatility of `prices` using the function's default window.
calcRollingVolatility(prices)

Unnamed: 0_level_0,BIL,DBA,EMB,GLD,HYG,LQD,MBB,PSP,SLV,TIP,UNG,USO,VEA,VNQ,VWO
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2008-01-31,0.3997,3.7548,0.4086,7.4895,0.9617,1.0840,0.9148,0.9819,1.6680,2.3295,97.8757,62.0338,3.8633,4.2298,5.9211
2008-02-01,0.4146,3.7001,0.3825,7.1958,0.9194,1.1360,0.8573,1.0143,1.6472,2.3833,95.0897,54.9592,3.5273,4.7671,5.6009
2008-02-04,0.4230,3.8397,0.3588,6.6871,0.8919,1.1993,0.7775,1.0947,1.5360,2.3130,92.2711,47.7329,3.3539,5.0705,5.4990
2008-02-05,0.4267,3.7306,0.4017,5.9900,0.8732,1.2995,0.7878,1.0953,1.2966,2.2657,90.4618,45.9338,3.2679,5.1052,5.4429
2008-02-06,0.4294,3.6848,0.4352,5.7601,0.8733,1.2849,0.7316,1.0964,1.2269,2.1751,89.9083,43.3007,3.2002,4.8959,5.4150
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-01-25,0.0253,0.6945,5.3642,5.2006,2.1797,5.4000,2.2289,2.0439,1.9826,4.4861,3.0275,8.4184,2.8360,12.5523,2.2457
2022-01-26,0.0253,0.7164,5.2612,5.1088,2.2523,5.3074,2.1897,2.1976,1.9882,4.4466,3.0479,8.5496,3.2922,13.3194,2.4024
2022-01-27,0.0255,0.7158,4.9904,5.3134,2.4135,5.2109,2.0994,2.3412,1.9988,4.2254,3.2883,8.6516,3.7431,14.1207,2.6521
2022-01-28,0.0273,0.7386,4.3946,5.7113,2.4242,4.7583,1.9174,2.3920,2.0756,3.6272,3.6434,8.6244,4.0240,13.6025,2.8883


In [None]:
def calcLoss():
    """Placeholder for a loss calculation that has not been written yet.

    Raises
    ------
    NotImplementedError
        Always; the stub exists so the cell parses and the gap is explicit.
    """
    # TODO: decide on inputs and the loss definition, then implement.
    raise NotImplementedError("calcLoss is not implemented yet")

In [None]:
def calcRollingAvgVolume(timeSeriesVolumes,window=20):
    """Rolling mean of ``timeSeriesVolumes`` over ``window`` rows.

    The leading rows, where fewer than ``window`` observations are available,
    are kept and hold NaN.
    """
    rollingWindow = timeSeriesVolumes.rolling(window=window)
    avgVolume = rollingWindow.mean()
    return avgVolume

# Display the rolling average of `volumes` using the function's default window.
calcRollingAvgVolume(volumes)

**1-2-5. Define function** to get trading positions

In [None]:
def calcSecurityRanking(factorData,cut=10,rebalance='M'):
    """Bucket securities into ``cut`` groups per rebalance period by factor value.

    The factor data is averaged within each ``rebalance`` period. Within every
    period the securities (columns) are ranked from lowest to highest factor
    value and mapped to a bucket ``ceil(rank * cut / n)``, where ``n`` is the
    number of securities with a value in that period. Bucket 1 therefore holds
    the lowest values and bucket ``cut`` the highest. Ties are broken by column
    order so that bucket sizes stay balanced.

    Parameters
    ----------
    factorData : pandas.DataFrame
        One column per security; the index must be usable with ``resample``.
    cut : int, default 10
        Number of buckets.
    rebalance : str, default 'M'
        Resampling frequency passed to ``DataFrame.resample``.
        W - Weekly, M - Monthly, Q - Quarterly, A - Annually.
        NOTE(review): recent pandas versions spell some of these aliases
        differently (e.g. 'ME' for month end) - confirm against the installed version.

    Returns
    -------
    pandas.DataFrame
        Bucket numbers (floats in ``1..cut``), indexed by rebalance period with
        one column per security; NaN where the security has no factor value.

    Raises
    ------
    ValueError
        If ``cut`` is smaller than 1.
    """
    if cut < 1:
        raise ValueError("cut must be a positive integer")

    resampled = factorData.resample(rebalance).mean()

    # pd.qcut only accepts 1-D input, so the buckets are derived from
    # cross-sectional (row-wise) ranks instead.
    ordinal = resampled.rank(axis=1, method='first')
    validCount = resampled.notna().sum(axis=1)

    # rank * cut is an exact integer, so the division cannot land just above a
    # whole number through rounding noise before the ceil.
    return np.ceil(ordinal.mul(cut).div(validCount, axis=0))


def calcSecurityPosition(factorData,cut=10,selection=2,ascending=False,rebalance='M'):
    """Turn factor buckets into +1 / -1 / 0 positions per rebalance period.

    NOTE(review): the previous body was unfinished (it did not parse and
    returned nothing), so the long/short convention below is an assumption -
    confirm it matches the intended strategy.

    Securities in the ``selection`` most favourable buckets get +1, those in
    the ``selection`` least favourable buckets get -1, and everything else
    (including securities without a bucket) gets 0.

    Parameters
    ----------
    factorData : pandas.DataFrame
        Factor values, forwarded to ``calcSecurityRanking``.
    cut : int, default 10
        Number of buckets, forwarded to ``calcSecurityRanking``.
    selection : int, default 2
        Number of buckets taken at each end.
    ascending : bool, default False
        ``False``: high factor values are better (highest buckets are +1).
        ``True``: low factor values are better (lowest buckets are +1).
    rebalance : str, default 'M'
        Rebalance frequency, forwarded to ``calcSecurityRanking``.

    Returns
    -------
    pandas.DataFrame
        Positions with the same index and columns as the ranking.

    Raises
    ------
    ValueError
        If ``selection`` is smaller than 1 or the two ends would overlap.
    """
    if selection < 1 or 2 * selection > cut:
        raise ValueError("selection must satisfy 1 <= selection <= cut / 2")

    rank = calcSecurityRanking(factorData,cut,rebalance)

    inLowest = rank <= selection
    inHighest = rank > cut - selection
    # ascending=False: high is better
    if ascending:
        longs, shorts = inLowest, inHighest
    else:
        longs, shorts = inHighest, inLowest

    position = pd.DataFrame(0,columns=rank.columns,index=rank.index)
    # mask() replaces entries where the condition is True; all other entries stay 0.
    position = position.mask(longs, 1).mask(shorts, -1)

    return position
