### 1. Function definitions

#### 1-1. Import packages

In [1]:
import os
from pathlib import Path
import functools

import quandl
import json
import pandas as pd
pd.set_option("display.precision", 4)
pd.set_option('display.float_format', lambda x: '%.4f' % x)
# import pandas_datareader.data as pdr

# import math
import numpy as np
import datetime as dt
from dateutil.relativedelta import relativedelta

# plotting packages
from matplotlib import pyplot as plt
plt.rcParams['figure.figsize'] = [21, 8]

#### 1-2. Functions to fetch data

**1-2-1. Get my personal keys** from ../data/APIs.json

In [2]:
# Get personal API key from ../data/APIs.json
# The context manager closes the file even when json.load raises (for example
# on malformed JSON); the encoding is pinned so parsing does not depend on the
# platform's default locale.
with open('../data/APIs.json', encoding='utf-8') as f:
    APIs = json.load(f)

**1-2-2. Define helper functions**

In [3]:
def assertCorrectDateFormat(date_text):
    """Check that ``date_text`` parses as a ``YYYY-MM-DD`` date.

    Parameters
    ----------
    date_text : str
        Candidate date string.

    Returns
    -------
    None
        The function is called only for its validation side effect.

    Raises
    ------
    ValueError
        If ``date_text`` cannot be parsed with the ``%Y-%m-%d`` format. The
        original ``strptime`` error is attached as ``__cause__``.
    """
    try:
        dt.datetime.strptime(date_text, '%Y-%m-%d')
    except ValueError as err:
        # Chain explicitly so the traceback reports the parser error as the
        # direct cause instead of "during handling ... another exception".
        raise ValueError("Incorrect date format, should be YYYY-MM-DD") from err

**1-2-3. Define function** to retrieve raw data from Quandl

**Documentation**
```
End of Day US Stock Prices (EOD)
https://data.nasdaq.com/databases/EOD/documentation
https://data.nasdaq.com/databases/EOD/usage/quickstart/python
```

In [4]:
# Function that retrieves EOD data from Quandl
def getQuandlEODData(secs,start_date,end_date,columns):
    """Fetch rows from the Quandl ``QUOTEMEDIA/PRICES`` table via ``quandl.get_table``.

    NOTE: missing data for a requested date will NOT return a row.

    INPUT         | DATA TYPE                 | DESCRIPTION
    secs          | string / list of string   | security ticker(s)
    start_date    | string (YYYY-MM-DD)       | start date of data
    end_date      | string (YYYY-MM-DD)       | end date of data (same as or after start_date)
    columns       | string / list of string   | columns to return

    Returns
    -------
    pandas.DataFrame
        The requested columns, indexed by (date, ticker) and sorted by index.
        ``date`` and ``ticker`` are always requested in addition to ``columns``.

    Side effects: sets ``quandl.ApiConfig.api_key`` from ``APIs['Quandl']`` and
    prints one status line.
    """
    # A bare string is a single column name; list('adj_close') would split it
    # into individual characters.
    if isinstance(columns, str):
        columns = [columns]
    # dict.fromkeys removes duplicates while keeping a deterministic order
    # (a set would shuffle the requested columns between runs).
    requested = list(dict.fromkeys(['date', 'ticker', *columns]))

    # Retrieve data using quandl.get_table
    quandl.ApiConfig.api_key = APIs['Quandl']

    data = quandl.get_table('QUOTEMEDIA/PRICES',
                            ticker = secs,
                            date = {'gte':start_date, 'lte':end_date},
                            qopts = {'columns':requested},
                            # Without pagination a single call is capped in row
                            # count, which truncates multi-ticker requests.
                            paginate = True
                            )

    # `unit` only applies to numeric epoch values, so it is not passed here.
    data['date'] = pd.to_datetime(data['date'])

    if data.empty:
        # min()/max() of an empty date column is NaT, which cannot be formatted
        # with a strftime pattern; report the empty result explicitly instead.
        print(f"Data   | Quandl | No rows returned for {secs} between {start_date} and {end_date}.")
    else:
        print(f"Data   | Quandl | Retrieved {secs} data for dates from {data.date.min():%Y-%m-%d} to {data.date.max():%Y-%m-%d}.")

    return data.set_index(['date','ticker']).sort_index()

In [8]:
def getPriceAndVolume(secs,start_date='2008-01-01',end_date='2022-01-31'):
    """Download adjusted close prices and adjusted volumes for several tickers.

    Parameters
    ----------
    secs : str or iterable of str
        Ticker(s) to download; a single string is treated as one ticker.
    start_date, end_date : str
        Inclusive date range in ``YYYY-MM-DD`` format.

    Returns
    -------
    (prices, volumes) : tuple of pandas.DataFrame
        One column per ticker, indexed by date. The index is the union of all
        tickers' dates; a ticker without data on a date holds NaN there.

    Raises
    ------
    ValueError
        If a date is not ``YYYY-MM-DD`` or ``secs`` is empty.
    """
    assertCorrectDateFormat(start_date)
    assertCorrectDateFormat(end_date)

    # A bare string would otherwise be iterated character by character.
    if isinstance(secs, str):
        secs = [secs]

    priceBySec = {}
    volumeBySec = {}
    for sec in secs:
        sec_data = getQuandlEODData(sec,start_date,end_date,('adj_close','adj_volume')).dropna()
        sec_data = sec_data.droplevel('ticker')
        priceBySec[sec] = sec_data.adj_close
        # Fixed-width int64: plain `int` can map to 32 bits on some platforms.
        volumeBySec[sec] = sec_data.adj_volume.astype('int64')

    if not priceBySec:
        raise ValueError("No securities given; pass at least one ticker.")

    # Assigning columns one by one into an empty frame aligns every ticker to
    # the FIRST ticker's dates and silently drops the rest; concat keeps the
    # union of all dates instead.
    prices = pd.concat(priceBySec, axis=1).sort_index()
    volumes = pd.concat(volumeBySec, axis=1).sort_index()

    if prices.empty:
        # An empty index has no min/max date to format.
        print("Data   | DONE   | Returning 0 rows of data.")
    else:
        print(f"Data   | DONE   | Returning {len(prices):d} rows of data from {prices.index.min():%Y-%m-%d} to {prices.index.max():%Y-%m-%d}.")

    return prices, volumes
    

In [9]:
# Tickers that are NOT fetched in this cell.
# NOTE(review): presumably tickers eliminated from the universe; `elim` is not
# used in the cells shown here - confirm it is still needed.
elim = ('BNDX','COW','FM','GOVT','SCHB')
# Tickers whose adjusted prices and volumes are downloaded below.
secs = ('BIL','DBA','EMB','GLD','HYG',
        'LQD','MBB','PSP','SLV','TIP',
        'UNG','USO','VEA','VNQ','VWO')

# p: adjusted close prices, v: adjusted volumes (one column per ticker),
# fetched with the function's default date range. Later cells read `p`.
p, v = getPriceAndVolume(secs)
p.head()

Data   | Quandl | Retrieved BIL data for dates from 2008-01-02 to 2022-01-31.
Data   | Quandl | Retrieved DBA data for dates from 2008-01-02 to 2022-01-31.
Data   | Quandl | Retrieved EMB data for dates from 2008-01-02 to 2022-01-31.
Data   | Quandl | Retrieved GLD data for dates from 2008-01-02 to 2022-01-31.
Data   | Quandl | Retrieved HYG data for dates from 2008-01-02 to 2022-01-31.
Data   | Quandl | Retrieved LQD data for dates from 2008-01-02 to 2022-01-31.
Data   | Quandl | Retrieved MBB data for dates from 2008-01-02 to 2022-01-31.
Data   | Quandl | Retrieved PSP data for dates from 2008-01-02 to 2022-01-31.
Data   | Quandl | Retrieved SLV data for dates from 2008-01-02 to 2022-01-31.
Data   | Quandl | Retrieved TIP data for dates from 2008-01-02 to 2022-01-31.
Data   | Quandl | Retrieved UNG data for dates from 2008-01-02 to 2022-01-31.
Data   | Quandl | Retrieved USO data for dates from 2008-01-02 to 2022-01-31.
Data   | Quandl | Retrieved VEA data for dates from 2008-01-02 t

Unnamed: 0_level_0,BIL,DBA,EMB,GLD,HYG,LQD,MBB,PSP,SLV,TIP,UNG,USO,VEA,VNQ,VWO
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2008-01-02,85.2938,32.5637,53.1117,84.86,40.6184,62.2834,72.2863,9.379,15.18,74.3347,1224.0,627.76,32.4341,34.0823,36.6387
2008-01-03,85.238,33.3332,53.2059,85.57,40.5409,62.0779,72.3216,9.3146,15.285,74.5857,1194.24,626.96,32.4884,33.0198,36.991
2008-01-04,85.2201,33.6839,53.2321,85.13,40.4959,62.5887,72.3851,9.0442,15.167,74.5647,1208.0,618.48,31.7761,31.6792,35.7685
2008-01-07,85.2936,33.2358,53.3525,84.77,40.2551,62.5124,72.6393,9.1086,15.053,74.5508,1217.6,604.0,31.8168,31.974,36.0503
2008-01-08,85.2342,33.606,53.4816,86.78,40.0224,62.395,72.5122,8.7695,15.59,74.6066,1228.8,608.48,31.559,30.8671,36.0222


**1-2-4. Define functions** to compute factor data (rolling returns and rolling volatility)

In [10]:
def calcRollingReturns(timeSeriesPrices,window=20):
    """Return the percentage change of each value versus ``window`` rows earlier.

    Parameters
    ----------
    timeSeriesPrices : pandas.DataFrame or pandas.Series
        Time-ordered values.
    window : int, default 20
        Number of rows to look back.

    Returns
    -------
    Same type and shape as the input; the first ``window`` rows are NaN.
    """
    lookback = window
    rollingReturns = timeSeriesPrices.pct_change(periods=lookback)
    return rollingReturns

In [11]:
# Rolling returns of the prices in `p`, using the default 20-row look-back.
returns = calcRollingReturns(p)

Unnamed: 0_level_0,BIL,DBA,EMB,GLD,HYG,LQD,MBB,PSP,SLV,TIP,UNG,USO,VEA,VNQ,VWO
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2008-01-02,,,,,,,,,,,,,,,
2008-01-03,,,,,,,,,,,,,,,
2008-01-04,,,,,,,,,,,,,,,
2008-01-07,,,,,,,,,,,,,,,
2008-01-08,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-01-25,-0.0002,0.0125,-0.0354,0.0190,-0.0246,-0.0372,-0.0139,-0.1266,0.0323,-0.0203,0.0406,0.1183,-0.0500,-0.0789,-0.0071
2022-01-26,0.0000,0.0202,-0.0396,0.0068,-0.0275,-0.0425,-0.0198,-0.1252,0.0212,-0.0259,0.0804,0.1282,-0.0545,-0.0966,-0.0140
2022-01-27,0.0001,-0.0010,-0.0332,-0.0059,-0.0302,-0.0350,-0.0157,-0.1200,-0.0043,-0.0228,0.1931,0.1231,-0.0605,-0.1175,-0.0186
2022-01-28,-0.0002,0.0182,-0.0347,-0.0159,-0.0270,-0.0378,-0.0138,-0.1162,-0.0295,-0.0244,0.3649,0.1247,-0.0540,-0.0928,-0.0294


In [14]:
def calcRollingVolatility(timeSeriesPrices,window=20,tradingDays=225):
    """Rolling sample standard deviation rescaled by ``sqrt(tradingDays / window)``.

    Parameters
    ----------
    timeSeriesPrices : pandas.DataFrame or pandas.Series
        Time-ordered values. The standard deviation is taken over these values
        exactly as passed.
        NOTE(review): if raw price levels are supplied, the result measures the
        dispersion of levels, not of returns - confirm the intended input.
    window : int, default 20
        Number of rows in each rolling window.
    tradingDays : int or float, default 225
        Number of periods per year used for the rescaling (previously a
        hard-coded constant).

    Returns
    -------
    Same type and shape as the input; the first ``window - 1`` rows are NaN.
    """
    # Dividing by sqrt(window / tradingDays) equals multiplying by
    # sqrt(tradingDays / window).
    scale = np.sqrt(window/tradingDays)

    return timeSeriesPrices.rolling(window).std()/scale

In [15]:
# Display the rolling volatility of `p` (default 20-row window); the result is not stored.
calcRollingVolatility(p)

Unnamed: 0_level_0,BIL,DBA,EMB,GLD,HYG,LQD,MBB,PSP,SLV,TIP,UNG,USO,VEA,VNQ,VWO
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2008-01-02,,,,,,,,,,,,,,,
2008-01-03,,,,,,,,,,,,,,,
2008-01-04,,,,,,,,,,,,,,,
2008-01-07,,,,,,,,,,,,,,,
2008-01-08,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-01-25,0.0253,0.6945,5.3642,5.2006,2.1797,5.4000,2.2289,2.0439,1.9826,4.4861,3.0275,8.4184,2.8360,12.5523,2.2457
2022-01-26,0.0253,0.7164,5.2612,5.1088,2.2523,5.3074,2.1897,2.1976,1.9882,4.4466,3.0479,8.5496,3.2922,13.3194,2.4024
2022-01-27,0.0255,0.7158,4.9904,5.3134,2.4135,5.2109,2.0994,2.3412,1.9988,4.2254,3.2883,8.6516,3.7431,14.1207,2.6521
2022-01-28,0.0273,0.7386,4.3946,5.7113,2.4242,4.7583,1.9174,2.3920,2.0756,3.6272,3.6434,8.6244,4.0240,13.6025,2.8883


**1-2-5. Define function** to get trading positions

In [None]:
def calcSecurityRanking(factorData,cut=10,ascending=True,rebalance='M'):
    """Bucket securities into quantile groups for every rebalancing period.

    The factor values are averaged per rebalancing period; within each period
    the securities are ordered by that average and split into ``cut`` buckets
    of (near) equal size with ``pd.qcut``.

    Parameters
    ----------
    factorData : pandas.DataFrame
        Factor values with a datetime-like index and one column per security.
    cut : int, default 10
        Number of buckets. If a period has fewer securities with data than
        ``cut``, that many buckets are used instead.
    ascending : bool, default True
        If True the smallest factor value falls in bucket 1; if False the
        largest does.
    rebalance : str, default 'M'
        Frequency alias passed verbatim to ``DataFrame.resample``
        (W - Weekly, M - Monthly, Q - Quarterly, A - Annually).
        NOTE(review): recent pandas releases rename some aliases (for example
        'ME' for month end) - pass the alias your pandas version accepts.

    Returns
    -------
    pandas.DataFrame
        Same columns as ``factorData``, one row per rebalancing period. Values
        are integers in ``1..cut``; ``0`` marks a security with no data in
        that period.
    """
    periodMean = factorData.resample(rebalance).mean()

    def _bucketRow(row):
        # Assign one period's securities to buckets; 0 means "no data".
        buckets = pd.Series(0, index=row.index, dtype='int64')
        valid = row.dropna()
        if len(valid) < 2:
            # qcut needs at least two distinct values to build bin edges.
            buckets.loc[valid.index] = 1
            return buckets
        # Rank first so ties get distinct positions and qcut's bin edges stay
        # unique; the rank direction implements `ascending`.
        ordinal = valid.rank(method='first', ascending=ascending)
        nBuckets = min(cut, len(valid))
        buckets.loc[valid.index] = pd.qcut(ordinal, nBuckets, labels=False).to_numpy() + 1
        return buckets

    return periodMean.apply(_bucketRow, axis=1)

def splitWinnerLoser(secData,cut=10,acending=True,rebalance='M'):
    """Placeholder: currently returns ``secData`` unchanged.

    ``cut``, ``acending`` and ``rebalance`` are accepted but not used yet.
    NOTE(review): ``acending`` looks like a misspelling of ``ascending``; it is
    left as is because renaming it would change the keyword callers pass.
    """
    return secData