[FINM 33150] Regression Analysis and Quantitative Trading Strategies\
Winter 2022 | Professor Brian Boonstra

# HW #3 Financial Ratio Quantile Strategies

_**Due:** Thursday, February 3rd, at 11:00pm\
**Name:** Ashley Tsoi (atsoi, Student ID: 12286230)_

### 1. Fetch and clean data

#### 1-1. Import packages

In [12]:
import os
# from pathlib import Path
import functools
from collections import defaultdict
import warnings

# import quandl
import json
import pandas as pd
from pandas.tseries.offsets import DateOffset
pd.set_option("display.precision", 4)
pd.set_option('display.float_format', lambda x: '%.4f' % x)
from pandas.core.common import SettingWithCopyWarning
warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)

import math
import numpy as np
import datetime as dt
from dateutil.relativedelta import relativedelta

# let plot display in the notebook instead of in a different window
%matplotlib inline 
from matplotlib import pyplot as plt
plt.rcParams['figure.figsize'] = [21, 8]

#### 1-2. Fetch data

**1-2-1. Read 2021 crypto data from local**

In [8]:
tables = {'book':['BTC-USD','ETH-BTC','ETH-USD'],
          'trades':['BTC-USD','ETH-BTC','ETH-USD']}

crypto = defaultdict()
for type,pairs in tables.items():
    for p in pairs:
        data = pd.read_csv('../data_large/crypto/'+type+'_narrow_'+p+'_2021.delim.gz', sep='\t').set_index(['received_utc_nanoseconds'])
        data.index = pd.to_datetime(data.index, unit='ns')
        data.sort_index(inplace=True)
        crypto[type+'_'+p[:3].lower()+'_'+p[4:].lower()] = data

In [9]:
bc = list(crypto['book_btc_usd'].columns)
tc = list(crypto['trades_btc_usd'].columns)
print(f'Book tables have columns: \n{bc}\n --------------------\nTrades tables have columns: \n{tc}')

Book tables have columns: 
['Ask1PriceMillionths', 'Bid1PriceMillionths', 'Ask1SizeBillionths', 'Bid1SizeBillionths', 'Ask2PriceMillionths', 'Bid2PriceMillionths', 'Ask2SizeBillionths', 'Bid2SizeBillionths', 'timestamp_utc_nanoseconds', 'Mid']
 --------------------
Trades tables have columns: 
['timestamp_utc_nanoseconds', 'PriceMillionths', 'SizeBillionths', 'Side']


#### 1-3. Define functions to add information columns for trading

**1-3-1. Define function to calculate $\tau$-interval trade flow**

$\tau$ is in seconds

In [10]:
# Return a pd Series of F_tau
@functools.lru_cache(maxsize=16) # Cache the function output
def calcF_tau(trades_table,tau=1):
    assert 'SizeBillionths' in trades_table.columns
    assert 'Side' in trades_table.columns
    
    F = trades_table['SizeBillionths'] * trades_table['Side']
    if tau==1/1000000000:
        return F
    
    return F.rolling(str(tau)+'s').shift(1)

In [11]:
trades_tables = crypto[[k for k in crypto.keys() if k.startswith('trades')]]
tau_range = [1,2,5,60,120,300,3600] # 1 second, 2 seconds, 5 seconds, 1 min, 2 min, 5 min, 1 hour
for t,t_data in trades_tables.items():
    crypto[t]['']

TypeError: unhashable type: 'list'

**1-3-2. Define function to calculate t-second forward returns**

In [22]:
r = pd.Series(index=crypto['trades_btc_usd'].index)
k = 3
for i,v in r.items():
    print(i)
    k-=1
    if k==0: break 

2021-04-10 21:28:56.118594
2021-04-10 21:28:56.118837
2021-04-10 21:28:56.147583


  r = pd.Series(index=crypto['trades_btc_usd'].index)


In [None]:
# Return a pd Series of T-second forward returns
@functools.lru_cache(maxsize=16) # Cache the function output
def calcR_t(trades_price,T):
    idx = trades_price.index
    i_0 = idx.pop()
    r = pd.Series(index=idx)
    for i,p in r.items():    
        i_t = i + DateOffset(seconds=T) # target time
        while idx and i_0 < i_t:
            i_0 = idx.pop()
        if i_0 >= i_t: # second check in case last item fits criteria
            r.loc[i_0] = trades_price.loc[i_0]/p - 1
        else: break
    
    return r

**1-3-3. Define function to get regression $\beta$**

#### 1-4. Split data for test & training

In [None]:
def splitTestTraining(table_dict,train_portion=0.2):
    train,test = defaultdict(),defaultdict()
    for t,t_data in table_dict.items():
        train_size = int(train_portion*len(t_data))
        train[t] = t_data.iloc[:train_size]
        test[t] = t_data.iloc[train_size:]
    return train,test

# train,test = splitTestTraining(crypto)

### 2. Trade

In [None]:
def calcTradeStats(table,date_col_name,price_col_name,init_cash):
    # table must contain columns (1) price*volumn (2) position
    # returns columns: [table.columns] + [signal, position_value, cash, total_value, PnL_daily, PnL_cumulative]

    if date_col_name not in table.index: raise IndexError(f'{date_col_name} must be in table index.')
    if price_col_name not in table.columns: raise ValueError(f'{price_col_name} must be a table column.')
    if 'position' not in table.columns: raise ValueError('position must be a table column.')

    table['signal'] = [table.position[0]] + table.position.diff()[1:].astype(np.int16).tolist()
    table['position_value'] = table[price_col_name] * table.position
    table['cash'] = ([init_cash-table.position_value[0]] + (-1*(table.signal*table[price_col_name])[1:]).tolist()).cumsum()
    table['total_value'] = table.position_value + table.cash
    table['PnL_daily'] = table.total_value.pct_change()
    table['PnL_cumulative'] = table.total_value/init_cash - 1

    return table