In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
from numba import jit
from numba import float64
from numba import int64

In [None]:
# Trade CSVs to combine (one file per date, going by the file names).
# DataFrame.append was removed in pandas 2.0, so the files are read into a
# list and concatenated in a single pd.concat call instead.
trade_files = [
    r'C:\Code Shit\trade-20200802.csv',
    r'C:\Code Shit\trade-20200803.csv',
    r'C:\Code Shit\trade-20200804.csv',
]
data = pd.concat([pd.read_csv(path) for path in trade_files])

# Keep only XBTUSD trades. The .copy() makes `data` an independent frame so the
# column assignment below does not write into a slice of the concatenated frame
# (which would raise SettingWithCopyWarning).
data = data[data.symbol == 'XBTUSD'].copy()
print(data.head())

# Parse the timestamp strings in one vectorised call. The last three characters
# are stripped before parsing, exactly as before
# (presumably sub-microsecond digits -- confirm against the CSV).
data['timestamp'] = pd.to_datetime(
    data['timestamp'].str[:-3], format="%Y-%m-%dD%H:%M:%S.%f"
)


In [None]:
# volume-weighted average price (VWAP) per time bar; the Grouper below uses 10-minute bars (freq='10Min')
def compute_vwap(df):
    """Return a copy of ``df`` with a constant ``vwap`` column added.

    The value is ``sum(price * foreignNotional) / sum(foreignNotional)`` taken
    over all rows of ``df`` and broadcast to every row.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'price' and 'foreignNotional'.

    Returns
    -------
    pandas.DataFrame
        A new frame with the extra 'vwap' column. ``df`` itself is left
        untouched: this function is used with ``groupby(...).apply``, and
        pandas does not support functions that mutate the group they are given.

    Notes
    -----
    If the 'foreignNotional' values sum to zero the division yields NaN/inf.
    """
    notional = df['foreignNotional']
    price = df['price']
    vwap = np.sum(price * notional) / np.sum(notional)
    return df.assign(vwap=vwap)


# Index the trades by timestamp so they can be bucketed by wall-clock time.
data_timeidx = data.set_index('timestamp')
# pd.Grouper(freq='10Min') buckets the index into 10-minute bars.
# group_keys=False keeps the plain timestamp index on the apply() result;
# pandas >= 2.0 would otherwise prepend the bar label as an extra index level.
data_time_grp = data_timeidx.groupby(pd.Grouper(freq='10Min'), group_keys=False)
# Number of 10-minute bars; later cells reuse it to size the other bar types.
num_time_bars = len(data_time_grp)
# Attach each bar's VWAP to every trade that falls inside that bar.
data_time_vwap = data_time_grp.apply(compute_vwap)

In [None]:
# Time-bar VWAP on its own 13x5 figure, drawn on that figure's current axes.
plt.figure(figsize=(13, 5))
data_time_vwap['vwap'].plot(ax=plt.gca())


In [None]:
# Tick bars: start a new bar every `num_ticks_per_bar` trades, sized so the
# number of tick bars is comparable to the number of time bars.
total_ticks = len(data)
avg_ticks_per_bar = total_ticks / num_time_bars
# round(x, -3) rounds to the nearest thousand (not thousandth).
num_ticks_per_bar = round(avg_ticks_per_bar, -3)
if num_ticks_per_bar == 0:
    # Fewer than ~500 ticks per bar rounds to 0, which would turn the floor
    # division below into a division by zero; use the unrounded average instead.
    num_ticks_per_bar = max(avg_ticks_per_bar, 1)
# reset_index() gives every trade a 0..n-1 position; position // bar size is the bar id.
data_tick_grp = data.reset_index().assign(
    grpId=lambda frame: frame.index // num_ticks_per_bar
)

# Compute the VWAP per tick bar. group_keys=False stops pandas >= 2.0 from
# prepending grpId to the result index; set_index is chained instead of using
# inplace=True so the cell builds its result in one expression.
data_tick_vwap = (
    data_tick_grp
    .groupby('grpId', group_keys=False)
    .apply(compute_vwap)
    .set_index('timestamp')
)

plt.figure(figsize=(13, 5))
data_time_vwap['vwap'].plot()
# showing here, we can see a trading opportunity in the form of a flash crash just before market day 08-02
data_tick_vwap['vwap'].plot()

In [None]:
# Volume bars: start a new bar every `vol_per_bar` units of cumulative
# 'homeNotional', sized so the bar count is comparable to the time bars.
data_cm_vol = data.assign(cmVol=data['homeNotional'].cumsum())
total_vol = data_cm_vol.cmVol.values[-1]
avg_vol_per_bar = total_vol / num_time_bars
vol_per_bar = round(avg_vol_per_bar, -2)  # nearest hundred
if vol_per_bar == 0:
    # An average below ~50 rounds to 0, which would make the floor division
    # below divide by zero; use the unrounded average instead.
    vol_per_bar = avg_vol_per_bar

# Bar id = how many whole `vol_per_bar` chunks have traded so far (floor division).
data_vol_grp = data_cm_vol.assign(grpId=lambda frame: frame.cmVol // vol_per_bar)

# group_keys=False stops pandas >= 2.0 from prepending grpId to the result index;
# set_index is chained rather than applied with inplace=True.
data_vol_vwap = (
    data_vol_grp
    .groupby('grpId', group_keys=False)
    .apply(compute_vwap)
    .set_index('timestamp')
)

# plot the data
plt.figure(figsize=(13, 5))
data_time_vwap['vwap'].plot()
data_vol_vwap['vwap'].plot()

# show differences
print("The peak achieved with volume barsticks is:", data_vol_vwap['vwap'].max())
print("The peak achieved with tick bars is:", data_tick_vwap['vwap'].max())
print("The min achieved with volume bars is :", data_vol_vwap['vwap'].min())
print("The min achieved with tick bars is:", data_tick_vwap['vwap'].min())

In [None]:
# Dollar bars: start a new bar every `dol_per_bar` units of cumulative
# 'foreignNotional', sized so the bar count is comparable to the time bars.

# cmDol is the running (cumulative) sum of 'foreignNotional'.
data_cm_dol = data.assign(cmDol=data['foreignNotional'].cumsum())
total_dol = data_cm_dol.cmDol.values[-1]
avg_dol_per_bar = total_dol / num_time_bars
dol_per_bar = round(avg_dol_per_bar, -2)  # nearest hundred
if dol_per_bar == 0:
    # An average below ~50 rounds to 0, which would make the floor division
    # below divide by zero; use the unrounded average instead.
    dol_per_bar = avg_dol_per_bar

# Bar id = how many whole `dol_per_bar` chunks have traded so far (floor division).
data_dol_grp = data_cm_dol.assign(grpId=lambda frame: frame.cmDol // dol_per_bar)

# group_keys=False stops pandas >= 2.0 from prepending grpId to the result index;
# set_index is chained rather than applied with inplace=True.
data_dol_vwap = (
    data_dol_grp
    .groupby('grpId', group_keys=False)
    .apply(compute_vwap)
    .set_index('timestamp')
)

# plot figures
plt.figure(figsize=(13, 5))
data_time_vwap['vwap'].plot()
data_dol_vwap['vwap'].plot()

# show differences
print("The peak achieved with dollar barsticks is:", data_dol_vwap['vwap'].max())
print("The peak achieved with tick bars is:", data_tick_vwap['vwap'].max())
print("The min achieved with dollar bars is:", data_dol_vwap['vwap'].min())
print("The min achieved with tick bars is:", data_tick_vwap['vwap'].min())

In [None]:
def determine_directions(directions):
    """Translate a tick-direction label into a trade sign.

    Returns 1 for 'PlusTick' / 'ZeroPlusTick' and -1 for 'MinusTick' /
    'ZeroMinusTick'. Any other value is reported on stdout and the function
    returns None.
    """
    upward_labels = ('PlusTick', 'ZeroPlusTick')
    downward_labels = ('MinusTick', 'ZeroMinusTick')

    if directions in upward_labels:
        return 1
    if directions in downward_labels:
        return -1

    # Unknown label: report it and fall through with no sign.
    print(str(directions), ", not computable.")
    return None

In [None]:
# Encode the raw tick-direction labels as +1 / -1 via determine_directions.
# The guard makes this cell safe to re-run: once the column is numeric, mapping
# it again would send every value down determine_directions' "not computable"
# branch and wipe the column.
if not pd.api.types.is_numeric_dtype(data_timeidx['tickDirection']):
    data_timeidx['tickDirection'] = data_timeidx['tickDirection'].map(determine_directions)

In [24]:
# Signed volume per trade ('bv'): the encoded tick direction times the trade size.
# 'size' is read with brackets because DataFrame.size is already an attribute.
data_signed_flow = data_timeidx.assign(
    bv=lambda frame: frame['tickDirection'] * frame['size']
)

In [25]:
def ewma(arguments, window):
    """Exponentially weighted moving average of a 1-D array.

    Uses the smoothing factor ``alpha = 2 / (window + 1)``. Element ``i`` of
    the result is the decay-weighted sum of ``arguments[0..i]`` divided by the
    sum of the weights ``(1 - alpha) ** k`` for ``k = 0..i``.

    Parameters
    ----------
    arguments : numpy.ndarray
        Input values; must contain at least one element.
    window : int
        Span used to derive the smoothing factor.

    Returns
    -------
    numpy.ndarray
        float64 array with the same length as ``arguments``.
    """
    length = arguments.shape[0]
    smoothed = np.empty(length, dtype=np.float64)
    decay = 1 - 2 / float(window + 1)  # weight multiplier per step back in time

    weight_sum = 1
    weighted_total = arguments[0]
    smoothed[0] = weighted_total
    for step in range(1, length):
        weight_sum += decay ** step
        weighted_total = weighted_total * decay + arguments[step]
        smoothed[step] = weighted_total / weight_sum
    return smoothed

In [None]:
# Seed values handed to compute_Ts below.
E_T_init = 50000  # initial expected bar length, measured in ticks
abs_Ebv_init = np.abs(data_signed_flow.bv.mean())  # |mean signed volume per tick|

def compute_Ts(bvs, E_T_init, abs_Ebv_init):
    """Find imbalance-bar boundaries in a series of signed volumes.

    A running sum (theta) of the signed volumes is accumulated tick by tick.
    Whenever ``|theta|`` reaches ``E_T * abs_Ebv`` a bar is closed, theta is
    reset to 0, and both expectations are re-estimated with ``ewma``:
    ``E_T`` from the lengths of the bars found so far, ``abs_Ebv`` from the
    signed volumes seen before the current tick.

    Parameters
    ----------
    bvs : pandas.Series
        Signed volume per tick (anything exposing ``.shape`` and ``.values``).
    E_T_init : number
        Initial expected bar length in ticks; ``3 * E_T_init`` is also the
        ewma window used when re-estimating ``abs_Ebv``.
    abs_Ebv_init : float
        Initial expected absolute signed volume per tick.

    Returns
    -------
    Ts : list of numpy.float64
        Length, in ticks, of each bar.
    abs_thetas : numpy.ndarray
        ``|theta|`` at every tick, recorded before any reset at that tick.
    thresholds : numpy.ndarray
        Threshold in force at every tick; index 0 is never checked and stays 0.
    i_s : list of int
        Tick position at which each bar closed.
    """
    Ts, i_s = [], []
    i_prev, E_T, abs_Ebv = 0, E_T_init, abs_Ebv_init

    n = bvs.shape[0]
    bvs_val = bvs.values.astype(np.float64)
    abs_thetas, thresholds = np.zeros(n), np.zeros(n)
    if n == 0:
        # Nothing to scan; return empty results instead of indexing element 0.
        return Ts, abs_thetas, thresholds, i_s
    abs_thetas[0], cur_theta = np.abs(bvs_val[0]), bvs_val[0]

    for i in range(1, n):
        cur_theta += bvs_val[i]
        abs_theta = np.abs(cur_theta)
        abs_thetas[i] = abs_theta

        threshold = E_T * abs_Ebv
        thresholds[i] = threshold

        if abs_theta >= threshold:
            cur_theta = 0
            Ts.append(np.float64(i - i_prev))
            i_s.append(i)
            i_prev = i
            E_T = ewma(np.array(Ts), window=np.int64(len(Ts)))[-1]
            # Must rebind abs_Ebv, the name the threshold reads. Assigning to a
            # differently spelled variable here leaves the expected imbalance
            # frozen at abs_Ebv_init for the whole run.
            abs_Ebv = np.abs(ewma(bvs_val[:i], window=np.int64(E_T_init * 3))[-1])  # window of 3 bars
    return Ts, abs_thetas, thresholds, i_s
# Run the bar search over the signed-volume column with the seed values above.
Ts, abs_thetas, thresholds, i_s = compute_Ts(
    data_signed_flow['bv'], E_T_init, abs_Ebv_init
)

In [None]:
# Peek at the first few signed-volume values.
data_signed_flow.bv.head()