# Get Bins

In [11]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime

In [12]:
def _load_time_indexed(csv_path):
    """Read a CSV and promote its parsed 'time' column to the index."""
    frame = pd.read_csv(csv_path)
    stamps = pd.to_datetime(frame["time"])
    return frame.set_index(stamps).drop(columns=["time"])


# Events table: indexed by event start time, with the end time 't1' parsed
# to datetimes so it can be compared against the price index.
events = _load_time_indexed("../data/progress3.csv")
events["t1"] = pd.to_datetime(events["t1"])

# Series of 'v_bar' timestamps (passed to get_bins as the original vertical
# barrier series), indexed by time.
t1 = pd.Series(pd.to_datetime(_load_time_indexed("../data/t1.csv")["v_bar"]))

# Time-indexed frame holding the 'price' column used as the close series.
df = _load_time_indexed("../data/progress2.csv")

In [13]:
# Show the frame loaded from progress2.csv to confirm the time index and its columns.
df

Unnamed: 0_level_0,price,fast,slow
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-01-08 19:54:22.456,40095.39,40095.390000,40095.390000
2021-01-08 19:54:31.003,40119.01,40107.393607,40107.283759
2021-01-08 19:54:39.317,40129.44,40114.984643,40114.774176
2021-01-08 19:54:42.244,40129.35,40118.754504,40118.496027
2021-01-08 19:54:44.811,40102.96,40115.385124,40115.300053
...,...,...,...
2021-01-10 23:26:03.620,33291.65,33363.687809,33380.713857
2021-01-10 23:26:05.362,33287.33,33361.224654,33379.398591
2021-01-10 23:26:06.023,33238.66,33357.270956,33377.416357
2021-01-10 23:26:06.506,33300.83,33355.450280,33376.337676


In [14]:
def get_bins(events, close, t1=None):
    '''
    Compute event's outcome (including side information, if provided).

    Parameters
    ----------
    events : pd.DataFrame
        -events.index is event's starttime
        -events['t1'] is event's endtime; rows with a missing 't1' are dropped
        -events['trgt'] is event's target (not read by this function)
        -events['side'] (optional) implies the algo's position side
    close : pd.Series
        Prices indexed by timestamp. Prices are looked up at event start and
        end times, back-filling from the next available observation.
    t1 : pd.Series, optional
        Original vertical barrier series. Only consulted when 'side' is not in
        events. When omitted, no label is reset to 0.

    Returns
    -------
    pd.DataFrame
        Indexed by event starttime with columns 'ret' (return between start
        and end, multiplied by 'side' when present) and 'bin':
        Case 1: ('side' not in events): bin in (-1,0,1) <-label by price action,
                0 where the event ended on the vertical barrier
        Case 2: ('side' in events): bin in (0,1) <-label by pnl (meta-labeling)
    '''
    #1) prices aligned with events
    events_ = events.dropna(subset=['t1'])
    meta_labeling = 'side' in events_
    stamps = events_.index.union(events_['t1'].values).drop_duplicates()
    px = close.reindex(stamps, method='bfill')

    #2) create out object
    out = pd.DataFrame(index=events_.index)
    out['ret'] = px.loc[events_['t1'].values].values / px.loc[events_.index] - 1
    if meta_labeling:
        out['ret'] *= events_['side'] # meta-labeling
    out['bin'] = np.sign(out['ret'])

    if meta_labeling:
        # A non-positive pnl means the bet should not have been taken.
        out.loc[out['ret'] <= 0, 'bin'] = 0
    elif t1 is not None:
        # events['t1'] is the time of the first touch of *any* barrier, so the
        # original vertical barrier series is needed to tell which events
        # ended on the vertical barrier; those labels become 0.
        # The mask is built from events_ (not events) so that rows dropped for
        # a missing 't1' can never be selected: they are absent from `out`.
        vtouch_first = events_['t1'].isin(t1.values)
        out.loc[vtouch_first.values, 'bin'] = 0.
    return out

In [15]:
# Label every event against the price series, then discard rows that contain
# missing values.
bins = get_bins(events, close=df["price"], t1=t1).dropna()

In [18]:
# Inspect the labels: one row per event start time with its 'ret' and 'bin'.
bins

Unnamed: 0_level_0,ret,bin
time,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-01-09 20:37:17.462,-0.000698,0.0
2021-01-09 21:01:09.429,0.000682,1.0
2021-01-09 22:26:08.810,-0.001056,0.0
2021-01-10 02:07:16.653,-0.0005,0.0
2021-01-10 02:19:05.256,0.000384,1.0
2021-01-10 03:22:13.879,-0.000858,0.0
2021-01-10 04:28:39.081,-0.00038,0.0
2021-01-10 06:36:19.767,0.000294,1.0
2021-01-10 08:36:19.155,0.00039,1.0
2021-01-10 09:11:25.907,-0.000921,0.0


In [8]:
# Position side of each event, keeping only events with no missing values
# in any column.
side = events.dropna()["side"]
side.head()

time
2021-01-09 20:37:17.462    1.0
2021-01-09 21:01:09.429    1.0
2021-01-09 22:26:08.810    1.0
2021-01-10 02:07:16.653   -1.0
2021-01-10 02:19:05.256   -1.0
Name: side, dtype: float64

In [9]:
# Attach each event's own side to its label row with an exact join on the
# event start time. A forward as-of match would silently borrow the side of a
# *later* event whenever a label's timestamp is missing from `side`; with an
# exact join such rows get NaN instead, which makes the mismatch visible.
merge = bins.join(side.rename("side"), how="left")
merge

Unnamed: 0_level_0,ret,bin,side
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-01-09 20:37:17.462,-0.000698,0.0,1.0
2021-01-09 21:01:09.429,0.000682,1.0,1.0
2021-01-09 22:26:08.810,-0.001056,0.0,1.0
2021-01-10 02:07:16.653,-0.0005,0.0,-1.0
2021-01-10 02:19:05.256,0.000384,1.0,-1.0
2021-01-10 03:22:13.879,-0.000858,0.0,1.0
2021-01-10 04:28:39.081,-0.00038,0.0,-1.0
2021-01-10 06:36:19.767,0.000294,1.0,-1.0
2021-01-10 08:36:19.155,0.00039,1.0,-1.0
2021-01-10 09:11:25.907,-0.000921,0.0,1.0


In [10]:
# Persist the labels together with their side; the time index is written as
# the first column.
merge.to_csv(path_or_buf="../data/progress4.csv", index=True)