# Get T Events

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime

In [3]:
# Load the progress data and index it by the parsed "time" column.
df = (
    pd.read_csv("../data/progress.csv")
    .pipe(lambda raw: raw.set_index(pd.to_datetime(raw["time"])))
    .drop(columns=["time"])
)

In [4]:
def get_t_events(g_raw, h):
    """
    Symmetric CUSUM filter applied to the log returns of a price series.

    Arguments:
    g_raw -- pandas series of prices; np.log is applied to it, so values are
        expected to be strictly positive (non-finite log returns are dropped)
    h -- threshold compared against the running positive / negative sums

    Output:
    A pandas DatetimeIndex with the labels of g_raw at which either running
    sum crossed the threshold (the crossing sum is reset to zero afterwards).
    """
    t_events, s_pos, s_neg = [], 0., 0.
    log_ret = np.log(g_raw).diff().dropna()
    # dropna() has already removed the leading NaN produced by diff(), so every
    # remaining return is consumed. Iterating (label, value) pairs avoids a
    # label lookup per step, which returned a Series for duplicated timestamps.
    for stamp, ret in log_ret.items():
        ret = float(ret)
        s_pos = max(0., s_pos + ret)
        s_neg = min(0., s_neg + ret)
        if s_neg < -h:
            s_neg = 0.
            t_events.append(stamp)
        elif s_pos > h:
            s_pos = 0.
            t_events.append(stamp)
    return pd.DatetimeIndex(t_events)

In [5]:
# Run the CUSUM filter on closing prices, thresholded at the mean of "vol".
t_events = get_t_events(df["close"], h=df["vol"].mean())

In [6]:
# Display the event timestamps returned by get_t_events.
t_events

DatetimeIndex(['2021-01-08 19:57:56.847000', '2021-01-08 20:01:42.971000',
               '2021-01-08 20:02:14.242000', '2021-01-08 20:02:40.178000',
               '2021-01-08 20:04:59.035000', '2021-01-08 20:13:07.605000',
               '2021-01-08 20:18:48.288000', '2021-01-08 20:21:14.739000',
               '2021-01-08 20:22:50.773000', '2021-01-08 20:23:21.807000',
               ...
               '2021-01-10 23:23:15.016000', '2021-01-10 23:23:42.399000',
               '2021-01-10 23:23:55.817000', '2021-01-10 23:24:03.733000',
               '2021-01-10 23:24:24.929000', '2021-01-10 23:24:28.092000',
               '2021-01-10 23:25:02.191000', '2021-01-10 23:25:37.310000',
               '2021-01-10 23:25:54.102000', '2021-01-10 23:26:00.159000'],
              dtype='datetime64[ns]', length=3528, freq=None)

## Add vertical barrier

In [7]:
def add_vertical_barrier(t_events, close, num_days=1):
    """
    Map each event to the first bar located at or after event + num_days days.

    Arguments:
    t_events -- DatetimeIndex of event timestamps (assumed sorted ascending, so
        that only trailing events can fall past the end of close -- TODO confirm)
    close -- pandas object whose index holds the bar timestamps
    num_days -- horizon, in days, added to every event timestamp

    Output:
    A pandas series indexed by the leading events for which a bar exists at or
    after the shifted timestamp; values are those bar timestamps.
    """
    bar_times = close.index
    horizon = pd.Timedelta(days=num_days)
    positions = bar_times.searchsorted(t_events + horizon)
    # Positions equal to the number of bars point past the end of the data.
    in_range = positions[positions < close.shape[0]]
    kept_events = t_events[:in_range.shape[0]]
    return pd.Series(bar_times[in_range], index=kept_events)

In [8]:
# Vertical barrier: first bar at least one day after each event.
t1 = add_vertical_barrier(t_events, close=df["close"], num_days=1)
t1

2021-01-08 19:57:56.847   2021-01-09 19:58:02.136
2021-01-08 20:01:42.971   2021-01-09 20:01:46.999
2021-01-08 20:02:14.242   2021-01-09 20:02:18.078
2021-01-08 20:02:40.178   2021-01-09 20:02:50.652
2021-01-08 20:04:59.035   2021-01-09 20:05:07.081
                                    ...          
2021-01-09 23:00:05.626   2021-01-10 23:00:06.758
2021-01-09 23:13:07.833   2021-01-10 23:13:08.116
2021-01-09 23:14:01.039   2021-01-10 23:14:02.174
2021-01-09 23:14:13.910   2021-01-10 23:14:15.963
2021-01-09 23:17:58.636   2021-01-10 23:17:58.851
Name: time, Length: 739, dtype: datetime64[ns]

In [22]:
# Display the vertical-barrier series produced by add_vertical_barrier.
t1

time
2021-01-08 19:57:56.847   2021-01-09 19:58:02.136
2021-01-08 20:01:42.971   2021-01-09 20:01:46.999
2021-01-08 20:02:14.242   2021-01-09 20:02:18.078
2021-01-08 20:02:40.178   2021-01-09 20:02:50.652
2021-01-08 20:04:59.035   2021-01-09 20:05:07.081
                                    ...          
2021-01-09 23:00:05.626   2021-01-10 23:00:06.758
2021-01-09 23:13:07.833   2021-01-10 23:13:08.116
2021-01-09 23:14:01.039   2021-01-10 23:14:02.174
2021-01-09 23:14:13.910   2021-01-10 23:14:15.963
2021-01-09 23:17:58.636   2021-01-10 23:17:58.851
Name: time, Length: 739, dtype: datetime64[ns]

In [32]:
# Label the index and the series in place, then persist the barriers to CSV.
t1.index.name = 'time'
t1.name = "v_bar"
t1.to_csv("../data/t1.csv")

In [33]:
# Display t1 again; its name is now "v_bar" and its index is named "time".
t1

time
2021-01-08 19:57:56.847   2021-01-09 19:58:02.136
2021-01-08 20:01:42.971   2021-01-09 20:01:46.999
2021-01-08 20:02:14.242   2021-01-09 20:02:18.078
2021-01-08 20:02:40.178   2021-01-09 20:02:50.652
2021-01-08 20:04:59.035   2021-01-09 20:05:07.081
                                    ...          
2021-01-09 23:00:05.626   2021-01-10 23:00:06.758
2021-01-09 23:13:07.833   2021-01-10 23:13:08.116
2021-01-09 23:14:01.039   2021-01-10 23:14:02.174
2021-01-09 23:14:13.910   2021-01-10 23:14:15.963
2021-01-09 23:17:58.636   2021-01-10 23:17:58.851
Name: v_bar, Length: 739, dtype: datetime64[ns]

## Get events

In [9]:
def get_events(close, t_events, ptsl, trgt, min_ret, t1=False, side=None):
    """
    Build the events table and record the first barrier touched by each event.

    Arguments:
    close -- pandas series of prices, forwarded to apply_triple_barrier
    t_events -- timestamps of the candidate events
    ptsl -- barrier width multipliers; with side given, the first two entries
        are used as-is, otherwise ptsl[0] is used for both barriers
    trgt -- pandas series of targets; reindexed on t_events
    min_ret -- events whose target is not strictly greater than this are dropped
    t1 -- pandas series of vertical barriers, or False for none (all NaT)
    side -- optional pandas series of position sides; when None a side of 1.0
        is assumed and the 'side' column is removed from the result

    Output:
    A pandas dataframe indexed by event timestamp with columns 't1' (earliest
    timestamp among the barriers reported by apply_triple_barrier), 'trgt' and,
    only when side was supplied, 'side'.
    """
    # 1) targets at the event timestamps, filtered by the minimum return
    target = trgt.reindex(t_events)
    target = target[target > min_ret]

    # 2) vertical barrier (max holding period); NaT placeholder when absent
    vertical = pd.Series(pd.NaT, index=t_events) if t1 is False else t1

    # 3) side and horizontal barrier multipliers
    has_side = side is not None
    if has_side:
        side_ = side.reindex(target.index)
        ptsl_ = ptsl[:2]
    else:
        side_ = pd.Series(1., index=target.index)
        ptsl_ = [ptsl[0], ptsl[0]]

    # 4) assemble the events table and find the first barrier touched
    combined = pd.concat({'t1': vertical, 'trgt': target, 'side': side_}, axis=1)
    events = combined.dropna(subset=['trgt'])
    touches = apply_triple_barrier(close, events, ptsl_, events.index)
    # min(axis=1) skips missing entries, so untouched barriers are ignored
    events['t1'] = touches.dropna(how='all').min(axis=1)

    if not has_side:
        events = events.drop('side', axis=1)
    return events

def apply_triple_barrier(close, events, pt_sl, molecule):
    """
    Find, for each event, when the stop-loss and profit-taking barriers are
    first touched between the event start and its vertical barrier t1.

    Arguments:
    close -- pandas series of prices
    events -- pandas dataframe indexed by event start, with columns:
        t1: The timestamp of the vertical barrier. Missing values are replaced
            by the last timestamp of close when scanning the price path
        trgt: The unit width of the horizontal barriers
        side: Multiplier applied to the path returns of the event
    pt_sl -- a list of two non-negative float values:
        pt_sl[0] -- the factor that multiplies trgt to set the width of the upper barrier. If 0, there will not be an upper barrier
        pt_sl[1] -- the factor that multiplies trgt to set the width of the lower barrier. If 0, there will not be a lower barrier
    molecule -- A list with the subset of event indices that will be processed

    Output:
    A pandas dataframe indexed like events.loc[molecule] with columns 't1'
    (copied unchanged from events), 'sl' and 'pt' holding the earliest
    timestamp at which each horizontal barrier was crossed (missing if never).
    """
    events0 = events.loc[molecule]
    out = events0[['t1']].copy(deep=True)
    # A disabled barrier is an all-NaN series: comparisons against NaN are
    # always False, so the barrier can never be reported as touched.
    if pt_sl[0] > 0:
        pt = pt_sl[0] * events0['trgt']
    else:
        pt = pd.Series(index=events0.index, dtype=float)
    if pt_sl[1] > 0:
        sl = -pt_sl[1] * events0['trgt']
    else:
        sl = pd.Series(index=events0.index, dtype=float)
    # Series.items() replaces Series.iteritems(), which pandas 2.0 removed.
    for loc, t1 in events0['t1'].fillna(close.index[-1]).items():
        path = close.loc[loc:t1]  # path prices
        path = (path / close.loc[loc] - 1) * events0.at[loc, 'side']  # path returns
        out.loc[loc, 'sl'] = path[path < sl[loc]].index.min()  # earliest stop loss
        out.loc[loc, 'pt'] = path[path > pt[loc]].index.min()  # earliest profit taking
    return out

In [10]:
# Load the side signal, index it by the parsed "time" column, keep column "0".
side = (
    pd.read_csv("../data/side.csv")
    .pipe(lambda raw: raw.set_index(pd.to_datetime(raw["time"])))
    .drop(columns=["time"])
)
side = pd.Series(side["0"])

In [11]:
# Minimum target required to keep an event, and [profit-taking, stop-loss] multipliers.
min_ret = 0.0001
ptsl = [0.1, 0.2]

ma_events = get_events(
    close=df["close"],
    t_events=t_events,
    ptsl=ptsl,
    trgt=df["vol"],
    min_ret=min_ret,
    t1=t1,
    side=side,
)

In [12]:
# Count how many events carry each side value.
ma_events["side"].value_counts()

-1.0    13
 1.0    11
Name: side, dtype: int64

In [13]:
# Display the events table returned by get_events.
ma_events

Unnamed: 0,t1,trgt,side
2021-01-09 19:56:34.575,2021-01-10 19:56:34.636,0.000510,
2021-01-09 19:58:15.444,2021-01-10 19:58:16.218,0.001966,
2021-01-09 19:58:30.182,2021-01-10 19:58:31.098,0.002218,
2021-01-09 20:01:02.852,2021-01-10 20:01:03.829,0.001612,
2021-01-09 20:03:06.049,2021-01-10 20:03:07.258,0.002897,
...,...,...,...
2021-01-10 23:24:28.092,NaT,0.002268,
2021-01-10 23:25:02.191,NaT,0.002139,
2021-01-10 23:25:37.310,NaT,0.001870,
2021-01-10 23:25:54.102,NaT,0.001681,


In [19]:
# Name the index so the CSV gets a "time" header, then write the events out.
ma_events.index.name = 'time'
ma_events.to_csv("../data/progress3.csv")