# Chapter 3: Labeling

## 3.3 Computing Dynamic Thresholds

### SNIPPET 3.1 DAILY VOLATILITY ESTIMATES

In [3]:
def get_daily_vol(close, span0=100):
    """
    Estimate volatility as the exponentially weighted std of lagged returns.

    Arguments:
    close -- pandas Series of prices. Its index has pd.Timedelta(days=1)
             subtracted from it and is searched with searchsorted, so it is
             assumed to be a sorted DatetimeIndex -- TODO confirm with callers
    span0 -- span of the exponentially weighted window (passed as
             ``span`` to Series.ewm)

    Output:
    pandas Series of volatility estimates. It is indexed by the timestamps of
    close for which an earlier reference observation exists; the leading
    timestamps without one are dropped.

    Purpose:
    use the output of this function to set default profit taking and
    stop-loss limits
    """
    # For every timestamp t, find the first position whose timestamp is
    # >= t - 1 day (searchsorted defaults to side='left').
    prev_pos = close.index.searchsorted(close.index - pd.Timedelta(days=1))
    # Position 0 means nothing precedes t - 1 day, so no reference price exists.
    prev_pos = prev_pos[prev_pos > 0]
    # Map each remaining timestamp to the last observation strictly before
    # t - 1 day. The dropped entries are the leading ones when the index is
    # sorted, hence the alignment with the tail of the index.
    prev_ts = pd.Series(
        close.index[prev_pos - 1],
        index=close.index[close.shape[0] - prev_pos.shape[0]:],
    )
    # Return between each timestamp and its reference observation.
    rets = close.loc[prev_ts.index] / close.loc[prev_ts.values].values - 1
    # Fix: the original read `ewm(span-span0)`, which raises NameError
    # because `span` is not a variable; the span must be passed by keyword.
    return rets.ewm(span=span0).std()

## 3.4 The Triple-Barrier Method

### SNIPPET 3.2 TRIPLE-BARRIER LABELING METHOD

In [5]:
def apply_triple_barrier(close, events, pt_sl, molecule):
    """
    Find when the stop-loss / profit-taking barriers are touched before t1.

    Arguments:
    close -- pandas series of prices
    events -- pandas dataframe with columns:
        t1: The timestamp of the vertical barrier. When the value is NaT/NaN,
            the search runs to the last timestamp of close
        trgt: The unit width of the horizontal barriers
        side: Multiplier applied to the path returns before they are compared
            with the barriers
    pt_sl -- a list of two non-negative float values:
        pt_sl[0] -- the factor that multiplies trgt to set the width of the upper barrier. If 0, there will not be an upper barrier
        pt_sl[1] -- the factor that multiplies trgt to set the width of the lower barrier. If 0, there will not be a lower barrier
    molecule -- A list with the subset of event indices that will be processed by a single thread

    Output:
    A pandas dataframe indexed by the events in molecule, holding a copy of
    't1' plus the columns 'sl' and 'pt' with the earliest timestamp (NaT if
    none) at which the stop-loss and profit-taking barriers were touched.
    """
    events0 = events.loc[molecule]
    out = events0[['t1']].copy(deep=True)

    # A disabled barrier is an all-NaN float series: comparisons against NaN
    # are always False, so that barrier is never reported as touched. The
    # dtype is explicit so the placeholder is not an object-dtype Series.
    if pt_sl[0] > 0:
        pt = pt_sl[0] * events0['trgt']
    else:
        pt = pd.Series(float('nan'), index=events0.index, dtype=float)
    if pt_sl[1] > 0:
        sl = -pt_sl[1] * events0['trgt']
    else:
        sl = pd.Series(float('nan'), index=events0.index, dtype=float)

    # Events without a vertical barrier are searched up to the last price.
    horizons = events0['t1'].fillna(close.index[-1])
    # Series.iteritems() was removed in pandas 2.0; items() is the equivalent.
    for loc, t1 in horizons.items():
        path = close.loc[loc:t1]  # path prices
        path = (path / close.loc[loc] - 1) * events0.at[loc, 'side']  # path returns
        # index.min() of an empty selection is NaT, i.e. "never touched".
        out.loc[loc, 'sl'] = path[path < sl.at[loc]].index.min()  # earliest stop loss
        out.loc[loc, 'pt'] = path[path > pt.at[loc]].index.min()  # earliest profit taking
    return out

## 3.5 Learning Side and Size

### SNIPPET 3.3 GETTING THE TIME OF FIRST TOUCH

In [6]:
def get_events(close, t_events, pt_sl, trgt, min_ret, num_threads, t1=False):
    """
    Find the time of the first barrier touch for every seeded event.

    Arguments:
    close -- a pandas series of prices
    t_events -- the pandas timeindex containing the timestamps that will seed every triple barrier. These are the timestamps discussed in section 2.5
    pt_sl -- a non-negative float that sets the width of the two barriers. A 0 value means that the respective horizontal barrier (profit taking and/or stop loss) will be disabled
    trgt -- a pandas series of targets, expressed in terms of absolute returns
    min_ret -- the minimum target return required for running a triple barrier search
    num_threads -- the number of threads concurrently used by the function
    t1 -- a pandas series with the timestamps of the vertical barriers. We pass False when we want to disable vertical barriers

    Output:
    pandas dataframe with columns
    t1 -- the timestamp at which the first barrier is touched
    trgt -- the target that was used to generate the horizontal barriers
    """
    # Get target: keep only seeded events whose target exceeds min_ret
    trgt = trgt.loc[t_events]
    trgt = trgt[trgt > min_ret]  # min_ret

    # Get t1 (max holding period); all-NaT means no vertical barrier
    if t1 is False:
        t1 = pd.Series(pd.NaT, index=t_events)

    # Form events object, apply stop loss on t1.
    # side is fixed at 1 here, so path returns are compared unsigned.
    side0 = pd.Series(1., index=trgt.index)
    events = pd.concat({'t1': t1, 'trgt': trgt, 'side': side0},
                       axis=1).dropna(subset=['trgt'])
    # Fix: the barrier widths must be passed under the name the worker
    # declares (`pt_sl`); the original passed `ptSL`, which does not match
    # apply_triple_barrier's signature.
    # NOTE(review): assumes mpPandasObj forwards extra keyword arguments to
    # `func` unchanged -- it is defined outside this section, confirm.
    df0 = mpPandasObj(func=apply_triple_barrier, pdObj=('molecule', events.index),
                      numThreads=num_threads, close=close, events=events,
                      pt_sl=[pt_sl, pt_sl])
    # Earliest of (t1, sl, pt) per event; min(axis=1) ignores NaN/NaT
    events['t1'] = df0.dropna(how='all').min(axis=1)
    events = events.drop('side', axis=1)
    return events

### SNIPPET 3.4 ADDING A VERTICAL BARRIER

In [None]:
# Vertical barrier: for every event, locate the first bar at or after
# (event time + num_days days).
t1 = close.index.searchsorted(t_events + pd.Timedelta(days=num_days))
# Drop positions that fall past the last available bar.
t1 = t1[t1 < len(close)]
# Pair each surviving barrier timestamp with its event; events whose barrier
# was dropped are left out of the series (NaNs at end once aligned).
# NOTE(review): the positional pairing assumes t_events is sorted, so that
# only trailing events lose their barrier -- confirm.
t1 = pd.Series(close.index[t1], index=t_events[:len(t1)])

### SNIPPET 3.5 LABELING FOR SIDE AND SIZE

In [7]:
def get_bins(events, close):
    """
    Label every event by the sign of the return realized at its first touch.

    Arguments:
    events -- pandas dataframe indexed by event start time, with a column
        't1' holding the time of the first touched barrier. Rows whose 't1'
        is missing are ignored
    close -- pandas series of prices

    Output:
    dataframe with columns:
    ret -- the return realized at the time of the first touched barrier
    bin -- the label, {-1, 0, 1}, as a function of the sign of the outcome.
        The function can be easily adjusted to label as 0 those events when
        the vertical barrier was touched first
    """
    # Only events that have an end time can be labeled.
    labeled = events.dropna(subset=['t1'])
    start_times = labeled.index
    end_times = labeled['t1'].values

    # Prices at every start/end timestamp; timestamps missing from close are
    # back-filled from the next available price.
    stamps = start_times.union(end_times).drop_duplicates()
    prices = close.reindex(stamps, method='bfill')

    # Create out object
    out = pd.DataFrame(index=start_times)
    out['ret'] = prices.loc[end_times].values / prices.loc[start_times] - 1
    out['bin'] = np.sign(out['ret'])
    return out