## Python 3 Notebook
### Libraries Used

In [2]:
import pandas as pd
import numpy as np

### Configuration Used

In [10]:
from IPython.core.interactiveshell import InteractiveShell
# Echo the value of every top-level expression in a cell, not only the last one;
# the example cell at the end relies on this to show several f-strings at once.
InteractiveShell.ast_node_interactivity = "all"

### Estimate the Uniqueness

In [4]:
def mp_num_co_events(close_idx, t1, molecule):
    """
        count the number of concurrent events at each bar

        close_idx: ordered index of bars (must support [-1] and searchsorted)
        t1: Series whose index is each event's start and whose value is its
            end; a missing end is replaced by the last bar in close_idx
        molecule: labels of t1 (event starts) handled by this call

        returns a Series indexed by bar, running from molecule[0] to the
        latest end among the molecule's events, holding how many events
        span each bar
    """

    # find events that span the period
    t1 = t1.fillna(close_idx[-1]) # unclosed events must still impact other weights
    t1 = t1[t1 >= molecule[0]] # events that end at or after molecule[0]
    last_end = t1.loc[molecule].max() # latest end among the molecule's events
    t1 = t1.loc[:last_end] # events that start at or before last_end

    # count events spanning a bar
    iloc = close_idx.searchsorted(np.array([t1.index[0], t1.max()]))
    count = pd.Series(0, index=close_idx[iloc[0]:iloc[1]+1])
    for t_in, t_out in t1.items(): # .items(): Series.iteritems() was removed in pandas 2.0
        count.loc[t_in:t_out] += 1.0 # label slice, so both end points are included

    return count.loc[molecule[0]:last_end]

### Estimate the Average Uniqueness

In [5]:
def mp_sample_tw(t1, num_co_events, molecule):
    """
        average uniqueness of each event over its lifespan

        t1: Series whose index is each event's start and whose value is its end
        num_co_events: Series indexed by bar with the number of concurrent
            events at that bar
        molecule: labels of t1 (event starts) handled by this call

        returns a float Series indexed by molecule; each value is the mean
        of 1/num_co_events over the bars from the event's start to its end
    """

    # derive average uniqueness over the events lifespan
    # explicit dtype: without it the empty Series is object-typed on pandas 2.x
    wght = pd.Series(index=molecule, dtype=float)
    for t_in, t_out in t1.loc[wght.index].items(): # Series.iteritems() was removed in pandas 2.0
        wght.loc[t_in] = (1./num_co_events.loc[t_in:t_out]).mean()

    return wght

In [6]:
# TODO: an mp_pandas_obj helper is needed here to test

### Build an Indicator Matrix

In [7]:
def get_ind_matrix(bar_ix, t1):
    """
        get indicator matrix

        bar_ix: index of bars, used as the rows of the matrix
        t1: Series whose index is each event's start bar and whose value is
            its end bar

        returns a DataFrame with one row per bar and one column per event
        (columns numbered 0..len(t1)-1 in t1 order); a cell is 1 when the
        bar lies between the event's start and end (both included), else 0
    """

    ind_m = pd.DataFrame(0, index=bar_ix, columns=range(t1.shape[0]))
    # loop names differ from the t1 argument so it is not rebound mid-iteration;
    # .items() replaces Series.iteritems(), which was removed in pandas 2.0
    for i, (t_in, t_out) in enumerate(t1.items()):
        ind_m.loc[t_in:t_out, i] = 1

    return ind_m

### Compute Average Uniqueness

In [8]:
def get_avg_uniqueness(ind_m):
    """
        average uniqueness from indicator matrix

        ind_m: indicator matrix (rows are bars, columns are events)

        returns a Series with one entry per column: the mean of that
        column's strictly positive uniqueness values
    """

    # per-bar concurrency: how many events are active on each row
    concurrency = ind_m.sum(axis=1)
    # per-bar uniqueness: each row divided by its own concurrency
    uniqueness = ind_m.div(concurrency, axis=0)
    # mask everything that is not strictly positive, then average per column
    return uniqueness.where(uniqueness > 0).mean()

### Return Sample From Sequential Bootstrap

In [18]:
def seq_bootstrap(ind_m, s_len=None):
    """
        generate a sample via sequential bootstrap

        ind_m: indicator matrix (rows are bars, columns are events)
        s_len: number of draws; defaults to the number of columns of ind_m

        returns a list of s_len column labels of ind_m; on every draw each
        column's probability is proportional to the average uniqueness it
        would have if appended to the columns drawn so far
    """

    if s_len is None:
        s_len = ind_m.shape[1]
    phi = []

    while len(phi) < s_len:
        # Uniqueness of every candidate column given what is already drawn.
        # Built in one go with an explicit float dtype: a dtype-less empty
        # Series is object-typed on pandas 2.x, and growing it label by label
        # through .loc is needlessly slow.
        avg_u = pd.Series(
            {
                i: get_avg_uniqueness(ind_m[phi + [i]]).iloc[-1] # last column is the candidate
                for i in ind_m
            },
            dtype=float,
        )
        prob = avg_u / avg_u.sum() # draw prob
        phi += [np.random.choice(ind_m.columns, p=prob)]

    return phi

### Example of Sequential Bootstrap

In [25]:
# Toy data set: three observations; index = bar where the observation starts,
# value = bar where it ends.
t1 = pd.Series([2,3,5], index=[0,2,4]) # for each feature obs
bar_ix = range(t1.max()+1) # index of bars

ind_m = get_ind_matrix(bar_ix, t1)
# Standard bootstrap: draw as many columns as there are observations, with
# replacement. No seed is set in this cell, so the draw changes between runs.
phi = np.random.choice(ind_m.columns, size=ind_m.shape[1])

# Bare f-strings are echoed as cell output (ast_node_interactivity = "all").
f"phi: {phi}"

# Mean average uniqueness of the standard draw; phi is then reused for the
# sequential-bootstrap draw.
avg_u = get_avg_uniqueness(ind_m[phi]).mean()
phi = seq_bootstrap(ind_m)

f"standard uniqueness: {avg_u}"
f"phi: {phi}"

# Mean average uniqueness of the sequential-bootstrap draw, for comparison.
seq_u = get_avg_uniqueness(ind_m[phi]).mean()

f"sequential uniqueness: {seq_u}"

'phi: [2 2 1]'

'standard uniqueness: 0.6666666666666666'

'phi: [0, 2, 1]'

'sequential uniqueness: 0.8611111111111112'