# Local Exchangeability Experiments

### Package Import

In [1]:
import pandas as pd
import numpy as np
import sys
pd.set_option('display.max_rows', None)
import bokeh.plotting as bkp
import bokeh.io as bki
import bokeh.palettes as bkpal
from bokeh.models import Span, FuncTickFormatter
import time


### Bokeh Notebook Output and Theme Settings

In [2]:
bki.output_notebook()

In [3]:
from bokeh.io import curdoc
from bokeh.themes import Theme

production = False

if production:
    # Production-quality settings
    fsz = '190pt'
    figsz = 6000
    glyphw = 160
    glyphh = 160
    lw = 30
    glw = 5
else:
    #Notebook-quality settings
    fsz = '16pt'
    figsz = 600
    glyphw = 20
    glyphh = 20
    lw = 3
    glw = 1

curdoc().theme = Theme(json={'attrs': {
    'Figure': {
        'width': figsz,
        'height': figsz
    },
    # apply defaults to Grid properties
    'Grid': {
        'grid_line_width': glw
    },
    # apply defaults to Line/Span properties
    'Line': {
        'line_width': lw
    },
    'Span': {
        'line_width': lw
    },
    # apply defaults to Title properties
    'Title': {
        'text_font_size': fsz
    },
    # apply defaults to Axis properties
    'Axis': {
        'axis_label_text_font_size': fsz,
        'major_label_text_font_size' : fsz
    },
     # apply defaults to Legend properties
    'Legend': {
        'label_text_font_size': fsz,
        'glyph_width': glyphw,
        'glyph_height': glyphh
    }
}})

logFmtr = FuncTickFormatter(code="""
var trns = [
'\u2070',
'\u00B9',
'\u00B2',
'\u00B3',
'\u2074',
'\u2075',
'\u2076',
'\u2077',
'\u2078',
'\u2079'];
var tick_power = Math.floor(Math.log10(tick));
var tick_mult = Math.pow(10, Math.log10(tick) - tick_power);
var ret = '';
if (tick_mult > 1.) {
  if (Math.abs(tick_mult - Math.round(tick_mult)) > 0.05){
    ret = tick_mult.toFixed(1) + '\u22C5';
  } else {
    ret = tick_mult.toFixed(0) +'\u22C5';
  }
}
ret += '10';
if (tick_power < 0){
  ret += '\u207B';
  tick_power = -tick_power;
}
power_digits = []
while (tick_power > 9){
  power_digits.push( tick_power - Math.floor(tick_power/10)*10 )
  tick_power = Math.floor(tick_power/10)
}
power_digits.push(tick_power)
for (i = power_digits.length-1; i >= 0; i--){
  ret += trns[power_digits[i]];
}
return ret;
""")


# Data Preprocessing

### Extract Relevant Columns, Simplify, Tidy

In [4]:
# load in data
# this data was collected on May 1, 2020 from 
# https://www.chapelhillopendata.org/explore/dataset/bicycle-crash-data-chapel-hill-region
df = pd.read_csv('bicycle-crash-data-chapel-hill-region.csv', sep=';')
print(df.shape)
#extract relevant columns
cols_used = ['BikeInjury', 'Biker Intox.', 'BikeAge', 'BikeSex', 'BikeDir', 'BikePos', 
        'DrvrVehTyp', 'LightCond', 'SpeedLimit', 'Day of Week', 'CrashHour']
df = df[cols_used]

#extract rows without missing data
ageknown = (df.BikeAge != '999') & (df.BikeAge != '70+')
lightknown = (df['LightCond'] != 'Unknown') & (df['LightCond'] != 'Other')
vehknown = df['DrvrVehTyp'] != 'Unknown'
spdknown = df['SpeedLimit'] != 'Unknown'
injknown = df['BikeInjury'] != 'Unknown Injury'
dirknown = df['BikeDir'] != 'Unknown'
posknown = df['BikePos'] != 'Unknown'
df = df[ageknown & lightknown & vehknown & spdknown & injknown & dirknown & posknown]

#convert ages to integers
df['BikeAge'] = df['BikeAge'].astype(int)

#simplify injury type
def simplify_inj(inj):
    if 'Minor' in inj:
        return 'Minor'
    elif 'Serious' in inj:
        return 'Serious'
    elif 'Killed' in inj:
        return 'Killed'
    elif 'Possible' in inj:
        return 'Possible'
    elif 'No In' in inj:
        return 'None'
    else:
        return 'NA'
df['BikeInjury'] = df['BikeInjury'].apply(simplify_inj)

#simplify light condition
df['LightCond'] = df['LightCond'].apply(lambda x : x[:4])

#simplify vehicle types
def simplify_veh(veh):
    if 'Truck' in veh or 'truck' in veh or 'Motor Home' in veh:
        return 'Truck'
    elif 'Bus' in veh:
        return 'Bus'
    elif 'EMS' in veh or 'Police' in veh:
        return 'Emergency'
    else:
        return veh
df['DrvrVehTyp'] = df['DrvrVehTyp'].apply(simplify_veh)

#simplify bike direction
def simplify_dir(dr):
    if 'With' in dr:
        return 'With'
    elif 'Not' in dr:
        return 'NA'
    else:
        return 'Against'
df['BikeDir'] = df['BikeDir'].apply(simplify_dir)

#simplify bike position
def simplify_pos(pos):
    if 'With' in pos:
        return 'With'
    elif 'Not' in dr:
        return 'NA'
    else:
        return 'Against'
df['BikeDir'] = df['BikeDir'].apply(simplify_dir)

#simplify day of week
day_map = {'Sunday': 0,
           'Monday': 1,
           'Tuesday': 2,
           'Wednesday': 3,
           'Thursday': 4,
           'Friday': 5,
           'Saturday': 6}
df['Day of Week'] = df['Day of Week'].apply(lambda x : day_map[x])

#simplify speed limit
def simplify_spd(spd):
    return spd.strip().split(' ')[2]
df['SpeedLimit'] = df['SpeedLimit'].apply(simplify_spd)
df['SpeedLimit'] = df['SpeedLimit'].astype(int)


#Print the output
print('Preprocessed Dataframe:')
print(df.head())

print('')
print('')
print('Split Covariate/Outcome Arrays:')

#create a version of the data with covariates and observations separate
Y = np.array(df['BikeInjury'])
X = np.array(df.drop(['BikeInjury'], axis=1))

print('X:')
print(X)
print('Y:')
print(Y)

(11266, 55)
Preprocessed Dataframe:
  BikeInjury Biker Intox.  BikeAge BikeSex  BikeDir  \
0       None           No       12    Male     With   
2   Possible           No       36    Male     With   
3   Possible           No       63    Male     With   
4   Possible           No       14    Male     With   
5       None           No       57    Male  Against   

                      BikePos     DrvrVehTyp LightCond  SpeedLimit  \
0                 Travel Lane  Passenger Car      Dayl          35   
2  Bike Lane / Paved Shoulder  Passenger Car      Dayl          45   
3                 Travel Lane  Sport Utility      Dayl          35   
4                 Travel Lane  Passenger Car      Dayl          35   
5                 Non-Roadway         Pickup      Dayl          15   

   Day of Week  CrashHour  
0            6         18  
2            4         17  
3            3         18  
4            3         15  
5            6         13  


Split Covariate/Outcome Arrays:
X:
[['No' 

# Local Permutation Test

### Premetric

In [5]:
#X columns: ['Biker Intox.', 'BikeAge', 'BikeSex', 'BikeDir', 'BikePos', 
#        'DrvrVehTyp', 'LightCond', 'SpeedLimit', 'Day of Week', 'CrashHour']

def premetric(x1, x2, weight):
    cat_idcs = [2,3,4,5,6]
    if (x1[cat_idcs] != x2[cat_idcs]).any():
        return 1
    else:
        return min(1, 
                  weight*(
                  np.fabs(x1[1]-x2[1]) + 
                  np.fabs(x1[7]-x2[7]) +
                  min(np.fabs(x1[8]-x2[8]), 7 - np.fabs(x1[8]-x2[8])) + 
                  min(np.fabs(x1[9]-x2[9]), 24 - np.fabs(x1[9]-x2[9]))
                  ))           

### Test Statistic

In [6]:
def test_stat(X, Y):
    intox = (X[:, 0] == 'Yes')
    severe = (Y == 'Serious') | (Y == 'Killed') 
    return (severe & intox).sum()/intox.sum() - (severe & ~intox).sum()/(~intox).sum()

### Pair Permutation Subgroup Constructor

In [7]:
def construct_pairs(X, d, match_coln):
    #ensure the match column has exactly two values
    match_vals = np.unique(X[:, 0])
    if match_vals.shape[0] != 2:
        print('Unable to construct pair matching on column ' + str(match_coln))
        print('Number of unique values != 2: ' +str(match_vals.shape[0]))
    
    #separate rows for the two match_coln values
    idcs0 = np.where(X[:, match_coln] == match_vals[0])[0]
    idcs1 = np.where(X[:, match_coln] == match_vals[1])[0]
    X0 = X[X[:, match_coln] == match_vals[0]]
    X1 = X[X[:, match_coln] == match_vals[1]]
    
    #create a matrix of pairwise premetric distances
    dists = np.zeros((X0.shape[0], X1.shape[0]))
    for i in range(X0.shape[0]):
        if i % 1000 == 0:
            sys.stdout.write('row ' + str(i+1)+'/'+str(X0.shape[0])+'              \r')
            sys.stdout.flush()
        for j in range(X1.shape[0]):
            dists[i,j] = d(X0[i,:], X1[j,:])
    sys.stdout.write('\n')
    sys.stdout.flush()
    
    #greedy construction: pick pairs one-by-one via minimum premetric distance
    pairs = []
    pair_dists = []
    while dists.min() < np.inf:
        am = dists.argmin()
        row = am // dists.shape[1]
        col = am % dists.shape[1]
        pairs.append((idcs0[row], idcs1[col]))
        pair_dists.append(dists[row, col])
        dists[row, :] = np.inf
        dists[:, col] = np.inf
        
    #output the premetric-ordered list of pairs
    return pairs, pair_dists

### Paired Permutation Test

In [8]:
def pair_test(X, Y, S, pairs, pair_dists, N_samples, alpha):
    #take pairs until their local exchangeability penalty would exceed alpha/2
    num_pairs = (np.cumsum(pair_dists) <= alpha/2).sum()
    
    #if there are no pairs we can swap under this constraint, just output
    if num_pairs == 0:
        return False, 0, S(X, Y), S(X, Y)*np.ones(N_samples) #all "permutations" are the same, cannot reject null
    
    #compute the local exchangeability penalty
    penalty = np.cumsum(pair_dists)[num_pairs-1]
    thresh = 1 - alpha + penalty
    
    #run N_samples of pair swaps 
    S_vals = np.zeros(N_samples)
    for n in range(N_samples):
        if n % 1000 == 0:
            sys.stdout.write('sample ' + str(n+1)+'/'+str(N_samples)+'              \r')
            sys.stdout.flush()
        #generate a uniform random swap of all pairs
        Yc = Y.copy()
        for p in range(num_pairs):
            if np.random.rand() <= 0.5:
                tmp = Yc[pairs[p][0]]
                Yc[pairs[p][0]] = Yc[pairs[p][1]]
                Yc[pairs[p][1]] = tmp
        #compute the test statistic for the swapped data
        S_vals[n] = S(X, Yc)
    sys.stdout.write('\n')
    sys.stdout.flush()
    
    #Reject the null?
    reject_null = (S(X, Y) > S_vals).sum()/N_samples > thresh
    
    #return the results
    return reject_null, num_pairs, S(X, Y), S_vals

### Run the Test

In [9]:
#try different premetric weights
weights = np.logspace(-7, 0, 20)

#match covariate = Biker Intoxication Status
match_col = 0

#Type 1 error guarantee = 5%
alpha = 0.05

#simulate 100,000 permutations
N_samples = 100000


S_vals = np.zeros((weights.shape[0], N_samples))
S = 0
n_pairs = np.zeros(weights.shape[0])
rejections = np.zeros(weights.shape[0], dtype=np.bool)
#for each premetric weight setting
for n in range(weights.shape[0]):
    print('Weight iter ' + str(n+1)+'/'+str(weights.shape[0]))
    #construct the list of matched pairs
    pairs, pair_dists = construct_pairs(X, lambda x1, x2 : premetric(x1, x2, weights[n]), match_col)

    #run the permutation test and store the output
    rejections[n], n_pairs[n], S, S_vals[n,:] = pair_test(X, Y, test_stat, pairs, pair_dists, N_samples, alpha)   


Weight iter 1/20
row 8001/8234              
sample 99001/100000              
Weight iter 2/20
row 8001/8234              
sample 99001/100000              
Weight iter 3/20
row 8001/8234              
sample 99001/100000              
Weight iter 4/20
row 8001/8234              
sample 99001/100000              
Weight iter 5/20
row 8001/8234              
sample 99001/100000              
Weight iter 6/20
row 8001/8234              
sample 99001/100000              
Weight iter 7/20
row 8001/8234              
sample 99001/100000              
Weight iter 8/20
row 8001/8234              
sample 99001/100000              
Weight iter 9/20
row 8001/8234              
sample 99001/100000              
Weight iter 10/20
row 8001/8234              
sample 99001/100000              
Weight iter 11/20
row 8001/8234              
sample 99001/100000              
Weight iter 12/20
row 8001/8234              
sample 99001/100000              
Weight iter 13/20
row 8001/8234              
sam

### Visualize the Result

In [10]:
#create a figure with a vertical line at the actual observed data statistic
fig = bkp.figure(x_axis_label='Test Statistic Value', y_axis_label='Density', x_range=(0.04,0.14))
vline = Span(location=S, dimension='height', line_color='black')
fig.renderers.append(vline)

weights = np.logspace(-7, 0, 20)
colors = np.array(bkpal.Category10[10])[[3, 1, 2, 0]]

#for each weight setting, plot the histogram of permuted data test statistics
for i, n in enumerate([3, 7, 9, 12]):
    hist, edges = np.histogram(S_vals[n,:], bins=np.linspace(0, 0.15, 90))
    fig.quad(top=hist/N_samples, bottom=0, left=edges[:-1], right=edges[1:],
               fill_color=colors[i], line_color="white", alpha=0.7, legend='\u03BB = %0.0e' % weights[n])
fig.line([0],[0], line_color='black', legend='Observed')
fig.legend.location = 'top_left'
bkp.show(fig)

#plot the number of valid pairs to swap for each weight setting
fig = bkp.figure(x_axis_label='Premetric Weight \u03BB', y_axis_label='# Swappable Pairs', x_axis_type='log')
fig.line(weights[rejections], n_pairs[rejections], line_color=colors[0], legend='Rejected')
fig.line([weights[rejections][-1], weights[~rejections][0]], 
         [n_pairs[rejections][-1], n_pairs[~rejections][0]], 
        line_color=colors[0])
fig.line(weights[~rejections], n_pairs[~rejections], line_color=colors[3], legend='Not Rejected')
fig.xaxis.formatter = logFmtr
bkp.show(fig)

# Local Estimation

### Premetric

In [11]:
#covariate = 'CrashHour'
def premetric(x1, x2, weight):
    return min(1, weight*min(np.fabs(x1-x2), 24 - np.fabs(x1-x2)))

### Local Empirical Measure

In [12]:
def empirical_msr(x, X, premetric):
    b = np.zeros(X.shape[0])
    for n in range(X.shape[0]):
        b[n] = premetric(x, X[n])
    idcs = np.argsort(b)
    c1 = 1. + 2*np.cumsum(b[idcs])
    c2 = 2*(1.+np.arange(b.shape[0]))*b[idcs]
    M = (c1 > c2).sum()
    mu = (1.+2*b[idcs][:M].sum())/M
    w = np.maximum(-2*b + mu, 0)
    return w

### Estimation

In [13]:
weights = np.logspace(-7, 0, 8)
times = np.linspace(0, 23, 50)
ests = np.zeros((weights.shape[0], times.shape[0]))
n_active = np.zeros(weights.shape[0])
Ybin = np.zeros(Y.shape[0])
severe = (Y == 'Serious') | (Y == 'Killed') 
Ybin[severe] = 1
for n in range(weights.shape[0]):
    print('weight iteration ' + str(n+1)+'/'+str(weights.shape[0]))
    n_active[n] = 0
    for j in range(times.shape[0]):
        emp_msr_wts = empirical_msr(times[j], X[:, 9], lambda x1, x2 : premetric(x1, x2, weights[n]))
        n_active[n] += (emp_msr_wts>0).sum()
        ests[n, j] = (emp_msr_wts*Ybin).sum()
    n_active[n] /= times.shape[0]

weight iteration 1/8
weight iteration 2/8
weight iteration 3/8
weight iteration 4/8
weight iteration 5/8
weight iteration 6/8
weight iteration 7/8
weight iteration 8/8


### Visualize the Result

In [14]:
colors = np.array(bkpal.Category10[10])[[3, 1, 2, 0]]

#Plot bars for fraction of severe outcomes at each time
fig = bkp.figure(x_range=(0,25), y_axis_label='Fraction of Severe Outcomes', x_axis_label='Time of Day (Hr)')

fig.quad(top=[(Ybin[X[:,9] == t].sum()/(X[:,9]==t).sum()) for t in range(24)], bottom=0, 
         left=np.arange(24)-.5, right=np.arange(24)+.5, line_color='white', 
         fill_color='black', alpha=0.3)

for i, n in enumerate([0, 3, 4, 6]):
    fig.line(times, ests[n,:], line_color=colors[i], legend='\u03BB = %0.0E' % weights[n])

bkp.show(fig)


fig = bkp.figure(x_axis_label='Premetric Weight \u03BB', y_axis_label='Average # Atoms', x_axis_type='log')
fig.line(weights, n_active, line_color=colors[3])
fig.xaxis.formatter = logFmtr
bkp.show(fig)

### Record Computation Time 

In [15]:
n_trials = 50
n_data = np.logspace(0, 4, 10).astype(int)
cputs = np.zeros((n_trials, n_data.shape[0]))
wt = 0.001
idcs = np.arange(X.shape[0])
np.random.shuffle(idcs)
for t in range(n_trials):
    sys.stdout.write('trial ' + str(t+1)+'/'+str(n_trials)+'               \r')
    sys.stdout.flush()
    for n in range(n_data.shape[0]):
        t0 = time.perf_counter()
        times = np.linspace(0, 23, 50)
        ests = np.zeros(times.shape[0])
        Ybin = np.zeros(Y.shape[0])
        severe = (Y == 'Serious') | (Y == 'Killed') 
        Ybin[severe] = 1
        for j in range(times.shape[0]):
            emp_msr_wts = empirical_msr(times[j], X[idcs[:n_data[n]], 9], lambda x1, x2 : premetric(x1, x2, wt))
            ests[j] = (emp_msr_wts*Ybin[idcs[:n_data[n]]]).sum()
        tf = time.perf_counter()
        cputs[t, n] = tf-t0
sys.stdout.write('\n')
sys.stdout.flush()


trial 50/50               


### Visualize the Result

In [16]:
colors = np.array(bkpal.Category10[10])[[3, 1, 2, 0]]

#Plot bars for fraction of severe outcomes at each time
fig = bkp.figure(y_axis_label='Computation Time (s)',x_axis_label='Dataset Size')
for t in range(n_trials):
    fig.line(n_data, cputs[t,:], line_color=colors[3], line_alpha=0.1)
bkp.show(fig)

# Unused Visualizations

In [20]:
#obtain rows of df for intoxicated biker / severe outcomes
intox = (df['Biker Intox.'] == 'Yes')
severe = (df['BikeInjury'] == 'Serious') | (df['BikeInjury'] == 'Killed') 

indices = [ (~severe & ~intox), (~severe & intox), (severe & ~intox),  (severe & intox) ]
colors = [bkpal.Category10[10][0], bkpal.Category10[10][0], bkpal.Category10[10][1], bkpal.Category10[10][1]]
alphas = [0.3, 0.7, 0.3, 0.7]
labels=['Not Severe, Not Intoxicated', 'Not Severe, Intoxicated', 'Severe, Not Intoxicated', 'Severe, Intoxicated']

coln = df.DrvrVehTyp.unique()
vals = np.zeros(coln.shape[0])
totalcts = df.DrvrVehTyp.value_counts()[coln]
fig = bkp.figure(width=800, height=400, y_range=coln,
                x_axis_label='Fraction')
print(totalcts)
for i in range(4):
    cts = df.DrvrVehTyp[indices[i]].value_counts()[coln]
    cts[np.isnan(cts)] = 0
    fig.hbar(y = coln, left = vals/totalcts, right=(vals+cts)/totalcts,
                 fill_color=colors[i], line_color='black', line_width=0.5, height=.7, fill_alpha=alphas[i])
    vals = vals + cts
bkp.show(fig)


coln = np.sort(df.SpeedLimit.unique())
vals = np.zeros(coln.shape[0])
totalcts = df.SpeedLimit.value_counts()[coln]
fig = bkp.figure(width=600, height=250, y_range=[str(x) for x in coln],
                x_axis_label='Fraction',
                y_axis_label='Speed Limit (MPH)')
for i in range(4):
    cts = df.SpeedLimit[indices[i]].value_counts()[coln]
    cts[np.isnan(cts)] = 0
    fig.hbar(y = [str(x) for x in coln], left = vals/totalcts, right=(vals+cts)/totalcts,
                 fill_color=colors[i], line_color='black', line_width=0.5, height=.7, fill_alpha=alphas[i])
    vals = vals + cts
bkp.show(fig)


coln = np.sort(df.BikeSex.unique())
vals = np.zeros(coln.shape[0])
totalcts = df.BikeSex.value_counts()[coln]
fig = bkp.figure(width=600, height=250, y_range=[str(x) for x in coln],
                x_axis_label='Fraction',
                y_axis_label='Speed Limit (MPH)')
for i in range(4):
    cts = df.BikeSex[indices[i]].value_counts()[coln]
    cts[np.isnan(cts)] = 0
    fig.hbar(y = [str(x) for x in coln], left = vals/totalcts, right=(vals+cts)/totalcts,
                 fill_color=colors[i], line_color='black', line_width=0.5, height=.7, fill_alpha=alphas[i])
    vals = vals + cts
bkp.show(fig)

coln = np.array(['Dawn', 'Dayl', 'Dusk', 'Dark'])
vals = np.zeros(coln.shape[0])
totalcts = df.LightCond.value_counts()[coln]
fig = bkp.figure(width=600, height=200, y_range=[str(x) for x in coln],
                x_axis_label='Fraction')
for i in range(4):
    cts = df.LightCond[indices[i]].value_counts()[coln]
    cts[np.isnan(cts)] = 0
    fig.hbar(y = [str(x) for x in coln], left = vals/totalcts, right=(vals+cts)/totalcts,
                 fill_color=colors[i], line_color='black', line_width=0.5, height=.7, fill_alpha=alphas[i])
    vals = vals + cts
bkp.show(fig)


coln = np.sort(df.BikeAge.unique())
vals = np.zeros(coln.shape[0])
totalcts = df.BikeAge.value_counts()[coln]
fig = bkp.figure(width=600, height=2000, y_range=[str(x) for x in coln],
                x_axis_label='Fraction',
                y_axis_label='Speed Limit (MPH)')
for i in range(4):
    cts = df.BikeAge[indices[i]].value_counts()[coln]
    cts[np.isnan(cts)] = 0
    fig.hbar(y = [str(x) for x in coln], left = vals/totalcts, right=(vals+cts)/totalcts,
                 fill_color=colors[i], line_color='black', line_width=0.5, height=.7, fill_alpha=alphas[i])
    vals = vals + cts
bkp.show(fig)


coln = np.sort(df.LightCond.unique())
vals = np.zeros(coln.shape[0])
totalcts = df.LightCond.value_counts()
fig = bkp.figure(width=600, height=400, y_range=[str(x) for x in coln],
                x_axis_label='Fraction', x_range=(0, 5))
for i in range(4):
    cts = df.LightCond[indices[i]].value_counts()[coln]
    cts[np.isnan(cts)] = 0
    fig.hbar(y = [str(x) for x in coln], left = vals/totalcts, right=(vals+cts)/totalcts,
                 fill_color=colors[i], line_color='black', line_width=0.5, height=.7, fill_alpha=alphas[i],
                legend=labels[i])
    vals = vals + cts
bkp.show(fig)

Passenger Car           5051
Sport Utility           1623
Pickup                  1271
Truck                    261
Van                      466
Motorcycle                50
Bus                       50
Emergency                 50
Tractor/Semi-Trailer      24
Taxicab                   17
Moped                      6
Name: DrvrVehTyp, dtype: int64


Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike
  return self.loc[key]
