# Experiment result analysis

This notebook focuses on the three different parameter settings and on the individual-level impact of wearing the sensors.


In [1]:
import pandas as pd
import altair as alt
import re
import statsmodels.api as sm
from statsmodels.formula.api import ols
from scipy import stats
import numpy as np

In [2]:
# Input files, both expected next to the notebook.
MELTED_DATA_PATH = "melted_data.csv"
DATA_PATH = "data.csv"

# `melted_df` is used further down with one 'Stage' value per row, while `df`
# carries separate Baseline / Intervention columns for each worker.
melted_df = pd.read_csv(MELTED_DATA_PATH)
df = pd.read_csv(DATA_PATH)

## High-level results / impact of wearing the sensors on safety scores

Let's focus on individual level effects of wearing the sensors. At this point, I will ignore the workers that did not participate in one of the stages (i.e. only complete observations are used)

In [3]:
# Per-worker frame holding the two metrics at both stages plus their change.
# A change is NaN whenever either stage is missing for that worker.
score_columns = [
    'Haptic Group',
    'ID',
    'Baseline Average Safety Score',
    'Intervention Average Safety Score',
    'Baseline Average Lift Rate',
    'Intervention Average Lift Rate',
]
scores = df[score_columns].assign(**{
    'Score Impact': lambda frame: (
        frame['Intervention Average Safety Score'] - frame['Baseline Average Safety Score']
    ),
    'Lift Rate Impact': lambda frame: (
        frame['Intervention Average Lift Rate'] - frame['Baseline Average Lift Rate']
    ),
})

Overall, the scores are higher, and the difference is statistically significant

In [4]:
# One-sample t-test of the per-worker score change against zero.
# nan_policy='omit' drops workers whose change is NaN.
score_impact = scores['Score Impact']
t_stat, p_value = stats.ttest_1samp(score_impact, 0, nan_policy='omit')

overall_row = [score_impact.mean(), t_stat, p_value]
pd.DataFrame(
    [overall_row],
    columns=['Mean score change', 't-statistic', 'p-value'],
).round(4)

Unnamed: 0,Mean score change,t-statistic,p-value
0,1.3416,4.357,0.0


How does that look on a per-group basis?

In [5]:
def _stats(df):
    b = df['Baseline Average Safety Score']
    i = df['Intervention Average Safety Score']
    diff = df['Score Impact']
    ttest = stats.ttest_1samp(diff, 0, nan_policy='omit')
    
   
    res = pd.DataFrame([{
        "Baseline Average Safety Score": b.mean(),
        "Intervention Average Safety Score": i.mean(),
        "Change in Average Safety Score": diff.mean(),
        'p-value': ttest[1]
    }])
    return res
    
    
# One summary row per haptic feedback setting.
per_group_score_stats = scores.groupby(['Haptic Group']).apply(_stats)
per_group_score_stats.round(4)


Unnamed: 0_level_0,Unnamed: 1_level_0,Baseline Average Safety Score,Intervention Average Safety Score,Change in Average Safety Score,p-value
Haptic Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2 bends in 10 minutes,0,68.2895,69.2986,1.125,0.0614
2 bends in 5 minutes,0,66.9176,68.6833,2.1158,0.0
3 bends in 8 minutes,0,67.0759,67.8733,0.6899,0.1895


In [6]:
# Both panels share the same data source and the same size.
panel_size = dict(width=400, height=400)
base = alt.Chart(scores)

# Left panel: group means with an error bar per group (extent='ci').
pts = base.mark_point().encode(
    x='mean(Score Impact)',
    y='Haptic Group',
)
band = base.mark_errorbar(extent='ci').encode(
    x=alt.X('Score Impact', title='Impact on Safety Scores'),
    y=alt.Y('Haptic Group', scale=alt.Scale(zero=False)),
    color='Haptic Group',
)
means = alt.layer(pts, band).properties(
    title='Mean Safety Score impact with Bootstrapped 95% CI',
    **panel_size,
)

# Right panel: binned counts of the per-worker change, one line per group.
dists = base.mark_line(opacity=0.7, binSpacing=0).encode(
    x=alt.X('Score Impact:Q', bin=alt.Bin(maxbins=20)),
    y=alt.Y('count()', stack=None),
    color=alt.Color('Haptic Group:N'),
).properties(
    title='Distribution of Safety Score impact',
    **panel_size,
)

alt.hconcat(means, dists)

## What is the impact on lift rates? (which is effectively impact on productivity)

In [7]:
# One-sample t-test of the per-worker lift-rate change against zero.
# nan_policy='omit' drops workers whose change is NaN.
# The first column is labelled as a lift-rate change: the previous label
# ('Mean score change') was carried over from the safety-score table.
pd.DataFrame(
    [[scores['Lift Rate Impact'].mean()] + list(stats.ttest_1samp(scores['Lift Rate Impact'], 0, nan_policy='omit'))],
    columns=['Mean lift rate change', 't-statistic', 'p-value']
).round(4)

Unnamed: 0,Mean score change,t-statistic,p-value
0,-8.7598,-6.0596,0.0


How does that look on a per-group basis?

In [8]:
def _lift_stats(df):
    b = df['Baseline Average Lift Rate']
    i = df['Intervention Average Lift Rate']
    diff = df['Lift Rate Impact']
    ttest = stats.ttest_1samp(diff, 0, nan_policy='omit')
    
   
    res = pd.DataFrame([{
        "Baseline Average Lift Rate": b.mean(),
        "Intervention Average Lift Rate": i.mean(),
        "Change in Average Lift Rate": diff.mean(),
        'p-value': ttest[1]
    }])
    return res
    
    
# One summary row per haptic feedback setting.
per_group_lift_stats = scores.groupby(['Haptic Group']).apply(_lift_stats)
per_group_lift_stats.round(4)


Unnamed: 0_level_0,Unnamed: 1_level_0,Baseline Average Lift Rate,Intervention Average Lift Rate,Change in Average Lift Rate,p-value
Haptic Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2 bends in 10 minutes,0,99.6645,95.8151,-5.9844,0.0461
2 bends in 5 minutes,0,102.4516,91.5436,-10.375,0.0
3 bends in 8 minutes,0,103.0434,94.9813,-9.5551,0.0007


In [9]:
# Both panels share the same data source and the same size.
panel_size = dict(width=400, height=400)
base = alt.Chart(scores)

# Left panel: group means with an error bar per group (extent='ci').
pts = base.mark_point().encode(
    x='mean(Lift Rate Impact)',
    y='Haptic Group',
)
band = base.mark_errorbar(extent='ci').encode(
    x=alt.X('Lift Rate Impact', title='Impact on Lift Rates'),
    y=alt.Y('Haptic Group', scale=alt.Scale(zero=False)),
    color='Haptic Group',
)
means = alt.layer(pts, band).properties(
    title='Mean Lift Rate impact with Bootstrapped 95% CI',
    **panel_size,
)

# Right panel: binned counts of the per-worker change, one line per group.
dists = base.mark_line(opacity=0.7, binSpacing=0).encode(
    x=alt.X('Lift Rate Impact:Q', bin=alt.Bin(maxbins=20)),
    y=alt.Y('count()', stack=None),
    color=alt.Color('Haptic Group:N'),
).properties(
    title='Distribution of Lift Rate impact',
    **panel_size,
)

alt.hconcat(means, dists)



## Incorporating the lift rate effect into estimating the causal impact of wearing the sensors

In [10]:
# Formula-friendly (snake_case) column names for statsmodels.
formula_names = {
    "Average Safety Score": "score",
    'Stage': "stage",
    'ID': 'id',
    'Average Lift Rate': 'lift_rate',
    'Haptic Group': 'haptic_group',
}
st_melted_df = melted_df.rename(columns=formula_names)

# Let's exclude the ones that dropped off at certain times, keep only the
# model columns, and drop rows with any missing value among them.
in_both_stages = st_melted_df['Participation'] == 'Participated in both'
model_columns = ['lift_rate', 'id', 'stage', 'score', 'haptic_group']
data = st_melted_df.loc[in_both_stages, model_columns].dropna()

# Baseline rows get their own treatment level.
# NOTE(review): the leading '0' presumably makes it sort first so that it
# becomes the reference level of `haptic_group` in the regression - confirm.
is_baseline = data['stage'] == 'Baseline'
data.loc[is_baseline, 'haptic_group'] = '0_baseline'

print(data.shape)

(418, 5)


Let's try a simple linear model where we include individual effects, treatment (one of the 3 Haptic Groups) and lift rate as covariates.

In [11]:
# Worker fixed effects (C(id)) plus lift rate and the treatment level.
score_formula = 'score ~  C(id) + lift_rate + haptic_group'
model = ols(score_formula, data=data).fit()

# Show only the header table of the summary; one coefficient per worker
# would make the full coefficient table very long.
s = model.summary()
display(s.tables[0])

def reg_results(model):
    """Collect coefficients, p-values and confidence bounds in one frame.

    Parameters
    ----------
    model : fitted results object
        Must expose `params`, `pvalues` and a `conf_int()` method whose
        result has columns 0 and 1.

    Returns
    -------
    pandas.DataFrame
        Indexed by model term, with columns 'coefficients', 'pvalues',
        'CI-lower' and 'CI-upper'.
    """
    bound_labels = {0: 'CI-lower', 1: 'CI-upper'}

    coefficient_frame = pd.DataFrame(model.params, columns=['coefficients'])
    pvalue_frame = pd.DataFrame(model.pvalues, columns=['pvalues'])
    interval_frame = pd.DataFrame(model.conf_int())

    combined = coefficient_frame.join(pvalue_frame).join(interval_frame)
    return combined.rename(columns=bound_labels)
    
# Keep only the treatment and lift-rate terms; the per-worker terms are dropped.
terms_of_interest = 'haptic_group|lift_rate'
r_results = reg_results(model)
r_results.filter(regex=terms_of_interest, axis=0)

0,1,2,3
Dep. Variable:,score,R-squared:,0.892
Model:,OLS,Adj. R-squared:,0.781
Method:,Least Squares,F-statistic:,8.007
Date:,"Thu, 26 May 2022",Prob (F-statistic):,7.6e-44
Time:,11:42:39,Log-Likelihood:,-844.49
No. Observations:,418,AIC:,2115.0
Df Residuals:,205,BIC:,2975.0
Df Model:,212,,
Covariance Type:,nonrobust,,


Unnamed: 0,coefficients,pvalues,CI-lower,CI-upper
haptic_group[T.2 bends in 10 minutes],0.417128,0.3721837,-0.502436,1.336693
haptic_group[T.2 bends in 5 minutes],0.888566,0.04543,0.018243,1.758889
haptic_group[T.3 bends in 8 minutes],-0.440382,0.3382847,-1.345004,0.464239
lift_rate,-0.118287,2.299872e-18,-0.142487,-0.094086


## Is there a risk of selection bias given that some people did not participate in both baseline and intervention stages?

In [12]:
# Mean baseline safety score per haptic group, split by participation status.
diffs = pd.pivot_table(
    df,
    index='Haptic Group',
    columns='Participation',
    values='Baseline Average Safety Score',
    aggfunc='mean',
)
# Select the columns by label rather than by position: positional indexing
# silently depends on the alphabetical order of the participation labels.
# Difference = workers who dropped out after baseline minus workers who completed both stages.
diffs['Difference'] = diffs['Participated in baseline only'] - diffs['Participated in both']
diffs.round(3)

Participation,Participated in baseline only,Participated in both,Difference
Haptic Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2 bends in 10 minutes,68.508,68.248,0.26
2 bends in 5 minutes,68.673,66.571,2.102
3 bends in 8 minutes,67.757,66.938,0.819


In [13]:
# Mean intervention safety score per haptic group, split by participation status.
diffs = pd.pivot_table(
    df,
    index='Haptic Group',
    columns='Participation',
    values='Intervention Average Safety Score',
    aggfunc='mean',
)
# Select the columns by label rather than by position: positional indexing
# silently depends on the alphabetical order of the participation labels.
# Difference = workers who completed both stages minus workers who joined for
# the intervention only. NOTE: this is the opposite direction to the baseline
# table (partial participants minus completers), so the signs of the two
# 'Difference' columns are not directly comparable.
diffs['Difference'] = diffs['Participated in both'] - diffs['Participated in intervention only']
diffs.round(3)

Participation,Participated in both,Participated in intervention only,Difference
Haptic Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2 bends in 10 minutes,69.373,68.767,0.607
2 bends in 5 minutes,68.687,68.55,0.137
3 bends in 8 minutes,67.628,70.7,-3.072


Looks like the answer is yes - in the Haptic Group that performed the best, the people with the highest baseline safety scores did not participate in the intervention, whereas in the group that performed the worst, the people who participated in the intervention only had higher scores than those who participated in both stages. That said, looking at the ANOVA results, these differences are not statistically significant.

In [14]:
# Formula-friendly (snake_case) column names for the ANOVA models below.
anova_names = {
    "Baseline Average Safety Score": "baseline_score",
    "Haptic Group": "haptic_group",
    'Participation': 'participation',
    'Intervention Average Safety Score': 'intervention_score',
}
st_df = df.rename(columns=anova_names)

In [15]:
# Two-way ANOVA of baseline scores on participation status, haptic group and
# their interaction.
# NOTE(review): anova_lm is called with its defaults, which should mean
# sequential (Type I) sums of squares - the order of the terms then matters
# for unbalanced groups; confirm this is intended.
baseline_formula = 'baseline_score ~ participation + haptic_group + participation:haptic_group'
model = ols(baseline_formula, data=st_df).fit()
sm.stats.anova_lm(model)

Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
participation,2.0,42.02709,21.013545,0.780092,0.459503
haptic_group,2.0,92.642145,46.321072,1.719591,0.181303
participation:haptic_group,4.0,80.949281,20.23732,0.751276,0.558001
Residual,244.0,6572.690974,26.937258,,


In [16]:
# Two-way ANOVA of intervention scores on participation status, haptic group
# and their interaction.
# NOTE(review): anova_lm is called with its defaults, which should mean
# sequential (Type I) sums of squares - the order of the terms then matters
# for unbalanced groups; confirm this is intended.
intervention_formula = 'intervention_score ~ participation + haptic_group + participation:haptic_group'
model = ols(intervention_formula, data=st_df).fit()
sm.stats.anova_lm(model)


Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
participation,2.0,12.358988,6.179494,0.172868,0.841363
haptic_group,2.0,72.646439,36.32322,1.01612,0.363689
participation:haptic_group,4.0,173.45576,43.36394,1.21308,0.306128
Residual,220.0,7864.334367,35.746974,,


In [17]:
# Add the per-worker change (intervention minus baseline) for both metrics.
# The columns are written onto `df` itself because the plots below read them.
for change_col, after_col, before_col in (
    ('Score Change', 'Intervention Average Safety Score', 'Baseline Average Safety Score'),
    ('Lift Rate Change', 'Intervention Average Lift Rate', 'Baseline Average Lift Rate'),
):
    df[change_col] = df[after_col] - df[before_col]

In [18]:
# Density of the safety score, estimated separately for every
# (haptic group, participation status) combination.
score_density = alt.Chart(melted_df).transform_density(
    'Average Safety Score',
    groupby=['Haptic Group', 'Participation'],
    as_=['Metric', 'density'],
)

# One facet column per haptic group, one line per participation status.
density_lines = score_density.mark_line().encode(
    x=alt.X('Metric:Q', title='Average Safety Score'),
    y='density:Q',
    color='Participation',
    column='Haptic Group',
)

density_lines.properties(title= "Safety Score distribution")

## How does the intervention tend to affect people who had different levels of safety scores then?

In [19]:
def _change_scatter(x_field, y_field, title):
    """Scatter plot of `y_field` against `x_field`, one point per row of `df`."""
    return alt.Chart(df).mark_point().encode(
        x=alt.X(x_field),
        y=y_field,
    ).properties(title=title)


# Change in each metric plotted against its own baseline level.
c1 = _change_scatter(
    'Baseline Average Safety Score',
    'Score Change',
    "Change in Safety Scores vs initial Safety Score",
)
c2 = _change_scatter(
    'Baseline Average Lift Rate',
    'Lift Rate Change',
    "Change in Lift Rate vs initial Lift Rate",
)

c1 | c2

There seems to be some reversion to the mean going on!

In [20]:
def test_corr(df, v1, v2):
    return stats.pearsonr(df[v1], df[v2])
    
# Drop every row of `df` that has a missing value in any column.
no_nans = df.dropna()

# Per haptic group: correlation between the lift-rate change and its baseline level.
lift_rate_corr = no_nans.groupby('Haptic Group').apply(
    test_corr,
    v1='Lift Rate Change',
    v2='Baseline Average Lift Rate',
)
lift_rate_corr

Haptic Group
2 bends in 10 minutes      (-0.3591867816959875, 0.0035590603537065976)
2 bends in 5 minutes        (-0.26436900969299076, 0.02101350793814217)
3 bends in 8 minutes     (-0.42236809197521435, 0.00030023709546274016)
dtype: object

In [21]:
# Per haptic group: correlation between the safety-score change and its baseline level.
score_corr = no_nans.groupby('Haptic Group').apply(
    test_corr,
    v1='Score Change',
    v2='Baseline Average Safety Score',
)
score_corr

Haptic Group
2 bends in 10 minutes    (-0.3030661103468305, 0.014920925147287763)
2 bends in 5 minutes      (-0.1418224419616019, 0.22167859990637126)
3 bends in 8 minutes      (-0.23329391729609159, 0.0537068518140289)
dtype: object