In [None]:
import pandas as pd
import numpy as np
import pymc as pm
import arviz as az
import matplotlib.pyplot as plt
from scipy.special import expit as invlogit

#import arviz as az
#az.style.use(['default', 'arviz-doc'])

%matplotlib inline
%config InlineBackend.figure_format='retina'

# Registry of fitted models (name -> InferenceData), filled in as models are
# fitted and consumed by the model comparison in the final cells.
t_dict = {}

# Seed NumPy's legacy global RNG so the simulated data and every later
# np.random.* draw are identical after Restart Kernel -> Run All.
SEED = 42
np.random.seed(SEED)

# Create fake data: potential outcomes for n players taking 50 throws each
n = 2000
# Athleticism: Normal(5.5, 1.5) draws truncated to integers and clipped to 1..10
x = np.clip(np.random.normal(5.5, 1.5, n).astype(int), 1, 10)
# Successful throws out of 50 without (y0) and with (y1) treatment; treatment
# shifts the log-odds of a successful throw by +1.
y0 = np.random.binomial(50, invlogit(0.2 * x - 2.5))
y1 = np.random.binomial(50, invlogit(0.2 * x - 1.5))

data = pd.DataFrame({'athleticism': x, 'y0': y0, 'y1': y1})

# Unit-level effect and its mean; known only because both outcomes are simulated
true_effect = data.y1 - data.y0
ate = true_effect.mean()

# Treatment vectors
all_treated = np.ones(n)
random_treatment = np.random.binomial(1, 0.5, size=n)
# Assignment probability increases with athleticism (confounded assignment)
biased_treatment = np.random.binomial(1, invlogit(x - 5.5))

# Apply random treatment: the observed outcome is y1 for treated units, y0 otherwise
z = random_treatment
data['y'] = np.where(z == 1, data.y1, data.y0)
data['treatment'] = z
treated = (data.treatment == 1) # mask

print('True ATE:', ate)

In [None]:
# Peek at the first rows: athleticism, both potential outcomes, observed outcome and treatment flag.
data.head()

In [None]:
# Distribution of the unit-level treatment effect among treated units.
# For treated rows the observed y equals y1, so y - y0 is the true effect.
effect_treated = data.loc[treated, 'y'] - data.loc[treated, 'y0']

fig, ax = plt.subplots()
ax.hist(effect_treated, bins=100)
ax.set(
    title='Treatment effect among treated units',
    xlabel='y - y0 (successful throws)',
    ylabel='number of units',
)
plt.show()  # the original `plt.show` without parentheses never called the function



In [None]:
## Next let's estimate it using a naive model:
## y ~ Normal(alpha + beta * treatment, sigma), so beta is the difference in means.

treatment = data.treatment
y = data.y

with pm.Model() as model_normal:
    alpha = pm.Normal("alpha", mu=0, sigma=10)   # mean outcome without treatment
    beta = pm.Normal("beta", mu=0, sigma=10)     # shift in mean outcome under treatment
    sigma = pm.HalfNormal("sigma", sigma=5)      # residual standard deviation

    mu = alpha + beta * treatment
    obs = pm.Normal("obs", mu=mu, sigma=sigma, observed=y)

    trace = pm.sample()

# Only the last expression of a cell is displayed, so the summary has to be
# printed explicitly; a bare pm.summary(trace) here would be discarded.
print(pm.summary(trace))
pm.plot_posterior(trace)

In [None]:
with model_normal:
    # Posterior predictive draws and pointwise log-likelihoods are added to
    # `trace` itself (extend_inferencedata=True).
    pm.sample_posterior_predictive(trace, extend_inferencedata=True)
    pm.compute_log_likelihood(trace, extend_inferencedata=True)

# Register the fitted model for the az.compare call in the final cells.
t_dict['model_normal'] = trace

# Leave-one-out cross-validation estimate for this model.
pm.loo(trace)

In [None]:
# Posterior mean of the treatment coefficient `beta`.
trace.posterior.beta.mean()

In [None]:
# Model diagnostics look pretty much ok
# (trace plot for every parameter, followed by the numeric summary table)
pm.plot_trace(trace)
print(pm.summary(trace))

In [None]:
# Effective-sample-size plot of kind "evolution" for the current trace.
pm.plot_ess(trace, kind="evolution")

In [None]:
# The model does recover the mean treatment effect, but it is actually rather badly
# misspecified. What saves us is that we have a lot of data :)

# Histogram of the posterior draws of `beta`.
trace.posterior.beta.plot.hist()

In [None]:

# ... and draw the posterior predictive check plot:
pm.plot_ppc(trace)

In [None]:
# The posterior predictive values should be counts of successful throws, yet the
# Normal likelihood produces fractional values -- which of course makes no sense.
ppc_obs = trace.posterior_predictive.obs.values
lowest, highest = ppc_obs.min(), ppc_obs.max()

print(f'minimum number of throws: {lowest:.2f}')
print(f'maximum number of throws: {highest:.2f}\n')
print(ppc_obs)


In [None]:
# Survey data: drop the timestamp and give the remaining columns short names.
df = pd.read_csv('./data/Free throw.csv.zip')
df = df.drop('Timestamp', axis=1)
df.columns = ['age', 'gender', 'athleticism', 'y0', 'y1']

# Randomly assign treatment and reveal only the matching potential outcome.
z = np.random.binomial(1, 0.5, size=len(df))
df['y'] = np.where(z == 1, df.y1, df.y0)
df['treatment'] = z

# Difference in mean observed outcome between the two arms.
arm_means = df.groupby('treatment').y.mean()
print('ATE:', arm_means[1] - arm_means[0])


# Same Normal regression as before, now fitted to the survey data.
with pm.Model() as model_normal2:
    alpha = pm.Normal("alpha", mu=0, sigma=10)
    beta = pm.Normal("beta", mu=0, sigma=10)
    sigma = pm.HalfNormal("sigma", sigma=5)

    mu = alpha + beta * z
    obs = pm.Normal("y", mu=mu, sigma=sigma, observed=df.y)

    trace = pm.sample()

print(pm.summary(trace))
pm.plot_trace(trace)

In [None]:
# With only 17 observations the posterior predictive plot is a horrible mess

with model_normal2:
    # Posterior predictive draws are added to `trace` itself.
    pm.sample_posterior_predictive(trace, extend_inferencedata=True)

pm.plot_ppc(trace)

We can definitely do better.

1. The first question is: **what do we actually want to model?** I.e. how should we express the ATE? Is it a difference between y1 and y0 in throws? Is it a delta of success rate?

2. The next question is **what is the variable that we observe**? Is it a number of successful throws? Is it a success rate over n=50 throws?

In [None]:
# First hacky attempt: Poisson likelihood on the raw counts with an identity link.

import pytensor.tensor as pt

treatment = data.treatment.values
y = data.y.values

# To fit the survey data instead, use:
#treatment = df.treatment
#y = df.y

with pm.Model() as model_p1:
    alpha = pm.Normal("alpha", mu=15, sigma=10)   # expected count without treatment
    beta = pm.Normal("beta", mu=0, sigma=20)      # additive shift in expected count under treatment
    # No `sigma` prior here: the Poisson likelihood has no scale parameter, so
    # an extra free variable would only be sampled from its prior and clutter
    # the summary.

    # The Normal priors allow alpha + beta * treatment <= 0, which is not a
    # valid Poisson rate, hence the clamp at a small positive value.
    mu = pt.math.maximum(0.001, alpha + beta * treatment)
    obs = pm.Poisson("obs", mu=mu, observed=y)

    trace = pm.sample()

# Observed difference in mean outcome between arms, used as reference value below.
arm_means = data.groupby('treatment').y.mean()
ate = arm_means[1] - arm_means[0]
print('ATE:', ate)
print(pm.summary(trace))
pm.plot_posterior(trace, var_names=["beta"], ref_val=ate, textsize=10)

In [None]:
# Trace plot for the parameters of the Poisson model fitted above.
pm.plot_trace(trace)

In [None]:
with model_p1:
    # Posterior predictive draws are added to `trace` itself.
    pm.sample_posterior_predictive(trace, extend_inferencedata=True)

# Posterior predictive check plot.
pm.plot_ppc(trace)

In [None]:
with model_p1:
    # Pointwise log-likelihoods are added to `trace` itself.
    pm.compute_log_likelihood(trace, extend_inferencedata=True)

# Register the fitted model for the az.compare call in the final cells.
t_dict['model_poisson1'] = trace

In [None]:
# Inspect the posterior predictive draws of `obs` (append .plot.hist() for a histogram).
trace.posterior_predictive.obs

OK, this can work, but let's see if we can do something different.

Let's reconsider. 

In [None]:
n_shots = 50

with pm.Model() as model_p2:

    # Baseline rate before treatment
    lambda_pre = pm.Gamma("lambda_pre", alpha=1, beta=5) #, shape=n_players)

    # Treatment effect, a positive multiplier on the baseline rate
    theta = pm.LogNormal("theta", mu=0, sigma=1)

    # Rate after treatment, stored in the trace under its own name
    lambda_post = pm.Deterministic("lambda_post", lambda_pre * theta)

    # Expected counts over n_shots throws
    expected_pre = lambda_pre * n_shots
    expected_post = lambda_post * n_shots

    # Both potential outcomes enter the likelihood, one Poisson term each
    y_pre_obs = pm.Poisson("y_pre_obs", mu=expected_pre, observed=data.y0)
    y_post_obs = pm.Poisson("y_post_obs", mu=expected_post, observed=data.y1)

    trace = pm.sample()

# Posterior of the multiplicative effect
pm.plot_posterior(trace, var_names='theta')

In [None]:
with model_p2:
    # Posterior predictive draws are added to `trace` itself.
    pm.sample_posterior_predictive(trace, extend_inferencedata=True)

# Posterior predictive check plot.
pm.plot_ppc(trace)

In [None]:
n_shots = 50

with pm.Model() as model_p3:

    # Baseline per-shot rate
    lambda_pre = pm.Gamma("lambda_pre", alpha=1, beta=5) #, shape=n_players)

    # Treatment effect, a positive multiplier on the baseline rate
    theta = pm.LogNormal("theta", mu=0, sigma=1)

    # Post-treatment lambda
    lambda_post = pm.Deterministic("lambda_post", lambda_pre * theta)

    # Rate per unit: lambda_post for treated units, lambda_pre for controls.
    # The earlier form `lambda_pre + theta*treatment` added the multiplier to
    # the rate and left lambda_post out of the likelihood altogether.
    # The treatment flags are read from `data` rather than from a leftover global.
    is_treated = pm.math.eq(data.treatment.values, 1)
    lambda_obs = pm.math.switch(is_treated, lambda_post, lambda_pre)
    obs = pm.Poisson("obs", mu=lambda_obs * n_shots, observed=data.y)

    trace = pm.sample() # add idata_kwargs={"log_likelihood": True} for LOO comparisons

# Summary
pm.plot_posterior(trace)

In [None]:
with model_p3:
    # Posterior predictive draws and pointwise log-likelihoods are added to `trace` itself.
    pm.sample_posterior_predictive(trace, extend_inferencedata=True)
    pm.compute_log_likelihood(trace, extend_inferencedata=True)

# Register the fitted model for the az.compare call in the final cells.
t_dict['model_poisson_m'] = trace

pm.plot_ppc(trace)

In [None]:

# The model has a single observed variable, "obs"; there are no "obs_pre" /
# "obs_post" groups to read. Split its posterior predictive draws by treatment
# arm instead (last axis indexes the units, in the row order of `data`).
obs_draws = trace.posterior_predictive["obs"].values
is_treated = data.treatment.values.astype(bool)

y_post = obs_draws[:, :, is_treated]    # predictions for treated units
y_pre = obs_draws[:, :, ~is_treated]    # predictions for control units

# One difference in arm means per posterior draw
y_diff = y_post.mean(axis=-1) - y_pre.mean(axis=-1)

print('Estimated ATE:', y_diff.mean())

In [None]:
# Another try with additive model
# Uses `treatment` and `n_shots` defined in earlier cells.



with pm.Model() as model_p4:
    
    # Prior for baseline ability
    # NOTE(review): Uniform(0, 50) looks like a count scale, yet the likelihood
    # below multiplies by n_shots as if this were a per-shot rate -- confirm
    # the intended scale.
    lambda_pre = pm.Uniform("lambda_pre", 0, 50) #, shape=n_players)
    
    # Treatment effect: additive shift, truncated below at -lambda_pre so that
    # lambda_pre + theta cannot become negative
    theta = pm.TruncatedNormal("theta", mu=0, sigma=10, lower=-lambda_pre)
    
    # Rate per unit: baseline, plus theta where treatment is 1
    lambda_obs = lambda_pre + theta*treatment
    
    # Likelihood for the observed counts
    obs = pm.Poisson("obs", mu=lambda_obs * n_shots, observed=data.y)
    
    trace = pm.sample()

# Summary
pm.plot_posterior(trace)

In [None]:
trace.posterior_predictive

In [None]:
with model_p4:
    pm.sample_posterior_predictive(trace, extend_inferencedata=True)
    pm.compute_log_likelihood(trace, extend_inferencedata=True)

pm.plot_ppc(trace)

trace.posterior_predictive.obs.mean()

In [None]:
# And one more try: Poisson regression with a log link, so the rate is
# positive by construction and the treatment effect is multiplicative.

with pm.Model() as model_p5:

    t = pm.Data("t", data.treatment)

    # Prior for the baseline log-intensity (log-lambda)
    mu = pm.Normal("mu", mu=0, sigma=5)

    # Prior for the treatment effect on the log scale
    tau = pm.Normal("tau", mu=0, sigma=5)

    # Expected log-lambda (Poisson intensity) for each individual
    log_lambda = mu + tau * t

    # Likelihood (Poisson-distributed successes). pm.math.exp keeps the
    # expression inside the model graph instead of going through a NumPy ufunc.
    obs = pm.Poisson("obs", mu=pm.math.exp(log_lambda) * n_shots, observed=data.y)

    trace = pm.sample()


print(pm.summary(trace))
pm.plot_posterior(trace)

In [None]:
# Trace plot for the parameters of the log-link Poisson model fitted above.
pm.plot_trace(trace)

In [None]:
with model_p5:
    # Posterior predictive draws are added to `trace` itself.
    pm.sample_posterior_predictive(trace, extend_inferencedata=True)

# Posterior predictive check plot.
pm.plot_ppc(trace)

In [None]:
# Model-based ATE: posterior predictive mean outcome of the treated units minus
# that of the control units.
treated_mask = data.treatment.to_numpy().astype(bool)

# Posterior predictive draws; the last axis indexes the units.
obs_samples = trace.posterior_predictive.obs.values

improvement_pred_treated = obs_samples[:, :, treated_mask]
pred_control = obs_samples[:, :, ~treated_mask]

# One ATE value per posterior draw, then its mean. The baseline is the model's
# prediction for the control arm; data.y0 holds the simulated counterfactual of
# every unit, which an estimator would not get to see for treated units.
ate_draws = improvement_pred_treated.mean(axis=-1) - pred_control.mean(axis=-1)
ate = ate_draws.mean()

ate

### **Binomial models**

In [None]:
# Display the current outcome array `y`.
y

In [None]:
y_pred = data.y0.values   # outcome without treatment (name kept; presumably meant "y_pre")
y_post = data.y1.values   # outcome with treatment
# Observed outcome: y1 where treatment is 1, y0 otherwise
y = treatment*data.y1.values + (1-treatment)*data.y0.values

n_shots = 50

# Binomial model with a logit link: successes out of n_shots throws.
with pm.Model() as model_b1:
    t = pm.Data("t", treatment)

    alpha = pm.Normal('alpha', mu=0, sigma=10)   # log-odds of success without treatment
    beta = pm.Normal('beta', mu=0, sigma=10)     # shift in log-odds under treatment

    # Linear predictor on the logit scale. It is built from the data container
    # `t`, which was previously created but left unused.
    mu = alpha + beta * t

    obs = pm.Binomial('obs', n=n_shots, p=pm.math.sigmoid(mu), observed=y)

    trace = pm.sample()

print(pm.summary(trace))
pm.plot_posterior(trace)

In [None]:
with model_b1:
    # Posterior predictive draws and pointwise log-likelihoods are added to `trace` itself.
    pm.sample_posterior_predictive(trace, extend_inferencedata=True)
    pm.compute_log_likelihood(trace, extend_inferencedata=True)

# Register the fitted model for the az.compare call in the final cells.
t_dict['model_binomial1'] = trace

pm.plot_ppc(trace)

In [None]:
# Model-based ATE: posterior predictive mean outcome of the treated units minus
# that of the control units.
treated_mask = data.treatment.to_numpy().astype(bool)

# Posterior predictive draws; the last axis indexes the units.
obs_samples = trace.posterior_predictive.obs.values

improvement_pred_treated = obs_samples[:, :, treated_mask]
pred_control = obs_samples[:, :, ~treated_mask]

# One ATE value per posterior draw, then its mean. The baseline is the model's
# prediction for the control arm; data.y0 holds the simulated counterfactual of
# every unit, which an estimator would not get to see for treated units.
ate_draws = improvement_pred_treated.mean(axis=-1) - pred_control.mean(axis=-1)
ate = ate_draws.mean()

ate

In [None]:
coords = {"treatment": [0, 1]}

# Binomial model with one logit-scale intercept per treatment arm.
with pm.Model(coords=coords) as model_b2:
    treatment_idx = pm.Data("treatment_idx", data['treatment'], dims="obs")

    # One intercept per arm, plus a shared offset
    # NOTE(review): `beta` is added to both arms, so only alpha[arm] + beta is
    # informed by the data -- confirm the shared offset is wanted.
    alpha = pm.Normal('alpha', mu=0, sigma=1, dims="treatment")
    beta = pm.Normal('beta', mu=0, sigma=1)

    # Success probability of each unit, selected by its treatment arm
    logit_p1 = alpha[treatment_idx] + beta
    p1 = pm.math.sigmoid(logit_p1)

    # Likelihood for the observed counts. This has to be the observed outcome
    # `y`: `y1` is the treated potential outcome of every unit, so fitting on it
    # leaves no difference between the arms for the treatment index to explain.
    obs = pm.Binomial('obs', n=50, p=p1, observed=data['y'])

    trace = pm.sample(nuts_sampler='nutpie')

# Analyze results
az.plot_posterior(trace, var_names=['alpha', 'beta']);

In [None]:
# Trace plot for the parameters of the per-arm binomial model fitted above.
pm.plot_trace(trace)

In [None]:
with model_b2:
    # Posterior predictive draws and pointwise log-likelihoods are added to `trace` itself.
    pm.sample_posterior_predictive(trace, extend_inferencedata=True)
    pm.compute_log_likelihood(trace, extend_inferencedata=True)

# Posterior predictive check plot.
pm.plot_ppc(trace)

In [None]:
# Model-based ATE: posterior predictive mean outcome of the treated units minus
# that of the control units.
treated_mask = data.treatment.to_numpy().astype(bool)

# Posterior predictive draws; the last axis indexes the units.
obs_samples = trace.posterior_predictive.obs.values

improvement_pred_treated = obs_samples[:, :, treated_mask]
pred_control = obs_samples[:, :, ~treated_mask]

# One ATE value per posterior draw, then its mean. The baseline is the model's
# prediction for the control arm; data.y0 holds the simulated counterfactual of
# every unit, which an estimator would not get to see for treated units.
ate_draws = improvement_pred_treated.mean(axis=-1) - pred_control.mean(axis=-1)
ate = ate_draws.mean()

ate

In [None]:
import pymc_bart as pmb

# NOTE(review): these coords are handed to the model, but no variable below
# declares dims -- confirm they are needed.
coords = {"treatment": data.treatment}

with pm.Model(coords=coords) as model_bart:
    # Single predictor: the treatment indicator
    X = data[['treatment']]
    
    # Model pre- and post-treatment probabilities
    # (BART is given the observed success fraction y / 50 as its response)
    # NOTE(review): the BART output is used directly as a probability, without
    # a link function -- confirm it stays inside [0, 1].
    p = pmb.BART('p', X, data['y'] / 50)
    
    # Likelihood for observed counts
    obs = pm.Binomial('obs', n=50, p=p, observed=data['y'])
    
    trace =  pm.sample()

# Analyze results
pm.plot_posterior(trace);

In [None]:
with model_bart:
    # Posterior predictive draws and pointwise log-likelihoods are added to `trace` itself.
    pm.sample_posterior_predictive(trace, extend_inferencedata=True)
    pm.compute_log_likelihood(trace, extend_inferencedata=True)

# Register the fitted model for the az.compare call in the final cells.
# The key follows the 'model_*' naming of the other entries (was 'mode_bart').
t_dict['model_bart'] = trace

pm.plot_ppc(trace)

In [None]:
# Model-based ATE: posterior predictive mean outcome of the treated units minus
# that of the control units.
treated_mask = data.treatment.to_numpy().astype(bool)

# Posterior predictive draws; the last axis indexes the units.
obs_samples = trace.posterior_predictive.obs.values

improvement_pred_treated = obs_samples[:, :, treated_mask]
pred_control = obs_samples[:, :, ~treated_mask]

# One ATE value per posterior draw, then its mean. The baseline is the model's
# prediction for the control arm; data.y0 holds the simulated counterfactual of
# every unit, which an estimator would not get to see for treated units.
ate_draws = improvement_pred_treated.mean(axis=-1) - pred_control.mean(axis=-1)
ate = ate_draws.mean()

ate

In [None]:
# Two independent Beta-Binomial arms; the effect is the difference in success probability.
# NOTE(review): this model reads `df` (the survey data) whereas the other
# binomial models read `data` -- confirm that is intended.
with pm.Model() as model_b3:
    # Flat Beta(1, 1) priors on the success probability of each arm
    theta_0 = pm.Beta('theta_0', alpha=1, beta=1)
    theta_1 = pm.Beta('theta_1', alpha=1, beta=1)

    # Observed counts out of 50 throws, split by treatment arm
    y_0 = pm.Binomial('y_0', n=50, p=theta_0, observed=df.loc[df.treatment == 0, 'y'])
    y_1 = pm.Binomial('y_1', n=50, p=theta_1, observed=df.loc[df.treatment == 1, 'y'])

    # Treatment effect on the probability scale, tracked in the trace
    ate = pm.Deterministic('ate', theta_1 - theta_0)

    trace = pm.sample()



In [None]:
with model_b3:
    # Posterior predictive draws and pointwise log-likelihoods are added to `trace` itself.
    pm.sample_posterior_predictive(trace, extend_inferencedata=True)
    pm.compute_log_likelihood(trace, extend_inferencedata=True)



In [None]:
# Posterior predictive check plot for the two-arm model.
pm.plot_ppc(trace)

In [None]:
# Compare every model registered in t_dict, using the log-likelihood of the variable named 'obs'.
comp = az.compare(t_dict, var_name='obs')
comp

In [None]:
# Plot the model comparison table computed above.
az.plot_compare(comp) #, insample_dev=False);
