In [1]:
# test_integration.py concisely formalizes the tests contained in this notebook

import pandas as pd
from IPython.display import display, HTML

from rct.design import QuantileTargetingRCT
from rct.balance import pvalues_report, pvalue_balance, mahalanobis_balance, block_balance

def pretty_print(df, message=''):
    """Print ``message`` and render ``df`` as an HTML table in the notebook.

    Parameters
    ----------
    df : pd.DataFrame or pd.Series
        Object to render. A Series is first converted to a one-column frame
        with ``to_frame()``.
    message : str, optional
        Text printed on its own line before the table (default: empty string,
        which still prints a blank line).

    Returns
    -------
    Whatever ``IPython.display.display`` returns for the rendered table.
    """
    frame = df.to_frame() if isinstance(df, pd.Series) else df
    print(message)
    html_table = HTML(frame.to_html())
    return display(html_table)

In [2]:
# Example covariates: path is relative to this notebook's working directory.
# The same path is handed to QuantileTargetingRCT in the cells below, while
# `df` is the frame passed to pvalues_report.

DATA_PATH = '../tests/test_data/example_covariates.csv'
df = pd.read_csv(filepath_or_buffer=DATA_PATH)

# Summary statistics of the loaded covariates (displayed as the cell output).
df.describe()

Unnamed: 0,A,B,C
count,100.0,100.0,100.0
mean,0.075969,-0.030332,0.45
std,1.057618,0.994407,0.5
min,-2.914937,-2.143484,0.0
25%,-0.63623,-0.710553,0.0
50%,0.108599,-0.015349,0.0
75%,0.777243,0.582555,1.0
max,2.799044,2.928059,1.0


In [3]:
# Draw top-5% balance (quantile_target=.05) i.i.d. and shuffled treatment
# assignments using the Mahalanobis balance, for two weightings:
# one treatment plus control ([.5, .5]) and two treatments plus control
# ([.3, .3, .4]).

for weights in ([.5, .5], [.3, .3, .4]):
    print('\nweights: ', weights )
    qrct = QuantileTargetingRCT(
        mahalanobis_balance(),
        DATA_PATH,
        weights,
        quantile_target=.05,
        seed=0,
        num_monte_carlo=200,
    )

    # Read both assignments up front (i.i.d. first) before reporting anything.
    iid_assignment = qrct.assignment_from_iid
    shuffled_assignment = qrct.assignment_from_shuffled

    for heading, assignment in (
        ('\n** qt iid assignment **', iid_assignment),
        ('\n** qt shuffled assignment **', shuffled_assignment),
    ):
        # Group sizes per value of 't', then the p-values report.
        pretty_print(assignment['t'].value_counts(), heading)
        pretty_print(pvalues_report(df, assignment), 'pvalues for treatment')


weights:  [0.5, 0.5]

** qt iid assignment **


Unnamed: 0,t
0,60
1,40


pvalues for treatment


Unnamed: 0,A,B,C
t1,0.625966,0.826971,1.0



** qt shuffled assignment **


Unnamed: 0,t
1,50
0,50


pvalues for treatment


Unnamed: 0,A,B,C
t1,0.700332,0.759697,0.842654



weights:  [0.3, 0.3, 0.4]

** qt iid assignment **


Unnamed: 0,t
0,37
2,35
1,28


pvalues for treatment


Unnamed: 0,A,B,C
t1,0.654768,0.989965,0.984517
t2,0.432941,0.949398,0.823875



** qt shuffled assignment **


Unnamed: 0,t
2,40
1,30
0,30


pvalues for treatment


Unnamed: 0,A,B,C
t1,0.689936,0.458484,0.584621
t2,0.844818,0.722879,0.412683


In [4]:
# Draw top-5% balance (quantile_target=.05) i.i.d. and shuffled treatment
# assignments using the max p-value balance, for two weightings:
# one treatment plus control ([.5, .5]) and two treatments plus control
# ([.3, .3, .4]).

for weights in ([.5, .5], [.3, .3, .4]):
    print('\nweights: ', weights )
    qrct = QuantileTargetingRCT(
        pvalue_balance(),
        DATA_PATH,
        weights,
        quantile_target=.05,
        seed=0,
        num_monte_carlo=200,
    )

    # Read both assignments up front (i.i.d. first) before reporting anything.
    iid_assignment = qrct.assignment_from_iid
    shuffled_assignment = qrct.assignment_from_shuffled

    for heading, assignment in (
        ('\n** qt iid assignment **', iid_assignment),
        ('\n** qt shuffled assignment **', shuffled_assignment),
    ):
        # Group sizes per value of 't', then the p-values report.
        pretty_print(assignment['t'].value_counts(), heading)
        pretty_print(pvalues_report(df, assignment), 'pvalues for treatment')


weights:  [0.5, 0.5]

** qt iid assignment **


Unnamed: 0,t
0,54
1,46


pvalues for treatment


Unnamed: 0,A,B,C
t1,0.727206,0.680913,0.904899



** qt shuffled assignment **


Unnamed: 0,t
1,50
0,50


pvalues for treatment


Unnamed: 0,A,B,C
t1,0.690019,0.792265,0.842654



weights:  [0.3, 0.3, 0.4]

** qt iid assignment **


Unnamed: 0,t
0,40
2,33
1,27


pvalues for treatment


Unnamed: 0,A,B,C
t1,0.755273,0.812198,0.828627
t2,0.566008,0.687478,0.66301



** qt shuffled assignment **


Unnamed: 0,t
2,40
1,30
0,30


pvalues for treatment


Unnamed: 0,A,B,C
t1,0.844096,0.937403,0.539269
t2,0.576668,0.485934,0.945564


In [5]:
# Draw top-5% balance (quantile_target=.05) i.i.d. and shuffled treatment
# assignments while attempting to block on column 'C', for two weightings:
# one treatment plus control ([.5, .5]) and two treatments plus control
# ([.3, .3, .4]).

for weights in ([.5, .5], [.3, .3, .4]):
    print('\nweights: ', weights )
    qrct = QuantileTargetingRCT(
        block_balance(['C']),
        DATA_PATH,
        weights,
        quantile_target=.05,
        seed=0,
        num_monte_carlo=200,
    )

    # Read both assignments up front (i.i.d. first) before reporting anything.
    iid_assignment = qrct.assignment_from_iid
    shuffled_assignment = qrct.assignment_from_shuffled

    for heading, assignment in (
        ('\n** qt iid assignment **', iid_assignment),
        ('\n** qt shuffled assignment **', shuffled_assignment),
    ):
        # Group sizes per value of 't', then the p-values report.
        pretty_print(assignment['t'].value_counts(), heading)
        pretty_print(pvalues_report(df, assignment), 'pvalues for treatment')


weights:  [0.5, 0.5]

** qt iid assignment **


Unnamed: 0,t
1,50
0,50


pvalues for treatment


Unnamed: 0,A,B,C
t1,0.625522,0.314443,0.842654



** qt shuffled assignment **


Unnamed: 0,t
1,50
0,50


pvalues for treatment


Unnamed: 0,A,B,C
t1,0.041389,0.220909,0.842654



weights:  [0.3, 0.3, 0.4]

** qt iid assignment **


Unnamed: 0,t
2,36
0,34
1,30


pvalues for treatment


Unnamed: 0,A,B,C
t1,0.993368,0.137835,0.978468
t2,0.582468,0.631088,0.85909



** qt shuffled assignment **


Unnamed: 0,t
2,40
1,30
0,30


pvalues for treatment


Unnamed: 0,A,B,C
t1,0.778955,0.301075,0.681918
t2,0.6455,0.126929,0.681918


In [6]:
# Draw top-5% balance (quantile_target=.05) i.i.d. and shuffled treatment
# assignments under a mixed objective, for two weightings:
# one treatment plus control ([.5, .5]) and two treatments plus control
# ([.3, .3, .4]).

# Mixed objective: the block balance on 'C' combined (via `+`) with the
# Mahalanobis balance restricted to 'A' and 'B'.
mixed_balance = block_balance(['C']) + mahalanobis_balance(['A', 'B'])

for weights in ([.5, .5], [.3, .3, .4]):
    print('\nweights: ', weights )
    qrct = QuantileTargetingRCT(
        mixed_balance,
        DATA_PATH,
        weights,
        quantile_target=.05,
        seed=0,
        num_monte_carlo=200,
    )

    # Read both assignments up front (i.i.d. first) before reporting anything.
    iid_assignment = qrct.assignment_from_iid
    shuffled_assignment = qrct.assignment_from_shuffled

    for heading, assignment in (
        ('\n** qt iid assignment **', iid_assignment),
        ('\n** qt shuffled assignment **', shuffled_assignment),
    ):
        # Group sizes per value of 't', then the p-values report.
        pretty_print(assignment['t'].value_counts(), heading)
        pretty_print(pvalues_report(df, assignment), 'pvalues for treatment')


weights:  [0.5, 0.5]

** qt iid assignment **


Unnamed: 0,t
1,50
0,50


pvalues for treatment


Unnamed: 0,A,B,C
t1,0.524265,0.863187,0.842654



** qt shuffled assignment **


Unnamed: 0,t
1,50
0,50


pvalues for treatment


Unnamed: 0,A,B,C
t1,0.754898,0.678008,0.842654



weights:  [0.3, 0.3, 0.4]

** qt iid assignment **


Unnamed: 0,t
2,36
0,34
1,30


pvalues for treatment


Unnamed: 0,A,B,C
t1,0.993368,0.137835,0.978468
t2,0.582468,0.631088,0.85909



** qt shuffled assignment **


Unnamed: 0,t
2,40
1,30
0,30


pvalues for treatment


Unnamed: 0,A,B,C
t1,0.519539,0.757391,0.681918
t2,0.496727,0.478861,0.681918
