In [1]:
import pandas as pd
from IPython.display import display, HTML

from rct.design import KRerandomizedRCT
from rct.balance import pvalues_report, pvalue_balance,mahalanobis_balance, block_balance

def pretty_print(df, message=''):
    """Print a caption, then render a pandas object as an HTML table.

    Parameters
    ----------
    df : pandas.DataFrame or pandas.Series
        Object to show. A Series is first converted to a one-column frame
        with ``to_frame()`` so it can be rendered through ``to_html()``.
    message : str, optional
        Text printed before the table. It is printed even when empty.

    Returns
    -------
    Whatever ``IPython.display.display`` returns for the rendered HTML.
    """
    frame = df.to_frame() if isinstance(df, pd.Series) else df
    print(message)
    html_table = HTML(frame.to_html())
    return display(html_table)

In [2]:
# Example data: covariates read from a CSV located relative to the notebook's
# working directory. DATA_PATH and df are both reused by the cells below.

DATA_PATH = '../tests/test_data/example_covariates.csv'
df = pd.read_csv(DATA_PATH)
# Last expression of the cell, so the summary statistics are shown as its output.
df.describe()

Unnamed: 0,A,B,C
count,100.0,100.0,100.0
mean,0.075969,-0.030332,0.45
std,1.057618,0.994407,0.5
min,-2.914937,-2.143484,0.0
25%,-0.63623,-0.710553,0.0
50%,0.108599,-0.015349,0.0
75%,0.777243,0.582555,1.0
max,2.799044,2.928059,1.0


In [3]:
# Draw k-rerandomized treatment assignments, both i.i.d. and shuffled,
# for 1 treatment (1 control) and for 2 treatments (1 control),
# using Mahalanobis balance.


for weights in ([.5, .5], [.3, .3, .4]):
    print('\nweights: ', weights)
    krct = KRerandomizedRCT(mahalanobis_balance(), DATA_PATH, weights, k=20, seed=0)

    # Read both assignments (i.i.d. first) before anything is displayed.
    iid_assignment = krct.assignment_from_iid
    shuffled_assignment = krct.assignment_from_shuffled

    reports = (
        ('\n** k-rerand iid assignment **', iid_assignment),
        ('\n** k-rerand shuffled assignment **', shuffled_assignment),
    )
    for heading, assignment in reports:
        # Counts per value of column 't', then the p-values table for the same draw.
        pretty_print(assignment['t'].value_counts(), heading)
        pretty_print(pvalues_report(df, assignment), 'pvalues for treatment')


weights:  [0.5, 0.5]

** k-rerand iid assignment **


Unnamed: 0,t
1,50
0,50


pvalues for treatment


Unnamed: 0,A,B,C
t1,0.663702,0.866303,0.551186



** k-rerand shuffled assignment **


Unnamed: 0,t
1,50
0,50


pvalues for treatment


Unnamed: 0,A,B,C
t1,0.963649,0.971983,0.551186



weights:  [0.3, 0.3, 0.4]

** k-rerand iid assignment **


Unnamed: 0,t
2,40
1,34
0,26


pvalues for treatment


Unnamed: 0,A,B,C
t1,0.903671,0.528733,0.477888
t2,0.966614,0.895375,0.970118



** k-rerand shuffled assignment **


Unnamed: 0,t
2,40
1,30
0,30


pvalues for treatment


Unnamed: 0,A,B,C
t1,0.36175,0.598669,0.891584
t2,0.845258,0.46843,0.891584


In [4]:
# Draw k-rerandomized treatment assignments, both i.i.d. and shuffled,
# for 1 treatment (1 control) and for 2 treatments (1 control),
# using max p-value balance.


for weights in ([.5, .5], [.3, .3, .4]):
    print('\nweights: ', weights)
    krct = KRerandomizedRCT(pvalue_balance(), DATA_PATH, weights, k=20, seed=0)

    # Read both assignments (i.i.d. first) before anything is displayed.
    iid_assignment = krct.assignment_from_iid
    shuffled_assignment = krct.assignment_from_shuffled

    reports = (
        ('\n** k-rerand iid assignment **', iid_assignment),
        ('\n** k-rerand shuffled assignment **', shuffled_assignment),
    )
    for heading, assignment in reports:
        # Counts per value of column 't', then the p-values table for the same draw.
        pretty_print(assignment['t'].value_counts(), heading)
        pretty_print(pvalues_report(df, assignment), 'pvalues for treatment')


weights:  [0.5, 0.5]

** k-rerand iid assignment **


Unnamed: 0,t
1,50
0,50


pvalues for treatment


Unnamed: 0,A,B,C
t1,0.663702,0.866303,0.551186



** k-rerand shuffled assignment **


Unnamed: 0,t
1,50
0,50


pvalues for treatment


Unnamed: 0,A,B,C
t1,0.963649,0.971983,0.551186



weights:  [0.3, 0.3, 0.4]

** k-rerand iid assignment **


Unnamed: 0,t
2,40
1,34
0,26


pvalues for treatment


Unnamed: 0,A,B,C
t1,0.903671,0.528733,0.477888
t2,0.966614,0.895375,0.970118



** k-rerand shuffled assignment **


Unnamed: 0,t
2,40
1,30
0,30


pvalues for treatment


Unnamed: 0,A,B,C
t1,0.516997,0.765729,0.733228
t2,0.571203,0.890846,0.733228


In [5]:
# Draw k-rerandomized treatment assignments, both i.i.d. and shuffled,
# for 1 treatment (1 control) and for 2 treatments (1 control),
# attempting to block on 'C'.

for weights in ([.5, .5], [.3, .3, .4]):
    print('\nweights: ', weights)
    krct = KRerandomizedRCT(block_balance(['C']), DATA_PATH, weights, k=100, seed=0)

    # Read both assignments (i.i.d. first) before anything is displayed.
    iid_assignment = krct.assignment_from_iid
    shuffled_assignment = krct.assignment_from_shuffled

    reports = (
        ('\n** k-rerand iid assignment **', iid_assignment),
        ('\n** k-rerand shuffled assignment **', shuffled_assignment),
    )
    for heading, assignment in reports:
        # Counts per value of column 't', then the p-values table for the same draw.
        pretty_print(assignment['t'].value_counts(), heading)
        pretty_print(pvalues_report(df, assignment), 'pvalues for treatment')


weights:  [0.5, 0.5]

** k-rerand iid assignment **


Unnamed: 0,t
1,50
0,50


pvalues for treatment


Unnamed: 0,A,B,C
t1,0.230043,0.462692,0.842654



** k-rerand shuffled assignment **


Unnamed: 0,t
1,50
0,50


pvalues for treatment


Unnamed: 0,A,B,C
t1,0.215936,0.835881,0.842654



weights:  [0.3, 0.3, 0.4]

** k-rerand iid assignment **


Unnamed: 0,t
2,36
0,34
1,30


pvalues for treatment


Unnamed: 0,A,B,C
t1,0.993368,0.137835,0.978468
t2,0.582468,0.631088,0.85909



** k-rerand shuffled assignment **


Unnamed: 0,t
2,40
1,30
0,30


pvalues for treatment


Unnamed: 0,A,B,C
t1,0.785626,0.885103,0.681918
t2,0.53637,0.051175,0.681918


In [6]:
# Draw k-rerandomized treatment assignments, both i.i.d. and shuffled,
# for 1 treatment (1 control) and for 2 treatments (1 control),
# with a mixed objective.

# The objective is the sum of a block balance on 'C' and a Mahalanobis balance on 'A' and 'B'.
mixed_balance = block_balance(['C']) + mahalanobis_balance(['A', 'B'])

for weights in ([.5, .5], [.3, .3, .4]):
    print('\nweights: ', weights)
    krct = KRerandomizedRCT(mixed_balance, DATA_PATH, weights, k=100, seed=0)

    # Read both assignments (i.i.d. first) before anything is displayed.
    iid_assignment = krct.assignment_from_iid
    shuffled_assignment = krct.assignment_from_shuffled

    reports = (
        ('\n** k-rerand iid assignment **', iid_assignment),
        ('\n** k-rerand shuffled assignment **', shuffled_assignment),
    )
    for heading, assignment in reports:
        # Counts per value of column 't', then the p-values table for the same draw.
        pretty_print(assignment['t'].value_counts(), heading)
        pretty_print(pvalues_report(df, assignment), 'pvalues for treatment')


weights:  [0.5, 0.5]

** k-rerand iid assignment **


Unnamed: 0,t
1,50
0,50


pvalues for treatment


Unnamed: 0,A,B,C
t1,0.87299,0.741099,0.842654



** k-rerand shuffled assignment **


Unnamed: 0,t
1,50
0,50


pvalues for treatment


Unnamed: 0,A,B,C
t1,0.719567,0.895064,0.842654



weights:  [0.3, 0.3, 0.4]

** k-rerand iid assignment **


Unnamed: 0,t
2,36
0,34
1,30


pvalues for treatment


Unnamed: 0,A,B,C
t1,0.993368,0.137835,0.978468
t2,0.582468,0.631088,0.85909



** k-rerand shuffled assignment **


Unnamed: 0,t
2,40
1,30
0,30


pvalues for treatment


Unnamed: 0,A,B,C
t1,0.617735,0.428009,0.681918
t2,0.389197,0.435034,0.681918
