In [1]:
import ast

import numpy as np
import pandas as pd

from sociodemographic_softlabels.run import map_annotators_to_groups
from sociodemographic_softlabels.datasets import kumar
from sociodemographic_softlabels.analysis.replicability import (
    significance_test_per_group,
    k_estimator_groups
)

# Annotator sample CSV handed to kumar.Dataset.load further down.
SAMPLE_PATH = "../data/processed/kumar/sample_5000_annotators_v3.csv"
# Metadata file handed to kumar.Dataset.load as `metadata_path`.
# NOTE(review): resolved against the working directory, unlike SAMPLE_PATH
# which points below ../data - confirm this is intended.
DATASET_PATH = "kumar_dataset.json"

## Load Data

In [2]:
# Result files of the attribute-aware runs (1.x) and of the runs with random
# group assignment (2.x); one CSV per experiment directory.
RESULT_PATHS = [
    '../experiments/1.1-age/result.csv',
    '../experiments/1.2-gender/result.csv',
    '../experiments/1.3-education/result.csv',
    '../experiments/1.4-lgbq/result.csv',
    '../experiments/2.1-age-random/result.csv',
    '../experiments/2.2-gender-random/result.csv',
    '../experiments/2.3-education-random/result.csv',
    '../experiments/2.4-lgbq-random/result.csv',
]

# Columns whose cells hold nested Python lists serialised as strings.
ARRAY_COLUMNS = [
    'predictions_per_annotator',
    'labels_raw',
    'labels_per_annotator',
    'annotator_indecies',
]


def _parse_nested_list(text):
    """Parse a stringified nested list, reading 'nan' entries as None."""
    # literal_eval cannot parse a bare `nan`, so it is spelled as None first.
    return ast.literal_eval(text.replace('nan', 'None'))


# Stack all runs into one frame and strip the 'eval_' prefix from column names.
# NOTE(review): the row labels of the individual CSVs are kept, so the index
# of `df` contains duplicates - confirm nothing downstream relies on it.
df = pd.concat(
    [pd.read_csv(path) for path in RESULT_PATHS]
).rename(columns=lambda c: c[len('eval_'):] if c.startswith('eval_') else c)

# Replace the stringified attribute list by a short label; values that are
# not listed in the mapping become NaN.
df['attributes'] = df['attributes'].map({
    '[]': 'baseline',
    "['gender']": 'gender',
    "['age_range']": 'age',
    "['education']": 'education',
    "['lgbtq_status']": 'lgbq',
})

for column in ARRAY_COLUMNS:
    df[column] = df[column].map(_parse_nested_list)

# None cannot be stored in an integer array, so every array column falls back
# to float (None -> NaN) as soon as a single annotator index is missing.
has_missing_annotator = any(
    annotator is None
    for indices in df['annotator_indecies']
    for per_example in indices
    for annotator in per_example
)
dtype = 'float' if has_missing_annotator else 'int'

for column in ARRAY_COLUMNS:
    df[column] = df[column].map(lambda nested: np.array(nested, dtype=dtype))

In [7]:
# Load the annotator sample together with its metadata file.
all_data = kumar.Dataset.load(SAMPLE_PATH, metadata_path=DATASET_PATH)

# Map annotators to their groups for the four sociodemographic attributes
# that are analysed in this notebook.
to_groups = map_annotators_to_groups(
    all_data.annotators_mapping,
    all_data.sociodemographic_mapping,
    ["age_range", "lgbtq_status", "education", "gender"],
)

  0%|          | 0/23 [00:00<?, ?ba/s]

In [8]:
# Spot-check one run: a single seed and split of the gender experiment
# without random assignment.
df.loc[
    (df['random_seed'] == 2803636207)
    & (df['split'] == 0)
    & (df['attributes'] == 'gender')
    & (df['do_random_assignment'] == False)  # element-wise comparison, not `is False`
]

Unnamed: 0,timestamp,loss,is_parent - No - individual_0_precision,is_parent - No - individual_0_recall,is_parent - No - individual_0_f1-score,is_parent - No - individual_0_support,is_parent - No - individual_1_precision,is_parent - No - individual_1_recall,is_parent - No - individual_1_f1-score,is_parent - No - individual_1_support,...,attributes,map_ids_to_attributes,setting,output_dir,while_train,do_stack_groups,do_random_assignment,group_layers_depth,random_seed,split
0,2023-05-18 14:33:37.604774,0.0,0.819358,0.79681,0.807927,8903,0.435745,0.4718,0.453057,2961,...,gender,[],K_FOLD,./models/custom_mt_model_multilabel,False,True,False,1,2803636207,0


## Significance Tests

### Gender

In [9]:
# Bootstrap significance tests for the gender runs, compared with the
# 'randomized' runs; the log below reports one test per seed, split and
# gender group.
gender_p_dicts = significance_test_per_group(
    df,
    attribute_a='randomized',
    attribute_b='gender',
    to_groups=to_groups,
    attributes=['gender'],
)



Seed: 2803636207 - Split: 0 - gender - Group: Male
data shape:  (11861, 1)
sample size: 5930


bootstrap: 100%|████████████████████████████| 1000/1000 [00:18<00:00, 52.98it/s]



count sample diff f1   is twice tot diff f1....... 455  / 1000    p < 0.455  
count sample diff prec is twice tot diff prec..... 454  / 1000    p < 0.454  
count sample diff rec  is twice tot diff rec ..... 439  / 1000    p < 0.439  
count sample diff acc  is twice tot diff acc...... 463  / 1000    p < 0.463  


Seed: 2803636207 - Split: 0 - gender - Group: Female
data shape:  (13664, 1)
sample size: 6832


bootstrap: 100%|████████████████████████████| 1000/1000 [00:20<00:00, 48.43it/s]



count sample diff f1   is twice tot diff f1....... 603  / 1000    p < 0.603  
count sample diff prec is twice tot diff prec..... 755  / 1000    p < 0.755  
count sample diff rec  is twice tot diff rec ..... 385  / 1000    p < 0.385  
count sample diff acc  is twice tot diff acc...... 975  / 1000    p < 0.975  !


Seed: 2803636207 - Split: 0 - gender - Group: Nonbinary
data shape:  (120, 1)
sample size: 60


bootstrap: 100%|███████████████████████████| 1000/1000 [00:08<00:00, 111.51it/s]



count sample diff f1   is twice tot diff f1....... 404  / 1000    p < 0.404  
count sample diff prec is twice tot diff prec..... 384  / 1000    p < 0.384  
count sample diff rec  is twice tot diff rec ..... 517  / 1000    p < 0.517  
count sample diff acc  is twice tot diff acc...... 214  / 1000    p < 0.214  


Seed: 2803636207 - Split: 1 - gender - Group: Male
data shape:  (11912, 1)
sample size: 5956


bootstrap: 100%|████████████████████████████| 1000/1000 [00:20<00:00, 49.36it/s]



count sample diff f1   is twice tot diff f1....... 762  / 1000    p < 0.762  
count sample diff prec is twice tot diff prec..... 675  / 1000    p < 0.675  
count sample diff rec  is twice tot diff rec ..... 819  / 1000    p < 0.819  
count sample diff acc  is twice tot diff acc...... 645  / 1000    p < 0.645  


Seed: 2803636207 - Split: 1 - gender - Group: Female
data shape:  (13452, 1)
sample size: 6726


bootstrap: 100%|████████████████████████████| 1000/1000 [00:38<00:00, 25.66it/s]



count sample diff f1   is twice tot diff f1....... 953  / 1000    p < 0.953  !
count sample diff prec is twice tot diff prec..... 978  / 1000    p < 0.978  !
count sample diff rec  is twice tot diff rec ..... 920  / 1000    p < 0.92   
count sample diff acc  is twice tot diff acc...... 996  / 1000    p < 0.996  !


Seed: 2803636207 - Split: 1 - gender - Group: Nonbinary
data shape:  (122, 1)
sample size: 61


bootstrap: 100%|███████████████████████████| 1000/1000 [00:08<00:00, 117.32it/s]



count sample diff f1   is twice tot diff f1....... 478  / 1000    p < 0.478  
count sample diff prec is twice tot diff prec..... 468  / 1000    p < 0.468  
count sample diff rec  is twice tot diff rec ..... 433  / 1000    p < 0.433  
count sample diff acc  is twice tot diff acc...... 619  / 1000    p < 0.619  


Seed: 2803636207 - Split: 2 - gender - Group: Male
data shape:  (11952, 1)
sample size: 5976


bootstrap: 100%|████████████████████████████| 1000/1000 [00:19<00:00, 51.60it/s]



count sample diff f1   is twice tot diff f1....... 986  / 1000    p < 0.986  !
count sample diff prec is twice tot diff prec..... 977  / 1000    p < 0.977  !
count sample diff rec  is twice tot diff rec ..... 993  / 1000    p < 0.993  !
count sample diff acc  is twice tot diff acc...... 966  / 1000    p < 0.966  !


Seed: 2803636207 - Split: 2 - gender - Group: Female
data shape:  (13589, 1)
sample size: 6794


bootstrap: 100%|████████████████████████████| 1000/1000 [00:23<00:00, 43.26it/s]



count sample diff f1   is twice tot diff f1....... 995  / 1000    p < 0.995  !
count sample diff prec is twice tot diff prec..... 996  / 1000    p < 0.996  !
count sample diff rec  is twice tot diff rec ..... 996  / 1000    p < 0.996  !
count sample diff acc  is twice tot diff acc...... 989  / 1000    p < 0.989  !


Seed: 2803636207 - Split: 2 - gender - Group: Nonbinary
data shape:  (114, 1)
sample size: 57


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr


count sample diff f1   is twice tot diff f1....... 678  / 1000    p < 0.678  
count sample diff prec is twice tot diff prec..... 299  / 1000    p < 0.299  
count sample diff rec  is twice tot diff rec ..... 601  / 1000    p < 0.601  
count sample diff acc  is twice tot diff acc...... 97   / 1000    p < 0.097  


Seed: 2803636207 - Split: 3 - gender - Group: Male
data shape:  (11975, 1)
sample size: 5987


bootstrap: 100%|████████████████████████████| 1000/1000 [00:21<00:00, 47.24it/s]



count sample diff f1   is twice tot diff f1....... 582  / 1000    p < 0.582  
count sample diff prec is twice tot diff prec..... 553  / 1000    p < 0.553  
count sample diff rec  is twice tot diff rec ..... 581  / 1000    p < 0.581  
count sample diff acc  is twice tot diff acc...... 533  / 1000    p < 0.533  


Seed: 2803636207 - Split: 3 - gender - Group: Female
data shape:  (13515, 1)
sample size: 6757


bootstrap: 100%|████████████████████████████| 1000/1000 [00:26<00:00, 38.36it/s]



count sample diff f1   is twice tot diff f1....... 132  / 1000    p < 0.132  
count sample diff prec is twice tot diff prec..... 193  / 1000    p < 0.193  
count sample diff rec  is twice tot diff rec ..... 80   / 1000    p < 0.08   
count sample diff acc  is twice tot diff acc...... 398  / 1000    p < 0.398  


Seed: 2803636207 - Split: 3 - gender - Group: Nonbinary
data shape:  (104, 1)
sample size: 52


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr


count sample diff f1   is twice tot diff f1....... 32   / 1000    p < 0.032  *
count sample diff prec is twice tot diff prec..... 23   / 1000    p < 0.023  *
count sample diff rec  is twice tot diff rec ..... 142  / 1000    p < 0.142  
count sample diff acc  is twice tot diff acc...... 850  / 1000    p < 0.85   


Seed: 165043843 - Split: 0 - gender - Group: Male
data shape:  (12062, 1)
sample size: 6031


bootstrap: 100%|████████████████████████████| 1000/1000 [00:23<00:00, 43.14it/s]



count sample diff f1   is twice tot diff f1....... 9    / 1000    p < 0.009  **
count sample diff prec is twice tot diff prec..... 23   / 1000    p < 0.023  *
count sample diff rec  is twice tot diff rec ..... 5    / 1000    p < 0.005  **
count sample diff acc  is twice tot diff acc...... 38   / 1000    p < 0.038  *


Seed: 165043843 - Split: 0 - gender - Group: Female
data shape:  (13383, 1)
sample size: 6691


bootstrap: 100%|████████████████████████████| 1000/1000 [00:24<00:00, 41.23it/s]



count sample diff f1   is twice tot diff f1....... 536  / 1000    p < 0.536  
count sample diff prec is twice tot diff prec..... 585  / 1000    p < 0.585  
count sample diff rec  is twice tot diff rec ..... 489  / 1000    p < 0.489  
count sample diff acc  is twice tot diff acc...... 656  / 1000    p < 0.656  


Seed: 165043843 - Split: 0 - gender - Group: Nonbinary
data shape:  (114, 1)
sample size: 57


bootstrap: 100%|████████████████████████████| 1000/1000 [00:12<00:00, 83.09it/s]



count sample diff f1   is twice tot diff f1....... 885  / 1000    p < 0.885  
count sample diff prec is twice tot diff prec..... 866  / 1000    p < 0.866  
count sample diff rec  is twice tot diff rec ..... 906  / 1000    p < 0.906  
count sample diff acc  is twice tot diff acc...... 717  / 1000    p < 0.717  


Seed: 165043843 - Split: 1 - gender - Group: Male
data shape:  (11915, 1)
sample size: 5957


bootstrap: 100%|████████████████████████████| 1000/1000 [00:23<00:00, 43.03it/s]



count sample diff f1   is twice tot diff f1....... 82   / 1000    p < 0.082  
count sample diff prec is twice tot diff prec..... 159  / 1000    p < 0.159  
count sample diff rec  is twice tot diff rec ..... 35   / 1000    p < 0.035  *
count sample diff acc  is twice tot diff acc...... 279  / 1000    p < 0.279  


Seed: 165043843 - Split: 1 - gender - Group: Female
data shape:  (13572, 1)
sample size: 6786


bootstrap: 100%|████████████████████████████| 1000/1000 [00:26<00:00, 38.09it/s]



count sample diff f1   is twice tot diff f1....... 1    / 1000    p < 0.001  **
count sample diff prec is twice tot diff prec..... 8    / 1000    p < 0.008  **
count sample diff rec  is twice tot diff rec ..... 1    / 1000    p < 0.001  **
count sample diff acc  is twice tot diff acc...... 59   / 1000    p < 0.059  


Seed: 165043843 - Split: 1 - gender - Group: Nonbinary
data shape:  (120, 1)
sample size: 60


bootstrap: 100%|███████████████████████████| 1000/1000 [00:08<00:00, 112.53it/s]



count sample diff f1   is twice tot diff f1....... 310  / 1000    p < 0.31   
count sample diff prec is twice tot diff prec..... 311  / 1000    p < 0.311  
count sample diff rec  is twice tot diff rec ..... 395  / 1000    p < 0.395  
count sample diff acc  is twice tot diff acc...... 155  / 1000    p < 0.155  


Seed: 165043843 - Split: 2 - gender - Group: Male
data shape:  (11843, 1)
sample size: 5921


bootstrap: 100%|████████████████████████████| 1000/1000 [00:22<00:00, 45.07it/s]



count sample diff f1   is twice tot diff f1....... 427  / 1000    p < 0.427  
count sample diff prec is twice tot diff prec..... 545  / 1000    p < 0.545  
count sample diff rec  is twice tot diff rec ..... 254  / 1000    p < 0.254  
count sample diff acc  is twice tot diff acc...... 685  / 1000    p < 0.685  


Seed: 165043843 - Split: 2 - gender - Group: Female
data shape:  (13638, 1)
sample size: 6819


bootstrap: 100%|████████████████████████████| 1000/1000 [00:21<00:00, 45.93it/s]



count sample diff f1   is twice tot diff f1....... 470  / 1000    p < 0.47   
count sample diff prec is twice tot diff prec..... 624  / 1000    p < 0.624  
count sample diff rec  is twice tot diff rec ..... 268  / 1000    p < 0.268  
count sample diff acc  is twice tot diff acc...... 895  / 1000    p < 0.895  


Seed: 165043843 - Split: 2 - gender - Group: Nonbinary
data shape:  (109, 1)
sample size: 54


bootstrap: 100%|███████████████████████████| 1000/1000 [00:08<00:00, 113.68it/s]



count sample diff f1   is twice tot diff f1....... 580  / 1000    p < 0.58   
count sample diff prec is twice tot diff prec..... 611  / 1000    p < 0.611  
count sample diff rec  is twice tot diff rec ..... 549  / 1000    p < 0.549  
count sample diff acc  is twice tot diff acc...... 604  / 1000    p < 0.604  


Seed: 165043843 - Split: 3 - gender - Group: Male
data shape:  (11880, 1)
sample size: 5940


bootstrap: 100%|████████████████████████████| 1000/1000 [00:21<00:00, 45.59it/s]



count sample diff f1   is twice tot diff f1....... 417  / 1000    p < 0.417  
count sample diff prec is twice tot diff prec..... 480  / 1000    p < 0.48   
count sample diff rec  is twice tot diff rec ..... 319  / 1000    p < 0.319  
count sample diff acc  is twice tot diff acc...... 554  / 1000    p < 0.554  


Seed: 165043843 - Split: 3 - gender - Group: Female
data shape:  (13627, 1)
sample size: 6813


bootstrap: 100%|████████████████████████████| 1000/1000 [00:24<00:00, 41.27it/s]



count sample diff f1   is twice tot diff f1....... 505  / 1000    p < 0.505  
count sample diff prec is twice tot diff prec..... 599  / 1000    p < 0.599  
count sample diff rec  is twice tot diff rec ..... 381  / 1000    p < 0.381  
count sample diff acc  is twice tot diff acc...... 819  / 1000    p < 0.819  


Seed: 165043843 - Split: 3 - gender - Group: Nonbinary
data shape:  (117, 1)
sample size: 58


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
bootstrap: 100%|███████████████████████████| 1000/1000 [00:08<00:00, 118.97it/s]



count sample diff f1   is twice tot diff f1....... 905  / 1000    p < 0.905  
count sample diff prec is twice tot diff prec..... 886  / 1000    p < 0.886  
count sample diff rec  is twice tot diff rec ..... 967  / 1000    p < 0.967  !
count sample diff acc  is twice tot diff acc...... 504  / 1000    p < 0.504  


Seed: 2923262358 - Split: 0 - gender - Group: Male
data shape:  (11985, 1)
sample size: 5992


bootstrap: 100%|████████████████████████████| 1000/1000 [00:20<00:00, 48.26it/s]



count sample diff f1   is twice tot diff f1....... 719  / 1000    p < 0.719  
count sample diff prec is twice tot diff prec..... 630  / 1000    p < 0.63   
count sample diff rec  is twice tot diff rec ..... 820  / 1000    p < 0.82   
count sample diff acc  is twice tot diff acc...... 487  / 1000    p < 0.487  


Seed: 2923262358 - Split: 0 - gender - Group: Female
data shape:  (13509, 1)
sample size: 6754


bootstrap: 100%|████████████████████████████| 1000/1000 [00:23<00:00, 41.83it/s]



count sample diff f1   is twice tot diff f1....... 689  / 1000    p < 0.689  
count sample diff prec is twice tot diff prec..... 678  / 1000    p < 0.678  
count sample diff rec  is twice tot diff rec ..... 689  / 1000    p < 0.689  
count sample diff acc  is twice tot diff acc...... 633  / 1000    p < 0.633  


Seed: 2923262358 - Split: 0 - gender - Group: Nonbinary
data shape:  (118, 1)
sample size: 59


bootstrap: 100%|███████████████████████████| 1000/1000 [00:09<00:00, 108.90it/s]



count sample diff f1   is twice tot diff f1....... 479  / 1000    p < 0.479  
count sample diff prec is twice tot diff prec..... 460  / 1000    p < 0.46   
count sample diff rec  is twice tot diff rec ..... 571  / 1000    p < 0.571  
count sample diff acc  is twice tot diff acc...... 300  / 1000    p < 0.3    


Seed: 2923262358 - Split: 1 - gender - Group: Male
data shape:  (11917, 1)
sample size: 5958


bootstrap: 100%|████████████████████████████| 1000/1000 [00:21<00:00, 46.36it/s]



count sample diff f1   is twice tot diff f1....... 709  / 1000    p < 0.709  
count sample diff prec is twice tot diff prec..... 560  / 1000    p < 0.56   
count sample diff rec  is twice tot diff rec ..... 845  / 1000    p < 0.845  
count sample diff acc  is twice tot diff acc...... 413  / 1000    p < 0.413  


Seed: 2923262358 - Split: 1 - gender - Group: Female
data shape:  (13498, 1)
sample size: 6749


bootstrap: 100%|████████████████████████████| 1000/1000 [00:23<00:00, 42.78it/s]



count sample diff f1   is twice tot diff f1....... 994  / 1000    p < 0.994  !
count sample diff prec is twice tot diff prec..... 984  / 1000    p < 0.984  !
count sample diff rec  is twice tot diff rec ..... 998  / 1000    p < 0.998  !
count sample diff acc  is twice tot diff acc...... 925  / 1000    p < 0.925  


Seed: 2923262358 - Split: 1 - gender - Group: Nonbinary
data shape:  (105, 1)
sample size: 52


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
bootstrap: 100%|████████████████████████████| 1000/1000 [00:11<00:00, 84.83it/s]



count sample diff f1   is twice tot diff f1....... 348  / 1000    p < 0.348  
count sample diff prec is twice tot diff prec..... 343  / 1000    p < 0.343  
count sample diff rec  is twice tot diff rec ..... 355  / 1000    p < 0.355  
count sample diff acc  is twice tot diff acc...... 443  / 1000    p < 0.443  


Seed: 2923262358 - Split: 2 - gender - Group: Male
data shape:  (11928, 1)
sample size: 5964


bootstrap: 100%|████████████████████████████| 1000/1000 [00:25<00:00, 39.59it/s]



count sample diff f1   is twice tot diff f1....... 239  / 1000    p < 0.239  
count sample diff prec is twice tot diff prec..... 527  / 1000    p < 0.527  
count sample diff rec  is twice tot diff rec ..... 109  / 1000    p < 0.109  
count sample diff acc  is twice tot diff acc...... 669  / 1000    p < 0.669  


Seed: 2923262358 - Split: 2 - gender - Group: Female
data shape:  (13560, 1)
sample size: 6780


bootstrap: 100%|████████████████████████████| 1000/1000 [00:24<00:00, 41.08it/s]



count sample diff f1   is twice tot diff f1....... 0    / 1000    p < 0.0    **
count sample diff prec is twice tot diff prec..... 1    / 1000    p < 0.001  **
count sample diff rec  is twice tot diff rec ..... 0    / 1000    p < 0.0    **
count sample diff acc  is twice tot diff acc...... 25   / 1000    p < 0.025  *


Seed: 2923262358 - Split: 2 - gender - Group: Nonbinary
data shape:  (117, 1)
sample size: 58


bootstrap: 100%|████████████████████████████| 1000/1000 [00:10<00:00, 96.13it/s]



count sample diff f1   is twice tot diff f1....... 462  / 1000    p < 0.462  
count sample diff prec is twice tot diff prec..... 459  / 1000    p < 0.459  
count sample diff rec  is twice tot diff rec ..... 469  / 1000    p < 0.469  
count sample diff acc  is twice tot diff acc...... 504  / 1000    p < 0.504  


Seed: 2923262358 - Split: 3 - gender - Group: Male
data shape:  (11870, 1)
sample size: 5935


bootstrap: 100%|████████████████████████████| 1000/1000 [00:19<00:00, 52.01it/s]



count sample diff f1   is twice tot diff f1....... 428  / 1000    p < 0.428  
count sample diff prec is twice tot diff prec..... 183  / 1000    p < 0.183  
count sample diff rec  is twice tot diff rec ..... 646  / 1000    p < 0.646  
count sample diff acc  is twice tot diff acc...... 91   / 1000    p < 0.091  


Seed: 2923262358 - Split: 3 - gender - Group: Female
data shape:  (13653, 1)
sample size: 6826


bootstrap: 100%|████████████████████████████| 1000/1000 [00:21<00:00, 46.46it/s]



count sample diff f1   is twice tot diff f1....... 996  / 1000    p < 0.996  !
count sample diff prec is twice tot diff prec..... 984  / 1000    p < 0.984  !
count sample diff rec  is twice tot diff rec ..... 1000 / 1000    p < 1.0    !
count sample diff acc  is twice tot diff acc...... 889  / 1000    p < 0.889  


Seed: 2923262358 - Split: 3 - gender - Group: Nonbinary
data shape:  (120, 1)
sample size: 60


bootstrap: 100%|███████████████████████████| 1000/1000 [00:08<00:00, 119.27it/s]


count sample diff f1   is twice tot diff f1....... 957  / 1000    p < 0.957  !
count sample diff prec is twice tot diff prec..... 942  / 1000    p < 0.942  
count sample diff rec  is twice tot diff rec ..... 915  / 1000    p < 0.915  
count sample diff acc  is twice tot diff acc...... 618  / 1000    p < 0.618  





In [10]:
# Derive the k estimates per gender group from the test results above; the
# displayed result maps each group to a 'k_count' and a 'k_bonferroni' value.
gender_ks = k_estimator_groups(gender_p_dicts)
gender_ks

{'Female': {'k_count': 2, 'k_bonferroni': 2},
 'Male': {'k_count': 1, 'k_bonferroni': -1},
 'Nonbinary': {'k_count': 1, 'k_bonferroni': -1}}

In [11]:
# Build the per-group table of k estimates that is exported to LaTeX.
gender_ks_df = (
    pd.DataFrame(gender_ks)
    .T  # one row per group, one column per estimator
    .sort_index()
    # NOTE(review): -1 looks like a "no estimate" sentinel of
    # k_estimator_groups and is reported as 0 - confirm.
    .replace({-1: 0})
    # Raw strings: '\h' is an invalid escape sequence in a normal string
    # literal and triggers a warning on recent Python versions.
    .rename(columns={
        'k_count': r'$\hat{k}_{count}$',
        'k_bonferroni': r'$\hat{k}_{Bonf.}$',
    })
)
gender_ks_df

Unnamed: 0,$\hat{k}_{count}$,$\hat{k}_{Bonf.}$
Female,2,2
Male,1,0
Nonbinary,1,0


In [12]:
# Write the gender table to disk; escape=False keeps the LaTeX markup in the
# column names from being escaped.
gender_ks_df.to_latex(
    buf='../tables/replication/randomized/gender.tex',
    escape=False,
)

### Age

In [13]:
# Bootstrap significance tests for the age runs, compared with the
# 'randomized' runs; the log below reports one test per seed, split and
# age range.
age_p_dicts = significance_test_per_group(
    df,
    attribute_a='randomized',
    attribute_b='age',
    to_groups=to_groups,
    attributes=['age_range'],
)



Seed: 2803636207 - Split: 0 - age_range - Group: 25 - 34
data shape:  (10313, 1)
sample size: 5156


bootstrap: 100%|████████████████████████████| 1000/1000 [00:19<00:00, 52.36it/s]



count sample diff f1   is twice tot diff f1....... 692  / 1000    p < 0.692  
count sample diff prec is twice tot diff prec..... 829  / 1000    p < 0.829  
count sample diff rec  is twice tot diff rec ..... 592  / 1000    p < 0.592  
count sample diff acc  is twice tot diff acc...... 882  / 1000    p < 0.882  


Seed: 2803636207 - Split: 0 - age_range - Group: 35 - 44
data shape:  (6220, 1)
sample size: 3110


bootstrap: 100%|████████████████████████████| 1000/1000 [00:16<00:00, 62.13it/s]



count sample diff f1   is twice tot diff f1....... 834  / 1000    p < 0.834  
count sample diff prec is twice tot diff prec..... 859  / 1000    p < 0.859  
count sample diff rec  is twice tot diff rec ..... 783  / 1000    p < 0.783  
count sample diff acc  is twice tot diff acc...... 891  / 1000    p < 0.891  


Seed: 2803636207 - Split: 0 - age_range - Group: 65 or older
data shape:  (675, 1)
sample size: 337


bootstrap: 100%|███████████████████████████| 1000/1000 [00:08<00:00, 114.05it/s]



count sample diff f1   is twice tot diff f1....... 93   / 1000    p < 0.093  
count sample diff prec is twice tot diff prec..... 51   / 1000    p < 0.051  
count sample diff rec  is twice tot diff rec ..... 134  / 1000    p < 0.134  
count sample diff acc  is twice tot diff acc...... 21   / 1000    p < 0.021  *


Seed: 2803636207 - Split: 0 - age_range - Group: 18 - 24
data shape:  (2651, 1)
sample size: 1325


bootstrap: 100%|████████████████████████████| 1000/1000 [00:10<00:00, 99.35it/s]



count sample diff f1   is twice tot diff f1....... 57   / 1000    p < 0.057  
count sample diff prec is twice tot diff prec..... 54   / 1000    p < 0.054  
count sample diff rec  is twice tot diff rec ..... 57   / 1000    p < 0.057  
count sample diff acc  is twice tot diff acc...... 59   / 1000    p < 0.059  


Seed: 2803636207 - Split: 0 - age_range - Group: 55 - 64
data shape:  (1871, 1)
sample size: 935


bootstrap: 100%|███████████████████████████| 1000/1000 [00:09<00:00, 105.37it/s]



count sample diff f1   is twice tot diff f1....... 623  / 1000    p < 0.623  
count sample diff prec is twice tot diff prec..... 553  / 1000    p < 0.553  
count sample diff rec  is twice tot diff rec ..... 722  / 1000    p < 0.722  
count sample diff acc  is twice tot diff acc...... 401  / 1000    p < 0.401  


Seed: 2803636207 - Split: 0 - age_range - Group: 45 - 54
data shape:  (2973, 1)
sample size: 1486


bootstrap: 100%|████████████████████████████| 1000/1000 [00:11<00:00, 85.30it/s]



count sample diff f1   is twice tot diff f1....... 486  / 1000    p < 0.486  
count sample diff prec is twice tot diff prec..... 511  / 1000    p < 0.511  
count sample diff rec  is twice tot diff rec ..... 465  / 1000    p < 0.465  
count sample diff acc  is twice tot diff acc...... 522  / 1000    p < 0.522  


Seed: 2803636207 - Split: 1 - age_range - Group: 25 - 34
data shape:  (10292, 1)
sample size: 5146


bootstrap: 100%|████████████████████████████| 1000/1000 [00:18<00:00, 53.63it/s]



count sample diff f1   is twice tot diff f1....... 754  / 1000    p < 0.754  
count sample diff prec is twice tot diff prec..... 597  / 1000    p < 0.597  
count sample diff rec  is twice tot diff rec ..... 849  / 1000    p < 0.849  
count sample diff acc  is twice tot diff acc...... 507  / 1000    p < 0.507  


Seed: 2803636207 - Split: 1 - age_range - Group: 35 - 44
data shape:  (6255, 1)
sample size: 3127


bootstrap: 100%|████████████████████████████| 1000/1000 [00:18<00:00, 55.09it/s]



count sample diff f1   is twice tot diff f1....... 829  / 1000    p < 0.829  
count sample diff prec is twice tot diff prec..... 620  / 1000    p < 0.62   
count sample diff rec  is twice tot diff rec ..... 915  / 1000    p < 0.915  
count sample diff acc  is twice tot diff acc...... 367  / 1000    p < 0.367  


Seed: 2803636207 - Split: 1 - age_range - Group: 65 or older
data shape:  (690, 1)
sample size: 345


bootstrap: 100%|███████████████████████████| 1000/1000 [00:08<00:00, 112.90it/s]



count sample diff f1   is twice tot diff f1....... 648  / 1000    p < 0.648  
count sample diff prec is twice tot diff prec..... 458  / 1000    p < 0.458  
count sample diff rec  is twice tot diff rec ..... 667  / 1000    p < 0.667  
count sample diff acc  is twice tot diff acc...... 436  / 1000    p < 0.436  


Seed: 2803636207 - Split: 1 - age_range - Group: 18 - 24
data shape:  (2560, 1)
sample size: 1280


bootstrap: 100%|████████████████████████████| 1000/1000 [00:11<00:00, 89.11it/s]



count sample diff f1   is twice tot diff f1....... 821  / 1000    p < 0.821  
count sample diff prec is twice tot diff prec..... 713  / 1000    p < 0.713  
count sample diff rec  is twice tot diff rec ..... 894  / 1000    p < 0.894  
count sample diff acc  is twice tot diff acc...... 383  / 1000    p < 0.383  


Seed: 2803636207 - Split: 1 - age_range - Group: 55 - 64
data shape:  (1899, 1)
sample size: 949


bootstrap: 100%|████████████████████████████| 1000/1000 [00:14<00:00, 70.90it/s]



count sample diff f1   is twice tot diff f1....... 41   / 1000    p < 0.041  *
count sample diff prec is twice tot diff prec..... 4    / 1000    p < 0.004  **
count sample diff rec  is twice tot diff rec ..... 139  / 1000    p < 0.139  
count sample diff acc  is twice tot diff acc...... 0    / 1000    p < 0.0    **


Seed: 2803636207 - Split: 1 - age_range - Group: 45 - 54
data shape:  (2971, 1)
sample size: 1485


bootstrap: 100%|████████████████████████████| 1000/1000 [00:11<00:00, 87.08it/s]



count sample diff f1   is twice tot diff f1....... 872  / 1000    p < 0.872  
count sample diff prec is twice tot diff prec..... 713  / 1000    p < 0.713  
count sample diff rec  is twice tot diff rec ..... 918  / 1000    p < 0.918  
count sample diff acc  is twice tot diff acc...... 612  / 1000    p < 0.612  


Seed: 2803636207 - Split: 2 - age_range - Group: 25 - 34
data shape:  (10326, 1)
sample size: 5163


bootstrap: 100%|████████████████████████████| 1000/1000 [00:21<00:00, 45.71it/s]



count sample diff f1   is twice tot diff f1....... 178  / 1000    p < 0.178  
count sample diff prec is twice tot diff prec..... 120  / 1000    p < 0.12   
count sample diff rec  is twice tot diff rec ..... 243  / 1000    p < 0.243  
count sample diff acc  is twice tot diff acc...... 77   / 1000    p < 0.077  


Seed: 2803636207 - Split: 2 - age_range - Group: 35 - 44
data shape:  (6257, 1)
sample size: 3128


bootstrap: 100%|████████████████████████████| 1000/1000 [00:13<00:00, 72.46it/s]



count sample diff f1   is twice tot diff f1....... 62   / 1000    p < 0.062  
count sample diff prec is twice tot diff prec..... 29   / 1000    p < 0.029  *
count sample diff rec  is twice tot diff rec ..... 94   / 1000    p < 0.094  
count sample diff acc  is twice tot diff acc...... 20   / 1000    p < 0.02   *


Seed: 2803636207 - Split: 2 - age_range - Group: 65 or older
data shape:  (684, 1)
sample size: 342


bootstrap: 100%|████████████████████████████| 1000/1000 [00:10<00:00, 95.49it/s]



count sample diff f1   is twice tot diff f1....... 828  / 1000    p < 0.828  
count sample diff prec is twice tot diff prec..... 727  / 1000    p < 0.727  
count sample diff rec  is twice tot diff rec ..... 898  / 1000    p < 0.898  
count sample diff acc  is twice tot diff acc...... 483  / 1000    p < 0.483  


Seed: 2803636207 - Split: 2 - age_range - Group: 18 - 24
data shape:  (2606, 1)
sample size: 1303


bootstrap: 100%|████████████████████████████| 1000/1000 [00:11<00:00, 87.93it/s]



count sample diff f1   is twice tot diff f1....... 390  / 1000    p < 0.39   
count sample diff prec is twice tot diff prec..... 263  / 1000    p < 0.263  
count sample diff rec  is twice tot diff rec ..... 570  / 1000    p < 0.57   
count sample diff acc  is twice tot diff acc...... 39   / 1000    p < 0.039  [38;5;9m*[0m


Seed: 2803636207 - Split: 2 - age_range - Group: 55 - 64
data shape:  (1849, 1)
sample size: 924


bootstrap: 100%|████████████████████████████| 1000/1000 [00:12<00:00, 79.87it/s]



count sample diff f1   is twice tot diff f1....... 631  / 1000    p < 0.631  
count sample diff prec is twice tot diff prec..... 308  / 1000    p < 0.308  
count sample diff rec  is twice tot diff rec ..... 765  / 1000    p < 0.765  
count sample diff acc  is twice tot diff acc...... 213  / 1000    p < 0.213  


Seed: 2803636207 - Split: 2 - age_range - Group: 45 - 54
data shape:  (3073, 1)
sample size: 1536


bootstrap: 100%|████████████████████████████| 1000/1000 [00:10<00:00, 93.52it/s]



count sample diff f1   is twice tot diff f1....... 395  / 1000    p < 0.395  
count sample diff prec is twice tot diff prec..... 457  / 1000    p < 0.457  
count sample diff rec  is twice tot diff rec ..... 317  / 1000    p < 0.317  
count sample diff acc  is twice tot diff acc...... 521  / 1000    p < 0.521  


Seed: 2803636207 - Split: 3 - age_range - Group: 25 - 34
data shape:  (10329, 1)
sample size: 5164


bootstrap: 100%|████████████████████████████| 1000/1000 [00:20<00:00, 49.01it/s]



count sample diff f1   is twice tot diff f1....... 362  / 1000    p < 0.362  
count sample diff prec is twice tot diff prec..... 577  / 1000    p < 0.577  
count sample diff rec  is twice tot diff rec ..... 235  / 1000    p < 0.235  
count sample diff acc  is twice tot diff acc...... 682  / 1000    p < 0.682  


Seed: 2803636207 - Split: 3 - age_range - Group: 35 - 44
data shape:  (6268, 1)
sample size: 3134


bootstrap: 100%|████████████████████████████| 1000/1000 [00:15<00:00, 62.59it/s]



count sample diff f1   is twice tot diff f1....... 11   / 1000    p < 0.011  [38;5;9m*[0m
count sample diff prec is twice tot diff prec..... 9    / 1000    p < 0.009  [38;5;9m**[0m
count sample diff rec  is twice tot diff rec ..... 16   / 1000    p < 0.016  [38;5;9m*[0m
count sample diff acc  is twice tot diff acc...... 7    / 1000    p < 0.007  [38;5;9m**[0m


Seed: 2803636207 - Split: 3 - age_range - Group: 65 or older
data shape:  (651, 1)
sample size: 325


bootstrap: 100%|███████████████████████████| 1000/1000 [00:09<00:00, 100.38it/s]



count sample diff f1   is twice tot diff f1....... 741  / 1000    p < 0.741  
count sample diff prec is twice tot diff prec..... 662  / 1000    p < 0.662  
count sample diff rec  is twice tot diff rec ..... 749  / 1000    p < 0.749  
count sample diff acc  is twice tot diff acc...... 620  / 1000    p < 0.62   


Seed: 2803636207 - Split: 3 - age_range - Group: 18 - 24
data shape:  (2643, 1)
sample size: 1321


bootstrap: 100%|████████████████████████████| 1000/1000 [00:10<00:00, 97.10it/s]



count sample diff f1   is twice tot diff f1....... 218  / 1000    p < 0.218  
count sample diff prec is twice tot diff prec..... 212  / 1000    p < 0.212  
count sample diff rec  is twice tot diff rec ..... 272  / 1000    p < 0.272  
count sample diff acc  is twice tot diff acc...... 132  / 1000    p < 0.132  


Seed: 2803636207 - Split: 3 - age_range - Group: 55 - 64
data shape:  (1841, 1)
sample size: 920


bootstrap: 100%|███████████████████████████| 1000/1000 [00:09<00:00, 100.86it/s]



count sample diff f1   is twice tot diff f1....... 280  / 1000    p < 0.28   
count sample diff prec is twice tot diff prec..... 188  / 1000    p < 0.188  
count sample diff rec  is twice tot diff rec ..... 338  / 1000    p < 0.338  
count sample diff acc  is twice tot diff acc...... 132  / 1000    p < 0.132  


Seed: 2803636207 - Split: 3 - age_range - Group: 45 - 54
data shape:  (3083, 1)
sample size: 1541


bootstrap: 100%|████████████████████████████| 1000/1000 [00:11<00:00, 88.46it/s]



count sample diff f1   is twice tot diff f1....... 341  / 1000    p < 0.341  
count sample diff prec is twice tot diff prec..... 341  / 1000    p < 0.341  
count sample diff rec  is twice tot diff rec ..... 342  / 1000    p < 0.342  
count sample diff acc  is twice tot diff acc...... 352  / 1000    p < 0.352  


Seed: 165043843 - Split: 0 - age_range - Group: 25 - 34
data shape:  (10457, 1)
sample size: 5228


bootstrap: 100%|████████████████████████████| 1000/1000 [00:19<00:00, 51.25it/s]



count sample diff f1   is twice tot diff f1....... 347  / 1000    p < 0.347  
count sample diff prec is twice tot diff prec..... 682  / 1000    p < 0.682  
count sample diff rec  is twice tot diff rec ..... 148  / 1000    p < 0.148  
count sample diff acc  is twice tot diff acc...... 871  / 1000    p < 0.871  


Seed: 165043843 - Split: 0 - age_range - Group: 35 - 44
data shape:  (6238, 1)
sample size: 3119


bootstrap: 100%|████████████████████████████| 1000/1000 [00:16<00:00, 60.71it/s]



count sample diff f1   is twice tot diff f1....... 256  / 1000    p < 0.256  
count sample diff prec is twice tot diff prec..... 520  / 1000    p < 0.52   
count sample diff rec  is twice tot diff rec ..... 82   / 1000    p < 0.082  
count sample diff acc  is twice tot diff acc...... 856  / 1000    p < 0.856  


Seed: 165043843 - Split: 0 - age_range - Group: 65 or older
data shape:  (643, 1)
sample size: 321


bootstrap: 100%|███████████████████████████| 1000/1000 [00:08<00:00, 117.40it/s]



count sample diff f1   is twice tot diff f1....... 58   / 1000    p < 0.058  
count sample diff prec is twice tot diff prec..... 67   / 1000    p < 0.067  
count sample diff rec  is twice tot diff rec ..... 49   / 1000    p < 0.049  [38;5;9m*[0m
count sample diff acc  is twice tot diff acc...... 100  / 1000    p < 0.1    


Seed: 165043843 - Split: 0 - age_range - Group: 18 - 24
data shape:  (2521, 1)
sample size: 1260


bootstrap: 100%|████████████████████████████| 1000/1000 [00:10<00:00, 97.06it/s]



count sample diff f1   is twice tot diff f1....... 310  / 1000    p < 0.31   
count sample diff prec is twice tot diff prec..... 402  / 1000    p < 0.402  
count sample diff rec  is twice tot diff rec ..... 240  / 1000    p < 0.24   
count sample diff acc  is twice tot diff acc...... 623  / 1000    p < 0.623  


Seed: 165043843 - Split: 0 - age_range - Group: 55 - 64
data shape:  (1894, 1)
sample size: 947


bootstrap: 100%|████████████████████████████| 1000/1000 [00:12<00:00, 80.25it/s]



count sample diff f1   is twice tot diff f1....... 290  / 1000    p < 0.29   
count sample diff prec is twice tot diff prec..... 531  / 1000    p < 0.531  
count sample diff rec  is twice tot diff rec ..... 217  / 1000    p < 0.217  
count sample diff acc  is twice tot diff acc...... 573  / 1000    p < 0.573  


Seed: 165043843 - Split: 0 - age_range - Group: 45 - 54
data shape:  (3058, 1)
sample size: 1529


bootstrap: 100%|████████████████████████████| 1000/1000 [00:11<00:00, 84.36it/s]



count sample diff f1   is twice tot diff f1....... 120  / 1000    p < 0.12   
count sample diff prec is twice tot diff prec..... 191  / 1000    p < 0.191  
count sample diff rec  is twice tot diff rec ..... 67   / 1000    p < 0.067  
count sample diff acc  is twice tot diff acc...... 305  / 1000    p < 0.305  


Seed: 165043843 - Split: 1 - age_range - Group: 25 - 34
data shape:  (10244, 1)
sample size: 5122


bootstrap: 100%|████████████████████████████| 1000/1000 [00:21<00:00, 47.06it/s]



count sample diff f1   is twice tot diff f1....... 933  / 1000    p < 0.933  
count sample diff prec is twice tot diff prec..... 935  / 1000    p < 0.935  
count sample diff rec  is twice tot diff rec ..... 922  / 1000    p < 0.922  
count sample diff acc  is twice tot diff acc...... 943  / 1000    p < 0.943  


Seed: 165043843 - Split: 1 - age_range - Group: 35 - 44
data shape:  (6191, 1)
sample size: 3095


bootstrap: 100%|████████████████████████████| 1000/1000 [00:13<00:00, 73.66it/s]



count sample diff f1   is twice tot diff f1....... 896  / 1000    p < 0.896  
count sample diff prec is twice tot diff prec..... 906  / 1000    p < 0.906  
count sample diff rec  is twice tot diff rec ..... 885  / 1000    p < 0.885  
count sample diff acc  is twice tot diff acc...... 912  / 1000    p < 0.912  


Seed: 165043843 - Split: 1 - age_range - Group: 65 or older
data shape:  (704, 1)
sample size: 352


bootstrap: 100%|███████████████████████████| 1000/1000 [00:09<00:00, 109.03it/s]



count sample diff f1   is twice tot diff f1....... 280  / 1000    p < 0.28   
count sample diff prec is twice tot diff prec..... 239  / 1000    p < 0.239  
count sample diff rec  is twice tot diff rec ..... 312  / 1000    p < 0.312  
count sample diff acc  is twice tot diff acc...... 195  / 1000    p < 0.195  


Seed: 165043843 - Split: 1 - age_range - Group: 18 - 24
data shape:  (2638, 1)
sample size: 1319


bootstrap: 100%|████████████████████████████| 1000/1000 [00:11<00:00, 89.46it/s]



count sample diff f1   is twice tot diff f1....... 915  / 1000    p < 0.915  
count sample diff prec is twice tot diff prec..... 909  / 1000    p < 0.909  
count sample diff rec  is twice tot diff rec ..... 917  / 1000    p < 0.917  
count sample diff acc  is twice tot diff acc...... 885  / 1000    p < 0.885  


Seed: 165043843 - Split: 1 - age_range - Group: 55 - 64
data shape:  (1831, 1)
sample size: 915


bootstrap: 100%|████████████████████████████| 1000/1000 [00:10<00:00, 96.24it/s]



count sample diff f1   is twice tot diff f1....... 729  / 1000    p < 0.729  
count sample diff prec is twice tot diff prec..... 709  / 1000    p < 0.709  
count sample diff rec  is twice tot diff rec ..... 801  / 1000    p < 0.801  
count sample diff acc  is twice tot diff acc...... 551  / 1000    p < 0.551  


Seed: 165043843 - Split: 1 - age_range - Group: 45 - 54
data shape:  (3048, 1)
sample size: 1524


bootstrap: 100%|████████████████████████████| 1000/1000 [00:13<00:00, 74.91it/s]



count sample diff f1   is twice tot diff f1....... 891  / 1000    p < 0.891  
count sample diff prec is twice tot diff prec..... 894  / 1000    p < 0.894  
count sample diff rec  is twice tot diff rec ..... 879  / 1000    p < 0.879  
count sample diff acc  is twice tot diff acc...... 889  / 1000    p < 0.889  


Seed: 165043843 - Split: 2 - age_range - Group: 25 - 34
data shape:  (10311, 1)
sample size: 5155


bootstrap: 100%|████████████████████████████| 1000/1000 [00:17<00:00, 58.34it/s]



count sample diff f1   is twice tot diff f1....... 282  / 1000    p < 0.282  
count sample diff prec is twice tot diff prec..... 173  / 1000    p < 0.173  
count sample diff rec  is twice tot diff rec ..... 406  / 1000    p < 0.406  
count sample diff acc  is twice tot diff acc...... 133  / 1000    p < 0.133  


Seed: 165043843 - Split: 2 - age_range - Group: 35 - 44
data shape:  (6247, 1)
sample size: 3123


bootstrap: 100%|████████████████████████████| 1000/1000 [00:13<00:00, 73.00it/s]



count sample diff f1   is twice tot diff f1....... 122  / 1000    p < 0.122  
count sample diff prec is twice tot diff prec..... 109  / 1000    p < 0.109  
count sample diff rec  is twice tot diff rec ..... 168  / 1000    p < 0.168  
count sample diff acc  is twice tot diff acc...... 77   / 1000    p < 0.077  


Seed: 165043843 - Split: 2 - age_range - Group: 65 or older
data shape:  (675, 1)
sample size: 337


bootstrap: 100%|███████████████████████████| 1000/1000 [00:08<00:00, 115.19it/s]



count sample diff f1   is twice tot diff f1....... 102  / 1000    p < 0.102  
count sample diff prec is twice tot diff prec..... 52   / 1000    p < 0.052  
count sample diff rec  is twice tot diff rec ..... 163  / 1000    p < 0.163  
count sample diff acc  is twice tot diff acc...... 17   / 1000    p < 0.017  [38;5;9m*[0m


Seed: 165043843 - Split: 2 - age_range - Group: 18 - 24
data shape:  (2697, 1)
sample size: 1348


bootstrap: 100%|████████████████████████████| 1000/1000 [00:11<00:00, 89.28it/s]



count sample diff f1   is twice tot diff f1....... 624  / 1000    p < 0.624  
count sample diff prec is twice tot diff prec..... 617  / 1000    p < 0.617  
count sample diff rec  is twice tot diff rec ..... 649  / 1000    p < 0.649  
count sample diff acc  is twice tot diff acc...... 569  / 1000    p < 0.569  


Seed: 165043843 - Split: 2 - age_range - Group: 55 - 64
data shape:  (1883, 1)
sample size: 941


bootstrap: 100%|████████████████████████████| 1000/1000 [00:11<00:00, 88.14it/s]



count sample diff f1   is twice tot diff f1....... 937  / 1000    p < 0.937  
count sample diff prec is twice tot diff prec..... 855  / 1000    p < 0.855  
count sample diff rec  is twice tot diff rec ..... 969  / 1000    p < 0.969  [38;5;8m![0m
count sample diff acc  is twice tot diff acc...... 675  / 1000    p < 0.675  


Seed: 165043843 - Split: 2 - age_range - Group: 45 - 54
data shape:  (2929, 1)
sample size: 1464


bootstrap: 100%|████████████████████████████| 1000/1000 [00:11<00:00, 85.85it/s]



count sample diff f1   is twice tot diff f1....... 43   / 1000    p < 0.043  [38;5;9m*[0m
count sample diff prec is twice tot diff prec..... 42   / 1000    p < 0.042  [38;5;9m*[0m
count sample diff rec  is twice tot diff rec ..... 49   / 1000    p < 0.049  [38;5;9m*[0m
count sample diff acc  is twice tot diff acc...... 38   / 1000    p < 0.038  [38;5;9m*[0m


Seed: 165043843 - Split: 3 - age_range - Group: 25 - 34
data shape:  (10248, 1)
sample size: 5124


bootstrap: 100%|████████████████████████████| 1000/1000 [00:19<00:00, 50.80it/s]



count sample diff f1   is twice tot diff f1....... 153  / 1000    p < 0.153  
count sample diff prec is twice tot diff prec..... 220  / 1000    p < 0.22   
count sample diff rec  is twice tot diff rec ..... 106  / 1000    p < 0.106  
count sample diff acc  is twice tot diff acc...... 303  / 1000    p < 0.303  


Seed: 165043843 - Split: 3 - age_range - Group: 35 - 44
data shape:  (6324, 1)
sample size: 3162


bootstrap: 100%|████████████████████████████| 1000/1000 [00:14<00:00, 68.71it/s]



count sample diff f1   is twice tot diff f1....... 316  / 1000    p < 0.316  
count sample diff prec is twice tot diff prec..... 359  / 1000    p < 0.359  
count sample diff rec  is twice tot diff rec ..... 239  / 1000    p < 0.239  
count sample diff acc  is twice tot diff acc...... 468  / 1000    p < 0.468  


Seed: 165043843 - Split: 3 - age_range - Group: 65 or older
data shape:  (678, 1)
sample size: 339


bootstrap: 100%|███████████████████████████| 1000/1000 [00:09<00:00, 101.90it/s]



count sample diff f1   is twice tot diff f1....... 193  / 1000    p < 0.193  
count sample diff prec is twice tot diff prec..... 160  / 1000    p < 0.16   
count sample diff rec  is twice tot diff rec ..... 212  / 1000    p < 0.212  
count sample diff acc  is twice tot diff acc...... 172  / 1000    p < 0.172  


Seed: 165043843 - Split: 3 - age_range - Group: 18 - 24
data shape:  (2604, 1)
sample size: 1302


bootstrap: 100%|████████████████████████████| 1000/1000 [00:12<00:00, 77.30it/s]



count sample diff f1   is twice tot diff f1....... 10   / 1000    p < 0.01   [38;5;9m*[0m
count sample diff prec is twice tot diff prec..... 9    / 1000    p < 0.009  [38;5;9m**[0m
count sample diff rec  is twice tot diff rec ..... 8    / 1000    p < 0.008  [38;5;9m**[0m
count sample diff acc  is twice tot diff acc...... 15   / 1000    p < 0.015  [38;5;9m*[0m


Seed: 165043843 - Split: 3 - age_range - Group: 55 - 64
data shape:  (1852, 1)
sample size: 926


bootstrap: 100%|███████████████████████████| 1000/1000 [00:09<00:00, 103.79it/s]



count sample diff f1   is twice tot diff f1....... 19   / 1000    p < 0.019  [38;5;9m*[0m
count sample diff prec is twice tot diff prec..... 25   / 1000    p < 0.025  [38;5;9m*[0m
count sample diff rec  is twice tot diff rec ..... 20   / 1000    p < 0.02   [38;5;9m*[0m
count sample diff acc  is twice tot diff acc...... 27   / 1000    p < 0.027  [38;5;9m*[0m


Seed: 165043843 - Split: 3 - age_range - Group: 45 - 54
data shape:  (3065, 1)
sample size: 1532


bootstrap: 100%|████████████████████████████| 1000/1000 [00:10<00:00, 93.97it/s]



count sample diff f1   is twice tot diff f1....... 51   / 1000    p < 0.051  
count sample diff prec is twice tot diff prec..... 77   / 1000    p < 0.077  
count sample diff rec  is twice tot diff rec ..... 40   / 1000    p < 0.04   [38;5;9m*[0m
count sample diff acc  is twice tot diff acc...... 105  / 1000    p < 0.105  


Seed: 2923262358 - Split: 0 - age_range - Group: 25 - 34
data shape:  (10253, 1)
sample size: 5126


bootstrap: 100%|████████████████████████████| 1000/1000 [00:18<00:00, 52.98it/s]



count sample diff f1   is twice tot diff f1....... 707  / 1000    p < 0.707  
count sample diff prec is twice tot diff prec..... 755  / 1000    p < 0.755  
count sample diff rec  is twice tot diff rec ..... 572  / 1000    p < 0.572  
count sample diff acc  is twice tot diff acc...... 863  / 1000    p < 0.863  


Seed: 2923262358 - Split: 0 - age_range - Group: 35 - 44
data shape:  (6321, 1)
sample size: 3160


bootstrap: 100%|████████████████████████████| 1000/1000 [00:16<00:00, 62.43it/s]



count sample diff f1   is twice tot diff f1....... 939  / 1000    p < 0.939  
count sample diff prec is twice tot diff prec..... 946  / 1000    p < 0.946  
count sample diff rec  is twice tot diff rec ..... 922  / 1000    p < 0.922  
count sample diff acc  is twice tot diff acc...... 960  / 1000    p < 0.96   [38;5;8m![0m


Seed: 2923262358 - Split: 0 - age_range - Group: 65 or older
data shape:  (646, 1)
sample size: 323


bootstrap: 100%|████████████████████████████| 1000/1000 [00:10<00:00, 98.50it/s]



count sample diff f1   is twice tot diff f1....... 452  / 1000    p < 0.452  
count sample diff prec is twice tot diff prec..... 409  / 1000    p < 0.409  
count sample diff rec  is twice tot diff rec ..... 513  / 1000    p < 0.513  
count sample diff acc  is twice tot diff acc...... 355  / 1000    p < 0.355  


Seed: 2923262358 - Split: 0 - age_range - Group: 18 - 24
data shape:  (2644, 1)
sample size: 1322


bootstrap: 100%|████████████████████████████| 1000/1000 [00:10<00:00, 92.38it/s]



count sample diff f1   is twice tot diff f1....... 644  / 1000    p < 0.644  
count sample diff prec is twice tot diff prec..... 639  / 1000    p < 0.639  
count sample diff rec  is twice tot diff rec ..... 672  / 1000    p < 0.672  
count sample diff acc  is twice tot diff acc...... 579  / 1000    p < 0.579  


Seed: 2923262358 - Split: 0 - age_range - Group: 55 - 64
data shape:  (1869, 1)
sample size: 934


bootstrap: 100%|████████████████████████████| 1000/1000 [00:10<00:00, 96.85it/s]



count sample diff f1   is twice tot diff f1....... 820  / 1000    p < 0.82   
count sample diff prec is twice tot diff prec..... 696  / 1000    p < 0.696  
count sample diff rec  is twice tot diff rec ..... 917  / 1000    p < 0.917  
count sample diff acc  is twice tot diff acc...... 404  / 1000    p < 0.404  


Seed: 2923262358 - Split: 0 - age_range - Group: 45 - 54
data shape:  (3030, 1)
sample size: 1515


bootstrap: 100%|████████████████████████████| 1000/1000 [00:13<00:00, 72.33it/s]



count sample diff f1   is twice tot diff f1....... 798  / 1000    p < 0.798  
count sample diff prec is twice tot diff prec..... 671  / 1000    p < 0.671  
count sample diff rec  is twice tot diff rec ..... 866  / 1000    p < 0.866  
count sample diff acc  is twice tot diff acc...... 570  / 1000    p < 0.57   


Seed: 2923262358 - Split: 1 - age_range - Group: 25 - 34
data shape:  (10398, 1)
sample size: 5199


bootstrap: 100%|████████████████████████████| 1000/1000 [00:22<00:00, 44.96it/s]



count sample diff f1   is twice tot diff f1....... 666  / 1000    p < 0.666  
count sample diff prec is twice tot diff prec..... 678  / 1000    p < 0.678  
count sample diff rec  is twice tot diff rec ..... 661  / 1000    p < 0.661  
count sample diff acc  is twice tot diff acc...... 670  / 1000    p < 0.67   


Seed: 2923262358 - Split: 1 - age_range - Group: 35 - 44
data shape:  (6179, 1)
sample size: 3089


bootstrap: 100%|████████████████████████████| 1000/1000 [00:14<00:00, 71.39it/s]



count sample diff f1   is twice tot diff f1....... 958  / 1000    p < 0.958  [38;5;8m![0m
count sample diff prec is twice tot diff prec..... 913  / 1000    p < 0.913  
count sample diff rec  is twice tot diff rec ..... 968  / 1000    p < 0.968  [38;5;8m![0m
count sample diff acc  is twice tot diff acc...... 835  / 1000    p < 0.835  


Seed: 2923262358 - Split: 1 - age_range - Group: 65 or older
data shape:  (675, 1)
sample size: 337


bootstrap: 100%|███████████████████████████| 1000/1000 [00:09<00:00, 100.64it/s]



count sample diff f1   is twice tot diff f1....... 742  / 1000    p < 0.742  
count sample diff prec is twice tot diff prec..... 591  / 1000    p < 0.591  
count sample diff rec  is twice tot diff rec ..... 776  / 1000    p < 0.776  
count sample diff acc  is twice tot diff acc...... 508  / 1000    p < 0.508  


Seed: 2923262358 - Split: 1 - age_range - Group: 18 - 24
data shape:  (2556, 1)
sample size: 1278


bootstrap: 100%|████████████████████████████| 1000/1000 [00:12<00:00, 82.74it/s]



count sample diff f1   is twice tot diff f1....... 931  / 1000    p < 0.931  
count sample diff prec is twice tot diff prec..... 896  / 1000    p < 0.896  
count sample diff rec  is twice tot diff rec ..... 942  / 1000    p < 0.942  
count sample diff acc  is twice tot diff acc...... 735  / 1000    p < 0.735  


Seed: 2923262358 - Split: 1 - age_range - Group: 55 - 64
data shape:  (1832, 1)
sample size: 916


bootstrap: 100%|███████████████████████████| 1000/1000 [00:09<00:00, 103.70it/s]



count sample diff f1   is twice tot diff f1....... 783  / 1000    p < 0.783  
count sample diff prec is twice tot diff prec..... 584  / 1000    p < 0.584  
count sample diff rec  is twice tot diff rec ..... 893  / 1000    p < 0.893  
count sample diff acc  is twice tot diff acc...... 348  / 1000    p < 0.348  


Seed: 2923262358 - Split: 1 - age_range - Group: 45 - 54
data shape:  (3046, 1)
sample size: 1523


bootstrap: 100%|████████████████████████████| 1000/1000 [00:11<00:00, 89.10it/s]



count sample diff f1   is twice tot diff f1....... 954  / 1000    p < 0.954  [38;5;8m![0m
count sample diff prec is twice tot diff prec..... 921  / 1000    p < 0.921  
count sample diff rec  is twice tot diff rec ..... 976  / 1000    p < 0.976  [38;5;8m![0m
count sample diff acc  is twice tot diff acc...... 841  / 1000    p < 0.841  


Seed: 2923262358 - Split: 2 - age_range - Group: 25 - 34
data shape:  (10301, 1)
sample size: 5150


bootstrap: 100%|████████████████████████████| 1000/1000 [00:19<00:00, 50.78it/s]



count sample diff f1   is twice tot diff f1....... 932  / 1000    p < 0.932  
count sample diff prec is twice tot diff prec..... 946  / 1000    p < 0.946  
count sample diff rec  is twice tot diff rec ..... 917  / 1000    p < 0.917  
count sample diff acc  is twice tot diff acc...... 954  / 1000    p < 0.954  [38;5;8m![0m


Seed: 2923262358 - Split: 2 - age_range - Group: 35 - 44
data shape:  (6314, 1)
sample size: 3157


bootstrap: 100%|████████████████████████████| 1000/1000 [00:13<00:00, 73.42it/s]



count sample diff f1   is twice tot diff f1....... 974  / 1000    p < 0.974  [38;5;8m![0m
count sample diff prec is twice tot diff prec..... 964  / 1000    p < 0.964  [38;5;8m![0m
count sample diff rec  is twice tot diff rec ..... 977  / 1000    p < 0.977  [38;5;8m![0m
count sample diff acc  is twice tot diff acc...... 932  / 1000    p < 0.932  


Seed: 2923262358 - Split: 2 - age_range - Group: 65 or older
data shape:  (683, 1)
sample size: 341


bootstrap: 100%|███████████████████████████| 1000/1000 [00:08<00:00, 115.32it/s]



count sample diff f1   is twice tot diff f1....... 135  / 1000    p < 0.135  
count sample diff prec is twice tot diff prec..... 161  / 1000    p < 0.161  
count sample diff rec  is twice tot diff rec ..... 117  / 1000    p < 0.117  
count sample diff acc  is twice tot diff acc...... 194  / 1000    p < 0.194  


Seed: 2923262358 - Split: 2 - age_range - Group: 18 - 24
data shape:  (2596, 1)
sample size: 1298


bootstrap: 100%|████████████████████████████| 1000/1000 [00:10<00:00, 96.09it/s]



count sample diff f1   is twice tot diff f1....... 634  / 1000    p < 0.634  
count sample diff prec is twice tot diff prec..... 652  / 1000    p < 0.652  
count sample diff rec  is twice tot diff rec ..... 619  / 1000    p < 0.619  
count sample diff acc  is twice tot diff acc...... 695  / 1000    p < 0.695  


Seed: 2923262358 - Split: 2 - age_range - Group: 55 - 64
data shape:  (1856, 1)
sample size: 928


bootstrap: 100%|████████████████████████████| 1000/1000 [00:10<00:00, 93.47it/s]



count sample diff f1   is twice tot diff f1....... 41   / 1000    p < 0.041  [38;5;9m*[0m
count sample diff prec is twice tot diff prec..... 20   / 1000    p < 0.02   [38;5;9m*[0m
count sample diff rec  is twice tot diff rec ..... 96   / 1000    p < 0.096  
count sample diff acc  is twice tot diff acc...... 6    / 1000    p < 0.006  [38;5;9m**[0m


Seed: 2923262358 - Split: 2 - age_range - Group: 45 - 54
data shape:  (3010, 1)
sample size: 1505


bootstrap: 100%|████████████████████████████| 1000/1000 [00:10<00:00, 93.00it/s]



count sample diff f1   is twice tot diff f1....... 851  / 1000    p < 0.851  
count sample diff prec is twice tot diff prec..... 815  / 1000    p < 0.815  
count sample diff rec  is twice tot diff rec ..... 866  / 1000    p < 0.866  
count sample diff acc  is twice tot diff acc...... 778  / 1000    p < 0.778  


Seed: 2923262358 - Split: 3 - age_range - Group: 25 - 34
data shape:  (10308, 1)
sample size: 5154


bootstrap: 100%|████████████████████████████| 1000/1000 [00:17<00:00, 56.30it/s]



count sample diff f1   is twice tot diff f1....... 979  / 1000    p < 0.979  [38;5;8m![0m
count sample diff prec is twice tot diff prec..... 972  / 1000    p < 0.972  [38;5;8m![0m
count sample diff rec  is twice tot diff rec ..... 977  / 1000    p < 0.977  [38;5;8m![0m
count sample diff acc  is twice tot diff acc...... 961  / 1000    p < 0.961  [38;5;8m![0m


Seed: 2923262358 - Split: 3 - age_range - Group: 35 - 44
data shape:  (6186, 1)
sample size: 3093


bootstrap: 100%|████████████████████████████| 1000/1000 [00:15<00:00, 64.00it/s]



count sample diff f1   is twice tot diff f1....... 830  / 1000    p < 0.83   
count sample diff prec is twice tot diff prec..... 693  / 1000    p < 0.693  
count sample diff rec  is twice tot diff rec ..... 918  / 1000    p < 0.918  
count sample diff acc  is twice tot diff acc...... 455  / 1000    p < 0.455  


Seed: 2923262358 - Split: 3 - age_range - Group: 65 or older
data shape:  (696, 1)
sample size: 348


bootstrap: 100%|███████████████████████████| 1000/1000 [00:08<00:00, 114.63it/s]



count sample diff f1   is twice tot diff f1....... 15   / 1000    p < 0.015  [38;5;9m*[0m
count sample diff prec is twice tot diff prec..... 11   / 1000    p < 0.011  [38;5;9m*[0m
count sample diff rec  is twice tot diff rec ..... 19   / 1000    p < 0.019  [38;5;9m*[0m
count sample diff acc  is twice tot diff acc...... 8    / 1000    p < 0.008  [38;5;9m**[0m


Seed: 2923262358 - Split: 3 - age_range - Group: 18 - 24
data shape:  (2664, 1)
sample size: 1332


bootstrap: 100%|████████████████████████████| 1000/1000 [00:10<00:00, 97.93it/s]



count sample diff f1   is twice tot diff f1....... 847  / 1000    p < 0.847  
count sample diff prec is twice tot diff prec..... 797  / 1000    p < 0.797  
count sample diff rec  is twice tot diff rec ..... 890  / 1000    p < 0.89   
count sample diff acc  is twice tot diff acc...... 547  / 1000    p < 0.547  


Seed: 2923262358 - Split: 3 - age_range - Group: 55 - 64
data shape:  (1903, 1)
sample size: 951


bootstrap: 100%|████████████████████████████| 1000/1000 [00:10<00:00, 97.13it/s]



count sample diff f1   is twice tot diff f1....... 932  / 1000    p < 0.932  
count sample diff prec is twice tot diff prec..... 878  / 1000    p < 0.878  
count sample diff rec  is twice tot diff rec ..... 975  / 1000    p < 0.975  [38;5;8m![0m
count sample diff acc  is twice tot diff acc...... 628  / 1000    p < 0.628  


Seed: 2923262358 - Split: 3 - age_range - Group: 45 - 54
data shape:  (3014, 1)
sample size: 1507


bootstrap: 100%|████████████████████████████| 1000/1000 [00:11<00:00, 89.36it/s]


count sample diff f1   is twice tot diff f1....... 466  / 1000    p < 0.466  
count sample diff prec is twice tot diff prec..... 306  / 1000    p < 0.306  
count sample diff rec  is twice tot diff rec ..... 610  / 1000    p < 0.61   
count sample diff acc  is twice tot diff acc...... 169  / 1000    p < 0.169  





In [14]:
# Derive the per-group k estimates from the age significance-test results.
# `age_p_dicts` comes from an earlier cell (presumably the age run of
# `significance_test_per_group` whose log is printed above -- confirm).
# The result maps each age-group label to {'k_count': ..., 'k_bonferroni': ...}.
age_ks = k_estimator_groups(age_p_dicts)
age_ks

{'18 - 24': {'k_count': 1, 'k_bonferroni': -1},
 '25 - 34': {'k_count': -1, 'k_bonferroni': -1},
 '65 or older': {'k_count': 1, 'k_bonferroni': -1},
 '45 - 54': {'k_count': 1, 'k_bonferroni': -1},
 '55 - 64': {'k_count': 3, 'k_bonferroni': -1},
 '35 - 44': {'k_count': 1, 'k_bonferroni': -1}}

In [15]:
# Turn the {group: {'k_count', 'k_bonferroni'}} mapping into a table with one
# row per age group (sorted by label) and one column per estimator.
age_ks_df = (
    pd.DataFrame(age_ks)
    .T
    .sort_index()
    # The estimator reports -1 for some groups; the table shows those as 0.
    # NOTE(review): -1 is presumably a "no k found" sentinel -- confirm
    # against k_estimator_groups.
    .replace({-1: 0})
    # Raw strings: '\h' is not a valid Python escape sequence, so the
    # non-raw literals triggered an invalid-escape warning. The resulting
    # column labels are unchanged.
    .rename(columns={
        'k_count': r'$\hat{k}_{count}$',
        'k_bonferroni': r'$\hat{k}_{Bonf.}$',
    })
)
age_ks_df

Unnamed: 0,$\hat{k}_{count}$,$\hat{k}_{Bonf.}$
18 - 24,1,0
25 - 34,0,0
35 - 44,1,0
45 - 54,1,0
55 - 64,3,0
65 or older,1,0


In [16]:
# Export the age table as LaTeX. escape=False keeps the column headers
# unescaped so the `$\hat{k}...$` math labels are written to the .tex file
# verbatim instead of being escaped into literal text.
age_ks_df.to_latex('../tables/replication/randomized/age.tex', escape=False)

#### Education

In [17]:
# Per-group significance test for the education attribute. The log printed
# below shows one 1000-iteration bootstrap per seed / split / education group.
education_p_dicts = significance_test_per_group(
    df,
    attribute_a='randomized',
    attribute_b='education',
    to_groups=to_groups,
    attributes=['education'],
)



Seed: 2803636207 - Split: 0 - education - Group: Bachelor's degree in college (4-year)
data shape:  (10439, 1)
sample size: 5219


bootstrap: 100%|████████████████████████████| 1000/1000 [00:19<00:00, 52.27it/s]



count sample diff f1   is twice tot diff f1....... 458  / 1000    p < 0.458  
count sample diff prec is twice tot diff prec..... 504  / 1000    p < 0.504  
count sample diff rec  is twice tot diff rec ..... 448  / 1000    p < 0.448  
count sample diff acc  is twice tot diff acc...... 530  / 1000    p < 0.53   


Seed: 2803636207 - Split: 0 - education - Group: High school graduate (high school diploma or equivalent including GED)
data shape:  (2130, 1)
sample size: 1065


bootstrap: 100%|███████████████████████████| 1000/1000 [00:09<00:00, 102.69it/s]



count sample diff f1   is twice tot diff f1....... 28   / 1000    p < 0.028  [38;5;9m*[0m
count sample diff prec is twice tot diff prec..... 72   / 1000    p < 0.072  
count sample diff rec  is twice tot diff rec ..... 20   / 1000    p < 0.02   [38;5;9m*[0m
count sample diff acc  is twice tot diff acc...... 270  / 1000    p < 0.27   


Seed: 2803636207 - Split: 0 - education - Group: Master's degree
data shape:  (3534, 1)
sample size: 1767


bootstrap: 100%|████████████████████████████| 1000/1000 [00:11<00:00, 88.76it/s]



count sample diff f1   is twice tot diff f1....... 590  / 1000    p < 0.59   
count sample diff prec is twice tot diff prec..... 682  / 1000    p < 0.682  
count sample diff rec  is twice tot diff rec ..... 519  / 1000    p < 0.519  
count sample diff acc  is twice tot diff acc...... 698  / 1000    p < 0.698  


Seed: 2803636207 - Split: 0 - education - Group: Associate degree in college (2-year)
data shape:  (2603, 1)
sample size: 1301


bootstrap: 100%|████████████████████████████| 1000/1000 [00:11<00:00, 89.61it/s]



count sample diff f1   is twice tot diff f1....... 40   / 1000    p < 0.04   [38;5;9m*[0m
count sample diff prec is twice tot diff prec..... 65   / 1000    p < 0.065  
count sample diff rec  is twice tot diff rec ..... 27   / 1000    p < 0.027  [38;5;9m*[0m
count sample diff acc  is twice tot diff acc...... 226  / 1000    p < 0.226  


Seed: 2803636207 - Split: 0 - education - Group: Some college but no degree
data shape:  (4759, 1)
sample size: 2379


bootstrap: 100%|████████████████████████████| 1000/1000 [00:12<00:00, 77.10it/s]



count sample diff f1   is twice tot diff f1....... 2    / 1000    p < 0.002  [38;5;9m**[0m
count sample diff prec is twice tot diff prec..... 2    / 1000    p < 0.002  [38;5;9m**[0m
count sample diff rec  is twice tot diff rec ..... 1    / 1000    p < 0.001  [38;5;9m**[0m
count sample diff acc  is twice tot diff acc...... 1    / 1000    p < 0.001  [38;5;9m**[0m


Seed: 2803636207 - Split: 0 - education - Group: Doctoral degree
data shape:  (277, 1)
sample size: 138


bootstrap: 100%|███████████████████████████| 1000/1000 [00:09<00:00, 109.33it/s]



count sample diff f1   is twice tot diff f1....... 155  / 1000    p < 0.155  
count sample diff prec is twice tot diff prec..... 374  / 1000    p < 0.374  
count sample diff rec  is twice tot diff rec ..... 130  / 1000    p < 0.13   
count sample diff acc  is twice tot diff acc...... 331  / 1000    p < 0.331  


Seed: 2803636207 - Split: 0 - education - Group: Less than high school degree
data shape:  (159, 1)
sample size: 79


bootstrap: 100%|███████████████████████████| 1000/1000 [00:08<00:00, 112.50it/s]



count sample diff f1   is twice tot diff f1....... 546  / 1000    p < 0.546  
count sample diff prec is twice tot diff prec..... 520  / 1000    p < 0.52   
count sample diff rec  is twice tot diff rec ..... 546  / 1000    p < 0.546  
count sample diff acc  is twice tot diff acc...... 481  / 1000    p < 0.481  


Seed: 2803636207 - Split: 0 - education - Group: Professional degree (JD, MD)
data shape:  (362, 1)
sample size: 181


bootstrap: 100%|███████████████████████████| 1000/1000 [00:09<00:00, 100.98it/s]



count sample diff f1   is twice tot diff f1....... 134  / 1000    p < 0.134  
count sample diff prec is twice tot diff prec..... 125  / 1000    p < 0.125  
count sample diff rec  is twice tot diff rec ..... 142  / 1000    p < 0.142  
count sample diff acc  is twice tot diff acc...... 118  / 1000    p < 0.118  


Seed: 2803636207 - Split: 1 - education - Group: Bachelor's degree in college (4-year)
data shape:  (10541, 1)
sample size: 5270


bootstrap: 100%|████████████████████████████| 1000/1000 [00:21<00:00, 46.91it/s]



count sample diff f1   is twice tot diff f1....... 19   / 1000    p < 0.019  [38;5;9m*[0m
count sample diff prec is twice tot diff prec..... 10   / 1000    p < 0.01   [38;5;9m*[0m
count sample diff rec  is twice tot diff rec ..... 53   / 1000    p < 0.053  
count sample diff acc  is twice tot diff acc...... 2    / 1000    p < 0.002  [38;5;9m**[0m


Seed: 2803636207 - Split: 1 - education - Group: High school graduate (high school diploma or equivalent including GED)
data shape:  (2052, 1)
sample size: 1026


bootstrap: 100%|████████████████████████████| 1000/1000 [00:10<00:00, 99.27it/s]



count sample diff f1   is twice tot diff f1....... 270  / 1000    p < 0.27   
count sample diff prec is twice tot diff prec..... 232  / 1000    p < 0.232  
count sample diff rec  is twice tot diff rec ..... 347  / 1000    p < 0.347  
count sample diff acc  is twice tot diff acc...... 147  / 1000    p < 0.147  


Seed: 2803636207 - Split: 1 - education - Group: Master's degree
data shape:  (3455, 1)
sample size: 1727


bootstrap: 100%|████████████████████████████| 1000/1000 [00:11<00:00, 90.57it/s]



count sample diff f1   is twice tot diff f1....... 134  / 1000    p < 0.134  
count sample diff prec is twice tot diff prec..... 122  / 1000    p < 0.122  
count sample diff rec  is twice tot diff rec ..... 142  / 1000    p < 0.142  
count sample diff acc  is twice tot diff acc...... 114  / 1000    p < 0.114  


Seed: 2803636207 - Split: 1 - education - Group: Associate degree in college (2-year)
data shape:  (2697, 1)
sample size: 1348


bootstrap: 100%|████████████████████████████| 1000/1000 [00:12<00:00, 79.52it/s]



count sample diff f1   is twice tot diff f1....... 291  / 1000    p < 0.291  
count sample diff prec is twice tot diff prec..... 282  / 1000    p < 0.282  
count sample diff rec  is twice tot diff rec ..... 321  / 1000    p < 0.321  
count sample diff acc  is twice tot diff acc...... 239  / 1000    p < 0.239  


Seed: 2803636207 - Split: 1 - education - Group: Some college but no degree
data shape:  (4539, 1)
sample size: 2269


bootstrap: 100%|████████████████████████████| 1000/1000 [00:12<00:00, 77.68it/s]



count sample diff f1   is twice tot diff f1....... 973  / 1000    p < 0.973  [38;5;8m![0m
count sample diff prec is twice tot diff prec..... 964  / 1000    p < 0.964  [38;5;8m![0m
count sample diff rec  is twice tot diff rec ..... 978  / 1000    p < 0.978  [38;5;8m![0m
count sample diff acc  is twice tot diff acc...... 903  / 1000    p < 0.903  


Seed: 2803636207 - Split: 1 - education - Group: Doctoral degree
data shape:  (297, 1)
sample size: 148


bootstrap: 100%|████████████████████████████| 1000/1000 [00:11<00:00, 86.00it/s]



count sample diff f1   is twice tot diff f1....... 626  / 1000    p < 0.626  
count sample diff prec is twice tot diff prec..... 603  / 1000    p < 0.603  
count sample diff rec  is twice tot diff rec ..... 648  / 1000    p < 0.648  
count sample diff acc  is twice tot diff acc...... 526  / 1000    p < 0.526  


Seed: 2803636207 - Split: 1 - education - Group: Less than high school degree
data shape:  (152, 1)
sample size: 76


bootstrap: 100%|███████████████████████████| 1000/1000 [00:08<00:00, 117.57it/s]



count sample diff f1   is twice tot diff f1....... 486  / 1000    p < 0.486  
count sample diff prec is twice tot diff prec..... 305  / 1000    p < 0.305  
count sample diff rec  is twice tot diff rec ..... 537  / 1000    p < 0.537  
count sample diff acc  is twice tot diff acc...... 326  / 1000    p < 0.326  


Seed: 2803636207 - Split: 1 - education - Group: Professional degree (JD, MD)
data shape:  (372, 1)
sample size: 186


bootstrap: 100%|███████████████████████████| 1000/1000 [00:08<00:00, 111.23it/s]



count sample diff f1   is twice tot diff f1....... 829  / 1000    p < 0.829  
count sample diff prec is twice tot diff prec..... 770  / 1000    p < 0.77   
count sample diff rec  is twice tot diff rec ..... 883  / 1000    p < 0.883  
count sample diff acc  is twice tot diff acc...... 725  / 1000    p < 0.725  


Seed: 2803636207 - Split: 2 - education - Group: Bachelor's degree in college (4-year)
data shape:  (10471, 1)
sample size: 5235


bootstrap: 100%|████████████████████████████| 1000/1000 [00:19<00:00, 50.34it/s]



count sample diff f1   is twice tot diff f1....... 963  / 1000    p < 0.963  [38;5;8m![0m
count sample diff prec is twice tot diff prec..... 985  / 1000    p < 0.985  [38;5;8m![0m
count sample diff rec  is twice tot diff rec ..... 927  / 1000    p < 0.927  
count sample diff acc  is twice tot diff acc...... 991  / 1000    p < 0.991  [38;5;8m![0m


Seed: 2803636207 - Split: 2 - education - Group: High school graduate (high school diploma or equivalent including GED)
data shape:  (2106, 1)
sample size: 1053


bootstrap: 100%|████████████████████████████| 1000/1000 [00:12<00:00, 80.49it/s]



count sample diff f1   is twice tot diff f1....... 314  / 1000    p < 0.314  
count sample diff prec is twice tot diff prec..... 348  / 1000    p < 0.348  
count sample diff rec  is twice tot diff rec ..... 265  / 1000    p < 0.265  
count sample diff acc  is twice tot diff acc...... 463  / 1000    p < 0.463  


Seed: 2803636207 - Split: 2 - education - Group: Master's degree
data shape:  (3510, 1)
sample size: 1755


bootstrap: 100%|████████████████████████████| 1000/1000 [00:11<00:00, 88.49it/s]



count sample diff f1   is twice tot diff f1....... 683  / 1000    p < 0.683  
count sample diff prec is twice tot diff prec..... 634  / 1000    p < 0.634  
count sample diff rec  is twice tot diff rec ..... 710  / 1000    p < 0.71   
count sample diff acc  is twice tot diff acc...... 621  / 1000    p < 0.621  


Seed: 2803636207 - Split: 2 - education - Group: Associate degree in college (2-year)
data shape:  (2604, 1)
sample size: 1302


bootstrap: 100%|████████████████████████████| 1000/1000 [00:10<00:00, 98.54it/s]



count sample diff f1   is twice tot diff f1....... 98   / 1000    p < 0.098  
count sample diff prec is twice tot diff prec..... 106  / 1000    p < 0.106  
count sample diff rec  is twice tot diff rec ..... 95   / 1000    p < 0.095  
count sample diff acc  is twice tot diff acc...... 135  / 1000    p < 0.135  


Seed: 2803636207 - Split: 2 - education - Group: Some college but no degree
data shape:  (4644, 1)
sample size: 2322


bootstrap: 100%|████████████████████████████| 1000/1000 [00:13<00:00, 75.01it/s]



count sample diff f1   is twice tot diff f1....... 158  / 1000    p < 0.158  
count sample diff prec is twice tot diff prec..... 162  / 1000    p < 0.162  
count sample diff rec  is twice tot diff rec ..... 174  / 1000    p < 0.174  
count sample diff acc  is twice tot diff acc...... 161  / 1000    p < 0.161  


Seed: 2803636207 - Split: 2 - education - Group: Doctoral degree
data shape:  (319, 1)
sample size: 159


bootstrap: 100%|███████████████████████████| 1000/1000 [00:09<00:00, 107.18it/s]



count sample diff f1   is twice tot diff f1....... 114  / 1000    p < 0.114  
count sample diff prec is twice tot diff prec..... 186  / 1000    p < 0.186  
count sample diff rec  is twice tot diff rec ..... 97   / 1000    p < 0.097  
count sample diff acc  is twice tot diff acc...... 189  / 1000    p < 0.189  


Seed: 2803636207 - Split: 2 - education - Group: Less than high school degree
data shape:  (184, 1)
sample size: 92


bootstrap: 100%|████████████████████████████| 1000/1000 [00:10<00:00, 98.76it/s]



count sample diff f1   is twice tot diff f1....... 266  / 1000    p < 0.266  
count sample diff prec is twice tot diff prec..... 323  / 1000    p < 0.323  
count sample diff rec  is twice tot diff rec ..... 258  / 1000    p < 0.258  
count sample diff acc  is twice tot diff acc...... 325  / 1000    p < 0.325  


Seed: 2803636207 - Split: 2 - education - Group: Professional degree (JD, MD)
data shape:  (411, 1)
sample size: 205


bootstrap: 100%|███████████████████████████| 1000/1000 [00:09<00:00, 109.16it/s]



count sample diff f1   is twice tot diff f1....... 198  / 1000    p < 0.198  
count sample diff prec is twice tot diff prec..... 162  / 1000    p < 0.162  
count sample diff rec  is twice tot diff rec ..... 233  / 1000    p < 0.233  
count sample diff acc  is twice tot diff acc...... 175  / 1000    p < 0.175  


Seed: 2803636207 - Split: 3 - education - Group: Bachelor's degree in college (4-year)
data shape:  (10589, 1)
sample size: 5294


bootstrap: 100%|████████████████████████████| 1000/1000 [00:17<00:00, 56.69it/s]



count sample diff f1   is twice tot diff f1....... 545  / 1000    p < 0.545  
count sample diff prec is twice tot diff prec..... 447  / 1000    p < 0.447  
count sample diff rec  is twice tot diff rec ..... 639  / 1000    p < 0.639  
count sample diff acc  is twice tot diff acc...... 326  / 1000    p < 0.326  


Seed: 2803636207 - Split: 3 - education - Group: High school graduate (high school diploma or equivalent including GED)
data shape:  (2032, 1)
sample size: 1016


bootstrap: 100%|████████████████████████████| 1000/1000 [00:10<00:00, 95.15it/s]



count sample diff f1   is twice tot diff f1....... 899  / 1000    p < 0.899  
count sample diff prec is twice tot diff prec..... 879  / 1000    p < 0.879  
count sample diff rec  is twice tot diff rec ..... 918  / 1000    p < 0.918  
count sample diff acc  is twice tot diff acc...... 761  / 1000    p < 0.761  


Seed: 2803636207 - Split: 3 - education - Group: Master's degree
data shape:  (3561, 1)
sample size: 1780


bootstrap: 100%|████████████████████████████| 1000/1000 [00:12<00:00, 82.42it/s]



count sample diff f1   is twice tot diff f1....... 466  / 1000    p < 0.466  
count sample diff prec is twice tot diff prec..... 458  / 1000    p < 0.458  
count sample diff rec  is twice tot diff rec ..... 467  / 1000    p < 0.467  
count sample diff acc  is twice tot diff acc...... 451  / 1000    p < 0.451  


Seed: 2803636207 - Split: 3 - education - Group: Associate degree in college (2-year)
data shape:  (2516, 1)
sample size: 1258


bootstrap: 100%|████████████████████████████| 1000/1000 [00:11<00:00, 83.64it/s]



count sample diff f1   is twice tot diff f1....... 809  / 1000    p < 0.809  
count sample diff prec is twice tot diff prec..... 783  / 1000    p < 0.783  
count sample diff rec  is twice tot diff rec ..... 826  / 1000    p < 0.826  
count sample diff acc  is twice tot diff acc...... 662  / 1000    p < 0.662  


Seed: 2803636207 - Split: 3 - education - Group: Some college but no degree
data shape:  (4718, 1)
sample size: 2359


bootstrap: 100%|████████████████████████████| 1000/1000 [00:12<00:00, 82.90it/s]



count sample diff f1   is twice tot diff f1....... 998  / 1000    p < 0.998  [38;5;8m![0m
count sample diff prec is twice tot diff prec..... 981  / 1000    p < 0.981  [38;5;8m![0m
count sample diff rec  is twice tot diff rec ..... 999  / 1000    p < 0.999  [38;5;8m![0m
count sample diff acc  is twice tot diff acc...... 810  / 1000    p < 0.81   


Seed: 2803636207 - Split: 3 - education - Group: Doctoral degree
data shape:  (327, 1)
sample size: 163


bootstrap: 100%|███████████████████████████| 1000/1000 [00:09<00:00, 103.01it/s]



count sample diff f1   is twice tot diff f1....... 846  / 1000    p < 0.846  
count sample diff prec is twice tot diff prec..... 829  / 1000    p < 0.829  
count sample diff rec  is twice tot diff rec ..... 873  / 1000    p < 0.873  
count sample diff acc  is twice tot diff acc...... 657  / 1000    p < 0.657  


Seed: 2803636207 - Split: 3 - education - Group: Less than high school degree
data shape:  (165, 1)
sample size: 82


bootstrap: 100%|███████████████████████████| 1000/1000 [00:09<00:00, 107.11it/s]



count sample diff f1   is twice tot diff f1....... 294  / 1000    p < 0.294  
count sample diff prec is twice tot diff prec..... 257  / 1000    p < 0.257  
count sample diff rec  is twice tot diff rec ..... 328  / 1000    p < 0.328  
count sample diff acc  is twice tot diff acc...... 236  / 1000    p < 0.236  


Seed: 2803636207 - Split: 3 - education - Group: Professional degree (JD, MD)
data shape:  (375, 1)
sample size: 187


bootstrap: 100%|████████████████████████████| 1000/1000 [00:10<00:00, 96.85it/s]



count sample diff f1   is twice tot diff f1....... 553  / 1000    p < 0.553  
count sample diff prec is twice tot diff prec..... 532  / 1000    p < 0.532  
count sample diff rec  is twice tot diff rec ..... 614  / 1000    p < 0.614  
count sample diff acc  is twice tot diff acc...... 445  / 1000    p < 0.445  


Seed: 165043843 - Split: 0 - education - Group: Bachelor's degree in college (4-year)
data shape:  (10700, 1)
sample size: 5350


bootstrap: 100%|████████████████████████████| 1000/1000 [00:18<00:00, 53.20it/s]



count sample diff f1   is twice tot diff f1....... 905  / 1000    p < 0.905  
count sample diff prec is twice tot diff prec..... 958  / 1000    p < 0.958  [38;5;8m![0m
count sample diff rec  is twice tot diff rec ..... 858  / 1000    p < 0.858  
count sample diff acc  is twice tot diff acc...... 970  / 1000    p < 0.97   [38;5;8m![0m


Seed: 165043843 - Split: 0 - education - Group: High school graduate (high school diploma or equivalent including GED)
data shape:  (2075, 1)
sample size: 1037


bootstrap: 100%|████████████████████████████| 1000/1000 [00:12<00:00, 81.58it/s]



count sample diff f1   is twice tot diff f1....... 860  / 1000    p < 0.86   
count sample diff prec is twice tot diff prec..... 928  / 1000    p < 0.928  
count sample diff rec  is twice tot diff rec ..... 786  / 1000    p < 0.786  
count sample diff acc  is twice tot diff acc...... 968  / 1000    p < 0.968  [38;5;8m![0m


Seed: 165043843 - Split: 0 - education - Group: Master's degree
data shape:  (3425, 1)
sample size: 1712


bootstrap: 100%|████████████████████████████| 1000/1000 [00:12<00:00, 78.41it/s]



count sample diff f1   is twice tot diff f1....... 740  / 1000    p < 0.74   
count sample diff prec is twice tot diff prec..... 713  / 1000    p < 0.713  
count sample diff rec  is twice tot diff rec ..... 747  / 1000    p < 0.747  
count sample diff acc  is twice tot diff acc...... 707  / 1000    p < 0.707  


Seed: 165043843 - Split: 0 - education - Group: Associate degree in college (2-year)
data shape:  (2598, 1)
sample size: 1299


bootstrap: 100%|████████████████████████████| 1000/1000 [00:11<00:00, 87.85it/s]



count sample diff f1   is twice tot diff f1....... 785  / 1000    p < 0.785  
count sample diff prec is twice tot diff prec..... 833  / 1000    p < 0.833  
count sample diff rec  is twice tot diff rec ..... 730  / 1000    p < 0.73   
count sample diff acc  is twice tot diff acc...... 882  / 1000    p < 0.882  


Seed: 165043843 - Split: 0 - education - Group: Some college but no degree
data shape:  (4560, 1)
sample size: 2280


bootstrap: 100%|████████████████████████████| 1000/1000 [00:15<00:00, 63.92it/s]



count sample diff f1   is twice tot diff f1....... 574  / 1000    p < 0.574  
count sample diff prec is twice tot diff prec..... 634  / 1000    p < 0.634  
count sample diff rec  is twice tot diff rec ..... 491  / 1000    p < 0.491  
count sample diff acc  is twice tot diff acc...... 796  / 1000    p < 0.796  


Seed: 165043843 - Split: 0 - education - Group: Doctoral degree
data shape:  (320, 1)
sample size: 160


bootstrap: 100%|███████████████████████████| 1000/1000 [00:08<00:00, 120.47it/s]



count sample diff f1   is twice tot diff f1....... 730  / 1000    p < 0.73   
count sample diff prec is twice tot diff prec..... 234  / 1000    p < 0.234  
count sample diff rec  is twice tot diff rec ..... 901  / 1000    p < 0.901  
count sample diff acc  is twice tot diff acc...... 222  / 1000    p < 0.222  


Seed: 165043843 - Split: 0 - education - Group: Less than high school degree
data shape:  (144, 1)
sample size: 72


bootstrap: 100%|███████████████████████████| 1000/1000 [00:08<00:00, 122.69it/s]



count sample diff f1   is twice tot diff f1....... 659  / 1000    p < 0.659  
count sample diff prec is twice tot diff prec..... 546  / 1000    p < 0.546  
count sample diff rec  is twice tot diff rec ..... 703  / 1000    p < 0.703  
count sample diff acc  is twice tot diff acc...... 262  / 1000    p < 0.262  


Seed: 165043843 - Split: 0 - education - Group: Professional degree (JD, MD)
data shape:  (384, 1)
sample size: 192


bootstrap: 100%|███████████████████████████| 1000/1000 [00:09<00:00, 110.58it/s]



count sample diff f1   is twice tot diff f1....... 536  / 1000    p < 0.536  
count sample diff prec is twice tot diff prec..... 582  / 1000    p < 0.582  
count sample diff rec  is twice tot diff rec ..... 502  / 1000    p < 0.502  
count sample diff acc  is twice tot diff acc...... 544  / 1000    p < 0.544  


Seed: 165043843 - Split: 1 - education - Group: Bachelor's degree in college (4-year)
data shape:  (10348, 1)
sample size: 5174


bootstrap: 100%|████████████████████████████| 1000/1000 [00:19<00:00, 50.55it/s]



count sample diff f1   is twice tot diff f1....... 885  / 1000    p < 0.885  
count sample diff prec is twice tot diff prec..... 897  / 1000    p < 0.897  
count sample diff rec  is twice tot diff rec ..... 886  / 1000    p < 0.886  
count sample diff acc  is twice tot diff acc...... 901  / 1000    p < 0.901  


Seed: 165043843 - Split: 1 - education - Group: High school graduate (high school diploma or equivalent including GED)
data shape:  (2075, 1)
sample size: 1037


bootstrap: 100%|████████████████████████████| 1000/1000 [00:11<00:00, 85.04it/s]



count sample diff f1   is twice tot diff f1....... 466  / 1000    p < 0.466  
count sample diff prec is twice tot diff prec..... 409  / 1000    p < 0.409  
count sample diff rec  is twice tot diff rec ..... 537  / 1000    p < 0.537  
count sample diff acc  is twice tot diff acc...... 294  / 1000    p < 0.294  


Seed: 165043843 - Split: 1 - education - Group: Master's degree
data shape:  (3580, 1)
sample size: 1790


bootstrap: 100%|████████████████████████████| 1000/1000 [00:15<00:00, 65.23it/s]



count sample diff f1   is twice tot diff f1....... 281  / 1000    p < 0.281  
count sample diff prec is twice tot diff prec..... 210  / 1000    p < 0.21   
count sample diff rec  is twice tot diff rec ..... 320  / 1000    p < 0.32   
count sample diff acc  is twice tot diff acc...... 197  / 1000    p < 0.197  


Seed: 165043843 - Split: 1 - education - Group: Associate degree in college (2-year)
data shape:  (2625, 1)
sample size: 1312


bootstrap: 100%|████████████████████████████| 1000/1000 [00:11<00:00, 90.10it/s]



count sample diff f1   is twice tot diff f1....... 386  / 1000    p < 0.386  
count sample diff prec is twice tot diff prec..... 340  / 1000    p < 0.34   
count sample diff rec  is twice tot diff rec ..... 460  / 1000    p < 0.46   
count sample diff acc  is twice tot diff acc...... 237  / 1000    p < 0.237  


Seed: 165043843 - Split: 1 - education - Group: Some college but no degree
data shape:  (4654, 1)
sample size: 2327


bootstrap: 100%|████████████████████████████| 1000/1000 [00:15<00:00, 65.44it/s]



count sample diff f1   is twice tot diff f1....... 707  / 1000    p < 0.707  
count sample diff prec is twice tot diff prec..... 696  / 1000    p < 0.696  
count sample diff rec  is twice tot diff rec ..... 717  / 1000    p < 0.717  
count sample diff acc  is twice tot diff acc...... 632  / 1000    p < 0.632  


Seed: 165043843 - Split: 1 - education - Group: Doctoral degree
data shape:  (332, 1)
sample size: 166


bootstrap: 100%|████████████████████████████| 1000/1000 [00:10<00:00, 99.08it/s]



count sample diff f1   is twice tot diff f1....... 61   / 1000    p < 0.061  
count sample diff prec is twice tot diff prec..... 35   / 1000    p < 0.035  [38;5;9m*[0m
count sample diff rec  is twice tot diff rec ..... 207  / 1000    p < 0.207  
count sample diff acc  is twice tot diff acc...... 1    / 1000    p < 0.001  [38;5;9m**[0m


Seed: 165043843 - Split: 1 - education - Group: Less than high school degree
data shape:  (179, 1)
sample size: 89


bootstrap: 100%|████████████████████████████| 1000/1000 [00:10<00:00, 94.89it/s]



count sample diff f1   is twice tot diff f1....... 154  / 1000    p < 0.154  
count sample diff prec is twice tot diff prec..... 42   / 1000    p < 0.042  [38;5;9m*[0m
count sample diff rec  is twice tot diff rec ..... 220  / 1000    p < 0.22   
count sample diff acc  is twice tot diff acc...... 31   / 1000    p < 0.031  [38;5;9m*[0m


Seed: 165043843 - Split: 1 - education - Group: Professional degree (JD, MD)
data shape:  (400, 1)
sample size: 200


bootstrap: 100%|███████████████████████████| 1000/1000 [00:08<00:00, 111.82it/s]



count sample diff f1   is twice tot diff f1....... 768  / 1000    p < 0.768  
count sample diff prec is twice tot diff prec..... 765  / 1000    p < 0.765  
count sample diff rec  is twice tot diff rec ..... 766  / 1000    p < 0.766  
count sample diff acc  is twice tot diff acc...... 741  / 1000    p < 0.741  


Seed: 165043843 - Split: 2 - education - Group: Bachelor's degree in college (4-year)
data shape:  (10497, 1)
sample size: 5248


bootstrap: 100%|████████████████████████████| 1000/1000 [00:18<00:00, 53.99it/s]



count sample diff f1   is twice tot diff f1....... 878  / 1000    p < 0.878  
count sample diff prec is twice tot diff prec..... 839  / 1000    p < 0.839  
count sample diff rec  is twice tot diff rec ..... 898  / 1000    p < 0.898  
count sample diff acc  is twice tot diff acc...... 801  / 1000    p < 0.801  


Seed: 165043843 - Split: 2 - education - Group: High school graduate (high school diploma or equivalent including GED)
data shape:  (2064, 1)
sample size: 1032


bootstrap: 100%|████████████████████████████| 1000/1000 [00:11<00:00, 84.35it/s]



count sample diff f1   is twice tot diff f1....... 732  / 1000    p < 0.732  
count sample diff prec is twice tot diff prec..... 662  / 1000    p < 0.662  
count sample diff rec  is twice tot diff rec ..... 807  / 1000    p < 0.807  
count sample diff acc  is twice tot diff acc...... 338  / 1000    p < 0.338  


Seed: 165043843 - Split: 2 - education - Group: Master's degree
data shape:  (3487, 1)
sample size: 1743


bootstrap: 100%|████████████████████████████| 1000/1000 [00:13<00:00, 75.50it/s]



count sample diff f1   is twice tot diff f1....... 517  / 1000    p < 0.517  
count sample diff prec is twice tot diff prec..... 400  / 1000    p < 0.4    
count sample diff rec  is twice tot diff rec ..... 583  / 1000    p < 0.583  
count sample diff acc  is twice tot diff acc...... 361  / 1000    p < 0.361  


Seed: 165043843 - Split: 2 - education - Group: Associate degree in college (2-year)
data shape:  (2546, 1)
sample size: 1273


bootstrap: 100%|████████████████████████████| 1000/1000 [00:10<00:00, 99.56it/s]



count sample diff f1   is twice tot diff f1....... 974  / 1000    p < 0.974  [38;5;8m![0m
count sample diff prec is twice tot diff prec..... 958  / 1000    p < 0.958  [38;5;8m![0m
count sample diff rec  is twice tot diff rec ..... 981  / 1000    p < 0.981  [38;5;8m![0m
count sample diff acc  is twice tot diff acc...... 838  / 1000    p < 0.838  


Seed: 165043843 - Split: 2 - education - Group: Some college but no degree
data shape:  (4776, 1)
sample size: 2388


bootstrap: 100%|████████████████████████████| 1000/1000 [00:12<00:00, 80.63it/s]



count sample diff f1   is twice tot diff f1....... 1000 / 1000    p < 1.0    [38;5;8m![0m
count sample diff prec is twice tot diff prec..... 998  / 1000    p < 0.998  [38;5;8m![0m
count sample diff rec  is twice tot diff rec ..... 1000 / 1000    p < 1.0    [38;5;8m![0m
count sample diff acc  is twice tot diff acc...... 899  / 1000    p < 0.899  


Seed: 165043843 - Split: 2 - education - Group: Doctoral degree
data shape:  (270, 1)
sample size: 135


bootstrap: 100%|████████████████████████████| 1000/1000 [00:10<00:00, 92.49it/s]



count sample diff f1   is twice tot diff f1....... 800  / 1000    p < 0.8    
count sample diff prec is twice tot diff prec..... 731  / 1000    p < 0.731  
count sample diff rec  is twice tot diff rec ..... 912  / 1000    p < 0.912  
count sample diff acc  is twice tot diff acc...... 531  / 1000    p < 0.531  


Seed: 165043843 - Split: 2 - education - Group: Less than high school degree
data shape:  (174, 1)
sample size: 87


bootstrap: 100%|███████████████████████████| 1000/1000 [00:08<00:00, 117.61it/s]



count sample diff f1   is twice tot diff f1....... 618  / 1000    p < 0.618  
count sample diff prec is twice tot diff prec..... 546  / 1000    p < 0.546  
count sample diff rec  is twice tot diff rec ..... 742  / 1000    p < 0.742  
count sample diff acc  is twice tot diff acc...... 319  / 1000    p < 0.319  


Seed: 165043843 - Split: 2 - education - Group: Professional degree (JD, MD)
data shape:  (375, 1)
sample size: 187


bootstrap: 100%|████████████████████████████| 1000/1000 [00:15<00:00, 66.42it/s]



count sample diff f1   is twice tot diff f1....... 596  / 1000    p < 0.596  
count sample diff prec is twice tot diff prec..... 602  / 1000    p < 0.602  
count sample diff rec  is twice tot diff rec ..... 526  / 1000    p < 0.526  
count sample diff acc  is twice tot diff acc...... 644  / 1000    p < 0.644  


Seed: 165043843 - Split: 3 - education - Group: Bachelor's degree in college (4-year)
data shape:  (10495, 1)
sample size: 5247


bootstrap: 100%|████████████████████████████| 1000/1000 [00:18<00:00, 53.33it/s]



count sample diff f1   is twice tot diff f1....... 640  / 1000    p < 0.64   
count sample diff prec is twice tot diff prec..... 465  / 1000    p < 0.465  
count sample diff rec  is twice tot diff rec ..... 764  / 1000    p < 0.764  
count sample diff acc  is twice tot diff acc...... 289  / 1000    p < 0.289  


Seed: 165043843 - Split: 3 - education - Group: High school graduate (high school diploma or equivalent including GED)
data shape:  (2106, 1)
sample size: 1053


bootstrap: 100%|████████████████████████████| 1000/1000 [00:12<00:00, 77.69it/s]



count sample diff f1   is twice tot diff f1....... 703  / 1000    p < 0.703  
count sample diff prec is twice tot diff prec..... 599  / 1000    p < 0.599  
count sample diff rec  is twice tot diff rec ..... 758  / 1000    p < 0.758  
count sample diff acc  is twice tot diff acc...... 403  / 1000    p < 0.403  


Seed: 165043843 - Split: 3 - education - Group: Master's degree
data shape:  (3568, 1)
sample size: 1784


bootstrap: 100%|████████████████████████████| 1000/1000 [00:12<00:00, 80.05it/s]



count sample diff f1   is twice tot diff f1....... 865  / 1000    p < 0.865  
count sample diff prec is twice tot diff prec..... 741  / 1000    p < 0.741  
count sample diff rec  is twice tot diff rec ..... 916  / 1000    p < 0.916  
count sample diff acc  is twice tot diff acc...... 727  / 1000    p < 0.727  


Seed: 165043843 - Split: 3 - education - Group: Associate degree in college (2-year)
data shape:  (2651, 1)
sample size: 1325


bootstrap: 100%|████████████████████████████| 1000/1000 [00:10<00:00, 99.12it/s]



count sample diff f1   is twice tot diff f1....... 980  / 1000    p < 0.98   [38;5;8m![0m
count sample diff prec is twice tot diff prec..... 932  / 1000    p < 0.932  
count sample diff rec  is twice tot diff rec ..... 992  / 1000    p < 0.992  [38;5;8m![0m
count sample diff acc  is twice tot diff acc...... 642  / 1000    p < 0.642  


Seed: 165043843 - Split: 3 - education - Group: Some college but no degree
data shape:  (4670, 1)
sample size: 2335


bootstrap: 100%|████████████████████████████| 1000/1000 [00:14<00:00, 70.00it/s]



count sample diff f1   is twice tot diff f1....... 746  / 1000    p < 0.746  
count sample diff prec is twice tot diff prec..... 644  / 1000    p < 0.644  
count sample diff rec  is twice tot diff rec ..... 825  / 1000    p < 0.825  
count sample diff acc  is twice tot diff acc...... 273  / 1000    p < 0.273  


Seed: 165043843 - Split: 3 - education - Group: Doctoral degree
data shape:  (298, 1)
sample size: 149


bootstrap: 100%|████████████████████████████| 1000/1000 [00:10<00:00, 98.02it/s]



count sample diff f1   is twice tot diff f1....... 680  / 1000    p < 0.68   
count sample diff prec is twice tot diff prec..... 568  / 1000    p < 0.568  
count sample diff rec  is twice tot diff rec ..... 828  / 1000    p < 0.828  
count sample diff acc  is twice tot diff acc...... 170  / 1000    p < 0.17   


Seed: 165043843 - Split: 3 - education - Group: Less than high school degree
data shape:  (163, 1)
sample size: 81


bootstrap: 100%|███████████████████████████| 1000/1000 [00:08<00:00, 117.95it/s]



count sample diff f1   is twice tot diff f1....... 932  / 1000    p < 0.932  
count sample diff prec is twice tot diff prec..... 875  / 1000    p < 0.875  
count sample diff rec  is twice tot diff rec ..... 955  / 1000    p < 0.955  [38;5;8m![0m
count sample diff acc  is twice tot diff acc...... 726  / 1000    p < 0.726  


Seed: 165043843 - Split: 3 - education - Group: Professional degree (JD, MD)
data shape:  (361, 1)
sample size: 180


bootstrap: 100%|███████████████████████████| 1000/1000 [00:08<00:00, 112.89it/s]



count sample diff f1   is twice tot diff f1....... 450  / 1000    p < 0.45   
count sample diff prec is twice tot diff prec..... 315  / 1000    p < 0.315  
count sample diff rec  is twice tot diff rec ..... 537  / 1000    p < 0.537  
count sample diff acc  is twice tot diff acc...... 261  / 1000    p < 0.261  


Seed: 2923262358 - Split: 0 - education - Group: Bachelor's degree in college (4-year)
data shape:  (10517, 1)
sample size: 5258


bootstrap: 100%|████████████████████████████| 1000/1000 [00:19<00:00, 51.04it/s]



count sample diff f1   is twice tot diff f1....... 375  / 1000    p < 0.375  
count sample diff prec is twice tot diff prec..... 269  / 1000    p < 0.269  
count sample diff rec  is twice tot diff rec ..... 501  / 1000    p < 0.501  
count sample diff acc  is twice tot diff acc...... 197  / 1000    p < 0.197  


Seed: 2923262358 - Split: 0 - education - Group: High school graduate (high school diploma or equivalent including GED)
data shape:  (2015, 1)
sample size: 1007


bootstrap: 100%|████████████████████████████| 1000/1000 [00:11<00:00, 85.02it/s]



count sample diff f1   is twice tot diff f1....... 348  / 1000    p < 0.348  
count sample diff prec is twice tot diff prec..... 431  / 1000    p < 0.431  
count sample diff rec  is twice tot diff rec ..... 208  / 1000    p < 0.208  
count sample diff acc  is twice tot diff acc...... 693  / 1000    p < 0.693  


Seed: 2923262358 - Split: 0 - education - Group: Master's degree
data shape:  (3562, 1)
sample size: 1781


bootstrap: 100%|████████████████████████████| 1000/1000 [00:11<00:00, 84.59it/s]



count sample diff f1   is twice tot diff f1....... 341  / 1000    p < 0.341  
count sample diff prec is twice tot diff prec..... 331  / 1000    p < 0.331  
count sample diff rec  is twice tot diff rec ..... 356  / 1000    p < 0.356  
count sample diff acc  is twice tot diff acc...... 330  / 1000    p < 0.33   


Seed: 2923262358 - Split: 0 - education - Group: Associate degree in college (2-year)
data shape:  (2619, 1)
sample size: 1309


bootstrap: 100%|████████████████████████████| 1000/1000 [00:11<00:00, 90.28it/s]



count sample diff f1   is twice tot diff f1....... 477  / 1000    p < 0.477  
count sample diff prec is twice tot diff prec..... 400  / 1000    p < 0.4    
count sample diff rec  is twice tot diff rec ..... 637  / 1000    p < 0.637  
count sample diff acc  is twice tot diff acc...... 166  / 1000    p < 0.166  


Seed: 2923262358 - Split: 0 - education - Group: Some college but no degree
data shape:  (4698, 1)
sample size: 2349


bootstrap: 100%|████████████████████████████| 1000/1000 [00:16<00:00, 60.79it/s]



count sample diff f1   is twice tot diff f1....... 971  / 1000    p < 0.971  [38;5;8m![0m
count sample diff prec is twice tot diff prec..... 972  / 1000    p < 0.972  [38;5;8m![0m
count sample diff rec  is twice tot diff rec ..... 970  / 1000    p < 0.97   [38;5;8m![0m
count sample diff acc  is twice tot diff acc...... 948  / 1000    p < 0.948  


Seed: 2923262358 - Split: 0 - education - Group: Doctoral degree
data shape:  (306, 1)
sample size: 153


bootstrap: 100%|████████████████████████████| 1000/1000 [00:10<00:00, 92.88it/s]



count sample diff f1   is twice tot diff f1....... 179  / 1000    p < 0.179  
count sample diff prec is twice tot diff prec..... 68   / 1000    p < 0.068  
count sample diff rec  is twice tot diff rec ..... 250  / 1000    p < 0.25   
count sample diff acc  is twice tot diff acc...... 48   / 1000    p < 0.048  [38;5;9m*[0m


Seed: 2923262358 - Split: 0 - education - Group: Less than high school degree
data shape:  (158, 1)
sample size: 79


bootstrap: 100%|████████████████████████████| 1000/1000 [00:12<00:00, 81.44it/s]



count sample diff f1   is twice tot diff f1....... 32   / 1000    p < 0.032  [38;5;9m*[0m
count sample diff prec is twice tot diff prec..... 23   / 1000    p < 0.023  [38;5;9m*[0m
count sample diff rec  is twice tot diff rec ..... 50   / 1000    p < 0.05   
count sample diff acc  is twice tot diff acc...... 12   / 1000    p < 0.012  [38;5;9m*[0m


Seed: 2923262358 - Split: 0 - education - Group: Professional degree (JD, MD)
data shape:  (352, 1)
sample size: 176


bootstrap: 100%|████████████████████████████| 1000/1000 [00:13<00:00, 76.67it/s]



count sample diff f1   is twice tot diff f1....... 693  / 1000    p < 0.693  
count sample diff prec is twice tot diff prec..... 742  / 1000    p < 0.742  
count sample diff rec  is twice tot diff rec ..... 598  / 1000    p < 0.598  
count sample diff acc  is twice tot diff acc...... 781  / 1000    p < 0.781  


Seed: 2923262358 - Split: 1 - education - Group: Bachelor's degree in college (4-year)
data shape:  (10498, 1)
sample size: 5249


bootstrap: 100%|████████████████████████████| 1000/1000 [00:19<00:00, 50.95it/s]



count sample diff f1   is twice tot diff f1....... 998  / 1000    p < 0.998  [38;5;8m![0m
count sample diff prec is twice tot diff prec..... 971  / 1000    p < 0.971  [38;5;8m![0m
count sample diff rec  is twice tot diff rec ..... 999  / 1000    p < 0.999  [38;5;8m![0m
count sample diff acc  is twice tot diff acc...... 886  / 1000    p < 0.886  


Seed: 2923262358 - Split: 1 - education - Group: High school graduate (high school diploma or equivalent including GED)
data shape:  (2139, 1)
sample size: 1069


bootstrap: 100%|████████████████████████████| 1000/1000 [00:11<00:00, 87.36it/s]



count sample diff f1   is twice tot diff f1....... 792  / 1000    p < 0.792  
count sample diff prec is twice tot diff prec..... 827  / 1000    p < 0.827  
count sample diff rec  is twice tot diff rec ..... 704  / 1000    p < 0.704  
count sample diff acc  is twice tot diff acc...... 950  / 1000    p < 0.95   


Seed: 2923262358 - Split: 1 - education - Group: Master's degree
data shape:  (3486, 1)
sample size: 1743


bootstrap: 100%|████████████████████████████| 1000/1000 [00:14<00:00, 68.62it/s]



count sample diff f1   is twice tot diff f1....... 98   / 1000    p < 0.098  
count sample diff prec is twice tot diff prec..... 101  / 1000    p < 0.101  
count sample diff rec  is twice tot diff rec ..... 96   / 1000    p < 0.096  
count sample diff acc  is twice tot diff acc...... 96   / 1000    p < 0.096  


Seed: 2923262358 - Split: 1 - education - Group: Associate degree in college (2-year)
data shape:  (2588, 1)
sample size: 1294


bootstrap: 100%|████████████████████████████| 1000/1000 [00:10<00:00, 91.84it/s]



count sample diff f1   is twice tot diff f1....... 823  / 1000    p < 0.823  
count sample diff prec is twice tot diff prec..... 722  / 1000    p < 0.722  
count sample diff rec  is twice tot diff rec ..... 901  / 1000    p < 0.901  
count sample diff acc  is twice tot diff acc...... 285  / 1000    p < 0.285  


Seed: 2923262358 - Split: 1 - education - Group: Some college but no degree
data shape:  (4666, 1)
sample size: 2333


bootstrap: 100%|████████████████████████████| 1000/1000 [00:13<00:00, 74.08it/s]



count sample diff f1   is twice tot diff f1....... 719  / 1000    p < 0.719  
count sample diff prec is twice tot diff prec..... 667  / 1000    p < 0.667  
count sample diff rec  is twice tot diff rec ..... 758  / 1000    p < 0.758  
count sample diff acc  is twice tot diff acc...... 418  / 1000    p < 0.418  


Seed: 2923262358 - Split: 1 - education - Group: Doctoral degree
data shape:  (295, 1)
sample size: 147


bootstrap: 100%|████████████████████████████| 1000/1000 [00:10<00:00, 97.81it/s]



count sample diff f1   is twice tot diff f1....... 836  / 1000    p < 0.836  
count sample diff prec is twice tot diff prec..... 608  / 1000    p < 0.608  
count sample diff rec  is twice tot diff rec ..... 849  / 1000    p < 0.849  
count sample diff acc  is twice tot diff acc...... 427  / 1000    p < 0.427  


Seed: 2923262358 - Split: 1 - education - Group: Less than high school degree
data shape:  (164, 1)
sample size: 82


  _warn_prf(average, modifier, msg_start, len(result))
bootstrap: 100%|███████████████████████████| 1000/1000 [00:08<00:00, 118.24it/s]



count sample diff f1   is twice tot diff f1....... 745  / 1000    p < 0.745  
count sample diff prec is twice tot diff prec..... 409  / 1000    p < 0.409  
count sample diff rec  is twice tot diff rec ..... 762  / 1000    p < 0.762  
count sample diff acc  is twice tot diff acc...... 332  / 1000    p < 0.332  


Seed: 2923262358 - Split: 1 - education - Group: Professional degree (JD, MD)
data shape:  (378, 1)
sample size: 189


bootstrap: 100%|████████████████████████████| 1000/1000 [00:10<00:00, 96.03it/s]



count sample diff f1   is twice tot diff f1....... 782  / 1000    p < 0.782  
count sample diff prec is twice tot diff prec..... 425  / 1000    p < 0.425  
count sample diff rec  is twice tot diff rec ..... 811  / 1000    p < 0.811  
count sample diff acc  is twice tot diff acc...... 492  / 1000    p < 0.492  


Seed: 2923262358 - Split: 2 - education - Group: Bachelor's degree in college (4-year)
data shape:  (10478, 1)
sample size: 5239


bootstrap: 100%|████████████████████████████| 1000/1000 [00:20<00:00, 49.95it/s]



count sample diff f1   is twice tot diff f1....... 982  / 1000    p < 0.982  [38;5;8m![0m
count sample diff prec is twice tot diff prec..... 951  / 1000    p < 0.951  [38;5;8m![0m
count sample diff rec  is twice tot diff rec ..... 994  / 1000    p < 0.994  [38;5;8m![0m
count sample diff acc  is twice tot diff acc...... 924  / 1000    p < 0.924  


Seed: 2923262358 - Split: 2 - education - Group: High school graduate (high school diploma or equivalent including GED)
data shape:  (2076, 1)
sample size: 1038


bootstrap: 100%|████████████████████████████| 1000/1000 [00:10<00:00, 94.04it/s]



count sample diff f1   is twice tot diff f1....... 975  / 1000    p < 0.975  [38;5;8m![0m
count sample diff prec is twice tot diff prec..... 972  / 1000    p < 0.972  [38;5;8m![0m
count sample diff rec  is twice tot diff rec ..... 977  / 1000    p < 0.977  [38;5;8m![0m
count sample diff acc  is twice tot diff acc...... 955  / 1000    p < 0.955  [38;5;8m![0m


Seed: 2923262358 - Split: 2 - education - Group: Master's degree
data shape:  (3516, 1)
sample size: 1758


bootstrap: 100%|████████████████████████████| 1000/1000 [00:15<00:00, 65.97it/s]



count sample diff f1   is twice tot diff f1....... 274  / 1000    p < 0.274  
count sample diff prec is twice tot diff prec..... 213  / 1000    p < 0.213  
count sample diff rec  is twice tot diff rec ..... 310  / 1000    p < 0.31   
count sample diff acc  is twice tot diff acc...... 212  / 1000    p < 0.212  


Seed: 2923262358 - Split: 2 - education - Group: Associate degree in college (2-year)
data shape:  (2575, 1)
sample size: 1287


bootstrap: 100%|████████████████████████████| 1000/1000 [00:14<00:00, 70.70it/s]



count sample diff f1   is twice tot diff f1....... 852  / 1000    p < 0.852  
count sample diff prec is twice tot diff prec..... 841  / 1000    p < 0.841  
count sample diff rec  is twice tot diff rec ..... 855  / 1000    p < 0.855  
count sample diff acc  is twice tot diff acc...... 754  / 1000    p < 0.754  


Seed: 2923262358 - Split: 2 - education - Group: Some college but no degree
data shape:  (4686, 1)
sample size: 2343


bootstrap: 100%|████████████████████████████| 1000/1000 [00:13<00:00, 72.30it/s]



count sample diff f1   is twice tot diff f1....... 159  / 1000    p < 0.159  
count sample diff prec is twice tot diff prec..... 251  / 1000    p < 0.251  
count sample diff rec  is twice tot diff rec ..... 108  / 1000    p < 0.108  
count sample diff acc  is twice tot diff acc...... 684  / 1000    p < 0.684  


Seed: 2923262358 - Split: 2 - education - Group: Doctoral degree
data shape:  (308, 1)
sample size: 154


bootstrap: 100%|███████████████████████████| 1000/1000 [00:09<00:00, 101.97it/s]



count sample diff f1   is twice tot diff f1....... 918  / 1000    p < 0.918  
count sample diff prec is twice tot diff prec..... 929  / 1000    p < 0.929  
count sample diff rec  is twice tot diff rec ..... 915  / 1000    p < 0.915  
count sample diff acc  is twice tot diff acc...... 951  / 1000    p < 0.951  [38;5;8m![0m


Seed: 2923262358 - Split: 2 - education - Group: Less than high school degree
data shape:  (168, 1)
sample size: 84


bootstrap: 100%|████████████████████████████| 1000/1000 [00:11<00:00, 85.20it/s]



count sample diff f1   is twice tot diff f1....... 484  / 1000    p < 0.484  
count sample diff prec is twice tot diff prec..... 187  / 1000    p < 0.187  
count sample diff rec  is twice tot diff rec ..... 591  / 1000    p < 0.591  
count sample diff acc  is twice tot diff acc...... 64   / 1000    p < 0.064  


Seed: 2923262358 - Split: 2 - education - Group: Professional degree (JD, MD)
data shape:  (402, 1)
sample size: 201


bootstrap: 100%|████████████████████████████| 1000/1000 [00:11<00:00, 89.08it/s]



count sample diff f1   is twice tot diff f1....... 942  / 1000    p < 0.942  
count sample diff prec is twice tot diff prec..... 948  / 1000    p < 0.948  
count sample diff rec  is twice tot diff rec ..... 934  / 1000    p < 0.934  
count sample diff acc  is twice tot diff acc...... 964  / 1000    p < 0.964  [38;5;8m![0m


Seed: 2923262358 - Split: 3 - education - Group: Bachelor's degree in college (4-year)
data shape:  (10547, 1)
sample size: 5273


bootstrap: 100%|████████████████████████████| 1000/1000 [00:20<00:00, 48.57it/s]



count sample diff f1   is twice tot diff f1....... 557  / 1000    p < 0.557  
count sample diff prec is twice tot diff prec..... 733  / 1000    p < 0.733  
count sample diff rec  is twice tot diff rec ..... 390  / 1000    p < 0.39   
count sample diff acc  is twice tot diff acc...... 866  / 1000    p < 0.866  


Seed: 2923262358 - Split: 3 - education - Group: High school graduate (high school diploma or equivalent including GED)
data shape:  (2090, 1)
sample size: 1045


bootstrap: 100%|████████████████████████████| 1000/1000 [00:14<00:00, 68.59it/s]



count sample diff f1   is twice tot diff f1....... 23   / 1000    p < 0.023  [38;5;9m*[0m
count sample diff prec is twice tot diff prec..... 41   / 1000    p < 0.041  [38;5;9m*[0m
count sample diff rec  is twice tot diff rec ..... 8    / 1000    p < 0.008  [38;5;9m**[0m
count sample diff acc  is twice tot diff acc...... 265  / 1000    p < 0.265  


Seed: 2923262358 - Split: 3 - education - Group: Master's degree
data shape:  (3496, 1)
sample size: 1748


bootstrap: 100%|████████████████████████████| 1000/1000 [00:13<00:00, 74.85it/s]



count sample diff f1   is twice tot diff f1....... 606  / 1000    p < 0.606  
count sample diff prec is twice tot diff prec..... 705  / 1000    p < 0.705  
count sample diff rec  is twice tot diff rec ..... 568  / 1000    p < 0.568  
count sample diff acc  is twice tot diff acc...... 707  / 1000    p < 0.707  


Seed: 2923262358 - Split: 3 - education - Group: Associate degree in college (2-year)
data shape:  (2638, 1)
sample size: 1319


bootstrap: 100%|████████████████████████████| 1000/1000 [00:15<00:00, 65.02it/s]



count sample diff f1   is twice tot diff f1....... 10   / 1000    p < 0.01   [38;5;9m*[0m
count sample diff prec is twice tot diff prec..... 29   / 1000    p < 0.029  [38;5;9m*[0m
count sample diff rec  is twice tot diff rec ..... 4    / 1000    p < 0.004  [38;5;9m**[0m
count sample diff acc  is twice tot diff acc...... 123  / 1000    p < 0.123  


Seed: 2923262358 - Split: 3 - education - Group: Some college but no degree
data shape:  (4610, 1)
sample size: 2305


bootstrap: 100%|████████████████████████████| 1000/1000 [00:12<00:00, 77.88it/s]



count sample diff f1   is twice tot diff f1....... 68   / 1000    p < 0.068  
count sample diff prec is twice tot diff prec..... 225  / 1000    p < 0.225  
count sample diff rec  is twice tot diff rec ..... 24   / 1000    p < 0.024  [38;5;9m*[0m
count sample diff acc  is twice tot diff acc...... 866  / 1000    p < 0.866  


Seed: 2923262358 - Split: 3 - education - Group: Doctoral degree
data shape:  (311, 1)
sample size: 155


bootstrap: 100%|███████████████████████████| 1000/1000 [00:08<00:00, 116.53it/s]



count sample diff f1   is twice tot diff f1....... 118  / 1000    p < 0.118  
count sample diff prec is twice tot diff prec..... 199  / 1000    p < 0.199  
count sample diff rec  is twice tot diff rec ..... 68   / 1000    p < 0.068  
count sample diff acc  is twice tot diff acc...... 446  / 1000    p < 0.446  


Seed: 2923262358 - Split: 3 - education - Group: Less than high school degree
data shape:  (170, 1)
sample size: 85


bootstrap: 100%|████████████████████████████| 1000/1000 [00:10<00:00, 99.63it/s]



count sample diff f1   is twice tot diff f1....... 7    / 1000    p < 0.007  [38;5;9m**[0m
count sample diff prec is twice tot diff prec..... 19   / 1000    p < 0.019  [38;5;9m*[0m
count sample diff rec  is twice tot diff rec ..... 2    / 1000    p < 0.002  [38;5;9m**[0m
count sample diff acc  is twice tot diff acc...... 42   / 1000    p < 0.042  [38;5;9m*[0m


Seed: 2923262358 - Split: 3 - education - Group: Professional degree (JD, MD)
data shape:  (388, 1)
sample size: 194


bootstrap: 100%|████████████████████████████| 1000/1000 [00:10<00:00, 94.99it/s]


count sample diff f1   is twice tot diff f1....... 365  / 1000    p < 0.365  
count sample diff prec is twice tot diff prec..... 357  / 1000    p < 0.357  
count sample diff rec  is twice tot diff rec ..... 408  / 1000    p < 0.408  
count sample diff acc  is twice tot diff acc...... 337  / 1000    p < 0.337  





In [18]:
# Derive the k estimates per education group from the significance-test
# results collected in `education_p_dicts`.
# The result maps each group label to a dict with the keys 'k_count' and
# 'k_bonferroni' (see the displayed output).
# NOTE(review): -1 presumably marks "no k found" for a group — confirm
# against k_estimator_groups.
education_ks = k_estimator_groups(education_p_dicts)
education_ks

{'Professional degree (JD, MD)': {'k_count': -1, 'k_bonferroni': -1},
 'Doctoral degree': {'k_count': -1, 'k_bonferroni': -1},
 'High school graduate (high school diploma or equivalent including GED)': {'k_count': 2,
  'k_bonferroni': -1},
 "Bachelor's degree in college (4-year)": {'k_count': 1, 'k_bonferroni': -1},
 'Associate degree in college (2-year)': {'k_count': 2, 'k_bonferroni': -1},
 'Less than high school degree': {'k_count': 2, 'k_bonferroni': -1},
 'Some college but no degree': {'k_count': 1, 'k_bonferroni': 1},
 "Master's degree": {'k_count': -1, 'k_bonferroni': -1}}

In [19]:
# Build the table of k estimates for the education attribute.
# Transposing puts one education group per row; rows are ordered by the
# original (long) group label *before* the labels are shortened.
education_ks_df = pd.DataFrame(education_ks).T.sort_index()

# Show the -1 entries as 0 in the table.
# NOTE(review): -1 presumably is the "no estimate" sentinel returned by
# k_estimator_groups — confirm before interpreting 0 as a real value.
education_ks_df = education_ks_df.replace({-1: 0})

# Short display labels for the table; groups not listed here keep their
# original label (e.g. "Doctoral degree", "Master's degree").
rename_mapping = {
    'Less than high school degree': 'Below high school',
    'Some college but no degree': 'College, no degree',
    'Associate degree in college (2-year)': 'Associate degree',
    "Bachelor's degree in college (4-year)": "Bachelor's degree",
    'High school graduate (high school diploma or equivalent including GED)': 'High school',
    'Professional degree (JD, MD)': 'Professional degree',
}

# Raw strings keep the LaTeX backslashes literal; '\h' in a normal string
# literal is an invalid escape sequence and triggers a warning.
education_ks_df = education_ks_df.rename(
    index=rename_mapping,
    columns={
        'k_count': r'$\hat{k}_{count}$',
        'k_bonferroni': r'$\hat{k}_{Bonf.}$',
    },
)
education_ks_df

Unnamed: 0,$\hat{k}_{count}$,$\hat{k}_{Bonf.}$
Associate degree,2,0
Bachelor's degree,1,0
Doctoral degree,0,0
High school,2,0
Belowhigh school,2,0
Master's degree,0,0
Professional degree,0,0
"College, no degree",1,1


In [20]:
# Write the education table to the LaTeX file used for the randomized
# replication tables.
education_ks_df.to_latex(
    '../tables/replication/randomized/education.tex',
    escape=False,  # column headers contain LaTeX math ($\hat{k}...$); do not escape them
)

### Sexual Orientation

In [21]:
# Per-group significance test for the sexual-orientation attribute.
# The call prints one bootstrap report per seed, split and group.
# NOTE(review): attribute_a / attribute_b presumably select the two sets of
# runs being compared ('randomized' vs. 'lgbq') — confirm against
# significance_test_per_group.
lgbq_p_dicts = significance_test_per_group(
    df,
    attribute_a='randomized',
    attribute_b='lgbq',
    to_groups=to_groups,
    attributes=['lgbtq_status'],
)



Seed: 2803636207 - Split: 0 - lgbtq_status - Group: Heterosexual
data shape:  (22623, 1)
sample size: 11311


bootstrap: 100%|████████████████████████████| 1000/1000 [00:34<00:00, 29.22it/s]



count sample diff f1   is twice tot diff f1....... 1000 / 1000    p < 1.0    [38;5;8m![0m
count sample diff prec is twice tot diff prec..... 1000 / 1000    p < 1.0    [38;5;8m![0m
count sample diff rec  is twice tot diff rec ..... 1000 / 1000    p < 1.0    [38;5;8m![0m
count sample diff acc  is twice tot diff acc...... 1000 / 1000    p < 1.0    [38;5;8m![0m


Seed: 2803636207 - Split: 0 - lgbtq_status - Group: Homosexual
data shape:  (739, 1)
sample size: 369


bootstrap: 100%|████████████████████████████| 1000/1000 [00:11<00:00, 90.56it/s]



count sample diff f1   is twice tot diff f1....... 679  / 1000    p < 0.679  
count sample diff prec is twice tot diff prec..... 835  / 1000    p < 0.835  
count sample diff rec  is twice tot diff rec ..... 557  / 1000    p < 0.557  
count sample diff acc  is twice tot diff acc...... 943  / 1000    p < 0.943  


Seed: 2803636207 - Split: 0 - lgbtq_status - Group: Bisexual
data shape:  (2418, 1)
sample size: 1209


bootstrap: 100%|████████████████████████████| 1000/1000 [00:12<00:00, 81.70it/s]



count sample diff f1   is twice tot diff f1....... 41   / 1000    p < 0.041  [38;5;9m*[0m
count sample diff prec is twice tot diff prec..... 40   / 1000    p < 0.04   [38;5;9m*[0m
count sample diff rec  is twice tot diff rec ..... 41   / 1000    p < 0.041  [38;5;9m*[0m
count sample diff acc  is twice tot diff acc...... 40   / 1000    p < 0.04   [38;5;9m*[0m


Seed: 2803636207 - Split: 1 - lgbtq_status - Group: Heterosexual
data shape:  (22678, 1)
sample size: 11339


bootstrap: 100%|████████████████████████████| 1000/1000 [00:31<00:00, 31.39it/s]



count sample diff f1   is twice tot diff f1....... 789  / 1000    p < 0.789  
count sample diff prec is twice tot diff prec..... 874  / 1000    p < 0.874  
count sample diff rec  is twice tot diff rec ..... 615  / 1000    p < 0.615  
count sample diff acc  is twice tot diff acc...... 974  / 1000    p < 0.974  [38;5;8m![0m


Seed: 2803636207 - Split: 1 - lgbtq_status - Group: Homosexual
data shape:  (686, 1)
sample size: 343


bootstrap: 100%|████████████████████████████| 1000/1000 [00:11<00:00, 85.09it/s]



count sample diff f1   is twice tot diff f1....... 966  / 1000    p < 0.966  [38;5;8m![0m
count sample diff prec is twice tot diff prec..... 949  / 1000    p < 0.949  
count sample diff rec  is twice tot diff rec ..... 974  / 1000    p < 0.974  [38;5;8m![0m
count sample diff acc  is twice tot diff acc...... 919  / 1000    p < 0.919  


Seed: 2803636207 - Split: 1 - lgbtq_status - Group: Bisexual
data shape:  (2450, 1)
sample size: 1225


bootstrap: 100%|████████████████████████████| 1000/1000 [00:11<00:00, 90.10it/s]



count sample diff f1   is twice tot diff f1....... 647  / 1000    p < 0.647  
count sample diff prec is twice tot diff prec..... 515  / 1000    p < 0.515  
count sample diff rec  is twice tot diff rec ..... 654  / 1000    p < 0.654  
count sample diff acc  is twice tot diff acc...... 559  / 1000    p < 0.559  


Seed: 2803636207 - Split: 2 - lgbtq_status - Group: Heterosexual
data shape:  (22603, 1)
sample size: 11301


bootstrap: 100%|████████████████████████████| 1000/1000 [00:30<00:00, 32.85it/s]



count sample diff f1   is twice tot diff f1....... 1000 / 1000    p < 1.0    [38;5;8m![0m
count sample diff prec is twice tot diff prec..... 999  / 1000    p < 0.999  [38;5;8m![0m
count sample diff rec  is twice tot diff rec ..... 1000 / 1000    p < 1.0    [38;5;8m![0m
count sample diff acc  is twice tot diff acc...... 967  / 1000    p < 0.967  [38;5;8m![0m


Seed: 2803636207 - Split: 2 - lgbtq_status - Group: Homosexual
data shape:  (733, 1)
sample size: 366


bootstrap: 100%|███████████████████████████| 1000/1000 [00:08<00:00, 113.67it/s]



count sample diff f1   is twice tot diff f1....... 150  / 1000    p < 0.15   
count sample diff prec is twice tot diff prec..... 160  / 1000    p < 0.16   
count sample diff rec  is twice tot diff rec ..... 142  / 1000    p < 0.142  
count sample diff acc  is twice tot diff acc...... 169  / 1000    p < 0.169  


Seed: 2803636207 - Split: 2 - lgbtq_status - Group: Bisexual
data shape:  (2478, 1)
sample size: 1239


bootstrap: 100%|████████████████████████████| 1000/1000 [00:11<00:00, 88.66it/s]



count sample diff f1   is twice tot diff f1....... 7    / 1000    p < 0.007  [38;5;9m**[0m
count sample diff prec is twice tot diff prec..... 7    / 1000    p < 0.007  [38;5;9m**[0m
count sample diff rec  is twice tot diff rec ..... 7    / 1000    p < 0.007  [38;5;9m**[0m
count sample diff acc  is twice tot diff acc...... 7    / 1000    p < 0.007  [38;5;9m**[0m


Seed: 2803636207 - Split: 3 - lgbtq_status - Group: Heterosexual
data shape:  (22616, 1)
sample size: 11308


bootstrap: 100%|████████████████████████████| 1000/1000 [00:32<00:00, 31.09it/s]



count sample diff f1   is twice tot diff f1....... 1000 / 1000    p < 1.0    [38;5;8m![0m
count sample diff prec is twice tot diff prec..... 1000 / 1000    p < 1.0    [38;5;8m![0m
count sample diff rec  is twice tot diff rec ..... 1000 / 1000    p < 1.0    [38;5;8m![0m
count sample diff acc  is twice tot diff acc...... 989  / 1000    p < 0.989  [38;5;8m![0m


Seed: 2803636207 - Split: 3 - lgbtq_status - Group: Homosexual
data shape:  (742, 1)
sample size: 371


bootstrap: 100%|████████████████████████████| 1000/1000 [00:10<00:00, 95.22it/s]



count sample diff f1   is twice tot diff f1....... 815  / 1000    p < 0.815  
count sample diff prec is twice tot diff prec..... 794  / 1000    p < 0.794  
count sample diff rec  is twice tot diff rec ..... 846  / 1000    p < 0.846  
count sample diff acc  is twice tot diff acc...... 749  / 1000    p < 0.749  


Seed: 2803636207 - Split: 3 - lgbtq_status - Group: Bisexual
data shape:  (2434, 1)
sample size: 1217


bootstrap: 100%|████████████████████████████| 1000/1000 [00:14<00:00, 68.46it/s]



count sample diff f1   is twice tot diff f1....... 1    / 1000    p < 0.001  [38;5;9m**[0m
count sample diff prec is twice tot diff prec..... 1    / 1000    p < 0.001  [38;5;9m**[0m
count sample diff rec  is twice tot diff rec ..... 1    / 1000    p < 0.001  [38;5;9m**[0m
count sample diff acc  is twice tot diff acc...... 1    / 1000    p < 0.001  [38;5;9m**[0m


Seed: 165043843 - Split: 0 - lgbtq_status - Group: Heterosexual
data shape:  (22615, 1)
sample size: 11307


bootstrap: 100%|████████████████████████████| 1000/1000 [00:36<00:00, 27.49it/s]



count sample diff f1   is twice tot diff f1....... 280  / 1000    p < 0.28   
count sample diff prec is twice tot diff prec..... 248  / 1000    p < 0.248  
count sample diff rec  is twice tot diff rec ..... 313  / 1000    p < 0.313  
count sample diff acc  is twice tot diff acc...... 216  / 1000    p < 0.216  


Seed: 165043843 - Split: 0 - lgbtq_status - Group: Homosexual
data shape:  (742, 1)
sample size: 371


bootstrap: 100%|████████████████████████████| 1000/1000 [00:12<00:00, 81.23it/s]



count sample diff f1   is twice tot diff f1....... 159  / 1000    p < 0.159  
count sample diff prec is twice tot diff prec..... 127  / 1000    p < 0.127  
count sample diff rec  is twice tot diff rec ..... 262  / 1000    p < 0.262  
count sample diff acc  is twice tot diff acc...... 71   / 1000    p < 0.071  


Seed: 165043843 - Split: 0 - lgbtq_status - Group: Bisexual
data shape:  (2387, 1)
sample size: 1193


bootstrap: 100%|████████████████████████████| 1000/1000 [00:10<00:00, 96.57it/s]



count sample diff f1   is twice tot diff f1....... 317  / 1000    p < 0.317  
count sample diff prec is twice tot diff prec..... 228  / 1000    p < 0.228  
count sample diff rec  is twice tot diff rec ..... 300  / 1000    p < 0.3    
count sample diff acc  is twice tot diff acc...... 268  / 1000    p < 0.268  


Seed: 165043843 - Split: 1 - lgbtq_status - Group: Heterosexual
data shape:  (22659, 1)
sample size: 11329


bootstrap: 100%|████████████████████████████| 1000/1000 [00:30<00:00, 32.76it/s]



count sample diff f1   is twice tot diff f1....... 0    / 1000    p < 0.0    [38;5;9m**[0m
count sample diff prec is twice tot diff prec..... 5    / 1000    p < 0.005  [38;5;9m**[0m
count sample diff rec  is twice tot diff rec ..... 0    / 1000    p < 0.0    [38;5;9m**[0m
count sample diff acc  is twice tot diff acc...... 151  / 1000    p < 0.151  


Seed: 165043843 - Split: 1 - lgbtq_status - Group: Homosexual
data shape:  (729, 1)
sample size: 364


bootstrap: 100%|███████████████████████████| 1000/1000 [00:08<00:00, 112.73it/s]



count sample diff f1   is twice tot diff f1....... 158  / 1000    p < 0.158  
count sample diff prec is twice tot diff prec..... 135  / 1000    p < 0.135  
count sample diff rec  is twice tot diff rec ..... 171  / 1000    p < 0.171  
count sample diff acc  is twice tot diff acc...... 138  / 1000    p < 0.138  


Seed: 165043843 - Split: 1 - lgbtq_status - Group: Bisexual
data shape:  (2413, 1)
sample size: 1206


bootstrap: 100%|████████████████████████████| 1000/1000 [00:11<00:00, 87.08it/s]



count sample diff f1   is twice tot diff f1....... 986  / 1000    p < 0.986  [38;5;8m![0m
count sample diff prec is twice tot diff prec..... 985  / 1000    p < 0.985  [38;5;8m![0m
count sample diff rec  is twice tot diff rec ..... 986  / 1000    p < 0.986  [38;5;8m![0m
count sample diff acc  is twice tot diff acc...... 985  / 1000    p < 0.985  [38;5;8m![0m


Seed: 165043843 - Split: 2 - lgbtq_status - Group: Heterosexual
data shape:  (22538, 1)
sample size: 11269


bootstrap: 100%|████████████████████████████| 1000/1000 [00:31<00:00, 32.11it/s]



count sample diff f1   is twice tot diff f1....... 412  / 1000    p < 0.412  
count sample diff prec is twice tot diff prec..... 352  / 1000    p < 0.352  
count sample diff rec  is twice tot diff rec ..... 474  / 1000    p < 0.474  
count sample diff acc  is twice tot diff acc...... 242  / 1000    p < 0.242  


Seed: 165043843 - Split: 2 - lgbtq_status - Group: Homosexual
data shape:  (759, 1)
sample size: 379


bootstrap: 100%|███████████████████████████| 1000/1000 [00:09<00:00, 109.53it/s]



count sample diff f1   is twice tot diff f1....... 407  / 1000    p < 0.407  
count sample diff prec is twice tot diff prec..... 407  / 1000    p < 0.407  
count sample diff rec  is twice tot diff rec ..... 408  / 1000    p < 0.408  
count sample diff acc  is twice tot diff acc...... 408  / 1000    p < 0.408  


Seed: 165043843 - Split: 2 - lgbtq_status - Group: Bisexual
data shape:  (2479, 1)
sample size: 1239


bootstrap: 100%|████████████████████████████| 1000/1000 [00:10<00:00, 94.17it/s]



count sample diff f1   is twice tot diff f1....... 235  / 1000    p < 0.235  
count sample diff prec is twice tot diff prec..... 241  / 1000    p < 0.241  
count sample diff rec  is twice tot diff rec ..... 235  / 1000    p < 0.235  
count sample diff acc  is twice tot diff acc...... 230  / 1000    p < 0.23   


Seed: 165043843 - Split: 3 - lgbtq_status - Group: Heterosexual
data shape:  (22708, 1)
sample size: 11354


bootstrap: 100%|████████████████████████████| 1000/1000 [00:30<00:00, 33.07it/s]



count sample diff f1   is twice tot diff f1....... 1000 / 1000    p < 1.0    [38;5;8m![0m
count sample diff prec is twice tot diff prec..... 992  / 1000    p < 0.992  [38;5;8m![0m
count sample diff rec  is twice tot diff rec ..... 1000 / 1000    p < 1.0    [38;5;8m![0m
count sample diff acc  is twice tot diff acc...... 712  / 1000    p < 0.712  


Seed: 165043843 - Split: 3 - lgbtq_status - Group: Homosexual
data shape:  (670, 1)
sample size: 335


bootstrap: 100%|████████████████████████████| 1000/1000 [00:12<00:00, 80.54it/s]



count sample diff f1   is twice tot diff f1....... 88   / 1000    p < 0.088  
count sample diff prec is twice tot diff prec..... 70   / 1000    p < 0.07   
count sample diff rec  is twice tot diff rec ..... 189  / 1000    p < 0.189  
count sample diff acc  is twice tot diff acc...... 18   / 1000    p < 0.018  [38;5;9m*[0m


Seed: 165043843 - Split: 3 - lgbtq_status - Group: Bisexual
data shape:  (2501, 1)
sample size: 1250


bootstrap: 100%|████████████████████████████| 1000/1000 [00:10<00:00, 93.97it/s]



count sample diff f1   is twice tot diff f1....... 57   / 1000    p < 0.057  
count sample diff prec is twice tot diff prec..... 56   / 1000    p < 0.056  
count sample diff rec  is twice tot diff rec ..... 60   / 1000    p < 0.06   
count sample diff acc  is twice tot diff acc...... 56   / 1000    p < 0.056  


Seed: 2923262358 - Split: 0 - lgbtq_status - Group: Heterosexual
data shape:  (22649, 1)
sample size: 11324


bootstrap: 100%|████████████████████████████| 1000/1000 [00:28<00:00, 34.96it/s]



count sample diff f1   is twice tot diff f1....... 568  / 1000    p < 0.568  
count sample diff prec is twice tot diff prec..... 573  / 1000    p < 0.573  
count sample diff rec  is twice tot diff rec ..... 506  / 1000    p < 0.506  
count sample diff acc  is twice tot diff acc...... 656  / 1000    p < 0.656  


Seed: 2923262358 - Split: 0 - lgbtq_status - Group: Homosexual
data shape:  (709, 1)
sample size: 354


bootstrap: 100%|███████████████████████████| 1000/1000 [00:08<00:00, 113.75it/s]



count sample diff f1   is twice tot diff f1....... 316  / 1000    p < 0.316  
count sample diff prec is twice tot diff prec..... 220  / 1000    p < 0.22   
count sample diff rec  is twice tot diff rec ..... 455  / 1000    p < 0.455  
count sample diff acc  is twice tot diff acc...... 139  / 1000    p < 0.139  


Seed: 2923262358 - Split: 0 - lgbtq_status - Group: Bisexual
data shape:  (2444, 1)
sample size: 1222


bootstrap: 100%|████████████████████████████| 1000/1000 [00:11<00:00, 84.32it/s]



count sample diff f1   is twice tot diff f1....... 11   / 1000    p < 0.011  [38;5;9m*[0m
count sample diff prec is twice tot diff prec..... 7    / 1000    p < 0.007  [38;5;9m**[0m
count sample diff rec  is twice tot diff rec ..... 13   / 1000    p < 0.013  [38;5;9m*[0m
count sample diff acc  is twice tot diff acc...... 7    / 1000    p < 0.007  [38;5;9m**[0m


Seed: 2923262358 - Split: 1 - lgbtq_status - Group: Heterosexual
data shape:  (22726, 1)
sample size: 11363


bootstrap: 100%|████████████████████████████| 1000/1000 [00:31<00:00, 32.04it/s]



count sample diff f1   is twice tot diff f1....... 71   / 1000    p < 0.071  
count sample diff prec is twice tot diff prec..... 430  / 1000    p < 0.43   
count sample diff rec  is twice tot diff rec ..... 3    / 1000    p < 0.003  [38;5;9m**[0m
count sample diff acc  is twice tot diff acc...... 954  / 1000    p < 0.954  [38;5;8m![0m


Seed: 2923262358 - Split: 1 - lgbtq_status - Group: Homosexual
data shape:  (726, 1)
sample size: 363


bootstrap: 100%|███████████████████████████| 1000/1000 [00:08<00:00, 112.68it/s]



count sample diff f1   is twice tot diff f1....... 215  / 1000    p < 0.215  
count sample diff prec is twice tot diff prec..... 172  / 1000    p < 0.172  
count sample diff rec  is twice tot diff rec ..... 261  / 1000    p < 0.261  
count sample diff acc  is twice tot diff acc...... 135  / 1000    p < 0.135  


Seed: 2923262358 - Split: 1 - lgbtq_status - Group: Bisexual
data shape:  (2383, 1)
sample size: 1191


bootstrap: 100%|████████████████████████████| 1000/1000 [00:11<00:00, 88.57it/s]



count sample diff f1   is twice tot diff f1....... 397  / 1000    p < 0.397  
count sample diff prec is twice tot diff prec..... 408  / 1000    p < 0.408  
count sample diff rec  is twice tot diff rec ..... 403  / 1000    p < 0.403  
count sample diff acc  is twice tot diff acc...... 422  / 1000    p < 0.422  


Seed: 2923262358 - Split: 2 - lgbtq_status - Group: Heterosexual
data shape:  (22507, 1)
sample size: 11253


bootstrap: 100%|████████████████████████████| 1000/1000 [00:31<00:00, 32.23it/s]



count sample diff f1   is twice tot diff f1....... 58   / 1000    p < 0.058  
count sample diff prec is twice tot diff prec..... 57   / 1000    p < 0.057  
count sample diff rec  is twice tot diff rec ..... 60   / 1000    p < 0.06   
count sample diff acc  is twice tot diff acc...... 63   / 1000    p < 0.063  


Seed: 2923262358 - Split: 2 - lgbtq_status - Group: Homosexual
data shape:  (753, 1)
sample size: 376


bootstrap: 100%|████████████████████████████| 1000/1000 [00:10<00:00, 92.73it/s]



count sample diff f1   is twice tot diff f1....... 209  / 1000    p < 0.209  
count sample diff prec is twice tot diff prec..... 154  / 1000    p < 0.154  
count sample diff rec  is twice tot diff rec ..... 262  / 1000    p < 0.262  
count sample diff acc  is twice tot diff acc...... 125  / 1000    p < 0.125  


Seed: 2923262358 - Split: 2 - lgbtq_status - Group: Bisexual
data shape:  (2492, 1)
sample size: 1246


bootstrap: 100%|████████████████████████████| 1000/1000 [00:12<00:00, 79.19it/s]



count sample diff f1   is twice tot diff f1....... 36   / 1000    p < 0.036  [38;5;9m*[0m
count sample diff prec is twice tot diff prec..... 30   / 1000    p < 0.03   [38;5;9m*[0m
count sample diff rec  is twice tot diff rec ..... 36   / 1000    p < 0.036  [38;5;9m*[0m
count sample diff acc  is twice tot diff acc...... 32   / 1000    p < 0.032  [38;5;9m*[0m


Seed: 2923262358 - Split: 3 - lgbtq_status - Group: Heterosexual
data shape:  (22638, 1)
sample size: 11319


bootstrap: 100%|████████████████████████████| 1000/1000 [00:33<00:00, 29.63it/s]



count sample diff f1   is twice tot diff f1....... 788  / 1000    p < 0.788  
count sample diff prec is twice tot diff prec..... 560  / 1000    p < 0.56   
count sample diff rec  is twice tot diff rec ..... 949  / 1000    p < 0.949  
count sample diff acc  is twice tot diff acc...... 133  / 1000    p < 0.133  


Seed: 2923262358 - Split: 3 - lgbtq_status - Group: Homosexual
data shape:  (712, 1)
sample size: 356


bootstrap: 100%|████████████████████████████| 1000/1000 [00:14<00:00, 70.28it/s]



count sample diff f1   is twice tot diff f1....... 463  / 1000    p < 0.463  
count sample diff prec is twice tot diff prec..... 283  / 1000    p < 0.283  
count sample diff rec  is twice tot diff rec ..... 621  / 1000    p < 0.621  
count sample diff acc  is twice tot diff acc...... 118  / 1000    p < 0.118  


Seed: 2923262358 - Split: 3 - lgbtq_status - Group: Bisexual
data shape:  (2461, 1)
sample size: 1230


bootstrap: 100%|████████████████████████████| 1000/1000 [00:11<00:00, 88.80it/s]


count sample diff f1   is twice tot diff f1....... 0    / 1000    p < 0.0    [38;5;9m**[0m
count sample diff prec is twice tot diff prec..... 0    / 1000    p < 0.0    [38;5;9m**[0m
count sample diff rec  is twice tot diff rec ..... 0    / 1000    p < 0.0    [38;5;9m**[0m
count sample diff acc  is twice tot diff acc...... 0    / 1000    p < 0.0    [38;5;9m**[0m





In [22]:
# Reduce the collected p-value dicts to per-group k estimates. The result is a
# dict keyed by group name, each entry holding a `k_count` and a
# `k_bonferroni` value.
# NOTE(review): `lgbq_p_dicts` is presumably filled by the per-group
# significance-test runs logged above, and -1 presumably marks "no estimate" —
# confirm both against `k_estimator_groups`.
lgbq_ks = k_estimator_groups(lgbq_p_dicts)
lgbq_ks

{'Homosexual': {'k_count': -1, 'k_bonferroni': -1},
 'Heterosexual': {'k_count': 1, 'k_bonferroni': 1},
 'Bisexual': {'k_count': 6, 'k_bonferroni': 2}}

In [23]:
# Build the table of k estimates: one row per group (sorted by group name),
# one column per estimator. Written as a single chain so re-running the cell
# always starts from `lgbq_ks` and yields the same frame.
lgbq_ks_df = (
    pd.DataFrame(lgbq_ks)
    .T
    .sort_index()
    # Show -1 as 0 in the table.
    # NOTE(review): -1 is presumably the "no estimate" sentinel — confirm.
    .replace({-1: 0})
    # Raw strings: '\h' is an invalid escape sequence in a normal literal
    # (DeprecationWarning, SyntaxWarning from Python 3.12). The resulting
    # LaTeX column headers are character-for-character the same.
    .rename(columns={
        'k_count': r'$\hat{k}_{count}$',
        'k_bonferroni': r'$\hat{k}_{Bonf.}$',
    })
)
lgbq_ks_df

Unnamed: 0,$\hat{k}_{count}$,$\hat{k}_{Bonf.}$
Bisexual,6,2
Heterosexual,1,1
Homosexual,0,0


In [24]:
# Write the table to disk as LaTeX; escaping is turned off so the math-mode
# column headers ($\hat{k}$ ...) are emitted verbatim.
lgbq_ks_df.to_latex(buf='../tables/replication/randomized/lgbq.tex', escape=False)