In [1]:
import ast

import numpy as np
import pandas as pd

from sociodemographic_softlabels.run import map_annotators_to_groups
from sociodemographic_softlabels.datasets import kumar
from sociodemographic_softlabels.analysis.replicability import (
    significance_test_per_group,
    k_estimator_groups
)

# Annotator sample CSV; first argument to `kumar.Dataset.load` below.
SAMPLE_PATH = '../data/processed/kumar/sample_5000_annotators_v3.csv'
# Dataset metadata file; passed as `metadata_path` to `kumar.Dataset.load` below.
# NOTE(review): unlike the other paths in this notebook this one has no directory
# component, so it resolves against the current working directory — confirm.
DATASET_PATH = 'kumar_dataset.json'

## Load Data

In [2]:
# Result files that are stacked into one frame (one CSV per experiment).
RESULT_PATHS = [
    '../experiments/0.2-baseline/result.csv',
    '../experiments/1.1-age/result.csv',
    '../experiments/1.2-gender/result.csv',
    '../experiments/1.3-education/result.csv',
    '../experiments/1.4-lgbq/result.csv',
]

# Short display names for the serialized `attributes` values found in the CSVs.
ATTRIBUTE_LABELS = {
    "['ALL_0']": 'same layer',
    "['ALL_0', 'ALL_1', 'ALL_2', 'ALL_3', 'ALL_4']": '5 same layers',
    '[]': 'no extra layers',
    'Not applicable': 'single-task',
    "['gender']": 'gender',
    "['lgbtq_status']": 'lgbq',
    "['education']": 'education',
    "['age_range']": 'age',
}

# Columns whose cells are stringified Python lists that must become float arrays.
ARRAY_COLUMNS = [
    'predictions_per_annotator',
    'labels_raw',
    'labels_per_annotator',
    'annotator_indecies',  # (sic) spelling matches the column name in the result files
]


def parse_float_array(serialized):
    """Convert a stringified list (e.g. ``"[1, nan, 0]"``) into a float ndarray.

    ``ast.literal_eval`` rejects the bare token ``nan``, so every ``nan`` is
    rewritten to ``None`` before parsing; ``np.array(..., dtype='float')`` then
    turns each ``None`` into NaN.

    Args:
        serialized: String form of a (possibly nested) list of numbers.

    Returns:
        ``numpy.ndarray`` with dtype float.
    """
    return np.array(ast.literal_eval(serialized.replace('nan', 'None')), dtype='float')


df = (
    pd.concat([pd.read_csv(path) for path in RESULT_PATHS])
    # Strip the 'eval_' prefix from metric columns; other columns keep their name.
    .rename(columns=lambda c: c[len('eval_'):] if c.startswith('eval_') else c)
    .reset_index(drop=True)
)

# NOTE: `Series.map` with a dict yields NaN for any value missing from ATTRIBUTE_LABELS.
df['attributes'] = df['attributes'].map(ATTRIBUTE_LABELS)

# One pass per column with `Series.map` instead of three chained `DataFrame.applymap`
# passes (`applymap` is deprecated since pandas 2.1); the resulting cells are identical.
for column in ARRAY_COLUMNS:
    df[column] = df[column].map(parse_float_array)

In [3]:
# Sociodemographic attributes for which annotators are mapped to groups.
group_attributes = ["age_range", "lgbtq_status", "education", "gender"]

# Load the annotator sample together with its metadata file.
all_data = kumar.Dataset.load(SAMPLE_PATH, metadata_path=DATASET_PATH)

# `to_groups` is handed to `significance_test_per_group` in the cells below.
to_groups = map_annotators_to_groups(
    all_data.annotators_mapping,
    all_data.sociodemographic_mapping,
    group_attributes,
)

  0%|          | 0/23 [00:00<?, ?ba/s]

In [4]:
# Sanity check: show the rows for one seed / split of the 'no extra layers'
# setting without random group assignment.
is_selected_run = (
    df['random_seed'].eq(2803636207)
    & df['split'].eq(0)
    & df['attributes'].eq('no extra layers')
    & df['do_random_assignment'].eq(False)
)
df[is_selected_run]

Unnamed: 0,timestamp,loss,religion_important - Not too important - individual_0_precision,religion_important - Not too important - individual_0_recall,religion_important - Not too important - individual_0_f1-score,religion_important - Not too important - individual_0_support,religion_important - Not too important - individual_1_precision,religion_important - Not too important - individual_1_recall,religion_important - Not too important - individual_1_f1-score,religion_important - Not too important - individual_1_support,...,attributes,map_ids_to_attributes,setting,output_dir,while_train,do_stack_groups,do_random_assignment,group_layers_depth,random_seed,split
0,2023-05-18 14:17:53.858165,0.0,0.803314,0.808934,0.806114,1858,0.401349,0.392739,0.396997,606,...,no extra layers,[],K_FOLD,./models/custom_mt_model_multilabel,False,True,False,1,2803636207,0


## Significance Tests

### Gender

In [5]:
# Compare the 'no extra layers' runs against the 'gender' runs; the log below
# reports bootstrap counts per seed, split and gender group.
gender_p_dicts = significance_test_per_group(
    df,
    attribute_a='no extra layers',
    attribute_b='gender',
    to_groups=to_groups,
    attributes=['gender'],
)



Seed: 2803636207 - Split: 0 - gender - Group: Male
data shape:  (11861, 1)
sample size: 5930


bootstrap: 100%|████████████████████████████| 1000/1000 [00:15<00:00, 65.93it/s]



count sample diff f1   is twice tot diff f1....... 299  / 1000    p < 0.299  
count sample diff prec is twice tot diff prec..... 226  / 1000    p < 0.226  
count sample diff rec  is twice tot diff rec ..... 366  / 1000    p < 0.366  
count sample diff acc  is twice tot diff acc...... 196  / 1000    p < 0.196  


Seed: 2803636207 - Split: 0 - gender - Group: Female
data shape:  (13664, 1)
sample size: 6832


bootstrap: 100%|████████████████████████████| 1000/1000 [00:16<00:00, 60.26it/s]



count sample diff f1   is twice tot diff f1....... 713  / 1000    p < 0.713  
count sample diff prec is twice tot diff prec..... 818  / 1000    p < 0.818  
count sample diff rec  is twice tot diff rec ..... 543  / 1000    p < 0.543  
count sample diff acc  is twice tot diff acc...... 951  / 1000    p < 0.951  [38;5;8m![0m


Seed: 2803636207 - Split: 0 - gender - Group: Nonbinary
data shape:  (120, 1)
sample size: 60


bootstrap: 100%|███████████████████████████| 1000/1000 [00:04<00:00, 217.10it/s]



count sample diff f1   is twice tot diff f1....... 10   / 1000    p < 0.01   [38;5;9m*[0m
count sample diff prec is twice tot diff prec..... 40   / 1000    p < 0.04   [38;5;9m*[0m
count sample diff rec  is twice tot diff rec ..... 23   / 1000    p < 0.023  [38;5;9m*[0m
count sample diff acc  is twice tot diff acc...... 2    / 1000    p < 0.002  [38;5;9m**[0m


Seed: 2803636207 - Split: 1 - gender - Group: Male
data shape:  (11912, 1)
sample size: 5956


bootstrap: 100%|████████████████████████████| 1000/1000 [00:15<00:00, 66.43it/s]



count sample diff f1   is twice tot diff f1....... 860  / 1000    p < 0.86   
count sample diff prec is twice tot diff prec..... 654  / 1000    p < 0.654  
count sample diff rec  is twice tot diff rec ..... 935  / 1000    p < 0.935  
count sample diff acc  is twice tot diff acc...... 537  / 1000    p < 0.537  


Seed: 2803636207 - Split: 1 - gender - Group: Female
data shape:  (13452, 1)
sample size: 6726


bootstrap: 100%|████████████████████████████| 1000/1000 [00:16<00:00, 61.44it/s]



count sample diff f1   is twice tot diff f1....... 986  / 1000    p < 0.986  [38;5;8m![0m
count sample diff prec is twice tot diff prec..... 984  / 1000    p < 0.984  [38;5;8m![0m
count sample diff rec  is twice tot diff rec ..... 992  / 1000    p < 0.992  [38;5;8m![0m
count sample diff acc  is twice tot diff acc...... 917  / 1000    p < 0.917  


Seed: 2803636207 - Split: 1 - gender - Group: Nonbinary
data shape:  (122, 1)
sample size: 61


bootstrap: 100%|███████████████████████████| 1000/1000 [00:04<00:00, 215.21it/s]



count sample diff f1   is twice tot diff f1....... 820  / 1000    p < 0.82   
count sample diff prec is twice tot diff prec..... 815  / 1000    p < 0.815  
count sample diff rec  is twice tot diff rec ..... 776  / 1000    p < 0.776  
count sample diff acc  is twice tot diff acc...... 866  / 1000    p < 0.866  


Seed: 2803636207 - Split: 2 - gender - Group: Male
data shape:  (11952, 1)
sample size: 5976


bootstrap: 100%|████████████████████████████| 1000/1000 [00:15<00:00, 65.15it/s]



count sample diff f1   is twice tot diff f1....... 989  / 1000    p < 0.989  [38;5;8m![0m
count sample diff prec is twice tot diff prec..... 963  / 1000    p < 0.963  [38;5;8m![0m
count sample diff rec  is twice tot diff rec ..... 991  / 1000    p < 0.991  [38;5;8m![0m
count sample diff acc  is twice tot diff acc...... 930  / 1000    p < 0.93   


Seed: 2803636207 - Split: 2 - gender - Group: Female
data shape:  (13589, 1)
sample size: 6794


bootstrap: 100%|████████████████████████████| 1000/1000 [00:17<00:00, 58.31it/s]



count sample diff f1   is twice tot diff f1....... 999  / 1000    p < 0.999  [38;5;8m![0m
count sample diff prec is twice tot diff prec..... 999  / 1000    p < 0.999  [38;5;8m![0m
count sample diff rec  is twice tot diff rec ..... 999  / 1000    p < 0.999  [38;5;8m![0m
count sample diff acc  is twice tot diff acc...... 999  / 1000    p < 0.999  [38;5;8m![0m


Seed: 2803636207 - Split: 2 - gender - Group: Nonbinary
data shape:  (114, 1)
sample size: 57


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr


count sample diff f1   is twice tot diff f1....... 922  / 1000    p < 0.922  
count sample diff prec is twice tot diff prec..... 423  / 1000    p < 0.423  
count sample diff rec  is twice tot diff rec ..... 899  / 1000    p < 0.899  
count sample diff acc  is twice tot diff acc...... 332  / 1000    p < 0.332  


Seed: 2803636207 - Split: 3 - gender - Group: Male
data shape:  (11975, 1)
sample size: 5987


bootstrap: 100%|████████████████████████████| 1000/1000 [00:15<00:00, 65.89it/s]



count sample diff f1   is twice tot diff f1....... 758  / 1000    p < 0.758  
count sample diff prec is twice tot diff prec..... 744  / 1000    p < 0.744  
count sample diff rec  is twice tot diff rec ..... 764  / 1000    p < 0.764  
count sample diff acc  is twice tot diff acc...... 720  / 1000    p < 0.72   


Seed: 2803636207 - Split: 3 - gender - Group: Female
data shape:  (13515, 1)
sample size: 6757


bootstrap: 100%|████████████████████████████| 1000/1000 [00:16<00:00, 59.29it/s]



count sample diff f1   is twice tot diff f1....... 617  / 1000    p < 0.617  
count sample diff prec is twice tot diff prec..... 720  / 1000    p < 0.72   
count sample diff rec  is twice tot diff rec ..... 504  / 1000    p < 0.504  
count sample diff acc  is twice tot diff acc...... 883  / 1000    p < 0.883  


Seed: 2803636207 - Split: 3 - gender - Group: Nonbinary
data shape:  (104, 1)
sample size: 52


bootstrap: 100%|███████████████████████████| 1000/1000 [00:04<00:00, 215.94it/s]



count sample diff f1   is twice tot diff f1....... 661  / 1000    p < 0.661  
count sample diff prec is twice tot diff prec..... 655  / 1000    p < 0.655  
count sample diff rec  is twice tot diff rec ..... 706  / 1000    p < 0.706  
count sample diff acc  is twice tot diff acc...... 438  / 1000    p < 0.438  


Seed: 165043843 - Split: 0 - gender - Group: Male
data shape:  (12062, 1)
sample size: 6031


bootstrap: 100%|████████████████████████████| 1000/1000 [00:15<00:00, 64.82it/s]



count sample diff f1   is twice tot diff f1....... 185  / 1000    p < 0.185  
count sample diff prec is twice tot diff prec..... 281  / 1000    p < 0.281  
count sample diff rec  is twice tot diff rec ..... 130  / 1000    p < 0.13   
count sample diff acc  is twice tot diff acc...... 359  / 1000    p < 0.359  


Seed: 165043843 - Split: 0 - gender - Group: Female
data shape:  (13383, 1)
sample size: 6691


bootstrap: 100%|████████████████████████████| 1000/1000 [00:16<00:00, 60.52it/s]



count sample diff f1   is twice tot diff f1....... 27   / 1000    p < 0.027  [38;5;9m*[0m
count sample diff prec is twice tot diff prec..... 16   / 1000    p < 0.016  [38;5;9m*[0m
count sample diff rec  is twice tot diff rec ..... 55   / 1000    p < 0.055  
count sample diff acc  is twice tot diff acc...... 3    / 1000    p < 0.003  [38;5;9m**[0m


Seed: 165043843 - Split: 0 - gender - Group: Nonbinary
data shape:  (114, 1)
sample size: 57


bootstrap: 100%|███████████████████████████| 1000/1000 [00:04<00:00, 216.12it/s]



count sample diff f1   is twice tot diff f1....... 289  / 1000    p < 0.289  
count sample diff prec is twice tot diff prec..... 295  / 1000    p < 0.295  
count sample diff rec  is twice tot diff rec ..... 305  / 1000    p < 0.305  
count sample diff acc  is twice tot diff acc...... 211  / 1000    p < 0.211  


Seed: 165043843 - Split: 1 - gender - Group: Male
data shape:  (11915, 1)
sample size: 5957


bootstrap: 100%|████████████████████████████| 1000/1000 [00:15<00:00, 65.75it/s]



count sample diff f1   is twice tot diff f1....... 756  / 1000    p < 0.756  
count sample diff prec is twice tot diff prec..... 894  / 1000    p < 0.894  
count sample diff rec  is twice tot diff rec ..... 561  / 1000    p < 0.561  
count sample diff acc  is twice tot diff acc...... 952  / 1000    p < 0.952  [38;5;8m![0m


Seed: 165043843 - Split: 1 - gender - Group: Female
data shape:  (13572, 1)
sample size: 6786


bootstrap: 100%|████████████████████████████| 1000/1000 [00:17<00:00, 58.46it/s]



count sample diff f1   is twice tot diff f1....... 20   / 1000    p < 0.02   [38;5;9m*[0m
count sample diff prec is twice tot diff prec..... 31   / 1000    p < 0.031  [38;5;9m*[0m
count sample diff rec  is twice tot diff rec ..... 7    / 1000    p < 0.007  [38;5;9m**[0m
count sample diff acc  is twice tot diff acc...... 125  / 1000    p < 0.125  


Seed: 165043843 - Split: 1 - gender - Group: Nonbinary
data shape:  (120, 1)
sample size: 60


bootstrap: 100%|███████████████████████████| 1000/1000 [00:04<00:00, 217.76it/s]



count sample diff f1   is twice tot diff f1....... 183  / 1000    p < 0.183  
count sample diff prec is twice tot diff prec..... 178  / 1000    p < 0.178  
count sample diff rec  is twice tot diff rec ..... 227  / 1000    p < 0.227  
count sample diff acc  is twice tot diff acc...... 116  / 1000    p < 0.116  


Seed: 165043843 - Split: 2 - gender - Group: Male
data shape:  (11843, 1)
sample size: 5921


bootstrap: 100%|████████████████████████████| 1000/1000 [00:15<00:00, 64.04it/s]



count sample diff f1   is twice tot diff f1....... 414  / 1000    p < 0.414  
count sample diff prec is twice tot diff prec..... 478  / 1000    p < 0.478  
count sample diff rec  is twice tot diff rec ..... 315  / 1000    p < 0.315  
count sample diff acc  is twice tot diff acc...... 600  / 1000    p < 0.6    


Seed: 165043843 - Split: 2 - gender - Group: Female
data shape:  (13638, 1)
sample size: 6819


bootstrap: 100%|████████████████████████████| 1000/1000 [00:17<00:00, 57.32it/s]



count sample diff f1   is twice tot diff f1....... 202  / 1000    p < 0.202  
count sample diff prec is twice tot diff prec..... 245  / 1000    p < 0.245  
count sample diff rec  is twice tot diff rec ..... 134  / 1000    p < 0.134  
count sample diff acc  is twice tot diff acc...... 402  / 1000    p < 0.402  


Seed: 165043843 - Split: 2 - gender - Group: Nonbinary
data shape:  (109, 1)
sample size: 54


bootstrap: 100%|███████████████████████████| 1000/1000 [00:04<00:00, 216.62it/s]



count sample diff f1   is twice tot diff f1....... 463  / 1000    p < 0.463  
count sample diff prec is twice tot diff prec..... 456  / 1000    p < 0.456  
count sample diff rec  is twice tot diff rec ..... 595  / 1000    p < 0.595  
count sample diff acc  is twice tot diff acc...... 269  / 1000    p < 0.269  


Seed: 165043843 - Split: 3 - gender - Group: Male
data shape:  (11880, 1)
sample size: 5940


bootstrap: 100%|████████████████████████████| 1000/1000 [00:16<00:00, 62.46it/s]



count sample diff f1   is twice tot diff f1....... 546  / 1000    p < 0.546  
count sample diff prec is twice tot diff prec..... 759  / 1000    p < 0.759  
count sample diff rec  is twice tot diff rec ..... 319  / 1000    p < 0.319  
count sample diff acc  is twice tot diff acc...... 862  / 1000    p < 0.862  


Seed: 165043843 - Split: 3 - gender - Group: Female
data shape:  (13627, 1)
sample size: 6813


bootstrap: 100%|████████████████████████████| 1000/1000 [00:17<00:00, 57.74it/s]



count sample diff f1   is twice tot diff f1....... 134  / 1000    p < 0.134  
count sample diff prec is twice tot diff prec..... 203  / 1000    p < 0.203  
count sample diff rec  is twice tot diff rec ..... 83   / 1000    p < 0.083  
count sample diff acc  is twice tot diff acc...... 423  / 1000    p < 0.423  


Seed: 165043843 - Split: 3 - gender - Group: Nonbinary
data shape:  (117, 1)
sample size: 58


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
bootstrap: 100%|███████████████████████████| 1000/1000 [00:04<00:00, 216.12it/s]



count sample diff f1   is twice tot diff f1....... 451  / 1000    p < 0.451  
count sample diff prec is twice tot diff prec..... 487  / 1000    p < 0.487  
count sample diff rec  is twice tot diff rec ..... 566  / 1000    p < 0.566  
count sample diff acc  is twice tot diff acc...... 204  / 1000    p < 0.204  


Seed: 2923262358 - Split: 0 - gender - Group: Male
data shape:  (11985, 1)
sample size: 5992


bootstrap: 100%|████████████████████████████| 1000/1000 [00:15<00:00, 64.15it/s]



count sample diff f1   is twice tot diff f1....... 930  / 1000    p < 0.93   
count sample diff prec is twice tot diff prec..... 950  / 1000    p < 0.95   
count sample diff rec  is twice tot diff rec ..... 898  / 1000    p < 0.898  
count sample diff acc  is twice tot diff acc...... 966  / 1000    p < 0.966  [38;5;8m![0m


Seed: 2923262358 - Split: 0 - gender - Group: Female
data shape:  (13509, 1)
sample size: 6754


bootstrap: 100%|████████████████████████████| 1000/1000 [00:16<00:00, 59.56it/s]



count sample diff f1   is twice tot diff f1....... 91   / 1000    p < 0.091  
count sample diff prec is twice tot diff prec..... 129  / 1000    p < 0.129  
count sample diff rec  is twice tot diff rec ..... 54   / 1000    p < 0.054  
count sample diff acc  is twice tot diff acc...... 309  / 1000    p < 0.309  


Seed: 2923262358 - Split: 0 - gender - Group: Nonbinary
data shape:  (118, 1)
sample size: 59


bootstrap: 100%|███████████████████████████| 1000/1000 [00:04<00:00, 211.34it/s]



count sample diff f1   is twice tot diff f1....... 176  / 1000    p < 0.176  
count sample diff prec is twice tot diff prec..... 196  / 1000    p < 0.196  
count sample diff rec  is twice tot diff rec ..... 242  / 1000    p < 0.242  
count sample diff acc  is twice tot diff acc...... 96   / 1000    p < 0.096  


Seed: 2923262358 - Split: 1 - gender - Group: Male
data shape:  (11917, 1)
sample size: 5958


bootstrap: 100%|████████████████████████████| 1000/1000 [00:15<00:00, 63.64it/s]



count sample diff f1   is twice tot diff f1....... 626  / 1000    p < 0.626  
count sample diff prec is twice tot diff prec..... 719  / 1000    p < 0.719  
count sample diff rec  is twice tot diff rec ..... 566  / 1000    p < 0.566  
count sample diff acc  is twice tot diff acc...... 725  / 1000    p < 0.725  


Seed: 2923262358 - Split: 1 - gender - Group: Female
data shape:  (13498, 1)
sample size: 6749


bootstrap: 100%|████████████████████████████| 1000/1000 [00:20<00:00, 49.29it/s]



count sample diff f1   is twice tot diff f1....... 495  / 1000    p < 0.495  
count sample diff prec is twice tot diff prec..... 566  / 1000    p < 0.566  
count sample diff rec  is twice tot diff rec ..... 448  / 1000    p < 0.448  
count sample diff acc  is twice tot diff acc...... 657  / 1000    p < 0.657  


Seed: 2923262358 - Split: 1 - gender - Group: Nonbinary
data shape:  (105, 1)
sample size: 52


bootstrap: 100%|███████████████████████████| 1000/1000 [00:04<00:00, 206.78it/s]



count sample diff f1   is twice tot diff f1....... 692  / 1000    p < 0.692  
count sample diff prec is twice tot diff prec..... 684  / 1000    p < 0.684  
count sample diff rec  is twice tot diff rec ..... 671  / 1000    p < 0.671  
count sample diff acc  is twice tot diff acc...... 638  / 1000    p < 0.638  


Seed: 2923262358 - Split: 2 - gender - Group: Male
data shape:  (11928, 1)
sample size: 5964


bootstrap: 100%|████████████████████████████| 1000/1000 [00:19<00:00, 52.45it/s]



count sample diff f1   is twice tot diff f1....... 704  / 1000    p < 0.704  
count sample diff prec is twice tot diff prec..... 804  / 1000    p < 0.804  
count sample diff rec  is twice tot diff rec ..... 576  / 1000    p < 0.576  
count sample diff acc  is twice tot diff acc...... 853  / 1000    p < 0.853  


Seed: 2923262358 - Split: 2 - gender - Group: Female
data shape:  (13560, 1)
sample size: 6780


bootstrap: 100%|████████████████████████████| 1000/1000 [00:17<00:00, 55.99it/s]



count sample diff f1   is twice tot diff f1....... 244  / 1000    p < 0.244  
count sample diff prec is twice tot diff prec..... 480  / 1000    p < 0.48   
count sample diff rec  is twice tot diff rec ..... 100  / 1000    p < 0.1    
count sample diff acc  is twice tot diff acc...... 839  / 1000    p < 0.839  


Seed: 2923262358 - Split: 2 - gender - Group: Nonbinary
data shape:  (117, 1)
sample size: 58


bootstrap: 100%|███████████████████████████| 1000/1000 [00:05<00:00, 183.29it/s]



count sample diff f1   is twice tot diff f1....... 510  / 1000    p < 0.51   
count sample diff prec is twice tot diff prec..... 521  / 1000    p < 0.521  
count sample diff rec  is twice tot diff rec ..... 534  / 1000    p < 0.534  
count sample diff acc  is twice tot diff acc...... 456  / 1000    p < 0.456  


Seed: 2923262358 - Split: 3 - gender - Group: Male
data shape:  (11870, 1)
sample size: 5935


bootstrap: 100%|████████████████████████████| 1000/1000 [00:17<00:00, 57.60it/s]



count sample diff f1   is twice tot diff f1....... 492  / 1000    p < 0.492  
count sample diff prec is twice tot diff prec..... 260  / 1000    p < 0.26   
count sample diff rec  is twice tot diff rec ..... 646  / 1000    p < 0.646  
count sample diff acc  is twice tot diff acc...... 175  / 1000    p < 0.175  


Seed: 2923262358 - Split: 3 - gender - Group: Female
data shape:  (13653, 1)
sample size: 6826


bootstrap: 100%|████████████████████████████| 1000/1000 [00:19<00:00, 51.58it/s]



count sample diff f1   is twice tot diff f1....... 931  / 1000    p < 0.931  
count sample diff prec is twice tot diff prec..... 863  / 1000    p < 0.863  
count sample diff rec  is twice tot diff rec ..... 969  / 1000    p < 0.969  [38;5;8m![0m
count sample diff acc  is twice tot diff acc...... 565  / 1000    p < 0.565  


Seed: 2923262358 - Split: 3 - gender - Group: Nonbinary
data shape:  (120, 1)
sample size: 60


bootstrap: 100%|███████████████████████████| 1000/1000 [00:04<00:00, 217.68it/s]


count sample diff f1   is twice tot diff f1....... 809  / 1000    p < 0.809  
count sample diff prec is twice tot diff prec..... 804  / 1000    p < 0.804  
count sample diff rec  is twice tot diff rec ..... 722  / 1000    p < 0.722  
count sample diff acc  is twice tot diff acc...... 208  / 1000    p < 0.208  





In [6]:
# Reduce the per-run significance results to one `k_count` / `k_bonferroni`
# entry per gender group (see the dict displayed below).
# NOTE(review): the exact definition of both estimators lives in
# `sociodemographic_softlabels.analysis.replicability` and is not shown here.
gender_ks = k_estimator_groups(gender_p_dicts)
gender_ks

{'Nonbinary': {'k_count': 1, 'k_bonferroni': -1},
 'Male': {'k_count': -1, 'k_bonferroni': -1},
 'Female': {'k_count': 2, 'k_bonferroni': -1}}

In [8]:
# Build the table for the paper: one row per group (sorted by name), one column
# per estimator.
gender_ks_df = (
    pd.DataFrame(gender_ks).T
    .sort_index()
    # -1 is shown as 0 in the table.
    # NOTE(review): -1 presumably marks "no k found" — confirm in k_estimator_groups.
    .replace({-1: 0})
    # Raw strings: '\h' is an invalid escape sequence in a normal string literal
    # (DeprecationWarning since Python 3.6, SyntaxWarning since 3.12).
    .rename(columns={
        'k_count': r'$\hat{k}_{count}$',
        'k_bonferroni': r'$\hat{k}_{Bonf.}$',
    })
)
gender_ks_df

Unnamed: 0,$\hat{k}_{count}$,$\hat{k}_{Bonf.}$
Female,2,0
Male,0,0
Nonbinary,1,0


In [9]:
# Export the table as LaTeX. `escape=False` writes the `$\hat{k}$` column headers
# verbatim instead of escaping the LaTeX special characters in them.
# NOTE(review): the target directory is not created here; it is assumed to exist.
gender_ks_df.to_latex('../tables/replication/baseline/gender.tex', escape=False)

### Age

In [10]:
# Compare the 'no extra layers' runs against the 'age' runs; the log below
# reports bootstrap counts per seed, split and age-range group.
age_p_dicts = significance_test_per_group(
    df,
    attribute_a='no extra layers',
    attribute_b='age',
    to_groups=to_groups,
    attributes=['age_range'],
)



Seed: 2803636207 - Split: 0 - age_range - Group: 25 - 34
data shape:  (10313, 1)
sample size: 5156


bootstrap: 100%|████████████████████████████| 1000/1000 [00:14<00:00, 70.54it/s]



count sample diff f1   is twice tot diff f1....... 381  / 1000    p < 0.381  
count sample diff prec is twice tot diff prec..... 328  / 1000    p < 0.328  
count sample diff rec  is twice tot diff rec ..... 415  / 1000    p < 0.415  
count sample diff acc  is twice tot diff acc...... 301  / 1000    p < 0.301  


Seed: 2803636207 - Split: 0 - age_range - Group: 35 - 44
data shape:  (6220, 1)
sample size: 3110


bootstrap: 100%|████████████████████████████| 1000/1000 [00:10<00:00, 97.96it/s]



count sample diff f1   is twice tot diff f1....... 198  / 1000    p < 0.198  
count sample diff prec is twice tot diff prec..... 202  / 1000    p < 0.202  
count sample diff rec  is twice tot diff rec ..... 200  / 1000    p < 0.2    
count sample diff acc  is twice tot diff acc...... 203  / 1000    p < 0.203  


Seed: 2803636207 - Split: 0 - age_range - Group: 65 or older
data shape:  (675, 1)
sample size: 337


bootstrap: 100%|███████████████████████████| 1000/1000 [00:05<00:00, 189.93it/s]



count sample diff f1   is twice tot diff f1....... 88   / 1000    p < 0.088  
count sample diff prec is twice tot diff prec..... 48   / 1000    p < 0.048  [38;5;9m*[0m
count sample diff rec  is twice tot diff rec ..... 118  / 1000    p < 0.118  
count sample diff acc  is twice tot diff acc...... 35   / 1000    p < 0.035  [38;5;9m*[0m


Seed: 2803636207 - Split: 0 - age_range - Group: 18 - 24
data shape:  (2651, 1)
sample size: 1325


bootstrap: 100%|███████████████████████████| 1000/1000 [00:06<00:00, 145.00it/s]



count sample diff f1   is twice tot diff f1....... 90   / 1000    p < 0.09   
count sample diff prec is twice tot diff prec..... 120  / 1000    p < 0.12   
count sample diff rec  is twice tot diff rec ..... 66   / 1000    p < 0.066  
count sample diff acc  is twice tot diff acc...... 238  / 1000    p < 0.238  


Seed: 2803636207 - Split: 0 - age_range - Group: 55 - 64
data shape:  (1871, 1)
sample size: 935


bootstrap: 100%|███████████████████████████| 1000/1000 [00:06<00:00, 158.44it/s]



count sample diff f1   is twice tot diff f1....... 480  / 1000    p < 0.48   
count sample diff prec is twice tot diff prec..... 388  / 1000    p < 0.388  
count sample diff rec  is twice tot diff rec ..... 586  / 1000    p < 0.586  
count sample diff acc  is twice tot diff acc...... 238  / 1000    p < 0.238  


Seed: 2803636207 - Split: 0 - age_range - Group: 45 - 54
data shape:  (2973, 1)
sample size: 1486


bootstrap: 100%|███████████████████████████| 1000/1000 [00:07<00:00, 133.95it/s]



count sample diff f1   is twice tot diff f1....... 182  / 1000    p < 0.182  
count sample diff prec is twice tot diff prec..... 225  / 1000    p < 0.225  
count sample diff rec  is twice tot diff rec ..... 163  / 1000    p < 0.163  
count sample diff acc  is twice tot diff acc...... 258  / 1000    p < 0.258  


Seed: 2803636207 - Split: 1 - age_range - Group: 25 - 34
data shape:  (10292, 1)
sample size: 5146


bootstrap: 100%|████████████████████████████| 1000/1000 [00:14<00:00, 69.29it/s]



count sample diff f1   is twice tot diff f1....... 575  / 1000    p < 0.575  
count sample diff prec is twice tot diff prec..... 287  / 1000    p < 0.287  
count sample diff rec  is twice tot diff rec ..... 792  / 1000    p < 0.792  
count sample diff acc  is twice tot diff acc...... 150  / 1000    p < 0.15   


Seed: 2803636207 - Split: 1 - age_range - Group: 35 - 44
data shape:  (6255, 1)
sample size: 3127


bootstrap: 100%|████████████████████████████| 1000/1000 [00:10<00:00, 95.74it/s]



count sample diff f1   is twice tot diff f1....... 414  / 1000    p < 0.414  
count sample diff prec is twice tot diff prec..... 135  / 1000    p < 0.135  
count sample diff rec  is twice tot diff rec ..... 642  / 1000    p < 0.642  
count sample diff acc  is twice tot diff acc...... 31   / 1000    p < 0.031  [38;5;9m*[0m


Seed: 2803636207 - Split: 1 - age_range - Group: 65 or older
data shape:  (690, 1)
sample size: 345


bootstrap: 100%|███████████████████████████| 1000/1000 [00:05<00:00, 188.14it/s]



count sample diff f1   is twice tot diff f1....... 723  / 1000    p < 0.723  
count sample diff prec is twice tot diff prec..... 383  / 1000    p < 0.383  
count sample diff rec  is twice tot diff rec ..... 794  / 1000    p < 0.794  
count sample diff acc  is twice tot diff acc...... 345  / 1000    p < 0.345  


Seed: 2803636207 - Split: 1 - age_range - Group: 18 - 24
data shape:  (2560, 1)
sample size: 1280


bootstrap: 100%|███████████████████████████| 1000/1000 [00:06<00:00, 147.62it/s]



count sample diff f1   is twice tot diff f1....... 500  / 1000    p < 0.5    
count sample diff prec is twice tot diff prec..... 359  / 1000    p < 0.359  
count sample diff rec  is twice tot diff rec ..... 649  / 1000    p < 0.649  
count sample diff acc  is twice tot diff acc...... 58   / 1000    p < 0.058  


Seed: 2803636207 - Split: 1 - age_range - Group: 55 - 64
data shape:  (1899, 1)
sample size: 949


bootstrap: 100%|███████████████████████████| 1000/1000 [00:06<00:00, 163.58it/s]



count sample diff f1   is twice tot diff f1....... 30   / 1000    p < 0.03   [38;5;9m*[0m
count sample diff prec is twice tot diff prec..... 1    / 1000    p < 0.001  [38;5;9m**[0m
count sample diff rec  is twice tot diff rec ..... 150  / 1000    p < 0.15   
count sample diff acc  is twice tot diff acc...... 0    / 1000    p < 0.0    [38;5;9m**[0m


Seed: 2803636207 - Split: 1 - age_range - Group: 45 - 54
data shape:  (2971, 1)
sample size: 1485


bootstrap: 100%|███████████████████████████| 1000/1000 [00:07<00:00, 138.65it/s]



count sample diff f1   is twice tot diff f1....... 957  / 1000    p < 0.957  [38;5;8m![0m
count sample diff prec is twice tot diff prec..... 862  / 1000    p < 0.862  
count sample diff rec  is twice tot diff rec ..... 976  / 1000    p < 0.976  [38;5;8m![0m
count sample diff acc  is twice tot diff acc...... 808  / 1000    p < 0.808  


Seed: 2803636207 - Split: 2 - age_range - Group: 25 - 34
data shape:  (10326, 1)
sample size: 5163


bootstrap: 100%|████████████████████████████| 1000/1000 [00:14<00:00, 70.01it/s]



count sample diff f1   is twice tot diff f1....... 765  / 1000    p < 0.765  
count sample diff prec is twice tot diff prec..... 718  / 1000    p < 0.718  
count sample diff rec  is twice tot diff rec ..... 794  / 1000    p < 0.794  
count sample diff acc  is twice tot diff acc...... 678  / 1000    p < 0.678  


Seed: 2803636207 - Split: 2 - age_range - Group: 35 - 44
data shape:  (6257, 1)
sample size: 3128


bootstrap: 100%|████████████████████████████| 1000/1000 [00:10<00:00, 96.64it/s]



count sample diff f1   is twice tot diff f1....... 106  / 1000    p < 0.106  
count sample diff prec is twice tot diff prec..... 76   / 1000    p < 0.076  
count sample diff rec  is twice tot diff rec ..... 130  / 1000    p < 0.13   
count sample diff acc  is twice tot diff acc...... 64   / 1000    p < 0.064  


Seed: 2803636207 - Split: 2 - age_range - Group: 65 or older
data shape:  (684, 1)
sample size: 342


bootstrap: 100%|███████████████████████████| 1000/1000 [00:05<00:00, 192.83it/s]



count sample diff f1   is twice tot diff f1....... 559  / 1000    p < 0.559  
count sample diff prec is twice tot diff prec..... 436  / 1000    p < 0.436  
count sample diff rec  is twice tot diff rec ..... 624  / 1000    p < 0.624  
count sample diff acc  is twice tot diff acc...... 282  / 1000    p < 0.282  


Seed: 2803636207 - Split: 2 - age_range - Group: 18 - 24
data shape:  (2606, 1)
sample size: 1303


bootstrap: 100%|███████████████████████████| 1000/1000 [00:07<00:00, 132.36it/s]



count sample diff f1   is twice tot diff f1....... 395  / 1000    p < 0.395  
count sample diff prec is twice tot diff prec..... 319  / 1000    p < 0.319  
count sample diff rec  is twice tot diff rec ..... 444  / 1000    p < 0.444  
count sample diff acc  is twice tot diff acc...... 188  / 1000    p < 0.188  


Seed: 2803636207 - Split: 2 - age_range - Group: 55 - 64
data shape:  (1849, 1)
sample size: 924


bootstrap: 100%|███████████████████████████| 1000/1000 [00:06<00:00, 148.16it/s]



count sample diff f1   is twice tot diff f1....... 404  / 1000    p < 0.404  
count sample diff prec is twice tot diff prec..... 206  / 1000    p < 0.206  
count sample diff rec  is twice tot diff rec ..... 499  / 1000    p < 0.499  
count sample diff acc  is twice tot diff acc...... 166  / 1000    p < 0.166  


Seed: 2803636207 - Split: 2 - age_range - Group: 45 - 54
data shape:  (3073, 1)
sample size: 1536


bootstrap: 100%|███████████████████████████| 1000/1000 [00:07<00:00, 126.16it/s]



count sample diff f1   is twice tot diff f1....... 213  / 1000    p < 0.213  
count sample diff prec is twice tot diff prec..... 219  / 1000    p < 0.219  
count sample diff rec  is twice tot diff rec ..... 211  / 1000    p < 0.211  
count sample diff acc  is twice tot diff acc...... 227  / 1000    p < 0.227  


Seed: 2803636207 - Split: 3 - age_range - Group: 25 - 34
data shape:  (10329, 1)
sample size: 5164


bootstrap: 100%|████████████████████████████| 1000/1000 [00:15<00:00, 66.42it/s]



count sample diff f1   is twice tot diff f1....... 48   / 1000    p < 0.048  [38;5;9m*[0m
count sample diff prec is twice tot diff prec..... 71   / 1000    p < 0.071  
count sample diff rec  is twice tot diff rec ..... 41   / 1000    p < 0.041  [38;5;9m*[0m
count sample diff acc  is twice tot diff acc...... 83   / 1000    p < 0.083  


Seed: 2803636207 - Split: 3 - age_range - Group: 35 - 44
data shape:  (6268, 1)
sample size: 3134


bootstrap: 100%|████████████████████████████| 1000/1000 [00:10<00:00, 90.94it/s]



count sample diff f1   is twice tot diff f1....... 112  / 1000    p < 0.112  
count sample diff prec is twice tot diff prec..... 44   / 1000    p < 0.044  [38;5;9m*[0m
count sample diff rec  is twice tot diff rec ..... 227  / 1000    p < 0.227  
count sample diff acc  is twice tot diff acc...... 27   / 1000    p < 0.027  [38;5;9m*[0m


Seed: 2803636207 - Split: 3 - age_range - Group: 65 or older
data shape:  (651, 1)
sample size: 325


bootstrap: 100%|███████████████████████████| 1000/1000 [00:05<00:00, 183.99it/s]



count sample diff f1   is twice tot diff f1....... 962  / 1000    p < 0.962  [38;5;8m![0m
count sample diff prec is twice tot diff prec..... 919  / 1000    p < 0.919  
count sample diff rec  is twice tot diff rec ..... 969  / 1000    p < 0.969  [38;5;8m![0m
count sample diff acc  is twice tot diff acc...... 857  / 1000    p < 0.857  


Seed: 2803636207 - Split: 3 - age_range - Group: 18 - 24
data shape:  (2643, 1)
sample size: 1321


bootstrap: 100%|███████████████████████████| 1000/1000 [00:08<00:00, 124.32it/s]



count sample diff f1   is twice tot diff f1....... 537  / 1000    p < 0.537  
count sample diff prec is twice tot diff prec..... 586  / 1000    p < 0.586  
count sample diff rec  is twice tot diff rec ..... 476  / 1000    p < 0.476  
count sample diff acc  is twice tot diff acc...... 730  / 1000    p < 0.73   


Seed: 2803636207 - Split: 3 - age_range - Group: 55 - 64
data shape:  (1841, 1)
sample size: 920


bootstrap: 100%|███████████████████████████| 1000/1000 [00:06<00:00, 160.96it/s]



count sample diff f1   is twice tot diff f1....... 151  / 1000    p < 0.151  
count sample diff prec is twice tot diff prec..... 50   / 1000    p < 0.05   
count sample diff rec  is twice tot diff rec ..... 232  / 1000    p < 0.232  
count sample diff acc  is twice tot diff acc...... 21   / 1000    p < 0.021  [38;5;9m*[0m


Seed: 2803636207 - Split: 3 - age_range - Group: 45 - 54
data shape:  (3083, 1)
sample size: 1541


bootstrap: 100%|███████████████████████████| 1000/1000 [00:07<00:00, 135.61it/s]



count sample diff f1   is twice tot diff f1....... 255  / 1000    p < 0.255  
count sample diff prec is twice tot diff prec..... 253  / 1000    p < 0.253  
count sample diff rec  is twice tot diff rec ..... 261  / 1000    p < 0.261  
count sample diff acc  is twice tot diff acc...... 252  / 1000    p < 0.252  


Seed: 165043843 - Split: 0 - age_range - Group: 25 - 34
data shape:  (10457, 1)
sample size: 5228


bootstrap: 100%|████████████████████████████| 1000/1000 [00:14<00:00, 67.67it/s]



count sample diff f1   is twice tot diff f1....... 157  / 1000    p < 0.157  
count sample diff prec is twice tot diff prec..... 217  / 1000    p < 0.217  
count sample diff rec  is twice tot diff rec ..... 98   / 1000    p < 0.098  
count sample diff acc  is twice tot diff acc...... 317  / 1000    p < 0.317  


Seed: 165043843 - Split: 0 - age_range - Group: 35 - 44
data shape:  (6238, 1)
sample size: 3119


bootstrap: 100%|████████████████████████████| 1000/1000 [00:10<00:00, 94.48it/s]



count sample diff f1   is twice tot diff f1....... 175  / 1000    p < 0.175  
count sample diff prec is twice tot diff prec..... 247  / 1000    p < 0.247  
count sample diff rec  is twice tot diff rec ..... 97   / 1000    p < 0.097  
count sample diff acc  is twice tot diff acc...... 477  / 1000    p < 0.477  


Seed: 165043843 - Split: 0 - age_range - Group: 65 or older
data shape:  (643, 1)
sample size: 321


bootstrap: 100%|███████████████████████████| 1000/1000 [00:05<00:00, 191.55it/s]



count sample diff f1   is twice tot diff f1....... 130  / 1000    p < 0.13   
count sample diff prec is twice tot diff prec..... 123  / 1000    p < 0.123  
count sample diff rec  is twice tot diff rec ..... 170  / 1000    p < 0.17   
count sample diff acc  is twice tot diff acc...... 93   / 1000    p < 0.093  


Seed: 165043843 - Split: 0 - age_range - Group: 18 - 24
data shape:  (2521, 1)
sample size: 1260


bootstrap: 100%|███████████████████████████| 1000/1000 [00:09<00:00, 105.55it/s]



count sample diff f1   is twice tot diff f1....... 7    / 1000    p < 0.007  [38;5;9m**[0m
count sample diff prec is twice tot diff prec..... 7    / 1000    p < 0.007  [38;5;9m**[0m
count sample diff rec  is twice tot diff rec ..... 9    / 1000    p < 0.009  [38;5;9m**[0m
count sample diff acc  is twice tot diff acc...... 5    / 1000    p < 0.005  [38;5;9m**[0m


Seed: 165043843 - Split: 0 - age_range - Group: 55 - 64
data shape:  (1894, 1)
sample size: 947


bootstrap: 100%|███████████████████████████| 1000/1000 [00:06<00:00, 162.70it/s]



count sample diff f1   is twice tot diff f1....... 279  / 1000    p < 0.279  
count sample diff prec is twice tot diff prec..... 240  / 1000    p < 0.24   
count sample diff rec  is twice tot diff rec ..... 299  / 1000    p < 0.299  
count sample diff acc  is twice tot diff acc...... 211  / 1000    p < 0.211  


Seed: 165043843 - Split: 0 - age_range - Group: 45 - 54
data shape:  (3058, 1)
sample size: 1529


bootstrap: 100%|███████████████████████████| 1000/1000 [00:07<00:00, 138.09it/s]



count sample diff f1   is twice tot diff f1....... 123  / 1000    p < 0.123  
count sample diff prec is twice tot diff prec..... 217  / 1000    p < 0.217  
count sample diff rec  is twice tot diff rec ..... 67   / 1000    p < 0.067  
count sample diff acc  is twice tot diff acc...... 347  / 1000    p < 0.347  


Seed: 165043843 - Split: 1 - age_range - Group: 25 - 34
data shape:  (10244, 1)
sample size: 5122


bootstrap: 100%|████████████████████████████| 1000/1000 [00:15<00:00, 64.74it/s]



count sample diff f1   is twice tot diff f1....... 918  / 1000    p < 0.918  
count sample diff prec is twice tot diff prec..... 966  / 1000    p < 0.966  [38;5;8m![0m
count sample diff rec  is twice tot diff rec ..... 848  / 1000    p < 0.848  
count sample diff acc  is twice tot diff acc...... 981  / 1000    p < 0.981  [38;5;8m![0m


Seed: 165043843 - Split: 1 - age_range - Group: 35 - 44
data shape:  (6191, 1)
sample size: 3095


bootstrap: 100%|████████████████████████████| 1000/1000 [00:13<00:00, 74.66it/s]



count sample diff f1   is twice tot diff f1....... 352  / 1000    p < 0.352  
count sample diff prec is twice tot diff prec..... 484  / 1000    p < 0.484  
count sample diff rec  is twice tot diff rec ..... 235  / 1000    p < 0.235  
count sample diff acc  is twice tot diff acc...... 672  / 1000    p < 0.672  


Seed: 165043843 - Split: 1 - age_range - Group: 65 or older
data shape:  (704, 1)
sample size: 352


bootstrap: 100%|███████████████████████████| 1000/1000 [00:06<00:00, 158.77it/s]



count sample diff f1   is twice tot diff f1....... 174  / 1000    p < 0.174  
count sample diff prec is twice tot diff prec..... 220  / 1000    p < 0.22   
count sample diff rec  is twice tot diff rec ..... 164  / 1000    p < 0.164  
count sample diff acc  is twice tot diff acc...... 249  / 1000    p < 0.249  


Seed: 165043843 - Split: 1 - age_range - Group: 18 - 24
data shape:  (2638, 1)
sample size: 1319


bootstrap: 100%|████████████████████████████| 1000/1000 [00:11<00:00, 88.77it/s]



count sample diff f1   is twice tot diff f1....... 95   / 1000    p < 0.095  
count sample diff prec is twice tot diff prec..... 100  / 1000    p < 0.1    
count sample diff rec  is twice tot diff rec ..... 91   / 1000    p < 0.091  
count sample diff acc  is twice tot diff acc...... 152  / 1000    p < 0.152  


Seed: 165043843 - Split: 1 - age_range - Group: 55 - 64
data shape:  (1831, 1)
sample size: 915


bootstrap: 100%|███████████████████████████| 1000/1000 [00:06<00:00, 145.54it/s]



count sample diff f1   is twice tot diff f1....... 192  / 1000    p < 0.192  
count sample diff prec is twice tot diff prec..... 228  / 1000    p < 0.228  
count sample diff rec  is twice tot diff rec ..... 148  / 1000    p < 0.148  
count sample diff acc  is twice tot diff acc...... 353  / 1000    p < 0.353  


Seed: 165043843 - Split: 1 - age_range - Group: 45 - 54
data shape:  (3048, 1)
sample size: 1524


bootstrap: 100%|███████████████████████████| 1000/1000 [00:07<00:00, 127.36it/s]



count sample diff f1   is twice tot diff f1....... 806  / 1000    p < 0.806  
count sample diff prec is twice tot diff prec..... 852  / 1000    p < 0.852  
count sample diff rec  is twice tot diff rec ..... 744  / 1000    p < 0.744  
count sample diff acc  is twice tot diff acc...... 889  / 1000    p < 0.889  


Seed: 165043843 - Split: 2 - age_range - Group: 25 - 34
data shape:  (10311, 1)
sample size: 5155


bootstrap: 100%|████████████████████████████| 1000/1000 [00:16<00:00, 58.90it/s]



count sample diff f1   is twice tot diff f1....... 13   / 1000    p < 0.013  [38;5;9m*[0m
count sample diff prec is twice tot diff prec..... 8    / 1000    p < 0.008  [38;5;9m**[0m
count sample diff rec  is twice tot diff rec ..... 20   / 1000    p < 0.02   [38;5;9m*[0m
count sample diff acc  is twice tot diff acc...... 4    / 1000    p < 0.004  [38;5;9m**[0m


Seed: 165043843 - Split: 2 - age_range - Group: 35 - 44
data shape:  (6247, 1)
sample size: 3123


bootstrap: 100%|████████████████████████████| 1000/1000 [00:14<00:00, 69.33it/s]



count sample diff f1   is twice tot diff f1....... 18   / 1000    p < 0.018  [38;5;9m*[0m
count sample diff prec is twice tot diff prec..... 10   / 1000    p < 0.01   [38;5;9m*[0m
count sample diff rec  is twice tot diff rec ..... 39   / 1000    p < 0.039  [38;5;9m*[0m
count sample diff acc  is twice tot diff acc...... 3    / 1000    p < 0.003  [38;5;9m**[0m


Seed: 165043843 - Split: 2 - age_range - Group: 65 or older
data shape:  (675, 1)
sample size: 337


bootstrap: 100%|███████████████████████████| 1000/1000 [00:05<00:00, 189.25it/s]



count sample diff f1   is twice tot diff f1....... 111  / 1000    p < 0.111  
count sample diff prec is twice tot diff prec..... 63   / 1000    p < 0.063  
count sample diff rec  is twice tot diff rec ..... 160  / 1000    p < 0.16   
count sample diff acc  is twice tot diff acc...... 29   / 1000    p < 0.029  [38;5;9m*[0m


Seed: 165043843 - Split: 2 - age_range - Group: 18 - 24
data shape:  (2697, 1)
sample size: 1348


bootstrap: 100%|███████████████████████████| 1000/1000 [00:06<00:00, 147.26it/s]



count sample diff f1   is twice tot diff f1....... 471  / 1000    p < 0.471  
count sample diff prec is twice tot diff prec..... 463  / 1000    p < 0.463  
count sample diff rec  is twice tot diff rec ..... 509  / 1000    p < 0.509  
count sample diff acc  is twice tot diff acc...... 390  / 1000    p < 0.39   


Seed: 165043843 - Split: 2 - age_range - Group: 55 - 64
data shape:  (1883, 1)
sample size: 941


bootstrap: 100%|███████████████████████████| 1000/1000 [00:06<00:00, 160.13it/s]



count sample diff f1   is twice tot diff f1....... 631  / 1000    p < 0.631  
count sample diff prec is twice tot diff prec..... 538  / 1000    p < 0.538  
count sample diff rec  is twice tot diff rec ..... 722  / 1000    p < 0.722  
count sample diff acc  is twice tot diff acc...... 405  / 1000    p < 0.405  


Seed: 165043843 - Split: 2 - age_range - Group: 45 - 54
data shape:  (2929, 1)
sample size: 1464


bootstrap: 100%|███████████████████████████| 1000/1000 [00:07<00:00, 136.36it/s]



count sample diff f1   is twice tot diff f1....... 126  / 1000    p < 0.126  
count sample diff prec is twice tot diff prec..... 94   / 1000    p < 0.094  
count sample diff rec  is twice tot diff rec ..... 144  / 1000    p < 0.144  
count sample diff acc  is twice tot diff acc...... 86   / 1000    p < 0.086  


Seed: 165043843 - Split: 3 - age_range - Group: 25 - 34
data shape:  (10248, 1)
sample size: 5124


bootstrap: 100%|████████████████████████████| 1000/1000 [00:15<00:00, 63.82it/s]



count sample diff f1   is twice tot diff f1....... 119  / 1000    p < 0.119  
count sample diff prec is twice tot diff prec..... 201  / 1000    p < 0.201  
count sample diff rec  is twice tot diff rec ..... 78   / 1000    p < 0.078  
count sample diff acc  is twice tot diff acc...... 285  / 1000    p < 0.285  


Seed: 165043843 - Split: 3 - age_range - Group: 35 - 44
data shape:  (6324, 1)
sample size: 3162


bootstrap: 100%|████████████████████████████| 1000/1000 [00:10<00:00, 97.26it/s]



count sample diff f1   is twice tot diff f1....... 366  / 1000    p < 0.366  
count sample diff prec is twice tot diff prec..... 479  / 1000    p < 0.479  
count sample diff rec  is twice tot diff rec ..... 249  / 1000    p < 0.249  
count sample diff acc  is twice tot diff acc...... 644  / 1000    p < 0.644  


Seed: 165043843 - Split: 3 - age_range - Group: 65 or older
data shape:  (678, 1)
sample size: 339


bootstrap: 100%|███████████████████████████| 1000/1000 [00:05<00:00, 192.48it/s]



count sample diff f1   is twice tot diff f1....... 260  / 1000    p < 0.26   
count sample diff prec is twice tot diff prec..... 243  / 1000    p < 0.243  
count sample diff rec  is twice tot diff rec ..... 273  / 1000    p < 0.273  
count sample diff acc  is twice tot diff acc...... 251  / 1000    p < 0.251  


Seed: 165043843 - Split: 3 - age_range - Group: 18 - 24
data shape:  (2604, 1)
sample size: 1302


bootstrap: 100%|███████████████████████████| 1000/1000 [00:07<00:00, 127.79it/s]



count sample diff f1   is twice tot diff f1....... 35   / 1000    p < 0.035  [38;5;9m*[0m
count sample diff prec is twice tot diff prec..... 23   / 1000    p < 0.023  [38;5;9m*[0m
count sample diff rec  is twice tot diff rec ..... 50   / 1000    p < 0.05   
count sample diff acc  is twice tot diff acc...... 13   / 1000    p < 0.013  [38;5;9m*[0m


Seed: 165043843 - Split: 3 - age_range - Group: 55 - 64
data shape:  (1852, 1)
sample size: 926


bootstrap: 100%|███████████████████████████| 1000/1000 [00:07<00:00, 126.48it/s]



count sample diff f1   is twice tot diff f1....... 207  / 1000    p < 0.207  
count sample diff prec is twice tot diff prec..... 183  / 1000    p < 0.183  
count sample diff rec  is twice tot diff rec ..... 247  / 1000    p < 0.247  
count sample diff acc  is twice tot diff acc...... 153  / 1000    p < 0.153  


Seed: 165043843 - Split: 3 - age_range - Group: 45 - 54
data shape:  (3065, 1)
sample size: 1532


bootstrap: 100%|███████████████████████████| 1000/1000 [00:07<00:00, 136.59it/s]



count sample diff f1   is twice tot diff f1....... 113  / 1000    p < 0.113  
count sample diff prec is twice tot diff prec..... 139  / 1000    p < 0.139  
count sample diff rec  is twice tot diff rec ..... 106  / 1000    p < 0.106  
count sample diff acc  is twice tot diff acc...... 156  / 1000    p < 0.156  


Seed: 2923262358 - Split: 0 - age_range - Group: 25 - 34
data shape:  (10253, 1)
sample size: 5126


bootstrap: 100%|████████████████████████████| 1000/1000 [00:19<00:00, 52.55it/s]



count sample diff f1   is twice tot diff f1....... 813  / 1000    p < 0.813  
count sample diff prec is twice tot diff prec..... 887  / 1000    p < 0.887  
count sample diff rec  is twice tot diff rec ..... 651  / 1000    p < 0.651  
count sample diff acc  is twice tot diff acc...... 950  / 1000    p < 0.95   


Seed: 2923262358 - Split: 0 - age_range - Group: 35 - 44
data shape:  (6321, 1)
sample size: 3160


bootstrap: 100%|████████████████████████████| 1000/1000 [00:10<00:00, 98.01it/s]



count sample diff f1   is twice tot diff f1....... 466  / 1000    p < 0.466  
count sample diff prec is twice tot diff prec..... 534  / 1000    p < 0.534  
count sample diff rec  is twice tot diff rec ..... 370  / 1000    p < 0.37   
count sample diff acc  is twice tot diff acc...... 673  / 1000    p < 0.673  


Seed: 2923262358 - Split: 0 - age_range - Group: 65 or older
data shape:  (646, 1)
sample size: 323


bootstrap: 100%|███████████████████████████| 1000/1000 [00:05<00:00, 191.60it/s]



count sample diff f1   is twice tot diff f1....... 50   / 1000    p < 0.05   
count sample diff prec is twice tot diff prec..... 34   / 1000    p < 0.034  [38;5;9m*[0m
count sample diff rec  is twice tot diff rec ..... 71   / 1000    p < 0.071  
count sample diff acc  is twice tot diff acc...... 15   / 1000    p < 0.015  [38;5;9m*[0m


Seed: 2923262358 - Split: 0 - age_range - Group: 18 - 24
data shape:  (2644, 1)
sample size: 1322


bootstrap: 100%|███████████████████████████| 1000/1000 [00:06<00:00, 144.27it/s]



count sample diff f1   is twice tot diff f1....... 306  / 1000    p < 0.306  
count sample diff prec is twice tot diff prec..... 291  / 1000    p < 0.291  
count sample diff rec  is twice tot diff rec ..... 352  / 1000    p < 0.352  
count sample diff acc  is twice tot diff acc...... 217  / 1000    p < 0.217  


Seed: 2923262358 - Split: 0 - age_range - Group: 55 - 64
data shape:  (1869, 1)
sample size: 934


bootstrap: 100%|███████████████████████████| 1000/1000 [00:06<00:00, 163.12it/s]



count sample diff f1   is twice tot diff f1....... 424  / 1000    p < 0.424  
count sample diff prec is twice tot diff prec..... 321  / 1000    p < 0.321  
count sample diff rec  is twice tot diff rec ..... 531  / 1000    p < 0.531  
count sample diff acc  is twice tot diff acc...... 189  / 1000    p < 0.189  


Seed: 2923262358 - Split: 0 - age_range - Group: 45 - 54
data shape:  (3030, 1)
sample size: 1515


bootstrap: 100%|███████████████████████████| 1000/1000 [00:07<00:00, 126.97it/s]



count sample diff f1   is twice tot diff f1....... 783  / 1000    p < 0.783  
count sample diff prec is twice tot diff prec..... 742  / 1000    p < 0.742  
count sample diff rec  is twice tot diff rec ..... 808  / 1000    p < 0.808  
count sample diff acc  is twice tot diff acc...... 706  / 1000    p < 0.706  


Seed: 2923262358 - Split: 1 - age_range - Group: 25 - 34
data shape:  (10398, 1)
sample size: 5199


bootstrap: 100%|████████████████████████████| 1000/1000 [00:14<00:00, 66.86it/s]



count sample diff f1   is twice tot diff f1....... 948  / 1000    p < 0.948  
count sample diff prec is twice tot diff prec..... 981  / 1000    p < 0.981  [38;5;8m![0m
count sample diff rec  is twice tot diff rec ..... 927  / 1000    p < 0.927  
count sample diff acc  is twice tot diff acc...... 986  / 1000    p < 0.986  [38;5;8m![0m


Seed: 2923262358 - Split: 1 - age_range - Group: 35 - 44
data shape:  (6179, 1)
sample size: 3089


bootstrap: 100%|████████████████████████████| 1000/1000 [00:12<00:00, 80.11it/s]



count sample diff f1   is twice tot diff f1....... 474  / 1000    p < 0.474  
count sample diff prec is twice tot diff prec..... 434  / 1000    p < 0.434  
count sample diff rec  is twice tot diff rec ..... 507  / 1000    p < 0.507  
count sample diff acc  is twice tot diff acc...... 389  / 1000    p < 0.389  


Seed: 2923262358 - Split: 1 - age_range - Group: 65 or older
data shape:  (675, 1)
sample size: 337


bootstrap: 100%|███████████████████████████| 1000/1000 [00:05<00:00, 188.78it/s]



count sample diff f1   is twice tot diff f1....... 487  / 1000    p < 0.487  
count sample diff prec is twice tot diff prec..... 402  / 1000    p < 0.402  
count sample diff rec  is twice tot diff rec ..... 492  / 1000    p < 0.492  
count sample diff acc  is twice tot diff acc...... 352  / 1000    p < 0.352  


Seed: 2923262358 - Split: 1 - age_range - Group: 18 - 24
data shape:  (2556, 1)
sample size: 1278


bootstrap: 100%|███████████████████████████| 1000/1000 [00:06<00:00, 148.74it/s]



count sample diff f1   is twice tot diff f1....... 569  / 1000    p < 0.569  
count sample diff prec is twice tot diff prec..... 491  / 1000    p < 0.491  
count sample diff rec  is twice tot diff rec ..... 604  / 1000    p < 0.604  
count sample diff acc  is twice tot diff acc...... 347  / 1000    p < 0.347  


Seed: 2923262358 - Split: 1 - age_range - Group: 55 - 64
data shape:  (1832, 1)
sample size: 916


bootstrap: 100%|███████████████████████████| 1000/1000 [00:06<00:00, 163.09it/s]



count sample diff f1   is twice tot diff f1....... 760  / 1000    p < 0.76   
count sample diff prec is twice tot diff prec..... 676  / 1000    p < 0.676  
count sample diff rec  is twice tot diff rec ..... 793  / 1000    p < 0.793  
count sample diff acc  is twice tot diff acc...... 596  / 1000    p < 0.596  


Seed: 2923262358 - Split: 1 - age_range - Group: 45 - 54
data shape:  (3046, 1)
sample size: 1523


bootstrap: 100%|███████████████████████████| 1000/1000 [00:07<00:00, 129.07it/s]



count sample diff f1   is twice tot diff f1....... 793  / 1000    p < 0.793  
count sample diff prec is twice tot diff prec..... 820  / 1000    p < 0.82   
count sample diff rec  is twice tot diff rec ..... 772  / 1000    p < 0.772  
count sample diff acc  is twice tot diff acc...... 826  / 1000    p < 0.826  


Seed: 2923262358 - Split: 2 - age_range - Group: 25 - 34
data shape:  (10301, 1)
sample size: 5150


bootstrap: 100%|████████████████████████████| 1000/1000 [00:17<00:00, 56.53it/s]



count sample diff f1   is twice tot diff f1....... 979  / 1000    p < 0.979  [38;5;8m![0m
count sample diff prec is twice tot diff prec..... 986  / 1000    p < 0.986  [38;5;8m![0m
count sample diff rec  is twice tot diff rec ..... 958  / 1000    p < 0.958  [38;5;8m![0m
count sample diff acc  is twice tot diff acc...... 993  / 1000    p < 0.993  [38;5;8m![0m


Seed: 2923262358 - Split: 2 - age_range - Group: 35 - 44
data shape:  (6314, 1)
sample size: 3157


bootstrap: 100%|████████████████████████████| 1000/1000 [00:10<00:00, 93.72it/s]



count sample diff f1   is twice tot diff f1....... 972  / 1000    p < 0.972  [38;5;8m![0m
count sample diff prec is twice tot diff prec..... 969  / 1000    p < 0.969  [38;5;8m![0m
count sample diff rec  is twice tot diff rec ..... 972  / 1000    p < 0.972  [38;5;8m![0m
count sample diff acc  is twice tot diff acc...... 951  / 1000    p < 0.951  [38;5;8m![0m


Seed: 2923262358 - Split: 2 - age_range - Group: 65 or older
data shape:  (683, 1)
sample size: 341


bootstrap: 100%|███████████████████████████| 1000/1000 [00:08<00:00, 114.48it/s]



count sample diff f1   is twice tot diff f1....... 514  / 1000    p < 0.514  
count sample diff prec is twice tot diff prec..... 531  / 1000    p < 0.531  
count sample diff rec  is twice tot diff rec ..... 491  / 1000    p < 0.491  
count sample diff acc  is twice tot diff acc...... 571  / 1000    p < 0.571  


Seed: 2923262358 - Split: 2 - age_range - Group: 18 - 24
data shape:  (2596, 1)
sample size: 1298


bootstrap: 100%|███████████████████████████| 1000/1000 [00:07<00:00, 141.10it/s]



count sample diff f1   is twice tot diff f1....... 912  / 1000    p < 0.912  
count sample diff prec is twice tot diff prec..... 928  / 1000    p < 0.928  
count sample diff rec  is twice tot diff rec ..... 892  / 1000    p < 0.892  
count sample diff acc  is twice tot diff acc...... 949  / 1000    p < 0.949  


Seed: 2923262358 - Split: 2 - age_range - Group: 55 - 64
data shape:  (1856, 1)
sample size: 928


bootstrap: 100%|███████████████████████████| 1000/1000 [00:06<00:00, 164.66it/s]



count sample diff f1   is twice tot diff f1....... 396  / 1000    p < 0.396  
count sample diff prec is twice tot diff prec..... 334  / 1000    p < 0.334  
count sample diff rec  is twice tot diff rec ..... 463  / 1000    p < 0.463  
count sample diff acc  is twice tot diff acc...... 259  / 1000    p < 0.259  


Seed: 2923262358 - Split: 2 - age_range - Group: 45 - 54
data shape:  (3010, 1)
sample size: 1505


bootstrap: 100%|███████████████████████████| 1000/1000 [00:08<00:00, 121.95it/s]



count sample diff f1   is twice tot diff f1....... 564  / 1000    p < 0.564  
count sample diff prec is twice tot diff prec..... 490  / 1000    p < 0.49   
count sample diff rec  is twice tot diff rec ..... 614  / 1000    p < 0.614  
count sample diff acc  is twice tot diff acc...... 420  / 1000    p < 0.42   


Seed: 2923262358 - Split: 3 - age_range - Group: 25 - 34
data shape:  (10308, 1)
sample size: 5154


bootstrap: 100%|████████████████████████████| 1000/1000 [00:15<00:00, 63.89it/s]



count sample diff f1   is twice tot diff f1....... 786  / 1000    p < 0.786  
count sample diff prec is twice tot diff prec..... 803  / 1000    p < 0.803  
count sample diff rec  is twice tot diff rec ..... 757  / 1000    p < 0.757  
count sample diff acc  is twice tot diff acc...... 830  / 1000    p < 0.83   


Seed: 2923262358 - Split: 3 - age_range - Group: 35 - 44
data shape:  (6186, 1)
sample size: 3093


bootstrap: 100%|████████████████████████████| 1000/1000 [00:11<00:00, 90.52it/s]



count sample diff f1   is twice tot diff f1....... 801  / 1000    p < 0.801  
count sample diff prec is twice tot diff prec..... 620  / 1000    p < 0.62   
count sample diff rec  is twice tot diff rec ..... 927  / 1000    p < 0.927  
count sample diff acc  is twice tot diff acc...... 320  / 1000    p < 0.32   


Seed: 2923262358 - Split: 3 - age_range - Group: 65 or older
data shape:  (696, 1)
sample size: 348


bootstrap: 100%|███████████████████████████| 1000/1000 [00:05<00:00, 185.66it/s]



count sample diff f1   is twice tot diff f1....... 461  / 1000    p < 0.461  
count sample diff prec is twice tot diff prec..... 318  / 1000    p < 0.318  
count sample diff rec  is twice tot diff rec ..... 578  / 1000    p < 0.578  
count sample diff acc  is twice tot diff acc...... 188  / 1000    p < 0.188  


Seed: 2923262358 - Split: 3 - age_range - Group: 18 - 24
data shape:  (2664, 1)
sample size: 1332


bootstrap: 100%|███████████████████████████| 1000/1000 [00:06<00:00, 143.04it/s]



count sample diff f1   is twice tot diff f1....... 195  / 1000    p < 0.195  
count sample diff prec is twice tot diff prec..... 186  / 1000    p < 0.186  
count sample diff rec  is twice tot diff rec ..... 211  / 1000    p < 0.211  
count sample diff acc  is twice tot diff acc...... 156  / 1000    p < 0.156  


Seed: 2923262358 - Split: 3 - age_range - Group: 55 - 64
data shape:  (1903, 1)
sample size: 951


bootstrap: 100%|███████████████████████████| 1000/1000 [00:06<00:00, 144.42it/s]



count sample diff f1   is twice tot diff f1....... 922  / 1000    p < 0.922  
count sample diff prec is twice tot diff prec..... 894  / 1000    p < 0.894  
count sample diff rec  is twice tot diff rec ..... 941  / 1000    p < 0.941  
count sample diff acc  is twice tot diff acc...... 805  / 1000    p < 0.805  


Seed: 2923262358 - Split: 3 - age_range - Group: 45 - 54
data shape:  (3014, 1)
sample size: 1507


bootstrap: 100%|███████████████████████████| 1000/1000 [00:09<00:00, 109.31it/s]


count sample diff f1   is twice tot diff f1....... 472  / 1000    p < 0.472  
count sample diff prec is twice tot diff prec..... 325  / 1000    p < 0.325  
count sample diff rec  is twice tot diff rec ..... 567  / 1000    p < 0.567  
count sample diff acc  is twice tot diff acc...... 238  / 1000    p < 0.238  





In [11]:
# Estimate k for every age group from the collected p-values in `age_p_dicts`
# (presumably the result of the age significance test run earlier — confirm).
# The result maps each group label to a dict with 'k_count' and 'k_bonferroni'.
age_ks = k_estimator_groups(age_p_dicts)
# Bare expression so the notebook displays the mapping as the cell result.
age_ks

{'65 or older': {'k_count': 1, 'k_bonferroni': -1},
 '25 - 34': {'k_count': 2, 'k_bonferroni': -1},
 '45 - 54': {'k_count': -1, 'k_bonferroni': -1},
 '35 - 44': {'k_count': 1, 'k_bonferroni': -1},
 '18 - 24': {'k_count': 2, 'k_bonferroni': -1},
 '55 - 64': {'k_count': 1, 'k_bonferroni': -1}}

In [12]:
# Build the per-age-group table of k estimates: one row per group (sorted by
# label) and one column per estimator, with LaTeX math-mode column headers.
#
# The headers are raw strings: '\h' inside an ordinary string literal is an
# invalid escape sequence (a warning on current Python, slated to become an
# error), while r'...' yields exactly the same text without the warning.
#
# The frame is built in a single chain from `age_ks`, so re-running this cell
# always produces the same result.
age_ks_df = (
    pd.DataFrame(age_ks)
    .T
    .sort_index()
    # Show -1 as 0 in the table (presumably -1 is the estimator's
    # "no k found" sentinel — TODO confirm against k_estimator_groups).
    .replace({-1: 0})
    .rename(columns={
        'k_count': r'$\hat{k}_{count}$',
        'k_bonferroni': r'$\hat{k}_{Bonf.}$',
    })
)
age_ks_df

Unnamed: 0,$\hat{k}_{count}$,$\hat{k}_{Bonf.}$
18 - 24,2,0
25 - 34,2,0
35 - 44,1,0
45 - 54,0,0
55 - 64,1,0
65 or older,1,0


In [13]:
# Write the age-group table to a LaTeX file. escape=False leaves the
# math-mode column headers (backslashes, $, _, braces) unescaped.
age_ks_df.to_latex(
    '../tables/replication/baseline/age.tex',
    escape=False,
)

#### Education

In [14]:
# Run the per-group significance test comparing the 'no extra layers' runs
# with the 'education' runs, with annotators grouped by their education.
# The call prints one bootstrap report per seed / split / group.
# NOTE(review): no random seed is passed here — confirm whether the bootstrap
# inside significance_test_per_group is seeded, otherwise p-values will vary
# between runs.
education_p_dicts = significance_test_per_group(
    df,
    attribute_a='no extra layers',
    attribute_b='education',
    to_groups=to_groups,
    attributes=['education'],
)



Seed: 2803636207 - Split: 0 - education - Group: Bachelor's degree in college (4-year)
data shape:  (10439, 1)
sample size: 5219


bootstrap: 100%|████████████████████████████| 1000/1000 [00:16<00:00, 61.25it/s]



count sample diff f1   is twice tot diff f1....... 161  / 1000    p < 0.161  
count sample diff prec is twice tot diff prec..... 150  / 1000    p < 0.15   
count sample diff rec  is twice tot diff rec ..... 176  / 1000    p < 0.176  
count sample diff acc  is twice tot diff acc...... 140  / 1000    p < 0.14   


Seed: 2803636207 - Split: 0 - education - Group: High school graduate (high school diploma or equivalent including GED)
data shape:  (2130, 1)
sample size: 1065


bootstrap: 100%|███████████████████████████| 1000/1000 [00:06<00:00, 147.94it/s]



count sample diff f1   is twice tot diff f1....... 497  / 1000    p < 0.497  
count sample diff prec is twice tot diff prec..... 504  / 1000    p < 0.504  
count sample diff rec  is twice tot diff rec ..... 480  / 1000    p < 0.48   
count sample diff acc  is twice tot diff acc...... 493  / 1000    p < 0.493  


Seed: 2803636207 - Split: 0 - education - Group: Master's degree
data shape:  (3534, 1)
sample size: 1767


bootstrap: 100%|███████████████████████████| 1000/1000 [00:07<00:00, 127.96it/s]



count sample diff f1   is twice tot diff f1....... 399  / 1000    p < 0.399  
count sample diff prec is twice tot diff prec..... 415  / 1000    p < 0.415  
count sample diff rec  is twice tot diff rec ..... 386  / 1000    p < 0.386  
count sample diff acc  is twice tot diff acc...... 419  / 1000    p < 0.419  


Seed: 2803636207 - Split: 0 - education - Group: Associate degree in college (2-year)
data shape:  (2603, 1)
sample size: 1301


bootstrap: 100%|███████████████████████████| 1000/1000 [00:08<00:00, 113.85it/s]



count sample diff f1   is twice tot diff f1....... 126  / 1000    p < 0.126  
count sample diff prec is twice tot diff prec..... 111  / 1000    p < 0.111  
count sample diff rec  is twice tot diff rec ..... 161  / 1000    p < 0.161  
count sample diff acc  is twice tot diff acc...... 69   / 1000    p < 0.069  


Seed: 2803636207 - Split: 0 - education - Group: Some college but no degree
data shape:  (4759, 1)
sample size: 2379


bootstrap: 100%|███████████████████████████| 1000/1000 [00:09<00:00, 106.90it/s]



count sample diff f1   is twice tot diff f1....... 53   / 1000    p < 0.053  
count sample diff prec is twice tot diff prec..... 44   / 1000    p < 0.044  [38;5;9m*[0m
count sample diff rec  is twice tot diff rec ..... 78   / 1000    p < 0.078  
count sample diff acc  is twice tot diff acc...... 24   / 1000    p < 0.024  [38;5;9m*[0m


Seed: 2803636207 - Split: 0 - education - Group: Doctoral degree
data shape:  (277, 1)
sample size: 138


bootstrap: 100%|███████████████████████████| 1000/1000 [00:05<00:00, 183.49it/s]



count sample diff f1   is twice tot diff f1....... 154  / 1000    p < 0.154  
count sample diff prec is twice tot diff prec..... 46   / 1000    p < 0.046  [38;5;9m*[0m
count sample diff rec  is twice tot diff rec ..... 242  / 1000    p < 0.242  
count sample diff acc  is twice tot diff acc...... 37   / 1000    p < 0.037  [38;5;9m*[0m


Seed: 2803636207 - Split: 0 - education - Group: Less than high school degree
data shape:  (159, 1)
sample size: 79


bootstrap: 100%|███████████████████████████| 1000/1000 [00:06<00:00, 155.42it/s]



count sample diff f1   is twice tot diff f1....... 653  / 1000    p < 0.653  
count sample diff prec is twice tot diff prec..... 641  / 1000    p < 0.641  
count sample diff rec  is twice tot diff rec ..... 664  / 1000    p < 0.664  
count sample diff acc  is twice tot diff acc...... 563  / 1000    p < 0.563  


Seed: 2803636207 - Split: 0 - education - Group: Professional degree (JD, MD)
data shape:  (362, 1)
sample size: 181


bootstrap: 100%|███████████████████████████| 1000/1000 [00:05<00:00, 178.19it/s]



count sample diff f1   is twice tot diff f1....... 220  / 1000    p < 0.22   
count sample diff prec is twice tot diff prec..... 231  / 1000    p < 0.231  
count sample diff rec  is twice tot diff rec ..... 225  / 1000    p < 0.225  
count sample diff acc  is twice tot diff acc...... 251  / 1000    p < 0.251  


Seed: 2803636207 - Split: 1 - education - Group: Bachelor's degree in college (4-year)
data shape:  (10541, 1)
sample size: 5270


bootstrap: 100%|████████████████████████████| 1000/1000 [00:14<00:00, 69.53it/s]



count sample diff f1   is twice tot diff f1....... 61   / 1000    p < 0.061  
count sample diff prec is twice tot diff prec..... 36   / 1000    p < 0.036  [38;5;9m*[0m
count sample diff rec  is twice tot diff rec ..... 123  / 1000    p < 0.123  
count sample diff acc  is twice tot diff acc...... 20   / 1000    p < 0.02   [38;5;9m*[0m


Seed: 2803636207 - Split: 1 - education - Group: High school graduate (high school diploma or equivalent including GED)
data shape:  (2052, 1)
sample size: 1026


bootstrap: 100%|███████████████████████████| 1000/1000 [00:07<00:00, 137.04it/s]



count sample diff f1   is twice tot diff f1....... 77   / 1000    p < 0.077  
count sample diff prec is twice tot diff prec..... 66   / 1000    p < 0.066  
count sample diff rec  is twice tot diff rec ..... 86   / 1000    p < 0.086  
count sample diff acc  is twice tot diff acc...... 61   / 1000    p < 0.061  


Seed: 2803636207 - Split: 1 - education - Group: Master's degree
data shape:  (3455, 1)
sample size: 1727


bootstrap: 100%|███████████████████████████| 1000/1000 [00:08<00:00, 116.87it/s]



count sample diff f1   is twice tot diff f1....... 576  / 1000    p < 0.576  
count sample diff prec is twice tot diff prec..... 541  / 1000    p < 0.541  
count sample diff rec  is twice tot diff rec ..... 601  / 1000    p < 0.601  
count sample diff acc  is twice tot diff acc...... 528  / 1000    p < 0.528  


Seed: 2803636207 - Split: 1 - education - Group: Associate degree in college (2-year)
data shape:  (2697, 1)
sample size: 1348


bootstrap: 100%|███████████████████████████| 1000/1000 [00:06<00:00, 143.27it/s]



count sample diff f1   is twice tot diff f1....... 119  / 1000    p < 0.119  
count sample diff prec is twice tot diff prec..... 111  / 1000    p < 0.111  
count sample diff rec  is twice tot diff rec ..... 143  / 1000    p < 0.143  
count sample diff acc  is twice tot diff acc...... 80   / 1000    p < 0.08   


Seed: 2803636207 - Split: 1 - education - Group: Some college but no degree
data shape:  (4539, 1)
sample size: 2269


bootstrap: 100%|███████████████████████████| 1000/1000 [00:08<00:00, 122.36it/s]



count sample diff f1   is twice tot diff f1....... 281  / 1000    p < 0.281  
count sample diff prec is twice tot diff prec..... 302  / 1000    p < 0.302  
count sample diff rec  is twice tot diff rec ..... 266  / 1000    p < 0.266  
count sample diff acc  is twice tot diff acc...... 386  / 1000    p < 0.386  


Seed: 2803636207 - Split: 1 - education - Group: Doctoral degree
data shape:  (297, 1)
sample size: 148


bootstrap: 100%|███████████████████████████| 1000/1000 [00:04<00:00, 209.81it/s]



count sample diff f1   is twice tot diff f1....... 406  / 1000    p < 0.406  
count sample diff prec is twice tot diff prec..... 499  / 1000    p < 0.499  
count sample diff rec  is twice tot diff rec ..... 326  / 1000    p < 0.326  
count sample diff acc  is twice tot diff acc...... 639  / 1000    p < 0.639  


Seed: 2803636207 - Split: 1 - education - Group: Less than high school degree
data shape:  (152, 1)
sample size: 76


bootstrap: 100%|███████████████████████████| 1000/1000 [00:04<00:00, 215.36it/s]



count sample diff f1   is twice tot diff f1....... 540  / 1000    p < 0.54   
count sample diff prec is twice tot diff prec..... 570  / 1000    p < 0.57   
count sample diff rec  is twice tot diff rec ..... 542  / 1000    p < 0.542  
count sample diff acc  is twice tot diff acc...... 544  / 1000    p < 0.544  


Seed: 2803636207 - Split: 1 - education - Group: Professional degree (JD, MD)
data shape:  (372, 1)
sample size: 186


bootstrap: 100%|███████████████████████████| 1000/1000 [00:05<00:00, 191.20it/s]



count sample diff f1   is twice tot diff f1....... 91   / 1000    p < 0.091  
count sample diff prec is twice tot diff prec..... 83   / 1000    p < 0.083  
count sample diff rec  is twice tot diff rec ..... 101  / 1000    p < 0.101  
count sample diff acc  is twice tot diff acc...... 61   / 1000    p < 0.061  


Seed: 2803636207 - Split: 2 - education - Group: Bachelor's degree in college (4-year)
data shape:  (10471, 1)
sample size: 5235


bootstrap: 100%|████████████████████████████| 1000/1000 [00:14<00:00, 67.78it/s]



count sample diff f1   is twice tot diff f1....... 843  / 1000    p < 0.843  
count sample diff prec is twice tot diff prec..... 909  / 1000    p < 0.909  
count sample diff rec  is twice tot diff rec ..... 757  / 1000    p < 0.757  
count sample diff acc  is twice tot diff acc...... 948  / 1000    p < 0.948  


Seed: 2803636207 - Split: 2 - education - Group: High school graduate (high school diploma or equivalent including GED)
data shape:  (2106, 1)
sample size: 1053


bootstrap: 100%|███████████████████████████| 1000/1000 [00:07<00:00, 132.81it/s]



count sample diff f1   is twice tot diff f1....... 300  / 1000    p < 0.3    
count sample diff prec is twice tot diff prec..... 374  / 1000    p < 0.374  
count sample diff rec  is twice tot diff rec ..... 233  / 1000    p < 0.233  
count sample diff acc  is twice tot diff acc...... 553  / 1000    p < 0.553  


Seed: 2803636207 - Split: 2 - education - Group: Master's degree
data shape:  (3510, 1)
sample size: 1755


bootstrap: 100%|███████████████████████████| 1000/1000 [00:09<00:00, 108.84it/s]



count sample diff f1   is twice tot diff f1....... 594  / 1000    p < 0.594  
count sample diff prec is twice tot diff prec..... 545  / 1000    p < 0.545  
count sample diff rec  is twice tot diff rec ..... 620  / 1000    p < 0.62   
count sample diff acc  is twice tot diff acc...... 547  / 1000    p < 0.547  


Seed: 2803636207 - Split: 2 - education - Group: Associate degree in college (2-year)
data shape:  (2604, 1)
sample size: 1302


bootstrap: 100%|███████████████████████████| 1000/1000 [00:07<00:00, 131.86it/s]



count sample diff f1   is twice tot diff f1....... 322  / 1000    p < 0.322  
count sample diff prec is twice tot diff prec..... 226  / 1000    p < 0.226  
count sample diff rec  is twice tot diff rec ..... 412  / 1000    p < 0.412  
count sample diff acc  is twice tot diff acc...... 120  / 1000    p < 0.12   


Seed: 2803636207 - Split: 2 - education - Group: Some college but no degree
data shape:  (4644, 1)
sample size: 2322


bootstrap: 100%|███████████████████████████| 1000/1000 [00:08<00:00, 112.23it/s]



count sample diff f1   is twice tot diff f1....... 95   / 1000    p < 0.095  
count sample diff prec is twice tot diff prec..... 110  / 1000    p < 0.11   
count sample diff rec  is twice tot diff rec ..... 86   / 1000    p < 0.086  
count sample diff acc  is twice tot diff acc...... 180  / 1000    p < 0.18   


Seed: 2803636207 - Split: 2 - education - Group: Doctoral degree
data shape:  (319, 1)
sample size: 159


bootstrap: 100%|███████████████████████████| 1000/1000 [00:05<00:00, 169.07it/s]



count sample diff f1   is twice tot diff f1....... 172  / 1000    p < 0.172  
count sample diff prec is twice tot diff prec..... 155  / 1000    p < 0.155  
count sample diff rec  is twice tot diff rec ..... 184  / 1000    p < 0.184  
count sample diff acc  is twice tot diff acc...... 173  / 1000    p < 0.173  


Seed: 2803636207 - Split: 2 - education - Group: Less than high school degree
data shape:  (184, 1)
sample size: 92


bootstrap: 100%|███████████████████████████| 1000/1000 [00:05<00:00, 174.32it/s]



count sample diff f1   is twice tot diff f1....... 139  / 1000    p < 0.139  
count sample diff prec is twice tot diff prec..... 96   / 1000    p < 0.096  
count sample diff rec  is twice tot diff rec ..... 173  / 1000    p < 0.173  
count sample diff acc  is twice tot diff acc...... 23   / 1000    p < 0.023  [38;5;9m*[0m


Seed: 2803636207 - Split: 2 - education - Group: Professional degree (JD, MD)
data shape:  (411, 1)
sample size: 205


bootstrap: 100%|███████████████████████████| 1000/1000 [00:04<00:00, 201.86it/s]



count sample diff f1   is twice tot diff f1....... 237  / 1000    p < 0.237  
count sample diff prec is twice tot diff prec..... 205  / 1000    p < 0.205  
count sample diff rec  is twice tot diff rec ..... 269  / 1000    p < 0.269  
count sample diff acc  is twice tot diff acc...... 213  / 1000    p < 0.213  


Seed: 2803636207 - Split: 3 - education - Group: Bachelor's degree in college (4-year)
data shape:  (10589, 1)
sample size: 5294


bootstrap: 100%|████████████████████████████| 1000/1000 [00:18<00:00, 54.55it/s]



count sample diff f1   is twice tot diff f1....... 425  / 1000    p < 0.425  
count sample diff prec is twice tot diff prec..... 473  / 1000    p < 0.473  
count sample diff rec  is twice tot diff rec ..... 393  / 1000    p < 0.393  
count sample diff acc  is twice tot diff acc...... 516  / 1000    p < 0.516  


Seed: 2803636207 - Split: 3 - education - Group: High school graduate (high school diploma or equivalent including GED)
data shape:  (2032, 1)
sample size: 1016


bootstrap: 100%|███████████████████████████| 1000/1000 [00:06<00:00, 162.60it/s]



count sample diff f1   is twice tot diff f1....... 780  / 1000    p < 0.78   
count sample diff prec is twice tot diff prec..... 787  / 1000    p < 0.787  
count sample diff rec  is twice tot diff rec ..... 773  / 1000    p < 0.773  
count sample diff acc  is twice tot diff acc...... 781  / 1000    p < 0.781  


Seed: 2803636207 - Split: 3 - education - Group: Master's degree
data shape:  (3561, 1)
sample size: 1780


bootstrap: 100%|███████████████████████████| 1000/1000 [00:07<00:00, 127.33it/s]



count sample diff f1   is twice tot diff f1....... 888  / 1000    p < 0.888  
count sample diff prec is twice tot diff prec..... 873  / 1000    p < 0.873  
count sample diff rec  is twice tot diff rec ..... 911  / 1000    p < 0.911  
count sample diff acc  is twice tot diff acc...... 856  / 1000    p < 0.856  


Seed: 2803636207 - Split: 3 - education - Group: Associate degree in college (2-year)
data shape:  (2516, 1)
sample size: 1258


bootstrap: 100%|███████████████████████████| 1000/1000 [00:06<00:00, 143.32it/s]



count sample diff f1   is twice tot diff f1....... 703  / 1000    p < 0.703  
count sample diff prec is twice tot diff prec..... 641  / 1000    p < 0.641  
count sample diff rec  is twice tot diff rec ..... 836  / 1000    p < 0.836  
count sample diff acc  is twice tot diff acc...... 154  / 1000    p < 0.154  


Seed: 2803636207 - Split: 3 - education - Group: Some college but no degree
data shape:  (4718, 1)
sample size: 2359


bootstrap: 100%|███████████████████████████| 1000/1000 [00:09<00:00, 105.91it/s]



count sample diff f1   is twice tot diff f1....... 773  / 1000    p < 0.773  
count sample diff prec is twice tot diff prec..... 684  / 1000    p < 0.684  
count sample diff rec  is twice tot diff rec ..... 822  / 1000    p < 0.822  
count sample diff acc  is twice tot diff acc...... 400  / 1000    p < 0.4    


Seed: 2803636207 - Split: 3 - education - Group: Doctoral degree
data shape:  (327, 1)
sample size: 163


bootstrap: 100%|███████████████████████████| 1000/1000 [00:06<00:00, 165.07it/s]



count sample diff f1   is twice tot diff f1....... 607  / 1000    p < 0.607  
count sample diff prec is twice tot diff prec..... 637  / 1000    p < 0.637  
count sample diff rec  is twice tot diff rec ..... 593  / 1000    p < 0.593  
count sample diff acc  is twice tot diff acc...... 673  / 1000    p < 0.673  


Seed: 2803636207 - Split: 3 - education - Group: Less than high school degree
data shape:  (165, 1)
sample size: 82


bootstrap: 100%|███████████████████████████| 1000/1000 [00:05<00:00, 171.05it/s]



count sample diff f1   is twice tot diff f1....... 156  / 1000    p < 0.156  
count sample diff prec is twice tot diff prec..... 148  / 1000    p < 0.148  
count sample diff rec  is twice tot diff rec ..... 183  / 1000    p < 0.183  
count sample diff acc  is twice tot diff acc...... 138  / 1000    p < 0.138  


Seed: 2803636207 - Split: 3 - education - Group: Professional degree (JD, MD)
data shape:  (375, 1)
sample size: 187


bootstrap: 100%|███████████████████████████| 1000/1000 [00:05<00:00, 178.16it/s]



count sample diff f1   is twice tot diff f1....... 545  / 1000    p < 0.545  
count sample diff prec is twice tot diff prec..... 580  / 1000    p < 0.58   
count sample diff rec  is twice tot diff rec ..... 526  / 1000    p < 0.526  
count sample diff acc  is twice tot diff acc...... 576  / 1000    p < 0.576  


Seed: 165043843 - Split: 0 - education - Group: Bachelor's degree in college (4-year)
data shape:  (10700, 1)
sample size: 5350


bootstrap: 100%|████████████████████████████| 1000/1000 [00:16<00:00, 59.47it/s]



count sample diff f1   is twice tot diff f1....... 400  / 1000    p < 0.4    
count sample diff prec is twice tot diff prec..... 359  / 1000    p < 0.359  
count sample diff rec  is twice tot diff rec ..... 446  / 1000    p < 0.446  
count sample diff acc  is twice tot diff acc...... 309  / 1000    p < 0.309  


Seed: 165043843 - Split: 0 - education - Group: High school graduate (high school diploma or equivalent including GED)
data shape:  (2075, 1)
sample size: 1037


bootstrap: 100%|███████████████████████████| 1000/1000 [00:08<00:00, 124.10it/s]



count sample diff f1   is twice tot diff f1....... 246  / 1000    p < 0.246  
count sample diff prec is twice tot diff prec..... 328  / 1000    p < 0.328  
count sample diff rec  is twice tot diff rec ..... 178  / 1000    p < 0.178  
count sample diff acc  is twice tot diff acc...... 555  / 1000    p < 0.555  


Seed: 165043843 - Split: 0 - education - Group: Master's degree
data shape:  (3425, 1)
sample size: 1712


bootstrap: 100%|███████████████████████████| 1000/1000 [00:08<00:00, 120.47it/s]



count sample diff f1   is twice tot diff f1....... 443  / 1000    p < 0.443  
count sample diff prec is twice tot diff prec..... 464  / 1000    p < 0.464  
count sample diff rec  is twice tot diff rec ..... 434  / 1000    p < 0.434  
count sample diff acc  is twice tot diff acc...... 452  / 1000    p < 0.452  


Seed: 165043843 - Split: 0 - education - Group: Associate degree in college (2-year)
data shape:  (2598, 1)
sample size: 1299


bootstrap: 100%|███████████████████████████| 1000/1000 [00:07<00:00, 137.58it/s]



count sample diff f1   is twice tot diff f1....... 690  / 1000    p < 0.69   
count sample diff prec is twice tot diff prec..... 613  / 1000    p < 0.613  
count sample diff rec  is twice tot diff rec ..... 769  / 1000    p < 0.769  
count sample diff acc  is twice tot diff acc...... 340  / 1000    p < 0.34   


Seed: 165043843 - Split: 0 - education - Group: Some college but no degree
data shape:  (4560, 1)
sample size: 2280


bootstrap: 100%|███████████████████████████| 1000/1000 [00:09<00:00, 106.08it/s]



count sample diff f1   is twice tot diff f1....... 1    / 1000    p < 0.001  [38;5;9m**[0m
count sample diff prec is twice tot diff prec..... 1    / 1000    p < 0.001  [38;5;9m**[0m
count sample diff rec  is twice tot diff rec ..... 3    / 1000    p < 0.003  [38;5;9m**[0m
count sample diff acc  is twice tot diff acc...... 0    / 1000    p < 0.0    [38;5;9m**[0m


Seed: 165043843 - Split: 0 - education - Group: Doctoral degree
data shape:  (320, 1)
sample size: 160


bootstrap: 100%|███████████████████████████| 1000/1000 [00:05<00:00, 172.95it/s]



count sample diff f1   is twice tot diff f1....... 423  / 1000    p < 0.423  
count sample diff prec is twice tot diff prec..... 394  / 1000    p < 0.394  
count sample diff rec  is twice tot diff rec ..... 432  / 1000    p < 0.432  
count sample diff acc  is twice tot diff acc...... 432  / 1000    p < 0.432  


Seed: 165043843 - Split: 0 - education - Group: Less than high school degree
data shape:  (144, 1)
sample size: 72


bootstrap: 100%|███████████████████████████| 1000/1000 [00:05<00:00, 181.07it/s]



count sample diff f1   is twice tot diff f1....... 605  / 1000    p < 0.605  
count sample diff prec is twice tot diff prec..... 524  / 1000    p < 0.524  
count sample diff rec  is twice tot diff rec ..... 623  / 1000    p < 0.623  
count sample diff acc  is twice tot diff acc...... 339  / 1000    p < 0.339  


Seed: 165043843 - Split: 0 - education - Group: Professional degree (JD, MD)
data shape:  (384, 1)
sample size: 192


bootstrap: 100%|███████████████████████████| 1000/1000 [00:06<00:00, 155.77it/s]



count sample diff f1   is twice tot diff f1....... 57   / 1000    p < 0.057  
count sample diff prec is twice tot diff prec..... 44   / 1000    p < 0.044  [38;5;9m*[0m
count sample diff rec  is twice tot diff rec ..... 79   / 1000    p < 0.079  
count sample diff acc  is twice tot diff acc...... 38   / 1000    p < 0.038  [38;5;9m*[0m


Seed: 165043843 - Split: 1 - education - Group: Bachelor's degree in college (4-year)
data shape:  (10348, 1)
sample size: 5174


bootstrap: 100%|████████████████████████████| 1000/1000 [00:15<00:00, 63.74it/s]



count sample diff f1   is twice tot diff f1....... 790  / 1000    p < 0.79   
count sample diff prec is twice tot diff prec..... 819  / 1000    p < 0.819  
count sample diff rec  is twice tot diff rec ..... 744  / 1000    p < 0.744  
count sample diff acc  is twice tot diff acc...... 848  / 1000    p < 0.848  


Seed: 165043843 - Split: 1 - education - Group: High school graduate (high school diploma or equivalent including GED)
data shape:  (2075, 1)
sample size: 1037


bootstrap: 100%|███████████████████████████| 1000/1000 [00:06<00:00, 153.11it/s]



count sample diff f1   is twice tot diff f1....... 495  / 1000    p < 0.495  
count sample diff prec is twice tot diff prec..... 650  / 1000    p < 0.65   
count sample diff rec  is twice tot diff rec ..... 419  / 1000    p < 0.419  
count sample diff acc  is twice tot diff acc...... 819  / 1000    p < 0.819  


Seed: 165043843 - Split: 1 - education - Group: Master's degree
data shape:  (3580, 1)
sample size: 1790


bootstrap: 100%|████████████████████████████| 1000/1000 [00:13<00:00, 73.74it/s]



count sample diff f1   is twice tot diff f1....... 764  / 1000    p < 0.764  
count sample diff prec is twice tot diff prec..... 713  / 1000    p < 0.713  
count sample diff rec  is twice tot diff rec ..... 786  / 1000    p < 0.786  
count sample diff acc  is twice tot diff acc...... 719  / 1000    p < 0.719  


Seed: 165043843 - Split: 1 - education - Group: Associate degree in college (2-year)
data shape:  (2625, 1)
sample size: 1312


bootstrap: 100%|███████████████████████████| 1000/1000 [00:08<00:00, 118.06it/s]



count sample diff f1   is twice tot diff f1....... 562  / 1000    p < 0.562  
count sample diff prec is twice tot diff prec..... 580  / 1000    p < 0.58   
count sample diff rec  is twice tot diff rec ..... 530  / 1000    p < 0.53   
count sample diff acc  is twice tot diff acc...... 618  / 1000    p < 0.618  


Seed: 165043843 - Split: 1 - education - Group: Some college but no degree
data shape:  (4654, 1)
sample size: 2327


bootstrap: 100%|███████████████████████████| 1000/1000 [00:08<00:00, 112.19it/s]



count sample diff f1   is twice tot diff f1....... 1    / 1000    p < 0.001  [38;5;9m**[0m
count sample diff prec is twice tot diff prec..... 2    / 1000    p < 0.002  [38;5;9m**[0m
count sample diff rec  is twice tot diff rec ..... 1    / 1000    p < 0.001  [38;5;9m**[0m
count sample diff acc  is twice tot diff acc...... 29   / 1000    p < 0.029  [38;5;9m*[0m


Seed: 165043843 - Split: 1 - education - Group: Doctoral degree
data shape:  (332, 1)
sample size: 166


bootstrap: 100%|███████████████████████████| 1000/1000 [00:05<00:00, 185.32it/s]



count sample diff f1   is twice tot diff f1....... 570  / 1000    p < 0.57   
count sample diff prec is twice tot diff prec..... 519  / 1000    p < 0.519  
count sample diff rec  is twice tot diff rec ..... 616  / 1000    p < 0.616  
count sample diff acc  is twice tot diff acc...... 446  / 1000    p < 0.446  


Seed: 165043843 - Split: 1 - education - Group: Less than high school degree
data shape:  (179, 1)
sample size: 89


bootstrap: 100%|███████████████████████████| 1000/1000 [00:07<00:00, 127.63it/s]



count sample diff f1   is twice tot diff f1....... 138  / 1000    p < 0.138  
count sample diff prec is twice tot diff prec..... 118  / 1000    p < 0.118  
count sample diff rec  is twice tot diff rec ..... 148  / 1000    p < 0.148  
count sample diff acc  is twice tot diff acc...... 134  / 1000    p < 0.134  


Seed: 165043843 - Split: 1 - education - Group: Professional degree (JD, MD)
data shape:  (400, 1)
sample size: 200


bootstrap: 100%|███████████████████████████| 1000/1000 [00:06<00:00, 156.43it/s]



count sample diff f1   is twice tot diff f1....... 233  / 1000    p < 0.233  
count sample diff prec is twice tot diff prec..... 276  / 1000    p < 0.276  
count sample diff rec  is twice tot diff rec ..... 212  / 1000    p < 0.212  
count sample diff acc  is twice tot diff acc...... 295  / 1000    p < 0.295  


Seed: 165043843 - Split: 2 - education - Group: Bachelor's degree in college (4-year)
data shape:  (10497, 1)
sample size: 5248


bootstrap: 100%|████████████████████████████| 1000/1000 [00:16<00:00, 61.89it/s]



count sample diff f1   is twice tot diff f1....... 21   / 1000    p < 0.021  [38;5;9m*[0m
count sample diff prec is twice tot diff prec..... 6    / 1000    p < 0.006  [38;5;9m**[0m
count sample diff rec  is twice tot diff rec ..... 62   / 1000    p < 0.062  
count sample diff acc  is twice tot diff acc...... 1    / 1000    p < 0.001  [38;5;9m**[0m


Seed: 165043843 - Split: 2 - education - Group: High school graduate (high school diploma or equivalent including GED)
data shape:  (2064, 1)
sample size: 1032


bootstrap: 100%|███████████████████████████| 1000/1000 [00:08<00:00, 119.29it/s]



count sample diff f1   is twice tot diff f1....... 851  / 1000    p < 0.851  
count sample diff prec is twice tot diff prec..... 789  / 1000    p < 0.789  
count sample diff rec  is twice tot diff rec ..... 896  / 1000    p < 0.896  
count sample diff acc  is twice tot diff acc...... 591  / 1000    p < 0.591  


Seed: 165043843 - Split: 2 - education - Group: Master's degree
data shape:  (3487, 1)
sample size: 1743


bootstrap: 100%|████████████████████████████| 1000/1000 [00:11<00:00, 88.25it/s]



count sample diff f1   is twice tot diff f1....... 957  / 1000    p < 0.957  [38;5;8m![0m
count sample diff prec is twice tot diff prec..... 902  / 1000    p < 0.902  
count sample diff rec  is twice tot diff rec ..... 973  / 1000    p < 0.973  [38;5;8m![0m
count sample diff acc  is twice tot diff acc...... 894  / 1000    p < 0.894  


Seed: 165043843 - Split: 2 - education - Group: Associate degree in college (2-year)
data shape:  (2546, 1)
sample size: 1273


bootstrap: 100%|███████████████████████████| 1000/1000 [00:08<00:00, 120.20it/s]



count sample diff f1   is twice tot diff f1....... 994  / 1000    p < 0.994  [38;5;8m![0m
count sample diff prec is twice tot diff prec..... 990  / 1000    p < 0.99   [38;5;8m![0m
count sample diff rec  is twice tot diff rec ..... 996  / 1000    p < 0.996  [38;5;8m![0m
count sample diff acc  is twice tot diff acc...... 969  / 1000    p < 0.969  [38;5;8m![0m


Seed: 165043843 - Split: 2 - education - Group: Some college but no degree
data shape:  (4776, 1)
sample size: 2388


bootstrap: 100%|███████████████████████████| 1000/1000 [00:08<00:00, 112.53it/s]



count sample diff f1   is twice tot diff f1....... 977  / 1000    p < 0.977  [38;5;8m![0m
count sample diff prec is twice tot diff prec..... 945  / 1000    p < 0.945  
count sample diff rec  is twice tot diff rec ..... 986  / 1000    p < 0.986  [38;5;8m![0m
count sample diff acc  is twice tot diff acc...... 596  / 1000    p < 0.596  


Seed: 165043843 - Split: 2 - education - Group: Doctoral degree
data shape:  (270, 1)
sample size: 135


bootstrap: 100%|███████████████████████████| 1000/1000 [00:05<00:00, 190.77it/s]



count sample diff f1   is twice tot diff f1....... 25   / 1000    p < 0.025  [38;5;9m*[0m
count sample diff prec is twice tot diff prec..... 28   / 1000    p < 0.028  [38;5;9m*[0m
count sample diff rec  is twice tot diff rec ..... 37   / 1000    p < 0.037  [38;5;9m*[0m
count sample diff acc  is twice tot diff acc...... 9    / 1000    p < 0.009  [38;5;9m**[0m


Seed: 165043843 - Split: 2 - education - Group: Less than high school degree
data shape:  (174, 1)
sample size: 87


bootstrap: 100%|███████████████████████████| 1000/1000 [00:06<00:00, 160.79it/s]



count sample diff f1   is twice tot diff f1....... 421  / 1000    p < 0.421  
count sample diff prec is twice tot diff prec..... 399  / 1000    p < 0.399  
count sample diff rec  is twice tot diff rec ..... 440  / 1000    p < 0.44   
count sample diff acc  is twice tot diff acc...... 358  / 1000    p < 0.358  


Seed: 165043843 - Split: 2 - education - Group: Professional degree (JD, MD)
data shape:  (375, 1)
sample size: 187


bootstrap: 100%|███████████████████████████| 1000/1000 [00:08<00:00, 123.64it/s]



count sample diff f1   is twice tot diff f1....... 651  / 1000    p < 0.651  
count sample diff prec is twice tot diff prec..... 650  / 1000    p < 0.65   
count sample diff rec  is twice tot diff rec ..... 611  / 1000    p < 0.611  
count sample diff acc  is twice tot diff acc...... 659  / 1000    p < 0.659  


Seed: 165043843 - Split: 3 - education - Group: Bachelor's degree in college (4-year)
data shape:  (10495, 1)
sample size: 5247


bootstrap: 100%|████████████████████████████| 1000/1000 [00:18<00:00, 53.92it/s]



count sample diff f1   is twice tot diff f1....... 347  / 1000    p < 0.347  
count sample diff prec is twice tot diff prec..... 232  / 1000    p < 0.232  
count sample diff rec  is twice tot diff rec ..... 431  / 1000    p < 0.431  
count sample diff acc  is twice tot diff acc...... 160  / 1000    p < 0.16   


Seed: 165043843 - Split: 3 - education - Group: High school graduate (high school diploma or equivalent including GED)
data shape:  (2106, 1)
sample size: 1053


bootstrap: 100%|███████████████████████████| 1000/1000 [00:07<00:00, 132.95it/s]



count sample diff f1   is twice tot diff f1....... 688  / 1000    p < 0.688  
count sample diff prec is twice tot diff prec..... 738  / 1000    p < 0.738  
count sample diff rec  is twice tot diff rec ..... 663  / 1000    p < 0.663  
count sample diff acc  is twice tot diff acc...... 778  / 1000    p < 0.778  


Seed: 165043843 - Split: 3 - education - Group: Master's degree
data shape:  (3568, 1)
sample size: 1784


bootstrap: 100%|███████████████████████████| 1000/1000 [00:07<00:00, 125.90it/s]



count sample diff f1   is twice tot diff f1....... 676  / 1000    p < 0.676  
count sample diff prec is twice tot diff prec..... 509  / 1000    p < 0.509  
count sample diff rec  is twice tot diff rec ..... 745  / 1000    p < 0.745  
count sample diff acc  is twice tot diff acc...... 488  / 1000    p < 0.488  


Seed: 165043843 - Split: 3 - education - Group: Associate degree in college (2-year)
data shape:  (2651, 1)
sample size: 1325


bootstrap: 100%|███████████████████████████| 1000/1000 [00:06<00:00, 153.36it/s]



count sample diff f1   is twice tot diff f1....... 232  / 1000    p < 0.232  
count sample diff prec is twice tot diff prec..... 140  / 1000    p < 0.14   
count sample diff rec  is twice tot diff rec ..... 288  / 1000    p < 0.288  
count sample diff acc  is twice tot diff acc...... 44   / 1000    p < 0.044  [38;5;9m*[0m


Seed: 165043843 - Split: 3 - education - Group: Some college but no degree
data shape:  (4670, 1)
sample size: 2335


bootstrap: 100%|███████████████████████████| 1000/1000 [00:08<00:00, 114.00it/s]



count sample diff f1   is twice tot diff f1....... 401  / 1000    p < 0.401  
count sample diff prec is twice tot diff prec..... 337  / 1000    p < 0.337  
count sample diff rec  is twice tot diff rec ..... 473  / 1000    p < 0.473  
count sample diff acc  is twice tot diff acc...... 154  / 1000    p < 0.154  


Seed: 165043843 - Split: 3 - education - Group: Doctoral degree
data shape:  (298, 1)
sample size: 149


bootstrap: 100%|███████████████████████████| 1000/1000 [00:05<00:00, 186.75it/s]



count sample diff f1   is twice tot diff f1....... 738  / 1000    p < 0.738  
count sample diff prec is twice tot diff prec..... 642  / 1000    p < 0.642  
count sample diff rec  is twice tot diff rec ..... 794  / 1000    p < 0.794  
count sample diff acc  is twice tot diff acc...... 429  / 1000    p < 0.429  


Seed: 165043843 - Split: 3 - education - Group: Less than high school degree
data shape:  (163, 1)
sample size: 81


bootstrap: 100%|███████████████████████████| 1000/1000 [00:05<00:00, 199.12it/s]



count sample diff f1   is twice tot diff f1....... 134  / 1000    p < 0.134  
count sample diff prec is twice tot diff prec..... 155  / 1000    p < 0.155  
count sample diff rec  is twice tot diff rec ..... 147  / 1000    p < 0.147  
count sample diff acc  is twice tot diff acc...... 282  / 1000    p < 0.282  


Seed: 165043843 - Split: 3 - education - Group: Professional degree (JD, MD)
data shape:  (361, 1)
sample size: 180


bootstrap: 100%|███████████████████████████| 1000/1000 [00:04<00:00, 209.13it/s]



count sample diff f1   is twice tot diff f1....... 449  / 1000    p < 0.449  
count sample diff prec is twice tot diff prec..... 385  / 1000    p < 0.385  
count sample diff rec  is twice tot diff rec ..... 475  / 1000    p < 0.475  
count sample diff acc  is twice tot diff acc...... 341  / 1000    p < 0.341  


Seed: 2923262358 - Split: 0 - education - Group: Bachelor's degree in college (4-year)
data shape:  (10517, 1)
sample size: 5258


bootstrap: 100%|████████████████████████████| 1000/1000 [00:14<00:00, 69.16it/s]



count sample diff f1   is twice tot diff f1....... 299  / 1000    p < 0.299  
count sample diff prec is twice tot diff prec..... 306  / 1000    p < 0.306  
count sample diff rec  is twice tot diff rec ..... 283  / 1000    p < 0.283  
count sample diff acc  is twice tot diff acc...... 331  / 1000    p < 0.331  


Seed: 2923262358 - Split: 0 - education - Group: High school graduate (high school diploma or equivalent including GED)
data shape:  (2015, 1)
sample size: 1007


bootstrap: 100%|███████████████████████████| 1000/1000 [00:06<00:00, 143.21it/s]



count sample diff f1   is twice tot diff f1....... 139  / 1000    p < 0.139  
count sample diff prec is twice tot diff prec..... 144  / 1000    p < 0.144  
count sample diff rec  is twice tot diff rec ..... 111  / 1000    p < 0.111  
count sample diff acc  is twice tot diff acc...... 249  / 1000    p < 0.249  


Seed: 2923262358 - Split: 0 - education - Group: Master's degree
data shape:  (3562, 1)
sample size: 1781


bootstrap: 100%|████████████████████████████| 1000/1000 [00:10<00:00, 92.34it/s]



count sample diff f1   is twice tot diff f1....... 184  / 1000    p < 0.184  
count sample diff prec is twice tot diff prec..... 177  / 1000    p < 0.177  
count sample diff rec  is twice tot diff rec ..... 213  / 1000    p < 0.213  
count sample diff acc  is twice tot diff acc...... 165  / 1000    p < 0.165  


Seed: 2923262358 - Split: 0 - education - Group: Associate degree in college (2-year)
data shape:  (2619, 1)
sample size: 1309


bootstrap: 100%|███████████████████████████| 1000/1000 [00:06<00:00, 152.69it/s]



count sample diff f1   is twice tot diff f1....... 118  / 1000    p < 0.118  
count sample diff prec is twice tot diff prec..... 146  / 1000    p < 0.146  
count sample diff rec  is twice tot diff rec ..... 111  / 1000    p < 0.111  
count sample diff acc  is twice tot diff acc...... 219  / 1000    p < 0.219  


Seed: 2923262358 - Split: 0 - education - Group: Some college but no degree
data shape:  (4698, 1)
sample size: 2349


bootstrap: 100%|███████████████████████████| 1000/1000 [00:08<00:00, 115.14it/s]



count sample diff f1   is twice tot diff f1....... 321  / 1000    p < 0.321  
count sample diff prec is twice tot diff prec..... 313  / 1000    p < 0.313  
count sample diff rec  is twice tot diff rec ..... 342  / 1000    p < 0.342  
count sample diff acc  is twice tot diff acc...... 271  / 1000    p < 0.271  


Seed: 2923262358 - Split: 0 - education - Group: Doctoral degree
data shape:  (306, 1)
sample size: 153


bootstrap: 100%|███████████████████████████| 1000/1000 [00:04<00:00, 203.05it/s]



count sample diff f1   is twice tot diff f1....... 163  / 1000    p < 0.163  
count sample diff prec is twice tot diff prec..... 24   / 1000    p < 0.024  [38;5;9m*[0m
count sample diff rec  is twice tot diff rec ..... 417  / 1000    p < 0.417  
count sample diff acc  is twice tot diff acc...... 10   / 1000    p < 0.01   [38;5;9m*[0m


Seed: 2923262358 - Split: 0 - education - Group: Less than high school degree
data shape:  (158, 1)
sample size: 79


bootstrap: 100%|███████████████████████████| 1000/1000 [00:04<00:00, 210.14it/s]



count sample diff f1   is twice tot diff f1....... 202  / 1000    p < 0.202  
count sample diff prec is twice tot diff prec..... 146  / 1000    p < 0.146  
count sample diff rec  is twice tot diff rec ..... 349  / 1000    p < 0.349  
count sample diff acc  is twice tot diff acc...... 47   / 1000    p < 0.047  [38;5;9m*[0m


Seed: 2923262358 - Split: 0 - education - Group: Professional degree (JD, MD)
data shape:  (352, 1)
sample size: 176


bootstrap: 100%|███████████████████████████| 1000/1000 [00:04<00:00, 205.91it/s]



count sample diff f1   is twice tot diff f1....... 580  / 1000    p < 0.58   
count sample diff prec is twice tot diff prec..... 611  / 1000    p < 0.611  
count sample diff rec  is twice tot diff rec ..... 547  / 1000    p < 0.547  
count sample diff acc  is twice tot diff acc...... 648  / 1000    p < 0.648  


Seed: 2923262358 - Split: 1 - education - Group: Bachelor's degree in college (4-year)
data shape:  (10498, 1)
sample size: 5249


bootstrap: 100%|████████████████████████████| 1000/1000 [00:13<00:00, 71.72it/s]



count sample diff f1   is twice tot diff f1....... 950  / 1000    p < 0.95   
count sample diff prec is twice tot diff prec..... 933  / 1000    p < 0.933  
count sample diff rec  is twice tot diff rec ..... 955  / 1000    p < 0.955  [38;5;8m![0m
count sample diff acc  is twice tot diff acc...... 917  / 1000    p < 0.917  


Seed: 2923262358 - Split: 1 - education - Group: High school graduate (high school diploma or equivalent including GED)
data shape:  (2139, 1)
sample size: 1069


bootstrap: 100%|███████████████████████████| 1000/1000 [00:06<00:00, 156.83it/s]



count sample diff f1   is twice tot diff f1....... 638  / 1000    p < 0.638  
count sample diff prec is twice tot diff prec..... 640  / 1000    p < 0.64   
count sample diff rec  is twice tot diff rec ..... 632  / 1000    p < 0.632  
count sample diff acc  is twice tot diff acc...... 631  / 1000    p < 0.631  


Seed: 2923262358 - Split: 1 - education - Group: Master's degree
data shape:  (3486, 1)
sample size: 1743


bootstrap: 100%|███████████████████████████| 1000/1000 [00:07<00:00, 129.17it/s]



count sample diff f1   is twice tot diff f1....... 239  / 1000    p < 0.239  
count sample diff prec is twice tot diff prec..... 204  / 1000    p < 0.204  
count sample diff rec  is twice tot diff rec ..... 245  / 1000    p < 0.245  
count sample diff acc  is twice tot diff acc...... 197  / 1000    p < 0.197  


Seed: 2923262358 - Split: 1 - education - Group: Associate degree in college (2-year)
data shape:  (2588, 1)
sample size: 1294


bootstrap: 100%|███████████████████████████| 1000/1000 [00:06<00:00, 155.21it/s]



count sample diff f1   is twice tot diff f1....... 768  / 1000    p < 0.768  
count sample diff prec is twice tot diff prec..... 867  / 1000    p < 0.867  
count sample diff rec  is twice tot diff rec ..... 719  / 1000    p < 0.719  
count sample diff acc  is twice tot diff acc...... 950  / 1000    p < 0.95   


Seed: 2923262358 - Split: 1 - education - Group: Some college but no degree
data shape:  (4666, 1)
sample size: 2333


bootstrap: 100%|███████████████████████████| 1000/1000 [00:08<00:00, 122.92it/s]



count sample diff f1   is twice tot diff f1....... 774  / 1000    p < 0.774  
count sample diff prec is twice tot diff prec..... 725  / 1000    p < 0.725  
count sample diff rec  is twice tot diff rec ..... 822  / 1000    p < 0.822  
count sample diff acc  is twice tot diff acc...... 392  / 1000    p < 0.392  


Seed: 2923262358 - Split: 1 - education - Group: Doctoral degree
data shape:  (295, 1)
sample size: 147


bootstrap: 100%|███████████████████████████| 1000/1000 [00:04<00:00, 203.76it/s]



count sample diff f1   is twice tot diff f1....... 825  / 1000    p < 0.825  
count sample diff prec is twice tot diff prec..... 562  / 1000    p < 0.562  
count sample diff rec  is twice tot diff rec ..... 855  / 1000    p < 0.855  
count sample diff acc  is twice tot diff acc...... 391  / 1000    p < 0.391  


Seed: 2923262358 - Split: 1 - education - Group: Less than high school degree
data shape:  (164, 1)
sample size: 82


bootstrap: 100%|███████████████████████████| 1000/1000 [00:04<00:00, 216.29it/s]



count sample diff f1   is twice tot diff f1....... 557  / 1000    p < 0.557  
count sample diff prec is twice tot diff prec..... 622  / 1000    p < 0.622  
count sample diff rec  is twice tot diff rec ..... 575  / 1000    p < 0.575  
count sample diff acc  is twice tot diff acc...... 550  / 1000    p < 0.55   


Seed: 2923262358 - Split: 1 - education - Group: Professional degree (JD, MD)
data shape:  (378, 1)
sample size: 189


bootstrap: 100%|███████████████████████████| 1000/1000 [00:05<00:00, 199.21it/s]



count sample diff f1   is twice tot diff f1....... 754  / 1000    p < 0.754  
count sample diff prec is twice tot diff prec..... 644  / 1000    p < 0.644  
count sample diff rec  is twice tot diff rec ..... 753  / 1000    p < 0.753  
count sample diff acc  is twice tot diff acc...... 669  / 1000    p < 0.669  


Seed: 2923262358 - Split: 2 - education - Group: Bachelor's degree in college (4-year)
data shape:  (10478, 1)
sample size: 5239


bootstrap: 100%|████████████████████████████| 1000/1000 [00:14<00:00, 66.99it/s]



count sample diff f1   is twice tot diff f1....... 896  / 1000    p < 0.896  
count sample diff prec is twice tot diff prec..... 873  / 1000    p < 0.873  
count sample diff rec  is twice tot diff rec ..... 909  / 1000    p < 0.909  
count sample diff acc  is twice tot diff acc...... 853  / 1000    p < 0.853  


Seed: 2923262358 - Split: 2 - education - Group: High school graduate (high school diploma or equivalent including GED)
data shape:  (2076, 1)
sample size: 1038


bootstrap: 100%|███████████████████████████| 1000/1000 [00:06<00:00, 163.77it/s]



count sample diff f1   is twice tot diff f1....... 1000 / 1000    p < 1.0    [38;5;8m![0m
count sample diff prec is twice tot diff prec..... 993  / 1000    p < 0.993  [38;5;8m![0m
count sample diff rec  is twice tot diff rec ..... 1000 / 1000    p < 1.0    [38;5;8m![0m
count sample diff acc  is twice tot diff acc...... 981  / 1000    p < 0.981  [38;5;8m![0m


Seed: 2923262358 - Split: 2 - education - Group: Master's degree
data shape:  (3516, 1)
sample size: 1758


bootstrap: 100%|███████████████████████████| 1000/1000 [00:07<00:00, 131.80it/s]



count sample diff f1   is twice tot diff f1....... 572  / 1000    p < 0.572  
count sample diff prec is twice tot diff prec..... 512  / 1000    p < 0.512  
count sample diff rec  is twice tot diff rec ..... 598  / 1000    p < 0.598  
count sample diff acc  is twice tot diff acc...... 506  / 1000    p < 0.506  


Seed: 2923262358 - Split: 2 - education - Group: Associate degree in college (2-year)
data shape:  (2575, 1)
sample size: 1287


bootstrap: 100%|███████████████████████████| 1000/1000 [00:06<00:00, 153.86it/s]



count sample diff f1   is twice tot diff f1....... 982  / 1000    p < 0.982  [38;5;8m![0m
count sample diff prec is twice tot diff prec..... 981  / 1000    p < 0.981  [38;5;8m![0m
count sample diff rec  is twice tot diff rec ..... 980  / 1000    p < 0.98   [38;5;8m![0m
count sample diff acc  is twice tot diff acc...... 965  / 1000    p < 0.965  [38;5;8m![0m


Seed: 2923262358 - Split: 2 - education - Group: Some college but no degree
data shape:  (4686, 1)
sample size: 2343


bootstrap: 100%|███████████████████████████| 1000/1000 [00:08<00:00, 116.52it/s]



count sample diff f1   is twice tot diff f1....... 873  / 1000    p < 0.873  
count sample diff prec is twice tot diff prec..... 861  / 1000    p < 0.861  
count sample diff rec  is twice tot diff rec ..... 877  / 1000    p < 0.877  
count sample diff acc  is twice tot diff acc...... 794  / 1000    p < 0.794  


Seed: 2923262358 - Split: 2 - education - Group: Doctoral degree
data shape:  (308, 1)
sample size: 154


bootstrap: 100%|███████████████████████████| 1000/1000 [00:04<00:00, 210.03it/s]



count sample diff f1   is twice tot diff f1....... 941  / 1000    p < 0.941  
count sample diff prec is twice tot diff prec..... 921  / 1000    p < 0.921  
count sample diff rec  is twice tot diff rec ..... 948  / 1000    p < 0.948  
count sample diff acc  is twice tot diff acc...... 882  / 1000    p < 0.882  


Seed: 2923262358 - Split: 2 - education - Group: Less than high school degree
data shape:  (168, 1)
sample size: 84


bootstrap: 100%|███████████████████████████| 1000/1000 [00:04<00:00, 212.75it/s]



count sample diff f1   is twice tot diff f1....... 224  / 1000    p < 0.224  
count sample diff prec is twice tot diff prec..... 138  / 1000    p < 0.138  
count sample diff rec  is twice tot diff rec ..... 236  / 1000    p < 0.236  
count sample diff acc  is twice tot diff acc...... 77   / 1000    p < 0.077  


Seed: 2923262358 - Split: 2 - education - Group: Professional degree (JD, MD)
data shape:  (402, 1)
sample size: 201


bootstrap: 100%|███████████████████████████| 1000/1000 [00:04<00:00, 205.53it/s]



count sample diff f1   is twice tot diff f1....... 866  / 1000    p < 0.866  
count sample diff prec is twice tot diff prec..... 891  / 1000    p < 0.891  
count sample diff rec  is twice tot diff rec ..... 843  / 1000    p < 0.843  
count sample diff acc  is twice tot diff acc...... 918  / 1000    p < 0.918  


Seed: 2923262358 - Split: 3 - education - Group: Bachelor's degree in college (4-year)
data shape:  (10547, 1)
sample size: 5273


bootstrap: 100%|████████████████████████████| 1000/1000 [00:14<00:00, 70.80it/s]



count sample diff f1   is twice tot diff f1....... 597  / 1000    p < 0.597  
count sample diff prec is twice tot diff prec..... 667  / 1000    p < 0.667  
count sample diff rec  is twice tot diff rec ..... 507  / 1000    p < 0.507  
count sample diff acc  is twice tot diff acc...... 752  / 1000    p < 0.752  


Seed: 2923262358 - Split: 3 - education - Group: High school graduate (high school diploma or equivalent including GED)
data shape:  (2090, 1)
sample size: 1045


bootstrap: 100%|███████████████████████████| 1000/1000 [00:06<00:00, 163.74it/s]



count sample diff f1   is twice tot diff f1....... 70   / 1000    p < 0.07   
count sample diff prec is twice tot diff prec..... 83   / 1000    p < 0.083  
count sample diff rec  is twice tot diff rec ..... 56   / 1000    p < 0.056  
count sample diff acc  is twice tot diff acc...... 140  / 1000    p < 0.14   


Seed: 2923262358 - Split: 3 - education - Group: Master's degree
data shape:  (3496, 1)
sample size: 1748


bootstrap: 100%|███████████████████████████| 1000/1000 [00:07<00:00, 130.14it/s]



count sample diff f1   is twice tot diff f1....... 65   / 1000    p < 0.065  
count sample diff prec is twice tot diff prec..... 50   / 1000    p < 0.05   
count sample diff rec  is twice tot diff rec ..... 82   / 1000    p < 0.082  
count sample diff acc  is twice tot diff acc...... 47   / 1000    p < 0.047  [38;5;9m*[0m


Seed: 2923262358 - Split: 3 - education - Group: Associate degree in college (2-year)
data shape:  (2638, 1)
sample size: 1319


bootstrap: 100%|███████████████████████████| 1000/1000 [00:06<00:00, 155.24it/s]



count sample diff f1   is twice tot diff f1....... 121  / 1000    p < 0.121  
count sample diff prec is twice tot diff prec..... 122  / 1000    p < 0.122  
count sample diff rec  is twice tot diff rec ..... 111  / 1000    p < 0.111  
count sample diff acc  is twice tot diff acc...... 156  / 1000    p < 0.156  


Seed: 2923262358 - Split: 3 - education - Group: Some college but no degree
data shape:  (4610, 1)
sample size: 2305


bootstrap: 100%|███████████████████████████| 1000/1000 [00:08<00:00, 118.80it/s]



count sample diff f1   is twice tot diff f1....... 131  / 1000    p < 0.131  
count sample diff prec is twice tot diff prec..... 128  / 1000    p < 0.128  
count sample diff rec  is twice tot diff rec ..... 148  / 1000    p < 0.148  
count sample diff acc  is twice tot diff acc...... 116  / 1000    p < 0.116  


Seed: 2923262358 - Split: 3 - education - Group: Doctoral degree
data shape:  (311, 1)
sample size: 155


bootstrap: 100%|███████████████████████████| 1000/1000 [00:04<00:00, 201.94it/s]



count sample diff f1   is twice tot diff f1....... 24   / 1000    p < 0.024  [38;5;9m*[0m
count sample diff prec is twice tot diff prec..... 32   / 1000    p < 0.032  [38;5;9m*[0m
count sample diff rec  is twice tot diff rec ..... 65   / 1000    p < 0.065  
count sample diff acc  is twice tot diff acc...... 10   / 1000    p < 0.01   [38;5;9m*[0m


Seed: 2923262358 - Split: 3 - education - Group: Less than high school degree
data shape:  (170, 1)
sample size: 85


bootstrap: 100%|███████████████████████████| 1000/1000 [00:04<00:00, 206.34it/s]



count sample diff f1   is twice tot diff f1....... 54   / 1000    p < 0.054  
count sample diff prec is twice tot diff prec..... 70   / 1000    p < 0.07   
count sample diff rec  is twice tot diff rec ..... 50   / 1000    p < 0.05   
count sample diff acc  is twice tot diff acc...... 97   / 1000    p < 0.097  


Seed: 2923262358 - Split: 3 - education - Group: Professional degree (JD, MD)
data shape:  (388, 1)
sample size: 194


bootstrap: 100%|███████████████████████████| 1000/1000 [00:04<00:00, 208.17it/s]


count sample diff f1   is twice tot diff f1....... 431  / 1000    p < 0.431  
count sample diff prec is twice tot diff prec..... 433  / 1000    p < 0.433  
count sample diff rec  is twice tot diff rec ..... 442  / 1000    p < 0.442  
count sample diff acc  is twice tot diff acc...... 442  / 1000    p < 0.442  





In [15]:
# Estimate k for every education group from `education_p_dicts`
# (presumably the result of the education significance test run in an earlier cell — verify).
# The displayed result maps each group name to a dict with `k_count` and `k_bonferroni`.
education_ks = k_estimator_groups(education_p_dicts)
education_ks

{'Doctoral degree': {'k_count': 2, 'k_bonferroni': -1},
 "Bachelor's degree in college (4-year)": {'k_count': 1, 'k_bonferroni': -1},
 'Professional degree (JD, MD)': {'k_count': -1, 'k_bonferroni': -1},
 'Less than high school degree': {'k_count': -1, 'k_bonferroni': -1},
 'High school graduate (high school diploma or equivalent including GED)': {'k_count': -1,
  'k_bonferroni': -1},
 'Some college but no degree': {'k_count': 2, 'k_bonferroni': 2},
 "Master's degree": {'k_count': -1, 'k_bonferroni': -1},
 'Associate degree in college (2-year)': {'k_count': -1, 'k_bonferroni': -1}}

In [16]:
# Short row labels for the LaTeX table; groups that are not listed here
# (e.g. "Doctoral degree", "Master's degree") keep their original name.
rename_mapping = {
    'Less than high school degree': 'Below high school',
    'Some college but no degree': 'College, no degree',
    'Associate degree in college (2-year)': 'Associate degree',
    "Bachelor's degree in college (4-year)": "Bachelor's degree",
    'High school graduate (high school diploma or equivalent including GED)': 'High school',
    'Professional degree (JD, MD)': 'Professional degree',
}

# Build the table in one chain so re-running the cell always starts from
# `education_ks` instead of from a previously modified frame.
education_ks_df = (
    pd.DataFrame(education_ks)
    .T                      # one row per education group, one column per estimator
    .sort_index()           # sort by the original (long) group names, before renaming
    .replace({-1: 0})       # the -1 values in `education_ks` are shown as 0 in the table
    .rename(
        index=rename_mapping,
        # Raw strings: '\h' is not a valid Python escape sequence, so the non-raw
        # form triggers a DeprecationWarning/SyntaxWarning on recent Python versions.
        columns={
            'k_count': r'$\hat{k}_{count}$',
            'k_bonferroni': r'$\hat{k}_{Bonf.}$',
        },
    )
)
education_ks_df

Unnamed: 0,$\hat{k}_{count}$,$\hat{k}_{Bonf.}$
Associate degree,0,0
Bachelor's degree,1,0
Doctoral degree,2,0
High school,0,0
Belowhigh school,0,0
Master's degree,0,0
Professional degree,0,0
"College, no degree",2,2


In [17]:
# Export the education table as LaTeX. escape=False keeps the `$\hat{k}...$` column
# headers as raw LaTeX math instead of escaping the `$`, `\`, `{`, `}` and `_` characters.
education_ks_df.to_latex('../tables/replication/baseline/education.tex', escape=False)

### Sexual Orientation

In [18]:
# Run the per-group significance test for the `lgbtq_status` attribute, with the
# 'no extra layers' runs as attribute_a and the 'lgbq' runs as attribute_b.
# The call prints one bootstrap log block per seed / split / group.
lgbq_p_dicts = significance_test_per_group(
    df,
    attribute_a='no extra layers',
    attribute_b='lgbq',
    to_groups=to_groups,
    attributes=['lgbtq_status'],
)



Seed: 2803636207 - Split: 0 - lgbtq_status - Group: Heterosexual
data shape:  (22623, 1)
sample size: 11311


bootstrap: 100%|████████████████████████████| 1000/1000 [00:25<00:00, 39.79it/s]



count sample diff f1   is twice tot diff f1....... 1000 / 1000    p < 1.0    [38;5;8m![0m
count sample diff prec is twice tot diff prec..... 1000 / 1000    p < 1.0    [38;5;8m![0m
count sample diff rec  is twice tot diff rec ..... 1000 / 1000    p < 1.0    [38;5;8m![0m
count sample diff acc  is twice tot diff acc...... 993  / 1000    p < 0.993  [38;5;8m![0m


Seed: 2803636207 - Split: 0 - lgbtq_status - Group: Homosexual
data shape:  (739, 1)
sample size: 369


bootstrap: 100%|███████████████████████████| 1000/1000 [00:05<00:00, 190.58it/s]



count sample diff f1   is twice tot diff f1....... 669  / 1000    p < 0.669  
count sample diff prec is twice tot diff prec..... 675  / 1000    p < 0.675  
count sample diff rec  is twice tot diff rec ..... 653  / 1000    p < 0.653  
count sample diff acc  is twice tot diff acc...... 662  / 1000    p < 0.662  


Seed: 2803636207 - Split: 0 - lgbtq_status - Group: Bisexual
data shape:  (2418, 1)
sample size: 1209


bootstrap: 100%|███████████████████████████| 1000/1000 [00:07<00:00, 131.73it/s]



count sample diff f1   is twice tot diff f1....... 290  / 1000    p < 0.29   
count sample diff prec is twice tot diff prec..... 222  / 1000    p < 0.222  
count sample diff rec  is twice tot diff rec ..... 309  / 1000    p < 0.309  
count sample diff acc  is twice tot diff acc...... 243  / 1000    p < 0.243  


Seed: 2803636207 - Split: 1 - lgbtq_status - Group: Heterosexual
data shape:  (22678, 1)
sample size: 11339


bootstrap: 100%|████████████████████████████| 1000/1000 [00:26<00:00, 37.85it/s]



count sample diff f1   is twice tot diff f1....... 805  / 1000    p < 0.805  
count sample diff prec is twice tot diff prec..... 866  / 1000    p < 0.866  
count sample diff rec  is twice tot diff rec ..... 642  / 1000    p < 0.642  
count sample diff acc  is twice tot diff acc...... 974  / 1000    p < 0.974  [38;5;8m![0m


Seed: 2803636207 - Split: 1 - lgbtq_status - Group: Homosexual
data shape:  (686, 1)
sample size: 343


bootstrap: 100%|███████████████████████████| 1000/1000 [00:05<00:00, 179.15it/s]



count sample diff f1   is twice tot diff f1....... 681  / 1000    p < 0.681  
count sample diff prec is twice tot diff prec..... 578  / 1000    p < 0.578  
count sample diff rec  is twice tot diff rec ..... 841  / 1000    p < 0.841  
count sample diff acc  is twice tot diff acc...... 306  / 1000    p < 0.306  


Seed: 2803636207 - Split: 1 - lgbtq_status - Group: Bisexual
data shape:  (2450, 1)
sample size: 1225


bootstrap: 100%|███████████████████████████| 1000/1000 [00:07<00:00, 141.75it/s]



count sample diff f1   is twice tot diff f1....... 971  / 1000    p < 0.971  [38;5;8m![0m
count sample diff prec is twice tot diff prec..... 940  / 1000    p < 0.94   
count sample diff rec  is twice tot diff rec ..... 977  / 1000    p < 0.977  [38;5;8m![0m
count sample diff acc  is twice tot diff acc...... 948  / 1000    p < 0.948  


Seed: 2803636207 - Split: 2 - lgbtq_status - Group: Heterosexual
data shape:  (22603, 1)
sample size: 11301


bootstrap: 100%|████████████████████████████| 1000/1000 [00:25<00:00, 39.29it/s]



count sample diff f1   is twice tot diff f1....... 998  / 1000    p < 0.998  [38;5;8m![0m
count sample diff prec is twice tot diff prec..... 998  / 1000    p < 0.998  [38;5;8m![0m
count sample diff rec  is twice tot diff rec ..... 993  / 1000    p < 0.993  [38;5;8m![0m
count sample diff acc  is twice tot diff acc...... 998  / 1000    p < 0.998  [38;5;8m![0m


Seed: 2803636207 - Split: 2 - lgbtq_status - Group: Homosexual
data shape:  (733, 1)
sample size: 366


bootstrap: 100%|███████████████████████████| 1000/1000 [00:05<00:00, 184.37it/s]



count sample diff f1   is twice tot diff f1....... 233  / 1000    p < 0.233  
count sample diff prec is twice tot diff prec..... 186  / 1000    p < 0.186  
count sample diff rec  is twice tot diff rec ..... 293  / 1000    p < 0.293  
count sample diff acc  is twice tot diff acc...... 145  / 1000    p < 0.145  


Seed: 2803636207 - Split: 2 - lgbtq_status - Group: Bisexual
data shape:  (2478, 1)
sample size: 1239


bootstrap: 100%|███████████████████████████| 1000/1000 [00:06<00:00, 148.08it/s]



count sample diff f1   is twice tot diff f1....... 762  / 1000    p < 0.762  
count sample diff prec is twice tot diff prec..... 741  / 1000    p < 0.741  
count sample diff rec  is twice tot diff rec ..... 777  / 1000    p < 0.777  
count sample diff acc  is twice tot diff acc...... 737  / 1000    p < 0.737  


Seed: 2803636207 - Split: 3 - lgbtq_status - Group: Heterosexual
data shape:  (22616, 1)
sample size: 11308


bootstrap: 100%|████████████████████████████| 1000/1000 [00:24<00:00, 40.30it/s]



count sample diff f1   is twice tot diff f1....... 957  / 1000    p < 0.957  [38;5;8m![0m
count sample diff prec is twice tot diff prec..... 946  / 1000    p < 0.946  
count sample diff rec  is twice tot diff rec ..... 972  / 1000    p < 0.972  [38;5;8m![0m
count sample diff acc  is twice tot diff acc...... 920  / 1000    p < 0.92   


Seed: 2803636207 - Split: 3 - lgbtq_status - Group: Homosexual
data shape:  (742, 1)
sample size: 371


bootstrap: 100%|███████████████████████████| 1000/1000 [00:05<00:00, 187.66it/s]



count sample diff f1   is twice tot diff f1....... 423  / 1000    p < 0.423  
count sample diff prec is twice tot diff prec..... 403  / 1000    p < 0.403  
count sample diff rec  is twice tot diff rec ..... 430  / 1000    p < 0.43   
count sample diff acc  is twice tot diff acc...... 398  / 1000    p < 0.398  


Seed: 2803636207 - Split: 3 - lgbtq_status - Group: Bisexual
data shape:  (2434, 1)
sample size: 1217


bootstrap: 100%|███████████████████████████| 1000/1000 [00:06<00:00, 149.70it/s]



count sample diff f1   is twice tot diff f1....... 745  / 1000    p < 0.745  
count sample diff prec is twice tot diff prec..... 750  / 1000    p < 0.75   
count sample diff rec  is twice tot diff rec ..... 763  / 1000    p < 0.763  
count sample diff acc  is twice tot diff acc...... 731  / 1000    p < 0.731  


Seed: 165043843 - Split: 0 - lgbtq_status - Group: Heterosexual
data shape:  (22615, 1)
sample size: 11307


bootstrap: 100%|████████████████████████████| 1000/1000 [00:24<00:00, 40.13it/s]



count sample diff f1   is twice tot diff f1....... 1    / 1000    p < 0.001  [38;5;9m**[0m
count sample diff prec is twice tot diff prec..... 2    / 1000    p < 0.002  [38;5;9m**[0m
count sample diff rec  is twice tot diff rec ..... 1    / 1000    p < 0.001  [38;5;9m**[0m
count sample diff acc  is twice tot diff acc...... 3    / 1000    p < 0.003  [38;5;9m**[0m


Seed: 165043843 - Split: 0 - lgbtq_status - Group: Homosexual
data shape:  (742, 1)
sample size: 371


bootstrap: 100%|███████████████████████████| 1000/1000 [00:05<00:00, 198.59it/s]



count sample diff f1   is twice tot diff f1....... 264  / 1000    p < 0.264  
count sample diff prec is twice tot diff prec..... 292  / 1000    p < 0.292  
count sample diff rec  is twice tot diff rec ..... 249  / 1000    p < 0.249  
count sample diff acc  is twice tot diff acc...... 289  / 1000    p < 0.289  


Seed: 165043843 - Split: 0 - lgbtq_status - Group: Bisexual
data shape:  (2387, 1)
sample size: 1193


bootstrap: 100%|███████████████████████████| 1000/1000 [00:06<00:00, 152.38it/s]



count sample diff f1   is twice tot diff f1....... 953  / 1000    p < 0.953  [38;5;8m![0m
count sample diff prec is twice tot diff prec..... 932  / 1000    p < 0.932  
count sample diff rec  is twice tot diff rec ..... 951  / 1000    p < 0.951  [38;5;8m![0m
count sample diff acc  is twice tot diff acc...... 940  / 1000    p < 0.94   


Seed: 165043843 - Split: 1 - lgbtq_status - Group: Heterosexual
data shape:  (22659, 1)
sample size: 11329


bootstrap: 100%|████████████████████████████| 1000/1000 [00:24<00:00, 40.38it/s]



count sample diff f1   is twice tot diff f1....... 2    / 1000    p < 0.002  [38;5;9m**[0m
count sample diff prec is twice tot diff prec..... 11   / 1000    p < 0.011  [38;5;9m*[0m
count sample diff rec  is twice tot diff rec ..... 0    / 1000    p < 0.0    [38;5;9m**[0m
count sample diff acc  is twice tot diff acc...... 57   / 1000    p < 0.057  


Seed: 165043843 - Split: 1 - lgbtq_status - Group: Homosexual
data shape:  (729, 1)
sample size: 364


bootstrap: 100%|███████████████████████████| 1000/1000 [00:05<00:00, 190.99it/s]



count sample diff f1   is twice tot diff f1....... 49   / 1000    p < 0.049  [38;5;9m*[0m
count sample diff prec is twice tot diff prec..... 30   / 1000    p < 0.03   [38;5;9m*[0m
count sample diff rec  is twice tot diff rec ..... 61   / 1000    p < 0.061  
count sample diff acc  is twice tot diff acc...... 30   / 1000    p < 0.03   [38;5;9m*[0m


Seed: 165043843 - Split: 1 - lgbtq_status - Group: Bisexual
data shape:  (2413, 1)
sample size: 1206


bootstrap: 100%|███████████████████████████| 1000/1000 [00:06<00:00, 151.87it/s]



count sample diff f1   is twice tot diff f1....... 968  / 1000    p < 0.968  [38;5;8m![0m
count sample diff prec is twice tot diff prec..... 958  / 1000    p < 0.958  [38;5;8m![0m
count sample diff rec  is twice tot diff rec ..... 967  / 1000    p < 0.967  [38;5;8m![0m
count sample diff acc  is twice tot diff acc...... 964  / 1000    p < 0.964  [38;5;8m![0m


Seed: 165043843 - Split: 2 - lgbtq_status - Group: Heterosexual
data shape:  (22538, 1)
sample size: 11269


bootstrap: 100%|████████████████████████████| 1000/1000 [00:25<00:00, 39.58it/s]



count sample diff f1   is twice tot diff f1....... 175  / 1000    p < 0.175  
count sample diff prec is twice tot diff prec..... 138  / 1000    p < 0.138  
count sample diff rec  is twice tot diff rec ..... 253  / 1000    p < 0.253  
count sample diff acc  is twice tot diff acc...... 73   / 1000    p < 0.073  


Seed: 165043843 - Split: 2 - lgbtq_status - Group: Homosexual
data shape:  (759, 1)
sample size: 379


bootstrap: 100%|███████████████████████████| 1000/1000 [00:05<00:00, 192.92it/s]



count sample diff f1   is twice tot diff f1....... 83   / 1000    p < 0.083  
count sample diff prec is twice tot diff prec..... 86   / 1000    p < 0.086  
count sample diff rec  is twice tot diff rec ..... 86   / 1000    p < 0.086  
count sample diff acc  is twice tot diff acc...... 111  / 1000    p < 0.111  


Seed: 165043843 - Split: 2 - lgbtq_status - Group: Bisexual
data shape:  (2479, 1)
sample size: 1239


bootstrap: 100%|███████████████████████████| 1000/1000 [00:07<00:00, 142.80it/s]



count sample diff f1   is twice tot diff f1....... 314  / 1000    p < 0.314  
count sample diff prec is twice tot diff prec..... 348  / 1000    p < 0.348  
count sample diff rec  is twice tot diff rec ..... 306  / 1000    p < 0.306  
count sample diff acc  is twice tot diff acc...... 330  / 1000    p < 0.33   


Seed: 165043843 - Split: 3 - lgbtq_status - Group: Heterosexual
data shape:  (22708, 1)
sample size: 11354


bootstrap: 100%|████████████████████████████| 1000/1000 [00:24<00:00, 40.13it/s]



count sample diff f1   is twice tot diff f1....... 997  / 1000    p < 0.997  [38;5;8m![0m
count sample diff prec is twice tot diff prec..... 953  / 1000    p < 0.953  [38;5;8m![0m
count sample diff rec  is twice tot diff rec ..... 1000 / 1000    p < 1.0    [38;5;8m![0m
count sample diff acc  is twice tot diff acc...... 782  / 1000    p < 0.782  


Seed: 165043843 - Split: 3 - lgbtq_status - Group: Homosexual
data shape:  (670, 1)
sample size: 335


bootstrap: 100%|███████████████████████████| 1000/1000 [00:05<00:00, 197.33it/s]



count sample diff f1   is twice tot diff f1....... 632  / 1000    p < 0.632  
count sample diff prec is twice tot diff prec..... 610  / 1000    p < 0.61   
count sample diff rec  is twice tot diff rec ..... 644  / 1000    p < 0.644  
count sample diff acc  is twice tot diff acc...... 606  / 1000    p < 0.606  


Seed: 165043843 - Split: 3 - lgbtq_status - Group: Bisexual
data shape:  (2501, 1)
sample size: 1250


bootstrap: 100%|███████████████████████████| 1000/1000 [00:06<00:00, 144.93it/s]



count sample diff f1   is twice tot diff f1....... 40   / 1000    p < 0.04   [38;5;9m*[0m
count sample diff prec is twice tot diff prec..... 34   / 1000    p < 0.034  [38;5;9m*[0m
count sample diff rec  is twice tot diff rec ..... 40   / 1000    p < 0.04   [38;5;9m*[0m
count sample diff acc  is twice tot diff acc...... 35   / 1000    p < 0.035  [38;5;9m*[0m


Seed: 2923262358 - Split: 0 - lgbtq_status - Group: Heterosexual
data shape:  (22649, 1)
sample size: 11324


bootstrap: 100%|████████████████████████████| 1000/1000 [00:25<00:00, 38.79it/s]



count sample diff f1   is twice tot diff f1....... 186  / 1000    p < 0.186  
count sample diff prec is twice tot diff prec..... 297  / 1000    p < 0.297  
count sample diff rec  is twice tot diff rec ..... 68   / 1000    p < 0.068  
count sample diff acc  is twice tot diff acc...... 671  / 1000    p < 0.671  


Seed: 2923262358 - Split: 0 - lgbtq_status - Group: Homosexual
data shape:  (709, 1)
sample size: 354


bootstrap: 100%|███████████████████████████| 1000/1000 [00:05<00:00, 189.98it/s]



count sample diff f1   is twice tot diff f1....... 187  / 1000    p < 0.187  
count sample diff prec is twice tot diff prec..... 191  / 1000    p < 0.191  
count sample diff rec  is twice tot diff rec ..... 185  / 1000    p < 0.185  
count sample diff acc  is twice tot diff acc...... 187  / 1000    p < 0.187  


Seed: 2923262358 - Split: 0 - lgbtq_status - Group: Bisexual
data shape:  (2444, 1)
sample size: 1222


bootstrap: 100%|███████████████████████████| 1000/1000 [00:06<00:00, 143.72it/s]



count sample diff f1   is twice tot diff f1....... 387  / 1000    p < 0.387  
count sample diff prec is twice tot diff prec..... 348  / 1000    p < 0.348  
count sample diff rec  is twice tot diff rec ..... 415  / 1000    p < 0.415  
count sample diff acc  is twice tot diff acc...... 359  / 1000    p < 0.359  


Seed: 2923262358 - Split: 1 - lgbtq_status - Group: Heterosexual
data shape:  (22726, 1)
sample size: 11363


bootstrap: 100%|████████████████████████████| 1000/1000 [00:25<00:00, 38.96it/s]



count sample diff f1   is twice tot diff f1....... 49   / 1000    p < 0.049  [38;5;9m*[0m
count sample diff prec is twice tot diff prec..... 178  / 1000    p < 0.178  
count sample diff rec  is twice tot diff rec ..... 7    / 1000    p < 0.007  [38;5;9m**[0m
count sample diff acc  is twice tot diff acc...... 744  / 1000    p < 0.744  


Seed: 2923262358 - Split: 1 - lgbtq_status - Group: Homosexual
data shape:  (726, 1)
sample size: 363


bootstrap: 100%|███████████████████████████| 1000/1000 [00:05<00:00, 187.39it/s]



count sample diff f1   is twice tot diff f1....... 172  / 1000    p < 0.172  
count sample diff prec is twice tot diff prec..... 255  / 1000    p < 0.255  
count sample diff rec  is twice tot diff rec ..... 131  / 1000    p < 0.131  
count sample diff acc  is twice tot diff acc...... 310  / 1000    p < 0.31   


Seed: 2923262358 - Split: 1 - lgbtq_status - Group: Bisexual
data shape:  (2383, 1)
sample size: 1191


bootstrap: 100%|███████████████████████████| 1000/1000 [00:06<00:00, 145.94it/s]



count sample diff f1   is twice tot diff f1....... 420  / 1000    p < 0.42   
count sample diff prec is twice tot diff prec..... 429  / 1000    p < 0.429  
count sample diff rec  is twice tot diff rec ..... 416  / 1000    p < 0.416  
count sample diff acc  is twice tot diff acc...... 426  / 1000    p < 0.426  


Seed: 2923262358 - Split: 2 - lgbtq_status - Group: Heterosexual
data shape:  (22507, 1)
sample size: 11253


bootstrap: 100%|████████████████████████████| 1000/1000 [00:25<00:00, 39.69it/s]



count sample diff f1   is twice tot diff f1....... 283  / 1000    p < 0.283  
count sample diff prec is twice tot diff prec..... 478  / 1000    p < 0.478  
count sample diff rec  is twice tot diff rec ..... 110  / 1000    p < 0.11   
count sample diff acc  is twice tot diff acc...... 838  / 1000    p < 0.838  


Seed: 2923262358 - Split: 2 - lgbtq_status - Group: Homosexual
data shape:  (753, 1)
sample size: 376


bootstrap: 100%|███████████████████████████| 1000/1000 [00:05<00:00, 195.22it/s]



count sample diff f1   is twice tot diff f1....... 139  / 1000    p < 0.139  
count sample diff prec is twice tot diff prec..... 191  / 1000    p < 0.191  
count sample diff rec  is twice tot diff rec ..... 116  / 1000    p < 0.116  
count sample diff acc  is twice tot diff acc...... 235  / 1000    p < 0.235  


Seed: 2923262358 - Split: 2 - lgbtq_status - Group: Bisexual
data shape:  (2492, 1)
sample size: 1246


bootstrap: 100%|███████████████████████████| 1000/1000 [00:06<00:00, 150.00it/s]



count sample diff f1   is twice tot diff f1....... 871  / 1000    p < 0.871  
count sample diff prec is twice tot diff prec..... 857  / 1000    p < 0.857  
count sample diff rec  is twice tot diff rec ..... 874  / 1000    p < 0.874  
count sample diff acc  is twice tot diff acc...... 865  / 1000    p < 0.865  


Seed: 2923262358 - Split: 3 - lgbtq_status - Group: Heterosexual
data shape:  (22638, 1)
sample size: 11319


bootstrap: 100%|████████████████████████████| 1000/1000 [00:24<00:00, 40.06it/s]



count sample diff f1   is twice tot diff f1....... 16   / 1000    p < 0.016  [38;5;9m*[0m
count sample diff prec is twice tot diff prec..... 10   / 1000    p < 0.01   [38;5;9m*[0m
count sample diff rec  is twice tot diff rec ..... 37   / 1000    p < 0.037  [38;5;9m*[0m
count sample diff acc  is twice tot diff acc...... 1    / 1000    p < 0.001  [38;5;9m**[0m


Seed: 2923262358 - Split: 3 - lgbtq_status - Group: Homosexual
data shape:  (712, 1)
sample size: 356


bootstrap: 100%|███████████████████████████| 1000/1000 [00:05<00:00, 197.37it/s]



count sample diff f1   is twice tot diff f1....... 433  / 1000    p < 0.433  
count sample diff prec is twice tot diff prec..... 388  / 1000    p < 0.388  
count sample diff rec  is twice tot diff rec ..... 462  / 1000    p < 0.462  
count sample diff acc  is twice tot diff acc...... 382  / 1000    p < 0.382  


Seed: 2923262358 - Split: 3 - lgbtq_status - Group: Bisexual
data shape:  (2461, 1)
sample size: 1230


bootstrap: 100%|███████████████████████████| 1000/1000 [00:06<00:00, 152.00it/s]


count sample diff f1   is twice tot diff f1....... 46   / 1000    p < 0.046  [38;5;9m*[0m
count sample diff prec is twice tot diff prec..... 25   / 1000    p < 0.025  [38;5;9m*[0m
count sample diff rec  is twice tot diff rec ..... 53   / 1000    p < 0.053  
count sample diff acc  is twice tot diff acc...... 35   / 1000    p < 0.035  [38;5;9m*[0m





In [19]:
# Derive the k estimates for each lgbtq_status group from the collected p-value dicts.
# The result maps group name -> {'k_count': ..., 'k_bonferroni': ...}.
# NOTE(review): lgbq_p_dicts is presumably populated by the significance-test
# cell above — confirm before running this cell on a fresh kernel.
lgbq_ks = k_estimator_groups(lgbq_p_dicts)
lgbq_ks

{'Homosexual': {'k_count': 1, 'k_bonferroni': -1},
 'Bisexual': {'k_count': 2, 'k_bonferroni': -1},
 'Heterosexual': {'k_count': 4, 'k_bonferroni': 2}}

In [20]:
# Tabulate the per-group k estimates for export: one row per group (sorted by
# group name), one column per estimator. Built as a single chain so the cell
# assigns `lgbq_ks_df` exactly once.
lgbq_ks_df = (
    pd.DataFrame(lgbq_ks)
    .T
    .sort_index()
    # -1 entries are shown as 0 in the table.
    # NOTE(review): -1 is presumably the "no k found" sentinel of
    # k_estimator_groups — confirm.
    .replace({-1: 0})
    # Raw strings: '\h' is not a valid escape sequence, so the non-raw form
    # triggers an invalid-escape warning (and is slated to become an error).
    # The resulting column labels are unchanged.
    .rename(columns={
        'k_count': r'$\hat{k}_{count}$',
        'k_bonferroni': r'$\hat{k}_{Bonf.}$',
    })
)
lgbq_ks_df

Unnamed: 0,$\hat{k}_{count}$,$\hat{k}_{Bonf.}$
Bisexual,2,0
Heterosexual,4,2
Homosexual,1,0


In [21]:
# Write the k-estimate table as LaTeX. escape=False keeps the column headers
# verbatim so their `$\hat{k}...$` math markup is not escaped by pandas.
# NOTE(review): assumes the ../tables/replication/baseline/ directory already exists.
lgbq_ks_df.to_latex('../tables/replication/baseline/lgbq.tex', escape=False)