In [1]:
from splink import DuckDBAPI, Linker, SettingsCreator, block_on, splink_datasets
from arfpy import arf
import numpy as np
import pandas as pds
from matplotlib import pyplot as plt
import xgboost as xgb
from sklearn.metrics import accuracy_score, roc_auc_score
from splink import DuckDBAPI, block_on
from splink.blocking_analysis import (
    cumulative_comparisons_to_be_scored_from_blocking_rules_chart,
)
import splink.comparison_level_library as cll
import splink.comparison_library as cl
import gc
import math

In [2]:
def one_round(list_params_key, linkage, nA, nB, overlap, save_df, setting_info, dedup=False):

    df_origin = pds.DataFrame(np.array([np.round(np.random.normal(size=nA)*list_params[list_params_key][0]+50,0),
                            np.round(np.random.beta(0.2, 0.2, size=nA)*list_params[list_params_key][1]+1,0),
                            np.random.randint(0, list_params[list_params_key][2], size=nA)+1,
                            np.random.randint(0, list_params[list_params_key][3], size=nA)+1,
                            np.round(np.random.beta(1, 2, size=nA)*list_params[list_params_key][4]+1,0)]).transpose())
    
    df_destination = pds.DataFrame(np.array([np.round(np.random.normal(size=nB)*list_params[list_params_key][0]+50,0),
                                np.round(np.random.beta(0.2, 0.2, size=nB)*list_params[list_params_key][1]+1,0),
                                np.random.randint(0, list_params[list_params_key][2], size=nB)+1,
                                np.random.randint(0, list_params[list_params_key][3], size=nB)+1,
                                np.round(np.random.beta(1, 2, size=nB)*list_params[list_params_key][4]+1,0)]).transpose())
            
    df_origin.loc[:,"link"] = False
    df_destination.loc[:,"link"] = False

    # "at_random", "strong_dep_pivs", "light_dep_pivs"

    if linkage == "strong_dep_pivs":

        # selection on piv
        df_origin.loc[ (df_origin[2]) > (df_origin[2]).quantile(1-overlap),"link"] = True
    
        df_origin.loc[ df_origin.link, 0 ] = np.round(np.random.normal(size=df_origin.link.sum())*(list_params[list_params_key][0]), 0)
        df_origin.loc[ df_origin.link, 2 ] = np.random.randint(list_params[list_params_key][2], 2* list_params[list_params_key][2], size=df_origin.link.sum())+1
        df_origin.loc[ df_origin.link, 4 ] = np.round(np.random.beta(2, 1, size=df_origin.link.sum())*list_params[list_params_key][4], 0)

    elif linkage == "light_dep_pivs":

        # selection on piv
        df_origin.loc[ (df_origin[2]) > (df_origin[2]).quantile(1-overlap),"link"] = True
    
        # df_origin.loc[ df_origin.link, 0 ] = np.round(np.random.normal(size=df_origin.link.sum())*list_params[list_params_key][0]+45, 0)
        # # df_origin.loc[ df_origin.link, 2 ] = np.random.randint(0, 0.5*list_params[list_params_key][2], size=df_origin.link.sum())+1
        # df_origin.loc[ df_origin.link, 4 ] = np.round(np.random.beta(2, 1, size=df_origin.link.sum())*list_params[list_params_key][4], 0)

    elif linkage == "at_random":

        rng = np.random.default_rng()
        rdgen = rng.choice(df_origin.shape[0], size=int(overlap*df_origin.shape[0]), replace=False)
        df_origin.loc[rdgen,"link"] = True

    df_origin.loc[:,"unique_id"] = range(1,1+df_origin.shape[0])
    df_destination.loc[:,"unique_id"] = range(5000001,5000001+df_destination.shape[0])

    df_origin.loc[:,"synthetic"] = False
    df_destination.loc[:,"synthetic"] = False

    df_origin.columns = ["piv1", "piv2", "piv3", "piv4", "piv5", "link", "unique_id", "synthetic"]
    df_destination.columns = ["piv1", "piv2", "piv3", "piv4", "piv5", "link", "unique_id", "synthetic"]
        
    # links get identical in the other file:
    df_destination.iloc[np.where(df_origin.link)[0]] = df_origin.iloc[np.where(df_origin.link)[0]]

    df_origin.loc[:,"duplication"] = False
    df_destination.loc[:,"duplication"] = False
    
    if dedup:
        
        # we augment each file with 5% of its size with duplicated records
        # 2.5% of the links get duplicated (at random)
        # 2.5% of the non-links get duplicated (at random)
        
        rng = np.random.default_rng()
        rdgen = rng.choice(np.where(df_origin.link)[0], size=int(0.025*df_origin.shape[0]), replace=False)
        df_origin.loc[rdgen,"duplication"] = True
        rng = np.random.default_rng()
        rdgen = rng.choice(np.where(~df_origin.link)[0], size=int(0.025*df_origin.shape[0]), replace=False)
        df_origin.loc[rdgen,"duplication"] = True
        
        rng = np.random.default_rng()
        rdgen = rng.choice(np.where(df_destination.link)[0], size=int(0.025*df_destination.shape[0]), replace=False)
        df_destination.loc[rdgen,"duplication"] = True
        rng = np.random.default_rng()
        rdgen = rng.choice(np.where(~df_destination.link)[0], size=int(0.025*df_destination.shape[0]), replace=False)
        df_destination.loc[rdgen,"duplication"] = True

        # duplicate:
        new_set = df_origin.loc[df_origin.duplication,:].copy()
        df_origin = pds.concat([df_origin, new_set])
        new_set = df_destination.loc[df_destination.duplication,:].copy()
        df_destination = pds.concat([df_destination, new_set])

        nA = df_origin.shape[0]
        nB = df_destination.shape[0]

        df_origin = df_origin.reset_index(drop=True)
        df_destination = df_destination.reset_index(drop=True)

    for column in df_destination.columns:
        unique_values = df_destination[column].unique()
        print(f"Unique values in column '{column}': {len(unique_values)}")

    # info to save
    overlapInA = sum(df_origin.link)/df_origin.shape[0]
    overlapInB = sum(df_destination.link)/df_destination.shape[0]
    difficultyInA = len(np.unique(df_origin[["piv1", "piv2", "piv3", "piv4", "piv5"]].apply(lambda row: '_'.join(row.values.astype(str)), axis=1)))/df_origin.shape[0]
    difficultyInB = len(np.unique(df_destination[["piv1", "piv2", "piv3", "piv4", "piv5"]].apply(lambda row: '_'.join(row.values.astype(str)), axis=1)))/df_destination.shape[0]
    difficultyInL = len(np.unique(df_destination.loc[df_destination.link,["piv1", "piv2", "piv3", "piv4", "piv5"]].apply(lambda row: '_'.join(row.values.astype(str)), axis=1)))/df_destination.link.sum()
    difficultyInNL = len(np.unique(df_destination.loc[~df_destination.link,["piv1", "piv2", "piv3", "piv4", "piv5"]].apply(lambda row: '_'.join(row.values.astype(str)), axis=1)))/(df_destination.shape[0]-df_destination.link.sum())

    print(difficultyInB)
    max_id_ = max( max(df_destination.unique_id), max(df_origin.unique_id) )

    #
    # print("PIV1")
    # print("links profile")
    # plt.show(plt.hist(df_destination.loc[df_destination.link,"piv1"]))
    # print("PIV1")
    # print("non-links profile")
    # plt.show(plt.hist(df_destination.loc[~df_destination.link,"piv1"]))
    # print("PIV2")
    # print("links profile")
    # plt.show(plt.hist(df_destination.loc[df_destination.link,"piv2"]))
    # print("PIV2")
    # print("non-links profile")
    # plt.show(plt.hist(df_destination.loc[~df_destination.link,"piv2"]))
    # print("PIV3")
    # print("links profile")
    # plt.show(plt.hist(df_destination.loc[df_destination.link,"piv3"]))
    # print("PIV3")
    # print("non-links profile")
    # plt.show(plt.hist(df_destination.loc[~df_destination.link,"piv3"]))
    # print("PIV4")
    # print("links profile")
    # plt.show(plt.hist(df_destination.loc[df_destination.link,"piv4"]))
    # print("PIV4")
    # print("non-links profile")
    # plt.show(plt.hist(df_destination.loc[~df_destination.link,"piv4"]))
    # print("PIV5")
    # print("links profile")
    # plt.show(plt.hist(df_destination.loc[df_destination.link,"piv5"]))
    # print("PIV5")
    # print("non-links profile")
    # plt.show(plt.hist(df_destination.loc[~df_destination.link,"piv5"]))
    #

    # Train the ARF
    my_arf = arf.arf(x = df_destination[["piv1", "piv2", "piv3", "piv4", "piv5"]])
    
    # Get density estimates
    my_arf.forde()

    nS = round(0.10*df_destination.shape[0])
    
    # Generate data
    synthdata = my_arf.forge(n = nS)
    
    synthdata['unique_id'] = max_id_ + np.array(range(1,1+nS))
    synthdata['link'] = False
    synthdata['synthetic'] = True
    
    synthetictest = synthdata.copy()
    synthetictest[["piv1", "piv2", "piv3", "piv4", "piv5"]] = round(synthetictest[["piv1", "piv2", "piv3", "piv4", "piv5"]],0)
    
    # investigate links profiles / synthetic profiles
    
    Xtrain = pds.concat([ df_destination.loc[0:round(nB/2,0),["piv1", "piv2", "piv3", "piv4", "piv5"]], synthetictest.loc[0:round(nS/2,0),["piv1", "piv2", "piv3", "piv4", "piv5"]]  ])
    ytrain = pds.concat([ df_destination.loc[0:round(nB/2,0),["synthetic"]], synthetictest.loc[0:round(nS/2,0),["synthetic"]]  ])

    print(round(nB/2,0))
    print(df_destination.index)
        
    Xtest = pds.concat([ df_destination.loc[round(nB/2,0):nB,["piv1", "piv2", "piv3", "piv4", "piv5"]], synthetictest.loc[round(nS/2,0):nS,["piv1", "piv2", "piv3", "piv4", "piv5"]]  ])
    ytest = pds.concat([ df_destination.loc[round(nB/2,0):nB,["synthetic"]], synthetictest.loc[round(nS/2,0):nS,["synthetic"]]  ])

    # info to save
    ytrain_synth_prop = ytrain.sum()/ytrain.shape[0]
    ytest_synth_prop = ytest.sum()/ytest.shape[0]
    
    model = xgb.XGBClassifier()
    model.fit(Xtrain, ytrain)
    predictions = model.predict(Xtest)
    y_score = model.predict_proba(Xtest)[:, 1]
    auc = roc_auc_score(ytest, y_score)

    # info to save
    auc_synth = auc

    rng = np.random.default_rng()
    rdgen = rng.choice(np.where(df_destination.link)[0], size=sum(df_destination.link), replace=False)
    rng2 = np.random.default_rng()
    rdgen2 = rng.choice(np.where(~df_destination.link)[0], size=sum(~df_destination.link), replace=False)
    
    XtrainL1 = df_destination.loc[rdgen[0:int(len(rdgen)/2)],["piv1", "piv2", "piv3", "piv4", "piv5"]]
    XtrainL2 = df_destination.loc[rdgen2[0:int(len(rdgen2)/2)],["piv1", "piv2", "piv3", "piv4", "piv5"]]
    XtrainL = pds.concat([XtrainL1,XtrainL2])
    ytrainL1 = df_destination.loc[rdgen[0:int(len(rdgen)/2)],["link"]]
    ytrainL2 = df_destination.loc[rdgen2[0:int(len(rdgen2)/2)],["link"]]
    ytrainL = pds.concat([ytrainL1,ytrainL2])
    
    XtestL1 = df_destination.loc[rdgen[int(len(rdgen)/2):len(rdgen)],["piv1", "piv2", "piv3", "piv4", "piv5"]]
    XtestL2 = df_destination.loc[rdgen2[int(len(rdgen2)/2):len(rdgen2)],["piv1", "piv2", "piv3", "piv4", "piv5"]]
    XtestL = pds.concat([XtestL1,XtestL2])
    ytestL1 = df_destination.loc[rdgen[int(len(rdgen)/2):len(rdgen)],["link"]]
    ytestL2 = df_destination.loc[rdgen2[int(len(rdgen2)/2):len(rdgen2)],["link"]]
    ytestL = pds.concat([ytestL1,ytestL2])

    # info to save
    ytrain_link_prop = ytrainL.sum()/ytrainL.shape[0]
    ytest_link_prop = ytestL.sum()/ytestL.shape[0]
    
    model = xgb.XGBClassifier()
    model.fit(XtrainL, ytrainL)
    predictions = model.predict(XtestL)
    y_scoreL = model.predict_proba(XtestL)[:, 1]
    auc = roc_auc_score(ytestL, y_scoreL)

    # info to save
    auc_link = auc

    NEW_df_destination = pds.concat([df_destination, synthetictest])

    NEW_df_destination[["piv1", "piv2", "piv3", "piv4", "piv5"]] = NEW_df_destination[["piv1", "piv2", "piv3", "piv4", "piv5"]].astype(int)
    df_origin[["piv1", "piv2", "piv3", "piv4", "piv5"]] = df_origin[["piv1", "piv2", "piv3", "piv4", "piv5"]].astype(int)

    if save_df:

        NEW_df_destination.to_csv(f'{setting_info}_df_destination{_}.csv', index=False)
        df_origin.to_csv(f'{setting_info}_df_origin{_}.csv', index=False)
    
    blocking_rule_1 = """
    round(l.piv1) = round(r.piv1) and
    round(l.piv2) = round(r.piv2) and
    round(l.piv3) = round(r.piv3) and
    round(l.piv4) = round(r.piv4) and
    round(l.piv5) = round(r.piv5)
    """
    
    db_api = DuckDBAPI()
    
    brs = [
      blocking_rule_1,
    ]
    
    comparison_1 = {
      "output_column_name": "piv1",
      "comparison_levels": [
          cll.NullLevel("piv1"),
          cll.ExactMatchLevel("piv1"),
          cll.ElseLevel(),
      ],
      "comparison_description": "piv1 difference",
    }
    
    comparison_2 = {
      "output_column_name": "piv2",
      "comparison_levels": [
          cll.NullLevel("piv2"),
          cll.ExactMatchLevel("piv2"),
          cll.ElseLevel(),
      ],
      "comparison_description": "piv2 difference",
    }
    
    comparison_3 = {
      "output_column_name": "piv3",
      "comparison_levels": [
          cll.NullLevel("piv3"),
          cll.ExactMatchLevel("piv3"),
          cll.ElseLevel(),
      ],
      "comparison_description": "piv3 difference",
    }
    
    comparison_4 = {
      "output_column_name": "piv4",
      "comparison_levels": [
          cll.NullLevel("piv4"),
          cll.ExactMatchLevel("piv4"),
          cll.ElseLevel(),
      ],
      "comparison_description": "piv4 difference",
    }
    
    comparison_5 = {
      "output_column_name": "piv5",
      "comparison_levels": [
          cll.NullLevel("piv5"),
          cll.ExactMatchLevel("piv5"),
          cll.ElseLevel(),
      ],
      "comparison_description": "piv5 difference",
    }
    
    settings = SettingsCreator(
      link_type="link_only",
      blocking_rules_to_generate_predictions=brs,
      comparisons=[
          comparison_1,
          comparison_2,
          comparison_3,
          comparison_4,
          comparison_5,
      ],
      retain_intermediate_calculation_columns=True,
    )
    
    linker = Linker(
      [df_origin, NEW_df_destination],
      settings,
      input_table_aliases=["__ori", "_dest"],
      db_api=db_api,
    )
    
    linker.training.estimate_u_using_random_sampling(max_pairs=1e7)
    
    # linker.training.estimate_parameters_using_expectation_maximisation(block_on("piv1"))
    # linker.training.estimate_parameters_using_expectation_maximisation(block_on("piv2"))
    # linker.training.estimate_parameters_using_expectation_maximisation(block_on("piv3"))
    # linker.training.estimate_parameters_using_expectation_maximisation(block_on("piv4"))
    # linker.training.estimate_parameters_using_expectation_maximisation(block_on("piv5"))
    linker.training.estimate_parameters_using_expectation_maximisation(block_on("piv1","piv2","piv3"))
    linker.training.estimate_parameters_using_expectation_maximisation(block_on("piv4","piv5"))
    
    df_predict = linker.inference.predict(threshold_match_probability=0.00001)

    # info to save
    max_proba_linked = max(df_predict.as_pandas_dataframe().match_probability)
    
    median_proba_linked = np.median(df_predict.as_pandas_dataframe().match_probability)
    median_proba_linked = math.floor(median_proba_linked * 100) / 100

    linkedpairs05 = df_predict.as_pandas_dataframe()

    # we cheat because the model sometimes does not link anything
    # if there are not enough linked pairs: threshold is set at median match_probability
    # if there are enough linked pairs: threshold is set at max(0.5, median match_probability) (this may help for 1-2-1 assignment constraint)
    # in general we cheat because we do not enforce 1-2-1 assignment constraint (it is not implemented in SPLink)
    if max_proba_linked >= 0.5:
        threshold_for_links = 0.5 # max(0.5, median_proba_linked)
    else:
        threshold_for_links = median_proba_linked
        
    linkedpairs05 = linkedpairs05.loc[linkedpairs05.match_probability>threshold_for_links,:]
    
    synthlinkedpairs = linkedpairs05[linkedpairs05.unique_id_r > max_id_]
    reallinkedpairs = linkedpairs05[linkedpairs05.unique_id_r <= max_id_]

    tp = 0
    fp = 0
    true_fdr = 0
    hat_fdr_us = 0
    hat_fdr_prob = 0

    if reallinkedpairs.shape[0]!=0 or synthlinkedpairs.shape[0]!=0:
    
        tp = (reallinkedpairs.unique_id_l == reallinkedpairs.unique_id_r).sum()
        fp = (reallinkedpairs.unique_id_l != reallinkedpairs.unique_id_r).sum()
        true_fdr = fp/(tp+fp)
        hat_fdr_us = ( synthlinkedpairs.shape[0] * (nB / nS) ) / reallinkedpairs.shape[0]
        hat_fdr_prob = 1 - reallinkedpairs[reallinkedpairs.match_probability>threshold_for_links].match_probability.sum() / reallinkedpairs[reallinkedpairs.match_probability>threshold_for_links].shape[0]
        
    # info to save
    synth_pairs_prop = synthlinkedpairs.shape[0] / nS
    real_pairs_prop = fp / nB
    
    return overlapInA, overlapInB, difficultyInA, difficultyInB, difficultyInL, difficultyInNL, ytrain_synth_prop, ytest_synth_prop, auc_synth, ytrain_link_prop, ytest_link_prop, auc_link, max_proba_linked, median_proba_linked, threshold_for_links, synth_pairs_prop, real_pairs_prop, true_fdr, hat_fdr_us, hat_fdr_prob        


In [None]:
# STUDY SCALABILITY

linkagestructures = ["at_random"]
nAlist = [100000]
list_params = { 85:[10,16,10,15,14], 95:[10,20,15,25,20] }       
set_of_overlap = [0.35, 0.75]      

nB = 200000
save_df = False # True

In [3]:
# STUDY ROBUSTNESS TO LINKS HAPPEN AT RANDOM

linkagestructures = ["at_random", "strong_dep_pivs"] # "light_dep_pivs"
nAlist = [2002, 4502]
list_params = { 85:[1.5,7,7,8,8], 95:[2,9,9,10,10] } 
set_of_overlap = [0.35, 0.75]      

nB = 5000
save_df = False # True

In [None]:
# STUDY ROBUSTNESS TO THE PRESENCE OF NON-DEDUPLICATED DATA

linkagestructures = ["at_random"]
nAlist = [2002, 4502]
list_params = { 85:[10,16,10,15,14], 95:[10,20,15,25,20] }       
set_of_overlap = [0.35, 0.75]      

nB = 5000
save_df = False # True
# change the dedup parameter below in the code!!!

In [4]:
                          
dico_results_final = {}

for linkage in linkagestructures:
    
    dico_results_linkage = {}
    
    for nA in nAlist:
    
        dico_results_size = {}
        
        for list_params_key in list_params.keys():
            
            dico_results_setting = {}
            
            for overlap in set_of_overlap:
            
                print("OVERLAP")
                print(overlap)
        
                iter_secu = 0
            
                overlapInA_res = []
                overlapInB_res = []
                difficultyInA_res = []
                difficultyInB_res = []
                difficultyInL_res = []
                difficultyInNL_res = []
                ytrain_synth_prop_res = []
                ytest_synth_prop_res = []
                auc_synth_res = []
                ytrain_link_prop_res = []
                ytest_link_prop_res = []
                auc_link_res = []
                max_proba_linked_res = []
                median_proba_linked_res = []
                threshold_for_links_res = []
                synth_pairs_prop_res = []
                real_pairs_prop_res = []
                true_fdr_res = []
                hat_fdr_us_res = []
                hat_fdr_prob_res = []
        
                setting_info = f"links_{linkage}_nA_{nA}_discrlevel_{list_params_key}_overlaplevel_{overlap}_highdim"   
            
                while len(np.nonzero(true_fdr_res)[0]) < 10 and iter_secu < 15:
        
                    iter_secu += 1
            
                    print("ITERATION")
                    print(iter_secu)
        
                    overlapInA, overlapInB, difficultyInA, difficultyInB, difficultyInL, difficultyInNL, ytrain_synth_prop, ytest_synth_prop, auc_synth, ytrain_link_prop, ytest_link_prop, auc_link, max_proba_linked, median_proba_linked, threshold_for_links, synth_pairs_prop, real_pairs_prop, true_fdr, hat_fdr_us, hat_fdr_prob = one_round(list_params_key, linkage, nA, nB, overlap, save_df, setting_info)   # # #   
        
                    overlapInA_res.append(overlapInA)
                    overlapInB_res.append(overlapInB)
                    difficultyInA_res.append(difficultyInA)
                    difficultyInB_res.append(difficultyInB)
                    difficultyInL_res.append(difficultyInL)
                    difficultyInNL_res.append(difficultyInNL)
                    ytrain_synth_prop_res.append(ytrain_synth_prop)
                    ytest_synth_prop_res.append(ytest_synth_prop)
                    auc_synth_res.append(auc_synth)
                    ytrain_link_prop_res.append(ytrain_link_prop)
                    ytest_link_prop_res.append(ytest_link_prop)
                    auc_link_res.append(auc_link)
                    max_proba_linked_res.append(max_proba_linked)
                    median_proba_linked_res.append(median_proba_linked)
                    threshold_for_links_res.append(threshold_for_links)
                    synth_pairs_prop_res.append(synth_pairs_prop)
                    real_pairs_prop_res.append(real_pairs_prop)
                    true_fdr_res.append(true_fdr)
                    hat_fdr_us_res.append(hat_fdr_us)
                    hat_fdr_prob_res.append(hat_fdr_prob)
                
                    dico_results_setting[overlap] = {
                        
                        "overlapInA" : overlapInA_res,
                        "overlapInB" : overlapInB_res,
                        "difficultyInA" : difficultyInA_res,
                        "difficultyInB" : difficultyInB_res,
                        "difficultyInL" : difficultyInL_res,
                        "difficultyInNL" : difficultyInNL_res,
                        "ytrain_synth_prop" : ytrain_synth_prop_res,
                        "ytest_synth_prop" : ytest_synth_prop_res,
                        "auc_synth" : auc_synth_res,
                        "ytrain_link_prop" : ytrain_link_prop_res,
                        "ytest_link_prop" : ytest_link_prop_res,
                        "auc_link" : auc_link_res,
                        "max_proba_linked" : max_proba_linked_res,
                        "median_proba_linked" : median_proba_linked_res,
                        "threshold_for_links" : threshold_for_links_res,
                        "synth_pairs_prop" : synth_pairs_prop_res,
                        "real_pairs_prop" : real_pairs_prop_res,
                        "true_fdr" : true_fdr_res,
                        "hat_fdr_us" : hat_fdr_us_res,
                        "hat_fdr_prob" : hat_fdr_prob_res
                        
                    }
        
                    print(nA)
                    print(list_params_key)
                    print(overlap)
                    print(dico_results_setting)
                
                    gc.collect()
        
            dico_results_size[list_params_key] = dico_results_setting
        
        dico_results_linkage[nA] = dico_results_size
    
    dico_results_final[linkage] = dico_results_linkage


OVERLAP
0.35
ITERATION
1
Unique values in column 'piv1': 11
Unique values in column 'piv2': 8
Unique values in column 'piv3': 7
Unique values in column 'piv4': 8
Unique values in column 'piv5': 9
Unique values in column 'link': 2
Unique values in column 'unique_id': 5000
Unique values in column 'synthetic': 1
Unique values in column 'duplication': 1
0.7932
Initial accuracy is 0.4904


  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.00432 in the m_probability of piv5, level `Exact match on piv5`
Iteration 2: Largest change in params was 0.00281 in the m_probability of piv5, level `All other comparisons`
Iteratio

2002
85
0.35
{0.35: {'overlapInA': [0.34965034965034963], 'overlapInB': [0.14], 'difficultyInA': [0.8921078921078921], 'difficultyInB': [0.7932], 'difficultyInL': [np.float64(0.9542857142857143)], 'difficultyInNL': [np.float64(0.8181395348837209)], 'ytrain_synth_prop': [synthetic    0.091206
dtype: float64], 'ytest_synth_prop': [synthetic    0.090909
dtype: float64], 'auc_synth': [np.float64(0.4827088)], 'ytrain_link_prop': [link    0.14
dtype: float64], 'ytest_link_prop': [link    0.14
dtype: float64], 'auc_link': [np.float64(0.49102591362126247)], 'max_proba_linked': [0.41406256855084755], 'median_proba_linked': [0.41], 'threshold_for_links': [0.41], 'synth_pairs_prop': [0.212], 'real_pairs_prop': [np.float64(0.2148)], 'true_fdr': [np.float64(0.6054114994363021)], 'hat_fdr_us': [0.5975197294250282], 'hat_fdr_prob': [np.float64(0.5859374314491524)]}}
ITERATION
2
Unique values in column 'piv1': 12
Unique values in column 'piv2': 8
Unique values in column 'piv3': 7
Unique values in colu

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.00192 in the m_probability of piv5, level `Exact match on piv5`
Iteration 2: Largest change in params was 0.00116 in the m_probability of piv5, level `All other comparisons`
Iteratio

2002
85
0.35
{0.35: {'overlapInA': [0.34965034965034963, 0.34965034965034963], 'overlapInB': [0.14, 0.14], 'difficultyInA': [0.8921078921078921, 0.9000999000999002], 'difficultyInB': [0.7932, 0.7902], 'difficultyInL': [np.float64(0.9542857142857143), np.float64(0.9585714285714285)], 'difficultyInNL': [np.float64(0.8181395348837209), np.float64(0.8137209302325581)], 'ytrain_synth_prop': [synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64], 'ytest_synth_prop': [synthetic    0.090909
dtype: float64, synthetic    0.090909
dtype: float64], 'auc_synth': [np.float64(0.4827088), np.float64(0.5224783999999999)], 'ytrain_link_prop': [link    0.14
dtype: float64, link    0.14
dtype: float64], 'ytest_link_prop': [link    0.14
dtype: float64, link    0.14
dtype: float64], 'auc_link': [np.float64(0.49102591362126247), np.float64(0.5131189368770764)], 'max_proba_linked': [0.41406256855084755, 0.39850141654742727], 'median_proba_linked': [0.41, 0.39], 'threshold_for_links': [0.

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.00327 in the m_probability of piv5, level `All other comparisons`
Iteration 2: Largest change in params was 0.00232 in the m_probability of piv5, level `All other comparisons`
Iterati

2002
85
0.35
{0.35: {'overlapInA': [0.34965034965034963, 0.34965034965034963, 0.34965034965034963], 'overlapInB': [0.14, 0.14, 0.14], 'difficultyInA': [0.8921078921078921, 0.9000999000999002, 0.9075924075924076], 'difficultyInB': [0.7932, 0.7902, 0.789], 'difficultyInL': [np.float64(0.9542857142857143), np.float64(0.9585714285714285), np.float64(0.9728571428571429)], 'difficultyInNL': [np.float64(0.8181395348837209), np.float64(0.8137209302325581), np.float64(0.8118604651162791)], 'ytrain_synth_prop': [synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64], 'ytest_synth_prop': [synthetic    0.090909
dtype: float64, synthetic    0.090909
dtype: float64, synthetic    0.090909
dtype: float64], 'auc_synth': [np.float64(0.4827088), np.float64(0.5224783999999999), np.float64(0.537624)], 'ytrain_link_prop': [link    0.14
dtype: float64, link    0.14
dtype: float64, link    0.14
dtype: float64], 'ytest_link_prop': [link    0.14
dtype: 

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.00243 in the m_probability of piv5, level `All other comparisons`
Iteration 2: Largest change in params was -0.00158 in the m_probability of piv5, level `Exact match on piv5`
Iteratio

2002
85
0.35
{0.35: {'overlapInA': [0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963], 'overlapInB': [0.14, 0.14, 0.14, 0.14], 'difficultyInA': [0.8921078921078921, 0.9000999000999002, 0.9075924075924076, 0.8981018981018981], 'difficultyInB': [0.7932, 0.7902, 0.789, 0.7882], 'difficultyInL': [np.float64(0.9542857142857143), np.float64(0.9585714285714285), np.float64(0.9728571428571429), np.float64(0.9671428571428572)], 'difficultyInNL': [np.float64(0.8181395348837209), np.float64(0.8137209302325581), np.float64(0.8118604651162791), np.float64(0.8102325581395349)], 'ytrain_synth_prop': [synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64], 'ytest_synth_prop': [synthetic    0.090909
dtype: float64, synthetic    0.090909
dtype: float64, synthetic    0.090909
dtype: float64, synthetic    0.090909
dtype: float64], 'auc_synth': [np.float64(0.4827088), np.float6

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.00329 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was 0.00193 in the m_probability of piv4, level `All other comparisons`
Iteratio

2002
85
0.35
{0.35: {'overlapInA': [0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963], 'overlapInB': [0.14, 0.14, 0.14, 0.14, 0.14], 'difficultyInA': [0.8921078921078921, 0.9000999000999002, 0.9075924075924076, 0.8981018981018981, 0.8956043956043956], 'difficultyInB': [0.7932, 0.7902, 0.789, 0.7882, 0.7862], 'difficultyInL': [np.float64(0.9542857142857143), np.float64(0.9585714285714285), np.float64(0.9728571428571429), np.float64(0.9671428571428572), np.float64(0.9642857142857143)], 'difficultyInNL': [np.float64(0.8181395348837209), np.float64(0.8137209302325581), np.float64(0.8118604651162791), np.float64(0.8102325581395349), np.float64(0.8102325581395349)], 'ytrain_synth_prop': [synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64], 'ytest_synth_prop': [synthetic    0.090909
dtype: float64, synt

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.00255 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was 0.00161 in the m_probability of piv4, level `All other comparisons`
Iteratio

2002
85
0.35
{0.35: {'overlapInA': [0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963], 'overlapInB': [0.14, 0.14, 0.14, 0.14, 0.14, 0.14], 'difficultyInA': [0.8921078921078921, 0.9000999000999002, 0.9075924075924076, 0.8981018981018981, 0.8956043956043956, 0.9015984015984015], 'difficultyInB': [0.7932, 0.7902, 0.789, 0.7882, 0.7862, 0.7836], 'difficultyInL': [np.float64(0.9542857142857143), np.float64(0.9585714285714285), np.float64(0.9728571428571429), np.float64(0.9671428571428572), np.float64(0.9642857142857143), np.float64(0.97)], 'difficultyInNL': [np.float64(0.8181395348837209), np.float64(0.8137209302325581), np.float64(0.8118604651162791), np.float64(0.8102325581395349), np.float64(0.8102325581395349), np.float64(0.7988372093023256)], 'ytrain_synth_prop': [synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float6

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.00474 in the m_probability of piv5, level `Exact match on piv5`
Iteration 2: Largest change in params was 0.0033 in the m_probability of piv5, level `All other comparisons`
Iteration

2002
85
0.35
{0.35: {'overlapInA': [0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963], 'overlapInB': [0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14], 'difficultyInA': [0.8921078921078921, 0.9000999000999002, 0.9075924075924076, 0.8981018981018981, 0.8956043956043956, 0.9015984015984015, 0.9105894105894106], 'difficultyInB': [0.7932, 0.7902, 0.789, 0.7882, 0.7862, 0.7836, 0.7898], 'difficultyInL': [np.float64(0.9542857142857143), np.float64(0.9585714285714285), np.float64(0.9728571428571429), np.float64(0.9671428571428572), np.float64(0.9642857142857143), np.float64(0.97), np.float64(0.98)], 'difficultyInNL': [np.float64(0.8181395348837209), np.float64(0.8137209302325581), np.float64(0.8118604651162791), np.float64(0.8102325581395349), np.float64(0.8102325581395349), np.float64(0.7988372093023256), np.float64(0.8123255813953488)], 'ytrain_synth_prop': [synthetic    0.091206
dtype: float64, synthe

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.00126 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was 0.000599 in the m_probability of piv4, level `All other comparisons`
Iterati

2002
85
0.35
{0.35: {'overlapInA': [0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963], 'overlapInB': [0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14], 'difficultyInA': [0.8921078921078921, 0.9000999000999002, 0.9075924075924076, 0.8981018981018981, 0.8956043956043956, 0.9015984015984015, 0.9105894105894106, 0.8946053946053946], 'difficultyInB': [0.7932, 0.7902, 0.789, 0.7882, 0.7862, 0.7836, 0.7898, 0.7774], 'difficultyInL': [np.float64(0.9542857142857143), np.float64(0.9585714285714285), np.float64(0.9728571428571429), np.float64(0.9671428571428572), np.float64(0.9642857142857143), np.float64(0.97), np.float64(0.98), np.float64(0.9442857142857143)], 'difficultyInNL': [np.float64(0.8181395348837209), np.float64(0.8137209302325581), np.float64(0.8118604651162791), np.float64(0.8102325581395349), np.float64(0.8102325581395349), np.float64(0.7988372093023256), np.float64(0

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.00311 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was 0.00214 in the m_probability of piv4, level `All other comparisons`
Iteratio

2002
85
0.35
{0.35: {'overlapInA': [0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963], 'overlapInB': [0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14], 'difficultyInA': [0.8921078921078921, 0.9000999000999002, 0.9075924075924076, 0.8981018981018981, 0.8956043956043956, 0.9015984015984015, 0.9105894105894106, 0.8946053946053946, 0.9080919080919081], 'difficultyInB': [0.7932, 0.7902, 0.789, 0.7882, 0.7862, 0.7836, 0.7898, 0.7774, 0.7926], 'difficultyInL': [np.float64(0.9542857142857143), np.float64(0.9585714285714285), np.float64(0.9728571428571429), np.float64(0.9671428571428572), np.float64(0.9642857142857143), np.float64(0.97), np.float64(0.98), np.float64(0.9442857142857143), np.float64(0.9757142857142858)], 'difficultyInNL': [np.float64(0.8181395348837209), np.float64(0.8137209302325581), np.float64(0.8118604651162791), np.float64(0.81023255

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.00316 in the m_probability of piv5, level `Exact match on piv5`
Iteration 2: Largest change in params was -0.00199 in the m_probability of piv5, level `Exact match on piv5`
Iteration

2002
85
0.35
{0.35: {'overlapInA': [0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963], 'overlapInB': [0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14], 'difficultyInA': [0.8921078921078921, 0.9000999000999002, 0.9075924075924076, 0.8981018981018981, 0.8956043956043956, 0.9015984015984015, 0.9105894105894106, 0.8946053946053946, 0.9080919080919081, 0.8871128871128872], 'difficultyInB': [0.7932, 0.7902, 0.789, 0.7882, 0.7862, 0.7836, 0.7898, 0.7774, 0.7926, 0.778], 'difficultyInL': [np.float64(0.9542857142857143), np.float64(0.9585714285714285), np.float64(0.9728571428571429), np.float64(0.9671428571428572), np.float64(0.9642857142857143), np.float64(0.97), np.float64(0.98), np.float64(0.9442857142857143), np.float64(0.9757142857142858), np.float64(0.9528571428571428)], 'difficultyInNL': [np.float64(0.8181395348837209),

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.0126 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was -0.00592 in the m_probability of piv4, level `All other comparisons`
Iteration

2002
85
0.75
{0.35: {'overlapInA': [0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963], 'overlapInB': [0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14], 'difficultyInA': [0.8921078921078921, 0.9000999000999002, 0.9075924075924076, 0.8981018981018981, 0.8956043956043956, 0.9015984015984015, 0.9105894105894106, 0.8946053946053946, 0.9080919080919081, 0.8871128871128872], 'difficultyInB': [0.7932, 0.7902, 0.789, 0.7882, 0.7862, 0.7836, 0.7898, 0.7774, 0.7926, 0.778], 'difficultyInL': [np.float64(0.9542857142857143), np.float64(0.9585714285714285), np.float64(0.9728571428571429), np.float64(0.9671428571428572), np.float64(0.9642857142857143), np.float64(0.97), np.float64(0.98), np.float64(0.9442857142857143), np.float64(0.9757142857142858), np.float64(0.9528571428571428)], 'difficultyInNL': [np.float64(0.8181395348837209),

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0118 in the m_probability of piv5, level `All other comparisons`
Iteration 2: Largest change in params was -0.00563 in the m_probability of piv5, level `All other comparisons`
Iterat

2002
85
0.75
{0.35: {'overlapInA': [0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963], 'overlapInB': [0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14], 'difficultyInA': [0.8921078921078921, 0.9000999000999002, 0.9075924075924076, 0.8981018981018981, 0.8956043956043956, 0.9015984015984015, 0.9105894105894106, 0.8946053946053946, 0.9080919080919081, 0.8871128871128872], 'difficultyInB': [0.7932, 0.7902, 0.789, 0.7882, 0.7862, 0.7836, 0.7898, 0.7774, 0.7926, 0.778], 'difficultyInL': [np.float64(0.9542857142857143), np.float64(0.9585714285714285), np.float64(0.9728571428571429), np.float64(0.9671428571428572), np.float64(0.9642857142857143), np.float64(0.97), np.float64(0.98), np.float64(0.9442857142857143), np.float64(0.9757142857142858), np.float64(0.9528571428571428)], 'difficultyInNL': [np.float64(0.8181395348837209),

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.011 in the m_probability of piv4, level `All other comparisons`
Iteration 2: Largest change in params was -0.00522 in the m_probability of piv4, level `All other comparisons`
Iterati

2002
85
0.75
{0.35: {'overlapInA': [0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963], 'overlapInB': [0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14], 'difficultyInA': [0.8921078921078921, 0.9000999000999002, 0.9075924075924076, 0.8981018981018981, 0.8956043956043956, 0.9015984015984015, 0.9105894105894106, 0.8946053946053946, 0.9080919080919081, 0.8871128871128872], 'difficultyInB': [0.7932, 0.7902, 0.789, 0.7882, 0.7862, 0.7836, 0.7898, 0.7774, 0.7926, 0.778], 'difficultyInL': [np.float64(0.9542857142857143), np.float64(0.9585714285714285), np.float64(0.9728571428571429), np.float64(0.9671428571428572), np.float64(0.9642857142857143), np.float64(0.97), np.float64(0.98), np.float64(0.9442857142857143), np.float64(0.9757142857142858), np.float64(0.9528571428571428)], 'difficultyInNL': [np.float64(0.8181395348837209),

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0124 in the m_probability of piv5, level `All other comparisons`
Iteration 2: Largest change in params was 0.00576 in the m_probability of piv5, level `Exact match on piv5`
Iteration

2002
85
0.75
{0.35: {'overlapInA': [0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963], 'overlapInB': [0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14], 'difficultyInA': [0.8921078921078921, 0.9000999000999002, 0.9075924075924076, 0.8981018981018981, 0.8956043956043956, 0.9015984015984015, 0.9105894105894106, 0.8946053946053946, 0.9080919080919081, 0.8871128871128872], 'difficultyInB': [0.7932, 0.7902, 0.789, 0.7882, 0.7862, 0.7836, 0.7898, 0.7774, 0.7926, 0.778], 'difficultyInL': [np.float64(0.9542857142857143), np.float64(0.9585714285714285), np.float64(0.9728571428571429), np.float64(0.9671428571428572), np.float64(0.9642857142857143), np.float64(0.97), np.float64(0.98), np.float64(0.9442857142857143), np.float64(0.9757142857142858), np.float64(0.9528571428571428)], 'difficultyInNL': [np.float64(0.8181395348837209),

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.0127 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was -0.00612 in the m_probability of piv4, level `All other comparisons`
Iteration

2002
85
0.75
{0.35: {'overlapInA': [0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963], 'overlapInB': [0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14], 'difficultyInA': [0.8921078921078921, 0.9000999000999002, 0.9075924075924076, 0.8981018981018981, 0.8956043956043956, 0.9015984015984015, 0.9105894105894106, 0.8946053946053946, 0.9080919080919081, 0.8871128871128872], 'difficultyInB': [0.7932, 0.7902, 0.789, 0.7882, 0.7862, 0.7836, 0.7898, 0.7774, 0.7926, 0.778], 'difficultyInL': [np.float64(0.9542857142857143), np.float64(0.9585714285714285), np.float64(0.9728571428571429), np.float64(0.9671428571428572), np.float64(0.9642857142857143), np.float64(0.97), np.float64(0.98), np.float64(0.9442857142857143), np.float64(0.9757142857142858), np.float64(0.9528571428571428)], 'difficultyInNL': [np.float64(0.8181395348837209),

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.0121 in the m_probability of piv5, level `Exact match on piv5`
Iteration 2: Largest change in params was -0.00556 in the m_probability of piv5, level `All other comparisons`
Iteration

2002
85
0.75
{0.35: {'overlapInA': [0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963], 'overlapInB': [0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14], 'difficultyInA': [0.8921078921078921, 0.9000999000999002, 0.9075924075924076, 0.8981018981018981, 0.8956043956043956, 0.9015984015984015, 0.9105894105894106, 0.8946053946053946, 0.9080919080919081, 0.8871128871128872], 'difficultyInB': [0.7932, 0.7902, 0.789, 0.7882, 0.7862, 0.7836, 0.7898, 0.7774, 0.7926, 0.778], 'difficultyInL': [np.float64(0.9542857142857143), np.float64(0.9585714285714285), np.float64(0.9728571428571429), np.float64(0.9671428571428572), np.float64(0.9642857142857143), np.float64(0.97), np.float64(0.98), np.float64(0.9442857142857143), np.float64(0.9757142857142858), np.float64(0.9528571428571428)], 'difficultyInNL': [np.float64(0.8181395348837209),

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.0118 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was -0.00568 in the m_probability of piv4, level `All other comparisons`
Iteration

2002
85
0.75
{0.35: {'overlapInA': [0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963], 'overlapInB': [0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14], 'difficultyInA': [0.8921078921078921, 0.9000999000999002, 0.9075924075924076, 0.8981018981018981, 0.8956043956043956, 0.9015984015984015, 0.9105894105894106, 0.8946053946053946, 0.9080919080919081, 0.8871128871128872], 'difficultyInB': [0.7932, 0.7902, 0.789, 0.7882, 0.7862, 0.7836, 0.7898, 0.7774, 0.7926, 0.778], 'difficultyInL': [np.float64(0.9542857142857143), np.float64(0.9585714285714285), np.float64(0.9728571428571429), np.float64(0.9671428571428572), np.float64(0.9642857142857143), np.float64(0.97), np.float64(0.98), np.float64(0.9442857142857143), np.float64(0.9757142857142858), np.float64(0.9528571428571428)], 'difficultyInNL': [np.float64(0.8181395348837209),

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.0126 in the m_probability of piv5, level `Exact match on piv5`
Iteration 2: Largest change in params was -0.00603 in the m_probability of piv5, level `All other comparisons`
Iteration

2002
85
0.75
{0.35: {'overlapInA': [0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963], 'overlapInB': [0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14], 'difficultyInA': [0.8921078921078921, 0.9000999000999002, 0.9075924075924076, 0.8981018981018981, 0.8956043956043956, 0.9015984015984015, 0.9105894105894106, 0.8946053946053946, 0.9080919080919081, 0.8871128871128872], 'difficultyInB': [0.7932, 0.7902, 0.789, 0.7882, 0.7862, 0.7836, 0.7898, 0.7774, 0.7926, 0.778], 'difficultyInL': [np.float64(0.9542857142857143), np.float64(0.9585714285714285), np.float64(0.9728571428571429), np.float64(0.9671428571428572), np.float64(0.9642857142857143), np.float64(0.97), np.float64(0.98), np.float64(0.9442857142857143), np.float64(0.9757142857142858), np.float64(0.9528571428571428)], 'difficultyInNL': [np.float64(0.8181395348837209),

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.0122 in the m_probability of piv5, level `Exact match on piv5`
Iteration 2: Largest change in params was -0.00577 in the m_probability of piv5, level `All other comparisons`
Iteration

2002
85
0.75
{0.35: {'overlapInA': [0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963], 'overlapInB': [0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14], 'difficultyInA': [0.8921078921078921, 0.9000999000999002, 0.9075924075924076, 0.8981018981018981, 0.8956043956043956, 0.9015984015984015, 0.9105894105894106, 0.8946053946053946, 0.9080919080919081, 0.8871128871128872], 'difficultyInB': [0.7932, 0.7902, 0.789, 0.7882, 0.7862, 0.7836, 0.7898, 0.7774, 0.7926, 0.778], 'difficultyInL': [np.float64(0.9542857142857143), np.float64(0.9585714285714285), np.float64(0.9728571428571429), np.float64(0.9671428571428572), np.float64(0.9642857142857143), np.float64(0.97), np.float64(0.98), np.float64(0.9442857142857143), np.float64(0.9757142857142858), np.float64(0.9528571428571428)], 'difficultyInNL': [np.float64(0.8181395348837209),

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0126 in the m_probability of piv4, level `All other comparisons`
Iteration 2: Largest change in params was -0.00597 in the m_probability of piv4, level `All other comparisons`
Iterat

2002
85
0.75
{0.35: {'overlapInA': [0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963], 'overlapInB': [0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14], 'difficultyInA': [0.8921078921078921, 0.9000999000999002, 0.9075924075924076, 0.8981018981018981, 0.8956043956043956, 0.9015984015984015, 0.9105894105894106, 0.8946053946053946, 0.9080919080919081, 0.8871128871128872], 'difficultyInB': [0.7932, 0.7902, 0.789, 0.7882, 0.7862, 0.7836, 0.7898, 0.7774, 0.7926, 0.778], 'difficultyInL': [np.float64(0.9542857142857143), np.float64(0.9585714285714285), np.float64(0.9728571428571429), np.float64(0.9671428571428572), np.float64(0.9642857142857143), np.float64(0.97), np.float64(0.98), np.float64(0.9442857142857143), np.float64(0.9757142857142858), np.float64(0.9528571428571428)], 'difficultyInNL': [np.float64(0.8181395348837209),

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.00654 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was 0.003 in the m_probability of piv4, level `All other comparisons`
Iteration 

2002
95
0.35
{0.35: {'overlapInA': [0.34965034965034963], 'overlapInB': [0.14], 'difficultyInA': [0.9595404595404595], 'difficultyInB': [0.9126], 'difficultyInL': [np.float64(0.99)], 'difficultyInNL': [np.float64(0.9232558139534883)], 'ytrain_synth_prop': [synthetic    0.091206
dtype: float64], 'ytest_synth_prop': [synthetic    0.090909
dtype: float64], 'auc_synth': [np.float64(0.5066392000000001)], 'ytrain_link_prop': [link    0.14
dtype: float64], 'ytest_link_prop': [link    0.14
dtype: float64], 'auc_link': [np.float64(0.48584916943521594)], 'max_proba_linked': [0.676784957278319], 'median_proba_linked': [0.67], 'threshold_for_links': [0.5], 'synth_pairs_prop': [0.066], 'real_pairs_prop': [np.float64(0.0784)], 'true_fdr': [np.float64(0.358974358974359)], 'hat_fdr_us': [0.3021978021978022], 'hat_fdr_prob': [np.float64(0.3232150427216809)]}}
ITERATION
2
Unique values in column 'piv1': 15
Unique values in column 'piv2': 10
Unique values in column 'piv3': 9
Unique values in column 'piv4

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.00458 in the m_probability of piv5, level `Exact match on piv5`
Iteration 2: Largest change in params was 0.00135 in the m_probability of piv5, level `All other comparisons`
Iteratio

2002
95
0.35
{0.35: {'overlapInA': [0.34965034965034963, 0.34965034965034963], 'overlapInB': [0.14, 0.14], 'difficultyInA': [0.9595404595404595, 0.9600399600399601], 'difficultyInB': [0.9126, 0.9138], 'difficultyInL': [np.float64(0.99), np.float64(0.9857142857142858)], 'difficultyInNL': [np.float64(0.9232558139534883), np.float64(0.9265116279069767)], 'ytrain_synth_prop': [synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64], 'ytest_synth_prop': [synthetic    0.090909
dtype: float64, synthetic    0.090909
dtype: float64], 'auc_synth': [np.float64(0.5066392000000001), np.float64(0.5099256)], 'ytrain_link_prop': [link    0.14
dtype: float64, link    0.14
dtype: float64], 'ytest_link_prop': [link    0.14
dtype: float64, link    0.14
dtype: float64], 'auc_link': [np.float64(0.48584916943521594), np.float64(0.49027707641196017)], 'max_proba_linked': [0.676784957278319, 0.671064325987925], 'median_proba_linked': [0.67, 0.67], 'threshold_for_links': [0.5, 0.5], 'synth_p

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.00656 in the m_probability of piv5, level `All other comparisons`
Iteration 2: Largest change in params was -0.00289 in the m_probability of piv5, level `Exact match on piv5`
Iteratio

2002
95
0.35
{0.35: {'overlapInA': [0.34965034965034963, 0.34965034965034963, 0.34965034965034963], 'overlapInB': [0.14, 0.14, 0.14], 'difficultyInA': [0.9595404595404595, 0.9600399600399601, 0.961038961038961], 'difficultyInB': [0.9126, 0.9138, 0.9174], 'difficultyInL': [np.float64(0.99), np.float64(0.9857142857142858), np.float64(0.9871428571428571)], 'difficultyInNL': [np.float64(0.9232558139534883), np.float64(0.9265116279069767), np.float64(0.9286046511627907)], 'ytrain_synth_prop': [synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64], 'ytest_synth_prop': [synthetic    0.090909
dtype: float64, synthetic    0.090909
dtype: float64, synthetic    0.090909
dtype: float64], 'auc_synth': [np.float64(0.5066392000000001), np.float64(0.5099256), np.float64(0.5210752)], 'ytrain_link_prop': [link    0.14
dtype: float64, link    0.14
dtype: float64, link    0.14
dtype: float64], 'ytest_link_prop': [link    0.14
dtype: float64, link

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.00534 in the m_probability of piv5, level `Exact match on piv5`
Iteration 2: Largest change in params was -0.0018 in the m_probability of piv5, level `Exact match on piv5`
Iteration 

2002
95
0.35
{0.35: {'overlapInA': [0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963], 'overlapInB': [0.14, 0.14, 0.14, 0.14], 'difficultyInA': [0.9595404595404595, 0.9600399600399601, 0.961038961038961, 0.9595404595404595], 'difficultyInB': [0.9126, 0.9138, 0.9174, 0.9198], 'difficultyInL': [np.float64(0.99), np.float64(0.9857142857142858), np.float64(0.9871428571428571), np.float64(0.9828571428571429)], 'difficultyInNL': [np.float64(0.9232558139534883), np.float64(0.9265116279069767), np.float64(0.9286046511627907), np.float64(0.9309302325581396)], 'ytrain_synth_prop': [synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64], 'ytest_synth_prop': [synthetic    0.090909
dtype: float64, synthetic    0.090909
dtype: float64, synthetic    0.090909
dtype: float64, synthetic    0.090909
dtype: float64], 'auc_synth': [np.float64(0.5066392000000001), np.float64(0.5

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0036 in the m_probability of piv5, level `Exact match on piv5`
Iteration 2: Largest change in params was 0.000891 in the m_probability of piv5, level `All other comparisons`
Iteratio

2002
95
0.35
{0.35: {'overlapInA': [0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963], 'overlapInB': [0.14, 0.14, 0.14, 0.14, 0.14], 'difficultyInA': [0.9595404595404595, 0.9600399600399601, 0.961038961038961, 0.9595404595404595, 0.967032967032967], 'difficultyInB': [0.9126, 0.9138, 0.9174, 0.9198, 0.9138], 'difficultyInL': [np.float64(0.99), np.float64(0.9857142857142858), np.float64(0.9871428571428571), np.float64(0.9828571428571429), np.float64(0.9885714285714285)], 'difficultyInNL': [np.float64(0.9232558139534883), np.float64(0.9265116279069767), np.float64(0.9286046511627907), np.float64(0.9309302325581396), np.float64(0.9230232558139535)], 'ytrain_synth_prop': [synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64], 'ytest_synth_prop': [synthetic    0.090909
dtype: float64, synthetic    0.0909

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.00604 in the m_probability of piv4, level `All other comparisons`
Iteration 2: Largest change in params was -0.00176 in the m_probability of piv4, level `Exact match on piv4`
Iteratio

2002
95
0.35
{0.35: {'overlapInA': [0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963], 'overlapInB': [0.14, 0.14, 0.14, 0.14, 0.14, 0.14], 'difficultyInA': [0.9595404595404595, 0.9600399600399601, 0.961038961038961, 0.9595404595404595, 0.967032967032967, 0.9665334665334665], 'difficultyInB': [0.9126, 0.9138, 0.9174, 0.9198, 0.9138, 0.9162], 'difficultyInL': [np.float64(0.99), np.float64(0.9857142857142858), np.float64(0.9871428571428571), np.float64(0.9828571428571429), np.float64(0.9885714285714285), np.float64(0.9885714285714285)], 'difficultyInNL': [np.float64(0.9232558139534883), np.float64(0.9265116279069767), np.float64(0.9286046511627907), np.float64(0.9309302325581396), np.float64(0.9230232558139535), np.float64(0.9255813953488372)], 'ytrain_synth_prop': [synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.00513 in the m_probability of piv5, level `Exact match on piv5`
Iteration 2: Largest change in params was 0.00131 in the m_probability of piv5, level `All other comparisons`
Iteratio

2002
95
0.35
{0.35: {'overlapInA': [0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963], 'overlapInB': [0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14], 'difficultyInA': [0.9595404595404595, 0.9600399600399601, 0.961038961038961, 0.9595404595404595, 0.967032967032967, 0.9665334665334665, 0.9675324675324676], 'difficultyInB': [0.9126, 0.9138, 0.9174, 0.9198, 0.9138, 0.9162, 0.9146], 'difficultyInL': [np.float64(0.99), np.float64(0.9857142857142858), np.float64(0.9871428571428571), np.float64(0.9828571428571429), np.float64(0.9885714285714285), np.float64(0.9885714285714285), np.float64(0.9885714285714285)], 'difficultyInNL': [np.float64(0.9232558139534883), np.float64(0.9265116279069767), np.float64(0.9286046511627907), np.float64(0.9309302325581396), np.float64(0.9230232558139535), np.float64(0.9255813953488372), np.float64(0.9244186046511628)], 'ytrain_synth_prop': [synthetic    0.091206
dtype: fl

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.00732 in the m_probability of piv4, level `All other comparisons`
Iteration 2: Largest change in params was -0.00276 in the m_probability of piv4, level `Exact match on piv4`
Iteratio

2002
95
0.35
{0.35: {'overlapInA': [0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963], 'overlapInB': [0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14], 'difficultyInA': [0.9595404595404595, 0.9600399600399601, 0.961038961038961, 0.9595404595404595, 0.967032967032967, 0.9665334665334665, 0.9675324675324676, 0.9625374625374625], 'difficultyInB': [0.9126, 0.9138, 0.9174, 0.9198, 0.9138, 0.9162, 0.9146, 0.9136], 'difficultyInL': [np.float64(0.99), np.float64(0.9857142857142858), np.float64(0.9871428571428571), np.float64(0.9828571428571429), np.float64(0.9885714285714285), np.float64(0.9885714285714285), np.float64(0.9885714285714285), np.float64(0.9842857142857143)], 'difficultyInNL': [np.float64(0.9232558139534883), np.float64(0.9265116279069767), np.float64(0.9286046511627907), np.float64(0.9309302325581396), np.float64(0.9230232558139535), np.float64(0.9255813953488372),

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.00575 in the m_probability of piv5, level `All other comparisons`
Iteration 2: Largest change in params was -0.002 in the m_probability of piv5, level `Exact match on piv5`
Iteration 

2002
95
0.35
{0.35: {'overlapInA': [0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963], 'overlapInB': [0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14], 'difficultyInA': [0.9595404595404595, 0.9600399600399601, 0.961038961038961, 0.9595404595404595, 0.967032967032967, 0.9665334665334665, 0.9675324675324676, 0.9625374625374625, 0.964035964035964], 'difficultyInB': [0.9126, 0.9138, 0.9174, 0.9198, 0.9138, 0.9162, 0.9146, 0.9136, 0.9136], 'difficultyInL': [np.float64(0.99), np.float64(0.9857142857142858), np.float64(0.9871428571428571), np.float64(0.9828571428571429), np.float64(0.9885714285714285), np.float64(0.9885714285714285), np.float64(0.9885714285714285), np.float64(0.9842857142857143), np.float64(0.9842857142857143)], 'difficultyInNL': [np.float64(0.9232558139534883), np.float64(0.9265116279069767), np.float64(0.9286046511627907), np.float6

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.00569 in the m_probability of piv5, level `All other comparisons`
Iteration 2: Largest change in params was -0.0016 in the m_probability of piv5, level `Exact match on piv5`
Iteration

2002
95
0.35
{0.35: {'overlapInA': [0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963], 'overlapInB': [0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14], 'difficultyInA': [0.9595404595404595, 0.9600399600399601, 0.961038961038961, 0.9595404595404595, 0.967032967032967, 0.9665334665334665, 0.9675324675324676, 0.9625374625374625, 0.964035964035964, 0.9695304695304695], 'difficultyInB': [0.9126, 0.9138, 0.9174, 0.9198, 0.9138, 0.9162, 0.9146, 0.9136, 0.9136, 0.9174], 'difficultyInL': [np.float64(0.99), np.float64(0.9857142857142858), np.float64(0.9871428571428571), np.float64(0.9828571428571429), np.float64(0.9885714285714285), np.float64(0.9885714285714285), np.float64(0.9885714285714285), np.float64(0.9842857142857143), np.float64(0.9842857142857143), np.float64(0.9928571428571429)], 'difficultyInNL': [np.float64(0.92325

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0171 in the m_probability of piv4, level `All other comparisons`
Iteration 2: Largest change in params was 0.00464 in probability_two_random_records_match
Iteration 3: Largest change

2002
95
0.75
{0.35: {'overlapInA': [0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963], 'overlapInB': [0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14], 'difficultyInA': [0.9595404595404595, 0.9600399600399601, 0.961038961038961, 0.9595404595404595, 0.967032967032967, 0.9665334665334665, 0.9675324675324676, 0.9625374625374625, 0.964035964035964, 0.9695304695304695], 'difficultyInB': [0.9126, 0.9138, 0.9174, 0.9198, 0.9138, 0.9162, 0.9146, 0.9136, 0.9136, 0.9174], 'difficultyInL': [np.float64(0.99), np.float64(0.9857142857142858), np.float64(0.9871428571428571), np.float64(0.9828571428571429), np.float64(0.9885714285714285), np.float64(0.9885714285714285), np.float64(0.9885714285714285), np.float64(0.9842857142857143), np.float64(0.9842857142857143), np.float64(0.9928571428571429)], 'difficultyInNL': [np.float64(0.92325

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.0169 in the m_probability of piv5, level `Exact match on piv5`
Iteration 2: Largest change in params was 0.0046 in probability_two_random_records_match
Iteration 3: Largest change in 

2002
95
0.75
{0.35: {'overlapInA': [0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963], 'overlapInB': [0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14], 'difficultyInA': [0.9595404595404595, 0.9600399600399601, 0.961038961038961, 0.9595404595404595, 0.967032967032967, 0.9665334665334665, 0.9675324675324676, 0.9625374625374625, 0.964035964035964, 0.9695304695304695], 'difficultyInB': [0.9126, 0.9138, 0.9174, 0.9198, 0.9138, 0.9162, 0.9146, 0.9136, 0.9136, 0.9174], 'difficultyInL': [np.float64(0.99), np.float64(0.9857142857142858), np.float64(0.9871428571428571), np.float64(0.9828571428571429), np.float64(0.9885714285714285), np.float64(0.9885714285714285), np.float64(0.9885714285714285), np.float64(0.9842857142857143), np.float64(0.9842857142857143), np.float64(0.9928571428571429)], 'difficultyInNL': [np.float64(0.92325

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0164 in the m_probability of piv5, level `All other comparisons`
Iteration 2: Largest change in params was 0.00478 in probability_two_random_records_match
Iteration 3: Largest change

2002
95
0.75
{0.35: {'overlapInA': [0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963], 'overlapInB': [0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14], 'difficultyInA': [0.9595404595404595, 0.9600399600399601, 0.961038961038961, 0.9595404595404595, 0.967032967032967, 0.9665334665334665, 0.9675324675324676, 0.9625374625374625, 0.964035964035964, 0.9695304695304695], 'difficultyInB': [0.9126, 0.9138, 0.9174, 0.9198, 0.9138, 0.9162, 0.9146, 0.9136, 0.9136, 0.9174], 'difficultyInL': [np.float64(0.99), np.float64(0.9857142857142858), np.float64(0.9871428571428571), np.float64(0.9828571428571429), np.float64(0.9885714285714285), np.float64(0.9885714285714285), np.float64(0.9885714285714285), np.float64(0.9842857142857143), np.float64(0.9842857142857143), np.float64(0.9928571428571429)], 'difficultyInNL': [np.float64(0.92325

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0176 in the m_probability of piv4, level `All other comparisons`
Iteration 2: Largest change in params was 0.00492 in probability_two_random_records_match
Iteration 3: Largest change

2002
95
0.75
{0.35: {'overlapInA': [0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963], 'overlapInB': [0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14], 'difficultyInA': [0.9595404595404595, 0.9600399600399601, 0.961038961038961, 0.9595404595404595, 0.967032967032967, 0.9665334665334665, 0.9675324675324676, 0.9625374625374625, 0.964035964035964, 0.9695304695304695], 'difficultyInB': [0.9126, 0.9138, 0.9174, 0.9198, 0.9138, 0.9162, 0.9146, 0.9136, 0.9136, 0.9174], 'difficultyInL': [np.float64(0.99), np.float64(0.9857142857142858), np.float64(0.9871428571428571), np.float64(0.9828571428571429), np.float64(0.9885714285714285), np.float64(0.9885714285714285), np.float64(0.9885714285714285), np.float64(0.9842857142857143), np.float64(0.9842857142857143), np.float64(0.9928571428571429)], 'difficultyInNL': [np.float64(0.92325

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.017 in the m_probability of piv5, level `All other comparisons`
Iteration 2: Largest change in params was 0.0047 in probability_two_random_records_match
Iteration 3: Largest change i

2002
95
0.75
{0.35: {'overlapInA': [0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963], 'overlapInB': [0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14], 'difficultyInA': [0.9595404595404595, 0.9600399600399601, 0.961038961038961, 0.9595404595404595, 0.967032967032967, 0.9665334665334665, 0.9675324675324676, 0.9625374625374625, 0.964035964035964, 0.9695304695304695], 'difficultyInB': [0.9126, 0.9138, 0.9174, 0.9198, 0.9138, 0.9162, 0.9146, 0.9136, 0.9136, 0.9174], 'difficultyInL': [np.float64(0.99), np.float64(0.9857142857142858), np.float64(0.9871428571428571), np.float64(0.9828571428571429), np.float64(0.9885714285714285), np.float64(0.9885714285714285), np.float64(0.9885714285714285), np.float64(0.9842857142857143), np.float64(0.9842857142857143), np.float64(0.9928571428571429)], 'difficultyInNL': [np.float64(0.92325

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0164 in the m_probability of piv4, level `All other comparisons`
Iteration 2: Largest change in params was 0.0045 in probability_two_random_records_match
Iteration 3: Largest change 

2002
95
0.75
{0.35: {'overlapInA': [0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963], 'overlapInB': [0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14], 'difficultyInA': [0.9595404595404595, 0.9600399600399601, 0.961038961038961, 0.9595404595404595, 0.967032967032967, 0.9665334665334665, 0.9675324675324676, 0.9625374625374625, 0.964035964035964, 0.9695304695304695], 'difficultyInB': [0.9126, 0.9138, 0.9174, 0.9198, 0.9138, 0.9162, 0.9146, 0.9136, 0.9136, 0.9174], 'difficultyInL': [np.float64(0.99), np.float64(0.9857142857142858), np.float64(0.9871428571428571), np.float64(0.9828571428571429), np.float64(0.9885714285714285), np.float64(0.9885714285714285), np.float64(0.9885714285714285), np.float64(0.9842857142857143), np.float64(0.9842857142857143), np.float64(0.9928571428571429)], 'difficultyInNL': [np.float64(0.92325

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.0177 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was -0.0051 in the m_probability of piv4, level `All other comparisons`
Iteration 

2002
95
0.75
{0.35: {'overlapInA': [0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963], 'overlapInB': [0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14], 'difficultyInA': [0.9595404595404595, 0.9600399600399601, 0.961038961038961, 0.9595404595404595, 0.967032967032967, 0.9665334665334665, 0.9675324675324676, 0.9625374625374625, 0.964035964035964, 0.9695304695304695], 'difficultyInB': [0.9126, 0.9138, 0.9174, 0.9198, 0.9138, 0.9162, 0.9146, 0.9136, 0.9136, 0.9174], 'difficultyInL': [np.float64(0.99), np.float64(0.9857142857142858), np.float64(0.9871428571428571), np.float64(0.9828571428571429), np.float64(0.9885714285714285), np.float64(0.9885714285714285), np.float64(0.9885714285714285), np.float64(0.9842857142857143), np.float64(0.9842857142857143), np.float64(0.9928571428571429)], 'difficultyInNL': [np.float64(0.92325

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.0172 in the m_probability of piv5, level `Exact match on piv5`
Iteration 2: Largest change in params was 0.00475 in probability_two_random_records_match
Iteration 3: Largest change in

2002
95
0.75
{0.35: {'overlapInA': [0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963], 'overlapInB': [0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14], 'difficultyInA': [0.9595404595404595, 0.9600399600399601, 0.961038961038961, 0.9595404595404595, 0.967032967032967, 0.9665334665334665, 0.9675324675324676, 0.9625374625374625, 0.964035964035964, 0.9695304695304695], 'difficultyInB': [0.9126, 0.9138, 0.9174, 0.9198, 0.9138, 0.9162, 0.9146, 0.9136, 0.9136, 0.9174], 'difficultyInL': [np.float64(0.99), np.float64(0.9857142857142858), np.float64(0.9871428571428571), np.float64(0.9828571428571429), np.float64(0.9885714285714285), np.float64(0.9885714285714285), np.float64(0.9885714285714285), np.float64(0.9842857142857143), np.float64(0.9842857142857143), np.float64(0.9928571428571429)], 'difficultyInNL': [np.float64(0.92325

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0162 in the m_probability of piv5, level `All other comparisons`
Iteration 2: Largest change in params was 0.00437 in probability_two_random_records_match
Iteration 3: Largest change

2002
95
0.75
{0.35: {'overlapInA': [0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963], 'overlapInB': [0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14], 'difficultyInA': [0.9595404595404595, 0.9600399600399601, 0.961038961038961, 0.9595404595404595, 0.967032967032967, 0.9665334665334665, 0.9675324675324676, 0.9625374625374625, 0.964035964035964, 0.9695304695304695], 'difficultyInB': [0.9126, 0.9138, 0.9174, 0.9198, 0.9138, 0.9162, 0.9146, 0.9136, 0.9136, 0.9174], 'difficultyInL': [np.float64(0.99), np.float64(0.9857142857142858), np.float64(0.9871428571428571), np.float64(0.9828571428571429), np.float64(0.9885714285714285), np.float64(0.9885714285714285), np.float64(0.9885714285714285), np.float64(0.9842857142857143), np.float64(0.9842857142857143), np.float64(0.9928571428571429)], 'difficultyInNL': [np.float64(0.92325

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.0164 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was 0.00465 in probability_two_random_records_match
Iteration 3: Largest change in

2002
95
0.75
{0.35: {'overlapInA': [0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963, 0.34965034965034963], 'overlapInB': [0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14], 'difficultyInA': [0.9595404595404595, 0.9600399600399601, 0.961038961038961, 0.9595404595404595, 0.967032967032967, 0.9665334665334665, 0.9675324675324676, 0.9625374625374625, 0.964035964035964, 0.9695304695304695], 'difficultyInB': [0.9126, 0.9138, 0.9174, 0.9198, 0.9138, 0.9162, 0.9146, 0.9136, 0.9136, 0.9174], 'difficultyInL': [np.float64(0.99), np.float64(0.9857142857142858), np.float64(0.9871428571428571), np.float64(0.9828571428571429), np.float64(0.9885714285714285), np.float64(0.9885714285714285), np.float64(0.9885714285714285), np.float64(0.9842857142857143), np.float64(0.9842857142857143), np.float64(0.9928571428571429)], 'difficultyInNL': [np.float64(0.92325

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.00401 in the m_probability of piv5, level `Exact match on piv5`
Iteration 2: Largest change in params was -0.00252 in the m_probability of piv5, level `Exact match on piv5`
Iteration

4502
85
0.35
{0.35: {'overlapInA': [0.34984451354953355], 'overlapInB': [0.315], 'difficultyInA': [0.8098622834295869], 'difficultyInB': [0.7874], 'difficultyInL': [np.float64(0.9288888888888889)], 'difficultyInNL': [np.float64(0.8370802919708029)], 'ytrain_synth_prop': [synthetic    0.091206
dtype: float64], 'ytest_synth_prop': [synthetic    0.090909
dtype: float64], 'auc_synth': [np.float64(0.4988824)], 'ytrain_link_prop': [link    0.314926
dtype: float64], 'ytest_link_prop': [link    0.315074
dtype: float64], 'auc_link': [np.float64(0.4878700798018141)], 'max_proba_linked': [0.4124232577173227], 'median_proba_linked': [0.41], 'threshold_for_links': [0.41], 'synth_pairs_prop': [0.462], 'real_pairs_prop': [np.float64(0.485)], 'true_fdr': [np.float64(0.60625)], 'hat_fdr_us': [0.5775], 'hat_fdr_prob': [np.float64(0.5875767422826774)]}}
ITERATION
2
Unique values in column 'piv1': 11
Unique values in column 'piv2': 8
Unique values in column 'piv3': 7
Unique values in column 'piv4': 8
Uniq

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.00276 in the m_probability of piv5, level `All other comparisons`
Iteration 2: Largest change in params was -0.00167 in the m_probability of piv5, level `Exact match on piv5`
Iteratio

4502
85
0.35
{0.35: {'overlapInA': [0.34984451354953355, 0.34984451354953355], 'overlapInB': [0.315, 0.315], 'difficultyInA': [0.8098622834295869, 0.8031985784095957], 'difficultyInB': [0.7874, 0.7778], 'difficultyInL': [np.float64(0.9288888888888889), np.float64(0.9250793650793651)], 'difficultyInNL': [np.float64(0.8370802919708029), np.float64(0.8362043795620437)], 'ytrain_synth_prop': [synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64], 'ytest_synth_prop': [synthetic    0.090909
dtype: float64, synthetic    0.090909
dtype: float64], 'auc_synth': [np.float64(0.4988824), np.float64(0.48249040000000004)], 'ytrain_link_prop': [link    0.314926
dtype: float64, link    0.314926
dtype: float64], 'ytest_link_prop': [link    0.315074
dtype: float64, link    0.315074
dtype: float64], 'auc_link': [np.float64(0.4878700798018141), np.float64(0.5167089678510999)], 'max_proba_linked': [0.4124232577173227, 0.41177144780184494], 'median_proba_linked': [0.41, 0.41], 'threshol

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.00302 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was 0.00236 in the m_probability of piv4, level `All other comparisons`
Iteratio

4502
85
0.35
{0.35: {'overlapInA': [0.34984451354953355, 0.34984451354953355, 0.34984451354953355], 'overlapInB': [0.315, 0.315, 0.315], 'difficultyInA': [0.8098622834295869, 0.8031985784095957, 0.8040870724122612], 'difficultyInB': [0.7874, 0.7778, 0.782], 'difficultyInL': [np.float64(0.9288888888888889), np.float64(0.9250793650793651), np.float64(0.9219047619047619)], 'difficultyInNL': [np.float64(0.8370802919708029), np.float64(0.8362043795620437), np.float64(0.8356204379562043)], 'ytrain_synth_prop': [synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64], 'ytest_synth_prop': [synthetic    0.090909
dtype: float64, synthetic    0.090909
dtype: float64, synthetic    0.090909
dtype: float64], 'auc_synth': [np.float64(0.4988824), np.float64(0.48249040000000004), np.float64(0.47441279999999997)], 'ytrain_link_prop': [link    0.314926
dtype: float64, link    0.314926
dtype: float64, link    0.314926
dtype: float64], 'ytest_link_p

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.00323 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was 0.0023 in the m_probability of piv4, level `All other comparisons`
Iteration

4502
85
0.35
{0.35: {'overlapInA': [0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355], 'overlapInB': [0.315, 0.315, 0.315, 0.315], 'difficultyInA': [0.8098622834295869, 0.8031985784095957, 0.8040870724122612, 0.8047534429142603], 'difficultyInB': [0.7874, 0.7778, 0.782, 0.7898], 'difficultyInL': [np.float64(0.9288888888888889), np.float64(0.9250793650793651), np.float64(0.9219047619047619), np.float64(0.92)], 'difficultyInNL': [np.float64(0.8370802919708029), np.float64(0.8362043795620437), np.float64(0.8356204379562043), np.float64(0.84)], 'ytrain_synth_prop': [synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64], 'ytest_synth_prop': [synthetic    0.090909
dtype: float64, synthetic    0.090909
dtype: float64, synthetic    0.090909
dtype: float64, synthetic    0.090909
dtype: float64], 'auc_synth': [np.float64(0.4988824), np.float64(0.48249040000000004), 

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.00439 in the m_probability of piv5, level `All other comparisons`
Iteration 2: Largest change in params was -0.00291 in the m_probability of piv5, level `Exact match on piv5`
Iteratio

4502
85
0.35
{0.35: {'overlapInA': [0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355], 'overlapInB': [0.315, 0.315, 0.315, 0.315, 0.315], 'difficultyInA': [0.8098622834295869, 0.8031985784095957, 0.8040870724122612, 0.8047534429142603, 0.8103065304309196], 'difficultyInB': [0.7874, 0.7778, 0.782, 0.7898, 0.7994], 'difficultyInL': [np.float64(0.9288888888888889), np.float64(0.9250793650793651), np.float64(0.9219047619047619), np.float64(0.92), np.float64(0.9238095238095239)], 'difficultyInNL': [np.float64(0.8370802919708029), np.float64(0.8362043795620437), np.float64(0.8356204379562043), np.float64(0.84), np.float64(0.8452554744525548)], 'ytrain_synth_prop': [synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64], 'ytest_synth_prop': [synthetic    0.090909
dtype: float64, synthetic    0.090909
dtype

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.00367 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was -0.00242 in the m_probability of piv4, level `Exact match on piv4`
Iteration

4502
85
0.35
{0.35: {'overlapInA': [0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355], 'overlapInB': [0.315, 0.315, 0.315, 0.315, 0.315, 0.315], 'difficultyInA': [0.8098622834295869, 0.8031985784095957, 0.8040870724122612, 0.8047534429142603, 0.8103065304309196, 0.8109729009329187], 'difficultyInB': [0.7874, 0.7778, 0.782, 0.7898, 0.7994, 0.79], 'difficultyInL': [np.float64(0.9288888888888889), np.float64(0.9250793650793651), np.float64(0.9219047619047619), np.float64(0.92), np.float64(0.9238095238095239), np.float64(0.9244444444444444)], 'difficultyInNL': [np.float64(0.8370802919708029), np.float64(0.8362043795620437), np.float64(0.8356204379562043), np.float64(0.84), np.float64(0.8452554744525548), np.float64(0.8467153284671532)], 'ytrain_synth_prop': [synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthet

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.00361 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was 0.002 in the m_probability of piv4, level `All other comparisons`
Iteration 

4502
85
0.35
{0.35: {'overlapInA': [0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355], 'overlapInB': [0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315], 'difficultyInA': [0.8098622834295869, 0.8031985784095957, 0.8040870724122612, 0.8047534429142603, 0.8103065304309196, 0.8109729009329187, 0.806974677920924], 'difficultyInB': [0.7874, 0.7778, 0.782, 0.7898, 0.7994, 0.79, 0.7952], 'difficultyInL': [np.float64(0.9288888888888889), np.float64(0.9250793650793651), np.float64(0.9219047619047619), np.float64(0.92), np.float64(0.9238095238095239), np.float64(0.9244444444444444), np.float64(0.9161904761904762)], 'difficultyInNL': [np.float64(0.8370802919708029), np.float64(0.8362043795620437), np.float64(0.8356204379562043), np.float64(0.84), np.float64(0.8452554744525548), np.float64(0.8467153284671532), np.float64(0.8592700729927008)], 'ytrain_synth_prop': [synthetic    0.091206
dtype: float64, sy

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.00188 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was 0.00103 in the m_probability of piv4, level `All other comparisons`
Iteratio

4502
85
0.35
{0.35: {'overlapInA': [0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355], 'overlapInB': [0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315], 'difficultyInA': [0.8098622834295869, 0.8031985784095957, 0.8040870724122612, 0.8047534429142603, 0.8103065304309196, 0.8109729009329187, 0.806974677920924, 0.8000888494002666], 'difficultyInB': [0.7874, 0.7778, 0.782, 0.7898, 0.7994, 0.79, 0.7952, 0.79], 'difficultyInL': [np.float64(0.9288888888888889), np.float64(0.9250793650793651), np.float64(0.9219047619047619), np.float64(0.92), np.float64(0.9238095238095239), np.float64(0.9244444444444444), np.float64(0.9161904761904762), np.float64(0.9136507936507936)], 'difficultyInNL': [np.float64(0.8370802919708029), np.float64(0.8362043795620437), np.float64(0.8356204379562043), np.float64(0.84), np.float64(0.8452554744525548), np.float64(0.8467153284671532), np.float6

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.00418 in the m_probability of piv4, level `All other comparisons`
Iteration 2: Largest change in params was -0.00307 in the m_probability of piv4, level `Exact match on piv4`
Iteratio

4502
85
0.35
{0.35: {'overlapInA': [0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355], 'overlapInB': [0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315], 'difficultyInA': [0.8098622834295869, 0.8031985784095957, 0.8040870724122612, 0.8047534429142603, 0.8103065304309196, 0.8109729009329187, 0.806974677920924, 0.8000888494002666, 0.8038649489115949], 'difficultyInB': [0.7874, 0.7778, 0.782, 0.7898, 0.7994, 0.79, 0.7952, 0.79, 0.793], 'difficultyInL': [np.float64(0.9288888888888889), np.float64(0.9250793650793651), np.float64(0.9219047619047619), np.float64(0.92), np.float64(0.9238095238095239), np.float64(0.9244444444444444), np.float64(0.9161904761904762), np.float64(0.9136507936507936), np.float64(0.9295238095238095)], 'difficultyInNL': [np.float64(0.8370802919708029), np.float64(0.8362043795620437), np.float64(0.8356204379562043), np.f

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.00243 in the m_probability of piv5, level `Exact match on piv5`
Iteration 2: Largest change in params was 0.00148 in the m_probability of piv5, level `All other comparisons`
Iteratio

4502
85
0.35
{0.35: {'overlapInA': [0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355], 'overlapInB': [0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315], 'difficultyInA': [0.8098622834295869, 0.8031985784095957, 0.8040870724122612, 0.8047534429142603, 0.8103065304309196, 0.8109729009329187, 0.806974677920924, 0.8000888494002666, 0.8038649489115949, 0.8043091959129276], 'difficultyInB': [0.7874, 0.7778, 0.782, 0.7898, 0.7994, 0.79, 0.7952, 0.79, 0.793, 0.7918], 'difficultyInL': [np.float64(0.9288888888888889), np.float64(0.9250793650793651), np.float64(0.9219047619047619), np.float64(0.92), np.float64(0.9238095238095239), np.float64(0.9244444444444444), np.float64(0.9161904761904762), np.float64(0.9136507936507936), np.float64(0.9295238095238095), np.float64(0.9225396825396825)], 'difficultyInNL': [np.float64(0

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.0126 in the m_probability of piv5, level `Exact match on piv5`
Iteration 2: Largest change in params was -0.00578 in the m_probability of piv5, level `All other comparisons`
Iteration

4502
85
0.75
{0.35: {'overlapInA': [0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355], 'overlapInB': [0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315], 'difficultyInA': [0.8098622834295869, 0.8031985784095957, 0.8040870724122612, 0.8047534429142603, 0.8103065304309196, 0.8109729009329187, 0.806974677920924, 0.8000888494002666, 0.8038649489115949, 0.8043091959129276], 'difficultyInB': [0.7874, 0.7778, 0.782, 0.7898, 0.7994, 0.79, 0.7952, 0.79, 0.793, 0.7918], 'difficultyInL': [np.float64(0.9288888888888889), np.float64(0.9250793650793651), np.float64(0.9219047619047619), np.float64(0.92), np.float64(0.9238095238095239), np.float64(0.9244444444444444), np.float64(0.9161904761904762), np.float64(0.9136507936507936), np.float64(0.9295238095238095), np.float64(0.9225396825396825)], 'difficultyInNL': [np.float64(0

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0113 in the m_probability of piv5, level `All other comparisons`
Iteration 2: Largest change in params was 0.0055 in the m_probability of piv5, level `Exact match on piv5`
Iteration 

4502
85
0.75
{0.35: {'overlapInA': [0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355], 'overlapInB': [0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315], 'difficultyInA': [0.8098622834295869, 0.8031985784095957, 0.8040870724122612, 0.8047534429142603, 0.8103065304309196, 0.8109729009329187, 0.806974677920924, 0.8000888494002666, 0.8038649489115949, 0.8043091959129276], 'difficultyInB': [0.7874, 0.7778, 0.782, 0.7898, 0.7994, 0.79, 0.7952, 0.79, 0.793, 0.7918], 'difficultyInL': [np.float64(0.9288888888888889), np.float64(0.9250793650793651), np.float64(0.9219047619047619), np.float64(0.92), np.float64(0.9238095238095239), np.float64(0.9244444444444444), np.float64(0.9161904761904762), np.float64(0.9136507936507936), np.float64(0.9295238095238095), np.float64(0.9225396825396825)], 'difficultyInNL': [np.float64(0

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.0107 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was 0.00499 in the m_probability of piv4, level `Exact match on piv4`
Iteration 3:

4502
85
0.75
{0.35: {'overlapInA': [0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355], 'overlapInB': [0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315], 'difficultyInA': [0.8098622834295869, 0.8031985784095957, 0.8040870724122612, 0.8047534429142603, 0.8103065304309196, 0.8109729009329187, 0.806974677920924, 0.8000888494002666, 0.8038649489115949, 0.8043091959129276], 'difficultyInB': [0.7874, 0.7778, 0.782, 0.7898, 0.7994, 0.79, 0.7952, 0.79, 0.793, 0.7918], 'difficultyInL': [np.float64(0.9288888888888889), np.float64(0.9250793650793651), np.float64(0.9219047619047619), np.float64(0.92), np.float64(0.9238095238095239), np.float64(0.9244444444444444), np.float64(0.9161904761904762), np.float64(0.9136507936507936), np.float64(0.9295238095238095), np.float64(0.9225396825396825)], 'difficultyInNL': [np.float64(0

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0122 in the m_probability of piv5, level `All other comparisons`
Iteration 2: Largest change in params was -0.00573 in the m_probability of piv5, level `All other comparisons`
Iterat

4502
85
0.75
{0.35: {'overlapInA': [0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355], 'overlapInB': [0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315], 'difficultyInA': [0.8098622834295869, 0.8031985784095957, 0.8040870724122612, 0.8047534429142603, 0.8103065304309196, 0.8109729009329187, 0.806974677920924, 0.8000888494002666, 0.8038649489115949, 0.8043091959129276], 'difficultyInB': [0.7874, 0.7778, 0.782, 0.7898, 0.7994, 0.79, 0.7952, 0.79, 0.793, 0.7918], 'difficultyInL': [np.float64(0.9288888888888889), np.float64(0.9250793650793651), np.float64(0.9219047619047619), np.float64(0.92), np.float64(0.9238095238095239), np.float64(0.9244444444444444), np.float64(0.9161904761904762), np.float64(0.9136507936507936), np.float64(0.9295238095238095), np.float64(0.9225396825396825)], 'difficultyInNL': [np.float64(0

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0118 in the m_probability of piv4, level `All other comparisons`
Iteration 2: Largest change in params was 0.00563 in the m_probability of piv4, level `Exact match on piv4`
Iteration

4502
85
0.75
{0.35: {'overlapInA': [0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355], 'overlapInB': [0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315], 'difficultyInA': [0.8098622834295869, 0.8031985784095957, 0.8040870724122612, 0.8047534429142603, 0.8103065304309196, 0.8109729009329187, 0.806974677920924, 0.8000888494002666, 0.8038649489115949, 0.8043091959129276], 'difficultyInB': [0.7874, 0.7778, 0.782, 0.7898, 0.7994, 0.79, 0.7952, 0.79, 0.793, 0.7918], 'difficultyInL': [np.float64(0.9288888888888889), np.float64(0.9250793650793651), np.float64(0.9219047619047619), np.float64(0.92), np.float64(0.9238095238095239), np.float64(0.9244444444444444), np.float64(0.9161904761904762), np.float64(0.9136507936507936), np.float64(0.9295238095238095), np.float64(0.9225396825396825)], 'difficultyInNL': [np.float64(0

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0123 in the m_probability of piv5, level `All other comparisons`
Iteration 2: Largest change in params was 0.00575 in the m_probability of piv5, level `Exact match on piv5`
Iteration

4502
85
0.75
{0.35: {'overlapInA': [0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355], 'overlapInB': [0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315], 'difficultyInA': [0.8098622834295869, 0.8031985784095957, 0.8040870724122612, 0.8047534429142603, 0.8103065304309196, 0.8109729009329187, 0.806974677920924, 0.8000888494002666, 0.8038649489115949, 0.8043091959129276], 'difficultyInB': [0.7874, 0.7778, 0.782, 0.7898, 0.7994, 0.79, 0.7952, 0.79, 0.793, 0.7918], 'difficultyInL': [np.float64(0.9288888888888889), np.float64(0.9250793650793651), np.float64(0.9219047619047619), np.float64(0.92), np.float64(0.9238095238095239), np.float64(0.9244444444444444), np.float64(0.9161904761904762), np.float64(0.9136507936507936), np.float64(0.9295238095238095), np.float64(0.9225396825396825)], 'difficultyInNL': [np.float64(0

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.0108 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was 0.00541 in the m_probability of piv4, level `Exact match on piv4`
Iteration 3:

4502
85
0.75
{0.35: {'overlapInA': [0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355], 'overlapInB': [0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315], 'difficultyInA': [0.8098622834295869, 0.8031985784095957, 0.8040870724122612, 0.8047534429142603, 0.8103065304309196, 0.8109729009329187, 0.806974677920924, 0.8000888494002666, 0.8038649489115949, 0.8043091959129276], 'difficultyInB': [0.7874, 0.7778, 0.782, 0.7898, 0.7994, 0.79, 0.7952, 0.79, 0.793, 0.7918], 'difficultyInL': [np.float64(0.9288888888888889), np.float64(0.9250793650793651), np.float64(0.9219047619047619), np.float64(0.92), np.float64(0.9238095238095239), np.float64(0.9244444444444444), np.float64(0.9161904761904762), np.float64(0.9136507936507936), np.float64(0.9295238095238095), np.float64(0.9225396825396825)], 'difficultyInNL': [np.float64(0

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.0127 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was -0.00588 in the m_probability of piv4, level `All other comparisons`
Iteration

4502
85
0.75
{0.35: {'overlapInA': [0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355], 'overlapInB': [0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315], 'difficultyInA': [0.8098622834295869, 0.8031985784095957, 0.8040870724122612, 0.8047534429142603, 0.8103065304309196, 0.8109729009329187, 0.806974677920924, 0.8000888494002666, 0.8038649489115949, 0.8043091959129276], 'difficultyInB': [0.7874, 0.7778, 0.782, 0.7898, 0.7994, 0.79, 0.7952, 0.79, 0.793, 0.7918], 'difficultyInL': [np.float64(0.9288888888888889), np.float64(0.9250793650793651), np.float64(0.9219047619047619), np.float64(0.92), np.float64(0.9238095238095239), np.float64(0.9244444444444444), np.float64(0.9161904761904762), np.float64(0.9136507936507936), np.float64(0.9295238095238095), np.float64(0.9225396825396825)], 'difficultyInNL': [np.float64(0

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.0123 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was -0.00578 in the m_probability of piv4, level `All other comparisons`
Iteration

4502
85
0.75
{0.35: {'overlapInA': [0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355], 'overlapInB': [0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315], 'difficultyInA': [0.8098622834295869, 0.8031985784095957, 0.8040870724122612, 0.8047534429142603, 0.8103065304309196, 0.8109729009329187, 0.806974677920924, 0.8000888494002666, 0.8038649489115949, 0.8043091959129276], 'difficultyInB': [0.7874, 0.7778, 0.782, 0.7898, 0.7994, 0.79, 0.7952, 0.79, 0.793, 0.7918], 'difficultyInL': [np.float64(0.9288888888888889), np.float64(0.9250793650793651), np.float64(0.9219047619047619), np.float64(0.92), np.float64(0.9238095238095239), np.float64(0.9244444444444444), np.float64(0.9161904761904762), np.float64(0.9136507936507936), np.float64(0.9295238095238095), np.float64(0.9225396825396825)], 'difficultyInNL': [np.float64(0

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0114 in the m_probability of piv5, level `All other comparisons`
Iteration 2: Largest change in params was 0.0053 in the m_probability of piv5, level `Exact match on piv5`
Iteration 

4502
85
0.75
{0.35: {'overlapInA': [0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355], 'overlapInB': [0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315], 'difficultyInA': [0.8098622834295869, 0.8031985784095957, 0.8040870724122612, 0.8047534429142603, 0.8103065304309196, 0.8109729009329187, 0.806974677920924, 0.8000888494002666, 0.8038649489115949, 0.8043091959129276], 'difficultyInB': [0.7874, 0.7778, 0.782, 0.7898, 0.7994, 0.79, 0.7952, 0.79, 0.793, 0.7918], 'difficultyInL': [np.float64(0.9288888888888889), np.float64(0.9250793650793651), np.float64(0.9219047619047619), np.float64(0.92), np.float64(0.9238095238095239), np.float64(0.9244444444444444), np.float64(0.9161904761904762), np.float64(0.9136507936507936), np.float64(0.9295238095238095), np.float64(0.9225396825396825)], 'difficultyInNL': [np.float64(0

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.00565 in the m_probability of piv5, level `All other comparisons`
Iteration 2: Largest change in params was -0.00241 in the m_probability of piv5, level `Exact match on piv5`
Iteratio

4502
95
0.35
{0.35: {'overlapInA': [0.34984451354953355], 'overlapInB': [0.315], 'difficultyInA': [0.9167036872501111], 'difficultyInB': [0.9154], 'difficultyInL': [np.float64(0.9695238095238096)], 'difficultyInNL': [np.float64(0.9465693430656934)], 'ytrain_synth_prop': [synthetic    0.091206
dtype: float64], 'ytest_synth_prop': [synthetic    0.090909
dtype: float64], 'auc_synth': [np.float64(0.4889864)], 'ytrain_link_prop': [link    0.314926
dtype: float64], 'ytest_link_prop': [link    0.315074
dtype: float64], 'auc_link': [np.float64(0.5210253925638815)], 'max_proba_linked': [0.6756309206627078], 'median_proba_linked': [0.67], 'threshold_for_links': [0.5], 'synth_pairs_prop': [0.15], 'real_pairs_prop': [np.float64(0.1902)], 'true_fdr': [np.float64(0.37648456057007124)], 'hat_fdr_us': [0.29691211401425177], 'hat_fdr_prob': [np.float64(0.3243690793372923)]}}
ITERATION
2
Unique values in column 'piv1': 14
Unique values in column 'piv2': 10
Unique values in column 'piv3': 9
Unique values

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.00541 in the m_probability of piv5, level `All other comparisons`
Iteration 2: Largest change in params was -0.00178 in the m_probability of piv5, level `Exact match on piv5`
Iteratio

4502
95
0.35
{0.35: {'overlapInA': [0.34984451354953355, 0.34984451354953355], 'overlapInB': [0.315, 0.315], 'difficultyInA': [0.9167036872501111, 0.9269213682807641], 'difficultyInB': [0.9154, 0.9218], 'difficultyInL': [np.float64(0.9695238095238096), np.float64(0.9726984126984127)], 'difficultyInNL': [np.float64(0.9465693430656934), np.float64(0.9494890510948905)], 'ytrain_synth_prop': [synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64], 'ytest_synth_prop': [synthetic    0.090909
dtype: float64, synthetic    0.090909
dtype: float64], 'auc_synth': [np.float64(0.4889864), np.float64(0.4676136)], 'ytrain_link_prop': [link    0.314926
dtype: float64, link    0.314926
dtype: float64], 'ytest_link_prop': [link    0.315074
dtype: float64, link    0.315074
dtype: float64], 'auc_link': [np.float64(0.5210253925638815), np.float64(0.4939207789937208)], 'max_proba_linked': [0.6756309206627078, 0.6818043837592349], 'median_proba_linked': [0.67, 0.68], 'threshold_for_links

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.00569 in the m_probability of piv5, level `Exact match on piv5`
Iteration 2: Largest change in params was -0.00228 in the m_probability of piv5, level `Exact match on piv5`
Iteration

4502
95
0.35
{0.35: {'overlapInA': [0.34984451354953355, 0.34984451354953355, 0.34984451354953355], 'overlapInB': [0.315, 0.315, 0.315], 'difficultyInA': [0.9167036872501111, 0.9269213682807641, 0.9187027987561084], 'difficultyInB': [0.9154, 0.9218, 0.9084], 'difficultyInL': [np.float64(0.9695238095238096), np.float64(0.9726984126984127), np.float64(0.9657142857142857)], 'difficultyInNL': [np.float64(0.9465693430656934), np.float64(0.9494890510948905), np.float64(0.9383941605839416)], 'ytrain_synth_prop': [synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64], 'ytest_synth_prop': [synthetic    0.090909
dtype: float64, synthetic    0.090909
dtype: float64, synthetic    0.090909
dtype: float64], 'auc_synth': [np.float64(0.4889864), np.float64(0.4676136), np.float64(0.4891328000000001)], 'ytrain_link_prop': [link    0.314926
dtype: float64, link    0.314926
dtype: float64, link    0.314926
dtype: float64], 'ytest_link_prop': [lin

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.00536 in the m_probability of piv4, level `All other comparisons`
Iteration 2: Largest change in params was 0.0015 in the m_probability of piv4, level `All other comparisons`
Iteratio

4502
95
0.35
{0.35: {'overlapInA': [0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355], 'overlapInB': [0.315, 0.315, 0.315, 0.315], 'difficultyInA': [0.9167036872501111, 0.9269213682807641, 0.9187027987561084, 0.9275877387827632], 'difficultyInB': [0.9154, 0.9218, 0.9084, 0.9124], 'difficultyInL': [np.float64(0.9695238095238096), np.float64(0.9726984126984127), np.float64(0.9657142857142857), np.float64(0.966984126984127)], 'difficultyInNL': [np.float64(0.9465693430656934), np.float64(0.9494890510948905), np.float64(0.9383941605839416), np.float64(0.9404379562043795)], 'ytrain_synth_prop': [synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64], 'ytest_synth_prop': [synthetic    0.090909
dtype: float64, synthetic    0.090909
dtype: float64, synthetic    0.090909
dtype: float64, synthetic    0.090909
dtype: float64], 'auc_synth': [np.float64(0.4889864), np.fl

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0049 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was -0.00149 in the m_probability of piv4, level `Exact match on piv4`
Iteration 

4502
95
0.35
{0.35: {'overlapInA': [0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355], 'overlapInB': [0.315, 0.315, 0.315, 0.315, 0.315], 'difficultyInA': [0.9167036872501111, 0.9269213682807641, 0.9187027987561084, 0.9275877387827632, 0.9235895157707685], 'difficultyInB': [0.9154, 0.9218, 0.9084, 0.9124, 0.9116], 'difficultyInL': [np.float64(0.9695238095238096), np.float64(0.9726984126984127), np.float64(0.9657142857142857), np.float64(0.966984126984127), np.float64(0.9625396825396826)], 'difficultyInNL': [np.float64(0.9465693430656934), np.float64(0.9494890510948905), np.float64(0.9383941605839416), np.float64(0.9404379562043795), np.float64(0.9357664233576642)], 'ytrain_synth_prop': [synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64], 'ytest_synth_prop': [synthetic    0.090909
dtype: float64,

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.00565 in the m_probability of piv4, level `All other comparisons`
Iteration 2: Largest change in params was 0.00217 in the m_probability of piv4, level `All other comparisons`
Iterati

4502
95
0.35
{0.35: {'overlapInA': [0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355], 'overlapInB': [0.315, 0.315, 0.315, 0.315, 0.315, 0.315], 'difficultyInA': [0.9167036872501111, 0.9269213682807641, 0.9187027987561084, 0.9275877387827632, 0.9235895157707685, 0.9229231452687694], 'difficultyInB': [0.9154, 0.9218, 0.9084, 0.9124, 0.9116, 0.9108], 'difficultyInL': [np.float64(0.9695238095238096), np.float64(0.9726984126984127), np.float64(0.9657142857142857), np.float64(0.966984126984127), np.float64(0.9625396825396826), np.float64(0.9644444444444444)], 'difficultyInNL': [np.float64(0.9465693430656934), np.float64(0.9494890510948905), np.float64(0.9383941605839416), np.float64(0.9404379562043795), np.float64(0.9357664233576642), np.float64(0.9366423357664233)], 'ytrain_synth_prop': [synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.00599 in the m_probability of piv4, level `All other comparisons`
Iteration 2: Largest change in params was -0.00181 in the m_probability of piv4, level `Exact match on piv4`
Iteratio

4502
95
0.35
{0.35: {'overlapInA': [0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355], 'overlapInB': [0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315], 'difficultyInA': [0.9167036872501111, 0.9269213682807641, 0.9187027987561084, 0.9275877387827632, 0.9235895157707685, 0.9229231452687694, 0.9200355397601067], 'difficultyInB': [0.9154, 0.9218, 0.9084, 0.9124, 0.9116, 0.9108, 0.914], 'difficultyInL': [np.float64(0.9695238095238096), np.float64(0.9726984126984127), np.float64(0.9657142857142857), np.float64(0.966984126984127), np.float64(0.9625396825396826), np.float64(0.9644444444444444), np.float64(0.9701587301587301)], 'difficultyInNL': [np.float64(0.9465693430656934), np.float64(0.9494890510948905), np.float64(0.9383941605839416), np.float64(0.9404379562043795), np.float64(0.9357664233576642), np.float64(0.9366423357664233), np.float64(0.9404379562043795)], 'ytrain_synth_prop': [synthetic 

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.00638 in the m_probability of piv5, level `All other comparisons`
Iteration 2: Largest change in params was -0.0028 in the m_probability of piv5, level `Exact match on piv5`
Iteration

4502
95
0.35
{0.35: {'overlapInA': [0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355], 'overlapInB': [0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315], 'difficultyInA': [0.9167036872501111, 0.9269213682807641, 0.9187027987561084, 0.9275877387827632, 0.9235895157707685, 0.9229231452687694, 0.9200355397601067, 0.9222567747667704], 'difficultyInB': [0.9154, 0.9218, 0.9084, 0.9124, 0.9116, 0.9108, 0.914, 0.9144], 'difficultyInL': [np.float64(0.9695238095238096), np.float64(0.9726984126984127), np.float64(0.9657142857142857), np.float64(0.966984126984127), np.float64(0.9625396825396826), np.float64(0.9644444444444444), np.float64(0.9701587301587301), np.float64(0.9682539682539683)], 'difficultyInNL': [np.float64(0.9465693430656934), np.float64(0.9494890510948905), np.float64(0.9383941605839416), np.float64(0.9404379562043795), np.float64(0.9357664233576642), np.float6

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.0043 in the m_probability of piv4, level `All other comparisons`
Iteration 2: Largest change in params was -0.000897 in probability_two_random_records_match
Iteration 3: Largest chang

4502
95
0.35
{0.35: {'overlapInA': [0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355], 'overlapInB': [0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315], 'difficultyInA': [0.9167036872501111, 0.9269213682807641, 0.9187027987561084, 0.9275877387827632, 0.9235895157707685, 0.9229231452687694, 0.9200355397601067, 0.9222567747667704, 0.926254997778765], 'difficultyInB': [0.9154, 0.9218, 0.9084, 0.9124, 0.9116, 0.9108, 0.914, 0.9144, 0.9108], 'difficultyInL': [np.float64(0.9695238095238096), np.float64(0.9726984126984127), np.float64(0.9657142857142857), np.float64(0.966984126984127), np.float64(0.9625396825396826), np.float64(0.9644444444444444), np.float64(0.9701587301587301), np.float64(0.9682539682539683), np.float64(0.9695238095238096)], 'difficultyInNL': [np.float64(0.9465693430656934), np.float64(0.9494890510948905), np.float64(0.93839

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.00559 in the m_probability of piv5, level `Exact match on piv5`
Iteration 2: Largest change in params was -0.00153 in the m_probability of piv5, level `Exact match on piv5`
Iteration

4502
95
0.35
{0.35: {'overlapInA': [0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355], 'overlapInB': [0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315], 'difficultyInA': [0.9167036872501111, 0.9269213682807641, 0.9187027987561084, 0.9275877387827632, 0.9235895157707685, 0.9229231452687694, 0.9200355397601067, 0.9222567747667704, 0.926254997778765, 0.9227010217681031], 'difficultyInB': [0.9154, 0.9218, 0.9084, 0.9124, 0.9116, 0.9108, 0.914, 0.9144, 0.9108, 0.917], 'difficultyInL': [np.float64(0.9695238095238096), np.float64(0.9726984126984127), np.float64(0.9657142857142857), np.float64(0.966984126984127), np.float64(0.9625396825396826), np.float64(0.9644444444444444), np.float64(0.9701587301587301), np.float64(0.9682539682539683), np.float64(0.9695238095238096), np.float64(0.9765079365079365)], 'difficultyInN

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.0153 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was 0.00424 in probability_two_random_records_match
Iteration 3: Largest change in

4502
95
0.75
{0.35: {'overlapInA': [0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355], 'overlapInB': [0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315], 'difficultyInA': [0.9167036872501111, 0.9269213682807641, 0.9187027987561084, 0.9275877387827632, 0.9235895157707685, 0.9229231452687694, 0.9200355397601067, 0.9222567747667704, 0.926254997778765, 0.9227010217681031], 'difficultyInB': [0.9154, 0.9218, 0.9084, 0.9124, 0.9116, 0.9108, 0.914, 0.9144, 0.9108, 0.917], 'difficultyInL': [np.float64(0.9695238095238096), np.float64(0.9726984126984127), np.float64(0.9657142857142857), np.float64(0.966984126984127), np.float64(0.9625396825396826), np.float64(0.9644444444444444), np.float64(0.9701587301587301), np.float64(0.9682539682539683), np.float64(0.9695238095238096), np.float64(0.9765079365079365)], 'difficultyInN

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.017 in the m_probability of piv5, level `All other comparisons`
Iteration 2: Largest change in params was 0.00458 in probability_two_random_records_match
Iteration 3: Largest change 

4502
95
0.75
{0.35: {'overlapInA': [0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355], 'overlapInB': [0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315], 'difficultyInA': [0.9167036872501111, 0.9269213682807641, 0.9187027987561084, 0.9275877387827632, 0.9235895157707685, 0.9229231452687694, 0.9200355397601067, 0.9222567747667704, 0.926254997778765, 0.9227010217681031], 'difficultyInB': [0.9154, 0.9218, 0.9084, 0.9124, 0.9116, 0.9108, 0.914, 0.9144, 0.9108, 0.917], 'difficultyInL': [np.float64(0.9695238095238096), np.float64(0.9726984126984127), np.float64(0.9657142857142857), np.float64(0.966984126984127), np.float64(0.9625396825396826), np.float64(0.9644444444444444), np.float64(0.9701587301587301), np.float64(0.9682539682539683), np.float64(0.9695238095238096), np.float64(0.9765079365079365)], 'difficultyInN

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0168 in the m_probability of piv5, level `All other comparisons`
Iteration 2: Largest change in params was 0.00474 in probability_two_random_records_match
Iteration 3: Largest change

4502
95
0.75
{0.35: {'overlapInA': [0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355], 'overlapInB': [0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315], 'difficultyInA': [0.9167036872501111, 0.9269213682807641, 0.9187027987561084, 0.9275877387827632, 0.9235895157707685, 0.9229231452687694, 0.9200355397601067, 0.9222567747667704, 0.926254997778765, 0.9227010217681031], 'difficultyInB': [0.9154, 0.9218, 0.9084, 0.9124, 0.9116, 0.9108, 0.914, 0.9144, 0.9108, 0.917], 'difficultyInL': [np.float64(0.9695238095238096), np.float64(0.9726984126984127), np.float64(0.9657142857142857), np.float64(0.966984126984127), np.float64(0.9625396825396826), np.float64(0.9644444444444444), np.float64(0.9701587301587301), np.float64(0.9682539682539683), np.float64(0.9695238095238096), np.float64(0.9765079365079365)], 'difficultyInN

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.0167 in the m_probability of piv5, level `Exact match on piv5`
Iteration 2: Largest change in params was 0.00477 in probability_two_random_records_match
Iteration 3: Largest change in

4502
95
0.75
{0.35: {'overlapInA': [0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355], 'overlapInB': [0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315], 'difficultyInA': [0.9167036872501111, 0.9269213682807641, 0.9187027987561084, 0.9275877387827632, 0.9235895157707685, 0.9229231452687694, 0.9200355397601067, 0.9222567747667704, 0.926254997778765, 0.9227010217681031], 'difficultyInB': [0.9154, 0.9218, 0.9084, 0.9124, 0.9116, 0.9108, 0.914, 0.9144, 0.9108, 0.917], 'difficultyInL': [np.float64(0.9695238095238096), np.float64(0.9726984126984127), np.float64(0.9657142857142857), np.float64(0.966984126984127), np.float64(0.9625396825396826), np.float64(0.9644444444444444), np.float64(0.9701587301587301), np.float64(0.9682539682539683), np.float64(0.9695238095238096), np.float64(0.9765079365079365)], 'difficultyInN

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0171 in the m_probability of piv4, level `All other comparisons`
Iteration 2: Largest change in params was 0.00465 in probability_two_random_records_match
Iteration 3: Largest change

4502
95
0.75
{0.35: {'overlapInA': [0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355], 'overlapInB': [0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315], 'difficultyInA': [0.9167036872501111, 0.9269213682807641, 0.9187027987561084, 0.9275877387827632, 0.9235895157707685, 0.9229231452687694, 0.9200355397601067, 0.9222567747667704, 0.926254997778765, 0.9227010217681031], 'difficultyInB': [0.9154, 0.9218, 0.9084, 0.9124, 0.9116, 0.9108, 0.914, 0.9144, 0.9108, 0.917], 'difficultyInL': [np.float64(0.9695238095238096), np.float64(0.9726984126984127), np.float64(0.9657142857142857), np.float64(0.966984126984127), np.float64(0.9625396825396826), np.float64(0.9644444444444444), np.float64(0.9701587301587301), np.float64(0.9682539682539683), np.float64(0.9695238095238096), np.float64(0.9765079365079365)], 'difficultyInN

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.0174 in the m_probability of piv5, level `Exact match on piv5`
Iteration 2: Largest change in params was -0.00492 in the m_probability of piv5, level `All other comparisons`
Iteration

4502
95
0.75
{0.35: {'overlapInA': [0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355], 'overlapInB': [0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315], 'difficultyInA': [0.9167036872501111, 0.9269213682807641, 0.9187027987561084, 0.9275877387827632, 0.9235895157707685, 0.9229231452687694, 0.9200355397601067, 0.9222567747667704, 0.926254997778765, 0.9227010217681031], 'difficultyInB': [0.9154, 0.9218, 0.9084, 0.9124, 0.9116, 0.9108, 0.914, 0.9144, 0.9108, 0.917], 'difficultyInL': [np.float64(0.9695238095238096), np.float64(0.9726984126984127), np.float64(0.9657142857142857), np.float64(0.966984126984127), np.float64(0.9625396825396826), np.float64(0.9644444444444444), np.float64(0.9701587301587301), np.float64(0.9682539682539683), np.float64(0.9695238095238096), np.float64(0.9765079365079365)], 'difficultyInN

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0173 in the m_probability of piv5, level `All other comparisons`
Iteration 2: Largest change in params was 0.00468 in probability_two_random_records_match
Iteration 3: Largest change

4502
95
0.75
{0.35: {'overlapInA': [0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355], 'overlapInB': [0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315], 'difficultyInA': [0.9167036872501111, 0.9269213682807641, 0.9187027987561084, 0.9275877387827632, 0.9235895157707685, 0.9229231452687694, 0.9200355397601067, 0.9222567747667704, 0.926254997778765, 0.9227010217681031], 'difficultyInB': [0.9154, 0.9218, 0.9084, 0.9124, 0.9116, 0.9108, 0.914, 0.9144, 0.9108, 0.917], 'difficultyInL': [np.float64(0.9695238095238096), np.float64(0.9726984126984127), np.float64(0.9657142857142857), np.float64(0.966984126984127), np.float64(0.9625396825396826), np.float64(0.9644444444444444), np.float64(0.9701587301587301), np.float64(0.9682539682539683), np.float64(0.9695238095238096), np.float64(0.9765079365079365)], 'difficultyInN

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0153 in the m_probability of piv5, level `All other comparisons`
Iteration 2: Largest change in params was 0.00447 in probability_two_random_records_match
Iteration 3: Largest change

4502
95
0.75
{0.35: {'overlapInA': [0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355], 'overlapInB': [0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315], 'difficultyInA': [0.9167036872501111, 0.9269213682807641, 0.9187027987561084, 0.9275877387827632, 0.9235895157707685, 0.9229231452687694, 0.9200355397601067, 0.9222567747667704, 0.926254997778765, 0.9227010217681031], 'difficultyInB': [0.9154, 0.9218, 0.9084, 0.9124, 0.9116, 0.9108, 0.914, 0.9144, 0.9108, 0.917], 'difficultyInL': [np.float64(0.9695238095238096), np.float64(0.9726984126984127), np.float64(0.9657142857142857), np.float64(0.966984126984127), np.float64(0.9625396825396826), np.float64(0.9644444444444444), np.float64(0.9701587301587301), np.float64(0.9682539682539683), np.float64(0.9695238095238096), np.float64(0.9765079365079365)], 'difficultyInN

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.0168 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was 0.00468 in probability_two_random_records_match
Iteration 3: Largest change in

4502
95
0.75
{0.35: {'overlapInA': [0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355], 'overlapInB': [0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315], 'difficultyInA': [0.9167036872501111, 0.9269213682807641, 0.9187027987561084, 0.9275877387827632, 0.9235895157707685, 0.9229231452687694, 0.9200355397601067, 0.9222567747667704, 0.926254997778765, 0.9227010217681031], 'difficultyInB': [0.9154, 0.9218, 0.9084, 0.9124, 0.9116, 0.9108, 0.914, 0.9144, 0.9108, 0.917], 'difficultyInL': [np.float64(0.9695238095238096), np.float64(0.9726984126984127), np.float64(0.9657142857142857), np.float64(0.966984126984127), np.float64(0.9625396825396826), np.float64(0.9644444444444444), np.float64(0.9701587301587301), np.float64(0.9682539682539683), np.float64(0.9695238095238096), np.float64(0.9765079365079365)], 'difficultyInN

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.017 in the m_probability of piv4, level `All other comparisons`
Iteration 2: Largest change in params was 0.00467 in probability_two_random_records_match
Iteration 3: Largest change 

4502
95
0.75
{0.35: {'overlapInA': [0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355, 0.34984451354953355], 'overlapInB': [0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315, 0.315], 'difficultyInA': [0.9167036872501111, 0.9269213682807641, 0.9187027987561084, 0.9275877387827632, 0.9235895157707685, 0.9229231452687694, 0.9200355397601067, 0.9222567747667704, 0.926254997778765, 0.9227010217681031], 'difficultyInB': [0.9154, 0.9218, 0.9084, 0.9124, 0.9116, 0.9108, 0.914, 0.9144, 0.9108, 0.917], 'difficultyInL': [np.float64(0.9695238095238096), np.float64(0.9726984126984127), np.float64(0.9657142857142857), np.float64(0.966984126984127), np.float64(0.9625396825396826), np.float64(0.9644444444444444), np.float64(0.9701587301587301), np.float64(0.9682539682539683), np.float64(0.9695238095238096), np.float64(0.9765079365079365)], 'difficultyInN

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.0246 in the m_probability of piv4, level `All other comparisons`
Iteration 2: Largest change in params was -0.0161 in the m_probability of piv4, level `Exact match on piv4`
Iteration 

2002
85
0.35
{0.35: {'overlapInA': [0.2917082917082917], 'overlapInB': [0.1168], 'difficultyInA': [0.9265734265734266], 'difficultyInB': [0.8318], 'difficultyInL': [np.float64(0.976027397260274)], 'difficultyInNL': [np.float64(0.8127264492753623)], 'ytrain_synth_prop': [synthetic    0.091206
dtype: float64], 'ytest_synth_prop': [synthetic    0.090909
dtype: float64], 'auc_synth': [np.float64(0.481356)], 'ytrain_link_prop': [link    0.1168
dtype: float64], 'ytest_link_prop': [link    0.1168
dtype: float64], 'auc_link': [np.float64(1.0)], 'max_proba_linked': [0.38861066368270386], 'median_proba_linked': [0.38], 'threshold_for_links': [0.38], 'synth_pairs_prop': [0.15], 'real_pairs_prop': [np.float64(0.1448)], 'true_fdr': [np.float64(0.5535168195718655)], 'hat_fdr_us': [0.573394495412844], 'hat_fdr_prob': [np.float64(0.6113893363172962)]}}
ITERATION
2
Unique values in column 'piv1': 23
Unique values in column 'piv2': 8
Unique values in column 'piv3': 14
Unique values in column 'piv4': 8
U

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0257 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was -0.0173 in the m_probability of piv4, level `Exact match on piv4`
Iteration 3

2002
85
0.35
{0.35: {'overlapInA': [0.2917082917082917, 0.2807192807192807], 'overlapInB': [0.1168, 0.1124], 'difficultyInA': [0.9265734265734266, 0.9135864135864136], 'difficultyInB': [0.8318, 0.8268], 'difficultyInL': [np.float64(0.976027397260274), np.float64(0.9733096085409253)], 'difficultyInNL': [np.float64(0.8127264492753623), np.float64(0.8082469580892294)], 'ytrain_synth_prop': [synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64], 'ytest_synth_prop': [synthetic    0.090909
dtype: float64, synthetic    0.090909
dtype: float64], 'auc_synth': [np.float64(0.481356), np.float64(0.4565256)], 'ytrain_link_prop': [link    0.1168
dtype: float64, link    0.1124
dtype: float64], 'ytest_link_prop': [link    0.1168
dtype: float64, link    0.1124
dtype: float64], 'auc_link': [np.float64(1.0), np.float64(1.0)], 'max_proba_linked': [0.38861066368270386, 0.38776342720823936], 'median_proba_linked': [0.38, 0.38], 'threshold_for_links': [0.38, 0.38], 'synth_pairs_prop': [

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.0238 in the m_probability of piv4, level `All other comparisons`
Iteration 2: Largest change in params was -0.0163 in the m_probability of piv4, level `Exact match on piv4`
Iteration 

2002
85
0.35
{0.35: {'overlapInA': [0.2917082917082917, 0.2807192807192807, 0.2997002997002997], 'overlapInB': [0.1168, 0.1124, 0.12], 'difficultyInA': [0.9265734265734266, 0.9135864135864136, 0.935064935064935], 'difficultyInB': [0.8318, 0.8268, 0.836], 'difficultyInL': [np.float64(0.976027397260274), np.float64(0.9733096085409253), np.float64(0.985)], 'difficultyInNL': [np.float64(0.8127264492753623), np.float64(0.8082469580892294), np.float64(0.8156818181818182)], 'ytrain_synth_prop': [synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64], 'ytest_synth_prop': [synthetic    0.090909
dtype: float64, synthetic    0.090909
dtype: float64, synthetic    0.090909
dtype: float64], 'auc_synth': [np.float64(0.481356), np.float64(0.4565256), np.float64(0.44689200000000007)], 'ytrain_link_prop': [link    0.1168
dtype: float64, link    0.1124
dtype: float64, link    0.12
dtype: float64], 'ytest_link_prop': [link    0.1168
dtype: float64

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.0255 in the m_probability of piv4, level `All other comparisons`
Iteration 2: Largest change in params was -0.0166 in the m_probability of piv4, level `Exact match on piv4`
Iteration 

2002
85
0.35
{0.35: {'overlapInA': [0.2917082917082917, 0.2807192807192807, 0.2997002997002997, 0.2852147852147852], 'overlapInB': [0.1168, 0.1124, 0.12, 0.1142], 'difficultyInA': [0.9265734265734266, 0.9135864135864136, 0.935064935064935, 0.9255744255744256], 'difficultyInB': [0.8318, 0.8268, 0.836, 0.8268], 'difficultyInL': [np.float64(0.976027397260274), np.float64(0.9733096085409253), np.float64(0.985), np.float64(0.9737302977232924)], 'difficultyInNL': [np.float64(0.8127264492753623), np.float64(0.8082469580892294), np.float64(0.8156818181818182), np.float64(0.8078573041318582)], 'ytrain_synth_prop': [synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64], 'ytest_synth_prop': [synthetic    0.090909
dtype: float64, synthetic    0.090909
dtype: float64, synthetic    0.090909
dtype: float64, synthetic    0.090909
dtype: float64], 'auc_synth': [np.float64(0.481356), np.float64(0.4565256), 

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0275 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was 0.0206 in the m_probability of piv4, level `All other comparisons`
Iteration 

2002
85
0.35
{0.35: {'overlapInA': [0.2917082917082917, 0.2807192807192807, 0.2997002997002997, 0.2852147852147852, 0.2932067932067932], 'overlapInB': [0.1168, 0.1124, 0.12, 0.1142, 0.1174], 'difficultyInA': [0.9265734265734266, 0.9135864135864136, 0.935064935064935, 0.9255744255744256, 0.9195804195804196], 'difficultyInB': [0.8318, 0.8268, 0.836, 0.8268, 0.8256], 'difficultyInL': [np.float64(0.976027397260274), np.float64(0.9733096085409253), np.float64(0.985), np.float64(0.9737302977232924), np.float64(0.9676320272572402)], 'difficultyInNL': [np.float64(0.8127264492753623), np.float64(0.8082469580892294), np.float64(0.8156818181818182), np.float64(0.8078573041318582), np.float64(0.8067074552458645)], 'ytrain_synth_prop': [synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64], 'ytest_synth_prop': [synthetic    0.090909
dtype: float64, synthetic    0.0

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0251 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was 0.019 in the m_probability of piv4, level `All other comparisons`
Iteration 3

2002
85
0.35
{0.35: {'overlapInA': [0.2917082917082917, 0.2807192807192807, 0.2997002997002997, 0.2852147852147852, 0.2932067932067932, 0.27972027972027974], 'overlapInB': [0.1168, 0.1124, 0.12, 0.1142, 0.1174, 0.112], 'difficultyInA': [0.9265734265734266, 0.9135864135864136, 0.935064935064935, 0.9255744255744256, 0.9195804195804196, 0.9200799200799201], 'difficultyInB': [0.8318, 0.8268, 0.836, 0.8268, 0.8256, 0.8312], 'difficultyInL': [np.float64(0.976027397260274), np.float64(0.9733096085409253), np.float64(0.985), np.float64(0.9737302977232924), np.float64(0.9676320272572402), np.float64(0.9767857142857143)], 'difficultyInNL': [np.float64(0.8127264492753623), np.float64(0.8082469580892294), np.float64(0.8156818181818182), np.float64(0.8078573041318582), np.float64(0.8067074552458645), np.float64(0.8128378378378378)], 'ytrain_synth_prop': [synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: flo

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.0272 in the m_probability of piv4, level `All other comparisons`
Iteration 2: Largest change in params was -0.0191 in the m_probability of piv4, level `Exact match on piv4`
Iteration 

2002
85
0.35
{0.35: {'overlapInA': [0.2917082917082917, 0.2807192807192807, 0.2997002997002997, 0.2852147852147852, 0.2932067932067932, 0.27972027972027974, 0.2842157842157842], 'overlapInB': [0.1168, 0.1124, 0.12, 0.1142, 0.1174, 0.112, 0.1138], 'difficultyInA': [0.9265734265734266, 0.9135864135864136, 0.935064935064935, 0.9255744255744256, 0.9195804195804196, 0.9200799200799201, 0.9155844155844156], 'difficultyInB': [0.8318, 0.8268, 0.836, 0.8268, 0.8256, 0.8312, 0.8386], 'difficultyInL': [np.float64(0.976027397260274), np.float64(0.9733096085409253), np.float64(0.985), np.float64(0.9737302977232924), np.float64(0.9676320272572402), np.float64(0.9767857142857143), np.float64(0.9736379613356766)], 'difficultyInNL': [np.float64(0.8127264492753623), np.float64(0.8082469580892294), np.float64(0.8156818181818182), np.float64(0.8078573041318582), np.float64(0.8067074552458645), np.float64(0.8128378378378378), np.float64(0.8212593094109681)], 'ytrain_synth_prop': [synthetic    0.091206
dtyp

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.0271 in the m_probability of piv4, level `All other comparisons`
Iteration 2: Largest change in params was -0.0187 in the m_probability of piv4, level `Exact match on piv4`
Iteration 

2002
85
0.35
{0.35: {'overlapInA': [0.2917082917082917, 0.2807192807192807, 0.2997002997002997, 0.2852147852147852, 0.2932067932067932, 0.27972027972027974, 0.2842157842157842, 0.27522477522477523], 'overlapInB': [0.1168, 0.1124, 0.12, 0.1142, 0.1174, 0.112, 0.1138, 0.1102], 'difficultyInA': [0.9265734265734266, 0.9135864135864136, 0.935064935064935, 0.9255744255744256, 0.9195804195804196, 0.9200799200799201, 0.9155844155844156, 0.919080919080919], 'difficultyInB': [0.8318, 0.8268, 0.836, 0.8268, 0.8256, 0.8312, 0.8386, 0.825], 'difficultyInL': [np.float64(0.976027397260274), np.float64(0.9733096085409253), np.float64(0.985), np.float64(0.9737302977232924), np.float64(0.9676320272572402), np.float64(0.9767857142857143), np.float64(0.9736379613356766), np.float64(0.9818511796733213)], 'difficultyInNL': [np.float64(0.8127264492753623), np.float64(0.8082469580892294), np.float64(0.8156818181818182), np.float64(0.8078573041318582), np.float64(0.8067074552458645), np.float64(0.8128378378378

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0221 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was 0.0148 in the m_probability of piv4, level `All other comparisons`
Iteration 

2002
85
0.35
{0.35: {'overlapInA': [0.2917082917082917, 0.2807192807192807, 0.2997002997002997, 0.2852147852147852, 0.2932067932067932, 0.27972027972027974, 0.2842157842157842, 0.27522477522477523, 0.2922077922077922], 'overlapInB': [0.1168, 0.1124, 0.12, 0.1142, 0.1174, 0.112, 0.1138, 0.1102, 0.117], 'difficultyInA': [0.9265734265734266, 0.9135864135864136, 0.935064935064935, 0.9255744255744256, 0.9195804195804196, 0.9200799200799201, 0.9155844155844156, 0.919080919080919, 0.9250749250749251], 'difficultyInB': [0.8318, 0.8268, 0.836, 0.8268, 0.8256, 0.8312, 0.8386, 0.825, 0.8272], 'difficultyInL': [np.float64(0.976027397260274), np.float64(0.9733096085409253), np.float64(0.985), np.float64(0.9737302977232924), np.float64(0.9676320272572402), np.float64(0.9767857142857143), np.float64(0.9736379613356766), np.float64(0.9818511796733213), np.float64(0.9726495726495726)], 'difficultyInNL': [np.float64(0.8127264492753623), np.float64(0.8082469580892294), np.float64(0.8156818181818182), np.

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0225 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was -0.014 in the m_probability of piv4, level `Exact match on piv4`
Iteration 3:

2002
85
0.35
{0.35: {'overlapInA': [0.2917082917082917, 0.2807192807192807, 0.2997002997002997, 0.2852147852147852, 0.2932067932067932, 0.27972027972027974, 0.2842157842157842, 0.27522477522477523, 0.2922077922077922, 0.2942057942057942], 'overlapInB': [0.1168, 0.1124, 0.12, 0.1142, 0.1174, 0.112, 0.1138, 0.1102, 0.117, 0.1178], 'difficultyInA': [0.9265734265734266, 0.9135864135864136, 0.935064935064935, 0.9255744255744256, 0.9195804195804196, 0.9200799200799201, 0.9155844155844156, 0.919080919080919, 0.9250749250749251, 0.9180819180819181], 'difficultyInB': [0.8318, 0.8268, 0.836, 0.8268, 0.8256, 0.8312, 0.8386, 0.825, 0.8272, 0.8314], 'difficultyInL': [np.float64(0.976027397260274), np.float64(0.9733096085409253), np.float64(0.985), np.float64(0.9737302977232924), np.float64(0.9676320272572402), np.float64(0.9767857142857143), np.float64(0.9736379613356766), np.float64(0.9818511796733213), np.float64(0.9726495726495726), np.float64(0.9609507640067911)], 'difficultyInNL': [np.float64(

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0206 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was -0.0129 in the m_probability of piv4, level `Exact match on piv4`
Iteration 3

2002
85
0.75
{0.35: {'overlapInA': [0.2917082917082917, 0.2807192807192807, 0.2997002997002997, 0.2852147852147852, 0.2932067932067932, 0.27972027972027974, 0.2842157842157842, 0.27522477522477523, 0.2922077922077922, 0.2942057942057942], 'overlapInB': [0.1168, 0.1124, 0.12, 0.1142, 0.1174, 0.112, 0.1138, 0.1102, 0.117, 0.1178], 'difficultyInA': [0.9265734265734266, 0.9135864135864136, 0.935064935064935, 0.9255744255744256, 0.9195804195804196, 0.9200799200799201, 0.9155844155844156, 0.919080919080919, 0.9250749250749251, 0.9180819180819181], 'difficultyInB': [0.8318, 0.8268, 0.836, 0.8268, 0.8256, 0.8312, 0.8386, 0.825, 0.8272, 0.8314], 'difficultyInL': [np.float64(0.976027397260274), np.float64(0.9733096085409253), np.float64(0.985), np.float64(0.9737302977232924), np.float64(0.9676320272572402), np.float64(0.9767857142857143), np.float64(0.9736379613356766), np.float64(0.9818511796733213), np.float64(0.9726495726495726), np.float64(0.9609507640067911)], 'difficultyInNL': [np.float64(

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.021 in the m_probability of piv4, level `All other comparisons`
Iteration 2: Largest change in params was 0.0118 in the m_probability of piv4, level `All other comparisons`
Iteration 

2002
85
0.75
{0.35: {'overlapInA': [0.2917082917082917, 0.2807192807192807, 0.2997002997002997, 0.2852147852147852, 0.2932067932067932, 0.27972027972027974, 0.2842157842157842, 0.27522477522477523, 0.2922077922077922, 0.2942057942057942], 'overlapInB': [0.1168, 0.1124, 0.12, 0.1142, 0.1174, 0.112, 0.1138, 0.1102, 0.117, 0.1178], 'difficultyInA': [0.9265734265734266, 0.9135864135864136, 0.935064935064935, 0.9255744255744256, 0.9195804195804196, 0.9200799200799201, 0.9155844155844156, 0.919080919080919, 0.9250749250749251, 0.9180819180819181], 'difficultyInB': [0.8318, 0.8268, 0.836, 0.8268, 0.8256, 0.8312, 0.8386, 0.825, 0.8272, 0.8314], 'difficultyInL': [np.float64(0.976027397260274), np.float64(0.9733096085409253), np.float64(0.985), np.float64(0.9737302977232924), np.float64(0.9676320272572402), np.float64(0.9767857142857143), np.float64(0.9736379613356766), np.float64(0.9818511796733213), np.float64(0.9726495726495726), np.float64(0.9609507640067911)], 'difficultyInNL': [np.float64(

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0215 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was 0.0126 in the m_probability of piv4, level `All other comparisons`
Iteration 

2002
85
0.75
{0.35: {'overlapInA': [0.2917082917082917, 0.2807192807192807, 0.2997002997002997, 0.2852147852147852, 0.2932067932067932, 0.27972027972027974, 0.2842157842157842, 0.27522477522477523, 0.2922077922077922, 0.2942057942057942], 'overlapInB': [0.1168, 0.1124, 0.12, 0.1142, 0.1174, 0.112, 0.1138, 0.1102, 0.117, 0.1178], 'difficultyInA': [0.9265734265734266, 0.9135864135864136, 0.935064935064935, 0.9255744255744256, 0.9195804195804196, 0.9200799200799201, 0.9155844155844156, 0.919080919080919, 0.9250749250749251, 0.9180819180819181], 'difficultyInB': [0.8318, 0.8268, 0.836, 0.8268, 0.8256, 0.8312, 0.8386, 0.825, 0.8272, 0.8314], 'difficultyInL': [np.float64(0.976027397260274), np.float64(0.9733096085409253), np.float64(0.985), np.float64(0.9737302977232924), np.float64(0.9676320272572402), np.float64(0.9767857142857143), np.float64(0.9736379613356766), np.float64(0.9818511796733213), np.float64(0.9726495726495726), np.float64(0.9609507640067911)], 'difficultyInNL': [np.float64(

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0266 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was 0.0192 in the m_probability of piv4, level `All other comparisons`
Iteration 

2002
85
0.75
{0.35: {'overlapInA': [0.2917082917082917, 0.2807192807192807, 0.2997002997002997, 0.2852147852147852, 0.2932067932067932, 0.27972027972027974, 0.2842157842157842, 0.27522477522477523, 0.2922077922077922, 0.2942057942057942], 'overlapInB': [0.1168, 0.1124, 0.12, 0.1142, 0.1174, 0.112, 0.1138, 0.1102, 0.117, 0.1178], 'difficultyInA': [0.9265734265734266, 0.9135864135864136, 0.935064935064935, 0.9255744255744256, 0.9195804195804196, 0.9200799200799201, 0.9155844155844156, 0.919080919080919, 0.9250749250749251, 0.9180819180819181], 'difficultyInB': [0.8318, 0.8268, 0.836, 0.8268, 0.8256, 0.8312, 0.8386, 0.825, 0.8272, 0.8314], 'difficultyInL': [np.float64(0.976027397260274), np.float64(0.9733096085409253), np.float64(0.985), np.float64(0.9737302977232924), np.float64(0.9676320272572402), np.float64(0.9767857142857143), np.float64(0.9736379613356766), np.float64(0.9818511796733213), np.float64(0.9726495726495726), np.float64(0.9609507640067911)], 'difficultyInNL': [np.float64(

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.0222 in the m_probability of piv4, level `All other comparisons`
Iteration 2: Largest change in params was -0.0128 in the m_probability of piv4, level `Exact match on piv4`
Iteration 

2002
85
0.75
{0.35: {'overlapInA': [0.2917082917082917, 0.2807192807192807, 0.2997002997002997, 0.2852147852147852, 0.2932067932067932, 0.27972027972027974, 0.2842157842157842, 0.27522477522477523, 0.2922077922077922, 0.2942057942057942], 'overlapInB': [0.1168, 0.1124, 0.12, 0.1142, 0.1174, 0.112, 0.1138, 0.1102, 0.117, 0.1178], 'difficultyInA': [0.9265734265734266, 0.9135864135864136, 0.935064935064935, 0.9255744255744256, 0.9195804195804196, 0.9200799200799201, 0.9155844155844156, 0.919080919080919, 0.9250749250749251, 0.9180819180819181], 'difficultyInB': [0.8318, 0.8268, 0.836, 0.8268, 0.8256, 0.8312, 0.8386, 0.825, 0.8272, 0.8314], 'difficultyInL': [np.float64(0.976027397260274), np.float64(0.9733096085409253), np.float64(0.985), np.float64(0.9737302977232924), np.float64(0.9676320272572402), np.float64(0.9767857142857143), np.float64(0.9736379613356766), np.float64(0.9818511796733213), np.float64(0.9726495726495726), np.float64(0.9609507640067911)], 'difficultyInNL': [np.float64(

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0222 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was 0.0149 in the m_probability of piv4, level `All other comparisons`
Iteration 

2002
85
0.75
{0.35: {'overlapInA': [0.2917082917082917, 0.2807192807192807, 0.2997002997002997, 0.2852147852147852, 0.2932067932067932, 0.27972027972027974, 0.2842157842157842, 0.27522477522477523, 0.2922077922077922, 0.2942057942057942], 'overlapInB': [0.1168, 0.1124, 0.12, 0.1142, 0.1174, 0.112, 0.1138, 0.1102, 0.117, 0.1178], 'difficultyInA': [0.9265734265734266, 0.9135864135864136, 0.935064935064935, 0.9255744255744256, 0.9195804195804196, 0.9200799200799201, 0.9155844155844156, 0.919080919080919, 0.9250749250749251, 0.9180819180819181], 'difficultyInB': [0.8318, 0.8268, 0.836, 0.8268, 0.8256, 0.8312, 0.8386, 0.825, 0.8272, 0.8314], 'difficultyInL': [np.float64(0.976027397260274), np.float64(0.9733096085409253), np.float64(0.985), np.float64(0.9737302977232924), np.float64(0.9676320272572402), np.float64(0.9767857142857143), np.float64(0.9736379613356766), np.float64(0.9818511796733213), np.float64(0.9726495726495726), np.float64(0.9609507640067911)], 'difficultyInNL': [np.float64(

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0246 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was -0.0161 in the m_probability of piv4, level `Exact match on piv4`
Iteration 3

2002
85
0.75
{0.35: {'overlapInA': [0.2917082917082917, 0.2807192807192807, 0.2997002997002997, 0.2852147852147852, 0.2932067932067932, 0.27972027972027974, 0.2842157842157842, 0.27522477522477523, 0.2922077922077922, 0.2942057942057942], 'overlapInB': [0.1168, 0.1124, 0.12, 0.1142, 0.1174, 0.112, 0.1138, 0.1102, 0.117, 0.1178], 'difficultyInA': [0.9265734265734266, 0.9135864135864136, 0.935064935064935, 0.9255744255744256, 0.9195804195804196, 0.9200799200799201, 0.9155844155844156, 0.919080919080919, 0.9250749250749251, 0.9180819180819181], 'difficultyInB': [0.8318, 0.8268, 0.836, 0.8268, 0.8256, 0.8312, 0.8386, 0.825, 0.8272, 0.8314], 'difficultyInL': [np.float64(0.976027397260274), np.float64(0.9733096085409253), np.float64(0.985), np.float64(0.9737302977232924), np.float64(0.9676320272572402), np.float64(0.9767857142857143), np.float64(0.9736379613356766), np.float64(0.9818511796733213), np.float64(0.9726495726495726), np.float64(0.9609507640067911)], 'difficultyInNL': [np.float64(

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.0175 in the m_probability of piv4, level `All other comparisons`
Iteration 2: Largest change in params was -0.0111 in the m_probability of piv4, level `Exact match on piv4`
Iteration 

2002
85
0.75
{0.35: {'overlapInA': [0.2917082917082917, 0.2807192807192807, 0.2997002997002997, 0.2852147852147852, 0.2932067932067932, 0.27972027972027974, 0.2842157842157842, 0.27522477522477523, 0.2922077922077922, 0.2942057942057942], 'overlapInB': [0.1168, 0.1124, 0.12, 0.1142, 0.1174, 0.112, 0.1138, 0.1102, 0.117, 0.1178], 'difficultyInA': [0.9265734265734266, 0.9135864135864136, 0.935064935064935, 0.9255744255744256, 0.9195804195804196, 0.9200799200799201, 0.9155844155844156, 0.919080919080919, 0.9250749250749251, 0.9180819180819181], 'difficultyInB': [0.8318, 0.8268, 0.836, 0.8268, 0.8256, 0.8312, 0.8386, 0.825, 0.8272, 0.8314], 'difficultyInL': [np.float64(0.976027397260274), np.float64(0.9733096085409253), np.float64(0.985), np.float64(0.9737302977232924), np.float64(0.9676320272572402), np.float64(0.9767857142857143), np.float64(0.9736379613356766), np.float64(0.9818511796733213), np.float64(0.9726495726495726), np.float64(0.9609507640067911)], 'difficultyInNL': [np.float64(

  warn(


Initial accuracy is 0.6986
Iteration number 1 reached accuracy of 0.3978.


  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.023 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was 0.016 in the m_probability of piv4, level `All other comparisons`
Iteration 3:

2002
85
0.75
{0.35: {'overlapInA': [0.2917082917082917, 0.2807192807192807, 0.2997002997002997, 0.2852147852147852, 0.2932067932067932, 0.27972027972027974, 0.2842157842157842, 0.27522477522477523, 0.2922077922077922, 0.2942057942057942], 'overlapInB': [0.1168, 0.1124, 0.12, 0.1142, 0.1174, 0.112, 0.1138, 0.1102, 0.117, 0.1178], 'difficultyInA': [0.9265734265734266, 0.9135864135864136, 0.935064935064935, 0.9255744255744256, 0.9195804195804196, 0.9200799200799201, 0.9155844155844156, 0.919080919080919, 0.9250749250749251, 0.9180819180819181], 'difficultyInB': [0.8318, 0.8268, 0.836, 0.8268, 0.8256, 0.8312, 0.8386, 0.825, 0.8272, 0.8314], 'difficultyInL': [np.float64(0.976027397260274), np.float64(0.9733096085409253), np.float64(0.985), np.float64(0.9737302977232924), np.float64(0.9676320272572402), np.float64(0.9767857142857143), np.float64(0.9736379613356766), np.float64(0.9818511796733213), np.float64(0.9726495726495726), np.float64(0.9609507640067911)], 'difficultyInNL': [np.float64(

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0234 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was 0.0151 in the m_probability of piv4, level `All other comparisons`
Iteration 

2002
85
0.75
{0.35: {'overlapInA': [0.2917082917082917, 0.2807192807192807, 0.2997002997002997, 0.2852147852147852, 0.2932067932067932, 0.27972027972027974, 0.2842157842157842, 0.27522477522477523, 0.2922077922077922, 0.2942057942057942], 'overlapInB': [0.1168, 0.1124, 0.12, 0.1142, 0.1174, 0.112, 0.1138, 0.1102, 0.117, 0.1178], 'difficultyInA': [0.9265734265734266, 0.9135864135864136, 0.935064935064935, 0.9255744255744256, 0.9195804195804196, 0.9200799200799201, 0.9155844155844156, 0.919080919080919, 0.9250749250749251, 0.9180819180819181], 'difficultyInB': [0.8318, 0.8268, 0.836, 0.8268, 0.8256, 0.8312, 0.8386, 0.825, 0.8272, 0.8314], 'difficultyInL': [np.float64(0.976027397260274), np.float64(0.9733096085409253), np.float64(0.985), np.float64(0.9737302977232924), np.float64(0.9676320272572402), np.float64(0.9767857142857143), np.float64(0.9736379613356766), np.float64(0.9818511796733213), np.float64(0.9726495726495726), np.float64(0.9609507640067911)], 'difficultyInNL': [np.float64(

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0353 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was -0.0145 in the m_probability of piv4, level `Exact match on piv4`
Iteration 3

2002
95
0.35
{0.35: {'overlapInA': [0.3341658341658342], 'overlapInB': [0.1338], 'difficultyInA': [0.9755244755244755], 'difficultyInB': [0.9312], 'difficultyInL': [np.float64(0.9850523168908819)], 'difficultyInNL': [np.float64(0.9228815516047102)], 'ytrain_synth_prop': [synthetic    0.091206
dtype: float64], 'ytest_synth_prop': [synthetic    0.090909
dtype: float64], 'auc_synth': [np.float64(0.503492)], 'ytrain_link_prop': [link    0.133653
dtype: float64], 'ytest_link_prop': [link    0.133946
dtype: float64], 'auc_link': [np.float64(1.0)], 'max_proba_linked': [0.7523957713121159], 'median_proba_linked': [0.75], 'threshold_for_links': [0.5], 'synth_pairs_prop': [0.052], 'real_pairs_prop': [np.float64(0.0444)], 'true_fdr': [np.float64(0.24915824915824916)], 'hat_fdr_us': [0.29180695847362514], 'hat_fdr_prob': [np.float64(0.2476042286878839)]}}
ITERATION
2
Unique values in column 'piv1': 29
Unique values in column 'piv2': 10
Unique values in column 'piv3': 18
Unique values in column 'pi

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0386 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was 0.0176 in the m_probability of piv4, level `All other comparisons`
Iteration 

2002
95
0.35
{0.35: {'overlapInA': [0.3341658341658342, 0.34765234765234765], 'overlapInB': [0.1338, 0.1392], 'difficultyInA': [0.9755244755244755, 0.9795204795204795], 'difficultyInB': [0.9312, 0.936], 'difficultyInL': [np.float64(0.9850523168908819), np.float64(0.992816091954023)], 'difficultyInNL': [np.float64(0.9228815516047102), np.float64(0.9268122676579925)], 'ytrain_synth_prop': [synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64], 'ytest_synth_prop': [synthetic    0.090909
dtype: float64, synthetic    0.090909
dtype: float64], 'auc_synth': [np.float64(0.503492), np.float64(0.47191760000000005)], 'ytrain_link_prop': [link    0.133653
dtype: float64, link    0.1392
dtype: float64], 'ytest_link_prop': [link    0.133946
dtype: float64, link    0.1392
dtype: float64], 'auc_link': [np.float64(1.0), np.float64(1.0)], 'max_proba_linked': [0.7523957713121159, 0.7193637781088537], 'median_proba_linked': [0.75, 0.71], 'threshold_for_links': [0.5, 0.5], 'synth_pair

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0363 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was 0.0128 in the m_probability of piv4, level `All other comparisons`
Iteration 

2002
95
0.35
{0.35: {'overlapInA': [0.3341658341658342, 0.34765234765234765, 0.32817182817182816], 'overlapInB': [0.1338, 0.1392, 0.1314], 'difficultyInA': [0.9755244755244755, 0.9795204795204795, 0.9715284715284715], 'difficultyInB': [0.9312, 0.936, 0.9308], 'difficultyInL': [np.float64(0.9850523168908819), np.float64(0.992816091954023), np.float64(0.989345509893455)], 'difficultyInNL': [np.float64(0.9228815516047102), np.float64(0.9268122676579925), np.float64(0.9219433571264103)], 'ytrain_synth_prop': [synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64], 'ytest_synth_prop': [synthetic    0.090909
dtype: float64, synthetic    0.090909
dtype: float64, synthetic    0.090909
dtype: float64], 'auc_synth': [np.float64(0.503492), np.float64(0.47191760000000005), np.float64(0.45211760000000006)], 'ytrain_link_prop': [link    0.133653
dtype: float64, link    0.1392
dtype: float64, link    0.131253
dtype: float64], 'ytest_link_prop

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0375 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was -0.0167 in the m_probability of piv4, level `Exact match on piv4`
Iteration 3

2002
95
0.35
{0.35: {'overlapInA': [0.3341658341658342, 0.34765234765234765, 0.32817182817182816, 0.34665334665334663], 'overlapInB': [0.1338, 0.1392, 0.1314, 0.1388], 'difficultyInA': [0.9755244755244755, 0.9795204795204795, 0.9715284715284715, 0.9735264735264735], 'difficultyInB': [0.9312, 0.936, 0.9308, 0.9328], 'difficultyInL': [np.float64(0.9850523168908819), np.float64(0.992816091954023), np.float64(0.989345509893455), np.float64(0.9884726224783862)], 'difficultyInNL': [np.float64(0.9228815516047102), np.float64(0.9268122676579925), np.float64(0.9219433571264103), np.float64(0.9238272178355783)], 'ytrain_synth_prop': [synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64], 'ytest_synth_prop': [synthetic    0.090909
dtype: float64, synthetic    0.090909
dtype: float64, synthetic    0.090909
dtype: float64, synthetic    0.090909
dtype: float64], 'auc_synth': [np.float64(0.503492), np.fl

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.043 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was 0.0206 in the m_probability of piv4, level `All other comparisons`
Iteration 3

2002
95
0.35
{0.35: {'overlapInA': [0.3341658341658342, 0.34765234765234765, 0.32817182817182816, 0.34665334665334663, 0.3356643356643357], 'overlapInB': [0.1338, 0.1392, 0.1314, 0.1388, 0.1344], 'difficultyInA': [0.9755244755244755, 0.9795204795204795, 0.9715284715284715, 0.9735264735264735, 0.9745254745254746], 'difficultyInB': [0.9312, 0.936, 0.9308, 0.9328, 0.9292], 'difficultyInL': [np.float64(0.9850523168908819), np.float64(0.992816091954023), np.float64(0.989345509893455), np.float64(0.9884726224783862), np.float64(0.9925595238095238)], 'difficultyInNL': [np.float64(0.9228815516047102), np.float64(0.9268122676579925), np.float64(0.9219433571264103), np.float64(0.9238272178355783), np.float64(0.919362292051756)], 'ytrain_synth_prop': [synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64], 'ytest_synth_prop': [synthetic    0.090909
dtype: float64,

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.0354 in the m_probability of piv4, level `All other comparisons`
Iteration 2: Largest change in params was -0.0143 in the m_probability of piv4, level `Exact match on piv4`
Iteration 

2002
95
0.35
{0.35: {'overlapInA': [0.3341658341658342, 0.34765234765234765, 0.32817182817182816, 0.34665334665334663, 0.3356643356643357, 0.33516483516483514], 'overlapInB': [0.1338, 0.1392, 0.1314, 0.1388, 0.1344, 0.1342], 'difficultyInA': [0.9755244755244755, 0.9795204795204795, 0.9715284715284715, 0.9735264735264735, 0.9745254745254746, 0.9765234765234765], 'difficultyInB': [0.9312, 0.936, 0.9308, 0.9328, 0.9292, 0.9378], 'difficultyInL': [np.float64(0.9850523168908819), np.float64(0.992816091954023), np.float64(0.989345509893455), np.float64(0.9884726224783862), np.float64(0.9925595238095238), np.float64(0.9940387481371088)], 'difficultyInNL': [np.float64(0.9228815516047102), np.float64(0.9268122676579925), np.float64(0.9219433571264103), np.float64(0.9238272178355783), np.float64(0.919362292051756), np.float64(0.9290829290829291)], 'ytrain_synth_prop': [synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.037 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was 0.017 in the m_probability of piv4, level `All other comparisons`
Iteration 3:

2002
95
0.35
{0.35: {'overlapInA': [0.3341658341658342, 0.34765234765234765, 0.32817182817182816, 0.34665334665334663, 0.3356643356643357, 0.33516483516483514, 0.33466533466533466], 'overlapInB': [0.1338, 0.1392, 0.1314, 0.1388, 0.1344, 0.1342, 0.134], 'difficultyInA': [0.9755244755244755, 0.9795204795204795, 0.9715284715284715, 0.9735264735264735, 0.9745254745254746, 0.9765234765234765, 0.968031968031968], 'difficultyInB': [0.9312, 0.936, 0.9308, 0.9328, 0.9292, 0.9378, 0.929], 'difficultyInL': [np.float64(0.9850523168908819), np.float64(0.992816091954023), np.float64(0.989345509893455), np.float64(0.9884726224783862), np.float64(0.9925595238095238), np.float64(0.9940387481371088), np.float64(0.9850746268656716)], 'difficultyInNL': [np.float64(0.9228815516047102), np.float64(0.9268122676579925), np.float64(0.9219433571264103), np.float64(0.9238272178355783), np.float64(0.919362292051756), np.float64(0.9290829290829291), np.float64(0.9203233256351039)], 'ytrain_synth_prop': [synthetic 

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0402 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was 0.0193 in the m_probability of piv4, level `All other comparisons`
Iteration 

2002
95
0.35
{0.35: {'overlapInA': [0.3341658341658342, 0.34765234765234765, 0.32817182817182816, 0.34665334665334663, 0.3356643356643357, 0.33516483516483514, 0.33466533466533466, 0.33266733266733267], 'overlapInB': [0.1338, 0.1392, 0.1314, 0.1388, 0.1344, 0.1342, 0.134, 0.1332], 'difficultyInA': [0.9755244755244755, 0.9795204795204795, 0.9715284715284715, 0.9735264735264735, 0.9745254745254746, 0.9765234765234765, 0.968031968031968, 0.9685314685314685], 'difficultyInB': [0.9312, 0.936, 0.9308, 0.9328, 0.9292, 0.9378, 0.929, 0.9312], 'difficultyInL': [np.float64(0.9850523168908819), np.float64(0.992816091954023), np.float64(0.989345509893455), np.float64(0.9884726224783862), np.float64(0.9925595238095238), np.float64(0.9940387481371088), np.float64(0.9850746268656716), np.float64(0.9894894894894894)], 'difficultyInNL': [np.float64(0.9228815516047102), np.float64(0.9268122676579925), np.float64(0.9219433571264103), np.float64(0.9238272178355783), np.float64(0.919362292051756), np.float

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0411 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was 0.0177 in the m_probability of piv4, level `All other comparisons`
Iteration 

2002
95
0.35
{0.35: {'overlapInA': [0.3341658341658342, 0.34765234765234765, 0.32817182817182816, 0.34665334665334663, 0.3356643356643357, 0.33516483516483514, 0.33466533466533466, 0.33266733266733267, 0.3256743256743257], 'overlapInB': [0.1338, 0.1392, 0.1314, 0.1388, 0.1344, 0.1342, 0.134, 0.1332, 0.1304], 'difficultyInA': [0.9755244755244755, 0.9795204795204795, 0.9715284715284715, 0.9735264735264735, 0.9745254745254746, 0.9765234765234765, 0.968031968031968, 0.9685314685314685, 0.973026973026973], 'difficultyInB': [0.9312, 0.936, 0.9308, 0.9328, 0.9292, 0.9378, 0.929, 0.9312, 0.933], 'difficultyInL': [np.float64(0.9850523168908819), np.float64(0.992816091954023), np.float64(0.989345509893455), np.float64(0.9884726224783862), np.float64(0.9925595238095238), np.float64(0.9940387481371088), np.float64(0.9850746268656716), np.float64(0.9894894894894894), np.float64(0.9877300613496932)], 'difficultyInNL': [np.float64(0.9228815516047102), np.float64(0.9268122676579925), np.float64(0.9219

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0411 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was -0.0188 in the m_probability of piv4, level `Exact match on piv4`
Iteration 3

2002
95
0.35
{0.35: {'overlapInA': [0.3341658341658342, 0.34765234765234765, 0.32817182817182816, 0.34665334665334663, 0.3356643356643357, 0.33516483516483514, 0.33466533466533466, 0.33266733266733267, 0.3256743256743257, 0.31718281718281716], 'overlapInB': [0.1338, 0.1392, 0.1314, 0.1388, 0.1344, 0.1342, 0.134, 0.1332, 0.1304, 0.127], 'difficultyInA': [0.9755244755244755, 0.9795204795204795, 0.9715284715284715, 0.9735264735264735, 0.9745254745254746, 0.9765234765234765, 0.968031968031968, 0.9685314685314685, 0.973026973026973, 0.971028971028971], 'difficultyInB': [0.9312, 0.936, 0.9308, 0.9328, 0.9292, 0.9378, 0.929, 0.9312, 0.933, 0.9308], 'difficultyInL': [np.float64(0.9850523168908819), np.float64(0.992816091954023), np.float64(0.989345509893455), np.float64(0.9884726224783862), np.float64(0.9925595238095238), np.float64(0.9940387481371088), np.float64(0.9850746268656716), np.float64(0.9894894894894894), np.float64(0.9877300613496932), np.float64(0.9937007874015747)], 'difficultyIn

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0299 in probability_two_random_records_match
Iteration 2: Largest change in params was -0.0141 in the m_probability of piv4, level `Exact match on piv4`
Iteration 3: Largest change i

2002
95
0.75
{0.35: {'overlapInA': [0.3341658341658342, 0.34765234765234765, 0.32817182817182816, 0.34665334665334663, 0.3356643356643357, 0.33516483516483514, 0.33466533466533466, 0.33266733266733267, 0.3256743256743257, 0.31718281718281716], 'overlapInB': [0.1338, 0.1392, 0.1314, 0.1388, 0.1344, 0.1342, 0.134, 0.1332, 0.1304, 0.127], 'difficultyInA': [0.9755244755244755, 0.9795204795204795, 0.9715284715284715, 0.9735264735264735, 0.9745254745254746, 0.9765234765234765, 0.968031968031968, 0.9685314685314685, 0.973026973026973, 0.971028971028971], 'difficultyInB': [0.9312, 0.936, 0.9308, 0.9328, 0.9292, 0.9378, 0.929, 0.9312, 0.933, 0.9308], 'difficultyInL': [np.float64(0.9850523168908819), np.float64(0.992816091954023), np.float64(0.989345509893455), np.float64(0.9884726224783862), np.float64(0.9925595238095238), np.float64(0.9940387481371088), np.float64(0.9850746268656716), np.float64(0.9894894894894894), np.float64(0.9877300613496932), np.float64(0.9937007874015747)], 'difficultyIn

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0328 in probability_two_random_records_match
Iteration 2: Largest change in params was 0.0147 in the m_probability of piv4, level `All other comparisons`
Iteration 3: Largest change 

2002
95
0.75
{0.35: {'overlapInA': [0.3341658341658342, 0.34765234765234765, 0.32817182817182816, 0.34665334665334663, 0.3356643356643357, 0.33516483516483514, 0.33466533466533466, 0.33266733266733267, 0.3256743256743257, 0.31718281718281716], 'overlapInB': [0.1338, 0.1392, 0.1314, 0.1388, 0.1344, 0.1342, 0.134, 0.1332, 0.1304, 0.127], 'difficultyInA': [0.9755244755244755, 0.9795204795204795, 0.9715284715284715, 0.9735264735264735, 0.9745254745254746, 0.9765234765234765, 0.968031968031968, 0.9685314685314685, 0.973026973026973, 0.971028971028971], 'difficultyInB': [0.9312, 0.936, 0.9308, 0.9328, 0.9292, 0.9378, 0.929, 0.9312, 0.933, 0.9308], 'difficultyInL': [np.float64(0.9850523168908819), np.float64(0.992816091954023), np.float64(0.989345509893455), np.float64(0.9884726224783862), np.float64(0.9925595238095238), np.float64(0.9940387481371088), np.float64(0.9850746268656716), np.float64(0.9894894894894894), np.float64(0.9877300613496932), np.float64(0.9937007874015747)], 'difficultyIn

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0287 in probability_two_random_records_match
Iteration 2: Largest change in params was 0.01 in the m_probability of piv4, level `All other comparisons`
Iteration 3: Largest change in

2002
95
0.75
{0.35: {'overlapInA': [0.3341658341658342, 0.34765234765234765, 0.32817182817182816, 0.34665334665334663, 0.3356643356643357, 0.33516483516483514, 0.33466533466533466, 0.33266733266733267, 0.3256743256743257, 0.31718281718281716], 'overlapInB': [0.1338, 0.1392, 0.1314, 0.1388, 0.1344, 0.1342, 0.134, 0.1332, 0.1304, 0.127], 'difficultyInA': [0.9755244755244755, 0.9795204795204795, 0.9715284715284715, 0.9735264735264735, 0.9745254745254746, 0.9765234765234765, 0.968031968031968, 0.9685314685314685, 0.973026973026973, 0.971028971028971], 'difficultyInB': [0.9312, 0.936, 0.9308, 0.9328, 0.9292, 0.9378, 0.929, 0.9312, 0.933, 0.9308], 'difficultyInL': [np.float64(0.9850523168908819), np.float64(0.992816091954023), np.float64(0.989345509893455), np.float64(0.9884726224783862), np.float64(0.9925595238095238), np.float64(0.9940387481371088), np.float64(0.9850746268656716), np.float64(0.9894894894894894), np.float64(0.9877300613496932), np.float64(0.9937007874015747)], 'difficultyIn

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0296 in probability_two_random_records_match
Iteration 2: Largest change in params was -0.0105 in the m_probability of piv4, level `Exact match on piv4`
Iteration 3: Largest change i

2002
95
0.75
{0.35: {'overlapInA': [0.3341658341658342, 0.34765234765234765, 0.32817182817182816, 0.34665334665334663, 0.3356643356643357, 0.33516483516483514, 0.33466533466533466, 0.33266733266733267, 0.3256743256743257, 0.31718281718281716], 'overlapInB': [0.1338, 0.1392, 0.1314, 0.1388, 0.1344, 0.1342, 0.134, 0.1332, 0.1304, 0.127], 'difficultyInA': [0.9755244755244755, 0.9795204795204795, 0.9715284715284715, 0.9735264735264735, 0.9745254745254746, 0.9765234765234765, 0.968031968031968, 0.9685314685314685, 0.973026973026973, 0.971028971028971], 'difficultyInB': [0.9312, 0.936, 0.9308, 0.9328, 0.9292, 0.9378, 0.929, 0.9312, 0.933, 0.9308], 'difficultyInL': [np.float64(0.9850523168908819), np.float64(0.992816091954023), np.float64(0.989345509893455), np.float64(0.9884726224783862), np.float64(0.9925595238095238), np.float64(0.9940387481371088), np.float64(0.9850746268656716), np.float64(0.9894894894894894), np.float64(0.9877300613496932), np.float64(0.9937007874015747)], 'difficultyIn

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0294 in probability_two_random_records_match
Iteration 2: Largest change in params was -0.00976 in the m_probability of piv4, level `Exact match on piv4`
Iteration 3: Largest change 

2002
95
0.75
{0.35: {'overlapInA': [0.3341658341658342, 0.34765234765234765, 0.32817182817182816, 0.34665334665334663, 0.3356643356643357, 0.33516483516483514, 0.33466533466533466, 0.33266733266733267, 0.3256743256743257, 0.31718281718281716], 'overlapInB': [0.1338, 0.1392, 0.1314, 0.1388, 0.1344, 0.1342, 0.134, 0.1332, 0.1304, 0.127], 'difficultyInA': [0.9755244755244755, 0.9795204795204795, 0.9715284715284715, 0.9735264735264735, 0.9745254745254746, 0.9765234765234765, 0.968031968031968, 0.9685314685314685, 0.973026973026973, 0.971028971028971], 'difficultyInB': [0.9312, 0.936, 0.9308, 0.9328, 0.9292, 0.9378, 0.929, 0.9312, 0.933, 0.9308], 'difficultyInL': [np.float64(0.9850523168908819), np.float64(0.992816091954023), np.float64(0.989345509893455), np.float64(0.9884726224783862), np.float64(0.9925595238095238), np.float64(0.9940387481371088), np.float64(0.9850746268656716), np.float64(0.9894894894894894), np.float64(0.9877300613496932), np.float64(0.9937007874015747)], 'difficultyIn

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0291 in probability_two_random_records_match
Iteration 2: Largest change in params was 0.0093 in the m_probability of piv4, level `All other comparisons`
Iteration 3: Largest change 

2002
95
0.75
{0.35: {'overlapInA': [0.3341658341658342, 0.34765234765234765, 0.32817182817182816, 0.34665334665334663, 0.3356643356643357, 0.33516483516483514, 0.33466533466533466, 0.33266733266733267, 0.3256743256743257, 0.31718281718281716], 'overlapInB': [0.1338, 0.1392, 0.1314, 0.1388, 0.1344, 0.1342, 0.134, 0.1332, 0.1304, 0.127], 'difficultyInA': [0.9755244755244755, 0.9795204795204795, 0.9715284715284715, 0.9735264735264735, 0.9745254745254746, 0.9765234765234765, 0.968031968031968, 0.9685314685314685, 0.973026973026973, 0.971028971028971], 'difficultyInB': [0.9312, 0.936, 0.9308, 0.9328, 0.9292, 0.9378, 0.929, 0.9312, 0.933, 0.9308], 'difficultyInL': [np.float64(0.9850523168908819), np.float64(0.992816091954023), np.float64(0.989345509893455), np.float64(0.9884726224783862), np.float64(0.9925595238095238), np.float64(0.9940387481371088), np.float64(0.9850746268656716), np.float64(0.9894894894894894), np.float64(0.9877300613496932), np.float64(0.9937007874015747)], 'difficultyIn

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0328 in probability_two_random_records_match
Iteration 2: Largest change in params was 0.0101 in the m_probability of piv4, level `All other comparisons`
Iteration 3: Largest change 

2002
95
0.75
{0.35: {'overlapInA': [0.3341658341658342, 0.34765234765234765, 0.32817182817182816, 0.34665334665334663, 0.3356643356643357, 0.33516483516483514, 0.33466533466533466, 0.33266733266733267, 0.3256743256743257, 0.31718281718281716], 'overlapInB': [0.1338, 0.1392, 0.1314, 0.1388, 0.1344, 0.1342, 0.134, 0.1332, 0.1304, 0.127], 'difficultyInA': [0.9755244755244755, 0.9795204795204795, 0.9715284715284715, 0.9735264735264735, 0.9745254745254746, 0.9765234765234765, 0.968031968031968, 0.9685314685314685, 0.973026973026973, 0.971028971028971], 'difficultyInB': [0.9312, 0.936, 0.9308, 0.9328, 0.9292, 0.9378, 0.929, 0.9312, 0.933, 0.9308], 'difficultyInL': [np.float64(0.9850523168908819), np.float64(0.992816091954023), np.float64(0.989345509893455), np.float64(0.9884726224783862), np.float64(0.9925595238095238), np.float64(0.9940387481371088), np.float64(0.9850746268656716), np.float64(0.9894894894894894), np.float64(0.9877300613496932), np.float64(0.9937007874015747)], 'difficultyIn

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0265 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was -0.0146 in the m_probability of piv4, level `Exact match on piv4`
Iteration 3

2002
95
0.75
{0.35: {'overlapInA': [0.3341658341658342, 0.34765234765234765, 0.32817182817182816, 0.34665334665334663, 0.3356643356643357, 0.33516483516483514, 0.33466533466533466, 0.33266733266733267, 0.3256743256743257, 0.31718281718281716], 'overlapInB': [0.1338, 0.1392, 0.1314, 0.1388, 0.1344, 0.1342, 0.134, 0.1332, 0.1304, 0.127], 'difficultyInA': [0.9755244755244755, 0.9795204795204795, 0.9715284715284715, 0.9735264735264735, 0.9745254745254746, 0.9765234765234765, 0.968031968031968, 0.9685314685314685, 0.973026973026973, 0.971028971028971], 'difficultyInB': [0.9312, 0.936, 0.9308, 0.9328, 0.9292, 0.9378, 0.929, 0.9312, 0.933, 0.9308], 'difficultyInL': [np.float64(0.9850523168908819), np.float64(0.992816091954023), np.float64(0.989345509893455), np.float64(0.9884726224783862), np.float64(0.9925595238095238), np.float64(0.9940387481371088), np.float64(0.9850746268656716), np.float64(0.9894894894894894), np.float64(0.9877300613496932), np.float64(0.9937007874015747)], 'difficultyIn

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0345 in probability_two_random_records_match
Iteration 2: Largest change in params was -0.0108 in the m_probability of piv4, level `Exact match on piv4`
Iteration 3: Largest change i

2002
95
0.75
{0.35: {'overlapInA': [0.3341658341658342, 0.34765234765234765, 0.32817182817182816, 0.34665334665334663, 0.3356643356643357, 0.33516483516483514, 0.33466533466533466, 0.33266733266733267, 0.3256743256743257, 0.31718281718281716], 'overlapInB': [0.1338, 0.1392, 0.1314, 0.1388, 0.1344, 0.1342, 0.134, 0.1332, 0.1304, 0.127], 'difficultyInA': [0.9755244755244755, 0.9795204795204795, 0.9715284715284715, 0.9735264735264735, 0.9745254745254746, 0.9765234765234765, 0.968031968031968, 0.9685314685314685, 0.973026973026973, 0.971028971028971], 'difficultyInB': [0.9312, 0.936, 0.9308, 0.9328, 0.9292, 0.9378, 0.929, 0.9312, 0.933, 0.9308], 'difficultyInL': [np.float64(0.9850523168908819), np.float64(0.992816091954023), np.float64(0.989345509893455), np.float64(0.9884726224783862), np.float64(0.9925595238095238), np.float64(0.9940387481371088), np.float64(0.9850746268656716), np.float64(0.9894894894894894), np.float64(0.9877300613496932), np.float64(0.9937007874015747)], 'difficultyIn

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0272 in probability_two_random_records_match
Iteration 2: Largest change in params was 0.0119 in the m_probability of piv4, level `All other comparisons`
Iteration 3: Largest change 

2002
95
0.75
{0.35: {'overlapInA': [0.3341658341658342, 0.34765234765234765, 0.32817182817182816, 0.34665334665334663, 0.3356643356643357, 0.33516483516483514, 0.33466533466533466, 0.33266733266733267, 0.3256743256743257, 0.31718281718281716], 'overlapInB': [0.1338, 0.1392, 0.1314, 0.1388, 0.1344, 0.1342, 0.134, 0.1332, 0.1304, 0.127], 'difficultyInA': [0.9755244755244755, 0.9795204795204795, 0.9715284715284715, 0.9735264735264735, 0.9745254745254746, 0.9765234765234765, 0.968031968031968, 0.9685314685314685, 0.973026973026973, 0.971028971028971], 'difficultyInB': [0.9312, 0.936, 0.9308, 0.9328, 0.9292, 0.9378, 0.929, 0.9312, 0.933, 0.9308], 'difficultyInL': [np.float64(0.9850523168908819), np.float64(0.992816091954023), np.float64(0.989345509893455), np.float64(0.9884726224783862), np.float64(0.9925595238095238), np.float64(0.9940387481371088), np.float64(0.9850746268656716), np.float64(0.9894894894894894), np.float64(0.9877300613496932), np.float64(0.9937007874015747)], 'difficultyIn

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0328 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was -0.0228 in the m_probability of piv4, level `Exact match on piv4`
Iteration 3

4502
85
0.35
{0.35: {'overlapInA': [0.2838738338516215], 'overlapInB': [0.2556], 'difficultyInA': [0.8513993780541982], 'difficultyInB': [0.8674], 'difficultyInL': [np.float64(0.9514866979655712)], 'difficultyInNL': [np.float64(0.838527673293928)], 'ytrain_synth_prop': [synthetic    0.091206
dtype: float64], 'ytest_synth_prop': [synthetic    0.090909
dtype: float64], 'auc_synth': [np.float64(0.5260975999999999)], 'ytrain_link_prop': [link    0.2556
dtype: float64], 'ytest_link_prop': [link    0.2556
dtype: float64], 'auc_link': [np.float64(1.0)], 'max_proba_linked': [0.37544118123196574], 'median_proba_linked': [0.37], 'threshold_for_links': [0.37], 'synth_pairs_prop': [0.242], 'real_pairs_prop': [np.float64(0.29)], 'true_fdr': [np.float64(0.531524926686217)], 'hat_fdr_us': [0.4435483870967742], 'hat_fdr_prob': [np.float64(0.6245588187680342)]}}
ITERATION
2
Unique values in column 'piv1': 23
Unique values in column 'piv2': 8
Unique values in column 'piv3': 14
Unique values in column 'p

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.033 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was 0.0234 in the m_probability of piv4, level `All other comparisons`
Iteration 3

4502
85
0.35
{0.35: {'overlapInA': [0.2838738338516215, 0.27743225233229674], 'overlapInB': [0.2556, 0.2498], 'difficultyInA': [0.8513993780541982, 0.8454020435362062], 'difficultyInB': [0.8674, 0.8616], 'difficultyInL': [np.float64(0.9514866979655712), np.float64(0.9535628502802241)], 'difficultyInNL': [np.float64(0.838527673293928), np.float64(0.8309784057584644)], 'ytrain_synth_prop': [synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64], 'ytest_synth_prop': [synthetic    0.090909
dtype: float64, synthetic    0.090909
dtype: float64], 'auc_synth': [np.float64(0.5260975999999999), np.float64(0.5082888000000001)], 'ytrain_link_prop': [link    0.2556
dtype: float64, link    0.2497
dtype: float64], 'ytest_link_prop': [link    0.2556
dtype: float64, link    0.2499
dtype: float64], 'auc_link': [np.float64(1.0), np.float64(1.0)], 'max_proba_linked': [0.37544118123196574, 0.3592257632671698], 'median_proba_linked': [0.37, 0.35], 'threshold_for_links': [0.37, 0.35], 's

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.033 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was 0.0213 in the m_probability of piv4, level `All other comparisons`
Iteration 3

4502
85
0.35
{0.35: {'overlapInA': [0.2838738338516215, 0.27743225233229674, 0.2858729453576188], 'overlapInB': [0.2556, 0.2498, 0.2574], 'difficultyInA': [0.8513993780541982, 0.8454020435362062, 0.844735673034207], 'difficultyInB': [0.8674, 0.8616, 0.8626], 'difficultyInL': [np.float64(0.9514866979655712), np.float64(0.9535628502802241), np.float64(0.9471639471639471)], 'difficultyInNL': [np.float64(0.838527673293928), np.float64(0.8309784057584644), np.float64(0.8332884460005386)], 'ytrain_synth_prop': [synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64], 'ytest_synth_prop': [synthetic    0.090909
dtype: float64, synthetic    0.090909
dtype: float64, synthetic    0.090909
dtype: float64], 'auc_synth': [np.float64(0.5260975999999999), np.float64(0.5082888000000001), np.float64(0.49274720000000005)], 'ytrain_link_prop': [link    0.2556
dtype: float64, link    0.2497
dtype: float64, link    0.257303
dtype: float64], 'ytest_li

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.0302 in the m_probability of piv4, level `All other comparisons`
Iteration 2: Largest change in params was -0.0185 in the m_probability of piv4, level `Exact match on piv4`
Iteration 

4502
85
0.35
{0.35: {'overlapInA': [0.2838738338516215, 0.27743225233229674, 0.2858729453576188, 0.29031541537094624], 'overlapInB': [0.2556, 0.2498, 0.2574, 0.2614], 'difficultyInA': [0.8513993780541982, 0.8454020435362062, 0.844735673034207, 0.8478454020435362], 'difficultyInB': [0.8674, 0.8616, 0.8626, 0.8678], 'difficultyInL': [np.float64(0.9514866979655712), np.float64(0.9535628502802241), np.float64(0.9471639471639471), np.float64(0.9472073450650345)], 'difficultyInNL': [np.float64(0.838527673293928), np.float64(0.8309784057584644), np.float64(0.8332884460005386), np.float64(0.8396967235310046)], 'ytrain_synth_prop': [synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64], 'ytest_synth_prop': [synthetic    0.090909
dtype: float64, synthetic    0.090909
dtype: float64, synthetic    0.090909
dtype: float64, synthetic    0.090909
dtype: float64], 'auc_synth': [np.float64(0.52609759999999

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0329 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was 0.0221 in the m_probability of piv4, level `All other comparisons`
Iteration 

4502
85
0.35
{0.35: {'overlapInA': [0.2838738338516215, 0.27743225233229674, 0.2858729453576188, 0.29031541537094624, 0.2807641048422923], 'overlapInB': [0.2556, 0.2498, 0.2574, 0.2614, 0.2528], 'difficultyInA': [0.8513993780541982, 0.8454020435362062, 0.844735673034207, 0.8478454020435362, 0.8394047090182142], 'difficultyInB': [0.8674, 0.8616, 0.8626, 0.8678, 0.863], 'difficultyInL': [np.float64(0.9514866979655712), np.float64(0.9535628502802241), np.float64(0.9471639471639471), np.float64(0.9472073450650345), np.float64(0.9438291139240507)], 'difficultyInNL': [np.float64(0.838527673293928), np.float64(0.8309784057584644), np.float64(0.8332884460005386), np.float64(0.8396967235310046), np.float64(0.8356531049250535)], 'ytrain_synth_prop': [synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64], 'ytest_synth_prop': [synthetic    0.090909
dtype: float64,

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0299 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was 0.0195 in the m_probability of piv4, level `All other comparisons`
Iteration 

4502
85
0.35
{0.35: {'overlapInA': [0.2838738338516215, 0.27743225233229674, 0.2858729453576188, 0.29031541537094624, 0.2807641048422923, 0.2918702798756108], 'overlapInB': [0.2556, 0.2498, 0.2574, 0.2614, 0.2528, 0.2628], 'difficultyInA': [0.8513993780541982, 0.8454020435362062, 0.844735673034207, 0.8478454020435362, 0.8394047090182142, 0.8454020435362062], 'difficultyInB': [0.8674, 0.8616, 0.8626, 0.8678, 0.863, 0.8598], 'difficultyInL': [np.float64(0.9514866979655712), np.float64(0.9535628502802241), np.float64(0.9471639471639471), np.float64(0.9472073450650345), np.float64(0.9438291139240507), np.float64(0.9467275494672754)], 'difficultyInNL': [np.float64(0.838527673293928), np.float64(0.8309784057584644), np.float64(0.8332884460005386), np.float64(0.8396967235310046), np.float64(0.8356531049250535), np.float64(0.8288117200217038)], 'ytrain_synth_prop': [synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0315 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was -0.0209 in the m_probability of piv4, level `Exact match on piv4`
Iteration 3

4502
85
0.35
{0.35: {'overlapInA': [0.2838738338516215, 0.27743225233229674, 0.2858729453576188, 0.29031541537094624, 0.2807641048422923, 0.2918702798756108, 0.29075966237227896], 'overlapInB': [0.2556, 0.2498, 0.2574, 0.2614, 0.2528, 0.2628, 0.2618], 'difficultyInA': [0.8513993780541982, 0.8454020435362062, 0.844735673034207, 0.8478454020435362, 0.8394047090182142, 0.8454020435362062, 0.8462905375388716], 'difficultyInB': [0.8674, 0.8616, 0.8626, 0.8678, 0.863, 0.8598, 0.8664], 'difficultyInL': [np.float64(0.9514866979655712), np.float64(0.9535628502802241), np.float64(0.9471639471639471), np.float64(0.9472073450650345), np.float64(0.9438291139240507), np.float64(0.9467275494672754), np.float64(0.9472880061115355)], 'difficultyInNL': [np.float64(0.838527673293928), np.float64(0.8309784057584644), np.float64(0.8332884460005386), np.float64(0.8396967235310046), np.float64(0.8356531049250535), np.float64(0.8288117200217038), np.float64(0.8377133568138716)], 'ytrain_synth_prop': [syntheti

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0311 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was -0.0197 in the m_probability of piv4, level `Exact match on piv4`
Iteration 3

4502
85
0.35
{0.35: {'overlapInA': [0.2838738338516215, 0.27743225233229674, 0.2858729453576188, 0.29031541537094624, 0.2807641048422923, 0.2918702798756108, 0.29075966237227896, 0.2883163038649489], 'overlapInB': [0.2556, 0.2498, 0.2574, 0.2614, 0.2528, 0.2628, 0.2618, 0.2596], 'difficultyInA': [0.8513993780541982, 0.8454020435362062, 0.844735673034207, 0.8478454020435362, 0.8394047090182142, 0.8454020435362062, 0.8462905375388716, 0.8458462905375389], 'difficultyInB': [0.8674, 0.8616, 0.8626, 0.8678, 0.863, 0.8598, 0.8664, 0.875], 'difficultyInL': [np.float64(0.9514866979655712), np.float64(0.9535628502802241), np.float64(0.9471639471639471), np.float64(0.9472073450650345), np.float64(0.9438291139240507), np.float64(0.9467275494672754), np.float64(0.9472880061115355), np.float64(0.9445300462249615)], 'difficultyInNL': [np.float64(0.838527673293928), np.float64(0.8309784057584644), np.float64(0.8332884460005386), np.float64(0.8396967235310046), np.float64(0.8356531049250535), np.float

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0321 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was 0.022 in the m_probability of piv4, level `All other comparisons`
Iteration 3

4502
85
0.35
{0.35: {'overlapInA': [0.2838738338516215, 0.27743225233229674, 0.2858729453576188, 0.29031541537094624, 0.2807641048422923, 0.2918702798756108, 0.29075966237227896, 0.2883163038649489, 0.2867614393602843], 'overlapInB': [0.2556, 0.2498, 0.2574, 0.2614, 0.2528, 0.2628, 0.2618, 0.2596, 0.2582], 'difficultyInA': [0.8513993780541982, 0.8454020435362062, 0.844735673034207, 0.8478454020435362, 0.8394047090182142, 0.8454020435362062, 0.8462905375388716, 0.8458462905375389, 0.8440693025322079], 'difficultyInB': [0.8674, 0.8616, 0.8626, 0.8678, 0.863, 0.8598, 0.8664, 0.875, 0.8586], 'difficultyInL': [np.float64(0.9514866979655712), np.float64(0.9535628502802241), np.float64(0.9471639471639471), np.float64(0.9472073450650345), np.float64(0.9438291139240507), np.float64(0.9467275494672754), np.float64(0.9472880061115355), np.float64(0.9445300462249615), np.float64(0.9434546862896979)], 'difficultyInNL': [np.float64(0.838527673293928), np.float64(0.8309784057584644), np.float64(0.833

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0331 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was 0.0242 in the m_probability of piv4, level `All other comparisons`
Iteration 

4502
85
0.35
{0.35: {'overlapInA': [0.2838738338516215, 0.27743225233229674, 0.2858729453576188, 0.29031541537094624, 0.2807641048422923, 0.2918702798756108, 0.29075966237227896, 0.2883163038649489, 0.2867614393602843, 0.2927587738782763], 'overlapInB': [0.2556, 0.2498, 0.2574, 0.2614, 0.2528, 0.2628, 0.2618, 0.2596, 0.2582, 0.2636], 'difficultyInA': [0.8513993780541982, 0.8454020435362062, 0.844735673034207, 0.8478454020435362, 0.8394047090182142, 0.8454020435362062, 0.8462905375388716, 0.8458462905375389, 0.8440693025322079, 0.8402932030208796], 'difficultyInB': [0.8674, 0.8616, 0.8626, 0.8678, 0.863, 0.8598, 0.8664, 0.875, 0.8586, 0.8672], 'difficultyInL': [np.float64(0.9514866979655712), np.float64(0.9535628502802241), np.float64(0.9471639471639471), np.float64(0.9472073450650345), np.float64(0.9438291139240507), np.float64(0.9467275494672754), np.float64(0.9472880061115355), np.float64(0.9445300462249615), np.float64(0.9434546862896979), np.float64(0.9446130500758725)], 'difficult

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.00516 in the m_probability of piv5, level `All other comparisons`
Iteration 2: Largest change in params was 0.00647 in the m_probability of piv4, level `All other comparisons`
Iterat

4502
85
0.75
{0.35: {'overlapInA': [0.2838738338516215, 0.27743225233229674, 0.2858729453576188, 0.29031541537094624, 0.2807641048422923, 0.2918702798756108, 0.29075966237227896, 0.2883163038649489, 0.2867614393602843, 0.2927587738782763], 'overlapInB': [0.2556, 0.2498, 0.2574, 0.2614, 0.2528, 0.2628, 0.2618, 0.2596, 0.2582, 0.2636], 'difficultyInA': [0.8513993780541982, 0.8454020435362062, 0.844735673034207, 0.8478454020435362, 0.8394047090182142, 0.8454020435362062, 0.8462905375388716, 0.8458462905375389, 0.8440693025322079, 0.8402932030208796], 'difficultyInB': [0.8674, 0.8616, 0.8626, 0.8678, 0.863, 0.8598, 0.8664, 0.875, 0.8586, 0.8672], 'difficultyInL': [np.float64(0.9514866979655712), np.float64(0.9535628502802241), np.float64(0.9471639471639471), np.float64(0.9472073450650345), np.float64(0.9438291139240507), np.float64(0.9467275494672754), np.float64(0.9472880061115355), np.float64(0.9445300462249615), np.float64(0.9434546862896979), np.float64(0.9446130500758725)], 'difficult

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.00753 in the m_probability of piv4, level `All other comparisons`
Iteration 2: Largest change in params was -0.00841 in the m_probability of piv4, level `Exact match on piv4`
Iteratio

4502
85
0.75
{0.35: {'overlapInA': [0.2838738338516215, 0.27743225233229674, 0.2858729453576188, 0.29031541537094624, 0.2807641048422923, 0.2918702798756108, 0.29075966237227896, 0.2883163038649489, 0.2867614393602843, 0.2927587738782763], 'overlapInB': [0.2556, 0.2498, 0.2574, 0.2614, 0.2528, 0.2628, 0.2618, 0.2596, 0.2582, 0.2636], 'difficultyInA': [0.8513993780541982, 0.8454020435362062, 0.844735673034207, 0.8478454020435362, 0.8394047090182142, 0.8454020435362062, 0.8462905375388716, 0.8458462905375389, 0.8440693025322079, 0.8402932030208796], 'difficultyInB': [0.8674, 0.8616, 0.8626, 0.8678, 0.863, 0.8598, 0.8664, 0.875, 0.8586, 0.8672], 'difficultyInL': [np.float64(0.9514866979655712), np.float64(0.9535628502802241), np.float64(0.9471639471639471), np.float64(0.9472073450650345), np.float64(0.9438291139240507), np.float64(0.9467275494672754), np.float64(0.9472880061115355), np.float64(0.9445300462249615), np.float64(0.9434546862896979), np.float64(0.9446130500758725)], 'difficult

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.00584 in the m_probability of piv4, level `All other comparisons`
Iteration 2: Largest change in params was -0.00559 in the m_probability of piv4, level `Exact match on piv4`
Iteratio

4502
85
0.75
{0.35: {'overlapInA': [0.2838738338516215, 0.27743225233229674, 0.2858729453576188, 0.29031541537094624, 0.2807641048422923, 0.2918702798756108, 0.29075966237227896, 0.2883163038649489, 0.2867614393602843, 0.2927587738782763], 'overlapInB': [0.2556, 0.2498, 0.2574, 0.2614, 0.2528, 0.2628, 0.2618, 0.2596, 0.2582, 0.2636], 'difficultyInA': [0.8513993780541982, 0.8454020435362062, 0.844735673034207, 0.8478454020435362, 0.8394047090182142, 0.8454020435362062, 0.8462905375388716, 0.8458462905375389, 0.8440693025322079, 0.8402932030208796], 'difficultyInB': [0.8674, 0.8616, 0.8626, 0.8678, 0.863, 0.8598, 0.8664, 0.875, 0.8586, 0.8672], 'difficultyInL': [np.float64(0.9514866979655712), np.float64(0.9535628502802241), np.float64(0.9471639471639471), np.float64(0.9472073450650345), np.float64(0.9438291139240507), np.float64(0.9467275494672754), np.float64(0.9472880061115355), np.float64(0.9445300462249615), np.float64(0.9434546862896979), np.float64(0.9446130500758725)], 'difficult

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.00637 in the m_probability of piv4, level `All other comparisons`
Iteration 2: Largest change in params was -0.0076 in the m_probability of piv4, level `Exact match on piv4`
Iteration

4502
85
0.75
{0.35: {'overlapInA': [0.2838738338516215, 0.27743225233229674, 0.2858729453576188, 0.29031541537094624, 0.2807641048422923, 0.2918702798756108, 0.29075966237227896, 0.2883163038649489, 0.2867614393602843, 0.2927587738782763], 'overlapInB': [0.2556, 0.2498, 0.2574, 0.2614, 0.2528, 0.2628, 0.2618, 0.2596, 0.2582, 0.2636], 'difficultyInA': [0.8513993780541982, 0.8454020435362062, 0.844735673034207, 0.8478454020435362, 0.8394047090182142, 0.8454020435362062, 0.8462905375388716, 0.8458462905375389, 0.8440693025322079, 0.8402932030208796], 'difficultyInB': [0.8674, 0.8616, 0.8626, 0.8678, 0.863, 0.8598, 0.8664, 0.875, 0.8586, 0.8672], 'difficultyInL': [np.float64(0.9514866979655712), np.float64(0.9535628502802241), np.float64(0.9471639471639471), np.float64(0.9472073450650345), np.float64(0.9438291139240507), np.float64(0.9467275494672754), np.float64(0.9472880061115355), np.float64(0.9445300462249615), np.float64(0.9434546862896979), np.float64(0.9446130500758725)], 'difficult

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0065 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was 0.00728 in the m_probability of piv4, level `All other comparisons`
Iteration

4502
85
0.75
{0.35: {'overlapInA': [0.2838738338516215, 0.27743225233229674, 0.2858729453576188, 0.29031541537094624, 0.2807641048422923, 0.2918702798756108, 0.29075966237227896, 0.2883163038649489, 0.2867614393602843, 0.2927587738782763], 'overlapInB': [0.2556, 0.2498, 0.2574, 0.2614, 0.2528, 0.2628, 0.2618, 0.2596, 0.2582, 0.2636], 'difficultyInA': [0.8513993780541982, 0.8454020435362062, 0.844735673034207, 0.8478454020435362, 0.8394047090182142, 0.8454020435362062, 0.8462905375388716, 0.8458462905375389, 0.8440693025322079, 0.8402932030208796], 'difficultyInB': [0.8674, 0.8616, 0.8626, 0.8678, 0.863, 0.8598, 0.8664, 0.875, 0.8586, 0.8672], 'difficultyInL': [np.float64(0.9514866979655712), np.float64(0.9535628502802241), np.float64(0.9471639471639471), np.float64(0.9472073450650345), np.float64(0.9438291139240507), np.float64(0.9467275494672754), np.float64(0.9472880061115355), np.float64(0.9445300462249615), np.float64(0.9434546862896979), np.float64(0.9446130500758725)], 'difficult

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.00516 in the m_probability of piv5, level `All other comparisons`
Iteration 2: Largest change in params was -0.00501 in the m_probability of piv4, level `Exact match on piv4`
Iterati

4502
85
0.75
{0.35: {'overlapInA': [0.2838738338516215, 0.27743225233229674, 0.2858729453576188, 0.29031541537094624, 0.2807641048422923, 0.2918702798756108, 0.29075966237227896, 0.2883163038649489, 0.2867614393602843, 0.2927587738782763], 'overlapInB': [0.2556, 0.2498, 0.2574, 0.2614, 0.2528, 0.2628, 0.2618, 0.2596, 0.2582, 0.2636], 'difficultyInA': [0.8513993780541982, 0.8454020435362062, 0.844735673034207, 0.8478454020435362, 0.8394047090182142, 0.8454020435362062, 0.8462905375388716, 0.8458462905375389, 0.8440693025322079, 0.8402932030208796], 'difficultyInB': [0.8674, 0.8616, 0.8626, 0.8678, 0.863, 0.8598, 0.8664, 0.875, 0.8586, 0.8672], 'difficultyInL': [np.float64(0.9514866979655712), np.float64(0.9535628502802241), np.float64(0.9471639471639471), np.float64(0.9472073450650345), np.float64(0.9438291139240507), np.float64(0.9467275494672754), np.float64(0.9472880061115355), np.float64(0.9445300462249615), np.float64(0.9434546862896979), np.float64(0.9446130500758725)], 'difficult

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.00432 in the m_probability of piv5, level `Exact match on piv5`
Iteration 2: Largest change in params was -0.0064 in the m_probability of piv4, level `Exact match on piv4`
Iteration 3

4502
85
0.75
{0.35: {'overlapInA': [0.2838738338516215, 0.27743225233229674, 0.2858729453576188, 0.29031541537094624, 0.2807641048422923, 0.2918702798756108, 0.29075966237227896, 0.2883163038649489, 0.2867614393602843, 0.2927587738782763], 'overlapInB': [0.2556, 0.2498, 0.2574, 0.2614, 0.2528, 0.2628, 0.2618, 0.2596, 0.2582, 0.2636], 'difficultyInA': [0.8513993780541982, 0.8454020435362062, 0.844735673034207, 0.8478454020435362, 0.8394047090182142, 0.8454020435362062, 0.8462905375388716, 0.8458462905375389, 0.8440693025322079, 0.8402932030208796], 'difficultyInB': [0.8674, 0.8616, 0.8626, 0.8678, 0.863, 0.8598, 0.8664, 0.875, 0.8586, 0.8672], 'difficultyInL': [np.float64(0.9514866979655712), np.float64(0.9535628502802241), np.float64(0.9471639471639471), np.float64(0.9472073450650345), np.float64(0.9438291139240507), np.float64(0.9467275494672754), np.float64(0.9472880061115355), np.float64(0.9445300462249615), np.float64(0.9434546862896979), np.float64(0.9446130500758725)], 'difficult

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.00618 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was 0.00714 in the m_probability of piv4, level `All other comparisons`
Iteratio

4502
85
0.75
{0.35: {'overlapInA': [0.2838738338516215, 0.27743225233229674, 0.2858729453576188, 0.29031541537094624, 0.2807641048422923, 0.2918702798756108, 0.29075966237227896, 0.2883163038649489, 0.2867614393602843, 0.2927587738782763], 'overlapInB': [0.2556, 0.2498, 0.2574, 0.2614, 0.2528, 0.2628, 0.2618, 0.2596, 0.2582, 0.2636], 'difficultyInA': [0.8513993780541982, 0.8454020435362062, 0.844735673034207, 0.8478454020435362, 0.8394047090182142, 0.8454020435362062, 0.8462905375388716, 0.8458462905375389, 0.8440693025322079, 0.8402932030208796], 'difficultyInB': [0.8674, 0.8616, 0.8626, 0.8678, 0.863, 0.8598, 0.8664, 0.875, 0.8586, 0.8672], 'difficultyInL': [np.float64(0.9514866979655712), np.float64(0.9535628502802241), np.float64(0.9471639471639471), np.float64(0.9472073450650345), np.float64(0.9438291139240507), np.float64(0.9467275494672754), np.float64(0.9472880061115355), np.float64(0.9445300462249615), np.float64(0.9434546862896979), np.float64(0.9446130500758725)], 'difficult

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.00731 in the m_probability of piv4, level `All other comparisons`
Iteration 2: Largest change in params was -0.00741 in the m_probability of piv4, level `Exact match on piv4`
Iteratio

4502
85
0.75
{0.35: {'overlapInA': [0.2838738338516215, 0.27743225233229674, 0.2858729453576188, 0.29031541537094624, 0.2807641048422923, 0.2918702798756108, 0.29075966237227896, 0.2883163038649489, 0.2867614393602843, 0.2927587738782763], 'overlapInB': [0.2556, 0.2498, 0.2574, 0.2614, 0.2528, 0.2628, 0.2618, 0.2596, 0.2582, 0.2636], 'difficultyInA': [0.8513993780541982, 0.8454020435362062, 0.844735673034207, 0.8478454020435362, 0.8394047090182142, 0.8454020435362062, 0.8462905375388716, 0.8458462905375389, 0.8440693025322079, 0.8402932030208796], 'difficultyInB': [0.8674, 0.8616, 0.8626, 0.8678, 0.863, 0.8598, 0.8664, 0.875, 0.8586, 0.8672], 'difficultyInL': [np.float64(0.9514866979655712), np.float64(0.9535628502802241), np.float64(0.9471639471639471), np.float64(0.9472073450650345), np.float64(0.9438291139240507), np.float64(0.9467275494672754), np.float64(0.9472880061115355), np.float64(0.9445300462249615), np.float64(0.9434546862896979), np.float64(0.9446130500758725)], 'difficult

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.00397 in the m_probability of piv5, level `Exact match on piv5`
Iteration 2: Largest change in params was 0.00421 in the m_probability of piv4, level `All other comparisons`
Iteration

4502
85
0.75
{0.35: {'overlapInA': [0.2838738338516215, 0.27743225233229674, 0.2858729453576188, 0.29031541537094624, 0.2807641048422923, 0.2918702798756108, 0.29075966237227896, 0.2883163038649489, 0.2867614393602843, 0.2927587738782763], 'overlapInB': [0.2556, 0.2498, 0.2574, 0.2614, 0.2528, 0.2628, 0.2618, 0.2596, 0.2582, 0.2636], 'difficultyInA': [0.8513993780541982, 0.8454020435362062, 0.844735673034207, 0.8478454020435362, 0.8394047090182142, 0.8454020435362062, 0.8462905375388716, 0.8458462905375389, 0.8440693025322079, 0.8402932030208796], 'difficultyInB': [0.8674, 0.8616, 0.8626, 0.8678, 0.863, 0.8598, 0.8664, 0.875, 0.8586, 0.8672], 'difficultyInL': [np.float64(0.9514866979655712), np.float64(0.9535628502802241), np.float64(0.9471639471639471), np.float64(0.9472073450650345), np.float64(0.9438291139240507), np.float64(0.9467275494672754), np.float64(0.9472880061115355), np.float64(0.9445300462249615), np.float64(0.9434546862896979), np.float64(0.9446130500758725)], 'difficult

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.0493 in the m_probability of piv4, level `All other comparisons`
Iteration 2: Largest change in params was -0.0239 in the m_probability of piv4, level `Exact match on piv4`
Iteration 

4502
95
0.35
{0.35: {'overlapInA': [0.33607285650821855], 'overlapInB': [0.3026], 'difficultyInA': [0.9435806308307418], 'difficultyInB': [0.9468], 'difficultyInL': [np.float64(0.9781890284203569)], 'difficultyInNL': [np.float64(0.9331803842844852)], 'ytrain_synth_prop': [synthetic    0.091206
dtype: float64], 'ytest_synth_prop': [synthetic    0.090909
dtype: float64], 'auc_synth': [np.float64(0.5457832)], 'ytrain_link_prop': [link    0.302521
dtype: float64], 'ytest_link_prop': [link    0.302679
dtype: float64], 'auc_link': [np.float64(0.9994266055045872)], 'max_proba_linked': [0.7326976577658454], 'median_proba_linked': [0.73], 'threshold_for_links': [0.5], 'synth_pairs_prop': [0.07], 'real_pairs_prop': [np.float64(0.094)], 'true_fdr': [np.float64(0.23701462430660616)], 'hat_fdr_us': [0.17650025214321735], 'hat_fdr_prob': [np.float64(0.2673023422341545)]}}
ITERATION
2
Unique values in column 'piv1': 30
Unique values in column 'piv2': 10
Unique values in column 'piv3': 18
Unique value

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0468 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was 0.0219 in the m_probability of piv4, level `All other comparisons`
Iteration 

4502
95
0.35
{0.35: {'overlapInA': [0.33607285650821855, 0.3385162150155486], 'overlapInB': [0.3026, 0.3048], 'difficultyInA': [0.9435806308307418, 0.9404709018214127], 'difficultyInB': [0.9468, 0.953], 'difficultyInL': [np.float64(0.9781890284203569), np.float64(0.9776902887139107)], 'difficultyInNL': [np.float64(0.9331803842844852), np.float64(0.942174913693901)], 'ytrain_synth_prop': [synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64], 'ytest_synth_prop': [synthetic    0.090909
dtype: float64, synthetic    0.090909
dtype: float64], 'auc_synth': [np.float64(0.5457832), np.float64(0.49154319999999996)], 'ytrain_link_prop': [link    0.302521
dtype: float64, link    0.3048
dtype: float64], 'ytest_link_prop': [link    0.302679
dtype: float64, link    0.3048
dtype: float64], 'auc_link': [np.float64(0.9994266055045872), np.float64(1.0)], 'max_proba_linked': [0.7326976577658454, 0.7453075704356866], 'median_proba_linked': [0.73, 0.74], 'threshold_for_links': [0.5, 0

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.046 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was 0.0205 in the m_probability of piv4, level `All other comparisons`
Iteration 3

4502
95
0.35
{0.35: {'overlapInA': [0.33607285650821855, 0.3385162150155486, 0.3349622390048867], 'overlapInB': [0.3026, 0.3048, 0.3016], 'difficultyInA': [0.9435806308307418, 0.9404709018214127, 0.9375832963127498], 'difficultyInB': [0.9468, 0.953, 0.9536], 'difficultyInL': [np.float64(0.9781890284203569), np.float64(0.9776902887139107), np.float64(0.976790450928382)], 'difficultyInNL': [np.float64(0.9331803842844852), np.float64(0.942174913693901), np.float64(0.9435853379152348)], 'ytrain_synth_prop': [synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64], 'ytest_synth_prop': [synthetic    0.090909
dtype: float64, synthetic    0.090909
dtype: float64, synthetic    0.090909
dtype: float64], 'auc_synth': [np.float64(0.5457832), np.float64(0.49154319999999996), np.float64(0.5067072)], 'ytrain_link_prop': [link    0.302521
dtype: float64, link    0.3048
dtype: float64, link    0.3016
dtype: float64], 'ytest_link_prop': [link    

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.0479 in the m_probability of piv4, level `All other comparisons`
Iteration 2: Largest change in params was -0.0214 in the m_probability of piv4, level `Exact match on piv4`
Iteration 

4502
95
0.35
{0.35: {'overlapInA': [0.33607285650821855, 0.3385162150155486, 0.3349622390048867, 0.3125277654375833], 'overlapInB': [0.3026, 0.3048, 0.3016, 0.2814], 'difficultyInA': [0.9435806308307418, 0.9404709018214127, 0.9375832963127498, 0.938249666814749], 'difficultyInB': [0.9468, 0.953, 0.9536, 0.9482], 'difficultyInL': [np.float64(0.9781890284203569), np.float64(0.9776902887139107), np.float64(0.976790450928382), np.float64(0.9786780383795309)], 'difficultyInNL': [np.float64(0.9331803842844852), np.float64(0.942174913693901), np.float64(0.9435853379152348), np.float64(0.9362649596437518)], 'ytrain_synth_prop': [synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64], 'ytest_synth_prop': [synthetic    0.090909
dtype: float64, synthetic    0.090909
dtype: float64, synthetic    0.090909
dtype: float64, synthetic    0.090909
dtype: float64], 'auc_synth': [np.float64(0.5457832), np.floa

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0524 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was 0.0213 in the m_probability of piv4, level `All other comparisons`
Iteration 

4502
95
0.35
{0.35: {'overlapInA': [0.33607285650821855, 0.3385162150155486, 0.3349622390048867, 0.3125277654375833, 0.23367392270102177], 'overlapInB': [0.3026, 0.3048, 0.3016, 0.2814, 0.2104], 'difficultyInA': [0.9435806308307418, 0.9404709018214127, 0.9375832963127498, 0.938249666814749, 0.9369169258107508], 'difficultyInB': [0.9468, 0.953, 0.9536, 0.9482, 0.9444], 'difficultyInL': [np.float64(0.9781890284203569), np.float64(0.9776902887139107), np.float64(0.976790450928382), np.float64(0.9786780383795309), np.float64(0.9838403041825095)], 'difficultyInNL': [np.float64(0.9331803842844852), np.float64(0.942174913693901), np.float64(0.9435853379152348), np.float64(0.9362649596437518), np.float64(0.9338905775075987)], 'ytrain_synth_prop': [synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64], 'ytest_synth_prop': [synthetic    0.090909
dtype: float64, 

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.0454 in the m_probability of piv4, level `All other comparisons`
Iteration 2: Largest change in params was 0.0204 in the m_probability of piv4, level `All other comparisons`
Iteration

4502
95
0.35
{0.35: {'overlapInA': [0.33607285650821855, 0.3385162150155486, 0.3349622390048867, 0.3125277654375833, 0.23367392270102177, 0.3276321634828965], 'overlapInB': [0.3026, 0.3048, 0.3016, 0.2814, 0.2104, 0.295], 'difficultyInA': [0.9435806308307418, 0.9404709018214127, 0.9375832963127498, 0.938249666814749, 0.9369169258107508, 0.9386939138160817], 'difficultyInB': [0.9468, 0.953, 0.9536, 0.9482, 0.9444, 0.9492], 'difficultyInL': [np.float64(0.9781890284203569), np.float64(0.9776902887139107), np.float64(0.976790450928382), np.float64(0.9786780383795309), np.float64(0.9838403041825095), np.float64(0.9749152542372881)], 'difficultyInNL': [np.float64(0.9331803842844852), np.float64(0.942174913693901), np.float64(0.9435853379152348), np.float64(0.9362649596437518), np.float64(0.9338905775075987), np.float64(0.9384397163120567)], 'ytrain_synth_prop': [synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.091206
dtype: float64, synthetic    0.09

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0462 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was 0.0176 in the m_probability of piv4, level `All other comparisons`
Iteration 

4502
95
0.35
{0.35: {'overlapInA': [0.33607285650821855, 0.3385162150155486, 0.3349622390048867, 0.3125277654375833, 0.23367392270102177, 0.3276321634828965, 0.33229675699689026], 'overlapInB': [0.3026, 0.3048, 0.3016, 0.2814, 0.2104, 0.295, 0.2992], 'difficultyInA': [0.9435806308307418, 0.9404709018214127, 0.9375832963127498, 0.938249666814749, 0.9369169258107508, 0.9386939138160817, 0.9433585073300755], 'difficultyInB': [0.9468, 0.953, 0.9536, 0.9482, 0.9444, 0.9492, 0.9528], 'difficultyInL': [np.float64(0.9781890284203569), np.float64(0.9776902887139107), np.float64(0.976790450928382), np.float64(0.9786780383795309), np.float64(0.9838403041825095), np.float64(0.9749152542372881), np.float64(0.9792780748663101)], 'difficultyInNL': [np.float64(0.9331803842844852), np.float64(0.942174913693901), np.float64(0.9435853379152348), np.float64(0.9362649596437518), np.float64(0.9338905775075987), np.float64(0.9384397163120567), np.float64(0.9414954337899544)], 'ytrain_synth_prop': [synthetic 

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0429 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was 0.018 in the m_probability of piv4, level `All other comparisons`
Iteration 3

4502
95
0.35
{0.35: {'overlapInA': [0.33607285650821855, 0.3385162150155486, 0.3349622390048867, 0.3125277654375833, 0.23367392270102177, 0.3276321634828965, 0.33229675699689026, 0.32207907596623725], 'overlapInB': [0.3026, 0.3048, 0.3016, 0.2814, 0.2104, 0.295, 0.2992, 0.29], 'difficultyInA': [0.9435806308307418, 0.9404709018214127, 0.9375832963127498, 0.938249666814749, 0.9369169258107508, 0.9386939138160817, 0.9433585073300755, 0.9369169258107508], 'difficultyInB': [0.9468, 0.953, 0.9536, 0.9482, 0.9444, 0.9492, 0.9528, 0.9458], 'difficultyInL': [np.float64(0.9781890284203569), np.float64(0.9776902887139107), np.float64(0.976790450928382), np.float64(0.9786780383795309), np.float64(0.9838403041825095), np.float64(0.9749152542372881), np.float64(0.9792780748663101), np.float64(0.9751724137931035)], 'difficultyInNL': [np.float64(0.9331803842844852), np.float64(0.942174913693901), np.float64(0.9435853379152348), np.float64(0.9362649596437518), np.float64(0.9338905775075987), np.float64

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.047 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was 0.0206 in the m_probability of piv4, level `All other comparisons`
Iteration 3

4502
95
0.35
{0.35: {'overlapInA': [0.33607285650821855, 0.3385162150155486, 0.3349622390048867, 0.3125277654375833, 0.23367392270102177, 0.3276321634828965, 0.33229675699689026, 0.32207907596623725, 0.33318525099955576], 'overlapInB': [0.3026, 0.3048, 0.3016, 0.2814, 0.2104, 0.295, 0.2992, 0.29, 0.3], 'difficultyInA': [0.9435806308307418, 0.9404709018214127, 0.9375832963127498, 0.938249666814749, 0.9369169258107508, 0.9386939138160817, 0.9433585073300755, 0.9369169258107508, 0.9420257663260773], 'difficultyInB': [0.9468, 0.953, 0.9536, 0.9482, 0.9444, 0.9492, 0.9528, 0.9458, 0.9524], 'difficultyInL': [np.float64(0.9781890284203569), np.float64(0.9776902887139107), np.float64(0.976790450928382), np.float64(0.9786780383795309), np.float64(0.9838403041825095), np.float64(0.9749152542372881), np.float64(0.9792780748663101), np.float64(0.9751724137931035), np.float64(0.9766666666666667)], 'difficultyInNL': [np.float64(0.9331803842844852), np.float64(0.942174913693901), np.float64(0.9435853

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.0483 in the m_probability of piv4, level `All other comparisons`
Iteration 2: Largest change in params was -0.0208 in the m_probability of piv4, level `Exact match on piv4`
Iteration 

4502
95
0.35
{0.35: {'overlapInA': [0.33607285650821855, 0.3385162150155486, 0.3349622390048867, 0.3125277654375833, 0.23367392270102177, 0.3276321634828965, 0.33229675699689026, 0.32207907596623725, 0.33318525099955576, 0.3302976454908929], 'overlapInB': [0.3026, 0.3048, 0.3016, 0.2814, 0.2104, 0.295, 0.2992, 0.29, 0.3, 0.2974], 'difficultyInA': [0.9435806308307418, 0.9404709018214127, 0.9375832963127498, 0.938249666814749, 0.9369169258107508, 0.9386939138160817, 0.9433585073300755, 0.9369169258107508, 0.9420257663260773, 0.9415815193247445], 'difficultyInB': [0.9468, 0.953, 0.9536, 0.9482, 0.9444, 0.9492, 0.9528, 0.9458, 0.9524, 0.946], 'difficultyInL': [np.float64(0.9781890284203569), np.float64(0.9776902887139107), np.float64(0.976790450928382), np.float64(0.9786780383795309), np.float64(0.9838403041825095), np.float64(0.9749152542372881), np.float64(0.9792780748663101), np.float64(0.9751724137931035), np.float64(0.9766666666666667), np.float64(0.9737726967047747)], 'difficultyInNL

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.0137 in the m_probability of piv4, level `All other comparisons`
Iteration 2: Largest change in params was -0.00931 in the m_probability of piv4, level `Exact match on piv4`
Iteration

4502
95
0.75
{0.35: {'overlapInA': [0.33607285650821855, 0.3385162150155486, 0.3349622390048867, 0.3125277654375833, 0.23367392270102177, 0.3276321634828965, 0.33229675699689026, 0.32207907596623725, 0.33318525099955576, 0.3302976454908929], 'overlapInB': [0.3026, 0.3048, 0.3016, 0.2814, 0.2104, 0.295, 0.2992, 0.29, 0.3, 0.2974], 'difficultyInA': [0.9435806308307418, 0.9404709018214127, 0.9375832963127498, 0.938249666814749, 0.9369169258107508, 0.9386939138160817, 0.9433585073300755, 0.9369169258107508, 0.9420257663260773, 0.9415815193247445], 'difficultyInB': [0.9468, 0.953, 0.9536, 0.9482, 0.9444, 0.9492, 0.9528, 0.9458, 0.9524, 0.946], 'difficultyInL': [np.float64(0.9781890284203569), np.float64(0.9776902887139107), np.float64(0.976790450928382), np.float64(0.9786780383795309), np.float64(0.9838403041825095), np.float64(0.9749152542372881), np.float64(0.9792780748663101), np.float64(0.9751724137931035), np.float64(0.9766666666666667), np.float64(0.9737726967047747)], 'difficultyInNL

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.014 in the m_probability of piv4, level `All other comparisons`
Iteration 2: Largest change in params was -0.00783 in the m_probability of piv4, level `Exact match on piv4`
Iteration 

4502
95
0.75
{0.35: {'overlapInA': [0.33607285650821855, 0.3385162150155486, 0.3349622390048867, 0.3125277654375833, 0.23367392270102177, 0.3276321634828965, 0.33229675699689026, 0.32207907596623725, 0.33318525099955576, 0.3302976454908929], 'overlapInB': [0.3026, 0.3048, 0.3016, 0.2814, 0.2104, 0.295, 0.2992, 0.29, 0.3, 0.2974], 'difficultyInA': [0.9435806308307418, 0.9404709018214127, 0.9375832963127498, 0.938249666814749, 0.9369169258107508, 0.9386939138160817, 0.9433585073300755, 0.9369169258107508, 0.9420257663260773, 0.9415815193247445], 'difficultyInB': [0.9468, 0.953, 0.9536, 0.9482, 0.9444, 0.9492, 0.9528, 0.9458, 0.9524, 0.946], 'difficultyInL': [np.float64(0.9781890284203569), np.float64(0.9776902887139107), np.float64(0.976790450928382), np.float64(0.9786780383795309), np.float64(0.9838403041825095), np.float64(0.9749152542372881), np.float64(0.9792780748663101), np.float64(0.9751724137931035), np.float64(0.9766666666666667), np.float64(0.9737726967047747)], 'difficultyInNL

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.0111 in the m_probability of piv4, level `All other comparisons`
Iteration 2: Largest change in params was -0.00569 in the m_probability of piv4, level `Exact match on piv4`
Iteration

4502
95
0.75
{0.35: {'overlapInA': [0.33607285650821855, 0.3385162150155486, 0.3349622390048867, 0.3125277654375833, 0.23367392270102177, 0.3276321634828965, 0.33229675699689026, 0.32207907596623725, 0.33318525099955576, 0.3302976454908929], 'overlapInB': [0.3026, 0.3048, 0.3016, 0.2814, 0.2104, 0.295, 0.2992, 0.29, 0.3, 0.2974], 'difficultyInA': [0.9435806308307418, 0.9404709018214127, 0.9375832963127498, 0.938249666814749, 0.9369169258107508, 0.9386939138160817, 0.9433585073300755, 0.9369169258107508, 0.9420257663260773, 0.9415815193247445], 'difficultyInB': [0.9468, 0.953, 0.9536, 0.9482, 0.9444, 0.9492, 0.9528, 0.9458, 0.9524, 0.946], 'difficultyInL': [np.float64(0.9781890284203569), np.float64(0.9776902887139107), np.float64(0.976790450928382), np.float64(0.9786780383795309), np.float64(0.9838403041825095), np.float64(0.9749152542372881), np.float64(0.9792780748663101), np.float64(0.9751724137931035), np.float64(0.9766666666666667), np.float64(0.9737726967047747)], 'difficultyInNL

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0149 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was 0.00872 in the m_probability of piv4, level `All other comparisons`
Iteration

4502
95
0.75
{0.35: {'overlapInA': [0.33607285650821855, 0.3385162150155486, 0.3349622390048867, 0.3125277654375833, 0.23367392270102177, 0.3276321634828965, 0.33229675699689026, 0.32207907596623725, 0.33318525099955576, 0.3302976454908929], 'overlapInB': [0.3026, 0.3048, 0.3016, 0.2814, 0.2104, 0.295, 0.2992, 0.29, 0.3, 0.2974], 'difficultyInA': [0.9435806308307418, 0.9404709018214127, 0.9375832963127498, 0.938249666814749, 0.9369169258107508, 0.9386939138160817, 0.9433585073300755, 0.9369169258107508, 0.9420257663260773, 0.9415815193247445], 'difficultyInB': [0.9468, 0.953, 0.9536, 0.9482, 0.9444, 0.9492, 0.9528, 0.9458, 0.9524, 0.946], 'difficultyInL': [np.float64(0.9781890284203569), np.float64(0.9776902887139107), np.float64(0.976790450928382), np.float64(0.9786780383795309), np.float64(0.9838403041825095), np.float64(0.9749152542372881), np.float64(0.9792780748663101), np.float64(0.9751724137931035), np.float64(0.9766666666666667), np.float64(0.9737726967047747)], 'difficultyInNL

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0104 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was -0.00758 in the m_probability of piv4, level `Exact match on piv4`
Iteration 

4502
95
0.75
{0.35: {'overlapInA': [0.33607285650821855, 0.3385162150155486, 0.3349622390048867, 0.3125277654375833, 0.23367392270102177, 0.3276321634828965, 0.33229675699689026, 0.32207907596623725, 0.33318525099955576, 0.3302976454908929], 'overlapInB': [0.3026, 0.3048, 0.3016, 0.2814, 0.2104, 0.295, 0.2992, 0.29, 0.3, 0.2974], 'difficultyInA': [0.9435806308307418, 0.9404709018214127, 0.9375832963127498, 0.938249666814749, 0.9369169258107508, 0.9386939138160817, 0.9433585073300755, 0.9369169258107508, 0.9420257663260773, 0.9415815193247445], 'difficultyInB': [0.9468, 0.953, 0.9536, 0.9482, 0.9444, 0.9492, 0.9528, 0.9458, 0.9524, 0.946], 'difficultyInL': [np.float64(0.9781890284203569), np.float64(0.9776902887139107), np.float64(0.976790450928382), np.float64(0.9786780383795309), np.float64(0.9838403041825095), np.float64(0.9749152542372881), np.float64(0.9792780748663101), np.float64(0.9751724137931035), np.float64(0.9766666666666667), np.float64(0.9737726967047747)], 'difficultyInNL

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.0123 in the m_probability of piv4, level `All other comparisons`
Iteration 2: Largest change in params was -0.00894 in the m_probability of piv4, level `Exact match on piv4`
Iteration

4502
95
0.75
{0.35: {'overlapInA': [0.33607285650821855, 0.3385162150155486, 0.3349622390048867, 0.3125277654375833, 0.23367392270102177, 0.3276321634828965, 0.33229675699689026, 0.32207907596623725, 0.33318525099955576, 0.3302976454908929], 'overlapInB': [0.3026, 0.3048, 0.3016, 0.2814, 0.2104, 0.295, 0.2992, 0.29, 0.3, 0.2974], 'difficultyInA': [0.9435806308307418, 0.9404709018214127, 0.9375832963127498, 0.938249666814749, 0.9369169258107508, 0.9386939138160817, 0.9433585073300755, 0.9369169258107508, 0.9420257663260773, 0.9415815193247445], 'difficultyInB': [0.9468, 0.953, 0.9536, 0.9482, 0.9444, 0.9492, 0.9528, 0.9458, 0.9524, 0.946], 'difficultyInL': [np.float64(0.9781890284203569), np.float64(0.9776902887139107), np.float64(0.976790450928382), np.float64(0.9786780383795309), np.float64(0.9838403041825095), np.float64(0.9749152542372881), np.float64(0.9792780748663101), np.float64(0.9751724137931035), np.float64(0.9766666666666667), np.float64(0.9737726967047747)], 'difficultyInNL

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.014 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was 0.00861 in the m_probability of piv4, level `All other comparisons`
Iteration 

4502
95
0.75
{0.35: {'overlapInA': [0.33607285650821855, 0.3385162150155486, 0.3349622390048867, 0.3125277654375833, 0.23367392270102177, 0.3276321634828965, 0.33229675699689026, 0.32207907596623725, 0.33318525099955576, 0.3302976454908929], 'overlapInB': [0.3026, 0.3048, 0.3016, 0.2814, 0.2104, 0.295, 0.2992, 0.29, 0.3, 0.2974], 'difficultyInA': [0.9435806308307418, 0.9404709018214127, 0.9375832963127498, 0.938249666814749, 0.9369169258107508, 0.9386939138160817, 0.9433585073300755, 0.9369169258107508, 0.9420257663260773, 0.9415815193247445], 'difficultyInB': [0.9468, 0.953, 0.9536, 0.9482, 0.9444, 0.9492, 0.9528, 0.9458, 0.9524, 0.946], 'difficultyInL': [np.float64(0.9781890284203569), np.float64(0.9776902887139107), np.float64(0.976790450928382), np.float64(0.9786780383795309), np.float64(0.9838403041825095), np.float64(0.9749152542372881), np.float64(0.9792780748663101), np.float64(0.9751724137931035), np.float64(0.9766666666666667), np.float64(0.9737726967047747)], 'difficultyInNL

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was -0.0112 in the m_probability of piv4, level `Exact match on piv4`
Iteration 2: Largest change in params was 0.00739 in the m_probability of piv4, level `All other comparisons`
Iteration

4502
95
0.75
{0.35: {'overlapInA': [0.33607285650821855, 0.3385162150155486, 0.3349622390048867, 0.3125277654375833, 0.23367392270102177, 0.3276321634828965, 0.33229675699689026, 0.32207907596623725, 0.33318525099955576, 0.3302976454908929], 'overlapInB': [0.3026, 0.3048, 0.3016, 0.2814, 0.2104, 0.295, 0.2992, 0.29, 0.3, 0.2974], 'difficultyInA': [0.9435806308307418, 0.9404709018214127, 0.9375832963127498, 0.938249666814749, 0.9369169258107508, 0.9386939138160817, 0.9433585073300755, 0.9369169258107508, 0.9420257663260773, 0.9415815193247445], 'difficultyInB': [0.9468, 0.953, 0.9536, 0.9482, 0.9444, 0.9492, 0.9528, 0.9458, 0.9524, 0.946], 'difficultyInL': [np.float64(0.9781890284203569), np.float64(0.9776902887139107), np.float64(0.976790450928382), np.float64(0.9786780383795309), np.float64(0.9838403041825095), np.float64(0.9749152542372881), np.float64(0.9792780748663101), np.float64(0.9751724137931035), np.float64(0.9766666666666667), np.float64(0.9737726967047747)], 'difficultyInNL

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.0091 in the m_probability of piv4, level `All other comparisons`
Iteration 2: Largest change in params was 0.00434 in the m_probability of piv4, level `All other comparisons`
Iteratio

4502
95
0.75
{0.35: {'overlapInA': [0.33607285650821855, 0.3385162150155486, 0.3349622390048867, 0.3125277654375833, 0.23367392270102177, 0.3276321634828965, 0.33229675699689026, 0.32207907596623725, 0.33318525099955576, 0.3302976454908929], 'overlapInB': [0.3026, 0.3048, 0.3016, 0.2814, 0.2104, 0.295, 0.2992, 0.29, 0.3, 0.2974], 'difficultyInA': [0.9435806308307418, 0.9404709018214127, 0.9375832963127498, 0.938249666814749, 0.9369169258107508, 0.9386939138160817, 0.9433585073300755, 0.9369169258107508, 0.9420257663260773, 0.9415815193247445], 'difficultyInB': [0.9468, 0.953, 0.9536, 0.9482, 0.9444, 0.9492, 0.9528, 0.9458, 0.9524, 0.946], 'difficultyInL': [np.float64(0.9781890284203569), np.float64(0.9776902887139107), np.float64(0.976790450928382), np.float64(0.9786780383795309), np.float64(0.9838403041825095), np.float64(0.9749152542372881), np.float64(0.9792780748663101), np.float64(0.9751724137931035), np.float64(0.9766666666666667), np.float64(0.9737726967047747)], 'difficultyInNL

  if self.factor_cols[j]:


2500.0
RangeIndex(start=0, stop=5000, step=1)


----- Estimating u probabilities using random sampling -----

Estimated u probabilities using random sampling

Your model is not yet fully trained. Missing estimates for:
    - piv1 (no m values are trained).
    - piv2 (no m values are trained).
    - piv3 (no m values are trained).
    - piv4 (no m values are trained).
    - piv5 (no m values are trained).

----- Starting EM training session -----

Estimating the m probabilities of the model by blocking on:
(l."piv1" = r."piv1") AND (l."piv2" = r."piv2") AND (l."piv3" = r."piv3")

Parameter estimates will be made for the following comparison(s):
    - piv4
    - piv5

Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: 
    - piv1
    - piv2
    - piv3

Iteration 1: Largest change in params was 0.0129 in the m_probability of piv4, level `All other comparisons`
Iteration 2: Largest change in params was -0.00921 in the m_probability of piv4, level `Exact match on piv4`
Iteration

4502
95
0.75
{0.35: {'overlapInA': [0.33607285650821855, 0.3385162150155486, 0.3349622390048867, 0.3125277654375833, 0.23367392270102177, 0.3276321634828965, 0.33229675699689026, 0.32207907596623725, 0.33318525099955576, 0.3302976454908929], 'overlapInB': [0.3026, 0.3048, 0.3016, 0.2814, 0.2104, 0.295, 0.2992, 0.29, 0.3, 0.2974], 'difficultyInA': [0.9435806308307418, 0.9404709018214127, 0.9375832963127498, 0.938249666814749, 0.9369169258107508, 0.9386939138160817, 0.9433585073300755, 0.9369169258107508, 0.9420257663260773, 0.9415815193247445], 'difficultyInB': [0.9468, 0.953, 0.9536, 0.9482, 0.9444, 0.9492, 0.9528, 0.9458, 0.9524, 0.946], 'difficultyInL': [np.float64(0.9781890284203569), np.float64(0.9776902887139107), np.float64(0.976790450928382), np.float64(0.9786780383795309), np.float64(0.9838403041825095), np.float64(0.9749152542372881), np.float64(0.9792780748663101), np.float64(0.9751724137931035), np.float64(0.9766666666666667), np.float64(0.9737726967047747)], 'difficultyInNL

In [5]:
print("done")

done


In [6]:
for linkage in dico_results_final.keys():

    first_dico = dico_results_final[linkage]
    
    for nA in first_dico.keys():

        second_dico = first_dico[nA]
    
        for list_params_key in second_dico.keys():
        
            third_dico = second_dico[list_params_key]
    
            for overlap in third_dico.keys():

                fourth_dico = third_dico[overlap]

                print(len(np.nonzero(fourth_dico["true_fdr"])[0]))

                print(linkage)
                print(nA)
                print(list_params_key)
                print(overlap)
        
                print(f"overlap A: {np.mean(fourth_dico["overlapInA"])}")
                print(f"difficulty A: {np.mean(fourth_dico["difficultyInA"])}")
                print(f"overlap B: {np.mean(fourth_dico["overlapInB"])}")
                print(f"difficulty B: {np.mean(fourth_dico["difficultyInB"])}")
        
                print(f"difficulty Links: {np.mean(fourth_dico["difficultyInL"])}")
        
                print(f"difficulty Non Links: {np.mean(fourth_dico["difficultyInNL"])}")

                print(np.mean(np.array([np.mean(fourth_dico["difficultyInNL"]),np.mean(fourth_dico["difficultyInL"])])))
                print(np.mean(np.array([np.mean(fourth_dico["difficultyInA"]),np.mean(fourth_dico["difficultyInB"])])))
        
                print(f"threshold max: {np.mean(fourth_dico["max_proba_linked"])}")
                print(f"threshold median: {np.mean(fourth_dico["median_proba_linked"])}")
                print(f"threshold applied: {np.mean(fourth_dico["threshold_for_links"])}")
        
                print(f"auc synth: {np.mean(fourth_dico["auc_synth"])} and {np.std(fourth_dico["auc_synth"])}")
                print(f"auc link: {np.mean(fourth_dico["auc_link"])} and {np.std(fourth_dico["auc_link"])}")
            
                cond_synthfpprop = np.array(fourth_dico["synth_pairs_prop"]) / nA
                cond_realfpprop = np.array(fourth_dico["real_pairs_prop"]) / nA
        
                equation_check = cond_synthfpprop - cond_realfpprop
        
                print(f"condition: {np.mean(equation_check)} and {np.std(equation_check)}")
        
                FDP = np.array(fourth_dico["true_fdr"])
            
                our_estimate = np.array(fourth_dico["hat_fdr_us"])
            
                probabilistic_estimate = np.array(fourth_dico["hat_fdr_prob"])
        
                print(f"True FDP: {np.mean(FDP[np.nonzero(FDP)[0]])} and {np.std(FDP[np.nonzero(FDP)[0]])}")
                print(f"bias our FDP: {np.mean(our_estimate[np.nonzero(FDP)[0]] - FDP[np.nonzero(FDP)[0]])} and {np.std(our_estimate[np.nonzero(FDP)[0]] - FDP[np.nonzero(FDP)[0]])}")
                print(f"bias other FDP: {np.mean(probabilistic_estimate[np.nonzero(FDP)[0]] - FDP[np.nonzero(FDP)[0]])} and {np.std(probabilistic_estimate[np.nonzero(FDP)[0]] - FDP[np.nonzero(FDP)[0]])}")
        
                print("\n")
        
                # plt.show(plt.hist(our_estimate - FDP))
                # plt.show(plt.hist(probabilistic_estimate - FDP))

10
at_random
2002
85
0.35
overlap A: 0.34965034965034963
difficulty A: 0.8995504495504495
overlap B: 0.14000000000000004
difficulty B: 0.78682
difficulty Links: 0.9640000000000001
difficulty Non Links: 0.8101627906976745
0.8870813953488372
0.8431852247752247
threshold max: 0.4084039851448347
threshold median: 0.403
threshold applied: 0.403
auc synth: 0.49541752 and 0.023479207529931653
auc link: 0.49846126245847183 and 0.008095058921653684
condition: 2.6073926073926034e-06 and 1.4636332918295081e-05
True FDP: 0.6160486678783491 and 0.007745753666594387
bias our FDP: 0.015136488168373163 and 0.08082513150700617
bias other FDP: -0.024452653023183814 and 0.010921704673069398


10
at_random
2002
85
0.75
overlap A: 0.7497502497502497
difficulty A: 0.9023476523476524
overlap B: 0.3002000000000001
difficulty B: 0.7906
difficulty Links: 0.9231179213857427
difficulty Non Links: 0.8425835953129466
0.8828507583493446
0.8464738261738262
threshold max: 0.4447883718451494
threshold median: 0.4390000