In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import fedci

No GPU automatically detected. Setting SETTINGS.GPU to 0, and SETTINGS.NJOBS to cpu_count.
INFO:rpy2.situation:cffi mode is CFFI_MODE.ANY
INFO:rpy2.situation:R home found: /opt/homebrew/Caskroom/miniforge/base/envs/promotion/lib/R
INFO:rpy2.situation:R library path: 
INFO:rpy2.situation:LD_LIBRARY_PATH: 
INFO:rpy2.rinterface_lib.embedded:Default options to initialize R: rpy2, --quiet, --no-save
INFO:rpy2.rinterface_lib.embedded:R is already initialized. No need to initialize.


In [3]:
import polars as pl
import rpy2.robjects as ro
from rpy2.robjects import pandas2ri

In [4]:
import numpy as np
from tqdm.notebook import tqdm
from itertools import chain, combinations
import os

In [5]:
from scipy import stats
from pgmpy.estimators import CITests

In [130]:
class EmptyLikelihoodRatioTest(fedci.LikelihoodRatioTest):
    def __init__(self, y_label, x_label, s_labels, p_val):
        self.y_label = y_label
        self.x_label = x_label
        self.s_labels = s_labels
        self.p_val = p_val
        
class CategoricalLikelihoodRatioTest(fedci.LikelihoodRatioTest):
    def __init__(self, y_label, t0s, t1s, num_cats):
        assert len(t0s) > 0
        assert len(t1s) > 0
        assert len(t0s[0].X_labels) + 1 == len(t1s[0].X_labels)
        # TODO: assert more data integrity
        #assert t0s[0].y_label == t1s[0].y_label
        
        self.y_label = y_label
        self.x_label = (set(t1s[0].X_labels) - set(t0s[0].X_labels)).pop()
        self.s_labels = t0s[0].X_labels
        self.p_val = self._run_likelihood_test(t0s, t1s, num_cats)
        self.p_val = round(self.p_val, 4)
        
    def _run_likelihood_test(self, t0s, t1s, num_cats):
        
        # t1 should always encompass more regressors -> less client can fulfill this
        #assert len(self.t1.providing_clients) < len(self.t0.providing_clients)
        
        providing_clients = t1s[0].providing_clients
        
        t0_llf = sum([t.get_fit_stats(providing_clients)['llf'] for t in t0s])
        t1_llf = sum([t.get_fit_stats(providing_clients)['llf'] for t in t1s])
        
        # d_y = num cats
        # DOF Z = size cond set
        # DOF X = 1
        t0_dof = (num_cats-1)*(len(self.s_labels)+1) # (d_y - 1)*(DOF(Z)+1)
        t1_dof = (num_cats-1)*(len(self.s_labels)+2) # (d_y - 1)*(DOF(Z)+DOF(X)+1)
        t = -2*(t0_llf - t1_llf)
        
        p_val = stats.chi2.sf(t, t1_dof-t0_dof)
        
        return p_val

In [7]:
TOTAL_SAMPLES = 1_000

TOTAL_FEATURES = 4
FEATURES_PER_CLIENT = 4

possible_dags = [
    "pdsep_g",
    "collider",
    "fork",
    "chain4",
    "descColl",
    "2descColl",
    "iv"
]

# TODO: possible_dags to dict or at least store num of vars for each one
chosen_dag = possible_dags[3]


server_id_pattern = 'dag_{}_{}c'

client_configurations = [1,3, 5]

max_regressors = None


alpha_comparisons = [0.01, 0.05, 0.1]
equality_tolerance = 1e-4


log_filepattern = './log-{}.csv'


In [8]:
def get_sample_data(dag_type, num_samples, num_vars):
    with (ro.default_converter + pandas2ri.converter).context():
        ro.r['source']('./app/scripts/example_data.r')
        get_example_data_f = ro.globalenv['get_example_data']

        result = get_example_data_f(dag_type, 1, num_samples, num_vars)
        
    return list(result.items())[0][1]

In [9]:
def get_servers(client_configurations, data):
    servers = {}    

    for splits in client_configurations:
        clients = {i:fedci.Client(pl.from_pandas(chunk)) for i,chunk in enumerate(np.array_split(data.to_pandas(), splits))}
        servers[server_id_pattern.format(chosen_dag, splits)] = fedci.Server(clients, max_regressors=max_regressors)
    return servers

In [10]:
def get_possible_tests(available_data):

    possible_tests = []
    max_conditioning_set_size = min(len(available_data), max_regressors) if max_regressors is not None else len(available_data)

    for y_var in available_data:
        set_of_regressors = available_data - {y_var}
        for x_var in set_of_regressors:
            set_of_conditioning_variables = set_of_regressors - {x_var}
            conditioning_sets = chain.from_iterable(combinations(set_of_conditioning_variables, r) for r in range(0,max_conditioning_set_size))
            possible_tests.extend([(y_var, x_var, sorted(list(s_labels))) for s_labels in conditioning_sets])
            
    return possible_tests


In [11]:
import polars.selectors as cs

In [12]:
from pycit import citest

In [32]:
def test_mixed_independence(continuous, categorical):
    # ANOVA
    categories = np.unique(categorical)
    groups = [continuous[categorical == category] for category in categories]
    f_statistic, p_value = stats.f_oneway(*groups)
    #print(f"ANOVA F-statistic: {f_statistic}, p-value: {p_value}")

    # If categorical is binary, you can also use point-biserial correlation
    #if len(categories) == 2:
    #    point_biserial_corr, p_value = stats.pointbiserialr(categorical, continuous)
    #    print(f"Point-biserial correlation: {point_biserial_corr}, p-value: {p_value}")
    return p_value

In [13]:
def get_ground_truth_tests(data, possible_tests):  
    ground_truth_tests = []

    for test in possible_tests:
        if len(test[2]) > 0:
            if data.schema[test[0]] == pl.String and data.schema[test[1]] == pl.String:
                #print('A')
                X = data[test[0]].to_numpy()
                Y = data[test[1]].to_numpy()
                Z = data[test[2]].to_numpy()
                pvalue = citest(X, Y, Z, test_args={'statistic': 'mixed_cmi', 'n_jobs': 8})
            elif data.schema[test[0]] == pl.String and data.schema[test[1]] == pl.Float64:
                #print('B')
                X = data[test[0]].to_numpy()
                Y = data[test[1]].to_numpy()
                Z = data[test[2]].to_numpy()
                pvalue = citest(X, Y, Z, test_args={'statistic': 'mixed_cmi', 'n_jobs': 8})
            elif data.schema[test[0]] == pl.Float64 and data.schema[test[1]] == pl.String:
                #print('C')
                X = data[test[0]].to_numpy()
                Y = data[test[1]].to_numpy()
                Z = data[test[2]].to_numpy()
                pvalue = citest(X, Y, Z, test_args={'statistic': 'mixed_cmi', 'n_jobs': 8})
            elif data.schema[test[0]] == pl.Float64 and data.schema[test[1]] == pl.Float64:
                #print('D')
                _, pvalue = CITests.pearsonr(test[1], test[0], list(test[2]), data.cast(pl.Float64).to_pandas(), boolean=False)
            else:
                assert False, 'no fitting test'
        else:
            if data.schema[test[0]] == pl.String and data.schema[test[1]] == pl.String:
                print('CAT TO CAT WO COND SET')
            elif data.schema[test[0]] == pl.String and data.schema[test[1]] == pl.Float64:
                #print('E')
                X = data[test[0]].to_numpy()
                Y = data[test[1]].to_numpy().astype(float)
                pvalue = test_mixed_independence(Y, X)
            elif data.schema[test[0]] == pl.Float64 and data.schema[test[1]] == pl.String:
                #print('F')
                X = data[test[0]].to_numpy().astype(float)
                Y = data[test[1]].to_numpy()
                pvalue = test_mixed_independence(X, Y)
            elif data.schema[test[0]] == pl.Float64 and data.schema[test[1]] == pl.Float64:
                #print('G')
                v0 = data[test[0]]
                v1 = data[test[1]]
                _, pvalue = stats.pearsonr(v0, v1)
            else:
                assert False, 'no fitting test w/o conditiong set'
        pvalue = round(pvalue,4)

        #print(test, pvalue)
                
        ground_truth_tests.append(EmptyLikelihoodRatioTest(test[0], test[1], list(test[2]), pvalue))
    return ground_truth_tests
# TODO: with and without conditioning set

In [82]:
def get_ground_truth_tests(data, possible_tests):    
    ground_truth_tests = []

    for test in possible_tests:
        print(test)

        if len(test[2]) > 0:
            X = data[test[0]].to_numpy()
            Y = data[test[1]].to_numpy()
            Z = data[test[2]].to_numpy()
            pvalue = citest(X, Y, Z, test_args={'statistic': 'mixed_cmi', 'n_jobs': 2})
        else:
            X = data[test[0]].to_numpy()
            Y = data[test[1]].to_numpy().astype(float)
            pvalue = test_mixed_independence(X, Y)

        pvalue = round(pvalue,4)
        
        ground_truth_tests.append(EmptyLikelihoodRatioTest(test[0], test[1], list(test[2]), pvalue))
    return ground_truth_tests

In [58]:
def get_ground_truth_tests(data, possible_tests):
    ground_truth_tests = []

    for test in possible_tests:
        if len(test[2]) > 0:
            #v0 = data[test[0]].values
            #v1 = data[test[1]].values
            #s = data[list(test[2])].values
            #p0 = test[3]
            #p1 = citest(v0, v1, s, test_args={'statistic': 'ksg_cmi', 'n_jobs': 8})
            
            _, p1 = CITests.pearsonr(test[1], test[0], list(test[2]), data.cast(pl.Float64).to_pandas(), boolean=False)
        else:
            
            #dummied_data = data.to_dummies(cs.string(), separator='__cat__', drop_first=True).cast(pl.Float64).to_pandas()
            #v0 = data[test[0]].cast(pl.Float64).to_pandas()
            #v1 = data[test[1]].cast(pl.Float64).to_pandas()
            
            d0 = data[test[0]]
            d1 = data[test[1]]

            
            #v0 = d0.to_dummies(cs.string(), separator='__cat__', drop_first=True).cast(pl.Float64).to_pandas()
            #v1 = d1.to_dummies(cs.string(), separator='__cat__', drop_first=True).cast(pl.Float64).to_pandas()
            
            v0 = d0.to_dummies(separator='__cat__', drop_first=True).cast(pl.Float64).to_pandas()
            v1 = d1.to_dummies(separator='__cat__', drop_first=True).cast(pl.Float64).to_pandas()
            
            
            _, p1 = stats.pearsonr(v0, v1)
            
        p1 = round(p1,4)
        
        ground_truth_tests.append(EmptyLikelihoodRatioTest(test[0], test[1], list(test[2]), p1))
    return ground_truth_tests

In [131]:
def group_categorical_likelihood_tests(tests, category_expressions, reversed_category_expressions):
    #category_expressions = servers['dag_chain4_1c'].category_expressions
    #reversed_category_expressions = servers['dag_chain4_1c'].reversed_category_expressions
    #tests = server_ci_tests['dag_chain4_1c']

    updated_tests = []
    for test in tests:
        if test.y_label not in reversed_category_expressions:
            updated_tests.append(test)
            continue
        
        category_label = reversed_category_expressions[test.y_label]
        
        # Only run if the current test is the first category. This avoids duplicate tests
        if category_expressions[category_label][0] != test.y_label:
            continue
        
        categorical_test_group = []
        for test_lookup in tests:
            if test_lookup.y_label in category_expressions[category_label] and test_lookup.x_label == test.x_label and sorted(test_lookup.s_labels) == sorted(test.s_labels):
                categorical_test_group.append(test_lookup)
                
        lrt = CategoricalLikelihoodRatioTest(category_label, [t.t0 for t in categorical_test_group], [t.t1 for t in categorical_test_group], len(category_expressions[category_label]))
        updated_tests.append(lrt)
        
    return updated_tests


def get_server_test_results(servers):
    testing_rounds = {k:v.testing_engine.finished_rounds for k,v in servers.items()}
    likelihood_tests = {k:fedci.get_likelihood_tests(v) for k,v in testing_rounds.items()}
    # fix up categorical tests
    likelihood_tests = {k:group_categorical_likelihood_tests(v, servers[k].category_expressions, servers[k].reversed_category_expressions) for k,v in likelihood_tests.items()}
    return likelihood_tests

In [42]:
def prepare_server_evaluation(ground_truth_tests, server_ci_tests):
    p_value_comparison = {k:[] for k in server_ci_tests.keys()}
    missing_test = {k:0 for k in server_ci_tests.keys()}
    
    for test in ground_truth_tests:
        for k in server_ci_tests.keys():
            matching_test = [t for t in server_ci_tests[k] if t.y_label == test.y_label and t.x_label == test.x_label and sorted(t.s_labels) == sorted(test.s_labels)]
            if len(matching_test) == 0:
                print(f'No matching test in {k} for {test}')
                missing_test[k] += 1
                continue
            assert len(matching_test) == 1
            matching_test = matching_test[0]          
            p_value_comparison[k].append((matching_test.p_val, test.p_val))
        
    missing_test = {k:(v+1)/(len(server_ci_tests[k])+1) for k,v in missing_test.items()}
    return p_value_comparison, missing_test

In [16]:
def count_correct_alpha_thresholdings(data, alpha):
    c = sum([1 for a,b in data if (a < alpha and b < alpha) or (a > alpha and b > alpha)]) / len(data)
    return c

def count_correct_pval(data, tolerance=1e-4):
    c = sum([1 for a,b in data if abs(a-b)<tolerance]) / len(data)
    return c

def evaluate_results(p_value_comparison, alphas, tolerance):
    result_alpha = {}
    result_equality = {}
    for k,v in p_value_comparison.items():
        result_alpha[k] = {}
        result_equality[k] = count_correct_pval(v, tolerance)
        for alpha in alphas:
            result_alpha[k][alpha] = count_correct_alpha_thresholdings(v,alpha)
    return result_alpha, result_equality

In [17]:
def get_records(servers, alpha_tests, equality_tests, missed_tests, total_features, features_per_client):
    results = []
    for server_id in servers.keys():
        server = servers[server_id]
        alpha_test = alpha_tests[server_id]
        
        r = {
            'chosen_dag': chosen_dag,
            'num_clients': len(server.clients),
            'num_samples': TOTAL_SAMPLES,
            'same_p_val': equality_tests[server_id],
            'missed_tests': missed_tests[server_id],
            'total_features': total_features,
            'features_per_client': features_per_client
        }
        for alpha, alpha_result in alpha_test.items():
            r[f'correctness_alpha_{alpha}'] = alpha_result
        results.append(r)

    return results

In [18]:
def csv_add_row(data, file):
    with open(file, 'a') as f:
        row = ','.join([str(d) for d in data]) + '\n'
        f.write(row)
            

def write_records(i, file, data):
    if len(data) == 0:
        return
    curr_file = file.format(i)
    if not os.path.exists(curr_file):
        csv_add_row(list(data[0].keys()), curr_file)
    for entry in data:
        csv_add_row(entry.values(), curr_file)

In [19]:
import polars.selectors as cs

In [35]:
def process(i):
    print('Step 1/6 --> Setup')
    data = pl.read_parquet('./fedci/testdata.parquet')#.to_dummies(cs.categorical()).cast(pl.Int32).to_pandas()
    servers = get_servers(client_configurations, data)

    print('Step 2/6 --> Run Tests')
    for server in servers.values(): server.run_tests()

    print('Step 3/6 --> Collect Results')
    possible_tests = get_possible_tests(set(data.columns))
    ground_truth_tests = get_ground_truth_tests(data, possible_tests)
    server_ci_tests = get_server_test_results(servers) 

    print('Step 4/6 --> Prepare Evaluation')
    p_val_comparisons, missed_tests = prepare_server_evaluation(ground_truth_tests, server_ci_tests)

    print('Step 5/6 --> Run Evaluation')
    alpha_tests, equality_tests = evaluate_results(p_val_comparisons, alpha_comparisons, equality_tolerance)

    print('Step 6/6 --> Log Results')
    records = get_records(servers, alpha_tests, equality_tests, missed_tests, TOTAL_FEATURES, FEATURES_PER_CLIENT)
    
    write_records(i, log_filepattern, records)

In [41]:
for i in range(1):
    process(1)

Step 1/6 --> Setup
Step 2/6 --> Run Tests
Step 3/6 --> Collect Results
Step 4/6 --> Prepare Evaluation
No matching test in dag_chain4_1c for LikelihoodRatioTest - y: B, x: C, S: [], p: 0.0
No matching test in dag_chain4_3c for LikelihoodRatioTest - y: B, x: C, S: [], p: 0.0
No matching test in dag_chain4_5c for LikelihoodRatioTest - y: B, x: C, S: [], p: 0.0
No matching test in dag_chain4_1c for LikelihoodRatioTest - y: B, x: C, S: ['A'], p: 0.0
No matching test in dag_chain4_3c for LikelihoodRatioTest - y: B, x: C, S: ['A'], p: 0.0
No matching test in dag_chain4_5c for LikelihoodRatioTest - y: B, x: C, S: ['A'], p: 0.0
No matching test in dag_chain4_1c for LikelihoodRatioTest - y: B, x: A, S: [], p: 0.2867
No matching test in dag_chain4_3c for LikelihoodRatioTest - y: B, x: A, S: [], p: 0.2867
No matching test in dag_chain4_5c for LikelihoodRatioTest - y: B, x: A, S: [], p: 0.2867
No matching test in dag_chain4_1c for LikelihoodRatioTest - y: B, x: A, S: ['C'], p: 0.787
No matching te

ZeroDivisionError: division by zero

In [64]:
# MANUAL RUN

In [137]:
data = pl.read_parquet('./fedci/testdata.parquet')#.to_dummies(cs.string()).with_columns((~cs.float()).cast(pl.Int16))
servers = get_servers([1], data)

for server in servers.values(): server.run_tests()

possible_tests = get_possible_tests(set(data.columns))
ground_truth_tests = get_ground_truth_tests(data, possible_tests)
server_ci_tests = get_server_test_results(servers) 



'DataFrame.swapaxes' is deprecated and will be removed in a future version. Please use 'DataFrame.transpose' instead.


In [138]:
servers['dag_chain4_1c'].testing_engine.finished_rounds

[TestingRound - y: B__cat__1, X: [], total samples: 10000, beta: [0.5], current iteration: 3, current deviance: 2500.0, relative deviance change: 0.0, llf: -7257.913526447273, rss: 2500.0,
 TestingRound - y: B__cat__2, X: [], total samples: 10000, beta: [0.5], current iteration: 3, current deviance: 2500.0, relative deviance change: 0.0, llf: -7257.913526447273, rss: 2500.0,
 TestingRound - y: C, X: [], total samples: 10000, beta: [-0.1984008], current iteration: 3, current deviance: 10572.304410345243, relative deviance change: 0.0, llf: -14467.64881879784, rss: 10572.304410345221,
 TestingRound - y: A, X: [], total samples: 10000, beta: [-0.01384051], current iteration: 3, current deviance: 10204.25968181569, relative deviance change: 0.0, llf: -14290.486111962866, rss: 10204.25968181565,
 TestingRound - y: B__cat__1, X: ['C'], total samples: 10000, beta: [0.5156922  0.07909343], current iteration: 3, current deviance: 2433.862080154411, relative deviance change: 0.0, llf: -7123.8565

In [139]:
server_ci_tests

{'dag_chain4_1c': [LikelihoodRatioTest - y: B, x: C, S: [], p: 0.0,
  LikelihoodRatioTest - y: B, x: A, S: [], p: 0.1318,
  LikelihoodRatioTest - y: C, x: B__cat__2, S: [], p: 0.0,
  LikelihoodRatioTest - y: C, x: A, S: [], p: 0.0,
  LikelihoodRatioTest - y: A, x: B__cat__2, S: [], p: 0.2866,
  LikelihoodRatioTest - y: A, x: C, S: [], p: 0.0,
  LikelihoodRatioTest - y: B, x: C, S: ['A'], p: 0.0,
  LikelihoodRatioTest - y: B, x: A, S: ['C'], p: 0.0,
  LikelihoodRatioTest - y: C, x: B__cat__2, S: ['A'], p: 0.0,
  LikelihoodRatioTest - y: C, x: A, S: ['B__cat__2'], p: 0.0,
  LikelihoodRatioTest - y: A, x: C, S: ['B__cat__2'], p: 0.0,
  LikelihoodRatioTest - y: A, x: B__cat__2, S: ['C'], p: 0.001]}

In [140]:
ground_truth_tests

[LikelihoodRatioTest - y: B, x: C, S: [], p: 0.0,
 LikelihoodRatioTest - y: B, x: C, S: ['A'], p: 0.0,
 LikelihoodRatioTest - y: B, x: A, S: [], p: 0.2867,
 LikelihoodRatioTest - y: B, x: A, S: ['C'], p: 0.781,
 LikelihoodRatioTest - y: C, x: B, S: [], p: 0.0,
 LikelihoodRatioTest - y: C, x: B, S: ['A'], p: 0.0,
 LikelihoodRatioTest - y: C, x: A, S: [], p: 0.0,
 LikelihoodRatioTest - y: C, x: A, S: ['B'], p: 0.0,
 LikelihoodRatioTest - y: A, x: B, S: [], p: 0.2867,
 LikelihoodRatioTest - y: A, x: B, S: ['C'], p: 0.335,
 LikelihoodRatioTest - y: A, x: C, S: [], p: 0.0,
 LikelihoodRatioTest - y: A, x: C, S: ['B'], p: 0.0]

In [103]:

p_val_comparisons, missed_tests = prepare_server_evaluation(ground_truth_tests, server_ci_tests)

alpha_tests, equality_tests = evaluate_results(p_val_comparisons, alpha_comparisons, equality_tolerance)

records = get_records(servers, alpha_tests, equality_tests, missed_tests, TOTAL_FEATURES, FEATURES_PER_CLIENT)

#write_records(99, log_filepattern, records)

No matching test in dag_chain4_1c for LikelihoodRatioTest - y: B, x: C, S: [], p: 0.0
No matching test in dag_chain4_1c for LikelihoodRatioTest - y: B, x: C, S: ['A'], p: 0.0
No matching test in dag_chain4_1c for LikelihoodRatioTest - y: B, x: A, S: [], p: 0.2867
No matching test in dag_chain4_1c for LikelihoodRatioTest - y: B, x: A, S: ['C'], p: 0.791
No matching test in dag_chain4_1c for LikelihoodRatioTest - y: C, x: B, S: [], p: 0.0
No matching test in dag_chain4_1c for LikelihoodRatioTest - y: C, x: B, S: ['A'], p: 0.0
No matching test in dag_chain4_1c for LikelihoodRatioTest - y: C, x: A, S: ['B'], p: 0.0
No matching test in dag_chain4_1c for LikelihoodRatioTest - y: A, x: B, S: [], p: 0.2867
No matching test in dag_chain4_1c for LikelihoodRatioTest - y: A, x: B, S: ['C'], p: 0.366
No matching test in dag_chain4_1c for LikelihoodRatioTest - y: A, x: C, S: ['B'], p: 0.0


In [104]:
records

[{'chosen_dag': 'chain4',
  'num_clients': 1,
  'num_samples': 1000,
  'same_p_val': 1.0,
  'missed_tests': 0.6470588235294118,
  'total_features': 4,
  'features_per_client': 4,
  'correctness_alpha_0.01': 1.0,
  'correctness_alpha_0.05': 1.0,
  'correctness_alpha_0.1': 1.0}]

In [66]:
server_ci_tests

{'dag_chain4_1c': [LikelihoodRatioTest - y: C, x: B, S: [], p: 0.0,
  LikelihoodRatioTest - y: C, x: A, S: [], p: 0.0,
  LikelihoodRatioTest - y: B, x: C, S: [], p: 0.0,
  LikelihoodRatioTest - y: B, x: A, S: [], p: 0.2132,
  LikelihoodRatioTest - y: A, x: C, S: [], p: 0.0,
  LikelihoodRatioTest - y: A, x: B, S: [], p: 0.2132,
  LikelihoodRatioTest - y: C, x: B, S: ['A'], p: 0.0,
  LikelihoodRatioTest - y: C, x: A, S: ['B'], p: 0.0,
  LikelihoodRatioTest - y: B, x: C, S: ['A'], p: 0.0,
  LikelihoodRatioTest - y: B, x: A, S: ['C'], p: 0.0072,
  LikelihoodRatioTest - y: A, x: B, S: ['C'], p: 0.0072,
  LikelihoodRatioTest - y: A, x: C, S: ['B'], p: 0.0],
 'dag_chain4_3c': [LikelihoodRatioTest - y: C, x: B, S: [], p: 0.0,
  LikelihoodRatioTest - y: C, x: A, S: [], p: 0.0,
  LikelihoodRatioTest - y: B, x: C, S: [], p: 0.0,
  LikelihoodRatioTest - y: B, x: A, S: [], p: 0.6634,
  LikelihoodRatioTest - y: A, x: C, S: [], p: 0.0,
  LikelihoodRatioTest - y: A, x: B, S: [], p: 0.6634,
  Likelihoo