In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import fedci

In [3]:
import polars as pl
import rpy2.robjects as ro
from rpy2.robjects import pandas2ri

In [4]:
import numpy as np
from tqdm.notebook import tqdm
from itertools import chain, combinations
import os

In [5]:
from scipy import stats
from pgmpy.estimators import CITests
import random

In [469]:
class EmptyLikelihoodRatioTest(fedci.LikelihoodRatioTest):
    def __init__(self, y_label, x_label, s_labels, p_val):
        self.y_label = y_label
        self.x_label = x_label
        self.s_labels = s_labels
        self.p_val = p_val
        
class CategoricalLikelihoodRatioTest(fedci.LikelihoodRatioTest):
    def __init__(self, y_label, t0s, t1s, num_cats):
        assert len(t0s) > 0
        assert len(t1s) > 0
        assert len(t0s[0].X_labels) + 1 == len(t1s[0].X_labels)
        # TODO: assert more data integrity
        #assert t0s[0].y_label == t1s[0].y_label
        
        self.t0 = t0s
        self.t1 = t1s
        
        self.y_label = y_label
        self.x_label = (set(t1s[0].X_labels) - set(t0s[0].X_labels)).pop()
        self.s_labels = t0s[0].X_labels
        self.t0_params = len(t0s[0].beta)
        self.t1_params = len(t1s[0].beta)
        self.p_val = self._run_likelihood_test(t0s, t1s, num_cats)
        self.p_val = round(self.p_val, 4)
        
    def _run_likelihood_test(self, t0s, t1s, num_cats):
        
        # t1 should always encompass more regressors -> less client can fulfill this
        #assert len(self.t1.providing_clients) < len(self.t0.providing_clients)
        
        providing_clients = t1s[0].providing_clients

        t0_llf = sum([t.get_fit_stats(providing_clients)['llf'] for t in t0s])
        t1_llf = sum([t.get_fit_stats(providing_clients)['llf'] for t in t1s])
        
        # d_y = num cats
        # DOF Z = size cond set
        # DOF X = 1
        t0_dof = (num_cats-1)*self.t0_params # (d_y - 1)*(DOF(Z)+1)
        t1_dof = (num_cats-1)*self.t1_params # (d_y - 1)*(DOF(Z)+DOF(X)+1)
        t = -2*(t0_llf - t1_llf)
        
        p_val = stats.chi2.sf(t, t1_dof-t0_dof)
        
        # print(f'Regressing {self.y_label} ~ {self.x_label} + {self.s_labels}')
        # print(f'T0 params: {self.t0_params}, T1 params: {self.t1_params}, Num Categories: {num_cats}')
        # print(f'T0 llf: {t0_llf}, T1 llf: {t1_llf}')
        # print(f'DOF M0: {t0_dof}, DOF M1: {t1_dof} -> Test DOF = {t1_dof-t0_dof}')
        # print(f'Test Statistic: {t}, p val: {p_val}')
        
        return p_val
    
class OrdinalLikelihoodRatioTest(fedci.LikelihoodRatioTest):
    def __init__(self, y_label, t0s, t1s, num_cats):
        assert len(t0s) > 0
        assert len(t1s) > 0
        #assert len(t0s) == len(t1s)
        assert len(t0s[0].X_labels) + 1 == len(t1s[0].X_labels)
        # TODO: assert more data integrity
        #assert t0s[0].y_label == t1s[0].y_label
        
        t0s = sorted(t0s, key=lambda x: int(x.y_label.split('__ord__')[-1]))
        t1s = sorted(t1s, key=lambda x: int(x.y_label.split('__ord__')[-1]))
        
        self.y_label = y_label
        self.x_label = (set(t1s[0].X_labels) - set(t0s[0].X_labels)).pop()
        self.s_labels = t0s[0].X_labels
        self.t0_params = len(t0s[0].beta)
        self.t1_params = len(t1s[0].beta)
        self.p_val = self._run_likelihood_test(t0s, t1s, num_cats)
        self.p_val = round(self.p_val, 4)
        
    def _run_likelihood_test(self, t0s, t1s, num_cats):
        
        # t1 should always encompass more regressors -> less client can fulfill this
        #assert len(self.t1.providing_clients) < len(self.t0.providing_clients)
        
        providing_clients = t1s[0].providing_clients
        
        t0_llf = sum([t.get_fit_stats(providing_clients)['llf'] for t in t0s])
        t1_llf = sum([t.get_fit_stats(providing_clients)['llf'] for t in t1s])
        
        # d_y = num cats
        # DOF Z = size cond set
        # DOF X = 1
        t0_dof = (num_cats-1)*self.t0_params # (d_y - 1)*(DOF(Z)+1)
        t1_dof = (num_cats-1)*self.t1_params # (d_y - 1)*(DOF(Z)+DOF(X)+1)
        t = -2*(t0_llf - t1_llf)
        
        p_val = stats.chi2.sf(t, t1_dof-t0_dof)
        
        return p_val

In [7]:
server_id_pattern = '{}_{}c'

max_regressors = 0#None

alpha_comparisons = [0.01, 0.05, 0.1]
equality_tolerance = 1e-4


log_filepattern = 'log-{}.ndjson'


In [8]:
real_independence_tests_collider = [
    EmptyLikelihoodRatioTest('A', 'B', [], 1),
    EmptyLikelihoodRatioTest('A', 'C', [], 0),
    EmptyLikelihoodRatioTest('B', 'C', [], 0),
    EmptyLikelihoodRatioTest('A', 'B', ['C'], 0),
    EmptyLikelihoodRatioTest('A', 'C', ['B'], 0),
    EmptyLikelihoodRatioTest('B', 'C', ['A'], 0),
]

real_independence_tests_fork = [
    EmptyLikelihoodRatioTest('A', 'B', [], 0),
    EmptyLikelihoodRatioTest('A', 'C', [], 0),
    EmptyLikelihoodRatioTest('B', 'C', [], 0),
    EmptyLikelihoodRatioTest('A', 'B', ['C'], 0),
    EmptyLikelihoodRatioTest('A', 'C', ['B'], 0),
    EmptyLikelihoodRatioTest('B', 'C', ['A'], 1),
]

real_independence_tests_diamond = [
    # cond set 0
    EmptyLikelihoodRatioTest('A', 'B', [], 0),
    EmptyLikelihoodRatioTest('A', 'C', [], 0),
    EmptyLikelihoodRatioTest('A', 'D', [], 0),
    EmptyLikelihoodRatioTest('B', 'C', [], 0),
    EmptyLikelihoodRatioTest('B', 'D', [], 0),
    EmptyLikelihoodRatioTest('C', 'D', [], 0),
    # cond set 1
    # start a
    EmptyLikelihoodRatioTest('A', 'B', ['C'], 0),
    EmptyLikelihoodRatioTest('A', 'C', ['B'], 0),
    EmptyLikelihoodRatioTest('A', 'D', ['B'], 0),
    EmptyLikelihoodRatioTest('A', 'B', ['D'], 0),
    EmptyLikelihoodRatioTest('A', 'C', ['D'], 0),
    EmptyLikelihoodRatioTest('A', 'D', ['C'], 0),
    # start b
    EmptyLikelihoodRatioTest('B', 'C', ['A'], 1),
    EmptyLikelihoodRatioTest('B', 'D', ['A'], 0),
    EmptyLikelihoodRatioTest('B', 'C', ['D'], 0),
    EmptyLikelihoodRatioTest('B', 'D', ['C'], 0),
    # start c
    EmptyLikelihoodRatioTest('C', 'D', ['A'], 0),
    EmptyLikelihoodRatioTest('C', 'D', ['B'], 0),
    # cond set 2
    EmptyLikelihoodRatioTest('A', 'B', ['C', 'D'], 0),
    EmptyLikelihoodRatioTest('A', 'C', ['B', 'D'], 0),
    EmptyLikelihoodRatioTest('A', 'D', ['B', 'C'], 1),
    EmptyLikelihoodRatioTest('B', 'C', ['A', 'D'], 0),
    EmptyLikelihoodRatioTest('B', 'D', ['A', 'C'], 0),
    EmptyLikelihoodRatioTest('C', 'D', ['A', 'B'], 0),
]

real_independence_tests_chain = [
    # cond set 0
    EmptyLikelihoodRatioTest('A', 'B', [], 0),
    EmptyLikelihoodRatioTest('A', 'C', [], 0),
    EmptyLikelihoodRatioTest('A', 'D', [], 0),
    EmptyLikelihoodRatioTest('B', 'C', [], 0),
    EmptyLikelihoodRatioTest('B', 'D', [], 0),
    EmptyLikelihoodRatioTest('C', 'D', [], 0),
    # cond set 1
    # start a
    EmptyLikelihoodRatioTest('A', 'B', ['C'], 0),
    EmptyLikelihoodRatioTest('A', 'C', ['B'], 1),
    EmptyLikelihoodRatioTest('A', 'D', ['B'], 1),
    EmptyLikelihoodRatioTest('A', 'B', ['D'], 0),
    EmptyLikelihoodRatioTest('A', 'C', ['D'], 0),
    EmptyLikelihoodRatioTest('A', 'D', ['C'], 1),
    # start b
    EmptyLikelihoodRatioTest('B', 'C', ['A'], 0),
    EmptyLikelihoodRatioTest('B', 'D', ['A'], 0),
    EmptyLikelihoodRatioTest('B', 'C', ['D'], 0),
    EmptyLikelihoodRatioTest('B', 'D', ['C'], 1),
    # start c
    EmptyLikelihoodRatioTest('C', 'D', ['A'], 0),
    EmptyLikelihoodRatioTest('C', 'D', ['B'], 0),
    # cond set 2
    EmptyLikelihoodRatioTest('A', 'B', ['C', 'D'], 0),
    EmptyLikelihoodRatioTest('A', 'C', ['B', 'D'], 0),
    EmptyLikelihoodRatioTest('A', 'D', ['B', 'C'], 1),
    EmptyLikelihoodRatioTest('B', 'C', ['A', 'D'], 0),
    EmptyLikelihoodRatioTest('B', 'D', ['A', 'C'], 0),
    EmptyLikelihoodRatioTest('C', 'D', ['A', 'B'], 0),
]

In [9]:
import dgp

# fork
node1 = dgp.GenericNode('A')
node2 = dgp.GenericNode('B', parents=[node1])
node3 = dgp.GenericNode('C', parents=[node1])
nc1 = dgp.NodeCollection('generic_fork', [node1, node2, node3])

# collider
node1 = dgp.GenericNode('A')
node2 = dgp.GenericNode('B')
node3 = dgp.GenericNode('C', parents=[node1, node2])
nc2 = dgp.NodeCollection('generic_collider', [node1, node2, node3])

# diamond
node1 = dgp.GenericNode('A')
node2 = dgp.GenericNode('B', parents=[node1])
node3 = dgp.GenericNode('C', parents=[node1])
node4 = dgp.GenericNode('D', parents=[node2, node3])
nc3 = dgp.NodeCollection('generic_diamond', [node1, node2, node3, node4])

# chain
node1 = dgp.GenericNode('A')
node2 = dgp.GenericNode('B', parents=[node1])
node3 = dgp.GenericNode('C', parents=[node2])
node4 = dgp.GenericNode('D', parents=[node3])
nc4 = dgp.NodeCollection('generic_chain', [node1, node2, node3, node4])


### Categorical data test
# fork
node_restr = [dgp.CategoricalNode]
node1 = dgp.GenericNode('A', node_restrictions=node_restr)
node2 = dgp.GenericNode('B', parents=[node1], node_restrictions=node_restr)
node3 = dgp.GenericNode('C', parents=[node1], node_restrictions=node_restr)
nc61 = dgp.NodeCollection('categorical_fork', [node1, node2, node3])

# collider
node1 = dgp.GenericNode('A', node_restrictions=node_restr)
node2 = dgp.GenericNode('B', node_restrictions=node_restr)
node3 = dgp.GenericNode('C', parents=[node1, node2], node_restrictions=node_restr)
nc62 = dgp.NodeCollection('categorical_collider', [node1, node2, node3])

# diamond
node1 = dgp.GenericNode('A', node_restrictions=node_restr)
node2 = dgp.GenericNode('B', parents=[node1], node_restrictions=node_restr)
node3 = dgp.GenericNode('C', parents=[node1], node_restrictions=node_restr)
node4 = dgp.GenericNode('D', parents=[node2, node3], node_restrictions=node_restr)
nc63 = dgp.NodeCollection('categorical_diamond', [node1, node2, node3, node4])

### Ordinal data test
# fork
node_restr = [dgp.OrdinalNode]
node1 = dgp.GenericNode('A', node_restrictions=node_restr)
node2 = dgp.GenericNode('B', parents=[node1], node_restrictions=node_restr)
node3 = dgp.GenericNode('C', parents=[node1], node_restrictions=node_restr)
nc71 = dgp.NodeCollection('ordinal_fork', [node1, node2, node3])

# collider
node1 = dgp.GenericNode('A', node_restrictions=node_restr)
node2 = dgp.GenericNode('B', node_restrictions=node_restr)
node3 = dgp.GenericNode('C', parents=[node1, node2], node_restrictions=node_restr)
nc72 = dgp.NodeCollection('ordinal_collider', [node1, node2, node3])

# diamond
node1 = dgp.GenericNode('A', node_restrictions=node_restr)
node2 = dgp.GenericNode('B', parents=[node1], node_restrictions=node_restr)
node3 = dgp.GenericNode('C', parents=[node1], node_restrictions=node_restr)
node4 = dgp.GenericNode('D', parents=[node2, node3], node_restrictions=node_restr)
nc73 = dgp.NodeCollection('ordinal_diamond', [node1, node2, node3, node4])


### ONLY CONTINUOS DATA FOR COMPARISON PURPOSES (TIKHONOV REG.)
# fork
node_restr = [dgp.Node]
node1 = dgp.GenericNode('A', node_restrictions=node_restr)
node2 = dgp.GenericNode('B', parents=[node1], node_restrictions=node_restr)
node3 = dgp.GenericNode('C', parents=[node1], node_restrictions=node_restr)
nc81 = dgp.NodeCollection('continuos_fork', [node1, node2, node3])

# collider
node1 = dgp.GenericNode('A', node_restrictions=node_restr)
node2 = dgp.GenericNode('B', node_restrictions=node_restr)
node3 = dgp.GenericNode('C', parents=[node1, node2], node_restrictions=node_restr)
nc82 = dgp.NodeCollection('continuos_collider', [node1, node2, node3])

# diamond
node1 = dgp.GenericNode('A', node_restrictions=node_restr)
node2 = dgp.GenericNode('B', parents=[node1], node_restrictions=node_restr)
node3 = dgp.GenericNode('C', parents=[node1], node_restrictions=node_restr)
node4 = dgp.GenericNode('D', parents=[node2, node3], node_restrictions=node_restr)
nc83 = dgp.NodeCollection('continuos_diamond', [node1, node2, node3, node4])

# chain
node1 = dgp.GenericNode('A', node_restrictions=node_restr)
node2 = dgp.GenericNode('B', parents=[node1], node_restrictions=node_restr)
node3 = dgp.GenericNode('C', parents=[node2], node_restrictions=node_restr)
node4 = dgp.GenericNode('D', parents=[node3], node_restrictions=node_restr)
nc84 = dgp.NodeCollection('continuos_chain', [node1, node2, node3, node4])


# experiments
node_restr = [dgp.CategoricalNode]
node1 = dgp.GenericNode('A', node_restrictions=node_restr)
node2 = dgp.GenericNode('B', parents=[node1], node_restrictions=node_restr)
node3 = dgp.GenericNode('C', parents=[node2], node_restrictions=node_restr)
node4 = dgp.GenericNode('D', parents=[node3], node_restrictions=node_restr)
nc99 = dgp.NodeCollection('experimental', [node1, node2, node3, node4])

We considered five combinations of variable types and corresponding regression models:  
  * (a) linear-binary (L-B),  
  * (b) linear -multinomial (L-M),  
  * (c) linear-ordinal (L-O),  
  * (d) binary-ordinal (B-O), and  
  * (e) multinomial-ordinal (M-O).  
For each case, we considered the following simple BN mod- els:   
  * (a) X Y (unconditional independence),  
  * (b) X → Y and X ← Y (unconditional dependence),  
  * (c) X → Z ← Y (conditional dependence of X and Y given Z), also known as collider [37], and  
  * (d) X ← Z → Y (conditional indepen- dence of X and Y given Z).  
In all cases, Z is continuous.  

In [37]:
## L-B CASE
# Unc. Indep. Case
node1 = dgp.GenericNode('X', node_restrictions=[dgp.Node])
node2 = dgp.GenericNode('Y', node_restrictions=[dgp.CategoricalNode], max_categories=2)
nc911 = dgp.NodeCollection('L-B Unc. Indep.', [node1, node2])
# Unc. Dep. Case
node1 = dgp.GenericNode('X', node_restrictions=[dgp.Node])
node2 = dgp.GenericNode('Y', node_restrictions=[dgp.CategoricalNode], max_categories=2)
nc912 = dgp.NodeCollection('L-B Unc. Dep.', [node1, node2])
# Con. Dep. Case given Z
node1 = dgp.GenericNode('X', node_restrictions=[dgp.Node])
node2 = dgp.GenericNode('Y', node_restrictions=[dgp.CategoricalNode], max_categories=2)
node3 = dgp.GenericNode('Z', parents=[node1, node2], node_restrictions=[dgp.Node])
nc913 = dgp.NodeCollection('L-B Con. Dep.', [node1, node2, node3])
# Con. Indep. Case given Z
node1 = dgp.GenericNode('Z', node_restrictions=[dgp.Node])
node2 = dgp.GenericNode('X', parents=[node1], node_restrictions=[dgp.Node])
node3 = dgp.GenericNode('Y', parents=[node1], node_restrictions=[dgp.CategoricalNode], max_categories=2)
nc914 = dgp.NodeCollection('L-B Con. Indep.', [node1, node2, node3])

## L-M CASE
# Unc. Indep. Case
node1 = dgp.GenericNode('X', node_restrictions=[dgp.Node])
node2 = dgp.GenericNode('Y', node_restrictions=[dgp.CategoricalNode], min_categories=3)
nc921 = dgp.NodeCollection('L-M Unc. Indep.', [node1, node2])
# Unc. Dep. Case
node1 = dgp.GenericNode('X', node_restrictions=[dgp.Node])
node2 = dgp.GenericNode('Y', node_restrictions=[dgp.CategoricalNode], min_categories=3)
nc922 = dgp.NodeCollection('L-M Unc. Dep.', [node1, node2])
# Con. Dep. Case given Z
node1 = dgp.GenericNode('X', node_restrictions=[dgp.Node])
node2 = dgp.GenericNode('Y', node_restrictions=[dgp.CategoricalNode], min_categories=3)
node3 = dgp.GenericNode('Z', parents=[node1, node2], node_restrictions=[dgp.Node])
nc923 = dgp.NodeCollection('L-M Con. Dep.', [node1, node2, node3])
# Con. Indep. Case given Z
node1 = dgp.GenericNode('Z', node_restrictions=[dgp.Node])
node2 = dgp.GenericNode('X', parents=[node1], node_restrictions=[dgp.Node])
node3 = dgp.GenericNode('Y', parents=[node1], node_restrictions=[dgp.CategoricalNode], min_categories=3)
nc924 = dgp.NodeCollection('L-M Con. Indep.', [node1, node2, node3])

## L-O CASE
# Unc. Indep. Case
node1 = dgp.GenericNode('X', node_restrictions=[dgp.Node])
node2 = dgp.GenericNode('Y', node_restrictions=[dgp.OrdinalNode])
nc931 = dgp.NodeCollection('L-O Unc. Indep.', [node1, node2])
# Unc. Dep. Case
node1 = dgp.GenericNode('X', node_restrictions=[dgp.Node])
node2 = dgp.GenericNode('Y', node_restrictions=[dgp.OrdinalNode])
nc932 = dgp.NodeCollection('L-O Unc. Dep.', [node1, node2])
# Con. Dep. Case given Z
node1 = dgp.GenericNode('X', node_restrictions=[dgp.Node])
node2 = dgp.GenericNode('Y', node_restrictions=[dgp.OrdinalNode])
node3 = dgp.GenericNode('Z', parents=[node1, node2], node_restrictions=[dgp.Node])
nc933 = dgp.NodeCollection('L-O Con. Dep.', [node1, node2, node3])
# Con. Indep. Case given Z
node1 = dgp.GenericNode('Z', node_restrictions=[dgp.Node])
node2 = dgp.GenericNode('X', parents=[node1], node_restrictions=[dgp.Node])
node3 = dgp.GenericNode('Y', parents=[node1], node_restrictions=[dgp.OrdinalNode])
nc934 = dgp.NodeCollection('L-O Con. Indep.', [node1, node2, node3])

## B-O CASE
# Unc. Indep. Case
node1 = dgp.GenericNode('X', node_restrictions=[dgp.CategoricalNode], max_categories=2)
node2 = dgp.GenericNode('Y', node_restrictions=[dgp.OrdinalNode])
nc941 = dgp.NodeCollection('B-O Unc. Indep.', [node1, node2])
# Unc. Dep. Case
node1 = dgp.GenericNode('X', node_restrictions=[dgp.CategoricalNode], max_categories=2)
node2 = dgp.GenericNode('Y', node_restrictions=[dgp.OrdinalNode])
nc942 = dgp.NodeCollection('B-O Unc. Dep.', [node1, node2])
# Con. Dep. Case given Z
node1 = dgp.GenericNode('X', node_restrictions=[dgp.CategoricalNode], max_categories=2)
node2 = dgp.GenericNode('Y', node_restrictions=[dgp.OrdinalNode])
node3 = dgp.GenericNode('Z', parents=[node1, node2], node_restrictions=[dgp.Node])
nc943 = dgp.NodeCollection('B-O Con. Dep.', [node1, node2, node3])
# Con. Indep. Case given Z
node1 = dgp.GenericNode('Z', node_restrictions=[dgp.Node])
node2 = dgp.GenericNode('X', parents=[node1], node_restrictions=[dgp.CategoricalNode], max_categories=2)
node3 = dgp.GenericNode('Y', parents=[node1], node_restrictions=[dgp.OrdinalNode])
nc944 = dgp.NodeCollection('B-O Con. Indep.', [node1, node2, node3])

## M-O CASE
# Unc. Indep. Case
node1 = dgp.GenericNode('X', node_restrictions=[dgp.CategoricalNode])
node2 = dgp.GenericNode('Y', node_restrictions=[dgp.OrdinalNode])
nc951 = dgp.NodeCollection('M-O Unc. Indep.', [node1, node2])
# Unc. Dep. Case
node1 = dgp.GenericNode('X', node_restrictions=[dgp.CategoricalNode])
node2 = dgp.GenericNode('Y', node_restrictions=[dgp.OrdinalNode])
nc952 = dgp.NodeCollection('M-O Unc. Dep.', [node1, node2])
# Con. Dep. Case given Z
node1 = dgp.GenericNode('X', node_restrictions=[dgp.CategoricalNode])
node2 = dgp.GenericNode('Y', node_restrictions=[dgp.OrdinalNode])
node3 = dgp.GenericNode('Z', parents=[node1, node2], node_restrictions=[dgp.Node])
nc953 = dgp.NodeCollection('M-O Con. Dep.', [node1, node2, node3])
# Con. Indep. Case given Z
node1 = dgp.GenericNode('Z', node_restrictions=[dgp.Node])
node2 = dgp.GenericNode('X', parents=[node1], node_restrictions=[dgp.CategoricalNode])
node3 = dgp.GenericNode('Y', parents=[node1], node_restrictions=[dgp.OrdinalNode])
nc954 = dgp.NodeCollection('M-O Con. Indep.', [node1, node2, node3])


## L-L CASE
# Unc. Indep. Case
node1 = dgp.GenericNode('X', node_restrictions=[dgp.Node])
node2 = dgp.GenericNode('Y', node_restrictions=[dgp.Node])
nc961 = dgp.NodeCollection('L-L Unc. Indep.', [node1, node2])
# Unc. Dep. Case
node1 = dgp.GenericNode('X', node_restrictions=[dgp.Node])
node2 = dgp.GenericNode('Y', node_restrictions=[dgp.Node])
nc962 = dgp.NodeCollection('L-L Unc. Dep.', [node1, node2])
# Con. Dep. Case given Z
node1 = dgp.GenericNode('X', node_restrictions=[dgp.Node])
node2 = dgp.GenericNode('Y', node_restrictions=[dgp.Node])
node3 = dgp.GenericNode('Z', parents=[node1, node2], node_restrictions=[dgp.Node])
nc963 = dgp.NodeCollection('L-L Con. Dep.', [node1, node2, node3])
# Con. Indep. Case given Z
node1 = dgp.GenericNode('Z', node_restrictions=[dgp.Node])
node2 = dgp.GenericNode('X', parents=[node1], node_restrictions=[dgp.Node])
node3 = dgp.GenericNode('Y', parents=[node1], node_restrictions=[dgp.Node])
nc964 = dgp.NodeCollection('L-L Con. Indep.', [node1, node2, node3])


real_independence_tests_unc_ind = [
    EmptyLikelihoodRatioTest('X', 'Y', [], 1)
]

real_independence_tests_unc_dep = [
    EmptyLikelihoodRatioTest('X', 'Y', [], 0)
]

real_independence_tests_con_dep = [
    EmptyLikelihoodRatioTest('X', 'Y', [], 0),
    EmptyLikelihoodRatioTest('X', 'Z', [], 0),
    EmptyLikelihoodRatioTest('Y', 'Z', [], 0),
    EmptyLikelihoodRatioTest('X', 'Y', ['Z'], 1),
    EmptyLikelihoodRatioTest('X', 'Z', ['Y'], 0),
    EmptyLikelihoodRatioTest('Y', 'Z', ['X'], 0),
]

real_independence_tests_con_ind = [
    EmptyLikelihoodRatioTest('X', 'Y', [], 0),
    EmptyLikelihoodRatioTest('X', 'Z', [], 0),
    EmptyLikelihoodRatioTest('Y', 'Z', [], 0),
    EmptyLikelihoodRatioTest('X', 'Y', ['Z'], 1),
    EmptyLikelihoodRatioTest('X', 'Z', ['Y'], 0),
    EmptyLikelihoodRatioTest('Y', 'Z', ['X'], 0),
]

In [38]:

ncs = {
    1: nc1,
    2: nc2,
    3: nc3,
    4: nc4,
    61: nc61,
    62: nc62,
    63: nc63,
    71: nc71,
    72: nc72,
    73: nc73,
    81: nc81,
    82: nc82,
    83: nc83,
    84: nc84,
    99: nc99,
    # paper tests
    911: nc911,
    912: nc912,
    913: nc913,
    914: nc914,
    
    921: nc921,
    922: nc922,
    923: nc923,
    924: nc924,
    
    931: nc931,
    932: nc932,
    933: nc933,
    934: nc934,
    
    941: nc941,
    942: nc942,
    943: nc943,
    944: nc944,
    
    951: nc951,
    952: nc952,
    953: nc953,
    954: nc954,
    
    961: nc961,
    962: nc962,
    963: nc963,
    964: nc964,
    }

ncs_independences = {
    1: real_independence_tests_fork,
    2: real_independence_tests_collider,
    3: real_independence_tests_diamond,
    4: real_independence_tests_chain,
    61: real_independence_tests_fork,
    62: real_independence_tests_collider,
    63: real_independence_tests_diamond,
    71: real_independence_tests_fork,
    72: real_independence_tests_collider,
    73: real_independence_tests_diamond,
    81: real_independence_tests_fork,
    82: real_independence_tests_collider,
    83: real_independence_tests_diamond,
    84: real_independence_tests_chain,
    99: real_independence_tests_chain,
    # paper data
    911: real_independence_tests_unc_ind,
    912: real_independence_tests_unc_dep,
    913: real_independence_tests_con_dep,
    914: real_independence_tests_con_ind,
    
    921: real_independence_tests_unc_ind,
    922: real_independence_tests_unc_dep,
    923: real_independence_tests_con_dep,
    924: real_independence_tests_con_ind,
    
    931: real_independence_tests_unc_ind,
    932: real_independence_tests_unc_dep,
    933: real_independence_tests_con_dep,
    934: real_independence_tests_con_ind,
    
    941: real_independence_tests_unc_ind,
    942: real_independence_tests_unc_dep,
    943: real_independence_tests_con_dep,
    944: real_independence_tests_con_ind,
    
    951: real_independence_tests_unc_ind,
    952: real_independence_tests_unc_dep,
    953: real_independence_tests_con_dep,
    954: real_independence_tests_con_ind,
    
    961: real_independence_tests_unc_ind,
    962: real_independence_tests_unc_dep,
    963: real_independence_tests_con_dep,
    964: real_independence_tests_con_ind,
}

def get_sample_data(node_collection, num_samples):
    node_collection.reset()
    return node_collection.get(num_samples)

In [39]:
import math

def partition_dataframe(df, n):
    total_rows = len(df)
    partition_size = math.ceil(total_rows / n)
    
    partitions = []
    for i in range(n):
        start_idx = i * partition_size
        end_idx = min((i + 1) * partition_size, total_rows)
        partition = df[start_idx:end_idx]
        partitions.append(partition)
    
    return partitions

In [40]:
def get_servers(client_configurations, experiment_name, data, tikhonov_lambda=0, features_per_client=None, max_regressors=None):
    servers = {}    

    for splits in client_configurations:
        if features_per_client is None:
            clients = {i:fedci.Client(chunk) for i,chunk in enumerate(partition_dataframe(data, splits))}
            #clients = {i:fedci.Client(pl.from_pandas(chunk)) for i,chunk in enumerate(np.array_split(data.to_pandas(), splits))}
        else:
            clients = {i:fedci.Client(chunk[random.sample(list(chunk.columns),features_per_client)])
                       for i,chunk in enumerate(partition_dataframe(data, splits))}
            #clients = {i:fedci.Client(pl.from_pandas(chunk[random.sample(list(chunk.columns),features_per_client)]))
            #           for i,chunk in enumerate(np.array_split(data.to_pandas(), splits))}

        servers[server_id_pattern.format(experiment_name, splits)] = fedci.Server(clients, tikhonov_lambda=tikhonov_lambda, max_regressors=max_regressors+1 if max_regressors else None)
    return servers

In [41]:
def get_possible_tests(available_data):

    possible_tests = []
    max_conditioning_set_size = min(len(available_data), max_regressors) if max_regressors is not None else len(available_data)

    for y_var in available_data:
        set_of_regressors = available_data - {y_var}
        for x_var in set_of_regressors:
            set_of_conditioning_variables = set_of_regressors - {x_var}
            conditioning_sets = chain.from_iterable(combinations(set_of_conditioning_variables, r) for r in range(0,max_conditioning_set_size))
            possible_tests.extend([(y_var, x_var, sorted(list(s_labels))) for s_labels in conditioning_sets])
            
    return possible_tests


In [42]:
import polars.selectors as cs
import pandas as pd

In [43]:
from pycit import citest, itest

In [44]:
def test_mixed_independence(continuous, categorical):
    # ANOVA
    categories = np.unique(categorical)
    groups = [continuous[categorical == category] for category in categories]
    _, p_value = stats.f_oneway(*groups)
    #print(f"ANOVA F-statistic: {f_statistic}, p-value: {p_value}")

    # If categorical is binary, you can also use point-biserial correlation
    #if len(categories) == 2:
    #    point_biserial_corr, p_value = stats.pointbiserialr(categorical, continuous)
    #    print(f"Point-biserial correlation: {point_biserial_corr}, p-value: {p_value}")
    return p_value

In [45]:
def run_local_fci(df, labels, alpha=0.05):
    with (ro.default_converter + pandas2ri.converter).context():
        ro.r['source']('../scripts/aggregation.r')
        aggregate_ci_results_f = ro.globalenv['aggregate_ci_results']
        
        d = [('citestResults', ro.conversion.get_conversion().py2rpy(df)), ('labels', ro.StrVector(labels))]
        od = OrderedDict(d)
        lv = ro.ListVector(od)

        result = aggregate_ci_results_f([lv], alpha)

        pag = [x[1].tolist() for x in result['G_PAG_List'].items()][0]
        pag_labels = [list(x[1]) for x in result['G_PAG_Label_List'].items()][0]
        
    return pag,pag_labels

In [46]:
import rpy2.robjects as ro
from rpy2.robjects import pandas2ri
from collections import OrderedDict
import rpy2.robjects as robjs

def get_ground_truth_tests_mxm(data, possible_tests, do_symmetric_tests=True):
    ground_truth_tests = []

    with (ro.default_converter + pandas2ri.converter).context():
        ro.r['source']('./ci.r')
        calculate_independence_f = ro.globalenv['independence_test']
        
        # iterate tests
        for test in possible_tests:
            if do_symmetric_tests and test[0] > test[1]:
                continue
            x,y,z = test
            z = robjs.r("NULL") if len(z) == 0 else z
            
            pvalue = calculate_independence_f(data.to_pandas(), x, y, ro.StrVector(z))[0]
            pvalue = round(pvalue,4)
            ground_truth_tests.append(EmptyLikelihoodRatioTest(test[0], test[1], list(test[2]), pvalue))
    return ground_truth_tests

In [47]:
import pandas as pd
import numpy as np
from rpy2.robjects import pandas2ri, Formula
from rpy2.robjects.packages import importr
import rpy2.robjects as ro

# Enable automatic conversion between pandas and R data frames
pandas2ri.activate()

# Import necessary R packages
base = importr('base')
#r_stats = importr('stats')

def transform_dataframe(df):
    # Create a copy of the DataFrame to avoid modifying the original
    df_copy = df.copy()
    
    # Initialize an empty R list to store our columns
    r_list = {}#ro.ListVector({})
    
    # Iterate through columns and convert based on data type
    for col in df_copy.columns:
        if df_copy[col].dtype == 'float64':
            # Float columns become numeric (already handled by pandas2ri)
            r_list[col] = pandas2ri.py2rpy(df_copy[col])
        elif df_copy[col].dtype == 'object':
            # String columns become factors
            r_list[col] = base.as_factor(pandas2ri.py2rpy(df_copy[col]))
        elif df_copy[col].dtype == 'int64':
            # Integer columns become ordered factors
            unique_values = sorted(df_copy[col].unique())
            r_list[col] = base.factor(pandas2ri.py2rpy(df_copy[col]), 
                                      levels=ro.IntVector(unique_values), 
                                      ordered=True)
        else:
            print(col)
            print(df_copy[col].dtype)
            assert False
            
    r_list = ro.ListVector(r_list)
    
    # Convert the R list to an R data frame
    r_dataframe = base.as_data_frame(r_list)
    
    return r_dataframe

In [390]:
def get_riod_tests(data, possible_tests, do_symmetric_tests=True):
    
    data = data.with_columns(cs.integer().cast(pl.Int64))
    
    ground_truth_tests = []
    
    if do_symmetric_tests is False:
        return ground_truth_tests
        
    # Call R function
    with (ro.default_converter + pandas2ri.converter).context():
        # load local-ci script
        ro.r['source']('./local-ci.r')
        # load function from R script
        run_ci_test_f = ro.globalenv['run_ci_test']
        
        #print(data.to_pandas())

        df_r = transform_dataframe(data.to_pandas())
        
        #converting it into r object for passing into r function
        #df_r = ro.conversion.get_conversion().py2rpy(data.to_pandas())
        
        #Invoking the R function and getting the result
        if os.path.exists('./tmp/citestResults_dummy.csv'):
            os.remove('./tmp/citestResults_dummy.csv')
        result = run_ci_test_f(df_r, 999, "./tmp/", 'dummy')
        #Converting it back to a pandas dataframe.
        df_pvals = ro.conversion.get_conversion().rpy2py(result['citestResults'])
        labels = list(result['labels'])
        
    df = pl.from_pandas(df_pvals)
    df = df.drop('ord')
    df = df.with_columns(pl.col('S').str.split(',').cast(pl.List(pl.Int64)))
    df = df.with_columns(pl.col('X', 'Y').cast(pl.Int64))
    
    for row in df.rows():
        x = labels[row[0]-1]
        y = labels[row[1]-1]
        if x > y:
            x,y = y,x
        s = [labels[r-1] for r in row[2] if r is not None]
        pval = round(row[3],4)
        
        ground_truth_tests.append(EmptyLikelihoodRatioTest(x, y, s, pval))
        
    return ground_truth_tests
    

In [49]:
def get_ground_truth_tests(data, possible_tests, do_symmetric_tests=True):  
    ground_truth_tests = []

    for test in possible_tests:
        if do_symmetric_tests and test[0] > test[1]:
            continue
        if len(test[2]) > 0:
            X = data[test[0]].to_numpy()
            Y = data[test[1]].to_numpy()
            Z = data[test[2]].to_numpy()
            pvalue = citest(X, Y, Z, test_args={'statistic': 'mixed_cmi', 'n_jobs': 8})
            # if data.schema[test[0]] == pl.String and data.schema[test[1]] == pl.String:
            #     #print('A')
            #     X = data[test[0]].to_numpy()
            #     Y = data[test[1]].to_numpy()
            #     Z = data[test[2]].to_numpy()
            #     pvalue = citest(X, Y, Z, test_args={'statistic': 'mixed_cmi', 'n_jobs': 8})
            # elif data.schema[test[0]] == pl.String and data.schema[test[1]] == pl.Float64:
            #     #print('B')
            #     X = data[test[0]].to_numpy()
            #     Y = data[test[1]].to_numpy()
            #     Z = data[test[2]].to_numpy()
            #     pvalue = citest(X, Y, Z, test_args={'statistic': 'mixed_cmi', 'n_jobs': 8})
            # elif data.schema[test[0]] == pl.Float64 and data.schema[test[1]] == pl.String:
            #     #print('C')
            #     X = data[test[0]].to_numpy()
            #     Y = data[test[1]].to_numpy()
            #     Z = data[test[2]].to_numpy()
            #     pvalue = citest(X, Y, Z, test_args={'statistic': 'mixed_cmi', 'n_jobs': 8})
            # elif data.schema[test[0]] == pl.Float64 and data.schema[test[1]] == pl.Float64:
            #     #print('D')
            #     _, pvalue = CITests.pearsonr(test[1], test[0], list(test[2]), data.cast(pl.Float64).to_pandas(), boolean=False)
            # else:
            #     X = data[test[0]].to_numpy()
            #     Y = data[test[1]].to_numpy()
            #     Z = data[test[2]].to_numpy()
            #     pvalue = citest(X, Y, Z, test_args={'statistic': 'mixed_cmi', 'n_jobs': 8})
            #     #assert False, 'no fitting test'
        else:
            #print(test[0], test[1])
            X = data[test[0]].to_numpy().astype(float)
            Y = data[test[1]].to_numpy().astype(float)
            #print(test[0], test[1])
            #print(X, Y)
            pvalue = itest(X, Y, test_args={'statistic': 'mixed_mi', 'n_jobs': 8})
            
            # if data.schema[test[0]] == pl.String and data.schema[test[1]] == pl.String:
            #     crosstab = pd.crosstab(data.to_pandas()[test[0]], data.to_pandas()[test[1]])
            #     _, pvalue, _, _ = stats.chi2_contingency(crosstab)
            # elif data.schema[test[0]] == pl.String and data.schema[test[1]] == pl.Float64:
            #     #print('E')
            #     X = data[test[0]].to_numpy()
            #     Y = data[test[1]].to_numpy().astype(float)
            #     pvalue = test_mixed_independence(Y, X)
            # elif data.schema[test[0]] == pl.Float64 and data.schema[test[1]] == pl.String:
            #     #print('F')
            #     X = data[test[0]].to_numpy().astype(float)
            #     Y = data[test[1]].to_numpy()
            #     pvalue = test_mixed_independence(X, Y)
            # elif data.schema[test[0]] == pl.Float64 and data.schema[test[1]] == pl.Float64:
            #     #print('G')
            #     v0 = data[test[0]]
            #     v1 = data[test[1]]
            #     _, pvalue = stats.pearsonr(v0, v1)
            # #elif data.schema[test[0]] == pl.Int32 and data.schema[test[1]] == pl.Float64:
            # else:
            #     X = data[test[0]].to_numpy().astype(float)
            #     Y = data[test[1]].to_numpy().astype(float)
            #     pvalue = itest(X, Y, test_args={'statistic': 'mixed_mi', 'n_jobs': 8})
            #     #assert False, 'no fitting test w/o conditiong set'
        pvalue = round(pvalue,4)

        #print(test, pvalue)
                
        ground_truth_tests.append(EmptyLikelihoodRatioTest(test[0], test[1], list(test[2]), pvalue))
    return ground_truth_tests
# TODO: with and without conditioning set

In [535]:
def join_categories_in_regression_sets(tests, reversed_category_expressions):
    #updated_tests = []
    for test in tests:
        test.X_labels = sorted(list(set([reversed_category_expressions[l] if l in reversed_category_expressions else l for l in test.X_labels])))
    return tests

def group_categorical_likelihood_tests(tests, category_expressions, reversed_category_expressions):
    #category_expressions = servers['dag_chain4_1c'].category_expressions
    #reversed_category_expressions = servers['dag_chain4_1c'].reversed_category_expressions
    #tests = server_ci_tests['dag_chain4_1c']

    updated_tests = []
    for test in tests:
        if test.y_label not in reversed_category_expressions:
            updated_tests.append(test)
            continue
        
        category_label = reversed_category_expressions[test.y_label]
        
        # Only run if the current test is the first category. This avoids duplicate tests
        if category_expressions[category_label][0] != test.y_label:
            continue
        
        categorical_test_group = []
        for test_lookup in tests:
            if test_lookup.y_label in category_expressions[category_label] and test_lookup.x_label == test.x_label and sorted(test_lookup.s_labels) == sorted(test.s_labels):
                categorical_test_group.append(test_lookup)
                
        lrt = CategoricalLikelihoodRatioTest(category_label, [t.t0 for t in categorical_test_group], [t.t1 for t in categorical_test_group], len(category_expressions[category_label]))
        updated_tests.append(lrt)
        
    return updated_tests


def group_ordinal_likelihood_tests(tests, ordinal_expressions, reversed_ordinal_expressions):
    #category_expressions = servers['dag_chain4_1c'].category_expressions
    #reversed_category_expressions = servers['dag_chain4_1c'].reversed_category_expressions
    #tests = server_ci_tests['dag_chain4_1c']

    updated_tests = []
    for test in tests:
        if test.y_label not in reversed_ordinal_expressions:
            updated_tests.append(test)
            continue
        
        category_label = reversed_ordinal_expressions[test.y_label]
        #print(category_label)
        
        # Only run if the current test is the first category. This avoids duplicate tests
        if ordinal_expressions[category_label][0] != test.y_label:
            continue
        
        categorical_test_group = []
        for test_lookup in tests:
            if test_lookup.y_label in ordinal_expressions[category_label] and test_lookup.x_label == test.x_label and sorted(test_lookup.s_labels) == sorted(test.s_labels):
                categorical_test_group.append(test_lookup)
                
        lrt = OrdinalLikelihoodRatioTest(category_label, [t.t0 for t in categorical_test_group], [t.t1 for t in categorical_test_group], len(ordinal_expressions[category_label]))
        updated_tests.append(lrt)
        
    return updated_tests


def get_server_test_results(servers, do_symmetric_tests=True):
    testing_rounds = {k:v.testing_engine.finished_rounds for k,v in servers.items()}
    testing_rounds = {k:join_categories_in_regression_sets(v, servers[k].reversed_category_expressions) for k,v in testing_rounds.items()}
    likelihood_tests = {k:fedci.get_likelihood_tests(v) for k,v in testing_rounds.items()}
    #print(likelihood_tests)
    # fix up categorical tests
    #likelihood_tests = {k:group_categorical_likelihood_tests(v, servers[k].category_expressions, servers[k].reversed_category_expressions) for k,v in likelihood_tests.items()}
    
    likelihood_tests = {k:group_ordinal_likelihood_tests(v, servers[k].ordinal_expressions, servers[k].reversed_ordinal_expressions) for k,v in likelihood_tests.items()}
    
    if do_symmetric_tests:
        likelihood_tests = {k:fedci.get_symmetric_likelihood_tests(v) for k,v in likelihood_tests.items()}
    
    return likelihood_tests

In [51]:
def prepare_server_evaluation(ground_truth_tests, server_ci_tests, max_regressors):
    p_value_comparison = {k:[] for k in server_ci_tests.keys()}
    missing_test = {k:0 for k in server_ci_tests.keys()}
    
    for test in ground_truth_tests:
        for k in server_ci_tests.keys():
            if max_regressors is not None and max_regressors < len(test.s_labels):
                continue
            matching_test = [t for t in server_ci_tests[k] if t.y_label == test.y_label and t.x_label == test.x_label and sorted(t.s_labels) == sorted(test.s_labels)]
            if len(matching_test) == 0:
                print(f'No matching test in {k} for {test}')
                #print(test)
                #print(server_ci_tests[k])
                #raise Exception('lol')
                missing_test[k] += 1
                continue
            assert len(matching_test) == 1
            matching_test = matching_test[0]          
            p_value_comparison[k].append((matching_test.p_val, test.p_val))
        
    missing_test = {k:v/len(ground_truth_tests) if len(ground_truth_tests) > 0 else 0 for k,v in missing_test.items()}
    return p_value_comparison, missing_test

In [89]:
def count_correct_alpha_thresholdings(data, alpha):
    # first element in data is from fedci
    tp = sum([1 for a,b in data if (a > alpha and b > alpha)]) / len(data)
    tn = sum([1 for a,b in data if (a < alpha and b < alpha)]) / len(data)
    # t1: false positives
    # positive: calling independence
    fp = sum([1 for a,b in data if (a > alpha and b < alpha)]) / len(data)
    # t2: false negatives
    fn = sum([1 for a,b in data if a < alpha and b > alpha]) / len(data)
    return tp, tn, fp, fn

def count_correct_pval(data, tolerance=1e-4):
    c = sum([1 for a,b in data if abs(a-b)<tolerance]) / len(data) #if len(data) > 0 else 1
    return c

def evaluate_results(p_value_comparison, alphas, tolerance):
    result_alpha = {}
    result_equality = {}
    for k,v in p_value_comparison.items():
        result_alpha[k] = {}
        result_equality[k] = count_correct_pval(v, tolerance)
        for alpha in alphas:
            result_alpha[k][alpha] = count_correct_alpha_thresholdings(v,alpha)
            
    return result_alpha, result_equality

In [53]:
def get_records(servers, name, total_samples, alpha_tests, equality_tests, p_val_comparisons, missed_tests, total_features, features_per_client, comparison_category, tikhonov_lambda, do_symmetric_tests, max_regressors, llf_neg_prob_fix):
    results = []
    for server_id in servers.keys():
        server = servers[server_id]
        alpha_test = alpha_tests[server_id]
        
        r = {
            'name': name,
            'num_clients': len(server.clients),
            'num_samples': total_samples,
            'comparison_category': comparison_category,
            'same_p_val': equality_tests[server_id],
            'missed_tests': missed_tests[server_id],
            'max_regressors': max_regressors,
            'total_features': total_features,
            'features_per_client': features_per_client,
            'tikhonov_lambda': tikhonov_lambda,
            'llf_neg_prob_fix': llf_neg_prob_fix,
            'symmetric': do_symmetric_tests,
            'predicted_p_vals': [p[0] for p in p_val_comparisons[server_id]],
            'true_p_vals': [p[1] for p in p_val_comparisons[server_id]],
        }

        for alpha, alpha_result in alpha_test.items():
            rc = r.copy()
            rc['alpha'] = alpha
            tp, tn, fp, fn = alpha_result
            rc['tp'] = tp
            rc['tn'] = tn
            rc['fp'] = fp
            rc['fn'] = fn
            results.append(rc)

    return results

In [54]:
import json
def write_records(i, file, data, path):
    if len(data) == 0:
        return
    with open(path + file.format(i), 'a') as f:
        for d in data:
            ds = json.dumps(d) + '\n'
            f.write(ds)

In [55]:
import polars.selectors as cs

In [335]:
import statsmodels.api as sm
def multinomial_logistic_regression(df, dependent_var, independent_vars):
    # Add a constant (intercept) to the independent variables
    X = sm.add_constant(df[independent_vars])
    
    # The dependent variable (must be categorical)
    y = df[dependent_var]
    
    # Fit the multinomial logistic regression model
    model = sm.MNLogit(y, X)
    result = model.fit()
    
    # Get the log-likelihood function (LLF) and coefficients
    llf = result.llf
    coefficients = result.params
    
    return llf, coefficients

In [540]:
def process(i, client_configurations, llf_neg_prob_fix=None, total_samples=1000, max_regressors=None, features_to_reduce=None, tikhonov_lambda=0, do_symmetric_tests=True, do_write_records=True):
    #print('Step 1/6 --> Setup')
    #data = pl.read_parquet(f'./fedci/testdata-{i}.parquet')
    #TOTAL_SAMPLES = len(data)
    data = get_sample_data(ncs[i], total_samples)
    
    total_features = len(ncs[i].nodes)
    features_per_client = total_features if features_to_reduce is None else total_features - features_to_reduce
    
    experiment_name = ncs[i].name
    
    servers = get_servers(client_configurations, experiment_name, data, tikhonov_lambda, features_per_client, max_regressors)
    #servers = get_servers([3], experiment_name, data, tikhonov_lambda, features_per_client)

    #print([len(c.data) for c in servers[f'dag_{experiment_name}_{3}c'].clients.values()])

    #print('Step 2/6 --> Run Tests')
    for server in servers.values(): server.run_tests()

    #print('Step 3/6 --> Collect Results')
    possible_tests = get_possible_tests(set(data.columns))
    server_ci_tests = get_server_test_results(servers, do_symmetric_tests=do_symmetric_tests) 
    
    comparison_tests_collection = []
    #ground_truth_tests = get_ground_truth_tests_mxm(data, possible_tests, do_symmetric_tests=do_symmetric_tests)
    #comparison_tests_collection.append(('pooled_citest', ground_truth_tests))
    
    # get real p values from riod code
    riod_tests = get_riod_tests(data, possible_tests, do_symmetric_tests=do_symmetric_tests)
    comparison_tests_collection.append(('riod_pooled_citest', riod_tests))
    
    # import matplotlib.pyplot as plt
    # plt.scatter(data['X'], data['Y'], alpha=0.3)
    # plt.show()
    
    llf0, coeff0 = multinomial_logistic_regression(data.to_pandas(), 'Y', ['Z'])
    llf1, coeff1 = multinomial_logistic_regression(data.to_pandas(), 'Y', ['X', 'Z'])

    print('MN Regression on Intercept')
    print(llf0)
    print(coeff0)
    print('MN Regression on X')
    print(llf1)
    print(coeff1)
    print(f'MN Tested pval: {stats.chi2.sf(-2*(llf0-llf1), 2)}, Test Statistic {-2*(llf0 - llf1)}')
    
    print('Misc...')
    print(data.schema)
    print('---')
    for v in list(server_ci_tests.values())[0]:
        if v.y_label == 'X' and v.x_label == 'Y' and len(v.s_labels) == 1 and v.s_labels[0] == 'Z':
            print(v)
    print('---')
    for v in riod_tests:
        if v.y_label == 'X' and v.x_label == 'Y' and len(v.s_labels) == 1 and v.s_labels[0] == 'Z':
            print(v)
    #print(riod_tests)
    
    # if i in ncs_independences:
    #     real_independences = ncs_independences[i].copy()
    #     if not do_symmetric_tests:
    #         real_independences += [EmptyLikelihoodRatioTest(t.x_label, t.y_label, t.s_labels, t.p_val) for t in real_independences]
    #     comparison_tests_collection.append(('ground_truth', real_independences))
    #ground_truth_tests = real_indep3 # todo: maybe add addition call of prepare_server_evaluation with prefix for different types of ground truth tests
    
    for comparison_name, comparison_tests in comparison_tests_collection:

        #print('Step 4/6 --> Prepare Evaluation')
        p_val_comparisons, missed_tests = prepare_server_evaluation(comparison_tests, server_ci_tests, max_regressors)

        #print('Step 5/6 --> Run Evaluation')
        alpha_tests, equality_tests = evaluate_results(p_val_comparisons, alpha_comparisons, equality_tolerance)

        #print('Step 6/6 --> Log Results')
        records = get_records(servers,
                              experiment_name,
                              total_samples,
                              alpha_tests,
                              equality_tests,
                              p_val_comparisons,
                              missed_tests,
                              total_features,
                              features_per_client,
                              comparison_name,
                              tikhonov_lambda,
                              do_symmetric_tests,
                              max_regressors,
                              llf_neg_prob_fix)
        
        if do_write_records:
            write_records(i, log_filepattern, records, './experiments/base/')

In [256]:
# TODO: remove non-zero correctness.

In [58]:
#TIKHONOV_LAMBDA = 1e-7
TIKHONOV_LAMBDA = 0

In [61]:
paper_tests = [
    911,912,913,914,
    921,922,923,924,
    #931,932,933,934,
    #941,942,943,944,
    #951,952,953,954,
    961,962,963,964
]

In [33]:
max_regressors = None

In [34]:
llf_neg_prob_fix = 'clipping'

In [None]:
[100, 200, 400, 750, 1500, 2000, 2500,]

In [541]:
for tkl in [1e-10]:#, 1e-7]:
    for i in tqdm(range(100), leave=False):
        for sym in [True]:
            for samples in [300,500,1000,3000]:#[100, 300, 600, 1000, 3000, 6000, 10000]:#, 500, 600, 700, 800, 900, 1000]:
                for id_ in [924]:#paper_tests:#[1,2,3,4,61,62,63,64,71,72,73,74,81,82,83,84]:#paper_tests: #1,2,3,4,81,82,83,84  # 72 # ,81,82,83,84
                    #for reductions in [0,1]:
                    #print(f'Running {id_}')
                    process(id_,
                            [1],
                            total_samples=samples,
                            do_symmetric_tests=sym,
                            tikhonov_lambda=tkl,
                            features_to_reduce=None,
                            max_regressors=max_regressors,
                            llf_neg_prob_fix=llf_neg_prob_fix,
                            do_write_records=False#TODO set to true
                            )
                    asdasd

  0%|          | 0/100 [00:00<?, ?it/s]

Regressing Y ~ X + ['Z']
T0 CategoricalTestingRound - y: Y, X: ['Z'], total samples: None, beta: {'Y__cat__2': array([1.22492679, 0.28735301]), 'Y__cat__3': array([ 2.54546297, -0.2859271 ])}, current iteration: 47, current deviance: 507.89620682112854, relative deviance change: 5.839462727581616e-09, llf: {0: -253.94810341056427}, rss: None
T1 CategoricalTestingRound - y: Y, X: ['X', 'Z'], total samples: None, beta: {'Y__cat__2': array([-0.07848562,  1.16501528,  0.29997291]), 'Y__cat__3': array([-0.1878416 ,  2.38873746, -0.24105419])}, current iteration: 40, current deviance: 505.66844455121975, relative deviance change: 9.010519144607549e-09, llf: {0: -252.83422227560987}, rss: None
Num Categories: 3
T0 llf: -253.94810341056427, T1 llf: -252.83422227560987
DOF M0: 4, DOF M1: 6 -> Test DOF = 2
Test Statistic: 2.2277622699087942, p val: 0.3282823771714845
Testing if X= 1 is indep of Y= 2 given S={  }
Running Gaussian linear regression for  2 
Running multinomial regression for  1 
p1

NameError: name 'asdasd' is not defined

In [33]:
total_samples = 1000
features_to_reduce = None
client_configurations = [3]
tikhonov_lambda = 0
do_symmetric_tests = True
i = 63



data = get_sample_data(ncs[i], total_samples)
    
total_features = len(ncs[i].nodes)
features_per_client = total_features if features_to_reduce is None else total_features - features_to_reduce

experiment_name = ncs[i].name

servers = get_servers(client_configurations, experiment_name, data, tikhonov_lambda, features_per_client)
#servers = get_servers([3], experiment_name, data, tikhonov_lambda, features_per_client)

#print([len(c.data) for c in servers[f'dag_{experiment_name}_{3}c'].clients.values()])

#print('Step 2/6 --> Run Tests')
for server in servers.values(): server.run_tests()

#print('Step 3/6 --> Collect Results')
possible_tests = get_possible_tests(set(data.columns))
server_ci_tests = get_server_test_results(servers, do_symmetric_tests=do_symmetric_tests) 

comparison_tests_collection = []
ground_truth_tests = get_ground_truth_tests_mxm(data, possible_tests, do_symmetric_tests=do_symmetric_tests)

In [35]:
nc3.reset()

In [36]:
nc3.get(3000)

D,B,C,A
str,f64,i32,f64
"""1""",-0.116479,1,-0.816289
"""1""",-0.609824,1,1.344061
"""2""",1.99322,2,-0.160638
"""1""",0.11312,1,-0.291801
"""1""",-1.337795,2,0.215637
…,…,…,…
"""1""",-0.959623,2,1.372909
"""1""",-0.937223,2,0.875208
"""1""",-0.535653,1,0.036071
"""1""",-0.186797,1,0.326439


In [42]:
nc3.get(3000)[:1000].write_parquet('./wicked-data-01.parquet')

In [65]:
# import glob

# for f in glob.glob('./log-9*.ndjson'):
#     df = pl.read_ndjson(f)
#     if 'max_regressors' in df.schema:
#         df = df.filter(pl.col('max_regressors') != 1)
#     df.write_ndjson(f)

In [311]:
The following code is used to aggregates binary logistic regression models, each predicting a category of a categorical variable, in order to get the overall multinomial llf. Please explain to me if there is anything wrong with this code.

def compute_categorical_llf2(self, y_label, X_labels, betas):
    def _get_prob(X_labels: List[str], beta):
        _data = self.data            
        _data = _data.with_columns(__dummy_data=pl.lit(0.0))

        X = _data.to_pandas()[X_labels]
        X = X.to_numpy().astype(float)
        X = sm.tools.add_constant(X) 
        
        y = _data.to_pandas()['__dummy_data']
        y = y.to_numpy().astype(float)
        
        glm_model = sm.GLM(y, X, family=family.Binomial())
        glm_results = GLMResults(glm_model, beta, normalized_cov_params=None, scale=None)
        prob = glm_results.predict()
        
        return prob
    
    probs = {cat:np.clip(_get_prob(X_labels, beta),1e-15,1-1e-15) for cat, beta in betas.items()}
    denominator = sum(list(probs.values()))
    probs = {cat:probs[cat] / denominator for cat in probs.keys()}
    
    def get_cat_index(data, y_label, cat):
        cat_val = cat.split('__cat__')[-1]
        return data.with_row_index().filter(pl.col(y_label) == cat_val)['index'].to_list()
    
    cat_indexes = {cat: get_cat_index(self.data, y_label, cat) for cat in probs.keys()}
    
    llf = 0 
    for cat in cat_indexes.keys():

        llf += np.sum(np.log(np.take(probs[cat], cat_indexes[cat])))
    
    return llf

SyntaxError: invalid syntax (342447083.py, line 1)