In [8]:
import dgp
import polars as pl
import polars.selectors as cs
import numpy as np
import os
import fedci

In [52]:
node1 = dgp.GenericNode('Z', node_restrictions=[dgp.Node])
node2 = dgp.GenericNode('X', parents=[node1], node_restrictions=[dgp.Node])
node3 = dgp.GenericNode('Y', parents=[node1], node_restrictions=[dgp.CategoricalNode], min_categories=3)
nc924 = dgp.NodeCollection('L-M Con. Indep.', [node1, node2, node3])

# Unc. Dep. Case
node1 = dgp.GenericNode('X', node_restrictions=[dgp.Node])
node2 = dgp.GenericNode('Y', node_restrictions=[dgp.OrdinalNode])
nc932 = dgp.NodeCollection('L-O Unc. Dep.', [node1, node2])

# Con. Indep. Case given Z
node1 = dgp.GenericNode('Z', node_restrictions=[dgp.Node])
node2 = dgp.GenericNode('X', parents=[node1], node_restrictions=[dgp.Node])
node3 = dgp.GenericNode('Y', parents=[node1], node_restrictions=[dgp.OrdinalNode])
nc934 = dgp.NodeCollection('L-O Con. Indep.', [node1, node2, node3])

node1 = dgp.GenericNode('X', node_restrictions=[dgp.BinaryNode])
node2 = dgp.GenericNode('Y', node_restrictions=[dgp.OrdinalNode])
nc941 = dgp.NodeCollection('B-O Unc. Indep.', [node1, node2])

In [47]:
import pandas as pd
import numpy as np
from rpy2.robjects import pandas2ri, Formula
from rpy2.robjects.packages import importr
import rpy2.robjects as ro

# Enable automatic conversion between pandas and R data frames
#pandas2ri.activate()

# Import necessary R packages
base = importr('base')
#r_stats = importr('stats')

def transform_dataframe(df):
    # Create a copy of the DataFrame to avoid modifying the original
    df_copy = df.copy()
    
    with (ro.default_converter + pandas2ri.converter).context():
        # Initialize an empty R list to store our columns
        r_list = {}#ro.ListVector({})
        
        # Iterate through columns and convert based on data type
        for col in df_copy.columns:
            if df_copy[col].dtype == 'float64':
                # Float columns become numeric (already handled by pandas2ri)
                r_list[col] = pandas2ri.py2rpy(df_copy[col])
            elif df_copy[col].dtype == 'object':
                # String columns become factors
                r_list[col] = base.as_factor(pandas2ri.py2rpy(df_copy[col]))
            elif df_copy[col].dtype == 'int64':
                # Integer columns become ordered factors
                unique_values = sorted(df_copy[col].unique())
                r_list[col] = base.factor(pandas2ri.py2rpy(df_copy[col]), 
                                        levels=ro.IntVector(unique_values), 
                                        ordered=True)
            else:
                print(col)
                print(df_copy[col].dtype)
                assert False
                
        r_list = ro.ListVector(r_list)
        
        # Convert the R list to an R data frame
        r_dataframe = base.as_data_frame(r_list)
    
    return r_dataframe

In [48]:
def get_riod_tests(data, do_symmetric_tests=True):
    
    print(data)
    
    data = data.with_columns(cs.integer().cast(pl.Int64))
    
    ground_truth_tests = []
    
    if do_symmetric_tests is False:
        return ground_truth_tests
        
    # Call R function
    with (ro.default_converter + pandas2ri.converter).context():
        # load local-ci script
        ro.r['source']('./local-ci.r')
        # load function from R script
        run_ci_test_f = ro.globalenv['run_ci_test']
        
        print(data.with_columns(cs.boolean().cast(pl.Utf8)).to_pandas())

        df_r = transform_dataframe(data.with_columns(cs.boolean().cast(pl.Utf8)).to_pandas())
        
        #converting it into r object for passing into r function
        #df_r = ro.conversion.get_conversion().py2rpy(data.to_pandas())
        
        #Invoking the R function and getting the result
        if os.path.exists('./tmp/citestResults_dummy.csv'):
            os.remove('./tmp/citestResults_dummy.csv')
        result = run_ci_test_f(df_r, 999, "./tmp/", 'dummy')
        #Converting it back to a pandas dataframe.
        df_pvals = ro.conversion.get_conversion().rpy2py(result['citestResults'])
        labels = list(result['labels'])
        
    df = pl.from_pandas(df_pvals)
    df = df.drop('ord')
    df = df.with_columns(pl.col('S').str.split(',').cast(pl.List(pl.Int64)))
    df = df.with_columns(pl.col('X', 'Y').cast(pl.Int64))
    
    for row in df.rows():
        x = labels[row[0]-1]
        y = labels[row[1]-1]
        if x > y:
            x,y = y,x
        s = [labels[r-1] for r in row[2] if r is not None]
        pval = round(row[3],4)
        
        ground_truth_tests.append(fedci.EmptyLikelihoodRatioTest(x, y, s, pval))
        
    return ground_truth_tests
    

In [53]:
curr_nc = nc941

In [54]:
curr_nc.reset()
data = curr_nc.get(100)
data.head()

Y,X
i32,bool
2,True
4,False
1,True
2,True
3,False


In [55]:
get_riod_tests(data)

shape: (100, 2)
┌─────┬───────┐
│ Y   ┆ X     │
│ --- ┆ ---   │
│ i32 ┆ bool  │
╞═════╪═══════╡
│ 2   ┆ true  │
│ 4   ┆ false │
│ 1   ┆ true  │
│ 2   ┆ true  │
│ 3   ┆ false │
│ …   ┆ …     │
│ 4   ┆ false │
│ 1   ┆ false │
│ 3   ┆ false │
│ 1   ┆ false │
│ 2   ┆ true  │
└─────┴───────┘
    Y      X
0   2   true
1   4  false
2   1   true
3   2   true
4   3  false
.. ..    ...
95  4  false
96  1  false
97  3  false
98  1  false
99  2   true

[100 rows x 2 columns]
    Y     X
1   2  true
2   4 false
3   1  true
4   2  true
5   3 false
6   4  true
7   3  true
8   2

  true
9   2  true
10  4 false
11  3  true
12  3  true
13  2  true
14  4 false
15  3 false
16  1 false
17  1 false
18  3  true
19  4  true
20  1  true
21  4 false
22  3 false
23  2  true
24  4 false
25  3  true
26  1  true
27  1 false
28  3  true
29  2  true
30  1  true
31  2 false
32  4  true
33  2  true
34  4  true
35  1  true
36  1 false
37  3  true
38  1 false
39  2 false
40  2  true
41  4 false
42  1  true
43  2 false
44  1 false
45  2  true
46  3  true
47  3  true
48  4 false
49  1 false
50  1  true
51  2 false
52  4 false
53  3  true
54  1  true
55  2 false
56  3 false
57  4  true
58  4 false
59  2 false
60  3 false
61  4 false
62  2  true
63  3 false
64  1 false
65  1 false
66  1 false
67  4  true
68  3  true
69  3 false
70  3 false
71  4  true
72  4 false
73  2  true
74  1 false
75  1 false
76  3  true
77  1 false
78  1  true
79  1  true
80  4  true
81  4 false
82  4 false
83  3 false
84  3 false
85  1 false
86  1 false
87  2 false
88  1  true
89  1 false
90  1 false
91  1 fal

[LikelihoodRatioTest - y: X, x: Y, S: [], p: 0.1094]