In [1]:
import dgp
import polars as pl
import polars.selectors as cs
import numpy as np
import os
import fedci

In [2]:
node1 = dgp.GenericNode('Z', node_restrictions=[dgp.Node])
node2 = dgp.GenericNode('X', parents=[node1], node_restrictions=[dgp.Node])
node3 = dgp.GenericNode('Y', parents=[node1], node_restrictions=[dgp.CategoricalNode], min_categories=3)
nc924 = dgp.NodeCollection('L-M Con. Indep.', [node1, node2, node3])

# Unc. Dep. Case
node1 = dgp.GenericNode('X', node_restrictions=[dgp.Node])
node2 = dgp.GenericNode('Y', node_restrictions=[dgp.OrdinalNode])
nc932 = dgp.NodeCollection('L-O Unc. Dep.', [node1, node2])

# Con. Indep. Case given Z
node1 = dgp.GenericNode('Z', node_restrictions=[dgp.Node])
node2 = dgp.GenericNode('X', parents=[node1], node_restrictions=[dgp.Node])
node3 = dgp.GenericNode('Y', parents=[node1], node_restrictions=[dgp.OrdinalNode])
nc934 = dgp.NodeCollection('L-O Con. Indep.', [node1, node2, node3])

node1 = dgp.GenericNode('X', node_restrictions=[dgp.BinaryNode])
node2 = dgp.GenericNode('Y', node_restrictions=[dgp.OrdinalNode])
nc941 = dgp.NodeCollection('B-O Unc. Indep.', [node1, node2])

In [3]:
import pandas as pd
import numpy as np
from rpy2.robjects import pandas2ri, Formula
from rpy2.robjects.packages import importr
import rpy2.robjects as ro

# Enable automatic conversion between pandas and R data frames
#pandas2ri.activate()

# Import necessary R packages
base = importr('base')
#r_stats = importr('stats')

def transform_dataframe(df):
    # Create a copy of the DataFrame to avoid modifying the original
    df_copy = df.copy()
    
    with (ro.default_converter + pandas2ri.converter).context():
        # Initialize an empty R list to store our columns
        r_list = {}#ro.ListVector({})
        
        # Iterate through columns and convert based on data type
        for col in df_copy.columns:
            if df_copy[col].dtype == 'float64':
                # Float columns become numeric (already handled by pandas2ri)
                r_list[col] = pandas2ri.py2rpy(df_copy[col])
            elif df_copy[col].dtype == 'object':
                # String columns become factors
                r_list[col] = base.as_factor(pandas2ri.py2rpy(df_copy[col]))
            elif df_copy[col].dtype == 'int64':
                # Integer columns become ordered factors
                unique_values = sorted(df_copy[col].unique())
                r_list[col] = base.factor(pandas2ri.py2rpy(df_copy[col]), 
                                        levels=ro.IntVector(unique_values), 
                                        ordered=True)
            else:
                print(col)
                print(df_copy[col].dtype)
                assert False
                
        r_list = ro.ListVector(r_list)
        
        # Convert the R list to an R data frame
        r_dataframe = base.as_data_frame(r_list)
    
    return r_dataframe

In [18]:
def get_riod_tests(data, do_symmetric_tests=True):
    
    print(data)
    
    data = data.with_columns(cs.integer().cast(pl.Int64))
    
    ground_truth_tests = []
    
    if do_symmetric_tests is False:
        return ground_truth_tests
        
    # Call R function
    with (ro.default_converter + pandas2ri.converter).context():
        # load local-ci script
        ro.r['source']('./local-ci.r')
        # load function from R script
        run_ci_test_f = ro.globalenv['run_ci_test']
        
        print(data.with_columns(cs.boolean().cast(pl.Utf8)).to_pandas())

        df_r = transform_dataframe(data.with_columns(cs.boolean().cast(pl.Utf8)).to_pandas())
        
        #converting it into r object for passing into r function
        #df_r = ro.conversion.get_conversion().py2rpy(data.to_pandas())
        
        #Invoking the R function and getting the result
        if os.path.exists('./tmp/citestResults_dummy.csv'):
            os.remove('./tmp/citestResults_dummy.csv')
        result = run_ci_test_f(df_r, 999, "./tmp/", 'dummy')
        #Converting it back to a pandas dataframe.
        df_pvals = ro.conversion.get_conversion().rpy2py(result['citestResults'])
        labels = list(result['labels'])
        
    df = pl.from_pandas(df_pvals)
    df = df.drop('ord')
    df = df.with_columns(pl.col('S').str.split(',').cast(pl.List(pl.Int64)))
    df = df.with_columns(pl.col('X', 'Y').cast(pl.Int64))
    
    for row in df.rows():
        x = labels[row[0]-1]
        y = labels[row[1]-1]
        if x > y:
            x,y = y,x
        s = [labels[r-1] for r in row[2] if r is not None]
        pval = round(row[3],4)
        
        ground_truth_tests.append(fedci.EmptyLikelihoodRatioTest(x, y, s, pval))
        
    return ground_truth_tests
    

In [3]:
curr_nc = nc934

In [10]:
curr_nc.reset()
data = curr_nc.get(100)
data.head()
data['Y'].value_counts()

Y,count
i32,u32
3,21
1,25
2,26
4,28


Y,count
i32,u32
1,27
2,24
4,23
3,26


In [22]:
get_riod_tests(data)

shape: (100, 3)
┌─────┬───────────┬───────────┐
│ Y   ┆ X         ┆ Z         │
│ --- ┆ ---       ┆ ---       │
│ i32 ┆ f64       ┆ f64       │
╞═════╪═══════════╪═══════════╡
│ 2   ┆ -0.428652 ┆ -0.172842 │
│ 1   ┆ -1.362272 ┆ 0.094774  │
│ 1   ┆ -1.08128  ┆ -0.03005  │
│ 1   ┆ 1.212273  ┆ -0.419789 │
│ 1   ┆ 0.642167  ┆ -0.482396 │
│ …   ┆ …         ┆ …         │
│ 2   ┆ -0.685612 ┆ 0.675332  │
│ 2   ┆ -2.048775 ┆ 0.271685  │
│ 2   ┆ -0.035228 ┆ -0.948786 │
│ 2   ┆ 2.559497  ┆ -2.983625 │
│ 2   ┆ 0.066348  ┆ 0.636628  │
└─────┴───────────┴───────────┘
    Y         X         Z
0   2 -0.428652 -0.172842
1   1 -1.362272  0.094774
2   1 -1.081280 -0.030050
3   1  1.212273 -0.419789
4   1  0.642167 -0.482396
.. ..       ...       ...
95  2 -0.685612  0.675332
96  2 -2.048775  0.271685
97  2 -0.035228 -0.948786
98  2  2.559497 -2.983625
99  2  0.066348  0.636628

[100 rows x 3 columns]
[1] "HEYOO"
   Y           X            Z
0  2 -0.42865169 -0.172841906
1  1 -1.36227230  0.094773911
2 

 -0.482395998
5  2  1.02968364  2.364186524
6  2 -1.54092530  0.496929795
7  1  0.47462684  1.450673276
8  2 -0.51119392  1.947327020
9  1 -2.20702207  0.313174315
10 2 -3.34695559 -0.282843035
11 2 -1.04024872  1.913924996
12 2  1.84724736 -2.214075409
13 2 -0.43268415  1.323667331
14 2 -2.00043030  0.046523252
15 2  0.12350863 -0.416287247
16 1 -0.43176167  0.838407665
17 1 -0.60942495 -0.912470542
18 2  0.07814282  0.858757635
19 2  2.12840523 -2.088678876
20 1 -0.35816996 -1.003391669
21 1  0.45878592 -0.831979277
22 1 -0.30603441 -0.915677198
23 2  1.00332323 -0.863559967
24 2 -0.29594839  0.834922406
25 2  1.40108565  0.391171328
26 1 -0.45458279  1.431100726
27 2 -1.35722770 -0.216771094
28 2 -1.87237260  2.102285220
29 2  1.85368509 -0.487924110
30 1 -1.12314910 -0.708125611
31 1  1.72082484 -0.523892258
32 2 -1.55156765  0.174326237
33 1  0.02479131  0.558220516
34 1  0.82187794 -1.007972360
35 1  0.47801326 -0.173616101
36 1  1.53881587 -1.371210358
37 1 -0.64615131  1.650041

R[write to console]: Fehler in eval(mf, parent.frame()) : 
  Argument "data" fehlt (ohne Standardwert)

R[write to console]: Zusätzlich: 
R[write to console]: Warnmeldungen:

R[write to console]: 1: 
R[write to console]: In model.matrix.default(mt, mf, contrasts) :
R[write to console]: 
 
R[write to console]:  non-list contrasts argument ignored

R[write to console]: 2: 
R[write to console]: In model.matrix.default(mt, mf, contrasts) :
R[write to console]: 
 
R[write to console]:  non-list contrasts argument ignored

R[write to console]: 3: 
R[write to console]: In model.matrix.default(mt, mf, contrasts) :
R[write to console]: 
 
R[write to console]:  non-list contrasts argument ignored

R[write to console]: 4: 
R[write to console]: In model.matrix.default(mt, mf, contrasts) :
R[write to console]: 
 
R[write to console]:  non-list contrasts argument ignored

R[write to console]: 5: 
R[write to console]: In model.matrix.default(mt, mf, contrasts) :
R[write to console]: 
 
R[write to con

RRuntimeError: Fehler in eval(mf, parent.frame()) : 
  Argument "data" fehlt (ohne Standardwert)


In [17]:
data

Y,X,Z
i32,f64,f64
2,0.585733,0.34152
2,0.536646,-0.427573
2,1.985452,-2.572513
1,1.418381,-1.21275
2,4.035607,-1.603171
…,…,…
2,-1.859916,0.634115
1,0.632344,-0.529006
2,0.821754,0.679581
2,0.650401,0.171038
