In [43]:
import pandas as pd
import numpy as np

from causallearn.search.ConstraintBased.FCI import fci
from causallearn.utils.GraphUtils import GraphUtils

In [37]:
# Load the subject dataset from ../data/clean.csv (relative path, so it depends
# on the kernel's working directory).
data = pd.read_csv("../data/clean.csv")

In [38]:
def numerical_encoding(data):
    '''
    Returns a copy of the subject dataset whose categorical columns are recoded as integers.

    The 'Ethnicity', 'Gender' and 'IRIS' columns are each passed through a fixed label -> code table.
    Labels that are missing from a column's table come out as NaN. The input dataframe is not modified.

    :param: data: raw subject dataset containing the 'Ethnicity', 'Gender' and 'IRIS' columns
    '''
    code_tables = {
        'Ethnicity': {'C': 0, 'A': 1, 'B': 2, 'H': 3, 'unknown': 4},
        'Gender': {'M': 0, 'F': 1},
        'IRIS': {'IR': 0, 'IS': 1, 'Unknown': 2},
    }

    encoded = data.copy()
    for column, codes in code_tables.items():
        encoded[column] = encoded[column].map(codes)
    return encoded


def IR_IS_split(df, numerical=False):
    '''
    Splits df into its insulin-resistant (IR) and insulin-sensitive (IS) rows.

    Rows whose 'IRIS' value matches neither group are left out of both results.
    The 'IRIS' column is dropped from the two returned dataframes; df itself is not modified.

    :param: df: dataframe containing all individuals and the 'IRIS' column
    :param: numerical: if truthy, 'IRIS' is matched against the integer codes 0 (IR) and 1 (IS);
            otherwise against the strings 'IR' and 'IS'
    :raises: ValueError: if df has no 'IRIS' column
    :return: tuple (IR_df, IS_df)
    '''
    if "IRIS" not in df.columns:
        # ValueError is a subclass of Exception, so callers catching Exception still work.
        raise ValueError("IRIS is not in the columns of df.")

    # Choose the pair of group labels once so both groups are filtered the same way.
    ir_label, is_label = (0, 1) if numerical else ('IR', 'IS')

    IR_df = df[df['IRIS'] == ir_label].drop(columns='IRIS')
    IS_df = df[df['IRIS'] == is_label].drop(columns='IRIS')

    return IR_df, IS_df

In [39]:
# Recode the categorical columns as integers, then split on the IRIS code
# (numerical=True selects rows with IRIS == 0 as IR and IRIS == 1 as IS).
# NOTE(review): this rebinds `data` to its encoded form, so the cell is not
# idempotent. Re-running it without first re-running the load cell pushes the
# already-encoded integers through the string-keyed tables again, which turns
# the Ethnicity, Gender and IRIS columns into NaN.
data = numerical_encoding(data)
IR, IS = IR_IS_split(data, True)

In [44]:
# Run FCI causal discovery on the IR subset using the 'fisherz' independence test.
# The data is passed as a bare array (IR.values), so column names are not handed
# to fci; the X<n> labels in the printed edges are presumably positional column
# indices -- TODO confirm the numbering against IR.columns before interpreting.
# NOTE(review): Fisher-Z is a partial-correlation test, usually applied to
# continuous data, while IR still holds integer-coded categorical columns
# (Ethnicity, Gender) -- confirm this test is appropriate for them.
g_ir, edges_ir = fci(IR.values, independence_test_method='fisherz')

  0%|          | 0/50 [00:00<?, ?it/s]

X27 --> X2
X32 --> X3
X45 --> X3
X4 --> X23
X39 --> X4
X4 --> X40
X45 --> X5
X25 --> X8
X23 --> X13
X19 --> X39
X19 --> X40
X19 --> X41
X23 --> X20
X40 --> X20
X32 --> X27
X27 --> X45
X37 --> X32
X32 --> X38
X42 --> X37
