In [76]:
import pandas as pd
import numpy as np
import pydot

from causallearn.search.ConstraintBased.PC import pc
from causallearn.utils.cit import fastkci
from causallearn.search.ConstraintBased.FCI import fci
from causallearn.search.ScoreBased.GES import ges
from causallearn.utils.GraphUtils import GraphUtils

In [62]:
from causallearn.utils.cit import fastkci

In [95]:
# Read the subject table from a CSV path that is relative to the kernel's working directory.
data = pd.read_csv("../data/clean.csv")

In [96]:
# Show (n_rows, n_columns) of the loaded frame as a quick sanity check.
data.shape

(331, 51)

In [90]:
def numerical_encoding(data):
    '''
    Encodes the categorical columns 'Ethnicity', 'Gender' and 'IRIS' of the subject dataset
    as discrete numeric codes. Returns the resulting dataframe; the input is not modified.

    A label that is missing from a column's code table becomes NaN. A column that is
    already numeric is left untouched, so applying this function to an already-encoded
    frame (e.g. when a notebook cell is re-run) returns the same codes instead of
    replacing the whole column with NaN.

    :param: data: raw subject dataset containing the 'Ethnicity', 'Gender' and 'IRIS' columns
    :return: copy of data with the three columns replaced by their numeric codes
    :raises KeyError: if one of the three columns is absent from data
    '''
    # NOTE(review): the fallback label is spelled 'unknown' for Ethnicity but 'Unknown'
    # for IRIS -- confirm both spellings against the CSV, a mismatch silently yields NaN.
    code_tables = {
        'Ethnicity': {'C': 0, 'A': 1, 'B': 2, 'H': 3, 'unknown': 4},
        'Gender': {'M': 0, 'F': 1},
        'IRIS': {'IR': 0, 'IS': 1, 'Unknown': 2},
    }

    X = data.copy()
    for column, codes in code_tables.items():
        # Numeric dtype means the column was encoded earlier; mapping it again through
        # the string-keyed table would turn every value into NaN.
        if pd.api.types.is_numeric_dtype(X[column]):
            continue
        X[column] = X[column].map(codes)
    return X


def IR_IS_split(df, numerical=False):
    '''
    Returns two dataframes, one per value of the 'IRIS' column: the insulin-resistant (IR)
    rows and the insulin-sensitive (IS) rows. The 'IRIS' column is dropped from both.
    Rows holding any other 'IRIS' value appear in neither result.

    :param: df: dataframe containing all individuals and the 'IRIS' column
    :param: numerical: if truthy, 'IRIS' is expected to hold the codes 0 (IR) and 1 (IS);
                       otherwise it is expected to hold the strings 'IR' and 'IS'
    :return: tuple (IR_df, IS_df)
    :raises ValueError: if df has no 'IRIS' column
    '''
    if "IRIS" not in df.columns:
        # ValueError instead of a bare Exception: callers can catch it specifically,
        # and existing `except Exception` handlers still work.
        raise ValueError("IRIS is not in the columns of df.")

    # Pick the pair of labels that identify the two groups in this encoding.
    ir_label, is_label = (0, 1) if numerical else ('IR', 'IS')

    IR_df = df[df['IRIS'] == ir_label].drop(columns='IRIS')  # Insulin-resistant group
    IS_df = df[df['IRIS'] == is_label].drop(columns='IRIS')  # Insulin-sensitive group

    return IR_df, IS_df

In [91]:
# Replace the categorical columns with numeric codes. Note that `data` is rebound here,
# so from this cell on it refers to the encoded frame rather than the raw CSV contents.
data = numerical_encoding(data)

# Split on the numeric IRIS codes (0 -> IR, 1 -> IS); both results come back without 'IRIS'.
IR, IS = IR_IS_split(data, numerical=True)

# Subject-level covariates, i.e. every remaining column that is not a genus abundance.
non_genera = [
    "Ethnicity",
    "Gender",
    "Adj.age",
    "BMI",
    "SSPG",
]

In [92]:
# Display the IR subset produced by IR_IS_split (the 'IRIS' column has already been dropped).
IR

Unnamed: 0,genus_Akkermansia,genus_Alistipes,genus_Anaerotruncus,genus_Anaerovorax,genus_Bacteroides,genus_Barnesiella,genus_Bilophila,genus_Blautia,genus_Butyricicoccus,genus_Butyricimonas,...,genus_unclassified_Erysipelotrichaceae,genus_unclassified_Firmicutes,genus_unclassified_Lachnospiraceae,genus_unclassified_Porphyromonadaceae,genus_unclassified_Ruminococcaceae,Ethnicity,Gender,Adj.age,BMI,SSPG
23,0.000000,0.016135,0.000000,0.000000,0.532562,0.000053,0.000265,0.007112,0.000796,0.000000,...,0.000027,0.001539,0.018470,0.000027,0.003370,1,1,44.81,34.49,184.0
24,0.000000,0.007928,0.000000,0.000000,0.631977,0.000000,0.000581,0.027378,0.002431,0.000000,...,0.000159,0.000740,0.042019,0.000053,0.003013,1,1,44.81,34.49,184.0
25,0.000000,0.002319,0.000000,0.000000,0.867963,0.000000,0.000114,0.003570,0.000159,0.000000,...,0.000000,0.000046,0.007935,0.000000,0.000682,1,1,44.81,34.49,184.0
26,0.000000,0.007797,0.000000,0.000000,0.891510,0.000000,0.000134,0.000605,0.000134,0.000000,...,0.000000,0.000269,0.004772,0.000000,0.002823,1,1,44.81,34.49,184.0
27,0.000000,0.000037,0.000000,0.000000,0.686224,0.000000,0.000000,0.044911,0.000299,0.000000,...,0.000150,0.007815,0.054334,0.000000,0.000785,1,1,44.81,34.49,184.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
326,0.000128,0.013492,0.000281,0.000179,0.535579,0.000000,0.013518,0.019690,0.001709,0.000025,...,0.008697,0.000025,0.030657,0.003698,0.013798,0,1,58.65,31.24,162.0
327,0.044381,0.008818,0.000390,0.003118,0.258440,0.000049,0.001072,0.161007,0.003118,0.000000,...,0.064111,0.000244,0.046670,0.002485,0.077118,0,1,58.65,31.24,162.0
328,0.000951,0.028284,0.000216,0.000389,0.499816,0.000043,0.000195,0.036826,0.000800,0.000022,...,0.019094,0.001038,0.043940,0.002703,0.025387,0,1,58.65,31.24,162.0
329,0.013300,0.023699,0.000498,0.001611,0.742764,0.000000,0.000029,0.010898,0.000439,0.000000,...,0.018016,0.000029,0.009784,0.001728,0.021649,0,1,58.65,31.24,162.0


In [93]:
# Preview the IS subset without the columns listed in non_genera; the result is only
# displayed, IS itself is left unchanged.
IS.drop(columns=non_genera)

Unnamed: 0,genus_Akkermansia,genus_Alistipes,genus_Anaerotruncus,genus_Anaerovorax,genus_Bacteroides,genus_Barnesiella,genus_Bilophila,genus_Blautia,genus_Butyricicoccus,genus_Butyricimonas,...,genus_Veillonella,genus_unclassified_Bacteria,genus_unclassified_Clostridiales,genus_unclassified_Clostridiales_Incertae.Sedis.XIII,genus_unclassified_Coriobacteriaceae,genus_unclassified_Erysipelotrichaceae,genus_unclassified_Firmicutes,genus_unclassified_Lachnospiraceae,genus_unclassified_Porphyromonadaceae,genus_unclassified_Ruminococcaceae
0,0.004521,0.010944,0.000296,0.000211,0.161709,0.003465,0.000042,0.012296,0.000634,0.003085,...,0.000084,0.124651,0.018254,0.000296,0.000423,0.002197,0.008409,0.067143,0.000000,0.057297
1,0.000092,0.077644,0.000917,0.000413,0.320556,0.016140,0.000825,0.028765,0.003577,0.002247,...,0.000015,0.130191,0.022987,0.000550,0.001849,0.017409,0.010561,0.074174,0.000046,0.048894
2,0.000000,0.046771,0.000149,0.000186,0.265494,0.007770,0.000037,0.022419,0.000781,0.001115,...,0.000149,0.036807,0.024092,0.000446,0.001153,0.004536,0.171134,0.027959,0.000000,0.102428
3,0.002365,0.007882,0.000000,0.000300,0.581182,0.001276,0.000037,0.012573,0.000488,0.000751,...,0.000000,0.046652,0.040422,0.000263,0.000188,0.003040,0.008482,0.121603,0.001239,0.028149
4,0.002988,0.010518,0.000359,0.000299,0.400227,0.002988,0.000060,0.007769,0.001195,0.002630,...,0.000000,0.042610,0.012431,0.000120,0.000239,0.001494,0.024861,0.167454,0.002331,0.027789
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
310,0.003489,0.028837,0.001580,0.000263,0.038976,0.001119,0.000132,0.009415,0.000132,0.000329,...,0.000000,0.157482,0.311410,0.000593,0.002173,0.016986,0.004214,0.053196,0.000988,0.141484
311,0.002734,0.058189,0.001534,0.001367,0.128093,0.004407,0.000112,0.014059,0.000195,0.000669,...,0.000000,0.313175,0.048286,0.000363,0.000837,0.004184,0.025747,0.034896,0.002120,0.100895
312,0.000957,0.067331,0.001914,0.003610,0.023922,0.000652,0.000217,0.020095,0.000000,0.000435,...,0.000000,0.318124,0.072159,0.001827,0.004045,0.014005,0.015963,0.064634,0.000130,0.147971
324,0.000106,0.123416,0.000426,0.000213,0.090406,0.000000,0.000053,0.005218,0.002183,0.002715,...,0.000266,0.063997,0.320786,0.000000,0.000479,0.000053,0.002715,0.042860,0.000053,0.056224


In [94]:
# Run PC on the IS group using only the columns that remain after removing non_genera.
# NOTE(review): the output recorded for this cell ends in
# "ValueError: zero-size array to reduction operation maximum which has no identity";
# the cause is not visible from this notebook -- investigate before relying on `cg`.
IS_genera = IS.drop(columns=non_genera)
cg = pc(
    IS_genera.values,
    alpha=0.05,
    indep_test=fastkci,
)

  0%|          | 0/45 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = um.true_divide(


ValueError: zero-size array to reduction operation maximum which has no identity

In [78]:
# `cg` is fitted on IS.drop(columns=non_genera) and the output file is the IS graph, so the
# node labels are taken from that same IS frame (the original cell read them from IR).
genera_labels = IS.drop(columns=non_genera).columns.tolist()
pyd = GraphUtils.to_pydot(cg.G, labels=genera_labels)

# Plain string literal: the path has no placeholders, so no f-prefix is needed.
pyd.write_png("../graphs/pc_IS_genera_causal_graph.png")

In [44]:
# Run FCI on the IR group with the Fisher-z independence test. Every column of IR is
# passed in, i.e. the non_genera covariates are included alongside the genera.
ir_matrix = IR.values
g_ir, edges_ir = fci(ir_matrix, independence_test_method="fisherz")

  0%|          | 0/50 [00:00<?, ?it/s]

X27 --> X2
X32 --> X3
X45 --> X3
X4 --> X23
X39 --> X4
X4 --> X40
X45 --> X5
X25 --> X8
X23 --> X13
X19 --> X39
X19 --> X40
X19 --> X41
X23 --> X20
X40 --> X20
X32 --> X27
X27 --> X45
X37 --> X32
X32 --> X38
X42 --> X37
