In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from causallearn.search.ConstraintBased.PC import pc
from causallearn.utils.GraphUtils import GraphUtils
from causallearn.utils.PCUtils.BackgroundKnowledge import BackgroundKnowledge
from causallearn.graph.GraphNode import GraphNode
import pydot
from IPython.display import Image, display

# FCI for gut microbes dataset

In [4]:
subject_info = pd.read_csv("../datasets/subject_file.csv")
subject_info.head()

Unnamed: 0,SubjectID,Study,Race,Sex,Age,BMI,SSPG,IR_IS_classification
0,ZIS22OE,HMP,C,F,,,,Unknown
1,ZJBOZ2X,Exercise,C,F,65.3,19.82,,Unknown
2,ZJOSZHK,HMP,C,M,41.43,19.42,,Unknown
3,ZJTKAE3,HMP,C,F,58.65,31.24,162.0,IR
4,ZJXC41N,"HMP, Exercise",B,F,49.69,28.24,75.0,IS


In [7]:
# merge gut_microbes with subject on subject id
gut_microbes = pd.read_csv("../datasets/gut_16s_abundance.txt", sep = "\t")
# add SubjectID column to gut_microbes
gut_microbes['SubjectID'] = gut_microbes['SampleID'].str.split('-').str[0]
gut_microbes.head()

Unnamed: 0,SampleID,phylum_Actinobacteria,phylum_Bacteroidetes,phylum_Firmicutes,phylum_Proteobacteria,phylum_Verrucomicrobia,phylum_unclassified_Bacteria,class_Actinobacteria,class_Bacilli,class_Bacteroidia,...,genus_unclassified_Bacteria,genus_unclassified_Clostridiales,genus_unclassified_Clostridiales_Incertae.Sedis.XIII,genus_unclassified_Coriobacteriaceae,genus_unclassified_Erysipelotrichaceae,genus_unclassified_Firmicutes,genus_unclassified_Lachnospiraceae,genus_unclassified_Porphyromonadaceae,genus_unclassified_Ruminococcaceae,SubjectID
0,ZOZOW1T-1010,0.000449,0.650866,0.22508,0.007364,0.015835,0.097431,0.000449,0.000138,0.650866,...,0.097431,0.007883,3.5e-05,0.000415,0.002386,0.015697,0.027452,0.0,0.074128,ZOZOW1T
1,ZOZOW1T-1011,0.000176,0.730518,0.184808,0.0039,0.008186,0.070902,0.000176,7e-05,0.730518,...,0.070902,0.008713,0.000141,0.000176,0.000808,0.008608,0.011805,0.0,0.069215,ZOZOW1T
2,ZOZOW1T-1012,0.000597,0.178364,0.796861,0.000358,0.002788,0.020911,0.000597,0.000119,0.178364,...,0.020911,0.022903,0.000518,0.000398,0.001713,0.000876,0.018362,0.0,0.615351,ZOZOW1T
3,ZOZOW1T-1015,5.8e-05,0.740726,0.226516,0.005977,0.001207,0.024971,5.8e-05,2.9e-05,0.740726,...,0.024971,0.013046,8.6e-05,2.9e-05,0.004109,0.00773,0.043849,2.9e-05,0.067096,ZOZOW1T
4,ZOZOW1T-1021,0.001113,0.55458,0.364195,0.038358,5.9e-05,0.041286,0.001113,5.9e-05,0.55458,...,0.041286,0.006208,0.000117,0.000176,0.000468,0.000996,0.132584,0.015753,0.044624,ZOZOW1T


In [8]:
merged_df = pd.merge(gut_microbes, subject_info, on='SubjectID', how='left')
merged_df.head()

Unnamed: 0,SampleID,phylum_Actinobacteria,phylum_Bacteroidetes,phylum_Firmicutes,phylum_Proteobacteria,phylum_Verrucomicrobia,phylum_unclassified_Bacteria,class_Actinobacteria,class_Bacilli,class_Bacteroidia,...,genus_unclassified_Porphyromonadaceae,genus_unclassified_Ruminococcaceae,SubjectID,Study,Race,Sex,Age,BMI,SSPG,IR_IS_classification
0,ZOZOW1T-1010,0.000449,0.650866,0.22508,0.007364,0.015835,0.097431,0.000449,0.000138,0.650866,...,0.0,0.074128,ZOZOW1T,"HMP, Exercise",C,M,59.48,21.47,91.5,IS
1,ZOZOW1T-1011,0.000176,0.730518,0.184808,0.0039,0.008186,0.070902,0.000176,7e-05,0.730518,...,0.0,0.069215,ZOZOW1T,"HMP, Exercise",C,M,59.48,21.47,91.5,IS
2,ZOZOW1T-1012,0.000597,0.178364,0.796861,0.000358,0.002788,0.020911,0.000597,0.000119,0.178364,...,0.0,0.615351,ZOZOW1T,"HMP, Exercise",C,M,59.48,21.47,91.5,IS
3,ZOZOW1T-1015,5.8e-05,0.740726,0.226516,0.005977,0.001207,0.024971,5.8e-05,2.9e-05,0.740726,...,2.9e-05,0.067096,ZOZOW1T,"HMP, Exercise",C,M,59.48,21.47,91.5,IS
4,ZOZOW1T-1021,0.001113,0.55458,0.364195,0.038358,5.9e-05,0.041286,0.001113,5.9e-05,0.55458,...,0.015753,0.044624,ZOZOW1T,"HMP, Exercise",C,M,59.48,21.47,91.5,IS


In [9]:
# one hot encoding
df = pd.get_dummies(merged_df, sparse=True)
df.head()

Unnamed: 0,phylum_Actinobacteria,phylum_Bacteroidetes,phylum_Firmicutes,phylum_Proteobacteria,phylum_Verrucomicrobia,phylum_unclassified_Bacteria,class_Actinobacteria,class_Bacilli,class_Bacteroidia,class_Betaproteobacteria,...,Race_A,Race_B,Race_C,Race_H,Race_unknown,Sex_F,Sex_M,IR_IS_classification_IR,IR_IS_classification_IS,IR_IS_classification_Unknown
0,0.000449,0.650866,0.22508,0.007364,0.015835,0.097431,0.000449,0.000138,0.650866,0.001936,...,0,0,1,0,0,0,1,0,1,0
1,0.000176,0.730518,0.184808,0.0039,0.008186,0.070902,0.000176,7e-05,0.730518,0.001511,...,0,0,1,0,0,0,1,0,1,0
2,0.000597,0.178364,0.796861,0.000358,0.002788,0.020911,0.000597,0.000119,0.178364,8e-05,...,0,0,1,0,0,0,1,0,1,0
3,5.8e-05,0.740726,0.226516,0.005977,0.001207,0.024971,5.8e-05,2.9e-05,0.740726,0.001178,...,0,0,1,0,0,0,1,0,1,0
4,0.001113,0.55458,0.364195,0.038358,5.9e-05,0.041286,0.001113,5.9e-05,0.55458,0.011595,...,0,0,1,0,0,0,1,0,1,0


In [10]:
# data input for FCI must be numpy array
data = np.array(df)
#data

In [11]:
#gut_microbes = gut_microbes.drop(columns=['SampleID'])
#data = np.array(gut_microbes)
data

array([[4.49469000e-04, 6.50866093e-01, 2.25080386e-01, ...,
        0.00000000e+00, 1.00000000e+00, 0.00000000e+00],
       [1.75673000e-04, 7.30517883e-01, 1.84807814e-01, ...,
        0.00000000e+00, 1.00000000e+00, 0.00000000e+00],
       [5.97467000e-04, 1.78363738e-01, 7.96861308e-01, ...,
        0.00000000e+00, 1.00000000e+00, 0.00000000e+00],
       ...,
       [9.66183600e-03, 5.32155797e-01, 4.45727657e-01, ...,
        1.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [2.60721800e-03, 7.76423717e-01, 2.04212562e-01, ...,
        1.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [3.74855510e-02, 3.52837563e-01, 5.84466340e-01, ...,
        1.00000000e+00, 0.00000000e+00, 0.00000000e+00]])

In [None]:
from causallearn.search.ConstraintBased.FCI import fci
#from causallearn.utils.cit import fisherz  # import the independence test method 

independent_test_method = 'chisq'

# default parameters
#g, edges = fci(data)

# or customized parameters
# NOTE ; try chisq test
g, edges = fci(data, independence_test_method='chisq')

# visualization
from causallearn.utils.GraphUtils import GraphUtils

pdy = GraphUtils.to_pydot(g)
pdy.write_png('simple_test.png')




  0%|          | 0/1062 [00:00<?, ?it/s]

# FCI for subject file

In [None]:
gut_microbes_numeric