In [8]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sys
import graphviz
import networkx as nx

import dowhy
from dowhy import CausalModel

sys.path.append('../')

from src.etl import *
from src.eda import *
from src.graph import *
from src.sparsify import *

In [2]:
# T2D
t2d_otu = pd.read_csv("../data/t2d/otu_table.csv", index_col=0)
t2d_metadata = pd.read_csv("../data/t2d/metadata.csv", index_col=0)
t2d = pd.concat([t2d_metadata, t2d_otu], axis=1)

In [3]:
# get causal graph
merged, metadata, disease = t2d, t2d_metadata, 't2d'
data_loglasso = prune_lasso(merged, metadata, f'../data/{disease}/lasso_covariates.txt')
t2d_graph = run_cdnod(data_loglasso, disease, f'{disease}/cdnod')

The pruned dataset has the following dimensions:  (331, 22)


  0%|          | 0/22 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

The following genera are directly linked to the 'IRIS' node: 
 genus_Butyricimonas
genus_Clostridium.XlVb
genus_Odoribacter
genus_unclassified_Bacteria
genus_unclassified_Firmicutes
The following genera are directly linked to the 'Gender' node: 
 genus_Parasutterella
The following genera are directly linked to the 'Ethnicity' node: 
 genus_Butyricicoccus
genus_Coprococcus
genus_unclassified_Bacteria


In [52]:
def make_graph(adjacency_matrix, labels=None):
    idx = np.abs(adjacency_matrix) > 0
    dirs = np.where(idx)
    d = graphviz.Digraph(engine='dot')
    names = labels if labels else [f'x{i}' for i in range(len(adjacency_matrix))]
    for name in names:
        d.node(name)
    for to, from_, coef in zip(dirs[0], dirs[1], adjacency_matrix[idx]):
        d.edge(names[from_], names[to], label=str(coef))
    return d

def str_to_dot(string):
    '''
    Converts input string from graphviz library to valid DOT graph format.
    '''
    graph = string.strip().replace('\n', ';').replace('\t','')
    graph = graph[:9] + graph[10:-2] + graph[-1] # Removing unnecessary characters from string
    return graph

def find_tails(graph):
    L = np.where(graph == -1)
    return list(zip(L[1], L[0]))

def find_arrow_heads(graph):
    L = np.where(graph == 1)
    return list(zip(L[1], L[0]))

def to_nx_graph(graph, labels):
    nodes = range(len(graph))
    nx_graph = nx.DiGraph()
    nx_graph.add_nodes_from(nodes)
    undirected = [(edge[0], edge[1]) for edge in find_tails(graph) if graph[edge[0], edge[1]] == -1]
    directed = [(edge[0], edge[1]) for edge in find_arrow_heads(graph) if graph[edge[0], edge[1]] == -1]
    for (i, j) in undirected:
        nx_graph.add_edge(i, j, color='g')
    for (i, j) in directed:
        nx_graph.add_edge(i, j, color='b')
    return nx_graph

def to_graphviz_graph(graph, labels):
    graphviz_graph = graphviz.Digraph(engine='dot')
    for node in labels:
        graphviz_graph.node(node)
    undirected = [(edge[0], edge[1]) for edge in find_tails(graph) if graph[edge[0], edge[1]] == -1]
    directed = [(edge[0], edge[1]) for edge in find_arrow_heads(graph) if graph[edge[0], edge[1]] == -1]
    # for (i, j) in undirected:
    #     graphviz_graph.edge(labels[i], labels[j])
    for (i, j) in directed:
        graphviz_graph.edge(labels[i], labels[j])
    return graphviz_graph

In [53]:
# t2d_graph_dot = GraphUtils.to_pydot(t2d_graph, labels=data_loglasso.columns)
t2d_graph_dot = to_graphviz_graph(t2d_graph.graph, labels=data_loglasso.columns)

t2d_model = CausalModel(
    data = data_loglasso, 
    treatment = 'genus_Butyricimonas',
    outcome = 'IRIS',
    graph = str_to_dot(t2d_graph_dot.source))

In [58]:
identified_estimand = t2d_model.identify_effect(proceed_when_unidentifiable=True)
print(identified_estimand)

estimate = t2d_model.estimate_effect(identified_estimand,
                                     method_name = 'backdoor.propensity_score_stratification',
                                     test_significance = True)

Estimand type: nonparametric-ate

### Estimand : 1
Estimand name: backdoor
Estimand expression:
          d                    
──────────────────────(E[IRIS])
d[genus_Butyricimonas]         
Estimand assumption 1, Unconfoundedness: If U→{genus_Butyricimonas} and U→IRIS then P(IRIS|genus_Butyricimonas,,U) = P(IRIS|genus_Butyricimonas,)

### Estimand : 2
Estimand name: iv
No such variable(s) found!

### Estimand : 3
Estimand name: frontdoor
No such variable(s) found!

propensity_score_stratification


Exception: Propensity score methods are applicable only for binary treatments

# Do-Calculus on T2D

In [7]:
t2d_otu = pd.read_csv("../data/t2d/otu_table.csv", index_col=0)
t2d_metadata = pd.read_csv("../data/t2d/metadata.csv", index_col=0)
t2d = pd.concat([t2d_metadata, t2d_otu], axis=1)
t2d

Unnamed: 0_level_0,IRIS,Gender,Ethnicity,genus_Akkermansia,genus_Alistipes,genus_Anaerotruncus,genus_Anaerovorax,genus_Bacteroides,genus_Barnesiella,genus_Bilophila,...,genus_Veillonella,genus_unclassified_Bacteria,genus_unclassified_Clostridiales,genus_unclassified_Clostridiales_Incertae.Sedis.XIII,genus_unclassified_Coriobacteriaceae,genus_unclassified_Erysipelotrichaceae,genus_unclassified_Firmicutes,genus_unclassified_Lachnospiraceae,genus_unclassified_Porphyromonadaceae,genus_unclassified_Ruminococcaceae
SampleID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ZOZOW1T-4026,0,0,0,0.452125,1.094397,0.029578,0.021127,16.170878,0.346489,0.004230,...,0.008450,12.465140,1.825403,0.029578,0.042255,0.219724,0.840869,6.714274,0.000000,5.729739
ZOZOW1T-44,0,0,0,0.009170,7.764379,0.091705,0.041267,32.055573,1.614013,0.082535,...,0.001530,13.019090,2.298745,0.055023,0.184939,1.740871,1.056139,7.417427,0.004590,4.889419
ZOZOW1T-59,0,0,0,0.000000,4.677102,0.014871,0.018589,26.549429,0.777038,0.003720,...,0.014871,3.680708,2.409191,0.044615,0.115254,0.453582,17.113433,2.795851,0.000000,10.242778
ZOZOW1T-6021,0,0,0,0.236451,0.788170,0.000000,0.030025,58.118150,0.127608,0.003750,...,0.000000,4.665215,4.042186,0.026272,0.018766,0.304008,0.848221,12.160336,0.123855,2.814893
ZOZOW1T-6031,0,0,0,0.298811,1.051814,0.035857,0.029881,40.022710,0.298811,0.005980,...,0.000000,4.261041,1.243053,0.011952,0.023905,0.149405,2.486105,16.745354,0.233072,2.778940
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZJTKAE3-01,1,1,0,0.012752,1.349214,0.028055,0.017854,53.557947,0.000000,1.351765,...,0.010202,0.066313,0.390226,0.040808,0.895225,0.869720,0.002550,3.065701,0.369822,1.379820
ZJTKAE3-04,1,1,0,4.438057,0.881766,0.038973,0.311785,25.844010,0.004870,0.107176,...,0.000000,0.107176,0.316656,0.657670,0.993813,6.411068,0.024358,4.667024,0.248453,7.711794
ZJTKAE3-06,1,1,0,0.095145,2.828414,0.021624,0.038923,49.981620,0.004320,0.019462,...,0.004320,0.030273,1.076873,0.069197,0.118932,1.909396,0.103795,4.393988,0.270300,2.538653
ZJTKAE3-6011,1,1,0,1.329974,2.369932,0.049801,0.161120,74.276424,0.000000,0.002930,...,0.000000,0.131826,3.890321,0.328099,0.020506,1.801617,0.002930,0.978439,0.172838,2.164870


In [None]:
propensity_score.continuous_treatment_model()