In [412]:
%load_ext autoreload
%autoreload 2
import pandas as pd
from datetime import datetime
from pathlib import Path
from notebooks.utils import  *
import networkx as nx
import matplotlib.pyplot as plt
from causallearn.search.ConstraintBased.PC import pc
from causallearn.search.ConstraintBased.FCI import fci
import logging
from causallearn.utils.PCUtils.BackgroundKnowledge import BackgroundKnowledge
from causallearn.utils.GraphUtils import GraphUtils
from dowhy import CausalModel
import dowhy.datasets
import dowhy
import pydot

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [413]:
project_root = get_git_root()
data_folder = "data"
datafile = "features.csv"
figsize = (10, 8)
font_graph = 10
font_color_graph = "black"

In [414]:
df = pd.read_csv(Path(project_root, data_folder, datafile))
df = df.dropna(subset="day_survey")

In [415]:
num_prior_days = 10
date_covid = datetime(2020, 3, 1)
# rough date
date_vaccine = datetime(2021, 4, 1)

ema_dictionary = {
    "Y1": "pam",
    "Y2": "phq2_score",
    "Y3": "phq4_score",
    "Y4": "gad2_score",
    "Y5": "social_level",
    "Y6": "sse_score",
    "Y7": "stress",
}
reverse_ema_dictionary = {v: k for k, v in ema_dictionary.items()}

physical_dictionary = {
    "P1": "excercise",
    "P2": "studying",
    "P3": "in house",
    "P4": "sports",
}
social_dictionary = {
    "S1": "traveling",
    "S2": "distance traveled",
    "S3": "time in social location",
    "S4": "visits",
    "S5": "duration unlocked phone in social locations",
    "S6": "frequency of unlocked phone in social locations",
    "S7": "motion at social locations",
}

sleep_dictionary = {
    "Z1": "sleep_duration",
    "Z2": "sleep start time",
    "Z3": "sleep end time",
}

full_dictionary = (
    physical_dictionary | social_dictionary | sleep_dictionary | ema_dictionary
)

ema = [f"Y{i}" for i in range(1, 8, 1)]
physical = [f"P{i}" for i in range(1, 5, 1)]
social = [f"S{i}" for i in range(1, 8, 1)]
sleep = [f"Z{i}" for i in range(1, 4, 1)]

In [416]:
df["date"] = pd.to_datetime(df["day_survey"])
df.rename(columns=reverse_ema_dictionary, inplace=True)
df.set_index(["uid", "date"], inplace=True)
df.dropna(subset=ema + physical + social + sleep, inplace=True)

In [417]:
outcomes = ["Y4"]
features = social
subset = outcomes + social
draw_dictionary = {
    k: v for k, v in full_dictionary.items() if k in (outcomes + features)
}
df = df[subset]
draw_dictionary

{'S1': 'traveling',
 'S2': 'distance traveled',
 'S3': 'time in social location',
 'S4': 'visits',
 'S5': 'duration unlocked phone in social locations',
 'S6': 'frequency of unlocked phone in social locations',
 'S7': 'motion at social locations',
 'Y4': 'gad2_score'}

In [418]:
node_number_dictionary = {i: col for i, col in enumerate(df.columns)}
node_number_dictionary

{0: 'Y4', 1: 'S1', 2: 'S2', 3: 'S3', 4: 'S4', 5: 'S5', 6: 'S6', 7: 'S7'}

In [419]:
df.columns

Index(['Y4', 'S1', 'S2', 'S3', 'S4', 'S5', 'S6', 'S7'], dtype='object')

In [420]:
cg = pc(np.vstack(df.to_numpy()))

for index, node in enumerate(cg.G.nodes):
    node_name = df.columns[index]
    cg.G.nodes[index]["label"] = node_name
bk = BackgroundKnowledge()

for node in range(1, len(df.columns)):
    print(
        f"adding forbidden between {cg.G.nodes[0]['label']} and {cg.G.nodes[node]['label']}"
    )
    bk.add_forbidden_by_node(cg.G.nodes[0], cg.G.nodes[node])

parameters = {
    "alpha": 0.05,
    "node_names": df.columns,
    "independence_test_method": "fisherz",
    "background_knowledge": bk,
}

  0%|          | 0/8 [00:00<?, ?it/s]

adding forbidden between Y4 and S1
adding forbidden between Y4 and S2
adding forbidden between Y4 and S3
adding forbidden between Y4 and S4
adding forbidden between Y4 and S5
adding forbidden between Y4 and S6
adding forbidden between Y4 and S7


In [433]:
fci_g, edges = fci(np.vstack(df.to_numpy()), **parameters)

for index, node in enumerate(fci_g.nodes):
    node.name = df.columns[index]

pc_g = pc(np.vstack(df.to_numpy()), **parameters)

pyd_pc = GraphUtils.to_pydot(pc_g.G)
pyd_fci = GraphUtils.to_pydot(fci_g, edges=edges, labels=df.columns)

pyd_fci.write_png("fci.png")
pyd_pc.write_png("pc.png")


pyd_pc.write_dot("pc.dot")
pyd_fci.write_dot("fci.dot")

  0%|          | 0/8 [00:00<?, ?it/s]

Starting BK Orientation.
Orienting edge (Knowledge): X3 --> X1
Orienting edge (Knowledge): X5 --> X1
Orienting edge (Knowledge): X7 --> X1
Orienting edge (Knowledge): X8 --> X1
Finishing BK Orientation.
Starting BK Orientation.
Orienting edge (Knowledge): X3 --> X1
Orienting edge (Knowledge): X5 --> X1
Orienting edge (Knowledge): X7 --> X1
Orienting edge (Knowledge): X8 --> X1
Finishing BK Orientation.
X3 --> X1
X5 --> X1
X7 --> X1
X8 --> X1
X2 --> X4
X2 --> X5
X6 --> X2
X5 --> X4


  0%|          | 0/8 [00:00<?, ?it/s]

In [434]:
model = CausalModel(
    data=df,
    treatment="4",
    outcome="0",
    graph="fci.dot",
)
model.logger.setLevel(logging.WARNING)

In [435]:
ied = model.identify_effect(estimand_type="nonparametric-ate")

NetworkXError: graph should be directed acyclic

In [None]:
print(ied)

Estimand type: EstimandType.NONPARAMETRIC_ATE

### Estimand : 1
Estimand name: backdoor
No such variable(s) found!

### Estimand : 2
Estimand name: iv
No such variable(s) found!

### Estimand : 3
Estimand name: frontdoor
No such variable(s) found!



In [438]:
node_number_dictionary

{0: 'Y4', 1: 'S1', 2: 'S2', 3: 'S3', 4: 'S4', 5: 'S5', 6: 'S6', 7: 'S7'}

In [437]:
fci_g.nodes[1].name

'Y4'

In [None]:
fci_g.add_directed_edge(fci_g.nodes[4], fci_g.nodes[7])