In [2]:
%pip install dowhy


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.3.1[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip3 install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [3]:
# imports
import networkx as nx 
import numpy as np
import pandas as pd
from dowhy import gcm

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# loading the data 
depression = pd.read_csv("data/Student Depression Dataset.csv")
depression = depression.dropna()
depression = depression.replace({'Yes': 1, 'No': 0})

data_encoded = pd.get_dummies(depression, drop_first=True)
data_encoded = data_encoded[['Academic Pressure', 'Have you ever had suicidal thoughts ?',
 'Financial Stress', 'City_Ahmedabad', 'City_Bhopal', 'City_Faridabad',
 'City_Hyderabad', 'City_Meerut', 'City_Patna', 'Dietary Habits_Moderate',
 'Dietary Habits_Others', 'Dietary Habits_Unhealthy', 'Depression']]

depression_LASSO_features = data_encoded[['Academic Pressure', 'Have you ever had suicidal thoughts ?',
  'Financial Stress', 'City_Ahmedabad', 'City_Bhopal', 'City_Faridabad',
  'City_Hyderabad', 'City_Meerut', 'City_Patna', 'Dietary Habits_Moderate',
  'Dietary Habits_Others', 'Dietary Habits_Unhealthy', 'Depression']]

depression_LASSO_features = depression_LASSO_features.astype(int)

depression_LASSO_features.head()

  depression = depression.replace({'Yes': 1, 'No': 0})


Unnamed: 0,Academic Pressure,Have you ever had suicidal thoughts ?,Financial Stress,City_Ahmedabad,City_Bhopal,City_Faridabad,City_Hyderabad,City_Meerut,City_Patna,Dietary Habits_Moderate,Dietary Habits_Others,Dietary Habits_Unhealthy,Depression
0,5,1,1,0,0,0,0,0,0,0,0,0,1
1,2,0,2,0,0,0,0,0,0,1,0,0,0
2,3,0,1,0,0,0,0,0,0,0,0,0,0
3,3,1,5,0,0,0,0,0,0,1,0,0,1
4,4,1,1,0,0,0,0,0,0,1,0,0,0


In [5]:
depression_LASSO_features['Dietary Habits_Unhealthy'].sum()

10316

In [6]:
# X -> treatment (e.g., sleep duration, dietary habits)
# Z -> observation (e.g., depression)

# create "scoring" for dietary habits -> 0 for unhealthy, 
# 1 for moderate, 2 for other 

# add the ones = 1 from unhealthy,
# = 1 from moderate, and = 1 for others. before this, 
# encode them as above.
depression_LASSO_features['Dietary_Score'] = (
    depression_LASSO_features['Dietary Habits_Moderate'] * 1 +
    depression_LASSO_features['Dietary Habits_Others'] * 2 +
    depression_LASSO_features['Dietary Habits_Unhealthy'] * 0
)
X = depression_LASSO_features['Dietary_Score']

city_columns = ['City_Ahmedabad', 'City_Bhopal', 'City_Faridabad',
                'City_Hyderabad', 'City_Meerut', 'City_Patna']

depression_LASSO_features['City_Index'] = depression_LASSO_features[city_columns].idxmax(axis=1)
depression_LASSO_features['City_Index'] = depression_LASSO_features['City_Index'].apply(lambda x: 
                                                                                        city_columns.index(x) + 1)

Y = depression_LASSO_features['City_Index']
Z = depression_LASSO_features['Depression']

### Possible counterfactual questions:
1. What would happen to Depression (Z), if i intervened on someone's Financial Sress (X)?
2. What would happen to Depression (Z), if i intervened on someone's Academic Pressure (X)? e.g., if I reduce someone's academic pressure by 30%, would the likelihood of depression decrease? 
3. What would happen to Depression (Z), if i intervened on someone's Academix Pressure (X)? 


### X -> Academic Pressure, Z -> Depression

- intervene on Academic Pressure, decrease it by 30%. What do we observe about Depression?

In [14]:
depression_LASSO_features.head()

Unnamed: 0,Academic Pressure,Have you ever had suicidal thoughts ?,Financial Stress,City_Ahmedabad,City_Bhopal,City_Faridabad,City_Hyderabad,City_Meerut,City_Patna,Dietary Habits_Moderate,Dietary Habits_Others,Dietary Habits_Unhealthy,Depression,Dietary_Score,City_Index
0,5,1,1,0,0,0,0,0,0,0,0,0,1,0,1
1,2,0,2,0,0,0,0,0,0,1,0,0,0,1,1
2,3,0,1,0,0,0,0,0,0,0,0,0,0,0,1
3,3,1,5,0,0,0,0,0,0,1,0,0,1,1,1
4,4,1,1,0,0,0,0,0,0,1,0,0,0,1,1


In [35]:
# Set the treatment variable (X) to be Academic Pressure;
# this is what we are intervening on.
X = depression_LASSO_features['Academic Pressure']
Z = depression_LASSO_features['Depression']

In [36]:
# Training data for the model
training_data = pd.DataFrame(data=dict(X=X, Z=Z))

In [37]:
# Construct the Graph, set up the model
causal_model = gcm.InvertibleStructuralCausalModel(nx.DiGraph([('X', 'Z')])) # X -> Z
causal_model.set_causal_mechanism('X', 
                                  gcm.EmpiricalDistribution())
causal_model.set_causal_mechanism('Z', 
                                  gcm.AdditiveNoiseModel(
                                      gcm.ml.create_linear_regressor()))

# fit the model to the training data
gcm.fit(causal_model, training_data)

gcm.counterfactual_samples( # generate counterfactual samples
    causal_model,
    {'X': lambda x: 5}, # intervene on Academic Pressure
    observed_data=pd.DataFrame(data=dict(X=[1], Z=[2])))

Fitting causal mechanism of node Z: 100%|██████████| 2/2 [00:00<00:00, 41.99it/s]


Unnamed: 0,X,Z
0,5,2.677259


**X -> Financial Stress, Z -> Depression**

In [30]:
# Set the treatment variable (X) to be Financial Stress;
# this is what we are intervening on.
X = depression_LASSO_features['Financial Stress']
Z = depression_LASSO_features['Depression']

# Training data for the model
training_data = pd.DataFrame(data=dict(X=X, Z=Z))

# Construct the Graph, set up the model
causal_model = gcm.InvertibleStructuralCausalModel(nx.DiGraph([('X', 'Z')])) # X -> Z
causal_model.set_causal_mechanism('X', gcm.EmpiricalDistribution())
causal_model.set_causal_mechanism('Z', 
                                  gcm.AdditiveNoiseModel(
                                      gcm.ml.create_linear_regressor()))

# fit the model to the training data
gcm.fit(causal_model, training_data)

gcm.counterfactual_samples(
    causal_model,
    {'X': lambda x: 5}, # intervene on Financial Stress
                        # set Financial Stress to 5
    observed_data=pd.DataFrame(data=dict(X=[1], Z=[2])))

Fitting causal mechanism of node Z: 100%|██████████| 2/2 [00:00<00:00, 29.56it/s]


Unnamed: 0,X,Z
0,5,2.498473


In [38]:
# Set the treatment variable (X) to be Financial Stress;
# this is what we are intervening on.
X = depression_LASSO_features['Financial Stress']
Z = depression_LASSO_features['Depression']

# Training data for the model
training_data = pd.DataFrame(data=dict(X=X, Z=Z))

# Construct the Graph, set up the model
causal_model = gcm.InvertibleStructuralCausalModel(nx.DiGraph([('X', 'Z')])) # X -> Y -> Z
causal_model.set_causal_mechanism('X', gcm.EmpiricalDistribution())
causal_model.set_causal_mechanism('Z', gcm.AdditiveNoiseModel(gcm.ml.create_linear_regressor()))

# fit the model to the training data
gcm.fit(causal_model, training_data)

gcm.counterfactual_samples(
    causal_model,
    {'X': lambda x: 2}, # intervene on Financial Stress
                        # set Financial Stress to 2
    observed_data=pd.DataFrame(data=dict(X=[5], Z=[2])))

Fitting causal mechanism of node Z: 100%|██████████| 2/2 [00:00<00:00, 81.29it/s]


Unnamed: 0,X,Z
0,2,1.626145
