In [1]:
%pip install dowhy

Collecting dowhy
  Downloading dowhy-0.12-py3-none-any.whl.metadata (18 kB)
Collecting cvxpy>=1.2.2 (from dowhy)
  Downloading cvxpy-1.6.0-cp39-cp39-macosx_10_9_universal2.whl.metadata (9.2 kB)
Collecting cython<3.0 (from dowhy)
  Downloading Cython-0.29.37-py2.py3-none-any.whl.metadata (3.1 kB)
Collecting osqp>=0.6.2 (from cvxpy>=1.2.2->dowhy)
  Downloading osqp-0.6.7.post3-cp39-cp39-macosx_11_0_arm64.whl.metadata (1.9 kB)
Collecting clarabel>=0.5.0 (from cvxpy>=1.2.2->dowhy)
  Downloading clarabel-0.10.0-cp39-abi3-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl.metadata (4.8 kB)
Collecting scs>=3.2.4.post1 (from cvxpy>=1.2.2->dowhy)
  Downloading scs-3.2.7.post2-cp39-cp39-macosx_11_0_arm64.whl.metadata (2.1 kB)
Collecting qdldl (from osqp>=0.6.2->cvxpy>=1.2.2->dowhy)
  Downloading qdldl-0.1.7.post5-cp39-cp39-macosx_11_0_arm64.whl.metadata (1.7 kB)
Downloading dowhy-0.12-py3-none-any.whl (398 kB)
Downloading cvxpy-1.6.0-cp39-cp39-macosx_10_9_universal2.whl (1.5 MB)


In [2]:
# imports
import networkx as nx 
import numpy as np
import pandas as pd
from dowhy import gcm

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# loading the data 
depression = pd.read_csv("data/Student Depression Dataset.csv")
depression = depression.dropna()
depression = depression.replace({'Yes': 1, 'No': 0})

data_encoded = pd.get_dummies(depression, drop_first=True)
data_encoded = data_encoded[['Academic Pressure', 'Have you ever had suicidal thoughts ?',
 'Financial Stress', 'City_Ahmedabad', 'City_Bhopal', 'City_Faridabad',
 'City_Hyderabad', 'City_Meerut', 'City_Patna', 'Dietary Habits_Moderate',
 'Dietary Habits_Others', 'Dietary Habits_Unhealthy', 'Depression']]

depression_LASSO_features = data_encoded[['Academic Pressure', 'Have you ever had suicidal thoughts ?',
  'Financial Stress', 'City_Ahmedabad', 'City_Bhopal', 'City_Faridabad',
  'City_Hyderabad', 'City_Meerut', 'City_Patna', 'Dietary Habits_Moderate',
  'Dietary Habits_Others', 'Dietary Habits_Unhealthy', 'Depression']]

depression_LASSO_features = depression_LASSO_features.astype(int)

depression_LASSO_features.head()

  depression = depression.replace({'Yes': 1, 'No': 0})


Unnamed: 0,Academic Pressure,Have you ever had suicidal thoughts ?,Financial Stress,City_Ahmedabad,City_Bhopal,City_Faridabad,City_Hyderabad,City_Meerut,City_Patna,Dietary Habits_Moderate,Dietary Habits_Others,Dietary Habits_Unhealthy,Depression
0,5,1,1,0,0,0,0,0,0,0,0,0,1
1,2,0,2,0,0,0,0,0,0,1,0,0,0
2,3,0,1,0,0,0,0,0,0,0,0,0,0
3,3,1,5,0,0,0,0,0,0,1,0,0,1
4,4,1,1,0,0,0,0,0,0,1,0,0,0


In [4]:
depression_LASSO_features['Dietary Habits_Unhealthy'].sum()

10316

In [5]:
# X -> treatment (e.g., sleep duration, dietary habits)
# Y -> city of residence?
# Z -> observation (e.g., depression)

# create "scoring" for dietary habits -> 0 for unhealthy, 
# 1 for moderate, 2 for other 

# add the ones = 1 from unhealthy,
# = 1 from moderate, and = 1 for others. before this, 
# encode them as above.
depression_LASSO_features['Dietary_Score'] = (
    depression_LASSO_features['Dietary Habits_Moderate'] * 1 +
    depression_LASSO_features['Dietary Habits_Others'] * 2 +
    depression_LASSO_features['Dietary Habits_Unhealthy'] * 0
)
X = depression_LASSO_features['Dietary_Score']

city_columns = ['City_Ahmedabad', 'City_Bhopal', 'City_Faridabad',
                'City_Hyderabad', 'City_Meerut', 'City_Patna']

depression_LASSO_features['City_Index'] = depression_LASSO_features[city_columns].idxmax(axis=1)
depression_LASSO_features['City_Index'] = depression_LASSO_features['City_Index'].apply(lambda x: city_columns.index(x) + 1)

Y = depression_LASSO_features['City_Index']
Z = depression_LASSO_features['Depression']

In [6]:
training_data = pd.DataFrame(data=dict(X=X, Y=Y, Z=Z))

In [7]:
causal_model = gcm.InvertibleStructuralCausalModel(nx.DiGraph([('X', 'Y'), ('Y', 'Z')])) # X -> Y -> Z
causal_model.set_causal_mechanism('X', gcm.EmpiricalDistribution())
causal_model.set_causal_mechanism('Y', gcm.AdditiveNoiseModel(gcm.ml.create_linear_regressor()))
causal_model.set_causal_mechanism('Z', gcm.AdditiveNoiseModel(gcm.ml.create_linear_regressor()))

In [8]:
gcm.fit(causal_model, training_data)

Fitting causal mechanism of node Z: 100%|██████████| 3/3 [00:00<00:00, 93.86it/s]


In [9]:
gcm.counterfactual_samples(
    causal_model,
    {'X': lambda x: 2},
    observed_data=pd.DataFrame(data=dict(X=[1], Y=[2], Z=[3])))

Unnamed: 0,X,Y,Z
0,2,2.040501,3.000568
