In [1]:
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from causallearn.search.ConstraintBased.PC import pc
from dowhy import gcm

In [2]:
def load_and_preprocess_data(file_path, selected_columns=None):
    """Read a CSV file and return it as a numeric, one-hot encoded table.

    Processing steps, in order:
      1. the literal values 'Yes' / 'No' are replaced with 1 / 0,
      2. every row containing a missing value is dropped,
      3. remaining non-numeric columns are dummy-encoded with
         ``drop_first=True`` and integer indicator columns.

    Parameters
    ----------
    file_path : anything accepted by ``pandas.read_csv``
        Location of the CSV to load.
    selected_columns : list of str, optional
        Encoded column names to keep. A falsy value (``None`` or an empty
        list) keeps every column.

    Returns
    -------
    tuple
        ``(frame, array)`` where ``frame`` is the encoded DataFrame and
        ``array`` is the same data as returned by ``frame.to_numpy()``.
    """
    yes_no_codes = {'Yes': 1, 'No': 0}
    cleaned = pd.read_csv(file_path).replace(yes_no_codes).dropna()
    encoded = pd.get_dummies(cleaned, drop_first=True, dtype=int)

    # Guard clause: no column filter requested, return the full encoding.
    if not selected_columns:
        return encoded, encoded.to_numpy()

    subset = encoded[selected_columns]
    return subset, subset.to_numpy()

In [121]:
def _prompt_int_in_range(var, low, high):
    """Ask on stdin for an integer value of ``var`` until one in [low, high] is entered."""
    while True:
        try:
            value = int(input(f"  {var} ({low}-{high}): "))
        except ValueError:
            print("Invalid input. Please enter a numeric value.")
            continue
        if low <= value <= high:
            return value
        print(f"Invalid input. Please enter a value between {low} and {high}.")


def compute_counterfactuals1(file_path, intervention_var):
    """Interactively compute counterfactual 'Depression' values for one unit.

    A three-parent causal graph (Financial_Stress, Suicidal_Thoughts,
    Academic_Pressure -> Depression) is fitted on the CSV at ``file_path``.
    The observed unit always has ``intervention_var = 1`` and
    ``Depression = 1``; the remaining variables are requested on stdin.
    The function then intervenes on ``intervention_var`` with every value of
    that variable's range and reports the resulting 'Depression' value.

    Parameters
    ----------
    file_path : anything accepted by ``pandas.read_csv``
        CSV containing the columns 'Financial Stress',
        'Have you ever had suicidal thoughts ?', 'Academic Pressure' and
        'Depression'.
    intervention_var : str
        One of 'Academic_Pressure', 'Suicidal_Thoughts', 'Financial_Stress'
        or 'Depression'. 'Depression' is still accepted for backward
        compatibility, although intervening on the outcome itself is not
        informative.

    Returns
    -------
    pandas.DataFrame
        A single row labelled 'Depression' with one column per intervention
        value, named ``"<intervention_var>=<value>"``.

    Raises
    ------
    ValueError
        If ``intervention_var`` is not one of the supported variables. This
        is checked before the file is read.
    """
    # Inclusive value range of every model variable; used both to validate
    # the prompted values and to choose the intervention values.
    value_ranges = {
        'Academic_Pressure': (1, 5),
        'Suicidal_Thoughts': (0, 1),
        'Financial_Stress': (1, 5),
        # NOTE(review): Depression is assumed to be binary - confirm against the dataset.
        'Depression': (0, 1),
    }
    selected_columns = list(value_ranges)

    # Fail fast on a bad argument, before any file I/O or user prompts.
    if intervention_var not in selected_columns:
        raise ValueError(f"Invalid intervention variable. Choose from: {selected_columns}")

    df = pd.read_csv(file_path).dropna()
    df = df.replace({'Yes': 1, 'No': 0})
    data_encoded = pd.get_dummies(df, drop_first=True, dtype=int)

    # The observed unit is fixed to intervention_var = 1 and Depression = 1;
    # only the other variables are asked for.
    observed_data = {intervention_var: 1, 'Depression': 1}
    print(f"\nPlease enter observed values for the following variables (excluding '{intervention_var}' and 'Depression'):")
    for var in selected_columns:
        if var in observed_data:
            continue
        low, high = value_ranges[var]
        observed_data[var] = _prompt_int_in_range(var, low, high)

    # One-row frame describing the observed unit (Depression included).
    observed_data_df = pd.DataFrame([observed_data])

    # Construct causal model: three root causes, all pointing at Depression.
    causal_model = gcm.InvertibleStructuralCausalModel(nx.DiGraph([
        ('Financial_Stress', 'Depression'),
        ('Suicidal_Thoughts', 'Depression'),
        ('Academic_Pressure', 'Depression'),
    ]))
    for root in ('Financial_Stress', 'Suicidal_Thoughts', 'Academic_Pressure'):
        causal_model.set_causal_mechanism(root, gcm.EmpiricalDistribution())
    causal_model.set_causal_mechanism(
        'Depression',
        gcm.AdditiveNoiseModel(gcm.ml.create_linear_regressor()))

    # Train model on the four relevant columns, renamed to the node names.
    column_map = {
        'Financial Stress': 'Financial_Stress',
        'Have you ever had suicidal thoughts ?': 'Suicidal_Thoughts',
        'Academic Pressure': 'Academic_Pressure',
        'Depression': 'Depression',
    }
    training_data = data_encoded[list(column_map)].rename(columns=column_map)
    gcm.fit(causal_model, training_data)

    # Generate counterfactuals for every value in the intervened variable's range.
    print(f"\nIntervening on '{intervention_var}' with different values...")
    print(observed_data_df)

    low, high = value_ranges[intervention_var]
    counterfactual_results = {}
    for value in range(low, high + 1):
        result = gcm.counterfactual_samples(
            causal_model,
            # Intervene on the requested variable (not a hard-coded one);
            # the default argument binds the current loop value eagerly.
            {intervention_var: lambda x, v=value: v},
            observed_data=observed_data_df
        )
        counterfactual_results[f"{intervention_var}={value}"] = result['Depression'].values[0]

    # Display results
    counterfactual_df = pd.DataFrame(counterfactual_results, index=['Depression'])
    print("\nCounterfactual Results:")
    print(counterfactual_df)

    return counterfactual_df


In [122]:
# Location of the input CSV, given relative to the notebook's working directory.
filepath = 'data/Student Depression Dataset.csv'

In [123]:
# Interactive run: prompts on stdin for the remaining observed values, then
# prints and returns the counterfactual table for 'Financial_Stress'.
compute_counterfactuals1(filepath, 'Financial_Stress')


Please enter observed values for the following variables (excluding 'Financial_Stress' and 'Depression'):
  Academic_Pressure (1-5): 2
  Suicidal_Thoughts (0-1): 2
Invalid input. Please enter a value between 0 and 1.
  Suicidal_Thoughts (0-1): 1


Fitting causal mechanism of node Academic_Pressure: 100%|███████████████████████████████| 4/4 [00:00<00:00, 571.29it/s]


Intervening on 'Financial_Stress' with different values...
   Financial_Stress  Depression  Academic_Pressure  Suicidal_Thoughts
0                 1           1                  2                  1

Counterfactual Results:
            Financial_Stress=1  Financial_Stress=2  Financial_Stress=3  \
Depression                 1.0            1.077797            1.155594   

            Financial_Stress=4  Financial_Stress=5  
Depression            1.233391            1.311188  





Unnamed: 0,Financial_Stress=1,Financial_Stress=2,Financial_Stress=3,Financial_Stress=4,Financial_Stress=5
Depression,1.0,1.077797,1.155594,1.233391,1.311188
