In [1]:
import sys
import os

# Make the parent directory importable so the `from src...` imports below resolve.
# os.path.abspath('') resolves to the current working directory, so this relies
# on the kernel's working directory sitting one level below the project root.
notebook_dir = os.path.abspath('')
project_dir = os.path.dirname(notebook_dir)
# Guard the append: without it, every re-run of this cell stacks another
# duplicate entry onto sys.path.
if project_dir not in sys.path:
    sys.path.append(project_dir)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import logging
from scipy import stats
import networkx as nx
import json
from pgmpy.estimators import PC
from pgmpy.models import BayesianNetwork as PgmpyBN
from pgmpy.estimators import BayesianEstimator
from pgmpy.inference import VariableElimination
from datetime import datetime

from src.bayesian_network import BayesianNetwork
from src.bayesian_node import BayesianNode, CategoricalNode

# Notebook-wide configuration: INFO-level logging and untruncated pandas output.
logging.basicConfig(level=logging.INFO)

# A value of None lifts pandas' display limit for the given option.
for _display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(_display_option, None)

from src.data_processing import prepare_data

In [2]:
import os
import sys
from dotenv import load_dotenv

# Load variables from a .env file (python-dotenv) so the lookups below can see them.
load_dotenv()

# Determine environment and data path.
# ENVIRONMENT defaults to 'local'; any other value selects CLOUD_DATA_PATH.
environment = os.getenv('ENVIRONMENT', 'local')
data_path_variable = 'LOCAL_DATA_PATH' if environment == 'local' else 'CLOUD_DATA_PATH'
data_path = os.getenv(data_path_variable)

# Fail fast with an actionable message: os.getenv returns None for an unset
# variable, and os.path.join(None, ...) would otherwise raise an opaque TypeError.
if data_path is None:
    raise RuntimeError(
        f"Environment variable {data_path_variable} is not set "
        f"(ENVIRONMENT={environment!r}); define it in the environment or in a .env file."
    )

# File paths
behavioral_path = os.path.join(data_path, 'connectome_behavioral.csv')
hcp_path = os.path.join(data_path, 'hcp_freesurfer.csv')

In [3]:
# Column selections and structural priors used by the cells below.

# Passed to prepare_data() as `behavioral_features`.
behavioral_features = [
    'Subject',
    'Age',
    'Gender',
    'CogFluidComp_Unadj',
    'CogCrystalComp_Unadj',
    'MMSE_Score',
    'NEOFAC_O',
    'NEOFAC_C',
]

# Passed to prepare_data() as `hcp_features`.
hcp_features = [
    'Subject',
    'FS_Total_GM_Vol',
    'FS_Tot_WM_Vol',
    'FS_L_Hippo_Vol',
    'FS_R_Hippo_Vol',
    'FS_L_Amygdala_Vol',
    'FS_R_Amygdala_Vol',
]

# NOTE: this name is rebound by the prepare_data() call in the next cell;
# the list actually handed to prepare_data() is `categorical_columns_hcp`.
categorical_columns = ['Gender']

categorical_columns_hcp = ['Gender', 'MMSE_Score', 'Age']

# (source, target) pairs handed to bn.fit() as `prior_edges`.
# Written as source -> targets; dict insertion order keeps the pairs in the
# same sequence as an explicit list of tuples would have.
prior_edges = [
    (source, target)
    for source, targets in {
        'Age': ('CogFluidComp_Unadj', 'CogCrystalComp_Unadj'),
        'Gender': ('CogFluidComp_Unadj', 'CogCrystalComp_Unadj'),
        'FS_Total_GM_Vol': ('CogFluidComp_Unadj', 'CogCrystalComp_Unadj'),
        'FS_L_Hippo_Vol': ('CogFluidComp_Unadj',),
        'FS_R_Hippo_Vol': ('CogFluidComp_Unadj',),
        'NEOFAC_O': ('CogCrystalComp_Unadj',),
        'NEOFAC_C': ('CogFluidComp_Unadj',),
    }.items()
    for target in targets
]

In [4]:
# Build the modelling inputs from the two CSV files via the project helper
# prepare_data (imported from src.data_processing); its three return values are
# unpacked here.
# NOTE: this rebinds `categorical_columns`, replacing the ['Gender'] list from
# the previous cell; the columns actually passed in are `categorical_columns_hcp`.
data, categorical_columns, categories = prepare_data(
    behavioral_path=behavioral_path,
    hcp_path=hcp_path,
    behavioral_features=behavioral_features,
    hcp_features=hcp_features,
    categorical_columns=categorical_columns_hcp
    )
# Keep only the first 150 entries of `data`.
# NOTE(review): the cap is hard-coded and its rationale is not stated here;
# presumably a runtime / sample-size choice. Confirm before reusing results.
data = data[:150]
# In the recorded run this printed a dict keyed by categorical column name.
print(categories)


{'Gender': [0, 1], 'MMSE_Score': [23, 24, 26, 27, 28, 29, 30], 'Age': [0, 1, 2, 3]}


In [5]:
# Build the network with hill-climb structure learning, then fit it to `data`,
# passing the hand-specified edges as `prior_edges`.
bn = BayesianNetwork(
    method='hill_climb',
    max_parents=2,
    iterations=100,
    categorical_columns=categorical_columns,
)
bn.fit(data, prior_edges=prior_edges)

# Collect everything that will be exported; the structure summary goes in first.
results = {'network_structure': bn.explain_structure_extended()}

# Sensitivity of each cognitive composite, keyed by the measure's column name.
cognitive_measures = ['CogFluidComp_Unadj', 'CogCrystalComp_Unadj']
sensitivity = {
    target: bn.compute_sensitivity(target)
    for target in cognitive_measures
}
results['sensitivity'] = sensitivity

# Generate insights: short human-readable sentences derived from the fitted network.


def _join_or_none(labels):
    """Return the labels joined by ', ', or 'none' when there are no labels.

    Prevents an insight sentence from ending in a dangling colon when a node
    has no children or no matching edges were learned.
    """
    labels = list(labels)
    return ", ".join(labels) if labels else "none"


insights = []

# Insight 1: Strongest predictors of cognitive performance
for measure in cognitive_measures:
    # Rank by absolute value so large negative sensitivities are not pushed to the bottom.
    top_predictors = sorted(sensitivity[measure].items(), key=lambda x: abs(x[1]), reverse=True)[:3]
    insight = f"The top 3 predictors of {measure} are: "
    insight += _join_or_none(f"{pred} (sensitivity: {sens:.3f})" for pred, sens in top_predictors)
    insights.append(insight)

# Insight 2: Age-related effects
age_effects = bn.nodes['Age'].children
age_insight = "Age directly influences: " + _join_or_none(child.name for child in age_effects)
insights.append(age_insight)

# Insight 3: Gender differences
gender_effects = bn.nodes['Gender'].children
gender_insight = "Gender directly influences: " + _join_or_none(child.name for child in gender_effects)
insights.append(gender_insight)

# Insight 4: Brain structure and cognition
# Keep only edges that run from an 'FS_'-prefixed column into one of the cognitive measures.
brain_cognition_edges = [edge for edge in bn.get_edges() if (edge[0].startswith('FS_') and edge[1] in cognitive_measures)]
brain_cognition_insight = "Key brain structure-cognition relationships: " + _join_or_none(f"{edge[0]} → {edge[1]}" for edge in brain_cognition_edges)
insights.append(brain_cognition_insight)

results['insights'] = insights

# Save results to JSON, in a timestamped file in the current working directory.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
filename = f"bayesian_network_results_{timestamp}.json"

# Serialize before opening the file: if `results` holds something json cannot
# encode, the error is raised here and no truncated file is left on disk.
serialized_results = json.dumps(results, indent=2)

with open(filename, 'w') as f:
    f.write(serialized_results)

print(f"Results saved to {filename}")

# Print insights for immediate review
print("\nKey Insights:")
for insight in insights:
    print("- " + insight)

INFO:src.bayesian_network:Learning structure
INFO:src.structure_learning:Starting structure learning with max_parents=2, iterations=100


  0%|          | 0/100 [00:00<?, ?it/s]

INFO:src.structure_learning:Structure learning complete. Learned 13 nodes and 10 edges.
INFO:src.bayesian_network:Fitting parameters


Fitting node: FS_Total_GM_Vol
Node data shape: (150,)
Parent data: None (no parents)
Fitting node: FS_Tot_WM_Vol
Node data shape: (150,)
Parent data: None (no parents)
Fitting node: FS_L_Hippo_Vol
Node data shape: (150,)
Parent data: None (no parents)
Fitting node: FS_R_Hippo_Vol
Node data shape: (150,)
Parent data: None (no parents)
Fitting node: FS_L_Amygdala_Vol
Node data shape: (150,)
Parent data: None (no parents)
Fitting node: FS_R_Amygdala_Vol
Node data shape: (150,)
Parent data: None (no parents)
Fitting node: Age
Node data shape: (150,)
Parent data: None (no parents)
Fitting node: Gender
Node data shape: (150,)
Parent data: None (no parents)
Fitting node: CogFluidComp_Unadj
Node data shape: (150,)
Parent data: Present
X shape: (150, 2), y shape: (150,)
X columns: Index(['FS_Total_GM_Vol', 'FS_L_Hippo_Vol'], dtype='object')
y name: CogFluidComp_Unadj
Fitting node: CogCrystalComp_Unadj
Node data shape: (150,)
Parent data: Present
X shape: (150, 2), y shape: (150,)
X columns: Ind