In [1]:
%load_ext autoreload
%autoreload 2
%reload_ext autoreload

import sys
import os
from typing import List, Dict, Any
# Add the parent directory of the notebook's working directory to sys.path
# (presumably the project root that holds the `src` package - confirm).
notebook_dir = os.path.abspath('')  # abspath('') resolves to the current working directory
project_dir = os.path.dirname(notebook_dir)
# Guard the append so that re-running this cell does not pile up duplicate entries.
if project_dir not in sys.path:
    sys.path.append(project_dir)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import logging
from scipy.stats import rv_continuous, rv_discrete


from src.data_processing import prepare_data
from src.modeling import BayesianModel
from src.inference import Inference
from src.bayesian_node import BayesianNode, CategoricalNode
from src.bayesian_network import BayesianNetwork

# Root logging threshold is ERROR; `logger` is this notebook's own named logger.
logging.basicConfig(level=logging.ERROR)
logger = logging.getLogger(__name__)

# Remove pandas' column/row display limits (None means "no limit").
# NOTE(review): with max_rows unlimited, displaying a large frame prints every row.
pd.options.display.max_columns = None
pd.options.display.max_rows = None

from src.data_processing import prepare_data

In [2]:
import os
import sys
from dotenv import load_dotenv
load_dotenv()

# Determine environment and data path.
# ENVIRONMENT defaults to 'local'; any other value selects CLOUD_DATA_PATH.
environment = os.getenv('ENVIRONMENT', 'local')
_data_path_var = 'LOCAL_DATA_PATH' if environment == 'local' else 'CLOUD_DATA_PATH'
data_path = os.getenv(_data_path_var)
if data_path is None:
    # Fail here with a message naming the missing variable instead of letting
    # os.path.join(None, ...) raise an opaque TypeError below.
    raise RuntimeError(
        f"Environment variable {_data_path_var} is not set "
        f"(ENVIRONMENT={environment!r}); define it in the environment or in .env."
    )

# File paths
behavioral_path = os.path.join(data_path, 'connectome_behavioral.csv')
hcp_path = os.path.join(data_path, 'hcp_freesurfer.csv')

In [3]:
# Columns passed to prepare_data() as the categorical set.
categorical_columns_hcp = ['Gender', 'MMSE_Score', 'Age']

# Columns requested from the behavioral CSV.
behavioral_features = (
    ['Subject', 'Age', 'Gender']
    + ['CogFluidComp_Unadj', 'CogCrystalComp_Unadj', 'MMSE_Score']
    + ['NEOFAC_O', 'NEOFAC_C']
    + ['ProcSpeed_Unadj', 'CardSort_Unadj', 'PicVocab_Unadj', 'ReadEng_Unadj']
)

# Columns requested from the FreeSurfer CSV: the whole-structure volumes are
# listed literally, the left/right pairs are generated in L-then-R order.
hcp_features = [
    'Subject',
    'FS_TotCort_GM_Vol',
    'FS_SubCort_GM_Vol',
    'FS_Total_GM_Vol',
    'FS_Tot_WM_Vol',
    'FS_BrainStem_Vol',
    *(
        f'FS_{side}_{region}_Vol'
        for region in ('Hippo', 'Amygdala', 'Caudate', 'Putamen')
        for side in ('L', 'R')
    ),
]

# Independent list with the same contents as categorical_columns_hcp.
categorical_columns = list(categorical_columns_hcp)

# Edge pairs supplied to model.fit(prior_edges=...). Each tuple is written
# (first, second); presumably this means parent -> child - confirm against
# BayesianModel.fit. The hippocampus entries appear twice on purpose so the
# resulting list keeps the original ordering.
prior_edges = [
    (first, second)
    for first, seconds in (
        ('Age', ('CogFluidComp_Unadj', 'CogCrystalComp_Unadj', 'MMSE_Score')),
        ('Gender', ('CogFluidComp_Unadj', 'CogCrystalComp_Unadj')),
        ('MMSE_Score', ('CogFluidComp_Unadj', 'CogCrystalComp_Unadj')),
        ('FS_Total_GM_Vol', ('CogFluidComp_Unadj', 'CogCrystalComp_Unadj')),
        ('FS_Tot_WM_Vol', ('CogFluidComp_Unadj', 'CogCrystalComp_Unadj')),
        ('FS_L_Hippo_Vol', ('CogFluidComp_Unadj',)),
        ('FS_R_Hippo_Vol', ('CogFluidComp_Unadj',)),
        ('FS_L_Amygdala_Vol', ('NEOFAC_O',)),
        ('FS_R_Amygdala_Vol', ('NEOFAC_O',)),
        ('NEOFAC_O', ('CogCrystalComp_Unadj',)),
        ('NEOFAC_C', ('CogFluidComp_Unadj',)),
        ('FS_L_Hippo_Vol', ('NEOFAC_O',)),
        ('FS_R_Hippo_Vol', ('NEOFAC_O',)),
    )
    for second in seconds
]

In [4]:
# Load and merge the two CSVs through the project helper. Note that this rebinds
# `categorical_columns` (defined in the configuration cell) to whatever
# prepare_data() returns as its second element.
data, categorical_columns, categories = prepare_data(
    behavioral_path=behavioral_path,
    hcp_path=hcp_path,
    behavioral_features=behavioral_features,
    hcp_features=hcp_features,
    categorical_columns=categorical_columns_hcp
)

# Work on a reproducible random subset of at most 100 rows. The cap uses
# min() because DataFrame.sample raises ValueError when n exceeds the number of
# rows (sampling without replacement).
data = data.sample(n=min(100, len(data)), random_state=42)

In [5]:
# Inspect the MMSE_Score column before deciding whether it needs encoding.
mmse_column = data['MMSE_Score']
print("MMSE_Score unique values:", mmse_column.unique())
print("MMSE_Score value counts:\n", mmse_column.value_counts())
print("MMSE_Score dtype:", mmse_column.dtype)

# Object- or category-typed values are replaced by their integer category codes;
# any other dtype is left untouched.
if mmse_column.dtype in ('object', 'category'):
    data['MMSE_Score'] = pd.Categorical(mmse_column).codes

print("MMSE_Score after encoding:\n", data['MMSE_Score'].value_counts())

MMSE_Score unique values: [6 5 4 2 3]
MMSE_Score value counts:
 MMSE_Score
5    39
6    37
4    15
3     5
2     4
Name: count, dtype: int64
MMSE_Score dtype: int8
MMSE_Score after encoding:
 MMSE_Score
5    39
6    37
4    15
3     5
2     4
Name: count, dtype: int64


In [6]:
# Build the model with the notebook's settings and fit it on the sampled data,
# passing the hand-written prior edges.
model = BayesianModel(
    method='k2',
    max_parents=4,
    iterations=100,
    categorical_columns=categorical_columns,
)
model.fit(data, prior_edges=prior_edges)

# Keep handles to the fitted network and its node mapping for the cells below.
network = model.network
nodes = network.nodes
print("Extracted nodes:", list(nodes.keys()))

# Confirm the node we want to analyse actually exists; list all names if not.
target_node_name = "CogFluidComp_Unadj"
if target_node_name not in network.nodes:
    print(f"Node '{target_node_name}' NOT found in the network.")
    print("Available nodes in the network:", list(network.nodes.keys()))
else:
    print(f"Node '{target_node_name}' found in the network.")

# Example usage: compute sensitivity via the network's own method.
# Only ValueError is handled here; any other exception propagates.
try:
    sensitivity = network.compute_sensitivity(target_node_name)
    print(sensitivity)
except ValueError as err:
    print(f"Error: {err}")

Edges learned: [('FS_SubCort_GM_Vol', 'FS_TotCort_GM_Vol'), ('FS_TotCort_GM_Vol', 'FS_SubCort_GM_Vol'), ('FS_TotCort_GM_Vol', 'FS_Total_GM_Vol'), ('FS_TotCort_GM_Vol', 'FS_Tot_WM_Vol'), ('FS_TotCort_GM_Vol', 'FS_BrainStem_Vol'), ('FS_TotCort_GM_Vol', 'FS_L_Hippo_Vol'), ('FS_TotCort_GM_Vol', 'FS_R_Hippo_Vol'), ('FS_TotCort_GM_Vol', 'FS_L_Amygdala_Vol'), ('FS_TotCort_GM_Vol', 'FS_R_Amygdala_Vol'), ('FS_TotCort_GM_Vol', 'FS_L_Caudate_Vol'), ('FS_TotCort_GM_Vol', 'FS_R_Caudate_Vol'), ('FS_TotCort_GM_Vol', 'FS_L_Putamen_Vol'), ('FS_TotCort_GM_Vol', 'FS_R_Putamen_Vol'), ('FS_TotCort_GM_Vol', 'Age'), ('FS_TotCort_GM_Vol', 'Gender'), ('FS_TotCort_GM_Vol', 'CogFluidComp_Unadj'), ('FS_TotCort_GM_Vol', 'CogCrystalComp_Unadj'), ('FS_TotCort_GM_Vol', 'MMSE_Score'), ('FS_TotCort_GM_Vol', 'NEOFAC_O'), ('FS_TotCort_GM_Vol', 'NEOFAC_C'), ('FS_TotCort_GM_Vol', 'ProcSpeed_Unadj'), ('FS_TotCort_GM_Vol', 'CardSort_Unadj'), ('FS_TotCort_GM_Vol', 'PicVocab_Unadj'), ('FS_TotCort_GM_Vol', 'ReadEng_Unadj')]
Nod

RecursionError: maximum recursion depth exceeded

In [None]:
# Compute sensitivity using Inference class
def compute_sensitivity(
    network: "BayesianNetwork",
    target_node_name: str,
    num_samples: int = 1000,
    engine: Any = None,
) -> Dict[str, float]:
    """Compare sampled means of the target node against every other node.

    For each node other than the target, the reported value is
    ``mean(target samples) - mean(node samples)``. This is the placeholder
    "mean difference" measure, not a formal sensitivity analysis.

    Args:
        network: Object whose ``nodes`` mapping lists the node names to visit.
        target_node_name: Name of the node every other node is compared against.
        num_samples: Number of samples requested per node.
        engine: Object exposing ``sample_node(name, size)``. When omitted, the
            notebook-level ``inference`` object is used, so existing three-argument
            calls keep working.

    Returns:
        Mapping of node name to mean difference; the target itself is excluded.

    Raises:
        ValueError: If ``target_node_name`` is not in ``network.nodes``.
        NameError: If ``engine`` is omitted and no global ``inference`` exists.
    """
    if target_node_name not in network.nodes:
        raise ValueError(f"Node {target_node_name} not found in the network.")

    if engine is None:
        # Fall back to the notebook global, but fail with an explicit message
        # rather than a bare NameError from deep inside the loop.
        engine = globals().get("inference")
        if engine is None:
            raise NameError(
                "compute_sensitivity needs an inference engine: pass `engine=` "
                "or define a global `inference` object first."
            )

    # Sample data for the target node once; its mean is reused for every comparison.
    target_mean = np.mean(engine.sample_node(target_node_name, num_samples))

    sensitivities = {}
    for node_name in network.nodes:
        if node_name == target_node_name:
            continue
        other_samples = engine.sample_node(node_name, num_samples)
        sensitivities[node_name] = target_mean - np.mean(other_samples)

    return sensitivities

# Example usage
# NOTE(review): this invokes the compute_sensitivity *method* on model.network, not the
# module-level compute_sensitivity() function defined just above in this cell - confirm
# which of the two is intended.
sensitivity = model.network.compute_sensitivity("CogFluidComp_Unadj")
print(sensitivity)

ValueError: Unsupported distribution type for node CogFluidComp_Unadj: dict

In [None]:
def sample_node_with_inference(node_name: str, size: int = 1) -> np.ndarray:
    """Draw ``size`` samples for ``node_name`` via the notebook-level ``inference`` object.

    Best-effort helper: any exception raised while sampling is printed to stdout
    and swallowed, in which case None is returned instead of an array.
    """
    try:
        return inference.sample_node(node_name, size)
    except Exception as exc:
        print(f"Error sampling node: {exc}")
    return None

# Smoke-test the helper on a single node and show the first ten draws.
node_name = 'CogFluidComp_Unadj'
samples = sample_node_with_inference(node_name, size=1000)

# The helper returns None when sampling failed, so handle that case first.
if samples is None:
    print(f"Failed to sample node {node_name}")
else:
    print(f"Samples for {node_name}: {samples[:10]}")

Error sampling node: Node CogFluidComp_Unadj not found in the network.
Failed to sample node CogFluidComp_Unadj


In [None]:
# Rebind `nodes` to the node collection exposed by the `inference` object.
# NOTE(review): no cell shown above this one assigns `inference`; it presumably comes
# from kernel state left by a cell that is not present here - confirm this still
# works after Restart & Run All.
nodes = inference.nodes

In [None]:

# 1. Verify Node Initialization
print("Node Initialization Check")
for node_name, node in nodes.items():
    # Report the concrete class name for anything that is not a BayesianNode.
    node_type = "BayesianNode" if isinstance(node, BayesianNode) else type(node).__name__
    print(f"Node Name: {node_name}, Type: {node_type}")

# 2. Check Distributions for Nodes
print("\nDistributions Check")
for node_name, node in nodes.items():
    try:
        distribution = node.get_distribution()
        # rv_continuous / rv_discrete are imported directly from scipy.stats in the
        # first cell; there is no `stats` name in scope, so referring to them as
        # `stats.rv_continuous` would raise NameError for every node.
        # NOTE(review): frozen scipy distributions (e.g. the result of norm(0, 1)) are
        # not instances of these classes - confirm what get_distribution() returns.
        if isinstance(distribution, (rv_continuous, rv_discrete)):
            print(f"Node Name: {node_name}")
            print(f"Distribution: {distribution}")
            print(f"Distribution Type: {type(distribution).__name__}")
            samples = distribution.rvs(size=10)
            print(f"Samples: {samples}")
        else:
            print(f"Node {node_name} has an unsupported distribution type: {type(distribution).__name__}")
    except Exception as e:
        # Keep going so that one failing node does not hide the state of the others.
        print(f"Error with node {node_name}: {e}")

# 3. Verify Network Structure
print("\nNetwork Structure Check")
try:
    # Check if network structure is properly defined
    for node_name, node in nodes.items():
        if not hasattr(node, 'children'):
            print(f"Node {node_name} is missing 'children' attribute.")
except AttributeError as e:
    print(f"Network Structure Error: {e}")

# 4. Test Inference Class
print("\nInference Test")
try:
    # Test sampling from a node
    node_name = 'CogFluidComp_Unadj'
    try:
        samples = inference.sample_node(node_name, size=10)
        print(f"Samples for {node_name}: {samples}")
    except ValueError as ve:
        print(f"Sampling Error: {ve}")

    # Test sensitivity computation (assuming compute_sensitivity function exists)
    try:
        sensitivity = compute_sensitivity(network, node_name)  # Ensure 'network' is defined
        print(f"Sensitivity for {node_name}: {sensitivity}")
    except ValueError as ve:
        print(f"Sensitivity Computation Error: {ve}")
except Exception as e:
    # Anything other than ValueError from the two probes above ends up here.
    print(f"Inference Error: {e}")


Node Initialization Check
Node Name: FS_TotCort_GM_Vol, Type: BayesianNode
Node Name: FS_SubCort_GM_Vol, Type: BayesianNode
Node Name: FS_Total_GM_Vol, Type: BayesianNode
Node Name: FS_Tot_WM_Vol, Type: BayesianNode
Node Name: FS_BrainStem_Vol, Type: BayesianNode
Node Name: FS_L_Hippo_Vol, Type: BayesianNode
Node Name: FS_R_Hippo_Vol, Type: BayesianNode
Node Name: FS_L_Amygdala_Vol, Type: BayesianNode
Node Name: FS_R_Amygdala_Vol, Type: BayesianNode
Node Name: FS_L_Caudate_Vol, Type: BayesianNode
Node Name: FS_R_Caudate_Vol, Type: BayesianNode
Node Name: FS_L_Putamen_Vol, Type: BayesianNode
Node Name: FS_R_Putamen_Vol, Type: BayesianNode
Node Name: Age, Type: BayesianNode
Node Name: Gender, Type: BayesianNode
Node Name: CogFluidComp_Unadj, Type: BayesianNode
Node Name: CogCrystalComp_Unadj, Type: BayesianNode
Node Name: MMSE_Score, Type: BayesianNode
Node Name: NEOFAC_O, Type: BayesianNode
Node Name: NEOFAC_C, Type: BayesianNode
Node Name: ProcSpeed_Unadj, Type: BayesianNode
Node Name: