In [1]:
%load_ext autoreload
%autoreload 2
%reload_ext autoreload

import sys
import os
from typing import List, Dict, Any
# Add the parent directory to sys.path
notebook_dir = os.path.abspath('')
project_dir = os.path.dirname(notebook_dir)
sys.path.append(project_dir)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import logging
from scipy.stats import rv_continuous, rv_discrete


from src.data_processing import prepare_data, load_data
from src.modeling import BayesianModel
from src.inference import Inference
from src.bayesian_node import BayesianNode, CategoricalNode
from src.bayesian_network import BayesianNetwork

logging.basicConfig(level=logging.ERROR)
logger = logging.getLogger(__name__)

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

from src.data_processing import prepare_data

In [2]:
import os
import sys
from dotenv import load_dotenv
load_dotenv()

# Determine environment and data path
environment = os.getenv('ENVIRONMENT', 'local')
data_path = os.getenv('LOCAL_DATA_PATH') if environment == 'local' else os.getenv('CLOUD_DATA_PATH')
processed_data_path = os.getenv('LOCAL_DATA_PATH_PROCESSED') if environment == 'local' else os.getenv('CLOUD_DATA_PATH')

# File paths
behavioral_path = os.path.join(data_path, 'connectome_behavioral.csv')
behavioral_path_processed = os.path.join(processed_data_path, 'connectome_behavioral.csv')


hcp_path = os.path.join(data_path, 'hcp_freesurfer.csv')
hcp_path_processed = os.path.join(processed_data_path, 'hcp_freesurfer.csv')

In [3]:
behavioral_features = [
    'Subject', 'Age', 'Gender', 'CogFluidComp_Unadj', 'CogCrystalComp_Unadj', 'MMSE_Score',
    'NEOFAC_O', 'NEOFAC_C', 'ProcSpeed_Unadj', 'CardSort_Unadj', 'PicVocab_Unadj', 'ReadEng_Unadj'
]

hcp_features = [
    'Subject', 'FS_TotCort_GM_Vol', 'FS_SubCort_GM_Vol', 'FS_Total_GM_Vol', 'FS_Tot_WM_Vol', 'FS_BrainStem_Vol',
    'FS_L_Hippo_Vol', 'FS_R_Hippo_Vol', 'FS_L_Amygdala_Vol', 'FS_R_Amygdala_Vol',
    'FS_L_Caudate_Vol', 'FS_R_Caudate_Vol', 'FS_L_Putamen_Vol', 'FS_R_Putamen_Vol',
]

categorical_columns = ['Age', 'Gender']

prior_edges = [
    ('Age', 'CogFluidComp_Unadj'),
    ('Age', 'CogCrystalComp_Unadj'),
    ('Age', 'MMSE_Score'),
    ('Gender', 'CogFluidComp_Unadj'),
    ('Gender', 'CogCrystalComp_Unadj'),
    ('MMSE_Score', 'CogFluidComp_Unadj'),
    ('MMSE_Score', 'CogCrystalComp_Unadj'),
    ('FS_Total_GM_Vol', 'CogFluidComp_Unadj'),
    ('FS_Total_GM_Vol', 'CogCrystalComp_Unadj'),
    ('FS_Tot_WM_Vol', 'CogFluidComp_Unadj'),
    ('FS_Tot_WM_Vol', 'CogCrystalComp_Unadj'),
    ('FS_L_Hippo_Vol', 'CogFluidComp_Unadj'),
    ('FS_R_Hippo_Vol', 'CogFluidComp_Unadj'),
    ('FS_L_Amygdala_Vol', 'NEOFAC_O'),
    ('FS_R_Amygdala_Vol', 'NEOFAC_O'),
    ('NEOFAC_O', 'CogCrystalComp_Unadj'),
    ('NEOFAC_C', 'CogFluidComp_Unadj'),
    ('FS_L_Hippo_Vol', 'NEOFAC_O'),
    ('FS_R_Hippo_Vol', 'NEOFAC_O'),
]

In [4]:
def map_age_to_category(age_str):
    # Define age bins and corresponding ordinal categories
    bins = ['22-25', '26-30', '31-35', '36+']
    categories = [1, 2, 3, 4]  # Assigning ordinal values to age ranges
    
    if pd.isna(age_str):
        return np.nan
    age_str = age_str.strip()
    
    if age_str in bins:
        return categories[bins.index(age_str)]
    else:
        return np.nan

def process_age_gender(data: pd.DataFrame) -> pd.DataFrame:
    if 'Age' in data.columns and data['Age'].dtype == 'object':
        data['Age'] = data['Age'].apply(map_age_to_category)
    return data

# Load your data
behavioral_data = pd.read_csv(behavioral_path)
hcp_data = pd.read_csv(hcp_path)
# Process Age column
behavioral_data = process_age_gender(behavioral_data)

# You can now save the processed data back to CSV if needed or pass it directly to prepare_data
behavioral_data.to_csv('/Users/macbookair/Documents/NeuroBayesianModel/data/processed/connectome_behavioral.csv', index=False)

In [5]:
# Now call prepare_data
data, categorical_columns, categories = prepare_data(
    behavioral_path=behavioral_path_processed,
    hcp_path=hcp_path_processed,
    behavioral_features=behavioral_features,
    hcp_features=hcp_features,
    categorical_columns=categorical_columns,
    index='Subject'
)

data = data.sample(n=100, random_state=42) 

In [6]:
# Check DataFrame structure
print("DataFrame Types:")
print(data.dtypes)

# Initialize and fit the model
model = BayesianModel(method='nsl', max_parents=4, iterations=100, categorical_columns=categorical_columns)
try:
    model.fit(data, prior_edges=prior_edges)
except ValueError as e:
    print(f"Error fitting the model: {e}")
    # Additional debugging information
    print("Data columns:", data.columns)
    print("Categorical columns:", categorical_columns)

# Extract nodes from the fitted model
nodes = model.network.nodes

# Verify node creation
print("\nNodes in the Network:")
for node_name, node in nodes.items():
    print(f"Node Name: {node_name}, Node Type: {type(node)}")

# Check CategoricalNode setup
age_node = nodes.get('Age')
if age_node:
    print("\nAge Node Details:")
    print(f"Categories: {age_node.categories}")
    print(f"Fitted: {age_node.fitted}")

target_node_name = "Age"  # or any other node name
try:
    sensitivities = model.network.compute_sensitivity(target_node_name)
    print("Sensitivities:", sensitivities)
except ValueError as e:
    print(f"Error computing sensitivity: {e}")

# Check sensitivity computation
target_node_name = "CogFluidComp_Unadj"
try:
    sensitivity = model.network.compute_sensitivity(target_node_name)
    print("\nSensitivity:", sensitivity)
except ValueError as e:
    print(f"\nError computing sensitivity: {e}")
    if age_node:
        print("Age Node Details:", age_node)

DataFrame Types:
FS_TotCort_GM_Vol       float64
FS_SubCort_GM_Vol       float64
FS_Total_GM_Vol         float64
FS_Tot_WM_Vol           float64
FS_BrainStem_Vol        float64
FS_L_Hippo_Vol          float64
FS_R_Hippo_Vol          float64
FS_L_Amygdala_Vol       float64
FS_R_Amygdala_Vol       float64
FS_L_Caudate_Vol        float64
FS_R_Caudate_Vol        float64
FS_L_Putamen_Vol        float64
FS_R_Putamen_Vol        float64
Age                        int8
Gender                     int8
CogFluidComp_Unadj      float64
CogCrystalComp_Unadj    float64
MMSE_Score              float64
NEOFAC_O                float64
NEOFAC_C                float64
ProcSpeed_Unadj         float64
CardSort_Unadj          float64
PicVocab_Unadj          float64
ReadEng_Unadj           float64
dtype: object

Nodes in the Network:
Node Name: FS_TotCort_GM_Vol, Node Type: <class 'bayesian_node.BayesianNode'>
Node Name: FS_SubCort_GM_Vol, Node Type: <class 'bayesian_node.BayesianNode'>
Node Name: FS_Total_GM

KeyboardInterrupt: 

In [None]:
# Initialize and fit the model
model = BayesianModel(method='nsl', max_parents=4, iterations=100, categorical_columns=categorical_columns)
try:
    model.fit(data, prior_edges=prior_edges)
except ValueError as e:
    print(f"Error fitting the model: {e}")
    # Additional debugging information
    print("Data columns:", data.columns)
    print("Categorical columns:", categorical_columns)

# Extract nodes from the fitted model
nodes = model.network.nodes

# Verify node existence in the network
target_node_name = "CogFluidComp_Unadj"
if target_node_name in nodes:
    print(f"Node '{target_node_name}' found in the network.")
    try:
        sensitivity = model.network.compute_sensitivity(target_node_name)
        print("Sensitivity:", sensitivity)
    except ValueError as e:
        print(f"Error computing sensitivity: {e}")
else:
    print(f"Node '{target_node_name}' NOT found in the network.")
    print("Available nodes in the network:", list(nodes.keys()))

Node 'CogFluidComp_Unadj' found in the network.
Error computing sensitivity: Unsupported distribution type for node Age


In [None]:
# Compute sensitivity using Inference class
def compute_sensitivity(network: BayesianNetwork, target_node_name: str, num_samples: int = 1000) -> Dict[str, float]:
    if target_node_name not in network.nodes:
        raise ValueError(f"Node {target_node_name} not found in the network.")
    
    # Sample data for the target node
    target_samples = inference.sample_node(target_node_name, num_samples)
    
    # Compute sensitivity
    sensitivities = {}
    for node_name, node in network.nodes.items():
        if node_name == target_node_name:
            continue
        
        # Sample for other nodes
        other_samples = inference.sample_node(node_name, num_samples)
        
        # Compute sensitivity (example: mean difference or correlation)
        sensitivity = np.mean(target_samples) - np.mean(other_samples)
        sensitivities[node_name] = sensitivity
    
    return sensitivities

# Example usage
sensitivity = model.network.compute_sensitivity("CogFluidComp_Unadj")
print(sensitivity)

ValueError: Unsupported distribution type for node Age

In [None]:
def sample_node_with_inference(node_name: str, size: int = 1) -> np.ndarray:
    try:
        samples = inference.sample_node(node_name, size)
        return samples
    except Exception as e:
        print(f"Error sampling node: {e}")
        return None

# Test sampling a node
node_name = 'CogFluidComp_Unadj'
samples = sample_node_with_inference(node_name, size=1000)

if samples is not None:
    print(f"Samples for {node_name}: {samples[:10]}")  # Print the first 10 samples
else:
    print(f"Failed to sample node {node_name}")

Error sampling node: Node CogFluidComp_Unadj not found in the network.
Failed to sample node CogFluidComp_Unadj


In [None]:
nodes = inference.nodes

In [None]:

# 1. Verify Node Initialization
print("Node Initialization Check")
for node_name, node in nodes.items():
    if isinstance(node, BayesianNode):
        print(f"Node Name: {node_name}, Type: BayesianNode")
    else:
        print(f"Node Name: {node_name}, Type: {type(node).__name__}")

# 2. Check Distributions for Nodes
print("\nDistributions Check")
for node_name, node in nodes.items():
    try:
        distribution = node.get_distribution()
        if isinstance(distribution, (stats.rv_continuous, stats.rv_discrete)):
            print(f"Node Name: {node_name}")
            print(f"Distribution: {distribution}")
            print(f"Distribution Type: {type(distribution).__name__}")
            samples = distribution.rvs(size=10)
            print(f"Samples: {samples}")
        else:
            print(f"Node {node_name} has an unsupported distribution type: {type(distribution).__name__}")
    except Exception as e:
        print(f"Error with node {node_name}: {e}")

# 3. Verify Network Structure
print("\nNetwork Structure Check")
try:
    # Check if network structure is properly defined
    for node_name, node in nodes.items():
        if not hasattr(node, 'children'):
            print(f"Node {node_name} is missing 'children' attribute.")
except AttributeError as e:
    print(f"Network Structure Error: {e}")

# 4. Test Inference Class
print("\nInference Test")
try:
    # Test sampling from a node
    node_name = 'CogFluidComp_Unadj'
    try:
        samples = inference.sample_node(node_name, size=10)
        print(f"Samples for {node_name}: {samples}")
    except ValueError as ve:
        print(f"Sampling Error: {ve}")
    
    # Test sensitivity computation (assuming compute_sensitivity function exists)
    try:
        sensitivity = compute_sensitivity(network, node_name)  # Ensure 'network' is defined
        print(f"Sensitivity for {node_name}: {sensitivity}")
    except ValueError as ve:
        print(f"Sensitivity Computation Error: {ve}")
except Exception as e:
    print(f"Inference Error: {e}")


Node Initialization Check
Node Name: FS_TotCort_GM_Vol, Type: BayesianNode
Node Name: FS_SubCort_GM_Vol, Type: BayesianNode
Node Name: FS_Total_GM_Vol, Type: BayesianNode
Node Name: FS_Tot_WM_Vol, Type: BayesianNode
Node Name: FS_BrainStem_Vol, Type: BayesianNode
Node Name: FS_L_Hippo_Vol, Type: BayesianNode
Node Name: FS_R_Hippo_Vol, Type: BayesianNode
Node Name: FS_L_Amygdala_Vol, Type: BayesianNode
Node Name: FS_R_Amygdala_Vol, Type: BayesianNode
Node Name: FS_L_Caudate_Vol, Type: BayesianNode
Node Name: FS_R_Caudate_Vol, Type: BayesianNode
Node Name: FS_L_Putamen_Vol, Type: BayesianNode
Node Name: FS_R_Putamen_Vol, Type: BayesianNode
Node Name: Age, Type: BayesianNode
Node Name: Gender, Type: BayesianNode
Node Name: CogFluidComp_Unadj, Type: BayesianNode
Node Name: CogCrystalComp_Unadj, Type: BayesianNode
Node Name: MMSE_Score, Type: BayesianNode
Node Name: NEOFAC_O, Type: BayesianNode
Node Name: NEOFAC_C, Type: BayesianNode
Node Name: ProcSpeed_Unadj, Type: BayesianNode
Node Name: