In [1]:
%load_ext autoreload
%autoreload 2

import sys
import os

# Make the parent of the current working directory importable; the `src.*`
# imports in this notebook are resolved through this entry.
# os.path.abspath('') resolves to the current working directory.
notebook_dir = os.path.abspath('')
project_dir = os.path.dirname(notebook_dir)
# Guard the append so that re-running this cell does not pile up duplicate
# sys.path entries.
if project_dir not in sys.path:
    sys.path.append(project_dir)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import logging
from scipy import stats
import json
from datetime import datetime
from joblib import parallel_backend


from src.bayesian_network import BayesianNetwork

# Ask for ERROR-level root logging and create this notebook's own logger.
# NOTE(review): the recorded cell output contains INFO records, so the root
# logger was presumably configured before this call (basicConfig does nothing
# when the root logger already has handlers) - confirm in the src modules.
logging.basicConfig(level=logging.ERROR)
logger = logging.getLogger(__name__)

# Lift pandas' display truncation on both axes (None removes the limit).
pd.options.display.max_columns = None
pd.options.display.max_rows = None

from src.data_processing import prepare_data

In [2]:
import os
import sys
from dotenv import load_dotenv
load_dotenv()

# Determine environment and data path.
# ENVIRONMENT defaults to 'local'; any other value selects CLOUD_DATA_PATH.
environment = os.getenv('ENVIRONMENT', 'local')
data_path_env_var = 'LOCAL_DATA_PATH' if environment == 'local' else 'CLOUD_DATA_PATH'
data_path = os.getenv(data_path_env_var)
if data_path is None:
    # Without this check os.path.join(None, ...) fails with a TypeError that
    # does not say which variable is missing.
    raise RuntimeError(
        f"{data_path_env_var} is not set (ENVIRONMENT={environment!r}); "
        "define it in the process environment or in the .env file."
    )

# File paths
behavioral_path = os.path.join(data_path, 'connectome_behavioral.csv')
hcp_path = os.path.join(data_path, 'hcp_freesurfer.csv')

In [3]:
# Columns passed to prepare_data as categorical.
categorical_columns_hcp = [
    'Gender',
    'MMSE_Score',
    'Age',
]

# Columns requested from the behavioral CSV. 'Subject' is also listed in
# hcp_features (presumably the join key - confirm in prepare_data).
behavioral_features = [
    'Subject',
    'Age',
    'Gender',
    'CogFluidComp_Unadj',
    'CogCrystalComp_Unadj',
    'MMSE_Score',
    'NEOFAC_O',
    'NEOFAC_C',
    'ProcSpeed_Unadj',
    'CardSort_Unadj',
    'PicVocab_Unadj',
    'ReadEng_Unadj',
]

# Columns requested from the HCP FreeSurfer CSV.
hcp_features = [
    'Subject',
    'FS_TotCort_GM_Vol',
    'FS_SubCort_GM_Vol',
    'FS_Total_GM_Vol',
    'FS_Tot_WM_Vol',
    'FS_BrainStem_Vol',
    'FS_L_Hippo_Vol',
    'FS_R_Hippo_Vol',
    'FS_L_Amygdala_Vol',
    'FS_R_Amygdala_Vol',
    'FS_L_Caudate_Vol',
    'FS_R_Caudate_Vol',
    'FS_L_Putamen_Vol',
    'FS_R_Putamen_Vol',
]

# Same entries as categorical_columns_hcp, kept as an independent list object.
categorical_columns = list(categorical_columns_hcp)

# Directed (parent, child) pairs supplied to BayesianNetwork.fit as prior edges.
# The order of the entries is preserved exactly.
prior_edges = [
    # Age / Gender / MMSE_Score as parents
    ('Age', 'CogFluidComp_Unadj'),
    ('Age', 'CogCrystalComp_Unadj'),
    ('Age', 'MMSE_Score'),
    ('Gender', 'CogFluidComp_Unadj'),
    ('Gender', 'CogCrystalComp_Unadj'),
    ('MMSE_Score', 'CogFluidComp_Unadj'),
    ('MMSE_Score', 'CogCrystalComp_Unadj'),
    # Total grey / white matter volume -> both cognition composites
    ('FS_Total_GM_Vol', 'CogFluidComp_Unadj'),
    ('FS_Total_GM_Vol', 'CogCrystalComp_Unadj'),
    ('FS_Tot_WM_Vol', 'CogFluidComp_Unadj'),
    ('FS_Tot_WM_Vol', 'CogCrystalComp_Unadj'),
    # Hippocampus -> fluid composite
    ('FS_L_Hippo_Vol', 'CogFluidComp_Unadj'),
    ('FS_R_Hippo_Vol', 'CogFluidComp_Unadj'),
    # Amygdala -> NEOFAC_O
    ('FS_L_Amygdala_Vol', 'NEOFAC_O'),
    ('FS_R_Amygdala_Vol', 'NEOFAC_O'),
    # NEOFAC scores -> cognition composites
    ('NEOFAC_O', 'CogCrystalComp_Unadj'),
    ('NEOFAC_C', 'CogFluidComp_Unadj'),
    # Hippocampus -> NEOFAC_O
    ('FS_L_Hippo_Vol', 'NEOFAC_O'),
    ('FS_R_Hippo_Vol', 'NEOFAC_O'),
]

In [4]:
# Upper bound on the rows kept for model fitting, and the seed that makes the
# draw reproducible.
SAMPLE_SIZE = 200
SAMPLE_SEED = 42

# prepare_data returns three values: the prepared table, the categorical
# column names (this rebinds `categorical_columns`) and their categories.
data, categorical_columns, categories = prepare_data(
    behavioral_path=behavioral_path,
    hcp_path=hcp_path,
    behavioral_features=behavioral_features,
    hcp_features=hcp_features,
    categorical_columns=categorical_columns_hcp,
)

# Assumes `data` is a pandas DataFrame - confirm against prepare_data.
# DataFrame.sample raises ValueError when n exceeds the row count (sampling
# without replacement), so cap the request at the rows actually available.
data = data.sample(n=min(SAMPLE_SIZE, len(data)), random_state=SAMPLE_SEED)

In [5]:
# Create and fit the Bayesian Network
bn = BayesianNetwork(method='hill_climb', max_parents=2, iterations=100, categorical_columns=categorical_columns)
try:
    # Select joblib's multiprocessing backend with n_jobs=-1 for the fit.
    # NOTE(review): this only matters if fit() parallelises through joblib - confirm.
    with parallel_backend('multiprocessing', n_jobs=-1):
        bn.fit(data, prior_edges=prior_edges)
except Exception as e:
    # Best-effort: log the failure with its traceback and keep the cell running.
    logger.error(f"Error during fitting: {e}", exc_info=True)
else:
    # Report completion only when fit() returned without raising; a `finally`
    # clause would print this after a failed fit as well.
    print("Fitting done.")

def _collect_section(network, method_name, label):
    """Call ``network.<method_name>()`` and return its result, or an error list.

    Args:
        network: Object exposing the zero-argument method ``method_name``.
        method_name: Name of the method to look up and call on ``network``.
        label: Human-readable section name interpolated into the log message
            and into the fallback text.

    Returns:
        Whatever the method returns. If the lookup or the call raises an
        ``Exception``, the error is logged with its traceback and a
        single-element list holding an error sentence is returned instead.
    """
    try:
        # Resolve the attribute inside the try so that a missing method is
        # logged and replaced by the fallback too.
        return getattr(network, method_name)()
    except Exception as e:
        logger.error(f"Error getting {label}: {e}", exc_info=True)
        return [f"Error occurred while getting {label}."]


# Run the analysis only when the network holds nodes; otherwise report the
# failed fit.
if bn.nodes:
    results = {}
    results['network_structure'] = bn.explain_structure_extended()

    # One sensitivity mapping per cognition composite.
    cognitive_measures = ['CogFluidComp_Unadj', 'CogCrystalComp_Unadj']
    sensitivity = {measure: bn.compute_sensitivity(measure) for measure in cognitive_measures}
    results['sensitivity'] = sensitivity

    insights = []
    for measure in cognitive_measures:
        # Rank predictors by absolute sensitivity and keep the three largest.
        top_predictors = sorted(sensitivity[measure].items(), key=lambda x: abs(x[1]), reverse=True)[:3]
        insight = f"The top 3 predictors of {measure} are: " + ", ".join([f"{pred} (sensitivity: {sens:.3f})" for pred, sens in top_predictors])
        insights.append(insight)

    # Direct children of the Age and Gender nodes.
    age_effects = bn.nodes['Age'].children
    insights.append("Age directly influences: " + ", ".join([child.name for child in age_effects]))

    gender_effects = bn.nodes['Gender'].children
    insights.append("Gender directly influences: " + ", ".join([child.name for child in gender_effects]))

    # Edges whose source name starts with 'FS_' and whose target is a cognition composite.
    brain_cognition_edges = [edge for edge in bn.get_edges() if (edge[0].startswith('FS_') and edge[1] in cognitive_measures)]
    insights.append("Key brain structure-cognition relationships: " + ", ".join([f"{edge[0]} → {edge[1]}" for edge in brain_cognition_edges]))

    results['insights'] = insights

    # Optional report sections: each one is computed independently, and a
    # failure is logged and replaced by an error sentence for that section only.
    for section_key, method_name, label in (
        ('key_relationships', 'get_key_relationships', 'key relationships'),
        ('clinical_implications', 'get_clinical_implications', 'clinical implications'),
        ('novel_insights', 'get_novel_insights', 'novel insights'),
        ('age_specific_insights', 'get_age_specific_insights', 'age-specific insights'),
        ('gender_specific_insights', 'get_gender_specific_insights', 'gender-specific insights'),
    ):
        results[section_key] = _collect_section(bn, method_name, label)

    # Not guarded: an exception raised here propagates and stops the cell.
    results['summary'] = bn.summarize_key_findings()

    try:
        results['model_performance'] = {
            'accuracy': bn.get_accuracy(),
            'precision': bn.get_precision(),
            'recall': bn.get_recall()
        }
    except Exception as e:
        logger.error(f"Error calculating model performance: {e}", exc_info=True)
        # NOTE(review): the zeros are placeholders written after a failure,
        # not measured scores.
        results['model_performance'] = {
            'accuracy': 0.0,
            'precision': 0.0,
            'recall': 0.0
        }

    # Timestamped file names so that repeated runs do not overwrite each other.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    results_filename = f"bayesian_network_results_{timestamp}.json"
    summary_filename = f"bayesian_network_summary_{timestamp}.json"

    # The two exports are attempted independently; a failure of the detailed
    # export does not prevent the summary export.
    try:
        bn.write_results_to_json(results, filename=results_filename)
        logger.info(f"Detailed results saved to {results_filename}")
    except Exception as e:
        logger.error(f"Error writing detailed results: {e}", exc_info=True)

    try:
        bn.write_summary_to_json(results, filename=summary_filename)
        logger.info(f"Summary results saved to {summary_filename}")
    except Exception as e:
        logger.error(f"Error writing summary results: {e}", exc_info=True)

else:
    logger.error("Bayesian Network fitting failed. Unable to perform analysis.")

INFO:src.bayesian_network:Learning structure
INFO:src.structure_learning:Starting structure learning with max_parents=2, iterations=100


  0%|          | 0/100 [00:00<?, ?it/s]

INFO:src.structure_learning:Structure learning complete. Learned 24 nodes and 19 edges.
INFO:src.bayesian_network:Fitting parameters
INFO:src.bayesian_network:Learning structure
INFO:src.structure_learning:Starting structure learning with max_parents=2, iterations=100


Parameter fitting complete.
Fitting done.


  0%|          | 0/100 [00:00<?, ?it/s]

INFO:src.structure_learning:Structure learning complete. Learned 24 nodes and 0 edges.
INFO:src.bayesian_network:Fitting parameters
INFO:src.bayesian_network:Learning structure
INFO:src.structure_learning:Starting structure learning with max_parents=2, iterations=100


Parameter fitting complete.


  0%|          | 0/100 [00:00<?, ?it/s]

INFO:src.structure_learning:Structure learning complete. Learned 24 nodes and 0 edges.
INFO:src.bayesian_network:Fitting parameters
INFO:src.bayesian_network:Learning structure
INFO:src.structure_learning:Starting structure learning with max_parents=2, iterations=100


Parameter fitting complete.


  0%|          | 0/100 [00:00<?, ?it/s]

INFO:src.structure_learning:Structure learning complete. Learned 24 nodes and 0 edges.
INFO:src.bayesian_network:Fitting parameters
INFO:src.bayesian_network:Learning structure
INFO:src.structure_learning:Starting structure learning with max_parents=2, iterations=100


Parameter fitting complete.


  0%|          | 0/100 [00:00<?, ?it/s]

INFO:src.structure_learning:Structure learning complete. Learned 24 nodes and 0 edges.
INFO:src.bayesian_network:Fitting parameters


Parameter fitting complete.


INFO:src.bayesian_network:Learning structure
INFO:src.structure_learning:Starting structure learning with max_parents=2, iterations=100


  0%|          | 0/100 [00:00<?, ?it/s]

INFO:src.structure_learning:Structure learning complete. Learned 24 nodes and 0 edges.
INFO:src.bayesian_network:Fitting parameters
INFO:src.bayesian_network:Learning structure
INFO:src.structure_learning:Starting structure learning with max_parents=2, iterations=100


Parameter fitting complete.


  0%|          | 0/100 [00:00<?, ?it/s]

INFO:src.structure_learning:Structure learning complete. Learned 24 nodes and 0 edges.
INFO:src.bayesian_network:Fitting parameters
INFO:src.bayesian_network:Learning structure


Parameter fitting complete.


INFO:src.structure_learning:Starting structure learning with max_parents=2, iterations=100


  0%|          | 0/100 [00:00<?, ?it/s]

INFO:src.structure_learning:Structure learning complete. Learned 24 nodes and 0 edges.
INFO:src.bayesian_network:Fitting parameters
INFO:src.bayesian_network:Learning structure
INFO:src.structure_learning:Starting structure learning with max_parents=2, iterations=100


Parameter fitting complete.


  0%|          | 0/100 [00:00<?, ?it/s]

INFO:src.structure_learning:Structure learning complete. Learned 24 nodes and 0 edges.
INFO:src.bayesian_network:Fitting parameters
ERROR:__main__:Error writing detailed results: 'BayesianNetwork' object has no attribute 'y_true'
Traceback (most recent call last):
  File "/var/folders/6p/45_nyjf50bxf3qffl5qnk3pw0000gp/T/ipykernel_14505/571128375.py", line 88, in <module>
    bn.write_results_to_json(results, filename=results_filename)
  File "/Users/macbookair/Documents/NeuroBayesianModel/src/bayesian_network.py", line 125, in write_results_to_json
    results["performance_metrics"] = self.get_performance_metrics()
                                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/macbookair/Documents/NeuroBayesianModel/src/bayesian_network.py", line 525, in get_performance_metrics
    mse = np.mean((self.y_true - self.y_pred)**2)
                   ^^^^^^^^^^^
AttributeError: 'BayesianNetwork' object has no attribute 'y_true'
INFO:__main__:Summary results saved to baye

Parameter fitting complete.
Summary successfully written to logs/bayesian_network_summary_20240724_191317.json


In [6]:
#import networkx as nx
#import matplotlib.pyplot as plt

#def visualize_network(self):
#    G = nx.DiGraph()
#    for node, parents in self.nodes.items():
#        for parent in parents:
#            G.add_edge(parent.name, node)
    
#    plt.figure(figsize=(12, 8))
#    pos = nx.spring_layout(G)
#    nx.draw(G, pos, with_labels=True, node_color='lightblue', node_size=500, font_size=8, arrows=True)
#    plt.title("Bayesian Network Structure")
#    plt.show()