In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Load the data.
# A forward slash is used as the separator: it works on Windows, macOS and Linux,
# and avoids the invalid "\d" escape sequence the backslash form produced.
data = pd.read_csv("data/dementia_data-MRI-features.csv")

In [2]:
# Summarise column dtypes and non-null counts (used to spot columns with missing values)
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 373 entries, 0 to 372
Data columns (total 15 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Subject ID  373 non-null    object 
 1   MRI ID      373 non-null    object 
 2   Group       373 non-null    object 
 3   Visit       373 non-null    int64  
 4   MR Delay    373 non-null    int64  
 5   M/F         373 non-null    object 
 6   Hand        373 non-null    object 
 7   Age         373 non-null    int64  
 8   EDUC        373 non-null    int64  
 9   SES         354 non-null    float64
 10  MMSE        371 non-null    float64
 11  CDR         373 non-null    float64
 12  eTIV        373 non-null    int64  
 13  nWBV        373 non-null    float64
 14  ASF         373 non-null    float64
dtypes: float64(5), int64(5), object(5)
memory usage: 43.8+ KB


In [8]:
# Impute the two columns that contain missing values:
# SES gets its most frequent value, MMSE gets its median.
ses_fill = data['SES'].mode()[0]
mmse_fill = data['MMSE'].median()

data['SES'] = data['SES'].fillna(ses_fill)
data['MMSE'] = data['MMSE'].fillna(mmse_fill)


In [25]:
# Preprocess `data` in place: label-encode the categorical columns, standardise
# the numerical columns, then discretise selected columns into three ordinal
# bins labelled 0, 1, 2.

# This cell mutates `data` in place and is NOT idempotent: running it a second
# time would re-scale and re-bin columns that are already binned (the
# AssertionError recorded for this cell is consistent with such a re-run).
# Fail fast with an actionable message instead of corrupting `data`.
if pd.api.types.is_numeric_dtype(data['Group']):
    raise RuntimeError(
        "`data` already looks preprocessed ('Group' is numeric). "
        "Re-run the load and imputation cells before running this cell again."
    )

# Encode categorical variables
label_encoders = {}
for col in ['Group', 'M/F', 'Hand']:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])
    label_encoders[col] = le  # Store encoder if you need to reverse-transform later

# Standardize numerical columns
scaler = StandardScaler()
numerical_cols = ['Age', 'EDUC', 'SES', 'MMSE', 'CDR', 'eTIV', 'nWBV', 'ASF']
data[numerical_cols] = scaler.fit_transform(data[numerical_cols])

# Inspect the preprocessed data
print(data.head())

# Labels shared by every discretised column (three ordinal bins).
bin_labels = [0, 1, 2]

# Fixed cut points. These are applied AFTER standardisation, so the edges are
# z-scores, not raw ages / MMSE points / CDR ratings.
# NOTE(review): the CDR edges (-0.2, 0.5) differ from the others - confirm they
# separate the intended severity levels on the standardised scale.
zscore_bins = {
    'Age': [-float('inf'), -0.5, 0.5, float('inf')],
    'MMSE': [-float('inf'), -0.5, 0.5, float('inf')],
    'CDR': [-float('inf'), -0.2, 0.5, float('inf')],
}
for col, edges in zscore_bins.items():
    data[col] = pd.cut(data[col], bins=edges, labels=bin_labels)

# Discretize eTIV, nWBV, ASF into terciles (equal-frequency bins)
for col in ['eTIV', 'nWBV', 'ASF']:
    data[col] = pd.qcut(data[col], q=3, labels=bin_labels)

# NOTE(review): EDUC and SES are standardised above but not discretised here;
# confirm that is intended for the downstream structure learning.

AssertionError: 

In [14]:
# Write the current state of `data` to disk without the index column.
# A forward slash keeps the path portable and avoids the invalid "\p" escape sequence.
data.to_csv("data/pp-dementia_data-MRI-features.csv", index=False)

In [19]:
from pgmpy.estimators import HillClimbSearch, BicScore
from pgmpy.models import BayesianModel

# Learn a network structure from `data` with pgmpy's hill-climb search.
# NOTE(review): every column still present in `data` is handed to the search;
# confirm identifier-like columns were dropped beforehand if that is intended.

# Initialize the scoring method (pgmpy BicScore built from the same frame)
scoring_method = BicScore(data)

# Initialize the Hill Climb search object
hc = HillClimbSearch(data)

# Estimate the model structure using Hill Climbing, scored by `scoring_method`
best_model = hc.estimate(scoring_method=scoring_method)

# Print the best model structure (edges)
print("Learned structure edges:", best_model.edges())


  0%|          | 0/1000000 [00:00<?, ?it/s]

Learned structure edges: [('Group', 'CDR'), ('Group', 'MMSE'), ('M/F', 'SES'), ('SES', 'EDUC'), ('CDR', 'nWBV'), ('CDR', 'M/F'), ('eTIV', 'ASF'), ('nWBV', 'Age'), ('ASF', 'M/F')]


In [28]:
from pgmpy.models import BayesianNetwork

# Define the edges of the Bayesian network.
# Each tuple is a directed edge written as (parent, child).
aggregated_edges = [
    ('Group', 'CDR'),
    ('Group', 'MMSE'),
    ('M/F', 'SES'),
    ('SES', 'EDUC'),
    ('CDR', 'nWBV'),
    ('CDR', 'M/F'),
    ('eTIV', 'ASF'),
    ('nWBV', 'Age'),
    ('ASF', 'M/F')
]

structure:P(PT);P(O|PT);P(T|O,PT);P(W|O,PT);P(H|T,PT)
structure:P(Group);P(CDR|Group);P(MMSE|Group);P(M/F|CDR,Group);P(SES|M/F,Group);P(EDUC|SES,Group);P(ASF|eTIV,Group);P(M/F|ASF,Group);P(nWBV|CDR,Group);P(Age|nWBV,Group);P(eTIV);

# Create a Bayesian network model from the directed edge list defined above;
# `model` is the notebook-level network that get_structure() reads.
model = BayesianNetwork(aggregated_edges)

# Describe the conditional-probability terms of the network that involve one node
def get_structure(node, network=None):
    """Return the conditional-probability terms that involve ``node``.

    The result lists the node's own term first (conditioned jointly on all of
    its parents), followed by one term per child (each conditioned jointly on
    all of that child's parents). A node without parents contributes no term
    of its own, so a node with neither parents nor children yields just
    ``"structure: "``.

    Parameters
    ----------
    node : hashable
        Name of the node to describe.
    network : object, optional
        Any object exposing ``get_parents(node)`` and ``get_children(node)``.
        Defaults to the notebook-level ``model``.

    Returns
    -------
    str
        ``"structure: "`` followed by the terms joined with ``"; "``,
        e.g. ``"structure: P(CDR|Group); P(MMSE|Group)"``.
    """
    if network is None:
        network = model  # fall back to the notebook-level network

    def conditional_term(target):
        # A node is conditioned on ALL of its parents at once, so they are
        # joined into a single term rather than emitted one term per parent.
        parents = ",".join(str(parent) for parent in network.get_parents(target))
        return f"P({target}|{parents})" if parents else None

    terms = []
    own_term = conditional_term(node)
    if own_term is not None:
        terms.append(own_term)

    # Every child has at least `node` as a parent, so its term is never None.
    terms.extend(conditional_term(child) for child in network.get_children(node))

    return "structure: " + "; ".join(terms)

# Get the structure for the parent node "Group" (it has no parents in the edge
# list, so only its child terms are expected) and print it
group_structure = get_structure("Group")
print(group_structure)


structure: P(CDR|Group); P(MMSE|Group)
