In [1]:
import pandas as pd

# Path of the CSV file that holds the heart-disease records
file_path = 'IR3-dataset.csv'

# Column labels in file order (spellings are the ones the rest of the notebook uses)
COLUMN_NAMES = ['age', 'Gender', 'Family', 'diet', 'Lifestyle', 'cholestrol', 'heartdisease']

# Read without a header row so that every line of the file arrives as data
raw = pd.read_csv(file_path, header=None)

# If the parser produced a single column (the layout this loader was written
# for, e.g. each record stored as one quoted field), split it on commas.
# A normally delimited file is already split and is used as-is.
if raw.shape[1] == 1:
    data = raw[0].str.split(',', expand=True)
else:
    data = raw

# Fail with a readable message instead of pandas' generic length-mismatch error
if data.shape[1] != len(COLUMN_NAMES):
    raise ValueError(
        f"Expected {len(COLUMN_NAMES)} columns in {file_path!r}, found {data.shape[1]}"
    )

data.columns = COLUMN_NAMES


In [2]:
# Coerce every column to a numeric dtype (unparseable entries become NaN),
# then keep only the rows that have no NaN in any column.
# NOTE(review): the recorded data.head() output starts at index 1, which
# suggests row 0 (likely a textual header line) is removed here — confirm.
data = (
    data
    .apply(lambda column: pd.to_numeric(column, errors='coerce'))
    .dropna(axis=0, how='any')
)


In [3]:
from pgmpy.models import BayesianNetwork
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination

# Network structure: every risk factor is a direct parent of the outcome node
risk_factors = ['age', 'Gender', 'Family', 'diet', 'Lifestyle', 'cholestrol']
model = BayesianNetwork([(factor, 'heartdisease') for factor in risk_factors])

# Estimate the conditional probability tables from the data (maximum likelihood)
model.fit(data, estimator=MaximumLikelihoodEstimator)


  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Legend mapping integer codes to age bands (presumably the encoding of the
# 'age' column — confirm against the data source).
# NOTE: this cell has no execution count, i.e. it was not run in the recorded session.
'''0 → Ages 0–20
1 → Ages 21–35
2 → Ages 36–50
3 → Ages 51–65
4 → Ages 66+'''

In [4]:
# Create the variable-elimination inference engine for the fitted model
heart_infer = VariableElimination(model)

# Hypothetical patient profile, expressed with the dataset's category codes
evidence = {
    'age': 3,           # Example age category (3 → Ages 51–65 in the legend cell)
    'Gender': 0,        # Example gender (0 for female, 1 for male)
    'Family': 0,        # Example family history
    'diet': 0,          # Example diet type
    'Lifestyle': 2,     # Example lifestyle rating
    'cholestrol': 2     # Example cholesterol level
}

# Count the training rows that share this exact profile. Every feature is a
# direct parent of 'heartdisease', so the estimate for a profile comes only
# from rows with that same combination of values; with none, there is nothing
# to estimate from. The recorded run for this profile printed 0.5 / 0.5.
# NOTE(review): that looks like a uniform fallback for an unobserved
# combination — confirm against the pgmpy version in use.
profile_mask = pd.Series(True, index=data.index)
for column, state in evidence.items():
    profile_mask &= data[column] == state
n_matching_rows = int(profile_mask.sum())
if n_matching_rows == 0:
    print(
        "Warning: no training row matches this evidence combination; "
        "the distribution below is not backed by any observation."
    )

# Query the model for the distribution of heart disease given the evidence
q = heart_infer.query(variables=['heartdisease'], evidence=evidence)

print(q)


+-------------------+---------------------+
| heartdisease      |   phi(heartdisease) |
| heartdisease(0.0) |              0.5000 |
+-------------------+---------------------+
| heartdisease(1.0) |              0.5000 |
+-------------------+---------------------+


In [6]:
# Show the column labels currently assigned to `data`
data.columns

Index(['age', 'Gender', 'Family', 'diet', 'Lifestyle', 'cholestrol',
       'heartdisease'],
      dtype='object')

In [7]:
# Preview the first rows of `data` (head() shows five rows by default)
data.head()

Unnamed: 0,age,Gender,Family,diet,Lifestyle,cholestrol,heartdisease
1,0.0,0.0,1.0,1.0,3.0,0.0,1.0
2,0.0,1.0,1.0,1.0,3.0,0.0,1.0
3,1.0,0.0,0.0,0.0,2.0,1.0,1.0
4,4.0,0.0,1.0,1.0,3.0,2.0,0.0
5,3.0,1.0,1.0,0.0,0.0,2.0,0.0
