<a href="https://colab.research.google.com/github/mdsadaqathali5/lab-records/blob/main/problem7(54).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

###Write a program to construct a Bayesian network considering medical data.Use this model to demonstrate the diagnosis of heart patients using standard heart disease dataset.


In [5]:
import bayespy as bp
import numpy as np
import pandas as pd
import csv
from colorama import init
from colorama import Fore, Back, Style
init()

In [6]:
#Define parameters Enumvalues
#Age
ageEnum = {'SuperSeniorCitizen':0, 'SeniorCitizen':1, 'MiddleAged':2, 'Youth':3, 'Teen':4}
#Gender
genderEnum = {'Male':0, 'Female':1}
#FamilyHistory
familyHistoryEnum = {'Yes':0, 'No':1, 'Other': 2} # Added 'Other' category
#dietEnum(CalorieIntake)
dietEnum = {'High':0, 'Medium':1, 'Low':2}
#Lifestyle
lifestyleEnum = {'Athlete':0, 'Active':1, 'Moderate':2, 'Sedetary':3}
#cholesterol
cholesterolEnum = {'High':0, 'BorderLine':1, 'Normal':2}
#heartDisease
heartDiseaseEnum = {'Yes':0, 'No':1, 'Unknown': 2} # Added 'Unknown' category for value 2

###Importing Dataset

In [7]:
import csv
with open('cleaned_merged_heart_dataset.csv') as csvfile:
    lines = csv.reader(csvfile, delimiter=',')
    dataset = list(lines)
    data = []

    # Assuming the CSV contains numerical values corresponding to the enum indices
    for x in dataset[1:]:
        # Convert the string values from the CSV to integers
        data.append([
            int(x[0]), # Age
            int(x[1]), # Gender
            int(x[2]), # FamilyHistory
            int(x[3]), # Diet
            int(x[4]), # Lifestyle
            int(x[5]), # Cholesterol
            int(x[6])  # HeartDisease
        ])


In [8]:
# Training data from machine learning to do
data = np.array(data).astype('int16')
N=len(data)

# Debugging: Print min/max values for each column to identify invalid indices
print("Min and max values in each data column:")
for i in range(data.shape[1]):
    print(f"Column {i}: Min = {np.min(data[:, i])}, Max = {np.max(data[:, i])})")

Min and max values in each data column:
Column 0: Min = 29, Max = 77)
Column 1: Min = 0, Max = 1)
Column 2: Min = 0, Max = 4)
Column 3: Min = 94, Max = 200)
Column 4: Min = 126, Max = 564)
Column 5: Min = 0, Max = 1)
Column 6: Min = 0, Max = 2)


In [10]:
#input data coloumn assignment
# Map Age to categories
age_data = np.zeros(N, dtype=int)
for i in range(N):
    if data[i, 0] <= 19:
        age_data[i] = ageEnum['Teen']
    elif data[i, 0] <= 29:
        age_data[i] = ageEnum['Youth']
    elif data[i, 0] <= 49:
        age_data[i] = ageEnum['MiddleAged']
    elif data[i, 0] <= 69:
        age_data[i] = ageEnum['SeniorCitizen']
    else:
        age_data[i] = ageEnum['SuperSeniorCitizen']

p_age = bp.nodes.Dirichlet(1.0*np.ones(5))
age = bp.nodes.Categorical(p_age, plates=(N,))
age.observe(age_data)

# Gender data already seems to be 0 or 1, matching the enum
p_gender = bp.nodes.Dirichlet(1.0*np.ones(2))
gender = bp.nodes.Categorical(p_gender, plates=(N,))
gender.observe(data[:,1])

# Map FamilyHistory to categories, mapping values > 2 to 'Other' (index 2)
familyhistory_data = np.minimum(data[:, 2], 2) # Map values 3 and 4 to index 2

p_familyhistory = bp.nodes.Dirichlet(1.0*np.ones(3))
familyhistory = bp.nodes.Categorical(p_familyhistory, plates=(N,))
familyhistory.observe(familyhistory_data)

# Map Diet to categories using quantile binning (assuming 3 categories)
diet_data = pd.qcut(data[:, 3], q=3, labels=False, duplicates='drop')
# Ensure the labels are within the valid range [0, 2]
diet_data = np.clip(diet_data, 0, 2)


p_diet=bp.nodes.Dirichlet(1.0*np.ones(3))
diet=bp.nodes.Categorical(p_diet,plates=(N,))
diet.observe(diet_data)

# Map Lifestyle to categories using quantile binning (assuming 4 categories)
lifestyle_data = pd.qcut(data[:, 4], q=4, labels=False, duplicates='drop')
# Ensure the labels are within the valid range [0, 3]
lifestyle_data = np.clip(lifestyle_data, 0, 3)

p_lifestyle=bp.nodes.Dirichlet(1.0*np.ones(4))
lifestyle=bp.nodes.Categorical(p_lifestyle,plates=(N,))
lifestyle.observe(lifestyle_data)

# Map Cholesterol to categories (assuming 0->High, 1->BorderLine, others unknown/not present)
cholesterol_data = np.zeros(N, dtype=int)
for i in range(N):
    if data[i, 5] == 0:
        cholesterol_data[i] = cholesterolEnum['High']
    elif data[i, 5] == 1:
        cholesterol_data[i] = cholesterolEnum['BorderLine']
    else:
        # If there are other values besides 0 and 1, this will assign them to 0 (High)
        # based on the initialization, which might not be ideal.
        # Based on the min/max output, only 0 and 1 are present.
        pass # No need to do anything if data[i, 5] is not 0 or 1, it remains 0

p_cholesterol=bp.nodes.Dirichlet(1.0*np.ones(3))
cholesterol=bp.nodes.Categorical(p_cholesterol,plates=(N,))
cholesterol.observe(cholesterol_data)

In [11]:
plates=(5, 2, 2, 3, 4, 3)
age = bp.nodes.Categorical(np.ones(5)/5, plates=(1888,))
gender = bp.nodes.Categorical(np.ones(2)/2, plates=(1888,))
familyhistory = bp.nodes.Categorical(np.ones(2)/2, plates=(1888,))
diet = bp.nodes.Categorical(np.ones(3)/3, plates=(1888,))
lifestyle = bp.nodes.Categorical(np.ones(4)/4, plates=(1888,))
cholesterol = bp.nodes.Categorical(np.ones(3)/3, plates=(1888,))

p_heartdisease = bp.nodes.Dirichlet(np.ones(3), plates=(5,2,2,3,4,3))

heartdisease = bp.nodes.MultiMixture([age, gender, familyhistory, diet, lifestyle, cholesterol],bp.nodes.Categorical, p_heartdisease)


heartdisease.observe(data[:, 6])
p_heartdisease.update()


In [12]:
#Sample Test with hard coded values
print("SampleProbability")
print("Probability(HeartDisease|Age=SuperSeniorCitizen,Gender=Female,FamilyHistory=Yes, DietIntake=Medium, LifeStyle=Sedetary,Cholesterol=High)")
print(bp.nodes.MultiMixture([ageEnum['SuperSeniorCitizen'],genderEnum['Female'],familyHistoryEnum['Yes'], dietEnum['Medium'],lifestyleEnum['Sedetary'],cholesterolEnum['High']], bp.nodes.Categorical,p_heartdisease).get_moments()[0][heartDiseaseEnum['Yes']])


SampleProbability
Probability(HeartDisease|Age=SuperSeniorCitizen,Gender=Female,FamilyHistory=Yes, DietIntake=Medium, LifeStyle=Sedetary,Cholesterol=High)
0.4262408493108354


In [13]:
#Interactive Test
m= 0
while (m==0):
    print("\n")
    res = bp.nodes.MultiMixture([int(input('Enter Age: '+ str(ageEnum))), int(input('Enter Gender: '+ str(genderEnum))), int(input('Enter FamilyHistory: '+str(familyHistoryEnum))), int(input('Enter dietEnum(CalorieIntake): '+str(dietEnum))), int(input('Enter Lifestyle: '+str(lifestyleEnum))), int(input('Enter Cholesterol: '+str(cholesterolEnum)))], bp.nodes.Categorical,p_heartdisease).get_moments()[0][heartDiseaseEnum['Yes']]

    print("Probability(HeartDisease) = " +  str(res))
    m=int(input("Enter for Continue:0,Exit:1 "))



Enter Age: {'SuperSeniorCitizen': 0, 'SeniorCitizen': 1, 'MiddleAged': 2, 'Youth': 3, 'Teen': 4}1
Enter Gender: {'Male': 0, 'Female': 1}0
Enter FamilyHistory: {'Yes': 0, 'No': 1, 'Other': 2}0
Enter dietEnum(CalorieIntake): {'High': 0, 'Medium': 1, 'Low': 2}1
Enter Lifestyle: {'Athlete': 0, 'Active': 1, 'Moderate': 2, 'Sedetary': 3}2
Enter Cholesterol: {'High': 0, 'BorderLine': 1, 'Normal': 2}2
Probability(HeartDisease) = 0.4262408493108354
Enter for Continue:0,Exit:1 1
