In [5]:

# Libraries used by this notebook
import pandas as pd

# Load the data set into a DataFrame. Column names are supplied explicitly,
# so the first line of the file is read as data rather than as a header.
columnas = [
    'age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg',
    'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal', 'num',
]
df = pd.read_csv('processed_cleveland.data', names=columnas)

# Missing entries are encoded as "?" in the file; every such cell is set to 0.
# NOTE(review): 0 then becomes an ordinary value of the affected columns --
# confirm that this imputation is what the model should see.
df = df.replace('?', 0)

# Force every column to a numeric dtype; anything unparseable becomes NaN.
df = df.apply(pd.to_numeric, errors='coerce')

In [18]:
# The Bayesian network needs discrete variables, so `age` is mapped to four
# bins. The original note gives 29 as the minimum and 77 as the maximum age.
# Rows matching none of the ranges keep whatever the target column already
# held (NaN the first time the column is created).
df.loc[df['age'].ge(29.0) & df['age'].lt(48.0), 'age_discreta'] = 1
df.loc[df['age'].ge(48.0) & df['age'].lt(56.0), 'age_discreta'] = 2
df.loc[df['age'].ge(56.0) & df['age'].lt(61.0), 'age_discreta'] = 3
df.loc[df['age'].ge(61.0) & df['age'].le(77.0), 'age_discreta'] = 4

# `oldpeak` is discretized as well, into three bins covering 0 to 6.2.
df.loc[df['oldpeak'].ge(0) & df['oldpeak'].lt(0.800000), 'oldpeak_discreta'] = 1
df.loc[df['oldpeak'].ge(0.800000) & df['oldpeak'].lt(1.600000), 'oldpeak_discreta'] = 2
df.loc[df['oldpeak'].ge(1.600000) & df['oldpeak'].le(6.200000), 'oldpeak_discreta'] = 3

# Target variable: collapse `num` to binary (0 stays 0, anything else becomes 1).
df.loc[df['num'].eq(0), 'num_discreta'] = 0
df.loc[df['num'].ne(0), 'num_discreta'] = 1

#df  

In [19]:
# Build the structure of the Bayesian network
from pgmpy.models import BayesianNetwork

# Each tuple is a directed edge (parent, child). The order of the edges is
# kept as-is because it determines the order in which nodes are listed.
model = BayesianNetwork([
    ("age_discreta", "ca"),
    ("sex", "thal"),
    ("thal", "slope"),
    ("thal", "exang"),
    ("slope", "oldpeak_discreta"),
    ("oldpeak_discreta", "ca"),
    ("exang", "cp"),
    ("ca", "num_discreta"),
    ("cp", "num_discreta"),
])

In [20]:
from pgmpy.estimators import MaximumLikelihoodEstimator
# Estimate the CPD of every node from `df` by maximum likelihood.
model.fit(data=df, estimator=MaximumLikelihoodEstimator)

# Print each node name followed by its learned CPD table.
for nodo in model.nodes():
    print(nodo)
    print(model.get_cpds(nodo))

age_discreta
+-------------------+----------+
| age_discreta(1.0) | 0.247525 |
+-------------------+----------+
| age_discreta(2.0) | 0.250825 |
+-------------------+----------+
| age_discreta(3.0) | 0.240924 |
+-------------------+----------+
| age_discreta(4.0) | 0.260726 |
+-------------------+----------+
ca
+------------------+-----+-----------------------+
| age_discreta     | ... | age_discreta(4.0)     |
+------------------+-----+-----------------------+
| oldpeak_discreta | ... | oldpeak_discreta(3.0) |
+------------------+-----+-----------------------+
| ca(0.0)          | ... | 0.28125               |
+------------------+-----+-----------------------+
| ca(1.0)          | ... | 0.1875                |
+------------------+-----+-----------------------+
| ca(2.0)          | ... | 0.3125                |
+------------------+-----+-----------------------+
| ca(3.0)          | ... | 0.21875               |
+------------------+-----+-----------------------+
sex
+----------+--------

In [33]:

from pgmpy.inference import VariableElimination
# Definir función para calcular la probabilidad
def calcular_probabilidad(age, sex, thal, slope, exang, oldpeak, ca, cp):
    """Query the Bayesian network for the heart-disease probability of one patient.

    The network is built with the same edges as the notebook's model, fitted
    on the module-level DataFrame ``df`` with maximum likelihood, and queried
    for ``num_discreta`` given all other nodes as evidence.

    Parameters
    ----------
    age : number
        Raw age; it is binned here with the same cut points used to create
        the ``age_discreta`` column (29-47 -> 1, 48-55 -> 2, 56-60 -> 3,
        61-77 -> 4).
    sex, thal, slope, exang, ca, cp
        Passed unchanged as the evidence state of the node of the same name.
        Presumably each must be a value present in ``df`` -- verify.
    oldpeak
        Used directly as the state of ``oldpeak_discreta``; it is NOT binned
        here. NOTE(review): confirm callers pass the bin label (1-3) rather
        than the raw oldpeak measurement.

    Returns
    -------
    list of float
        ``q.values.tolist()`` for ``num_discreta``; per the original author's
        note this is [P(no disease), P(disease)].

    Raises
    ------
    ValueError
        If ``age`` is outside the closed range [29, 77] (or is NaN), since no
        age bin exists for it. Previously this surfaced later as an
        UnboundLocalError.
    """
    # Validate and bin the age first, so bad input fails fast before any
    # model fitting is done.
    if 29 <= age < 48:
        age_discreta = 1
    elif 48 <= age < 56:
        age_discreta = 2
    elif 56 <= age < 61:
        age_discreta = 3
    elif 61 <= age <= 77:
        age_discreta = 4
    else:
        raise ValueError(
            f"age must be between 29 and 77 (inclusive), got {age!r}"
        )

    # Local imports keep the function usable regardless of which notebook
    # cells have been executed.
    from pgmpy.estimators import MaximumLikelihoodEstimator
    from pgmpy.inference import VariableElimination
    from pgmpy.models import BayesianNetwork

    # Build and fit the network, then create the inference object.
    # NOTE(review): the network is rebuilt and refitted on every call.
    model = BayesianNetwork(
        [("age_discreta", "ca"),
         ("sex", "thal"),
         ("thal", "slope"),
         ("thal", "exang"),
         ("slope", "oldpeak_discreta"),
         ("oldpeak_discreta", "ca"),
         ("exang", "cp"),
         ("ca", "num_discreta"),
         ("cp", "num_discreta"),])
    model.fit(data=df, estimator=MaximumLikelihoodEstimator)
    infer = VariableElimination(model)

    # Evidence for every node other than the queried one.
    evidence = {'age_discreta': age_discreta,
                'sex': sex,
                'thal': thal,
                'slope': slope,
                'exang': exang,
                'oldpeak_discreta': oldpeak,
                'ca': ca,
                'cp': cp}

    # Distribution of the binary heart-disease variable given the evidence.
    q = infer.query(variables=['num_discreta'], evidence=evidence)
    return q.values.tolist()
# Example query for a 40-year-old patient; the value is shown as the cell output.
calcular_probabilidad(age=40, sex=1, thal=6, slope=2, exang=1, oldpeak=2, ca=2, cp=4)

[0.06896551724137931, 0.9310344827586207]