In [1]:
from pgmpy.models import BayesianModel

In [15]:
# Build the cancer network from its directed (parent, child) edges:
# Pollution and Smoker point into Cancer; Cancer points to Xray and Dyspnoea.
cancer_model = BayesianModel(
    [
        ('Pollution', 'Cancer'),
        ('Smoker', 'Cancer'),
        ('Cancer', 'Xray'),
        ('Cancer', 'Dyspnoea'),
    ]
)

In [16]:
# List the variables (nodes) currently in the network.
cancer_model.nodes()

NodeView(('Pollution', 'Cancer', 'Smoker', 'Xray', 'Dyspnoea'))

In [17]:
# List the directed edges of the network as (parent, child) pairs.
cancer_model.edges()

OutEdgeView([('Pollution', 'Cancer'), ('Cancer', 'Xray'), ('Cancer', 'Dyspnoea'), ('Smoker', 'Cancer')])

In [18]:
# No CPDs have been attached to the model at this point, so this shows an empty list.
cancer_model.get_cpds()

[]

In [19]:
from pgmpy.factors.discrete import TabularCPD

# Unconditional distributions for the two root nodes: one row per state.
cpd_poll = TabularCPD(
    variable='Pollution',
    variable_card=2,
    values=[[0.9], [0.1]],
)
cpd_smoke = TabularCPD(
    variable='Smoker',
    variable_card=2,
    values=[[0.3], [0.7]],
)

# P(Cancer | Smoker, Pollution). Rows are the Cancer states; columns follow the
# evidence order with Smoker varying slowest and Pollution fastest:
# (S0,P0), (S0,P1), (S1,P0), (S1,P1). Each column sums to 1.
cpd_cancer = TabularCPD(
    variable='Cancer',
    variable_card=2,
    values=[
        [0.03, 0.05, 0.001, 0.02],
        [0.97, 0.95, 0.999, 0.98],
    ],
    evidence=['Smoker', 'Pollution'],
    evidence_card=[2, 2],
)

# Both symptoms depend only on Cancer: one column per Cancer state.
cpd_xray = TabularCPD(
    variable='Xray',
    variable_card=2,
    values=[
        [0.9, 0.2],
        [0.1, 0.8],
    ],
    evidence=['Cancer'],
    evidence_card=[2],
)
cpd_dysp = TabularCPD(
    variable='Dyspnoea',
    variable_card=2,
    values=[
        [0.65, 0.3],
        [0.35, 0.7],
    ],
    evidence=['Cancer'],
    evidence_card=[2],
)

In [20]:
# Attach the five CPDs defined above to the network, one per node.
cancer_model.add_cpds(cpd_poll,cpd_smoke,cpd_cancer,cpd_xray,cpd_dysp)

In [21]:
# Ask pgmpy to validate the model now that every node has a CPD attached.
cancer_model.check_model()

True

In [22]:
# Check whether influence can flow between Pollution and Smoker, first with
# nothing observed and then with their common child Cancer observed.
# A notebook cell only echoes its last expression, so the unconditioned result
# is printed explicitly; as a bare expression it was computed and discarded.
print(cancer_model.is_active_trail('Pollution','Smoker'))
cancer_model.is_active_trail('Pollution','Smoker',observed=['Cancer'])

True

In [23]:
# List the CPD objects now attached to the model (one per node).
cancer_model.get_cpds()

[<TabularCPD representing P(Pollution:2) at 0x3c8c2f0>,
 <TabularCPD representing P(Smoker:2) at 0x3c8c250>,
 <TabularCPD representing P(Cancer:2 | Smoker:2, Pollution:2) at 0x3c8c310>,
 <TabularCPD representing P(Xray:2 | Cancer:2) at 0x3c8c330>,
 <TabularCPD representing P(Dyspnoea:2 | Cancer:2) at 0x3c8c270>]

In [24]:
# Print the prior table for Pollution.
print(cancer_model.get_cpds('Pollution'))

╒═════════════╤═════╕
│ Pollution_0 │ 0.9 │
├─────────────┼─────┤
│ Pollution_1 │ 0.1 │
╘═════════════╧═════╛


In [25]:
# Print the prior table for Smoker.
print(cancer_model.get_cpds('Smoker'))

╒══════════╤═════╕
│ Smoker_0 │ 0.3 │
├──────────┼─────┤
│ Smoker_1 │ 0.7 │
╘══════════╧═════╛


In [26]:
# Print P(Xray | Cancer): one column per Cancer state.
print(cancer_model.get_cpds('Xray'))

╒════════╤══════════╤══════════╕
│ Cancer │ Cancer_0 │ Cancer_1 │
├────────┼──────────┼──────────┤
│ Xray_0 │ 0.9      │ 0.2      │
├────────┼──────────┼──────────┤
│ Xray_1 │ 0.1      │ 0.8      │
╘════════╧══════════╧══════════╛


In [27]:
# Print P(Dyspnoea | Cancer): one column per Cancer state.
print(cancer_model.get_cpds('Dyspnoea'))

╒════════════╤══════════╤══════════╕
│ Cancer     │ Cancer_0 │ Cancer_1 │
├────────────┼──────────┼──────────┤
│ Dyspnoea_0 │ 0.65     │ 0.3      │
├────────────┼──────────┼──────────┤
│ Dyspnoea_1 │ 0.35     │ 0.7      │
╘════════════╧══════════╧══════════╛


In [28]:
# Print P(Cancer | Smoker, Pollution): one column per (Smoker, Pollution) combination.
print(cancer_model.get_cpds('Cancer'))

╒═══════════╤═════════════╤═════════════╤═════════════╤═════════════╕
│ Smoker    │ Smoker_0    │ Smoker_0    │ Smoker_1    │ Smoker_1    │
├───────────┼─────────────┼─────────────┼─────────────┼─────────────┤
│ Pollution │ Pollution_0 │ Pollution_1 │ Pollution_0 │ Pollution_1 │
├───────────┼─────────────┼─────────────┼─────────────┼─────────────┤
│ Cancer_0  │ 0.03        │ 0.05        │ 0.001       │ 0.02        │
├───────────┼─────────────┼─────────────┼─────────────┼─────────────┤
│ Cancer_1  │ 0.97        │ 0.95        │ 0.999       │ 0.98        │
╘═══════════╧═════════════╧═════════════╧═════════════╧═════════════╛


In [29]:
# Show the local independence assertion the graph structure implies for Xray.
cancer_model.local_independencies('Xray')

(Xray _|_ Dyspnoea, Pollution, Smoker | Cancer)

In [30]:
# Show the local independence assertion for the root node Pollution.
# NOTE(review): the recorded output lists Cancer (and Cancer's children) as
# independent of Pollution, although Pollution is a direct parent of Cancer in
# the edge list. That looks wrong for a parent/child pair -- verify against the
# installed pgmpy version before relying on it.
cancer_model.local_independencies('Pollution')

(Pollution _|_ Dyspnoea, Cancer, Smoker, Xray)

In [31]:
# Show the local independence assertion for the root node Smoker.
# NOTE(review): the recorded output lists Cancer (and Cancer's children) as
# independent of Smoker, although Smoker is a direct parent of Cancer in the
# edge list. That looks wrong for a parent/child pair -- verify against the
# installed pgmpy version before relying on it.
cancer_model.local_independencies('Smoker')

(Smoker _|_ Dyspnoea, Pollution, Cancer, Xray)

In [32]:
# Show the local independence assertion the graph structure implies for Dyspnoea.
cancer_model.local_independencies('Dyspnoea')

(Dyspnoea _|_ Pollution, Smoker, Xray | Cancer)

In [33]:
# Show the local independence assertion the graph structure implies for Cancer.
cancer_model.local_independencies('Cancer')

(Cancer _|_ Dyspnoea, Xray | Pollution, Smoker)

In [34]:
# Enumerate every conditional independence implied by the graph.
# The listing is long; the recorded output below is cut off part-way.
cancer_model.get_independencies()

(Pollution _|_ Smoker)
(Pollution _|_ Dyspnoea, Xray | Cancer)
(Pollution _|_ Xray | Dyspnoea, Cancer)
(Pollution _|_ Dyspnoea, Xray | Smoker, Cancer)
(Pollution _|_ Dyspnoea | Xray, Cancer)
(Pollution _|_ Xray | Dyspnoea, Smoker, Cancer)
(Pollution _|_ Dyspnoea | Xray, Smoker, Cancer)
(Smoker _|_ Pollution)
(Smoker _|_ Dyspnoea, Xray | Cancer)
(Smoker _|_ Xray | Dyspnoea, Cancer)
(Smoker _|_ Dyspnoea, Xray | Pollution, Cancer)
(Smoker _|_ Dyspnoea | Xray, Cancer)
(Smoker _|_ Xray | Dyspnoea, Pollution, Cancer)
(Smoker _|_ Dyspnoea | Pollution, Xray, Cancer)
(Xray _|_ Dyspnoea, Pollution, Smoker | Cancer)
(Xray _|_ Pollution, Smoker | Dyspnoea, Cancer)
(Xray _|_ Dyspnoea, Smoker | Pollution, Cancer)
(Xray _|_ Dyspnoea, Pollution | Smoker, Cancer)
(Xray _|_ Smoker | Dyspnoea, Pollution, Cancer)
(Xray _|_ Pollution | Dyspnoea, Smoker, Cancer)
(Xray _|_ Dyspnoea | Pollution, Smoker, Cancer)
(Dyspnoea _|_ Pollution, Smoker, Xray | Cancer)
(Dyspnoea _|_ Smoker, Xray | Pollution, Cancer)
(Dy

In [35]:
from pgmpy.inference import VariableElimination

In [36]:
# Inference engine over the cancer network; reused by the query cells that follow.
cancer_infer = VariableElimination(cancer_model)

In [37]:
# Posterior over Cancer given the observation Smoker = 1.
# NOTE(review): indexing the result with q['Cancer'] assumes query() returns a
# mapping from variable name to factor; other pgmpy releases may return the
# factor directly -- confirm against the installed version.
q=cancer_infer.query(variables=['Cancer'],evidence={'Smoker':1})
print(q['Cancer'])

╒══════════╤═══════════════╕
│ Cancer   │   phi(Cancer) │
╞══════════╪═══════════════╡
│ Cancer_0 │        0.0029 │
├──────────┼───────────────┤
│ Cancer_1 │        0.9971 │
╘══════════╧═══════════════╛


  phi.values = phi.values[slice_]


In [38]:
# NOTE(review): this cell is an exact repeat of the preceding query
# (Cancer given Smoker = 1) and prints the same table; it looks like a
# leftover re-run and is a candidate for removal.
q=cancer_infer.query(variables=['Cancer'],evidence={'Smoker':1})
print(q['Cancer'])

╒══════════╤═══════════════╕
│ Cancer   │   phi(Cancer) │
╞══════════╪═══════════════╡
│ Cancer_0 │        0.0029 │
├──────────┼───────────────┤
│ Cancer_1 │        0.9971 │
╘══════════╧═══════════════╛


In [39]:
# Posterior over Cancer given both parents observed: Smoker = 1 and Pollution = 1.
# With both parents fixed, the printed values match the (Smoker_1, Pollution_1)
# column of the Cancer CPD.
q=cancer_infer.query(variables=['Cancer'],evidence={'Smoker':1,'Pollution':1})
print(q['Cancer'])

╒══════════╤═══════════════╕
│ Cancer   │   phi(Cancer) │
╞══════════╪═══════════════╡
│ Cancer_0 │        0.0200 │
├──────────┼───────────────┤
│ Cancer_1 │        0.9800 │
╘══════════╧═══════════════╛


In [40]:
import numpy as np
from urllib.request import urlopen
import urllib
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn as skl
import pandas as pd

In [41]:
# Location of the heart-disease CSV on the UCI repository.
# NOTE(review): despite the variable name, this URL points at
# processed.hungarian.data, not the Cleveland file. The name is kept because
# the loading cell refers to it; confirm which data set is actually intended.
Cleveland_data_URL='http://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.hungarian.data'

In [42]:
# Print numpy arrays in full instead of summarising long ones with '...'.
# threshold must be a real number: NaN is rejected by current numpy releases,
# and sys.maxsize is the documented way to request untruncated output.
import sys

np.set_printoptions(threshold=sys.maxsize)

In [43]:
# Column labels to apply when reading the CSV, in file order.
names = [
    'age',
    'sex',
    'cp',
    'trestbps',
    'chol',
    'fbs',
    'restecg',
    'thalach',
    'exang',
    'oldpeak',
    'slope',
    'ca',
    'thal',
    'heartdisease',
]

In [44]:
# Download the CSV and load it with the column labels in `names`.
# The HTTP response is opened as a context manager so the connection is closed
# as soon as pandas has finished reading, rather than being left open.
# Missing values arrive as the literal string '?' (visible in the preview) and
# are handled in a later cell.
with urlopen(Cleveland_data_URL) as response:
    heartDisease = pd.read_csv(response, names=names)

In [45]:
# Preview the first rows; note the '?' placeholders used for missing values.
heartDisease.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,heartdisease
0,28,1,2,130,132,0,2,185,0,0.0,?,?,?,0
1,29,1,2,120,243,0,0,160,0,0.0,?,?,?,0
2,29,1,2,140,?,0,0,170,0,0.0,?,?,?,0
3,30,0,1,170,237,0,1,170,0,0.0,?,?,6,0
4,31,0,2,100,219,0,1,150,0,0.0,?,?,?,0


In [46]:
# Drop the 'ca' column. Using drop(..., errors='ignore') instead of `del` keeps
# the cell safe to re-run: `del` raises KeyError once the column is gone.
heartDisease = heartDisease.drop(columns=['ca'], errors='ignore')

In [47]:
 # Drop the 'slope' column. Using drop(..., errors='ignore') instead of `del`
 # keeps the cell safe to re-run: `del` raises KeyError once the column is gone.
 heartDisease = heartDisease.drop(columns=['slope'], errors='ignore')

In [48]:
# Drop the 'thal' column. Using drop(..., errors='ignore') instead of `del` keeps
# the cell safe to re-run: `del` raises KeyError once the column is gone.
heartDisease = heartDisease.drop(columns=['thal'], errors='ignore')

In [49]:
# Drop the 'oldpeak' column. Using drop(..., errors='ignore') instead of `del`
# keeps the cell safe to re-run: `del` raises KeyError once the column is gone.
heartDisease = heartDisease.drop(columns=['oldpeak'], errors='ignore')

In [50]:
# Turn the '?' placeholders into NaN so they count as missing values.
heartDisease = heartDisease.replace('?', np.nan)

# A notebook cell only echoes its last expression, so the dtypes are printed
# explicitly; as a bare expression they were computed and thrown away.
print(heartDisease.dtypes)
heartDisease.columns

Index(['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',
       'exang', 'heartdisease'],
      dtype='object')

In [51]:
from pgmpy.models import BayesianModel
from pgmpy.estimators import MaximumLikelihoodEstimator,BayesianEstimator

In [52]:
# Structure of the heart-disease network as directed (parent, child) edges.
# The original list contained ('sex', 'trestbps') twice; the repeat is removed
# here since a second copy of the same directed edge adds nothing to the graph.
# NOTE(review): if the duplicate was a typo for a different edge, add that edge.
model = BayesianModel(
    [
        ('age', 'trestbps'),
        ('age', 'fbs'),
        ('sex', 'trestbps'),
        ('exang', 'trestbps'),
        ('trestbps', 'heartdisease'),
        ('fbs', 'heartdisease'),
        ('heartdisease', 'restecg'),
        ('heartdisease', 'thalach'),
        ('heartdisease', 'chol'),
    ]
)

In [53]:
# Estimate the CPDs of the network from the data frame by maximum likelihood.
model.fit(heartDisease, estimator=MaximumLikelihoodEstimator)

# Print a few of the learned tables, in the same order as before.
for fitted_variable in ('age', 'chol', 'sex'):
    print(model.get_cpds(fitted_variable))

# Echo the independencies implied by the graph structure (cell output).
model.get_independencies()

╒═════════╤════════════╕
│ age(28) │ 0.00383142 │
├─────────┼────────────┤
│ age(29) │ 0.00383142 │
├─────────┼────────────┤
│ age(30) │ 0.00383142 │
├─────────┼────────────┤
│ age(31) │ 0.00766284 │
├─────────┼────────────┤
│ age(32) │ 0.0153257  │
├─────────┼────────────┤
│ age(33) │ 0.00766284 │
├─────────┼────────────┤
│ age(34) │ 0.0153257  │
├─────────┼────────────┤
│ age(35) │ 0.0191571  │
├─────────┼────────────┤
│ age(36) │ 0.0191571  │
├─────────┼────────────┤
│ age(37) │ 0.0306513  │
├─────────┼────────────┤
│ age(38) │ 0.0191571  │
├─────────┼────────────┤
│ age(39) │ 0.0344828  │
├─────────┼────────────┤
│ age(40) │ 0.0191571  │
├─────────┼────────────┤
│ age(41) │ 0.0383142  │
├─────────┼────────────┤
│ age(42) │ 0.0268199  │
├─────────┼────────────┤
│ age(43) │ 0.0421456  │
├─────────┼────────────┤
│ age(44) │ 0.0268199  │
├─────────┼────────────┤
│ age(45) │ 0.0229885  │
├─────────┼────────────┤
│ age(46) │ 0.045977   │
├─────────┼────────────┤
│ age(47) │ 0.0344828  │


(age _|_ exang, sex)
(age _|_ exang | sex)
(age _|_ exang, sex | fbs)
(age _|_ restecg, chol, thalach | heartdisease)
(age _|_ sex | exang)
(age _|_ heartdisease, chol, restecg, thalach | trestbps, fbs)
(age _|_ restecg, chol, thalach | trestbps, heartdisease)
(age _|_ exang | fbs, sex)
(age _|_ restecg, chol, thalach | heartdisease, sex)
(age _|_ restecg, chol, thalach | heartdisease, fbs)
(age _|_ sex | exang, fbs)
(age _|_ restecg, thalach | heartdisease, chol)
(age _|_ restecg, chol | heartdisease, thalach)
(age _|_ chol, thalach | heartdisease, restecg)
(age _|_ restecg, chol, thalach | heartdisease, exang)
(age _|_ heartdisease, chol, restecg, thalach | trestbps, fbs, sex)
(age _|_ restecg, chol, thalach | trestbps, sex, heartdisease)
(age _|_ restecg, chol, thalach | trestbps, fbs, heartdisease)
(age _|_ heartdisease, restecg, thalach | trestbps, fbs, chol)
(age _|_ heartdisease, chol, restecg | trestbps, fbs, thalach)
(age _|_ heartdisease, chol, thalach | trestbps, fbs, restec

In [54]:
from pgmpy.inference import VariableElimination

In [55]:
# Inference engine over the fitted heart-disease network; used by the queries below.
HeartDisease_infer = VariableElimination(model)

In [57]:
# Posterior over heartdisease given evidence on 'age'.
# NOTE(review): if this pgmpy version treats evidence values as positional
# state indices (as the 0/1 evidence in the cancer queries suggests), then 28
# selects the 29th distinct age state, not a patient aged 28 -- confirm, and
# map the value to its state index or name as the installed version requires.
q=HeartDisease_infer.query(variables=['heartdisease'],evidence={'age':28})
print(q['heartdisease'])

╒════════════════╤═════════════════════╕
│ heartdisease   │   phi(heartdisease) │
╞════════════════╪═════════════════════╡
│ heartdisease_0 │              0.5770 │
├────────────────┼─────────────────────┤
│ heartdisease_1 │              0.4230 │
╘════════════════╧═════════════════════╛


In [59]:
# Posterior over heartdisease given evidence on 'chol'.
# NOTE(review): if this pgmpy version treats evidence values as positional
# state indices, then 100 selects the 101st distinct chol state rather than a
# cholesterol reading of 100 -- confirm. The column was read with '?' strings
# in it, so its states may also be strings rather than integers.
q=HeartDisease_infer.query(variables=['heartdisease'],evidence={'chol':100})
print(q['heartdisease'])

╒════════════════╤═════════════════════╕
│ heartdisease   │   phi(heartdisease) │
╞════════════════╪═════════════════════╡
│ heartdisease_0 │              0.5510 │
├────────────────┼─────────────────────┤
│ heartdisease_1 │              0.4490 │
╘════════════════╧═════════════════════╛
