In [1]:
!pip install pgmpy --upgrade  # Update pgmpy to the latest version. Sometimes 'BayesianModel' moves between versions
from pgmpy.models import DiscreteBayesianNetwork # Use DiscreteBayesianNetwork instead of BayesianNetwork
# %%
cancer_model = DiscreteBayesianNetwork([('Pollution','Cancer'),('Smoker','Cancer'),('Cancer','Xray'),('Cancer','Dyspnoea')]) # Replace BayesianNetwork with DiscreteBayesianNetwork

Collecting pgmpy
  Downloading pgmpy-1.0.0-py3-none-any.whl.metadata (9.4 kB)
Collecting pyro-ppl (from pgmpy)
  Downloading pyro_ppl-1.9.1-py3-none-any.whl.metadata (7.8 kB)
Collecting pyro-api>=0.1.1 (from pyro-ppl->pgmpy)
  Downloading pyro_api-0.1.2-py3-none-any.whl.metadata (2.5 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch->pgmpy)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch->pgmpy)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch->pgmpy)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch->pgmpy)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch->pgmpy)
  Downloa

In [2]:
print(cancer_model)

DiscreteBayesianNetwork with 5 nodes and 4 edges


In [3]:
cancer_model.nodes()

NodeView(('Pollution', 'Cancer', 'Smoker', 'Xray', 'Dyspnoea'))

In [4]:
cancer_model.edges()

OutEdgeView([('Pollution', 'Cancer'), ('Cancer', 'Xray'), ('Cancer', 'Dyspnoea'), ('Smoker', 'Cancer')])

In [5]:
cancer_model.get_cpds()
#conditional probabilities

[]

In [6]:
#creation of cond prob table

from pgmpy.factors.discrete import TabularCPD  #cond probab density
#2 values are passed means 0.9 will be prob when pollution will be high and for low the value is 0.1
cpd_poll = TabularCPD(variable='Pollution',variable_card=2,values=[[0.1],[0.9]])
cpd_smoke = TabularCPD(variable='Smoker',variable_card=2,values=[[0.3],[0.7]])
cpd_cancer=TabularCPD(variable='Cancer',variable_card=2,values=[[0.97,0.95,0.001,0.02],[0.03,0.05,0.999,0.98]],
                      evidence=['Smoker','Pollution'],
                      evidence_card=[2,2])
cpd_xray=TabularCPD(variable='Xray',variable_card=2,values=[[0.9,0.2],[0.1,0.8]],
                      evidence=['Cancer'],
                      evidence_card=[2])
cpd_dysp=TabularCPD(variable='Dyspnoea',variable_card=2,values=[[0.65,0.3],[0.35,0.7]],
                      evidence=['Cancer'],
                      evidence_card=[2])


In [7]:
cancer_model.add_cpds(cpd_poll,cpd_smoke,cpd_cancer,cpd_xray,cpd_dysp)
cancer_model.check_model()

True

In [8]:
#cancer_model.is_active_trail('Pollution','Smoker')
!pip install pgmpy --upgrade
from pgmpy.models import DiscreteBayesianNetwork
from pgmpy.inference import CausalInference

# ... (your existing code for creating and defining cancer_model) ...

# Create a CausalInference object
infer = CausalInference(cancer_model)

# Check for d-separation using the is_d_separated method (if available)
# Or, try using other inference methods like query for conditional independence
try:
    are_d_separated = infer.is_d_separated("Pollution", "Smoker", {})  # Check for is_d_separated method
except AttributeError:
    # If is_d_separated is not available, consider using other inference methods
    # For example, to check for conditional independence:
    # result = infer.query(['Pollution'], evidence={'Smoker': 0})  # Query for Pollution given Smoker=0
    # You can then analyze the result to determine conditional independence
    print("d-separation check not directly supported. Consider using other inference methods.")
    are_d_separated = None  # Or handle the case as needed

# Print the result
print(f"'Pollution' and 'Smoker' are d-separated: {are_d_separated}")

d-separation check not directly supported. Consider using other inference methods.
'Pollution' and 'Smoker' are d-separated: None


In [9]:
cancer_model.get_cpds()
print(cancer_model.get_cpds('Pollution'))


+--------------+-----+
| Pollution(0) | 0.1 |
+--------------+-----+
| Pollution(1) | 0.9 |
+--------------+-----+


In [10]:
cancer_model.get_cpds()
print(cancer_model.get_cpds('Cancer'))

+-----------+--------------+--------------+--------------+--------------+
| Smoker    | Smoker(0)    | Smoker(0)    | Smoker(1)    | Smoker(1)    |
+-----------+--------------+--------------+--------------+--------------+
| Pollution | Pollution(0) | Pollution(1) | Pollution(0) | Pollution(1) |
+-----------+--------------+--------------+--------------+--------------+
| Cancer(0) | 0.97         | 0.95         | 0.001        | 0.02         |
+-----------+--------------+--------------+--------------+--------------+
| Cancer(1) | 0.03         | 0.05         | 0.999        | 0.98         |
+-----------+--------------+--------------+--------------+--------------+


In [11]:
cancer_model.get_cpds()
print(cancer_model.get_cpds('Smoker'))

+-----------+-----+
| Smoker(0) | 0.3 |
+-----------+-----+
| Smoker(1) | 0.7 |
+-----------+-----+


In [12]:
cancer_model.get_cpds()
print(cancer_model.get_cpds('Xray'))

+---------+-----------+-----------+
| Cancer  | Cancer(0) | Cancer(1) |
+---------+-----------+-----------+
| Xray(0) | 0.9       | 0.2       |
+---------+-----------+-----------+
| Xray(1) | 0.1       | 0.8       |
+---------+-----------+-----------+


In [13]:
cancer_model.get_cpds()
print(cancer_model.get_cpds('Dyspnoea'))

+-------------+-----------+-----------+
| Cancer      | Cancer(0) | Cancer(1) |
+-------------+-----------+-----------+
| Dyspnoea(0) | 0.65      | 0.3       |
+-------------+-----------+-----------+
| Dyspnoea(1) | 0.35      | 0.7       |
+-------------+-----------+-----------+


In [14]:
#determine local independencies meant which all are independent edges

cancer_model.local_independencies('Xray')

(Xray ⟂ Dyspnoea, Smoker, Pollution | Cancer)

In [15]:
cancer_model.local_independencies('Pollution')


(Pollution ⟂ Smoker)

In [16]:
cancer_model.local_independencies('Smoker')


(Smoker ⟂ Pollution)

In [17]:
cancer_model.local_independencies('Dyspnoea')


(Dyspnoea ⟂ Smoker, Pollution, Xray | Cancer)

In [18]:
cancer_model.local_independencies('Cancer')



In [19]:
cancer_model.get_independencies()
#taking random 6

(Smoker ⟂ Pollution)
(Xray ⟂ Smoker | Cancer)
(Xray ⟂ Pollution | Cancer)
(Dyspnoea ⟂ Xray | Cancer)
(Dyspnoea ⟂ Pollution | Cancer)
(Dyspnoea ⟂ Smoker | Cancer)

In [20]:
#konsa feature kispr dependent hai and which one is better,allows to eliminate the features/variables that are not efficient wrt model
#Inferencing with Bayesian Network

from pgmpy.inference import VariableElimination
cancer_infer=VariableElimination(cancer_model)
q=cancer_infer.query(variables=['Cancer'],evidence={'Smoker':1})
print(q)
#if the person is smoker, then the cond prob is this

+-----------+---------------+
| Cancer    |   phi(Cancer) |
| Cancer(0) |        0.0181 |
+-----------+---------------+
| Cancer(1) |        0.9819 |
+-----------+---------------+


In [21]:
q=cancer_infer.query(variables=['Cancer'],evidence={'Smoker': 1,'Pollution':1})
print(q)

+-----------+---------------+
| Cancer    |   phi(Cancer) |
| Cancer(0) |        0.0200 |
+-----------+---------------+
| Cancer(1) |        0.9800 |
+-----------+---------------+
