In this notebook, we show examples for using the Structure Learning Algorithms in pgmpy. Currently, pgmpy has implementation of 3 main algorithms:
1. PC with stable and parallel variants.
2. Hill-Climb Search
3. Exhaustive Search

For PC the following conditional independence test can be used:
1. Chi-Square test
2. Pearsonr

For Hill-Climb and Exhausitive Search the following scoring methods can be used:
1. K2 Score
2. BDeu Score
3. Bic Score

## Generate some data

In [7]:
from itertools import combinations

from pgmpy.estimators import PC, HillClimbSearch, ExhaustiveSearch
from pgmpy.estimators import K2Score
from pgmpy.utils import get_example_model
from pgmpy.sampling import BayesianModelSampling

In [2]:
model = get_example_model('alarm')
samples = BayesianModelSampling(model).forward_sample(size=int(1e5))
samples.head()

  "Found unknown state name. Trying to switch to using all state names as state numbers"
Generating for node: CVP: 100%|██████████| 37/37 [01:25<00:00,  2.32s/it]         


Unnamed: 0,MINVOLSET,VENTMACH,DISCONNECT,VENTTUBE,INTUBATION,PULMEMBOLUS,SHUNT,PAP,FIO2,KINKEDTUBE,...,HRBP,LVFAILURE,HISTORY,HYPOVOLEMIA,STROKEVOLUME,CO,BP,LVEDVOLUME,PCWP,CVP
0,NORMAL,NORMAL,False,LOW,NORMAL,False,NORMAL,NORMAL,NORMAL,False,...,HIGH,False,False,False,NORMAL,HIGH,HIGH,NORMAL,NORMAL,NORMAL
1,NORMAL,NORMAL,False,LOW,NORMAL,False,NORMAL,NORMAL,NORMAL,False,...,HIGH,False,False,False,NORMAL,NORMAL,LOW,NORMAL,NORMAL,NORMAL
2,NORMAL,NORMAL,False,LOW,NORMAL,False,NORMAL,NORMAL,NORMAL,False,...,HIGH,False,False,False,NORMAL,HIGH,HIGH,NORMAL,NORMAL,NORMAL
3,NORMAL,NORMAL,False,LOW,NORMAL,False,NORMAL,NORMAL,NORMAL,False,...,HIGH,False,False,False,NORMAL,HIGH,HIGH,NORMAL,NORMAL,NORMAL
4,NORMAL,NORMAL,False,LOW,NORMAL,False,NORMAL,NORMAL,NORMAL,False,...,HIGH,False,False,False,NORMAL,HIGH,HIGH,NORMAL,NORMAL,NORMAL


In [3]:
# Funtion to evaluate the learned model structures.

def score(estimated_model, true_model):
    tp = 0
    fn = 0
    fp = 0
    tn = 0

    for edge in combinations(true_model.nodes(), 2):
        if (edge in estimated_model.edges()) and (edge in true_model.edges()):
            tp += 1
        elif (edge in estimated_model.edges()) and (not (edge in true_model.edges())):
            fp += 1
        elif (not(edge in estimated_model.edges())) and (edge in true_model.edges()):
            fn += 1
        elif (not(edge in estimated_model.edges())) and (not(edge in true_model.edges())):
            tn += 1
    print(f"Total: {tp+fn+fp+tn}. True-positive: {tp}, False-Negative: {fn}, False-Positive: {fp}, True-Negative: {tn}")
    print(f"Edge accuracy: {(tp+tn)/(tp+fn+fp+tn)}")

## Learn the model structure using PC

In [4]:
est = PC(data=samples)
estimated_model = est.estimate(variant='stable', max_cond_vars=4)
score(estimated_model, model)

  warn("Reached maximum number of allowed conditional variables. Exiting")
7it [04:26, 38.14s/it]

Total: 666. True-positive: 25, False-Negative: 4, False-Positive: 5, True-Negative: 632
Edge accuracy: 0.9864864864864865



  + "oriented arbitrarily."


In [5]:
est = PC(data=samples)
estimated_model = est.estimate(variant='orig', max_cond_vars=4)
score(estimated_model, model)

7it [03:16, 28.08s/it]

Total: 666. True-positive: 24, False-Negative: 5, False-Positive: 5, True-Negative: 632
Edge accuracy: 0.984984984984985





## Learn the model structure using Hill-Climb Search

In [10]:
scoring_method = K2Score(data=samples)
est = HillClimbSearch(data=samples, scoring_method=scoring_method)
estimated_model = est.estimate(max_indegree=4, max_iter=int(1e4))
score(estimated_model, model)

Total: 666. True-positive: 17, False-Negative: 12, False-Positive: 9, True-Negative: 628
Edge accuracy: 0.9684684684684685
