In [1]:
#!pip install pgmpy
## https://www.youtube.com/watch?v=DEHqIxX1Kq4

In [2]:
from pgmpy.factors.discrete import TabularCPD
from pgmpy.models import BayesianModel 

### Setting up model

1. Set the structure

In [3]:
# Directed edges are (parent, child) pairs: Genetics and Practice both point
# into OlympicTrials, and OlympicTrials points into Offer.
olympic_model = BayesianModel(
    [
        ('Genetics', 'OlympicTrials'),
        ('Practice', 'OlympicTrials'),
        ('OlympicTrials', 'Offer'),
    ]
)

2. Set up the relationships (CPDs)

In [4]:
# Prior over Genetics (a root node, so no evidence).
# The table has one row per state of the variable and a single column, i.e.
# shape (variable_card, 1) -- the same rows-are-states layout the conditional
# CPDs in this notebook use. A single row of two values has shape (1, 2) and
# is rejected by pgmpy releases that validate the shape of `values`.
genetics_cpd = TabularCPD(variable='Genetics',
                          variable_card=2,
                          values=[[0.2],
                                  [0.8]])

In [5]:
# Prior over Practice (a root node, so no evidence).
# The table has one row per state of the variable and a single column, i.e.
# shape (variable_card, 1) -- the same rows-are-states layout the conditional
# CPDs in this notebook use. A single row of two values has shape (1, 2) and
# is rejected by pgmpy releases that validate the shape of `values`.
practice_cpd = TabularCPD(variable='Practice',
                          variable_card=2,
                          values=[[0.7],
                                  [0.3]])

In [6]:
# P(Offer | OlympicTrials).
# Rows are the two Offer states; each column is the distribution for one of
# the three OlympicTrials states (0, 1, 2) and sums to 1.
offer_cpd = TabularCPD(
    variable='Offer',
    variable_card=2,
    values=[
        [0.95, 0.8, 0.5],
        [0.05, 0.2, 0.5],
    ],
    evidence=['OlympicTrials'],
    evidence_card=[3],
)

In [7]:
# P(OlympicTrials | Genetics, Practice).
# Rows are the three OlympicTrials states. Columns enumerate the evidence
# combinations with the last evidence variable changing fastest:
#   (Genetics=0, Practice=0), (Genetics=0, Practice=1),
#   (Genetics=1, Practice=0), (Genetics=1, Practice=1)
# Each column sums to 1.
olympic_trials_cpd = TabularCPD(
    variable='OlympicTrials',
    variable_card=3,
    values=[
        [0.5, 0.8, 0.8, 0.9],
        [0.3, 0.15, 0.1, 0.08],
        [0.2, 0.05, 0.1, 0.02],
    ],
    evidence=['Genetics', 'Practice'],
    evidence_card=[2, 2],
)

3. Add the relationships (CPDs) to the model

In [8]:
# Attach all four CPDs (two priors, two conditionals) to the network.
olympic_model.add_cpds(
    genetics_cpd,
    practice_cpd,
    offer_cpd,
    olympic_trials_cpd,
)

Examine the structure of the graph

In [9]:
# List the CPD objects currently attached to the model (one per variable).
olympic_model.get_cpds()

[<TabularCPD representing P(Genetics:2) at 0xa24504240>,
 <TabularCPD representing P(Practice:2) at 0x1924e50748>,
 <TabularCPD representing P(Offer:2 | OlympicTrials:3) at 0x1924e50ac8>,
 <TabularCPD representing P(OlympicTrials:3 | Genetics:2, Practice:2) at 0x1924e50cc0>]

Find active trail nodes

In [10]:
# Nodes connected to Genetics by an active trail when nothing is observed.
# Practice is not among them: the only path to it runs through the common
# child OlympicTrials, which is unobserved here.
olympic_model.active_trail_nodes('Genetics')

{'Genetics': {'Genetics', 'Offer', 'OlympicTrials'}}

In [12]:
# Nodes connected to OlympicTrials by an active trail when nothing is observed.
olympic_model.active_trail_nodes('OlympicTrials')

{'OlympicTrials': {'Genetics', 'Offer', 'OlympicTrials', 'Practice'}}

Find local independencies

In [13]:
# Local independence assertion the model reports for the Genetics node.
olympic_model.local_independencies('Genetics')

(Genetics _|_ Practice, Offer, OlympicTrials)

In [14]:
# Local independence assertion the model reports for the OlympicTrials node.
olympic_model.local_independencies('OlympicTrials')

(OlympicTrials _|_ Offer | Practice, Genetics)

Get all independencies

In [15]:
# Enumerate all (conditional) independence assertions implied by the graph.
olympic_model.get_independencies()

(Genetics _|_ Practice)
(Genetics _|_ Offer | OlympicTrials)
(Genetics _|_ Offer | Practice, OlympicTrials)
(Practice _|_ Genetics)
(Practice _|_ Offer | OlympicTrials)
(Practice _|_ Offer | Genetics, OlympicTrials)
(Offer _|_ Practice, Genetics | OlympicTrials)
(Offer _|_ Genetics | Practice, OlympicTrials)
(Offer _|_ Practice | Genetics, OlympicTrials)

### Making Inference

We can get probability distributions that are not explicitly spelled out in our graph.

In [16]:
from pgmpy.inference import VariableElimination

In [17]:
# Inference engine over the fully specified model; all queries below go through it.
olympic_infer = VariableElimination(olympic_model)

In [18]:
# Marginal distribution of Offer: no evidence is supplied.
prob_offer = olympic_infer.query(variables=['Offer'])

  phi1.values = phi1.values[slice_]


In [33]:
# Show the marginal P(Offer).
# Note: Offer_0 means "no offer" (so Offer_1 means an offer was made).
# The note is a comment rather than a bare triple-quoted string: a string
# literal as the last expression of a cell is echoed as the cell's result.
print(prob_offer['Offer'])

╒═════════╤══════════════╕
│ Offer   │   phi(Offer) │
╞═════════╪══════════════╡
│ Offer_0 │       0.8898 │
├─────────┼──────────────┤
│ Offer_1 │       0.1102 │
╘═════════╧══════════════╛


'\noffer_0 equal no offer\n\n'

We can also get conditional probability distributions that take into account what we already know.

In [34]:
# Query two variables at once. No evidence is passed, so despite the variable
# name these are the unconditional marginals; the result is indexed by
# variable name.
prob_offer_good_genre = olympic_infer.query(
    variables=['Offer', 'Genetics'],
)

print(prob_offer_good_genre['Genetics'])
print(prob_offer_good_genre['Offer'])



╒════════════╤═════════════════╕
│ Genetics   │   phi(Genetics) │
╞════════════╪═════════════════╡
│ Genetics_0 │          0.2000 │
├────────────┼─────────────────┤
│ Genetics_1 │          0.8000 │
╘════════════╧═════════════════╛
╒═════════╤══════════════╕
│ Offer   │   phi(Offer) │
╞═════════╪══════════════╡
│ Offer_0 │       0.8898 │
├─────────┼──────────────┤
│ Offer_1 │       0.1102 │
╘═════════╧══════════════╛


  phi1.values = phi1.values[slice_]
  phi.values = phi.values[slice_]


In [32]:
# Query Offer and OlympicTrials together. No evidence is passed, so these are
# the unconditional marginals; the result is indexed by variable name.
prob_offer_good_genre = olympic_infer.query(variables=['Offer', 'OlympicTrials'])

# Note: Offer_0 means "no offer" (so Offer_1 means an offer was made).
# The note is a comment rather than a bare triple-quoted string: a string
# literal as the last expression of a cell is echoed as the cell's result.
print(prob_offer_good_genre['OlympicTrials'])
print(prob_offer_good_genre['Offer'])

╒═════════════════╤══════════════════════╕
│ OlympicTrials   │   phi(OlympicTrials) │
╞═════════════════╪══════════════════════╡
│ OlympicTrials_0 │               0.7820 │
├─────────────────┼──────────────────────┤
│ OlympicTrials_1 │               0.1262 │
├─────────────────┼──────────────────────┤
│ OlympicTrials_2 │               0.0918 │
╘═════════════════╧══════════════════════╛
╒═════════╤══════════════╕
│ Offer   │   phi(Offer) │
╞═════════╪══════════════╡
│ Offer_0 │       0.8898 │
├─────────┼──────────────┤
│ Offer_1 │       0.1102 │
╘═════════╧══════════════╛


  phi1.values = phi1.values[slice_]


'\noffer_0 equal no offer\n\n'

Probability of getting an Olympic offer given bad genetics (`Genetics = 1`)

In [35]:
# P(Offer | Genetics=1); state 1 is the one this notebook labels "bad".
prob_offer_bad_genre = olympic_infer.query(
    variables=['Offer'],
    evidence={'Genetics': 1},
)
print('Probability of Getting Olympic Offer when have bad genre')
print(prob_offer_bad_genre['Offer'])

Probability of Getting Olympic Offer when have bad genre
╒═════════╤══════════════╕
│ Offer   │   phi(Offer) │
╞═════════╪══════════════╡
│ Offer_0 │       0.9017 │
├─────────┼──────────────┤
│ Offer_1 │       0.0983 │
╘═════════╧══════════════╛


  phi1.values = phi1.values[slice_]
  phi.values = phi.values[slice_]


In [36]:
# P(Offer | Genetics=0). Genetics=0 is the state this notebook labels "good"
# (Genetics=1 is the "bad" state used in the previous query), so the result
# gets its own name instead of overwriting prob_offer_bad_genre.
prob_offer_good_genetics = olympic_infer.query(variables=['Offer'],
                                               evidence={'Genetics': 0})
print(prob_offer_good_genetics['Offer'])

╒═════════╤══════════════╕
│ Offer   │   phi(Offer) │
╞═════════╪══════════════╡
│ Offer_0 │       0.8420 │
├─────────┼──────────────┤
│ Offer_1 │       0.1580 │
╘═════════╧══════════════╛


  phi1.values = phi1.values[slice_]
  phi.values = phi.values[slice_]


Probability of getting an Olympic offer given good genetics (`Genetics = 0`) and practice (`Practice = 0`)

In [38]:
# P(Offer | Genetics=0, Practice=0): both parents of OlympicTrials are observed.
prob_offer_good_genre_did_practice = olympic_infer.query(
    variables=['Offer'],
    evidence={'Genetics': 0, 'Practice': 0},
)
print('Probability of Getting Olympic Offer when have good genre and did practice')
print(prob_offer_good_genre_did_practice['Offer'])

Probability of Getting Olympic Offer when have good genre and did practice
╒═════════╤══════════════╕
│ Offer   │   phi(Offer) │
╞═════════╪══════════════╡
│ Offer_0 │       0.8150 │
├─────────┼──────────────┤
│ Offer_1 │       0.1850 │
╘═════════╧══════════════╛


  phi.values = phi.values[slice_]


We can also reason upstream. For example, evidence about performance at the Olympic Trials affects the probabilities of Genetics.

In [42]:
# Reason from effect back to cause: P(Genetics | OlympicTrials=2).
prob_good_genres_if_amazing_olympic_trials = olympic_infer.query(
    variables=['Genetics'],
    evidence={'OlympicTrials': 2},
)

print(prob_good_genres_if_amazing_olympic_trials['Genetics'])

╒════════════╤═════════════════╕
│ Genetics   │   phi(Genetics) │
╞════════════╪═════════════════╡
│ Genetics_0 │          0.3377 │
├────────────┼─────────────────┤
│ Genetics_1 │          0.6623 │
╘════════════╧═════════════════╛


  phi1.values = phi1.values[slice_]


In [44]:
# P(Genetics | Practice=1, OlympicTrials=2).
# Practice=1 is the "no practice" state (as the variable name says), so this
# is the distribution over Genetics for someone who did not practice but
# still reached OlympicTrials state 2.
prob_good_genres_if_no_practice_good_olympic_trials = olympic_infer.query(
    variables=['Genetics'],
    evidence={'Practice': 1, 'OlympicTrials': 2},
)

print(prob_good_genres_if_no_practice_good_olympic_trials['Genetics'])

╒════════════╤═════════════════╕
│ Genetics   │   phi(Genetics) │
╞════════════╪═════════════════╡
│ Genetics_0 │          0.3846 │
├────────────┼─────────────────┤
│ Genetics_1 │          0.6154 │
╘════════════╧═════════════════╛


We can find out the most probable state for a variable

In [45]:
# Most probable state of Genetics when no evidence is given.
olympic_infer.map_query(variables=['Genetics'])

  phi.values = phi.values[slice_]
  phi1.values = phi1.values[slice_]


{'Genetics': 1}

In [46]:
# Most probable state of Offer when no evidence is given.
olympic_infer.map_query(variables=['Offer'])

  phi.values = phi.values[slice_]
  phi1.values = phi1.values[slice_]


{'Offer': 0}

In [47]:
# Most probable state of OlympicTrials when no evidence is given.
olympic_infer.map_query(variables=['OlympicTrials'])

  phi.values = phi.values[slice_]
  phi1.values = phi1.values[slice_]


{'OlympicTrials': 0}