### Probabilistic Graphical Models

In [67]:
from pgmpy.independencies import IndependenceAssertion

assertion1 = IndependenceAssertion('X','Y')
assertion1

(X _|_ Y)

In [68]:
assertion2 = IndependenceAssertion('X','Y','Z')
assertion2

(X _|_ Y | Z)

In [69]:
assertion2 = IndependenceAssertion('X',['Y','Z'],['A','B'])
assertion2

(X _|_ Z, Y | A, B)

#### Ch. 5 Learning

In [70]:
import numpy as np
import pandas as pd
from pgmpy.models import BayesianModel
from pgmpy.estimators import MaximumLikelihoodEstimator

raw_data = np.random.randint(low=0, high=2, size=(100, 2))
print( type(raw_data) )
print(raw_data)


<class 'numpy.ndarray'>
[[1 0]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [1 1]
 [1 0]
 [0 0]
 [1 0]
 [1 1]
 [0 0]
 [1 0]
 [0 1]
 [0 0]
 [1 1]
 [0 1]
 [0 0]
 [0 0]
 [1 0]
 [0 0]
 [0 0]
 [1 0]
 [1 1]
 [1 1]
 [1 1]
 [0 1]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [0 1]
 [1 1]
 [0 1]
 [0 1]
 [0 1]
 [1 1]
 [1 1]
 [0 1]
 [0 1]
 [1 1]
 [0 1]
 [1 1]
 [1 0]
 [1 0]
 [1 1]
 [1 0]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [1 1]
 [0 1]
 [0 1]
 [1 1]
 [1 0]
 [0 1]
 [0 1]
 [0 1]
 [1 1]
 [1 0]
 [1 1]
 [0 0]
 [0 0]
 [1 1]
 [0 1]
 [1 1]
 [1 1]
 [1 0]
 [1 1]
 [1 1]
 [0 1]
 [1 0]
 [0 0]
 [0 1]
 [1 1]
 [1 0]
 [1 1]
 [1 1]
 [1 0]
 [0 0]
 [1 1]
 [1 1]
 [0 0]
 [0 1]
 [1 0]
 [1 0]
 [1 0]
 [0 1]
 [1 1]
 [0 0]
 [1 1]
 [1 0]
 [0 1]
 [1 1]
 [0 1]
 [0 1]
 [1 1]
 [1 1]
 [1 0]
 [0 0]]


In [71]:
data = pd.DataFrame(raw_data, columns=['X','Y'])
print( type(data) )
print(data)

<class 'pandas.core.frame.DataFrame'>
    X  Y
0   1  0
1   0  0
2   1  1
3   0  0
4   1  1
5   1  1
6   1  0
7   0  0
8   1  0
9   1  1
10  0  0
11  1  0
12  0  1
13  0  0
14  1  1
15  0  1
16  0  0
17  0  0
18  1  0
19  0  0
20  0  0
21  1  0
22  1  1
23  1  1
24  1  1
25  0  1
26  1  0
27  1  0
28  1  0
29  1  0
.. .. ..
70  0  1
71  1  0
72  0  0
73  0  1
74  1  1
75  1  0
76  1  1
77  1  1
78  1  0
79  0  0
80  1  1
81  1  1
82  0  0
83  0  1
84  1  0
85  1  0
86  1  0
87  0  1
88  1  1
89  0  0
90  1  1
91  1  0
92  0  1
93  1  1
94  0  1
95  0  1
96  1  1
97  1  1
98  1  0
99  0  0

[100 rows x 2 columns]


In [72]:
# Two coin tossing model assuming they are independent
coin_model = BayesianModel( [('X','Y')])
coin_model.fit(data, estimator=MaximumLikelihoodEstimator)
cpd_x = coin_model.get_cpds('X')
print(cpd_x)
cpd_y = coin_model.get_cpds('Y')
print(cpd_y)

╒══════════╤══════╕
│ ['X', 0] │ 0.59 │
├──────────┼──────┤
│ ['X', 1] │ 0.41 │
╘══════════╧══════╛
╒══════════╤═════════════════════╤════════════════════╕
│ X        │ ['X', '0']          │ ['X', '1']         │
├──────────┼─────────────────────┼────────────────────┤
│ ['Y', 0] │ 0.43902439024390244 │ 0.4067796610169492 │
├──────────┼─────────────────────┼────────────────────┤
│ ['Y', 1] │ 0.5609756097560976  │ 0.5932203389830508 │
╘══════════╧═════════════════════╧════════════════════╛


In [73]:
# late for school model

raw_data = np.random.randint(low=0, high=2, size=(100, 6))
data = pd.DataFrame(raw_data, columns=['A', 'R', 'J', 'G', 'L', 'Q'])
student_model = BayesianModel([('A', 'J'), ('R', 'J'), ('J', 'Q'), ('J', 'L'), ('G', 'L')])
student_model.fit(data, estimator=MaximumLikelihoodEstimator)
student_model.get_cpds()

[<TabularCPD representing P(L:2 | G:2, J:2) at 0x111036550>,
 <TabularCPD representing P(A:2) at 0x111036518>,
 <TabularCPD representing P(R:2) at 0x111036588>,
 <TabularCPD representing P(Q:2 | J:2) at 0x1110362e8>,
 <TabularCPD representing P(G:2) at 0x111036630>,
 <TabularCPD representing P(J:2 | A:2, R:2) at 0x111035518>]

In [74]:
# Bayesian approach for late for school model
from pgmpy.estimators import BayesianEstimator

# Generating some random data
raw_data = np.random.randint(low=0, high=2, size=(1000, 6))
print(raw_data)

[[1 0 0 0 1 0]
 [0 1 0 0 0 0]
 [1 0 1 1 1 1]
 ..., 
 [0 1 0 0 1 0]
 [0 1 1 0 1 0]
 [0 1 1 1 0 1]]


In [89]:
data = pd.DataFrame(raw_data, columns=['A', 'R', 'J', 'G', 'L', 'Q'])
student_model = BayesianModel([('A', 'J'), ('R', 'J'), ('J', 'Q'), ('J', 'L'), ('G', 'L')])
student_model.fit(data, estimator=BayesianEstimator)
print( student_model.get_cpds() )

print(student_model.get_cpds('A'))

print(student_model.get_independencies())

print(student_model.nodes())

print(student_model.edges())

ValueError: Shape of passed values is (2, 1000), indices imply (6, 1000)

#### Learning model structure

In [94]:
import numpy as np
import pandas as pd
from pgmpy.models import BayesianModel
from pgmpy.estimators import MaximumLikelihoodEstimator

# Generating random data
raw_data = np.random.randint(low=0, high=2, size=(1000, 2))
data = pd.DataFrame(raw_data, columns=['X', 'Y'])
coin_model = BayesianModel()
coin_model.fit(data, estimator=MaximumLikelihoodEstimator)

print(coin_model.get_cpds())

print(coin_model.get_cpds('X'))
print(coin_model.get_cpds('Y'))

print(coin_model.nodes())

print(coin_model.edges())

[<TabularCPD representing P(Y:2 | X:2) at 0x111067898>, <TabularCPD representing P(X:2) at 0x1110676d8>]
╒══════════╤═══════╕
│ ['X', 0] │ 0.522 │
├──────────┼───────┤
│ ['X', 1] │ 0.478 │
╘══════════╧═══════╛
╒══════════╤════════════════════╤════════════════════╕
│ X        │ ['X', '0']         │ ['X', '1']         │
├──────────┼────────────────────┼────────────────────┤
│ ['Y', 0] │ 0.4827586206896552 │ 0.5209205020920502 │
├──────────┼────────────────────┼────────────────────┤
│ ['Y', 1] │ 0.5172413793103449 │ 0.4790794979079498 │
╘══════════╧════════════════════╧════════════════════╛
['Y', 'X']
[('X', 'Y')]


In [95]:
import numpy as np
import pandas as pd
from pgmpy.models import BayesianModel
from pgmpy.estimators import MaximumLikelihoodEstimator

# Generating random data
raw_data = np.random.randint(low=0, high=2, size=(1000, 3))
data = pd.DataFrame(raw_data, columns=['X', 'Y', 'Z'])
coin_model = BayesianModel()
coin_model.fit(data, estimator=MaximumLikelihoodEstimator)

print(coin_model.get_cpds())

print(coin_model.get_cpds('X'))
print(coin_model.get_cpds('Y'))
print(coin_model.get_cpds('Z'))

print(coin_model.nodes())

print(coin_model.edges())

[<TabularCPD representing P(Z:2 | Y:2, X:2) at 0x11106e710>, <TabularCPD representing P(Y:2 | X:2) at 0x11106e550>, <TabularCPD representing P(X:2) at 0x11106e668>]
╒══════════╤═══════╕
│ ['X', 0] │ 0.536 │
├──────────┼───────┤
│ ['X', 1] │ 0.464 │
╘══════════╧═══════╛
╒══════════╤════════════════════╤════════════════════╕
│ X        │ ['X', '0']         │ ['X', '1']         │
├──────────┼────────────────────┼────────────────────┤
│ ['Y', 0] │ 0.5323275862068966 │ 0.4832089552238806 │
├──────────┼────────────────────┼────────────────────┤
│ ['Y', 1] │ 0.4676724137931034 │ 0.5167910447761194 │
╘══════════╧════════════════════╧════════════════════╛
╒══════════╤════════════════════╤═════════════════════╤════════════════════╤═════════════════════╕
│ X        │ ['X', '0']         │ ['X', '0']          │ ['X', '1']         │ ['X', '1']          │
├──────────┼────────────────────┼─────────────────────┼────────────────────┼─────────────────────┤
│ Y        │ ['Y', '0']         │ ['Y', '1']    

#### Bayesian score

In [98]:
# Generating random data
raw_data = np.random.randint(low=0, high=2, size=(1000, 3))
data = pd.DataFrame(raw_data, columns=['X', 'Y', 'Z'])
coin_model = BayesianModel()
coin_model.fit(data, estimator=MaximumLikelihoodEstimator)

print(coin_model.get_cpds())

print(coin_model.get_cpds('X'))
print(coin_model.get_cpds('Y'))
print(coin_model.get_cpds('Z'))

print(coin_model.nodes())

print(coin_model.edges())

[<TabularCPD representing P(Z:2 | Y:2, X:2) at 0x11106eb70>, <TabularCPD representing P(Y:2 | X:2) at 0x11106ea20>, <TabularCPD representing P(X:2) at 0x11106ea58>]
╒══════════╤══════╕
│ ['X', 0] │ 0.53 │
├──────────┼──────┤
│ ['X', 1] │ 0.47 │
╘══════════╧══════╛
╒══════════╤════════════════════╤═════════════════════╕
│ X        │ ['X', '0']         │ ['X', '1']          │
├──────────┼────────────────────┼─────────────────────┤
│ ['Y', 0] │ 0.5042553191489362 │ 0.5264150943396226  │
├──────────┼────────────────────┼─────────────────────┤
│ ['Y', 1] │ 0.4957446808510638 │ 0.47358490566037736 │
╘══════════╧════════════════════╧═════════════════════╛
╒══════════╤═════════════════════╤════════════════════╤═════════════════════╤═════════════════════╕
│ X        │ ['X', '0']          │ ['X', '0']         │ ['X', '1']          │ ['X', '1']          │
├──────────┼─────────────────────┼────────────────────┼─────────────────────┼─────────────────────┤
│ Y        │ ['Y', '0']          │ ['Y', '1

In [99]:
# late for school model
raw_data = np.random.randint(low=0, high=2, size=(1000, 6))
data = pd.DataFrame(raw_data, columns=['A', 'R', 'J', 'G', 'L', 'Q'])
#student_model = BayesianModel([('A', 'J'), ('R', 'J'), ('J', 'Q'), ('J', 'L'), ('G', 'L')])
student_model = BayesianModel()
student_model.fit(data, estimator=BayesianEstimator)
student_model.get_cpds()

[<TabularCPD representing P(L:2 | A:2, J:2, R:2, G:2) at 0x110708cc0>,
 <TabularCPD representing P(A:2) at 0x102687d68>,
 <TabularCPD representing P(Q:2 | L:2, A:2, J:2, R:2, G:2) at 0x110771198>,
 <TabularCPD representing P(R:2 | A:2) at 0x110525748>,
 <TabularCPD representing P(G:2 | A:2, R:2, J:2) at 0x110525cf8>,
 <TabularCPD representing P(J:2 | A:2, R:2) at 0x1105257b8>]

In [100]:
student_model.nodes()

['L', 'A', 'Q', 'R', 'G', 'J']

In [101]:
student_model.edges()

[('L', 'Q'),
 ('A', 'L'),
 ('A', 'G'),
 ('A', 'J'),
 ('A', 'R'),
 ('A', 'Q'),
 ('R', 'L'),
 ('R', 'G'),
 ('R', 'J'),
 ('R', 'Q'),
 ('G', 'L'),
 ('G', 'Q'),
 ('J', 'L'),
 ('J', 'G'),
 ('J', 'Q')]