In [3]:
pip install networkx

Collecting networkx
  Using cached https://files.pythonhosted.org/packages/41/8f/dd6a8e85946def36e4f2c69c84219af0fa5e832b018c970e92f2ad337e45/networkx-2.4-py3-none-any.whl
Installing collected packages: networkx
Successfully installed networkx-2.4
Note: you may need to restart the kernel to use updated packages.


In [4]:
import numpy as np
import pandas as pd
import networkx as nx
import sklearn as sk
import matplotlib.pyplot as plt

In [30]:
observable_states = ['foreground usage', 'background usage', 'service', 'no usage']
state_probabilities = [0.5, 0.3, 0.1, 0.1]
state_space = pd.Series(state_probabilities, index=states, name='observable_states')
print(state_space)
print(state_space.sum())

foreground usage    0.5
background usage    0.3
service             0.1
no usage            0.1
Name: observable_states, dtype: float64
1.0


In [31]:
transition_matrix = pd.DataFrame(columns=states, index=states)
transition_matrix.loc[states[0]] = [0.2, 0.3, 0.4, 0.1]
transition_matrix.loc[states[1]] = [0.45, 0.4, 0.1, 0.05]
transition_matrix.loc[states[2]] = [0.4, 0.2, 0.3, 0.1]
transition_matrix.loc[states[3]] = [0.2, 0.7, 0.1, 0.2]
print(transition_matrix)

                 foreground usage background usage service no usage
foreground usage              0.2              0.3     0.4      0.1
background usage             0.45              0.4     0.1     0.05
service                       0.4              0.2     0.3      0.1
no usage                      0.2              0.7     0.1      0.2


In [8]:
pip install pprint

Collecting pprint
  Downloading https://files.pythonhosted.org/packages/99/12/b6383259ef85c2b942ab9135f322c0dce83fdca8600d87122d2b0181451f/pprint-0.1.tar.gz
Building wheels for collected packages: pprint
  Building wheel for pprint (setup.py): started
  Building wheel for pprint (setup.py): finished with status 'done'
  Created wheel for pprint: filename=pprint-0.1-cp37-none-any.whl size=1255 sha256=72bfb99c4e9aa06ac733367252a2f47dea000566f7ed21517a91bd6ce7cbc57c
  Stored in directory: C:\Users\aisha\AppData\Local\pip\Cache\wheels\42\d4\c6\16a6495aecc1bda5d5857bd036efd50617789ba9bea4a05124
Successfully built pprint
Installing collected packages: pprint
Successfully installed pprint-0.1
Note: you may need to restart the kernel to use updated packages.


In [27]:
from pprint import pprint
def get_markov_edges(transition_df):
    edges = {}
    for column in transition_df.columns:
        for index in transition_df.index:
            edges[(index, column)] = transition_df.loc[index, column]
    return edges

edge_weights = get_markov_edges(transition_matrix)
pprint(edge_weights)

{('background usage', 'background usage'): 0.4,
 ('background usage', 'foreground usage'): 0.45,
 ('background usage', 'no usage'): 0.05,
 ('background usage', 'service'): 0.1,
 ('foreground usage', 'background usage'): 0.3,
 ('foreground usage', 'foreground usage'): 0.2,
 ('foreground usage', 'no usage'): 0.1,
 ('foreground usage', 'service'): 0.4,
 ('no usage', 'background usage'): 0.7,
 ('no usage', 'foreground usage'): 0.2,
 ('no usage', 'no usage'): 0.2,
 ('no usage', 'service'): 0.1,
 ('service', 'background usage'): 0.2,
 ('service', 'foreground usage'): 0.4,
 ('service', 'no usage'): 0.1,
 ('service', 'service'): 0.3}


In [14]:
pip install pydot

Collecting pydot
  Downloading https://files.pythonhosted.org/packages/33/d1/b1479a770f66d962f545c2101630ce1d5592d90cb4f083d38862e93d16d2/pydot-1.4.1-py2.py3-none-any.whl
Installing collected packages: pydot
Successfully installed pydot-1.4.1
Note: you may need to restart the kernel to use updated packages.


In [28]:
G = nx.MultiDiGraph()

G.add_nodes_from(states)
print(f'Nodes:\n{G.nodes()}\n')

for k, v in edge_weights.items():
    tmp_origin, tmp_destination = k[0], k[1]
    G.add_edge(tmp_origin, tmp_destination, weight=v, label=v)
print(f'Edges:')
pprint(G.edges(data=True))

Nodes:
['foreground usage', 'background usage', 'service', 'no usage']

Edges:
OutMultiEdgeDataView([('foreground usage', 'foreground usage', {'weight': 0.2, 'label': 0.2}), ('foreground usage', 'background usage', {'weight': 0.3, 'label': 0.3}), ('foreground usage', 'service', {'weight': 0.4, 'label': 0.4}), ('foreground usage', 'no usage', {'weight': 0.1, 'label': 0.1}), ('background usage', 'foreground usage', {'weight': 0.45, 'label': 0.45}), ('background usage', 'background usage', {'weight': 0.4, 'label': 0.4}), ('background usage', 'service', {'weight': 0.1, 'label': 0.1}), ('background usage', 'no usage', {'weight': 0.05, 'label': 0.05}), ('service', 'foreground usage', {'weight': 0.4, 'label': 0.4}), ('service', 'background usage', {'weight': 0.2, 'label': 0.2}), ('service', 'service', {'weight': 0.3, 'label': 0.3}), ('service', 'no usage', {'weight': 0.1, 'label': 0.1}), ('no usage', 'foreground usage', {'weight': 0.2, 'label': 0.2}), ('no usage', 'background usage', {'weight

In [32]:
hidden_states = ['unproductive', 'productive', 'semiproductive']
init_probabilities = [0.35, 0.35, 0.3]
hidden_state_space = pd.Series(init_probabilities, index=hidden_states, name='states')
print(hidden_state_space)
print('\n', hidden_state_space.sum())

unproductive      0.35
productive        0.35
semiproductive    0.30
Name: states, dtype: float64

 1.0


In [35]:
q_df = pd.DataFrame(columns=hidden_states, index=hidden_states)
q_df.loc[hidden_states[0]] = [0.4, 0.2, 0.4]
q_df.loc[hidden_states[1]] = [0.45, 0.45, 0.1]
q_df.loc[hidden_states[2]] = [0.45, 0.25, .3]

print(q_df)

q = q_df.values
print('\n', q, q.shape, '\n')
print(q_df.sum(axis=1))

               unproductive productive semiproductive
unproductive            0.4        0.2            0.4
productive             0.45       0.45            0.1
semiproductive         0.45       0.25            0.3

 [[0.4 0.2 0.4]
 [0.45 0.45 0.1]
 [0.45 0.25 0.3]] (3, 3) 

unproductive      1.0
productive        1.0
semiproductive    1.0
dtype: float64


In [38]:
b_df = pd.DataFrame(columns=observable_states, index=hidden_states)
b_df.loc[hidden_states[0]] = [0.2, 0.5, 0.2, 0.1]
b_df.loc[hidden_states[1]] = [0.4, 0.1, 0.3, 0.2]
b_df.loc[hidden_states[2]] = [0.1, 0.6, 0.1, 0.2]

print(b_df)

b = b_df.values
print('\n', b, b.shape, '\n')
print(b_df.sum(axis=1))

               foreground usage background usage service no usage
unproductive                0.2              0.5     0.2      0.1
productive                  0.4              0.1     0.3      0.2
semiproductive              0.1              0.6     0.1      0.2

 [[0.2 0.5 0.2 0.1]
 [0.4 0.1 0.3 0.2]
 [0.1 0.6 0.1 0.2]] (3, 4) 

unproductive      1.0
productive        1.0
semiproductive    1.0
dtype: float64


In [40]:
hide_edges_wts = get_markov_edges(q_df)
pprint(hide_edges_wts)

emit_edges_wts = get_markov_edges(b_df)
pprint(emit_edges_wts)

{('productive', 'productive'): 0.45,
 ('productive', 'semiproductive'): 0.1,
 ('productive', 'unproductive'): 0.45,
 ('semiproductive', 'productive'): 0.25,
 ('semiproductive', 'semiproductive'): 0.3,
 ('semiproductive', 'unproductive'): 0.45,
 ('unproductive', 'productive'): 0.2,
 ('unproductive', 'semiproductive'): 0.4,
 ('unproductive', 'unproductive'): 0.4}
{('productive', 'background usage'): 0.1,
 ('productive', 'foreground usage'): 0.4,
 ('productive', 'no usage'): 0.2,
 ('productive', 'service'): 0.3,
 ('semiproductive', 'background usage'): 0.6,
 ('semiproductive', 'foreground usage'): 0.1,
 ('semiproductive', 'no usage'): 0.2,
 ('semiproductive', 'service'): 0.1,
 ('unproductive', 'background usage'): 0.5,
 ('unproductive', 'foreground usage'): 0.2,
 ('unproductive', 'no usage'): 0.1,
 ('unproductive', 'service'): 0.2}


In [42]:
G = nx.MultiDiGraph()

# nodes correspond to states
G.add_nodes_from(hidden_states)
print(f'Nodes:\n{G.nodes()}\n')

# edges represent hidden probabilities
for k, v in hide_edges_wts.items():
    tmp_origin, tmp_destination = k[0], k[1]
    G.add_edge(tmp_origin, tmp_destination, weight=v, label=v)

# edges represent emission probabilities
for k, v in emit_edges_wts.items():
    tmp_origin, tmp_destination = k[0], k[1]
    G.add_edge(tmp_origin, tmp_destination, weight=v, label=v)
    
print(f'Edges:')
pprint(G.edges(data=True))    

Nodes:
['unproductive', 'productive', 'semiproductive']

Edges:
OutMultiEdgeDataView([('unproductive', 'unproductive', {'weight': 0.4, 'label': 0.4}), ('unproductive', 'productive', {'weight': 0.2, 'label': 0.2}), ('unproductive', 'semiproductive', {'weight': 0.4, 'label': 0.4}), ('unproductive', 'foreground usage', {'weight': 0.2, 'label': 0.2}), ('unproductive', 'background usage', {'weight': 0.5, 'label': 0.5}), ('unproductive', 'service', {'weight': 0.2, 'label': 0.2}), ('unproductive', 'no usage', {'weight': 0.1, 'label': 0.1}), ('productive', 'unproductive', {'weight': 0.45, 'label': 0.45}), ('productive', 'productive', {'weight': 0.45, 'label': 0.45}), ('productive', 'semiproductive', {'weight': 0.1, 'label': 0.1}), ('productive', 'foreground usage', {'weight': 0.4, 'label': 0.4}), ('productive', 'background usage', {'weight': 0.1, 'label': 0.1}), ('productive', 'service', {'weight': 0.3, 'label': 0.3}), ('productive', 'no usage', {'weight': 0.2, 'label': 0.2}), ('semiproductive

In [44]:
obs_map = {'foreground usage':0, 'background usage':1, 'service':2, 'no usage':3}
obs = np.array([1,1,2,3,1,3,3,3,0,1,1,1,2,3,3,1,0,3,2,2,3,3,0,1,0,1])
inv_obs_map = dict((v,k) for k, v in obs_map.items())
obs_seq = [inv_obs_map[v] for v in list(obs)]

print( pd.DataFrame(np.column_stack([obs, obs_seq]), 
                columns=['observation_code', 'observation_seq']) )

   observation_code   observation_seq
0                 1  background usage
1                 1  background usage
2                 2           service
3                 3          no usage
4                 1  background usage
5                 3          no usage
6                 3          no usage
7                 3          no usage
8                 0  foreground usage
9                 1  background usage
10                1  background usage
11                1  background usage
12                2           service
13                3          no usage
14                3          no usage
15                1  background usage
16                0  foreground usage
17                3          no usage
18                2           service
19                2           service
20                3          no usage
21                3          no usage
22                0  foreground usage
23                1  background usage
24                0  foreground usage
25          