<a href="https://colab.research.google.com/github/msbeeman/Causal/blob/main/Directed_Markov_Properties.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install pgmpy

In [3]:
import pandas as pd
import numpy as np
from pgmpy.base.DAG import DAG
from pgmpy.estimators.CITests import chi_square
from pgmpy.independencies import IndependenceAssertion

In [63]:
G = DAG()
G.add_edges_from(
    [
      ('A','E'),
      ('S','E'),
      ('E','O'),
      ('E','R'),
      ('O','T'),
      ('R','T')
    ]
)

In [64]:
N = 30
transportation_survey_link = '/content/sample_data/transportation_survey.csv'
data = pd.read_csv(transportation_survey_link).iloc[:N]
data.head()

Unnamed: 0,A,S,E,O,R,T
0,adult,F,high,emp,small,train
1,young,M,high,emp,big,car
2,adult,M,uni,emp,big,other
3,old,F,uni,emp,big,car
4,young,F,uni,emp,big,car


In [65]:
# D-Separations
dseps = G.get_independencies()
print(dseps)

(R ⟂ S, A, O | E)
(R ⟂ A, O | E, S)
(R ⟂ S, O | E, A)
(R ⟂ S, A | E, O)
(R ⟂ S, A | T, E)
(R ⟂ O | E, S, A)
(R ⟂ A | E, S, O)
(R ⟂ A | E, T, S)
(R ⟂ S | E, A, O)
(R ⟂ S | T, E, A)
(R ⟂ S, A | T, E, O)
(R ⟂ A | E, T, S, O)
(R ⟂ S | T, E, A, O)
(S ⟂ A)
(S ⟂ R, T, O | E)
(S ⟂ T | R, O)
(S ⟂ T, O | R, E)
(S ⟂ R, T, O | E, A)
(S ⟂ R, T | E, O)
(S ⟂ R, O | T, E)
(S ⟂ T | R, A, O)
(S ⟂ T, O | R, E, A)
(S ⟂ T | R, E, O)
(S ⟂ O | R, T, E)
(S ⟂ R, T | E, A, O)
(S ⟂ R, O | T, E, A)
(S ⟂ R | T, E, O)
(S ⟂ T | R, E, A, O)
(S ⟂ O | R, T, E, A)
(S ⟂ R | T, E, A, O)
(A ⟂ S)
(A ⟂ R, T, O | E)
(A ⟂ T | R, O)
(A ⟂ T, O | R, E)
(A ⟂ R, T, O | E, S)
(A ⟂ R, T | E, O)
(A ⟂ R, O | T, E)
(A ⟂ T | R, S, O)
(A ⟂ T, O | R, E, S)
(A ⟂ T | R, E, O)
(A ⟂ O | R, T, E)
(A ⟂ R, T | E, S, O)
(A ⟂ R, O | E, T, S)
(A ⟂ R | T, E, O)
(A ⟂ T | R, E, S, O)
(A ⟂ O | R, T, E, S)
(A ⟂ R | E, T, S, O)
(O ⟂ R, S, A | E)
(O ⟂ S, A | R, E)
(O ⟂ R, A | E, S)
(O ⟂ R, S | E, A)
(O ⟂ S, A | T, E)
(O ⟂ A | R, E, S)
(O ⟂ S | R, E, A)
(O 

In [66]:
#Chi-squared tests for independence
significance = .01

def test_dsep(dsep: IndependenceAssertion):
  test_outputs = []
  for X in list(dsep.get_assertion()[0]):
    for Y in list(dsep.get_assertion()[1]):
      Z = list(dsep.get_assertion()[2])
      test_result = chi_square(X=X, Y=Y, Z=Z, data=data, boolean=True, significance_level=significance)
      test_outputs.append((IndependenceAssertion(X, Y, Z), test_result))
  return test_outputs

In [67]:
results = [test_dsep(dsep) for dsep in dseps.get_assertions()]
results_flat = [item for sublist in results for item in sublist]
results_flat

[((R ⟂ S | E), True),
 ((R ⟂ A | E), True),
 ((R ⟂ O | E), False),
 ((R ⟂ A | E, S), True),
 ((R ⟂ O | E, S), False),
 ((R ⟂ S | E, A), True),
 ((R ⟂ O | E, A), False),
 ((R ⟂ S | E, O), True),
 ((R ⟂ A | E, O), True),
 ((R ⟂ S | T, E), True),
 ((R ⟂ A | T, E), True),
 ((R ⟂ O | A, E, S), False),
 ((R ⟂ A | E, S, O), True),
 ((R ⟂ A | T, E, S), True),
 ((R ⟂ S | E, A, O), True),
 ((R ⟂ S | T, E, A), True),
 ((R ⟂ S | T, E, O), True),
 ((R ⟂ A | T, E, O), True),
 ((R ⟂ A | T, E, S, O), True),
 ((R ⟂ S | T, E, A, O), True),
 ((S ⟂ A), True),
 ((S ⟂ R | E), True),
 ((S ⟂ T | E), True),
 ((S ⟂ O | E), True),
 ((S ⟂ T | R, O), True),
 ((S ⟂ T | R, E), True),
 ((S ⟂ O | R, E), True),
 ((S ⟂ R | E, A), True),
 ((S ⟂ T | E, A), True),
 ((S ⟂ O | E, A), True),
 ((S ⟂ R | E, O), True),
 ((S ⟂ T | E, O), True),
 ((S ⟂ R | T, E), True),
 ((S ⟂ O | T, E), True),
 ((S ⟂ T | R, A, O), True),
 ((S ⟂ T | R, E, A), True),
 ((S ⟂ O | R, E, A), True),
 ((S ⟂ T | R, E, O), True),
 ((S ⟂ O | R, T, E), True)

In [68]:
results = {k: v for k, v in results_flat}
print(results)

{(R ⟂ S | E): True, (R ⟂ A | E): True, (R ⟂ O | E): False, (R ⟂ A | E, S): True, (R ⟂ O | E, S): False, (R ⟂ S | E, A): True, (R ⟂ O | E, A): False, (R ⟂ S | E, O): True, (R ⟂ A | E, O): True, (R ⟂ S | T, E): True, (R ⟂ A | T, E): True, (R ⟂ O | A, E, S): False, (R ⟂ A | E, S, O): True, (R ⟂ A | T, E, S): True, (R ⟂ S | E, A, O): True, (R ⟂ S | T, E, A): True, (R ⟂ S | T, E, O): True, (R ⟂ A | T, E, O): True, (R ⟂ A | T, E, S, O): True, (R ⟂ S | T, E, A, O): True, (S ⟂ A): True, (S ⟂ T | E): True, (S ⟂ O | E): True, (S ⟂ T | R, O): True, (S ⟂ T | R, E): True, (S ⟂ O | R, E): True, (S ⟂ T | E, A): True, (S ⟂ O | E, A): True, (S ⟂ T | E, O): True, (S ⟂ O | T, E): True, (S ⟂ T | R, A, O): True, (S ⟂ T | R, E, A): True, (S ⟂ O | R, E, A): True, (S ⟂ T | R, E, O): True, (S ⟂ O | R, T, E): True, (S ⟂ T | E, A, O): True, (S ⟂ O | T, E, A): True, (S ⟂ T | R, E, A, O): True, (S ⟂ O | R, T, E, A): True, (A ⟂ T | E): True, (A ⟂ O | E): True, (A ⟂ T | R, O): True, (A ⟂ T | R, E): True, (A ⟂ O | R,

In [69]:
boolean_results = list(results.values())
numTrues = len([x for x in boolean_results if x == True])
numFalses = len([x for x in boolean_results if x == False])

# As N increases, the number of passing tests increases and the number of failing tests decreases.
print(f'For N={N}, out of {len(results)} tests, {numTrues} passed as True and {numFalses} as false.')



For N=30, out of 61 tests, 57 passed as True and 4 as false.
