In [1]:
from IPython.display import display, Math, Latex,HTML
%matplotlib inline
import pyAgrum as gum
import pyAgrum.lib.notebook as gnb
import pyAgrum.lib.dynamicBN as gdyn
import pyAgrum.causal as csl
import pyAgrum.causal.notebook as cslnb
import os
import math
import numpy as np
import pandas as pd
import scipy.stats
import matplotlib.pyplot as plt
from glob import glob


# Propagation Network

In [2]:
# Bayesian network that every variable and arc of this cell is attached to.
model = gum.BayesNet()

# Modalities of the 'Status' variable.
statusLabel = [
    'Born in Canada',
    'Immigrants, landed 5 or less years earlier',
    'Immigrants, landed more than 10 years earlier',
    'Immigrants, landed more than 5 to 10 years earlier',
]
model.add(gum.LabelizedVariable('Status', 'Status', statusLabel))

# Modalities of the 'Education' variable.
educationLabel = [
    '0 to 8  years',
    'Some high school',
    'High school graduate',
    'Some postsecondary',
    'Postsecondary certificate or diploma',
    'University degree',
]
model.add(gum.LabelizedVariable('Education', 'Education', educationLabel))

# 'Employment Rate' takes the integer values 0 through 100 inclusive.
model.add(gum.NumericalDiscreteVariable('Employment Rate', 'Employment Rate', range(101)))

# Raw demographic records, read from the working directory.
df = pd.read_csv("demographics.csv")

# Per-country sums, reduced to the 11 countries with the largest "Total",
# then re-ordered by country name. Country names are unique after the
# groupby, so the secondary "Year" key never breaks a tie.
df2 = (
    df.groupby("Country of origin")
    .sum()
    .sort_values(by=["Total"], ascending=False)
    .head(11)
    .sort_values(by=["Country of origin", "Year"])
)

# Labels of the 'Origin' variable: the selected countries minus the
# "Unknown " placeholder (note the trailing space in that label).
# NOTE(review): when "Unknown " is not among the top 11, all 11 labels are kept.
countryLabel = list(df2.index)
if "Unknown " in countryLabel:
    countryLabel.remove("Unknown ")

# Keep only the rows of the selected countries, ordered by country then year.
df = (
    df[df["Country of origin"].isin(countryLabel)]
    .sort_values(by=["Country of origin", "Year"])
)

# Modalities of the 'Age' variable.
ageLabel = ["0 - 4", "5 - 11", "12 - 17", "18 - 59", "60", "other"]

# Modalities of the 'Sex' variable.
sexLabel = ["Female", "Male"]

# Demographic variables; 'Origin' uses the country labels selected earlier.
model.add(gum.LabelizedVariable('Origin', 'Origin', countryLabel))
model.add(gum.LabelizedVariable('Age', 'Age', ageLabel))
model.add(gum.LabelizedVariable('Sex', 'Sex', sexLabel))

# First year covered by the 'Year' variable.
initYear = 2015

# Eight consecutive years starting at initYear, stored as strings.
yearLabel = [str(year) for year in range(initYear, initYear + 8)]

model.add(gum.LabelizedVariable('Year', 'Year', yearLabel))

# Names of the three numerical nodes that get a bin list below.
nodeList = ["Economic", "Sponsor", "Refugee"]

# Length of the initial integer list built for each node.
maxRange = 500

# One independent list 0 .. maxRange-1 per node.
bins = {node: list(range(maxRange)) for node in nodeList}

# Provinces first, territories last.
provinceLabel = [
    "Newfoundland and Labrador", "Prince Edward Island", "Nova Scotia",
    "New Brunswick", "Quebec", "Ontario",
    "Manitoba", "Saskatchewan", "Alberta", "British Columbia",
    "Yukon", "Northwest Territories", "Nunavut",
]

# Drop the last three entries, leaving the first ten labels.
del provinceLabel[-3:]

model.add(gum.LabelizedVariable('Province', 'Province', provinceLabel))

# Number of integer labels kept for each node.
rangeNode = dict(Economic=150, Sponsor=70, Refugee=50)

# Multiplier applied to a node's labels to obtain its bin edges.
coeff = dict.fromkeys(("Economic", "Sponsor", "Refugee"), 100)

for n in nodeList:
    # The node's values are the first rangeNode[n] entries of its list.
    labels = bins[n][:rangeNode[n]]
    model.add(gum.NumericalDiscreteVariable(n, n, labels))

    # Afterwards bins[n] holds edges rather than labels: a leading 0, every
    # label scaled by coeff[n], and the final edge replaced by 1000000.
    # NOTE(review): the first two edges are both 0 because label 0 scales to 0;
    # confirm the code consuming `bins` tolerates duplicate edges.
    edges = [0] + [coeff[n] * label for label in labels]
    edges[-1] = 1000000
    bins[n] = edges

# Directed arcs as (parent, child) pairs, added in exactly this order.
for parent, child in (
    # Demographics
    ('Year', 'Origin'),
    ('Origin', 'Age'),
    ('Origin', 'Sex'),
    # Parents of 'Employment Rate'
    ('Province', 'Employment Rate'),
    ('Sex', 'Employment Rate'),
    ('Education', 'Employment Rate'),
    ('Year', 'Employment Rate'),
    ('Status', 'Employment Rate'),
    # 'Province' as parent of the three numerical nodes
    ('Province', 'Sponsor'),
    ('Province', 'Refugee'),
    ('Province', 'Economic'),
    # 'Year' as parent of the three numerical nodes
    ('Year', 'Sponsor'),
    ('Year', 'Refugee'),
    ('Year', 'Economic'),
    # Parents of 'Province'
    ('Year', 'Province'),
    ('Origin', 'Province'),
):
    model.addArc(parent, child)

# Last expression of the cell: renders the network in the notebook.
model
