# INF-2600-1 24V Artificial Intelligence: Assignment 3 Pre-code
This code implements a Bayesian Network model for Analyzing Sensor Data for Weather Prediction using the pgmpy library in Python.


### BUILDING THE STRUCTURE OF BAYESIAN NETWORK: Using PgmPy
#### Install Package: `!pip install pgmpy`

In [986]:
pip install pgmpy

Note: you may need to restart the kernel to use updated packages.


In [987]:
# Including the necessary libraries
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from pgmpy.models import BayesianNetwork
from pgmpy.factors.discrete import TabularCPD
from pgmpy.estimators import MaximumLikelihoodEstimator

# Factoring the dataset

In [988]:
# Import data, make a copy of the original

df0 = pd.read_csv('precode/seattle-weather.csv')
dfc1 = df0.copy()
dfc1.head()

Unnamed: 0,date,precipitation,temp_max,temp_min,wind,weather
0,2012-01-01,0.0,12.8,5.0,4.7,drizzle
1,2012-01-02,10.9,10.6,2.8,4.5,rain
2,2012-01-03,0.8,11.7,7.2,2.3,rain
3,2012-01-04,20.3,12.2,5.6,4.7,rain
4,2012-01-05,1.3,8.9,2.8,6.1,rain


In [989]:
# Get characteristics of dataset including columns with missing data as well:
dfc1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1461 entries, 0 to 1460
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   date           1461 non-null   object 
 1   precipitation  1461 non-null   float64
 2   temp_max       1461 non-null   float64
 3   temp_min       1461 non-null   float64
 4   wind           1461 non-null   float64
 5   weather        1461 non-null   object 
dtypes: float64(4), object(2)
memory usage: 68.6+ KB


In [990]:
# Checking the unique values in the 'weather' column
unique_fields = dfc1['weather'].unique()
print(unique_fields)

['drizzle' 'rain' 'sun' 'snow' 'fog']


In [991]:
dfc1.describe()

Unnamed: 0,precipitation,temp_max,temp_min,wind
count,1461.0,1461.0,1461.0,1461.0
mean,3.029432,16.439083,8.234771,3.241136
std,6.680194,7.349758,5.023004,1.437825
min,0.0,-1.6,-7.1,0.4
25%,0.0,10.6,4.4,2.2
50%,0.0,15.6,8.3,3.0
75%,2.8,22.2,12.2,4.0
max,55.9,35.6,18.3,9.5


In [992]:
# Put categorical varaibles in a list
categorical_lst = ['date','weather']
# Create a seperate & smaller dataframe for categorical variables
dfc2a = pd.DataFrame(dfc1, columns=categorical_lst, copy=True)
dfc2a.head()

Unnamed: 0,date,weather
0,2012-01-01,drizzle
1,2012-01-02,rain
2,2012-01-03,rain
3,2012-01-04,rain
4,2012-01-05,rain


In [993]:
# Put all continuous variables into a list
continuous_lst = ['precipitation', 'temp_max', 'temp_min', 'wind']
# Create a seperate & smaller dataframe for our chosen variables. Use 'copy=True' so changes wont affect original
dfc2b = pd.DataFrame(dfc1, columns=continuous_lst, copy=True)
dfc2b.head()

Unnamed: 0,precipitation,temp_max,temp_min,wind
0,0.0,12.8,5.0,4.7
1,10.9,10.6,2.8,4.5
2,0.8,11.7,7.2,2.3
3,20.3,12.2,5.6,4.7
4,1.3,8.9,2.8,6.1


### Create new dataframe

In [994]:
# Create new df with variables we want to work with:
new_cols = ['date', 'precipitation', 'temp_max', 'temp_min', 'wind', 'weather']

df = df0[new_cols]
# df.head()

In [995]:
# Let's show all columns with missing data as well:
df[df.isnull().any(axis=1)] # any missing data in columns
df.isnull().any()

date             False
precipitation    False
temp_max         False
temp_min         False
wind             False
weather          False
dtype: bool

In [996]:
num_stdv = 1

# Define the labels dictionary
labels = {

}

# Create bounds for continuous labels


df.head()

Unnamed: 0,date,precipitation,temp_max,temp_min,wind,weather
0,2012-01-01,0.0,12.8,5.0,4.7,drizzle
1,2012-01-02,10.9,10.6,2.8,4.5,rain
2,2012-01-03,0.8,11.7,7.2,2.3,rain
3,2012-01-04,20.3,12.2,5.6,4.7,rain
4,2012-01-05,1.3,8.9,2.8,6.1,rain


# Creating Bayes Nets

In [997]:
# Define the hierarchy
weather_model = BayesianNetwork([
    ('weather', 'precipitation'),
    ('weather', 'wind'),
    ('precipitation', 'temp_max'),
    ('wind', 'temp_min')
])

# Learn CPDs using the dataset
weather_model.fit(df, estimator=MaximumLikelihoodEstimator)

# You can now print the learned CPDs
for cpd in weather_model.get_cpds():
    print(cpd)

# And, the states for each variables

+------------------+-----------+
| weather(drizzle) | 0.0362765 |
+------------------+-----------+
| weather(fog)     | 0.0691307 |
+------------------+-----------+
| weather(rain)    | 0.438741  |
+------------------+-----------+
| weather(snow)    | 0.017796  |
+------------------+-----------+
| weather(sun)     | 0.438056  |
+------------------+-----------+
+---------------------+-----+--------------+
| weather             | ... | weather(sun) |
+---------------------+-----+--------------+
| precipitation(0.0)  | ... | 1.0          |
+---------------------+-----+--------------+
| precipitation(0.3)  | ... | 0.0          |
+---------------------+-----+--------------+
| precipitation(0.5)  | ... | 0.0          |
+---------------------+-----+--------------+
| precipitation(0.8)  | ... | 0.0          |
+---------------------+-----+--------------+
| precipitation(1.0)  | ... | 0.0          |
+---------------------+-----+--------------+
| precipitation(1.3)  | ... | 0.0          |
+------

In [998]:
# Calculate Probabilities

# Weather does not have any parents so all we need are the marginal probabilities of observing each weather type

# Joint Propabilities
# Create dict where key=parent, value=child
# Dictionary mapping parent nodes to their respective child nodes
# Create dict where key=parent, value=child based on your model's structure
var_dict = {
    'weather': ['precipitation', 'wind'],
    'precipitation': ['temp_max'],
    'wind': ['temp_min']
}

# Fetch conditional distributions from the model and store results in a list
cpd_lst = []
for parent, children in var_dict.items():
    for child in children:
        cpd = weather_model.get_cpds(child)
        cpd_lst.append(cpd)

# Now handle potential NaN values that might occur due to no data points for some conditions
for cpd in cpd_lst:
    if np.isnan(cpd.values).any():
        # Replace NaN values with equal probability (assuming uniform distribution if no data available)
        cpd.values[np.isnan(cpd.values)] = 1 / cpd.variable_card  # Use variable_card to spread probability evenly

# Output the CPDs for verification
for cpd in cpd_lst:
    print(cpd)

+---------------------+-----+--------------+
| weather             | ... | weather(sun) |
+---------------------+-----+--------------+
| precipitation(0.0)  | ... | 1.0          |
+---------------------+-----+--------------+
| precipitation(0.3)  | ... | 0.0          |
+---------------------+-----+--------------+
| precipitation(0.5)  | ... | 0.0          |
+---------------------+-----+--------------+
| precipitation(0.8)  | ... | 0.0          |
+---------------------+-----+--------------+
| precipitation(1.0)  | ... | 0.0          |
+---------------------+-----+--------------+
| precipitation(1.3)  | ... | 0.0          |
+---------------------+-----+--------------+
| precipitation(1.5)  | ... | 0.0          |
+---------------------+-----+--------------+
| precipitation(1.8)  | ... | 0.0          |
+---------------------+-----+--------------+
| precipitation(2.0)  | ... | 0.0          |
+---------------------+-----+--------------+
| precipitation(2.3)  | ... | 0.0          |
+---------

In [999]:
# Creating tabular conditional probability distribution
from pgmpy.factors.discrete import TabularCPD

# CPD for Weather (no parents)
cpd_weather = TabularCPD(variable='weather', variable_card=5,  # Example: 5 types of weather
                         values=[[0.20], [0.20], [0.20], [0.20], [0.20]],  # Equal probabilities
                         state_names={'weather': ['drizzle', 'rain', 'sun', 'snow', 'fog']})

# CPD for Precipitation (dependent on Weather)
cpd_precipitation = TabularCPD(variable='precipitation', variable_card=3,  # Low, Medium, High
                               values=[[0.3, 0.2, 0.5, 0.1, 0.1],
                                       [0.4, 0.3, 0.2, 0.6, 0.2],
                                       [0.3, 0.5, 0.3, 0.3, 0.7]],
                               evidence=['weather'],
                               evidence_card=[5],
                               state_names={'weather': ['drizzle', 'rain', 'sun', 'snow', 'fog'],
                                             'precipitation': ['low', 'medium', 'high']})

# CPD for Wind (dependent on Weather)
cpd_wind = TabularCPD(variable='wind', variable_card=3,  # Low, Medium, High
                      values=[[0.5, 0.3, 0.2, 0.1, 0.4],
                              [0.3, 0.4, 0.4, 0.5, 0.3],
                              [0.2, 0.3, 0.4, 0.4, 0.3]],
                      evidence=['weather'],
                      evidence_card=[5],
                      state_names={'weather': ['drizzle', 'rain', 'sun', 'snow', 'fog'],
                                   'wind': ['low', 'medium', 'high']})

# CPD for Temp_max (dependent on Precipitation)
cpd_temp_max = TabularCPD(variable='temp_max', variable_card=3,  # Assuming 3 states for simplicity
                          values=[
                              [0.3, 0.6, 0.1],  # Probabilities for temp_max given low, medium, high precipitation
                              [0.4, 0.3, 0.3],
                              [0.3, 0.1, 0.6]
                          ],
                          evidence=['precipitation'],  # Only precipitation as a parent
                          evidence_card=[3],           # Assuming 3 states for precipitation
                          state_names={
                              'temp_max': ['low', 'medium', 'high'],
                              'precipitation': ['low', 'medium', 'high']
                          })

# CPD for Temp_min (dependent on Wind)
cpd_temp_min = TabularCPD(variable='temp_min', variable_card=3,  # Low, Medium, High
                          values=[[0.4, 0.3, 0.3],
                                  [0.3, 0.4, 0.4],
                                  [0.3, 0.3, 0.3]],
                          evidence=['wind'],
                          evidence_card=[3],
                          state_names={'wind': ['low', 'medium', 'high'],
                                       'temp_min': ['low', 'medium', 'high']})

# Add CPDs to the model
#weather_model.add_cpds(cpd_weather, cpd_precipitation, cpd_wind, cpd_temp_max, cpd_temp_min)


In [1000]:
# Add CPDs and factors to the model

# Assuming the model is already created with its structure
model = BayesianNetwork([
    ('weather', 'precipitation'),
    ('weather', 'wind'),
    ('precipitation', 'temp_max'),
    ('wind', 'temp_min')
])

# Assume CPDs for 'weather', 'precipitation', 'wind', 'temp_max', 'temp_min' have been defined
# e.g., cpd_weather, cpd_precipitation, cpd_wind, cpd_temp_max, cpd_temp_min

# Adding CPDs to the model
model.add_cpds(cpd_weather, cpd_precipitation, cpd_wind, cpd_temp_max, cpd_temp_min)

# Check if the model is consistent
assert model.check_model(), "The model has inconsistencies."

# If the assertion passes without raising an assertion error, print a success message
print("Model is consistent and ready for use!")


Model is consistent and ready for use!


In [1001]:
# Viewing nodes of the model
weather_model.nodes()

NodeView(('weather', 'precipitation', 'wind', 'temp_max', 'temp_min'))

In [1002]:
# Viewing edges of the model
weather_model.edges()

OutEdgeView([('weather', 'precipitation'), ('weather', 'wind'), ('precipitation', 'temp_max'), ('wind', 'temp_min')])

In [1003]:
from pgmpy.factors.discrete import TabularCPD

# Defining CPD for the 'weather' node
weather_cpd = TabularCPD(variable='weather', variable_card=3, values=[[0.5], [0.3], [0.2]],
                         state_names={'weather': ['sunny', 'cloudy', 'rainy']})

# Assuming 'weather' affects 'wind', define CPD for the 'wind' node
# Let's say the wind can be either 'low' or 'high', and its distribution depends on the weather.
wind_cpd = TabularCPD(variable='wind', variable_card=2, 
                      values=[[0.7, 0.2, 0.1], [0.3, 0.8, 0.9]], 
                      evidence=['weather'], evidence_card=[3],
                      state_names={'weather': ['sunny', 'cloudy', 'rainy'],
                                   'wind': ['low', 'high']})

weather_cpd.normalize()
wind_cpd.normalize()

# Print the probability table of the weather node
print(weather_cpd)

# Print the probability table of the wind node
print(wind_cpd)

+-----------------+-----+
| weather(sunny)  | 0.5 |
+-----------------+-----+
| weather(cloudy) | 0.3 |
+-----------------+-----+
| weather(rainy)  | 0.2 |
+-----------------+-----+
+------------+----------------+-----------------+----------------+
| weather    | weather(sunny) | weather(cloudy) | weather(rainy) |
+------------+----------------+-----------------+----------------+
| wind(low)  | 0.7            | 0.2             | 0.1            |
+------------+----------------+-----------------+----------------+
| wind(high) | 0.3            | 0.8             | 0.9            |
+------------+----------------+-----------------+----------------+


In [1004]:
# Independcies in the model

# Checking independcies of a particular node


# Task 1.2

In [1005]:
from pgmpy.inference import VariableElimination

In [1006]:
# Question 1: (a) What is the probability of high wind when the weather is sunny? (b) What is the probability of sunny weather when the wind is high?
# Assuming your model and CPDs are defined as `model`
inference = VariableElimination(model)

# (a) Probability of high wind when the weather is sunny
prob_high_wind_given_sunny = inference.query(variables=['wind'], evidence={'weather': 'sun'})
print("Probability of high wind given sunny weather:")
print(prob_high_wind_given_sunny)

# (b) Probability of sunny weather when the wind is high
prob_sunny_given_high_wind = inference.query(variables=['weather'], evidence={'wind': 'high'})
print("Probability of sunny weather given high wind:")
print(prob_sunny_given_high_wind)

Probability of high wind given sunny weather:
+--------------+-------------+
| wind         |   phi(wind) |
| wind(low)    |      0.2000 |
+--------------+-------------+
| wind(medium) |      0.4000 |
+--------------+-------------+
| wind(high)   |      0.4000 |
+--------------+-------------+
Probability of sunny weather given high wind:
+------------------+----------------+
| weather          |   phi(weather) |
| weather(drizzle) |         0.1250 |
+------------------+----------------+
| weather(rain)    |         0.1875 |
+------------------+----------------+
| weather(sun)     |         0.2500 |
+------------------+----------------+
| weather(snow)    |         0.2500 |
+------------------+----------------+
| weather(fog)     |         0.1875 |
+------------------+----------------+


In [1007]:
# Question 2:
# (a) Calculate all the possible joint probability and determine the best probable condition. Explain your results?
# Calculate the joint probability distribution for precipitation, wind, and weather
joint_probability = inference.query(variables=['precipitation', 'wind', 'weather'], joint=True)
print(joint_probability)

# (b) What is the most probable condition for precipitation, wind and weather, combined?
# Find the maximum probability and its index
max_prob_index = np.argmax(joint_probability.values)
max_prob_value = np.max(joint_probability.values)

# Decode the index to the corresponding states
# Numpy's unravel_index method converts a flat index to an index tuple for the given shape
index_tuple = np.unravel_index(max_prob_index, joint_probability.values.shape)

# Map these indices to the state names using the state names defined in the CPD
most_probable_states = {
    'precipitation': joint_probability.variables[0],  # the variable order might need verification
    'wind': joint_probability.variables[1],
    'weather': joint_probability.variables[2]
}

most_probable_condition = {
    variable: joint_probability.state_names[variable][index]
    for variable, index in zip(joint_probability.variables, index_tuple)
}

print("Most probable condition:", most_probable_condition)
print("With probability:", max_prob_value)


+-----------------------+--------------+------------------+-----------------------------------+
| precipitation         | wind         | weather          |   phi(precipitation,wind,weather) |
| precipitation(low)    | wind(low)    | weather(drizzle) |                            0.0300 |
+-----------------------+--------------+------------------+-----------------------------------+
| precipitation(low)    | wind(low)    | weather(rain)    |                            0.0120 |
+-----------------------+--------------+------------------+-----------------------------------+
| precipitation(low)    | wind(low)    | weather(sun)     |                            0.0200 |
+-----------------------+--------------+------------------+-----------------------------------+
| precipitation(low)    | wind(low)    | weather(snow)    |                            0.0020 |
+-----------------------+--------------+------------------+-----------------------------------+
| precipitation(low)    | wind(low)    |

In [1008]:
# Question 3. Find the probability associated with each weather, given that the precipitation is medium? Explain your result.
# Query for the probability distribution of weather given that precipitation is medium
inference = VariableElimination(model)

# Query for the probability distribution of weather given that precipitation is 'medium'
prob_weather_given_precip_medium = inference.query(variables=['weather'], evidence={'precipitation': 'medium'})

# Print the result
print("Probability distribution of weather given that precipitation is medium:")
print(prob_weather_given_precip_medium)


Probability distribution of weather given that precipitation is medium:
+------------------+----------------+
| weather          |   phi(weather) |
| weather(drizzle) |         0.2353 |
+------------------+----------------+
| weather(rain)    |         0.1765 |
+------------------+----------------+
| weather(sun)     |         0.1176 |
+------------------+----------------+
| weather(snow)    |         0.3529 |
+------------------+----------------+
| weather(fog)     |         0.1176 |
+------------------+----------------+


In [1009]:
# Question 4. What is the probability of each weather condition given that precipitation is medium and wind is low or medium? Explain your method and results. How does the result change with the addition of wind factor compared to question 3 of Task 1.2?


# Assuming 'model' is your BayesianModel instance that's already set up
inference = VariableElimination(model)

# Query for the probability distribution of weather given that precipitation is 'medium' and wind is 'low'
prob_weather_given_precip_medium_wind_low = inference.query(variables=['weather'], 
                                                            evidence={'precipitation': 'medium', 'wind': 'low'})

# Query for the probability distribution of weather given that precipitation is 'medium' and wind is 'medium'
prob_weather_given_precip_medium_wind_medium = inference.query(variables=['weather'], 
                                                               evidence={'precipitation': 'medium', 'wind': 'medium'})

# Print the results
print("Probability of weather given precipitation is medium and wind is low:")
print(prob_weather_given_precip_medium_wind_low)

print("Probability of weather given precipitation is medium and wind is medium:")
print(prob_weather_given_precip_medium_wind_medium)


Probability of weather given precipitation is medium and wind is low:
+------------------+----------------+
| weather          |   phi(weather) |
| weather(drizzle) |         0.4255 |
+------------------+----------------+
| weather(rain)    |         0.1915 |
+------------------+----------------+
| weather(sun)     |         0.0851 |
+------------------+----------------+
| weather(snow)    |         0.1277 |
+------------------+----------------+
| weather(fog)     |         0.1702 |
+------------------+----------------+
Probability of weather given precipitation is medium and wind is medium:
+------------------+----------------+
| weather          |   phi(weather) |
| weather(drizzle) |         0.1765 |
+------------------+----------------+
| weather(rain)    |         0.1765 |
+------------------+----------------+
| weather(sun)     |         0.1176 |
+------------------+----------------+
| weather(snow)    |         0.4412 |
+------------------+----------------+
| weather(fog)     | 

# Task 1.3 - Approximate Inference

## Likelihood Weighted Sample

Generates weighted sample(s) from joint distribution of the Bayesian Network, that comply with the given evidence.

In [1010]:
from pgmpy.factors.discrete import State
from pgmpy.sampling import BayesianModelSampling

sampler = BayesianModelSampling(weather_model)

# Generate samples from the model
samples = sampler.forward_sample(size=500000)  # Generates 100,000 samples from the network


  0%|          | 0/5 [00:00<?, ?it/s]

  df = pd.DataFrame.from_records(samples)


In [1011]:
# Repeat Q.1. (a) of Task 1.2 - What is the probability of high wind when the weather is sunny?
# Filter samples where weather is 'sunny'
sunny_samples = samples[samples['weather'] == 'sunny']

# From those, find where wind is 'high'
high_wind_given_sunny = sunny_samples[sunny_samples['wind'] == 'high']

# Calculate the probability
#probability_high_wind_given_sunny = len(high_wind_given_sunny) / len(sunny_samples)


In [1012]:
# Repeat Q.1. (b) of Task 1.2 - What is the probability of sunny weather when the wind is high?
# Filter samples where wind is 'high'
high_wind_samples = samples[samples['wind'] == 'high']

# From those, find where weather is 'sunny'
sunny_given_high_wind = high_wind_samples[high_wind_samples['weather'] == 'sun']

# Calculate the probability
if len(high_wind_samples) > 0:
    probability_sunny_given_high_wind = len(sunny_given_high_wind) / len(high_wind_samples)
    print(f"Probability of sunny weather when wind is high: {probability_sunny_given_high_wind}")
else:
    print("No high wind samples available to calculate the probability.")



No high wind samples available to calculate the probability.


## Rejection Sampling

In [1013]:
# Repeat Q.2 . (a) of Task 1.2 - Calculate all the possible joint probability and determine the best probable condition. Explain your results?
# List all variables in the model to compute the joint probability distribution
variables = model.nodes()
joint_probability = inference.query(variables=variables, joint=True)


# Find the index of the maximum probability value
max_prob_index = np.argmax(joint_probability.values)
max_prob_value = np.max(joint_probability.values)

# Decode this index to get the corresponding state names
index_tuple = np.unravel_index(max_prob_index, joint_probability.values.shape)
most_probable_condition = {var: joint_probability.state_names[var][index]
                           for var, index in zip(variables, index_tuple)}

print("Most probable condition:", most_probable_condition)
print("With probability:", max_prob_value)



Most probable condition: {'weather': 'snow', 'precipitation': 'medium', 'wind': 'medium', 'temp_max': 'low', 'temp_min': 'medium'}
With probability: 0.0144


In [1014]:
# Repeat Q.2 . (b) of Task 1.2 - What is the most probable condition for precipitation, wind and weather, combined?
# Compute the joint probability distribution for weather, precipitation, and wind
joint_prob_dist = inference.query(variables=['weather', 'precipitation', 'wind'], joint=True)


# Find the index of the maximum probability value
max_prob_index = np.argmax(joint_prob_dist.values)
max_prob_value = np.max(joint_prob_dist.values)

# Decode this index to get the corresponding state names
index_tuple = np.unravel_index(max_prob_index, joint_prob_dist.values.shape)
most_probable_condition = {var: joint_prob_dist.state_names[var][index]
                           for var, index in zip(joint_prob_dist.variables, index_tuple)}

print("Most probable condition (Weather, Precipitation, Wind):", most_probable_condition)
print("With probability:", max_prob_value)


Most probable condition (Weather, Precipitation, Wind): {'weather': 'snow', 'precipitation': 'medium', 'wind': 'medium'}
With probability: 0.059999999999999984


## Approx Inference

In [1015]:
from pgmpy.inference import ApproxInference

In [1016]:
# Repeat Q.3 of Task 1.2 - Find the probability associated with each weather, given that the precipitation is medium? Explain your result.
# Query for the probability distribution of weather given that precipitation is 'medium'
prob_weather_given_precip_medium = inference.query(variables=['weather'], evidence={'precipitation': 'medium'})

print("Probability distribution of weather given that precipitation is medium:")
print(prob_weather_given_precip_medium)



Probability distribution of weather given that precipitation is medium:
+------------------+----------------+
| weather          |   phi(weather) |
| weather(drizzle) |         0.2353 |
+------------------+----------------+
| weather(rain)    |         0.1765 |
+------------------+----------------+
| weather(sun)     |         0.1176 |
+------------------+----------------+
| weather(snow)    |         0.3529 |
+------------------+----------------+
| weather(fog)     |         0.1176 |
+------------------+----------------+


# Normal Sampling

In [1017]:
# Repeat Q.4 of Task 1.2 - What is the probability of each weather condition given that precipitation is medium and wind is low or medium? Explain your method and results. How does the result change with the addition of wind factor compared to question 3 of Task 1.2?

# Query for the probability distribution of weather given that precipitation is 'medium' and wind is 'low'
prob_weather_given_precip_medium_wind_low = inference.query(
    variables=['weather'], 
    evidence={'precipitation': 'medium', 'wind': 'low'}
)

# Query for the probability distribution of weather given that precipitation is 'medium' and wind is 'medium'
prob_weather_given_precip_medium_wind_medium = inference.query(
    variables=['weather'], 
    evidence={'precipitation': 'medium', 'wind': 'medium'}
)

print("Probability of weather given precipitation is medium and wind is low:")
print(prob_weather_given_precip_medium_wind_low)

print("Probability of weather given precipitation is medium and wind is medium:")
print(prob_weather_given_precip_medium_wind_medium)



Probability of weather given precipitation is medium and wind is low:
+------------------+----------------+
| weather          |   phi(weather) |
| weather(drizzle) |         0.4255 |
+------------------+----------------+
| weather(rain)    |         0.1915 |
+------------------+----------------+
| weather(sun)     |         0.0851 |
+------------------+----------------+
| weather(snow)    |         0.1277 |
+------------------+----------------+
| weather(fog)     |         0.1702 |
+------------------+----------------+
Probability of weather given precipitation is medium and wind is medium:
+------------------+----------------+
| weather          |   phi(weather) |
| weather(drizzle) |         0.1765 |
+------------------+----------------+
| weather(rain)    |         0.1765 |
+------------------+----------------+
| weather(sun)     |         0.1176 |
+------------------+----------------+
| weather(snow)    |         0.4412 |
+------------------+----------------+
| weather(fog)     | 

# Other hiearchies

In [1018]:
weather_model_h1 = BayesianNetwork([
    ('weather', 'precipitation'),
    ('weather', 'wind'),
    ('precipitation', 'temp_max'),
    ('precipitation', 'temp_min'),
    ('wind', 'temp_min'),
    ('wind', 'temp_max')
])

weather_model_h2 = BayesianNetwork([
    ('weather', 'wind'),
    ('wind', 'precipitation'),
    ('precipitation', 'temp_max'),
    ('precipitation', 'temp_min')
])


In [1019]:
from pgmpy.factors.discrete import TabularCPD

# Example CPD for Weather
cpd_weather = TabularCPD(variable='Weather', variable_card=3, values=[[0.3], [0.4], [0.3]])

# CPDs for other variables would also need to be defined similarly.

In [1021]:
# Assume CPDs: cpd_weather, cpd_precipitation, cpd_wind, cpd_temp_max, cpd_temp_min are already defined

# Adding CPDs to Hierarchy 1 Model
weather_model_h1.add_cpds(cpd_weather, cpd_precipitation, cpd_wind, cpd_temp_max, cpd_temp_min)

# Adding CPDs to Hierarchy 2 Model
weather_model_h2.add_cpds(cpd_weather, cpd_wind, cpd_precipitation, cpd_temp_max, cpd_temp_min)

# Validate the models
assert weather_model_h1.check_model(), "Hierarchy 1 Model has inconsistencies."
assert weather_model_h2.check_model(), "Hierarchy 2 Model has inconsistencies."

# Print that models are validated and ready for inference
print("Both models are correctly configured and ready for further analysis.")


KeyboardInterrupt: 

In [ ]:
from pgmpy.inference import VariableElimination

# Inference on Hierarchy 1
inference_h1 = VariableElimination(weather_model_h1)
joint_prob_h1 = inference_h1.query(variables=['Weather', 'Precipitation', 'Wind', 'Temp_max', 'Temp_min'], joint=True)

# Inference on Hierarchy 2
inference_h2 = VariableElimination(weather_model_h2)
joint_prob_h2 = inference_h2.query(variables=['Weather', 'Precipitation', 'Wind', 'Temp_max', 'Temp_min'], joint=True)

# Output the results for comparison
print("Joint Probability Distribution from Hierarchy 1:")
print(joint_prob_h1)

print("Joint Probability Distribution from Hierarchy 2:")
print(joint_prob_h2)
