# INF-2600-1 24V Artificial Intelligence: Assignment 3 Pre-code
This notebook implements a Bayesian network model for analyzing sensor data for weather prediction, using the pgmpy library in Python.


### BUILDING THE STRUCTURE OF BAYESIAN NETWORK: Using PgmPy
#### Install Package: `!pip install pgmpy`

In [None]:
pip install pgmpy

In [2]:
# Including the necessary libraries
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from pgmpy.models import BayesianNetwork
from pgmpy.factors.discrete import TabularCPD

# Factoring the dataset

In [3]:
# Load the raw CSV once, then work on an independent copy so the
# originally loaded frame stays available for reference.
df0 = pd.read_csv(filepath_or_buffer='/content/seattle-weather.csv')
dfc1 = df0.copy(deep=True)
dfc1.head()

Unnamed: 0,date,precipitation,temp_max,temp_min,wind,weather
0,2012-01-01,0.0,12.8,5.0,4.7,drizzle
1,2012-01-02,10.9,10.6,2.8,4.5,rain
2,2012-01-03,0.8,11.7,7.2,2.3,rain
3,2012-01-04,20.3,12.2,5.6,4.7,rain
4,2012-01-05,1.3,8.9,2.8,6.1,rain


In [5]:
# Get characteristics of dataset including columns with missing data as well:
dfc1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1461 entries, 0 to 1460
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   date           1461 non-null   object 
 1   precipitation  1461 non-null   float64
 2   temp_max       1461 non-null   float64
 3   temp_min       1461 non-null   float64
 4   wind           1461 non-null   float64
 5   weather        1461 non-null   object 
dtypes: float64(4), object(2)
memory usage: 68.6+ KB


In [6]:
# List the distinct labels that occur in the 'weather' column
weather_column = dfc1['weather']
unique_fields = weather_column.unique()
print(unique_fields)

['drizzle' 'rain' 'sun' 'snow' 'fog']


In [7]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns
dfc1.describe()

Unnamed: 0,precipitation,temp_max,temp_min,wind
count,1461.0,1461.0,1461.0,1461.0
mean,3.029432,16.439083,8.234771,3.241136
std,6.680194,7.349758,5.023004,1.437825
min,0.0,-1.6,-7.1,0.4
25%,0.0,10.6,4.4,2.2
50%,0.0,15.6,8.3,3.0
75%,2.8,22.2,12.2,4.0
max,55.9,35.6,18.3,9.5


In [None]:
# Names of the categorical (object-typed) columns
categorical_lst = ["date", "weather"]
# Build a separate, smaller dataframe holding only the categorical columns;
# copy=True keeps it independent of dfc1
dfc2a = pd.DataFrame(data=dfc1, columns=categorical_lst, copy=True)
dfc2a.head()

In [None]:
# Names of the continuous (float-valued) columns
continuous_lst = [
    "precipitation",
    "temp_max",
    "temp_min",
    "wind",
]
# Build a separate, smaller dataframe for the chosen variables;
# copy=True means later changes won't affect the original
dfc2b = pd.DataFrame(data=dfc1, columns=continuous_lst, copy=True)
dfc2b.head()

### Create new dataframe

In [None]:
# Create the working dataframe with the variables we want to model.
new_cols = ['date', 'precipitation', 'temp_max', 'temp_min', 'wind', 'weather']

# Take an explicit copy: the recorded output of a later cell shows the
# continuous columns of `df` replaced by labels, and assigning into a bare
# selection of df0 can raise pandas' SettingWithCopyWarning.
df = df0[new_cols].copy()
# df.head()

In [None]:
# Rows that contain at least one missing value. Only the last expression of a
# notebook cell is displayed, so this intermediate result is printed explicitly
# instead of being evaluated and silently discarded.
rows_with_missing = df[df.isnull().any(axis=1)]
print(rows_with_missing)
# Per-column flag: True if the column contains any missing value
df.isnull().any()

In [14]:
# NOTE(review): presumably the number of standard deviations around the mean
# used to place the bin boundaries — confirm when the bounds are implemented.
num_stdv = 1

# Define the labels dictionary
# TODO: intentionally left empty in the pre-code. The recorded output of this
# cell shows the continuous columns replaced by 'low' / 'mid' / 'high'.
labels = {

}

# Create bounds for continuous labels
# TODO: not implemented yet; until it is, df is displayed unchanged.


df.head()

Unnamed: 0,date,precipitation,temp_max,temp_min,wind,weather
0,2012-01-01,mid,mid,mid,high,drizzle
1,2012-01-02,high,mid,low,mid,rain
2,2012-01-03,mid,mid,mid,mid,rain
3,2012-01-04,high,mid,mid,high,rain
4,2012-01-05,mid,low,low,high,rain


# Creating Bayes Nets

In [16]:
# Define the hierarchy
# TODO: the edge list is intentionally left empty in the pre-code. The recorded
# output of weather_model.edges() further down lists four edges:
#   weather -> precipitation, weather -> wind,
#   precipitation -> temp_max, wind -> temp_min
weather_model = BayesianNetwork([


])

# And the states for each variable

In [None]:
# Calculate Probabilities

# Weather does not have any parents so all we need are the marginal probabilities of observing each weather type

# Joint Probabilities
# Create dict where key=parent, value=child
var_dict = {

            }

# Create conditional distributions and store results in a list
cpd_lst = []
for key, value in var_dict.items():
    ### Define yourself
    pass  # placeholder so the cell parses; replace with the CPD computation


def fill_undefined_columns(cpd):
    """Replace every all-NaN column of a 2-D CPD array with a uniform distribution.

    Args:
        cpd: 2-D float numpy array whose columns are conditional distributions
            (one column per parent state). Modified in place.

    Returns:
        The same array, with each column that consisted only of NaN set to
        ``1 / number_of_rows`` so that it sums to exactly 1.
    """
    if cpd.size == 0:
        # Nothing to normalise; also avoids dividing by zero rows.
        return cpd
    undefined_cols = np.isnan(cpd).all(axis=0)
    cpd[:, undefined_cols] = 1.0 / cpd.shape[0]
    return cpd


# Note that the conditional distributions above can contain NaN values. In the
# original run one precipitation state (low) had no relation with temp_max, so
# normalization produced 3 NaN values in the first column of cpd_lst[2].
# To mitigate this, every all-NaN column is replaced by equal probabilities.
# Using exactly 1/n (instead of the rounded 0.33) keeps the column summing to 1,
# and scanning all CPDs avoids hard-coding the list index and column.
for cpd in cpd_lst:
    fill_undefined_columns(cpd)

cpd_lst

In [18]:
# Creating tabular conditional probability distribution



In [None]:
# Add CPDs and factors to the model


# Check if model is consistent


In [20]:
# Viewing nodes of the model
weather_model.nodes()

NodeView(('weather', 'precipitation', 'wind', 'temp_max', 'temp_min'))

In [21]:
# Viewing edges of the model
weather_model.edges()

OutEdgeView([('weather', 'precipitation'), ('weather', 'wind'), ('precipitation', 'temp_max'), ('wind', 'temp_min')])

In [None]:
# Print the probability table of the weather node
# NOTE(review): weather_cpd and wind_cpd are not defined anywhere in the visible
# pre-code; presumably they are meant to be created in the
# "Creating tabular conditional probability distribution" cell — confirm.
print(weather_cpd)

# Print the probability table of the wind node
print(wind_cpd)

In [50]:
# Independencies in the model

# Checking independencies of a particular node


# Task 1.2

In [25]:
from pgmpy.inference import VariableElimination

In [None]:
# Question 1: (a) What is the probability of high wind when the weather is sunny? (b) What is the probability of sunny weather when the wind is high?


In [49]:
# Question 2:
# (a) Calculate all possible joint probabilities and determine the most probable condition. Explain your results.

# (b) What is the most probable condition for precipitation, wind and weather, combined?


In [None]:
# Question 3. Find the probability associated with each weather type, given that the precipitation is medium. Explain your result.



In [None]:
# Question 4. What is the probability of each weather condition given that precipitation is medium and wind is low or medium? Explain your method and results. How does the result change with the addition of wind factor compared to question 3 of Task 1.2?



# Task 1.3 - Approximate Inference

## Likelihood Weighted Sample

Generates weighted sample(s) from joint distribution of the Bayesian Network, that comply with the given evidence.

In [30]:
from pgmpy.factors.discrete import State
from pgmpy.sampling import BayesianModelSampling

In [None]:
# Repeat Q.1. (a) of Task 1.2 - What is the probability of high wind when the weather is sunny?



In [None]:
# Repeat Q.1. (b) of Task 1.2 - What is the probability of sunny weather when the wind is high?



## Rejection Sampling

In [None]:
# Repeat Q.2. (a) of Task 1.2 - Calculate all possible joint probabilities and determine the most probable condition. Explain your results.



In [None]:
# Repeat Q.2. (b) of Task 1.2 - What is the most probable condition for precipitation, wind and weather, combined?



## Approx Inference

In [35]:
from pgmpy.inference import ApproxInference

In [None]:
# Repeat Q.3 of Task 1.2 - Find the probability associated with each weather type, given that the precipitation is medium. Explain your result.



# Normal Sampling

In [None]:
# Repeat Q.4 of Task 1.2 - What is the probability of each weather condition given that precipitation is medium and wind is low or medium? Explain your method and results. How does the result change with the addition of wind factor compared to question 3 of Task 1.2?

