In [1]:
import pandas as pd
from pgmpy.estimators import HillClimbSearch, BicScore
from pgmpy.models import BayesianNetwork
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination

In [2]:
# Load data
data = pd.read_csv('precode/SPRICE_Norwegian_Maritime_Data.csv', low_memory=False)

selected_columns = ['Air_temp_Act', 'Rel_Humidity_act', 'Rel_Air_Pressure', 'Wind_Speed_avg', 'Wind_Direction_vct']
data = data[selected_columns]

In [3]:
# Create a BIC Score object and perform Hill Climbing search
bic = BicScore(data)
hc = HillClimbSearch(data)
best_model = hc.estimate()

print("Best model structure: ", best_model.edges())

  0%|          | 0/1000000 [00:00<?, ?it/s]

Best model structure:  [('Air_temp_Act', 'Rel_Air_Pressure'), ('Rel_Humidity_act', 'Rel_Air_Pressure'), ('Wind_Speed_avg', 'Rel_Air_Pressure'), ('Wind_Direction_vct', 'Rel_Air_Pressure')]


In [ ]:
model = BayesianNetwork(best_model.edges())

# Randomly sample a fraction of the data
sample_data = data.sample(frac=0.1, random_state=42)  # Adjust frac as needed


model.fit(sample_data, estimator=MaximumLikelihoodEstimator)

for cpd in model.get_cpds():
    print("CPD for {}: ".format(cpd.variable))
    print(cpd)


In [ ]:
inference = VariableElimination(model)
# Query example: Probability of high wind given sunny weather
result = inference.query(variables=['Wind_Speed_avg'], evidence={'Weather': 'Sunny'})
print(result)