In [21]:
from pgmpy.models import BayesianNetwork
from pgmpy.factors.discrete import TabularCPD
from pgmpy.inference import VariableElimination
import numpy as np
from pgmpy.models import BayesianModel

In [17]:
flat_model = BayesianNetwork([
    ('Distance', 'Price'), 
    ('Rooms', 'Price'), 
    ('Price', 'Quality'), 
    ('Price', 'UserPref'), 
    ('Rooms', 'UserPref'), 
    ('Distance', 'UserPref'), 
    ('Quality', 'UserPref')
])

In [18]:
from pgmpy.factors.discrete import TabularCPD

cpd_dist = TabularCPD(variable='Distance', variable_card=3, values=[[0.3], [0.5], [0.2]])
cpd_rooms = TabularCPD(variable='Rooms', variable_card=3, values=[[0.33], [0.33], [0.34]])
cpd_price = TabularCPD(
    variable='Price', 
    variable_card=3, 
    values=[
        [0.7, 0.4, 0.2, 0.3, 0.1, 0.1],
        [0.2, 0.4, 0.3, 0.3, 0.4, 0.2],
        [0.1, 0.2, 0.5, 0.4, 0.5, 0.7]
    ],
    evidence=['Distance', 'Rooms'],
    evidence_card=[2, 3]
)

# Assume that Quality is independent and depends only on the Price
cpd_quality = TabularCPD(
    variable='Quality',
    variable_card=2,
    values=[
        [0.5, 0.6, 0.7],
        [0.5, 0.4, 0.3]
    ],
    evidence=['Price'],
    evidence_card=[3]
)


  cpd_price = TabularCPD(
  cpd_quality = TabularCPD(


In [19]:
# Associating the parameters with the model structure.
flat_model.add_cpds(cpd_dist, cpd_rooms, cpd_price, cpd_quality)

# Checking if the cpds are valid for the model.
flat_model.check_model()

ValueError: No CPD associated with UserPref

In [26]:
flat_model.active_trail_nodes("Distance")

{'Distance': {'Distance', 'Rooms', 'UserPref'}}

In [27]:
flat_model.local_independencies("Quality")

(Quality ⟂ Rooms, Distance | Price)

In [25]:
# Create random values
random_values = np.random.rand(3, 81)

# Normalize the values so that they sum to 1 across each set of conditions
normalized_values = random_values / random_values.sum(axis=0)
values_test = normalized_values.tolist()

user_pref_cpd = TabularCPD(
    variable='UserPref', 
    variable_card=3, 
    values=values_test,
    evidence=['Price', 'Rooms', 'Distance', 'Quality'],
    evidence_card=[3, 3, 3, 3]
)
flat_model.add_cpds(user_pref_cpd)

  user_pref_cpd = TabularCPD(


In [37]:
# Define the structure
flat_model = BayesianModel([
    ('Distance', 'UserPref'), 
    ('Rooms', 'UserPref'), 
    ('Price', 'UserPref'),
    ('Quality', 'UserPref'),
    ('Rooms', 'Distance'), 
    ('Price', 'Quality'), 
])

cpd_distance = TabularCPD(variable='Distance', variable_card=3, values=[[0.33], [0.33], [0.34]])
cpd_rooms = TabularCPD(variable='Rooms', variable_card=3, values=[[0.33], [0.33], [0.34]])
cpd_price = TabularCPD(
    variable='Price', 
    variable_card=3, 
    values=[
        [0.7, 0.4, 0.2, 0.3, 0.1, 0.1],
        [0.2, 0.4, 0.3, 0.3, 0.4, 0.2],
        [0.1, 0.2, 0.5, 0.4, 0.5, 0.7]
    ],
    evidence=['Distance', 'Rooms'],
    evidence_card=[2, 3]
)

# Assume that Quality is independent and depends only on the Price
cpd_quality = TabularCPD(
    variable='Quality',
    variable_card=2,
    values=[
        [0.5, 0.6, 0.7],
        [0.5, 0.4, 0.3]
    ],
    evidence=['Price'],
    evidence_card=[3]
)
# Create random probabilities for UserPref
random_values = np.random.rand(3, 81)
normalized_values = random_values / random_values.sum(axis=0)

# Use these values in your CPD
cpd_userpref = TabularCPD(
    variable='UserPref', 
    variable_card=3, 
    values=normalized_values.tolist(),
    evidence=['Price', 'Rooms', 'Distance', 'Quality'],
    evidence_card=[3, 3, 3, 3]
)

# Add the CPDs to the model
flat_model.add_cpds(cpd_dist, cpd_rooms, cpd_price, cpd_quality, cpd_userpref)

  cpd_price = TabularCPD(
  cpd_quality = TabularCPD(
  cpd_userpref = TabularCPD(


In [38]:
def categorize_price(price):
    if price < 400:
        return 0  # 'Low'
    elif 400 <= price < 800:
        return 1  # 'Medium'
    else:
        return 2  # 'High'

def categorize_rooms(rooms):
    if rooms < 2:
        return 0  # 'Low'
    elif 2 <= rooms < 3:
        return 1  # 'Medium'
    else:
        return 2  # 'High'
def categorize_distance(distance):
    if distance < 10:
        return 0  # 'Low'
    elif 10 <= distance < 25:
        return 1  # 'Medium'
    else:
        return 2  # 'High'   
    
def categorize_quality(quality):
    if quality < 3.5:
        return 0  # 'Low'
    elif 3.5 <= quality < 4.5:
        return 1  # 'Medium'
    else:
        return 2  # 'High'       

In [39]:
price_A = categorize_price(700)
price_B = categorize_price(600)
# Assuming functions categorize_rooms, categorize_distance, and categorize_quality
rooms_A = categorize_rooms(2)
rooms_B = categorize_rooms(2)
distance_A = categorize_distance(12)
distance_B = categorize_distance(8)
quality_A = categorize_quality(4)
quality_B = categorize_quality(4.2)

infer = VariableElimination(flat_model)
prob_giv_A = infer.query(variables=['UserPref'], 
                         evidence={'Price': price_A, 'Rooms': rooms_A, 'Distance': distance_A, 'Quality': quality_A})

prob_giv_B = infer.query(variables=['UserPref'], 
                         evidence={'Price': price_B, 'Rooms': rooms_B, 'Distance': quality_B, 'Quality': quality_B})


ValueError: CPD associated with Distance doesn't have proper parents associated with it.

In [None]:
# 1. Define the Bayesian Network structure
flat_model = BayesianNetwork([
    ('SearchRooms', 'UserPref'),
    ('SearchSqft', 'UserPref'),
    ('SearchQuality', 'UserPref'),
    ('SearchPrice', 'UserPref'),
    ('IndustryRooms', 'Rooms'),
    ('IndustrySqft', 'Sqft'),
    ('IndustryQuality', 'Quality'),
    ('IndustryPrice', 'Price'),
    ('Rooms', 'UserPref'),
    ('Sqft', 'UserPref'),
    ('Quality', 'UserPref'),
    ('Price', 'UserPref')
])

# Sample size for history
N_history = 1000

# Set seed for reproducibility
np.random.seed(42)

# Create the dataframe for search history
df_history = pd.DataFrame({
    'SearchRooms': np.random.choice([1, 2, 3], p=[0.6, 0.3, 0.1], size=N_history),
    'SearchSqft': np.random.choice(['small', 'medium', 'large'], p=[0.2, 0.6, 0.2], size=N_history),
    'SearchQuality': np.random.choice(['low', 'medium', 'high'], p=[0.2, 0.6, 0.2], size=N_history),
    'SearchPrice': np.random.choice(['low', 'medium', 'high'], p=[0.1, 0.2, 0.7], size=N_history),
    'UserPref': np.random.choice(['like', 'neutral', 'dislike'], size=N_history)
})
# Sample size for industry data
N_industry = 500

# Create the dataframe for industry expertise
df_industry = pd.DataFrame({
    'IndustryRooms': np.random.choice([1, 2, 3], size=N_industry),
    'IndustrySqft': np.random.choice(['small', 'medium', 'large'], size=N_industry),
    'IndustryQuality': np.random.choice(['low', 'medium', 'high'], size=N_industry),
    'IndustryPrice': np.random.choice(['low', 'medium', 'high'], size=N_industry),
    'Rooms': np.random.choice([1, 2, 3], size=N_industry),
    'Sqft': np.random.choice(['small', 'medium', 'large'], size=N_industry),
    'Quality': np.random.choice(['low', 'medium', 'high'], size=N_industry),
    'Price': np.random.choice(['low', 'medium', 'high'], size=N_industry),
})

# Bayesian Network structure already defined as flat_model

# Calculate CPDs using df_history
mle_history = MaximumLikelihoodEstimator(flat_model, df_history)
cpd_searchrooms = mle_history.estimate_cpd('SearchRooms')
cpd_searchsqft = mle_history.estimate_cpd('SearchSqft')
cpd_searchquality = mle_history.estimate_cpd('SearchQuality')
cpd_searchprice = mle_history.estimate_cpd('SearchPrice')
cpd_userpref_from_history = mle_history.estimate_cpd('UserPref')

# Calculate CPDs using df_industry
mle_industry = MaximumLikelihoodEstimator(flat_model, df_industry)
cpd_industryrooms = mle_industry.estimate_cpd('IndustryRooms')
cpd_industrysqft = mle_industry.estimate_cpd('IndustrySqft')
cpd_industryquality = mle_industry.estimate_cpd('IndustryQuality')
cpd_industryprice = mle_industry.estimate_cpd('IndustryPrice')
cpd_rooms = mle_industry.estimate_cpd('Rooms')
cpd_sqft = mle_industry.estimate_cpd('Sqft')
cpd_quality = mle_industry.estimate_cpd('Quality')
cpd_price = mle_industry.estimate_cpd('Price')

# Add the CPDs to the model
flat_model.add_cpds(cpd_searchrooms, cpd_searchsqft, cpd_searchquality, cpd_searchprice,
                    cpd_userpref_from_history, cpd_industryrooms, cpd_industrysqft, cpd_industryquality,
                    cpd_industryprice, cpd_rooms, cpd_sqft, cpd_quality, cpd_price)

# Validate the model
assert flat_model.check_model()

# 3. Input user search parameters as evidence
# Sample evidence: user wants a 3-room flat, with 500 sqft, high quality, for €600
evidence = {
    'SearchRooms': 3,
    'SearchSqft': 500,
    'SearchQuality': 'high',
    'SearchPrice': 600
}

# 4. Query the network for the adjusted probabilities for UserPref
inference = VariableElimination(flat_model)
result = inference.query(variables=['UserPref'], evidence=evidence)

print(result)

In [None]:
 Computing the probability of bronc given smoke=no.
q = asia_infer.query(variables=["bronc"], evidence={"smoke": "no"})
print(q)

# Computing the joint probability of bronc and asia given smoke=yes
q = asia_infer.query(variables=["bronc", "asia"], evidence={"smoke": "yes"})
print(q)

# Computing the probabilities (not joint) of bronc and asia given smoke=no
q = asia_infer.query(variables=["bronc", "asia"], evidence={"smoke": "no"}, joint=False)
for factor in q.values():
    print(factor)