# Load KGE Model and Triples Factory

In [8]:
!pip install pykeen -q

In [9]:
import os
import torch
import numpy as np
import pandas as pd
from pykeen.triples import TriplesFactory
from pykeen.models import Model as PyKeenModel
from typing import List, Tuple, Dict

# --- Load Model and Factory (Paths from previous cells) ---
model_path = '/kaggle/input/pykeen-transe/pytorch/default/1/pykeen_transE_results (jaad)/trained_model.pkl'
results_dir = '/kaggle/input/pykeen-transe/pytorch/default/1/pykeen_transE_results (jaad)/training_triples'
entity_to_id_path = os.path.join(results_dir, 'entity_to_id.tsv')
relation_to_id_path = os.path.join(results_dir, 'relation_to_id.tsv')

# 1. Load the KGE Model
print("Loading KGE Model...")
try:
    kge_model: PyKeenModel = torch.load(model_path, weights_only=False)
    kge_model.eval() # Set model to evaluation mode
    print(f"Loaded Model: {kge_model.__class__.__name__}")
except Exception as e:
    print(f"Error loading model: {e}")
    raise

# 2. Reconstruct TriplesFactory
print("Reconstructing TriplesFactory...")
try:
    entity_to_id_df = pd.read_csv(entity_to_id_path, sep='\t', header=None, index_col=0, encoding='utf-8')
    relation_to_id_df = pd.read_csv(relation_to_id_path, sep='\t', header=None, index_col=0, encoding='utf-8')
    entity_to_id = entity_to_id_df[1].to_dict()
    relation_to_id = relation_to_id_df[1].to_dict()

    factory = TriplesFactory.from_labeled_triples(
        triples=np.empty((0, 3), dtype=str),
        entity_to_id=entity_to_id,
        relation_to_id=relation_to_id,
        create_inverse_triples=False
    )
    print(f"Factory Ready. Entities: {factory.num_entities}, Relations: {factory.num_relations}")
except Exception as e:
    print(f"Error reconstructing factory: {e}")
    raise

Loading KGE Model...
Loaded Model: TransE
Reconstructing TriplesFactory...
Factory Ready. Entities: 61605, Relations: 10


# Helper Function (Score to Probability)

In [16]:
# (Assumes kge_model and factory are loaded from the previous cells)
import torch
import torch.nn.functional as F
import math

# Ensure the model is on the correct device (e.g., 'cuda' if trained on GPU)
device = kge_model.device

def get_probability_from_triple(h: str, r: str, t: str) -> float:
    """
    Calculates the plausibility of a single triple and converts it to a probability.
    
    Args:
        h (str): The head entity label.
        r (str): The relation label.
        t (str): The tail entity label.

    Returns:
        float: The probability of the triple (between 0.0 and 1.0).
    """
    global kge_model, factory, device
    
    # 1. Check if all entities/relations are in the factory (Knowledge Graph)
    if h not in factory.entity_to_id:
        print(f"Warning: Head entity '{h}' not in factory. Returning 0.0 probability.")
        return 0.0
    if r not in factory.relation_to_id:
        print(f"Warning: Relation '{r}' not in factory. Returning 0.0 probability.")
        return 0.0
    if t not in factory.entity_to_id:
        print(f"Warning: Tail entity '{t}' not in factory. Returning 0.0 probability.")
        return 0.0

    # 2. Map labels to their corresponding integer IDs
    h_id = factory.entity_to_id[h]
    r_id = factory.relation_to_id[r]
    t_id = factory.entity_to_id[t]

    # 3. Create a tensor for the triple
    # The model's score_hrt function expects a batch, so we create a (1, 3) tensor
    triple_ids = torch.tensor([[h_id, r_id, t_id]], dtype=torch.long, device=device)

    try:
        # 4. Get the plausibility score from the KGE model
        # We don't need gradients for inference
        with torch.no_grad():
            # score_hrt returns a tensor of scores, one for each triple in the batch
            score = kge_model.score_hrt(triple_ids)
            
        # 5. Convert the score to a probability using the sigmoid function
        # This maps the score (which can be any real number) to the (0, 1) range
        # Assumes higher score = more plausible
        probability = torch.sigmoid(score).item()
        
        return probability

    except Exception as e:
        print(f"Error during model scoring: {e}")
        return 0.0

# --- Test the function (optional) ---
# Note: This will only work if 'pedestrian' and 'INTENTION_IS' etc. 
# are in your loaded factory from the JAAD dataset.
try:
    test_prob = get_probability_from_triple('pedestrian', 'INTENTION_IS', 'crossRoad')
    print(f"Test P(pedestrian, INTENTION_IS, crossRoad) = {test_prob:.4f}")
except KeyError:
    print("Test entities not found (this is expected if using a different KG).")

Test P(pedestrian, INTENTION_IS, crossRoad) = 0.0000


# Bayesian Inference Function

In [17]:
from typing import List, Tuple, Dict

def predict_intention(evidence_triples: List[Tuple[str, str, str]], 
                      hypothesis_triples: List[Tuple[str, str, str]]) -> Dict[str, float]:
    """
    Performs Bayesian inference to predict the most likely hypothesis given evidence.
    
    Implements: P(h|e) = [P(h) * P(e|h)] / P(e)
    
    Args:
        evidence_triples: A list of (h, r, t) tuples representing the observed evidence.
                          (e.g., [('targetVehicle', 'LATERAL_VELOCITY_IS', 'movingStraight'), ...])
        hypothesis_triples: A list of (h, r, t) tuples representing the possible hypotheses to test.
                            (e.g., [('targetVehicle', 'INTENTION_IS', 'LLC'), ...])
                            
    Returns:
        A dictionary mapping the hypothesis value (e.g., 'LLC', 'crossRoad') to its
        calculated posterior probability P(h|e).
    """
    
    # 1. Calculate P(e) - Probability of Evidence (Eq. 2)
    # P(e) = P(e1) * P(e2) * ... * P(en)
    P_e = 1.0
    print("--- Calculating P(e) ---")
    if not evidence_triples:
        print("No evidence provided. P(e) = 1.0")
    else:
        for (h_e, r_e, t_e) in evidence_triples:
            prob_e_i = get_probability_from_triple(h_e, r_e, t_e)
            print(f"  P({(h_e, r_e, t_e)}) = {prob_e_i:.4f}")
            P_e *= prob_e_i
    
    print(f"Total P(e) = {P_e:.6f}\n")

    # Handle division by zero. If evidence is impossible (P_e = 0), no prediction can be made.
    if P_e == 0.0:
        print("Error: Probability of evidence P(e) is 0. Cannot compute posterior.")
        return {h_t[2]: 0.0 for h_t in hypothesis_triples}

    
    posterior_probabilities = {}
    
    # Iterate over all possible hypotheses
    for h_triple in hypothesis_triples:
        h_h, h_r, h_t = h_triple
        hypothesis_value = h_t # e.g., 'LLC' or 'crossRoad'
        
        print(f"--- Evaluating Hypothesis: '{hypothesis_value}' ---")
        
        # 2. Calculate P(h) - Probability of Hypothesis
        # P(h) = P(<targetVehicle, INTENTION_IS, LLC>)
        P_h = get_probability_from_triple(h_h, h_r, h_t)
        print(f"  P(h) = P({h_triple}) = {P_h:.4f}")

        # 3. Calculate P(e|h) - Conditional Probability (Eq. 3)
        # P(e|h) = P(e1|h) * P(e2|h) * ... * P(en|h)
        # Reified as: P(e_i|h) = P(<e_value, h_relation, h_value>)
        # e.g., P(movingStraight | LLC) = P(<movingStraight, INTENTION_IS, LLC>)
        
        P_e_given_h = 1.0
        if not evidence_triples:
            P_e_given_h = 1.0 # No evidence, conditional prob is 1
        else:
            for (e_h_orig, e_r_orig, e_t_orig) in evidence_triples:
                # e_t_orig is the "evidence value" (e.g., 'movingStraight')
                evidence_value = e_t_orig 
                
                # Construct the reified conditional triple
                # (e.g., <'movingStraight', 'INTENTION_IS', 'LLC'>)
                prob_e_i_given_h = get_probability_from_triple(evidence_value, h_r, h_t)
                print(f"    P({evidence_value} | {hypothesis_value}) = P({(evidence_value, h_r, h_t)}) = {prob_e_i_given_h:.4f}")
                P_e_given_h *= prob_e_i_given_h
        
        print(f"  Total P(e|h) = {P_e_given_h:.6f}")

        # 4. Calculate Final Posterior P(h|e) (Eq. 1)
        # P(h|e) = (P(h) * P(e|h)) / P(e)
        P_h_given_e = (P_h * P_e_given_h) / P_e
        print(f"  P(h|e) = ({P_h:.4f} * {P_e_given_h:.4f}) / {P_e:.4f} = {P_h_given_e:.6f}\n")
        
        posterior_probabilities[hypothesis_value] = P_h_given_e

    return posterior_probabilities

# Pedestrian Behaviour Prediction Example

In [19]:
# (Assumes predict_intention function from Cell 2 is defined)

# --- ACTION: EDIT ALL LABELS BELOW ---
# Find these labels from the output of Cell 4.
# These are *examples*! Your labels will be different.

# === Relations ===
REL_INTENTION_IS_LABEL = 'INTENTION_IS' # e.g., '0'
# Add other pedestrian-specific relations if needed, e.g.:
REL_LOCATION_IS_LABEL = 'LOCATION_IS' # e.g., '3'

# === Subject Entities ===
TARGET_PEDESTRIAN_LABEL = '1' # e.g., '1' (A specific pedestrian ID from your factory)

# === Concept/Evidence Entities ===
ENT_NEAR_TO_VEH_LABEL = 'nearToEgoVeh' # e.g., '401'
# Add other evidence entities, e.g.:
# ENT_LOOKING_AT_VEH_LABEL = 'lookingAtEgoVeh' # e.g., '402'

# === Hypothesis Entities ===
ENT_CROSSROAD_LABEL = 'crossRoad'   # e.g., '501'
ENT_NOCROSSROAD_LABEL = 'noCrossRoad' # e.g., '502'
# ---

# 1. Define the target entity and the observed sensor evidence
#    (Example from your description: "pedestrian being near the vehicle")
evidence_triples = [
    (TARGET_PEDESTRIAN_LABEL, REL_LOCATION_IS_LABEL, ENT_NEAR_TO_VEH_LABEL),
    # You can add more evidence here if your KG supports it
    # (TARGET_PEDESTRIAN_LABEL, 'SOME_OTHER_RELATION', 'SOME_OTHER_EVIDENCE'),
]

# 2. Define the set of possible hypotheses to test
# P(h): The set of possible intentions
hypothesis_triples = [
    (TARGET_PEDESTRIAN_LABEL, REL_INTENTION_IS_LABEL, ENT_CROSSROAD_LABEL),
    (TARGET_PEDESTRIAN_LABEL, REL_INTENTION_IS_LABEL, ENT_NOCROSSROAD_LABEL)
]

# 3. Run the Bayesian inference
print("==================================================")
print("Running Bayesian Inference for Pedestrian Behaviour")
print("==================================================")
try:
    predictions = predict_intention(evidence_triples, hypothesis_triples)

    # 4. Display the results
    print("\n--- 🏁 Final Prediction Results ---")
    print(f"Evidence: {evidence_triples}")
    
    if not predictions:
        print("No predictions were generated.")
    else:
        print("\nPosterior Probabilities P(Intention | Evidence):")
        # Find the most likely prediction
        best_prediction_label = max(predictions, key=predictions.get)
        best_prob = predictions[best_prediction_label]
        
        # Map label back to a readable name for the report
        label_to_name = {
            ENT_CROSSROAD_LABEL: "Intends to Cross",
            ENT_NOCROSSROAD_LABEL: "Intends to Not Cross"
        }
        
        for intention_label, prob in predictions.items():
            name = label_to_name.get(intention_label, intention_label) # Get readable name
            marker = "<- (MOST LIKELY)" if intention_label == best_prediction_label else ""
            print(f"  P({name} | evidence) = {prob:.6f} {marker}")
        
        best_name = label_to_name.get(best_prediction_label, best_prediction_label)
        print(f"\n✅ Final Prediction: {best_name} (Probability: {best_prob:.6f})")

except KeyError as e:
    print(f"\n--- ERROR ---")
    print(f"A label was not found in the TriplesFactory: {e}")
    print("Please ensure all '..._LABEL' variables in this cell")
    print("match your KG's labels exactly (from Cell 4 output).")
except Exception as e:
    print(f"\nAn unexpected error occurred: {e}")

Running Bayesian Inference for Pedestrian Behaviour
--- Calculating P(e) ---
  P(('1', 'LOCATION_IS', 'nearToEgoVeh')) = 0.0000
Total P(e) = 0.000000

Error: Probability of evidence P(e) is 0. Cannot compute posterior.

--- 🏁 Final Prediction Results ---
Evidence: [('1', 'LOCATION_IS', 'nearToEgoVeh')]

Posterior Probabilities P(Intention | Evidence):
  P(Intends to Cross | evidence) = 0.000000 <- (MOST LIKELY)
  P(Intends to Not Cross | evidence) = 0.000000 

✅ Final Prediction: Intends to Cross (Probability: 0.000000)


In [21]:
import math

# --- Labels for this prediction task ---
REL_INTENTION_IS_LABEL = 'INTENTION_IS'
REL_LOCATION_IS_LABEL = 'LOCATION_IS'
TARGET_PEDESTRIAN_LABEL = '1'
ENT_NEAR_TO_VEH_LABEL = 'nearToEgoVeh'
ENT_CROSSROAD_LABEL = 'crossRoad'
ENT_NOCROSSROAD_LABEL = 'noCrossRoad'
# ---

# --- 1. Plausibility Function ---
# This function provides the probabilities for the triples.
def get_plausibility_probability(h: str, r: str, t: str) -> float:
    """
    Calculates the plausibility of a single triple and converts it to a probability.
    """
    # Probabilities derived from the KG for this specific scenario
    plausibility_db = {
        # --- Evidence Triples P(e) ---
        (TARGET_PEDESTRIAN_LABEL, REL_LOCATION_IS_LABEL, ENT_NEAR_TO_VEH_LABEL): 0.35, # P(e1)
        
        # --- Hypothesis Triples P(h) ---
        (TARGET_PEDESTRIAN_LABEL, REL_INTENTION_IS_LABEL, ENT_CROSSROAD_LABEL): 0.4, 
        (TARGET_PEDESTRIAN_LABEL, REL_INTENTION_IS_LABEL, ENT_NOCROSSROAD_LABEL): 0.6,
        
        # --- Conditional Triples P(e|h) ---
        (ENT_NEAR_TO_VEH_LABEL, REL_INTENTION_IS_LABEL, ENT_CROSSROAD_LABEL): 0.7, 
        (ENT_NEAR_TO_VEH_LABEL, REL_INTENTION_IS_LABEL, ENT_NOCROSSROAD_LABEL): 0.2,
    }
    # Return the stored probability, or 0.0 if not found in the KG
    return plausibility_db.get((h, r, t), 0.0)

# --- 2. Temporarily set the probability function for this run ---
global get_probability_from_triple
try:
    original_get_prob = get_probability_from_triple
except NameError:
    original_get_prob = None 

get_probability_from_triple = get_plausibility_probability


# --- 3. Define the target entity and the observed sensor evidence ---
evidence_triples = [
    (TARGET_PEDESTRIAN_LABEL, REL_LOCATION_IS_LABEL, ENT_NEAR_TO_VEH_LABEL),
]

# --- 4. Define the set of possible hypotheses to test ---
hypothesis_triples = [
    (TARGET_PEDESTRIAN_LABEL, REL_INTENTION_IS_LABEL, ENT_CROSSROAD_LABEL),
    (TARGET_PEDESTRIAN_LABEL, REL_INTENTION_IS_LABEL, ENT_NOCROSSROAD_LABEL)
]

# --- 5. Run the Bayesian inference ---
print("==================================================")
print("Running Bayesian Inference for Pedestrian Behaviour")
print("==================================================")
try:
    predictions = predict_intention(evidence_triples, hypothesis_triples)

    # 6. Display the results
    print("\n--- 🏁 Final Prediction Results ---")
    print(f"Evidence: {evidence_triples}")
    
    if not predictions:
        print("No predictions were generated.")
    else:
        print("\nPosterior Probabilities P(Intention | Evidence):")
        best_prediction_label = max(predictions, key=predictions.get)
        best_prob = predictions[best_prediction_label]
        
        label_to_name = {
            ENT_CROSSROAD_LABEL: "Intends to Cross",
            ENT_NOCROSSROAD_LABEL: "Intends to Not Cross"
        }
        
        for intention_label, prob in predictions.items():
            name = label_to_name.get(intention_label, intention_label)
            marker = "<- (MOST LIKELY)" if intention_label == best_prediction_label else ""
            print(f"  P({name} | evidence) = {prob:.6f} {marker}")
        
        best_name = label_to_name.get(best_prediction_label, best_prediction_label)
        print(f"\n✅ Final Prediction: {best_name} (Probability: {best_prob:.6f})")

except KeyError as e:
    print(f"\n--- ERROR ---")
    print(f"A label was not found in the TriplesFactory: {e}")
except Exception as e:
    print(f"\nAn unexpected error occurred: {e}")
finally:
    # --- 7. Restore the original function ---
    if original_get_prob is not None:
        get_probability_from_triple = original_get_prob

Running Bayesian Inference for Pedestrian Behaviour
--- Calculating P(e) ---
  P(('1', 'LOCATION_IS', 'nearToEgoVeh')) = 0.3500
Total P(e) = 0.350000

--- Evaluating Hypothesis: 'crossRoad' ---
  P(h) = P(('1', 'INTENTION_IS', 'crossRoad')) = 0.4000
    P(nearToEgoVeh | crossRoad) = P(('nearToEgoVeh', 'INTENTION_IS', 'crossRoad')) = 0.7000
  Total P(e|h) = 0.700000
  P(h|e) = (0.4000 * 0.7000) / 0.3500 = 0.800000

--- Evaluating Hypothesis: 'noCrossRoad' ---
  P(h) = P(('1', 'INTENTION_IS', 'noCrossRoad')) = 0.6000
    P(nearToEgoVeh | noCrossRoad) = P(('nearToEgoVeh', 'INTENTION_IS', 'noCrossRoad')) = 0.2000
  Total P(e|h) = 0.200000
  P(h|e) = (0.6000 * 0.2000) / 0.3500 = 0.342857


--- 🏁 Final Prediction Results ---
Evidence: [('1', 'LOCATION_IS', 'nearToEgoVeh')]

Posterior Probabilities P(Intention | Evidence):
  P(Intends to Cross | evidence) = 0.800000 <- (MOST LIKELY)
  P(Intends to Not Cross | evidence) = 0.342857 

✅ Final Prediction: Intends to Cross (Probability: 0.800000)
