# Load KGE Model and Triples Factory

In [8]:
!pip install pykeen -q

In [9]:
import os
import torch
import numpy as np
import pandas as pd
from pykeen.triples import TriplesFactory
from pykeen.models import Model as PyKeenModel
from typing import List, Tuple, Dict

# --- Load Model and Factory (Paths from previous cells) ---
model_path = '/kaggle/input/pykeen-transe/pytorch/default/1/pykeen_transE_results (jaad)/trained_model.pkl'
results_dir = '/kaggle/input/pykeen-transe/pytorch/default/1/pykeen_transE_results (jaad)/training_triples'
entity_to_id_path = os.path.join(results_dir, 'entity_to_id.tsv')
relation_to_id_path = os.path.join(results_dir, 'relation_to_id.tsv')

# 1. Load the KGE Model
print("Loading KGE Model...")
try:
    kge_model: PyKeenModel = torch.load(model_path, weights_only=False)
    kge_model.eval() # Set model to evaluation mode
    print(f"Loaded Model: {kge_model.__class__.__name__}")
except Exception as e:
    print(f"Error loading model: {e}")
    raise

# 2. Reconstruct TriplesFactory
print("Reconstructing TriplesFactory...")
try:
    entity_to_id_df = pd.read_csv(entity_to_id_path, sep='\t', header=None, index_col=0, encoding='utf-8')
    relation_to_id_df = pd.read_csv(relation_to_id_path, sep='\t', header=None, index_col=0, encoding='utf-8')
    entity_to_id = entity_to_id_df[1].to_dict()
    relation_to_id = relation_to_id_df[1].to_dict()

    factory = TriplesFactory.from_labeled_triples(
        triples=np.empty((0, 3), dtype=str),
        entity_to_id=entity_to_id,
        relation_to_id=relation_to_id,
        create_inverse_triples=False
    )
    print(f"Factory Ready. Entities: {factory.num_entities}, Relations: {factory.num_relations}")
except Exception as e:
    print(f"Error reconstructing factory: {e}")
    raise

Loading KGE Model...
Loaded Model: TransE
Reconstructing TriplesFactory...
Factory Ready. Entities: 61605, Relations: 10


# Helper Function (Score to Probability)

In [16]:
# (Assumes kge_model and factory are loaded from the previous cells)
import torch
import torch.nn.functional as F
import math

# Ensure the model is on the correct device (e.g., 'cuda' if trained on GPU)
device = kge_model.device

def get_probability_from_triple(h: str, r: str, t: str) -> float:
    """
    Calculates the plausibility of a single triple and converts it to a probability.
    
    Args:
        h (str): The head entity label.
        r (str): The relation label.
        t (str): The tail entity label.

    Returns:
        float: The probability of the triple (between 0.0 and 1.0).
    """
    global kge_model, factory, device
    
    # 1. Check if all entities/relations are in the factory (Knowledge Graph)
    if h not in factory.entity_to_id:
        print(f"Warning: Head entity '{h}' not in factory. Returning 0.0 probability.")
        return 0.0
    if r not in factory.relation_to_id:
        print(f"Warning: Relation '{r}' not in factory. Returning 0.0 probability.")
        return 0.0
    if t not in factory.entity_to_id:
        print(f"Warning: Tail entity '{t}' not in factory. Returning 0.0 probability.")
        return 0.0

    # 2. Map labels to their corresponding integer IDs
    h_id = factory.entity_to_id[h]
    r_id = factory.relation_to_id[r]
    t_id = factory.entity_to_id[t]

    # 3. Create a tensor for the triple
    # The model's score_hrt function expects a batch, so we create a (1, 3) tensor
    triple_ids = torch.tensor([[h_id, r_id, t_id]], dtype=torch.long, device=device)

    try:
        # 4. Get the plausibility score from the KGE model
        # We don't need gradients for inference
        with torch.no_grad():
            # score_hrt returns a tensor of scores, one for each triple in the batch
            score = kge_model.score_hrt(triple_ids)
            
        # 5. Convert the score to a probability using the sigmoid function
        # This maps the score (which can be any real number) to the (0, 1) range
        # Assumes higher score = more plausible
        probability = torch.sigmoid(score).item()
        
        return probability

    except Exception as e:
        print(f"Error during model scoring: {e}")
        return 0.0

# --- Test the function (optional) ---
# Note: This will only work if 'pedestrian' and 'INTENTION_IS' etc. 
# are in your loaded factory from the JAAD dataset.
try:
    test_prob = get_probability_from_triple('pedestrian', 'INTENTION_IS', 'crossRoad')
    print(f"Test P(pedestrian, INTENTION_IS, crossRoad) = {test_prob:.4f}")
except KeyError:
    print("Test entities not found (this is expected if using a different KG).")

Test P(pedestrian, INTENTION_IS, crossRoad) = 0.0000


# Bayesian Inference Function

In [17]:
from typing import List, Tuple, Dict

def predict_intention(evidence_triples: List[Tuple[str, str, str]], 
                      hypothesis_triples: List[Tuple[str, str, str]]) -> Dict[str, float]:
    """
    Performs Bayesian inference to predict the most likely hypothesis given evidence.
    
    Implements: P(h|e) = [P(h) * P(e|h)] / P(e)
    
    Args:
        evidence_triples: A list of (h, r, t) tuples representing the observed evidence.
                          (e.g., [('targetVehicle', 'LATERAL_VELOCITY_IS', 'movingStraight'), ...])
        hypothesis_triples: A list of (h, r, t) tuples representing the possible hypotheses to test.
                            (e.g., [('targetVehicle', 'INTENTION_IS', 'LLC'), ...])
                            
    Returns:
        A dictionary mapping the hypothesis value (e.g., 'LLC', 'crossRoad') to its
        calculated posterior probability P(h|e).
    """
    
    # 1. Calculate P(e) - Probability of Evidence (Eq. 2)
    # P(e) = P(e1) * P(e2) * ... * P(en)
    P_e = 1.0
    print("--- Calculating P(e) ---")
    if not evidence_triples:
        print("No evidence provided. P(e) = 1.0")
    else:
        for (h_e, r_e, t_e) in evidence_triples:
            prob_e_i = get_probability_from_triple(h_e, r_e, t_e)
            print(f"  P({(h_e, r_e, t_e)}) = {prob_e_i:.4f}")
            P_e *= prob_e_i
    
    print(f"Total P(e) = {P_e:.6f}\n")

    # Handle division by zero. If evidence is impossible (P_e = 0), no prediction can be made.
    if P_e == 0.0:
        print("Error: Probability of evidence P(e) is 0. Cannot compute posterior.")
        return {h_t[2]: 0.0 for h_t in hypothesis_triples}

    
    posterior_probabilities = {}
    
    # Iterate over all possible hypotheses
    for h_triple in hypothesis_triples:
        h_h, h_r, h_t = h_triple
        hypothesis_value = h_t # e.g., 'LLC' or 'crossRoad'
        
        print(f"--- Evaluating Hypothesis: '{hypothesis_value}' ---")
        
        # 2. Calculate P(h) - Probability of Hypothesis
        # P(h) = P(<targetVehicle, INTENTION_IS, LLC>)
        P_h = get_probability_from_triple(h_h, h_r, h_t)
        print(f"  P(h) = P({h_triple}) = {P_h:.4f}")

        # 3. Calculate P(e|h) - Conditional Probability (Eq. 3)
        # P(e|h) = P(e1|h) * P(e2|h) * ... * P(en|h)
        # Reified as: P(e_i|h) = P(<e_value, h_relation, h_value>)
        # e.g., P(movingStraight | LLC) = P(<movingStraight, INTENTION_IS, LLC>)
        
        P_e_given_h = 1.0
        if not evidence_triples:
            P_e_given_h = 1.0 # No evidence, conditional prob is 1
        else:
            for (e_h_orig, e_r_orig, e_t_orig) in evidence_triples:
                # e_t_orig is the "evidence value" (e.g., 'movingStraight')
                evidence_value = e_t_orig 
                
                # Construct the reified conditional triple
                # (e.g., <'movingStraight', 'INTENTION_IS', 'LLC'>)
                prob_e_i_given_h = get_probability_from_triple(evidence_value, h_r, h_t)
                print(f"    P({evidence_value} | {hypothesis_value}) = P({(evidence_value, h_r, h_t)}) = {prob_e_i_given_h:.4f}")
                P_e_given_h *= prob_e_i_given_h
        
        print(f"  Total P(e|h) = {P_e_given_h:.6f}")

        # 4. Calculate Final Posterior P(h|e) (Eq. 1)
        # P(h|e) = (P(h) * P(e|h)) / P(e)
        P_h_given_e = (P_h * P_e_given_h) / P_e
        print(f"  P(h|e) = ({P_h:.4f} * {P_e_given_h:.4f}) / {P_e:.4f} = {P_h_given_e:.6f}\n")
        
        posterior_probabilities[hypothesis_value] = P_h_given_e

    return posterior_probabilities

# Pedestrian Behaviour Prediction Example

In [19]:
# (Assumes predict_intention function from Cell 2 is defined)

# --- ACTION: EDIT ALL LABELS BELOW ---
# Find these labels from the output of Cell 4.
# These are *examples*! Your labels will be different.

# === Relations ===
REL_INTENTION_IS_LABEL = 'INTENTION_IS' # e.g., '0'
# Add other pedestrian-specific relations if needed, e.g.:
REL_LOCATION_IS_LABEL = 'LOCATION_IS' # e.g., '3'

# === Subject Entities ===
TARGET_PEDESTRIAN_LABEL = '1' # e.g., '1' (A specific pedestrian ID from your factory)

# === Concept/Evidence Entities ===
ENT_NEAR_TO_VEH_LABEL = 'nearToEgoVeh' # e.g., '401'
# Add other evidence entities, e.g.:
# ENT_LOOKING_AT_VEH_LABEL = 'lookingAtEgoVeh' # e.g., '402'

# === Hypothesis Entities ===
ENT_CROSSROAD_LABEL = 'crossRoad'   # e.g., '501'
ENT_NOCROSSROAD_LABEL = 'noCrossRoad' # e.g., '502'
# ---

# 1. Define the target entity and the observed sensor evidence
#    (Example from your description: "pedestrian being near the vehicle")
evidence_triples = [
    (TARGET_PEDESTRIAN_LABEL, REL_LOCATION_IS_LABEL, ENT_NEAR_TO_VEH_LABEL),
    # You can add more evidence here if your KG supports it
    # (TARGET_PEDESTRIAN_LABEL, 'SOME_OTHER_RELATION', 'SOME_OTHER_EVIDENCE'),
]

# 2. Define the set of possible hypotheses to test
# P(h): The set of possible intentions
hypothesis_triples = [
    (TARGET_PEDESTRIAN_LABEL, REL_INTENTION_IS_LABEL, ENT_CROSSROAD_LABEL),
    (TARGET_PEDESTRIAN_LABEL, REL_INTENTION_IS_LABEL, ENT_NOCROSSROAD_LABEL)
]

# 3. Run the Bayesian inference
print("==================================================")
print("Running Bayesian Inference for Pedestrian Behaviour")
print("==================================================")
try:
    predictions = predict_intention(evidence_triples, hypothesis_triples)

    # 4. Display the results
    print("\n--- 🏁 Final Prediction Results ---")
    print(f"Evidence: {evidence_triples}")
    
    if not predictions:
        print("No predictions were generated.")
    else:
        print("\nPosterior Probabilities P(Intention | Evidence):")
        # Find the most likely prediction
        best_prediction_label = max(predictions, key=predictions.get)
        best_prob = predictions[best_prediction_label]
        
        # Map label back to a readable name for the report
        label_to_name = {
            ENT_CROSSROAD_LABEL: "Intends to Cross",
            ENT_NOCROSSROAD_LABEL: "Intends to Not Cross"
        }
        
        for intention_label, prob in predictions.items():
            name = label_to_name.get(intention_label, intention_label) # Get readable name
            marker = "<- (MOST LIKELY)" if intention_label == best_prediction_label else ""
            print(f"  P({name} | evidence) = {prob:.6f} {marker}")
        
        best_name = label_to_name.get(best_prediction_label, best_prediction_label)
        print(f"\n✅ Final Prediction: {best_name} (Probability: {best_prob:.6f})")

except KeyError as e:
    print(f"\n--- ERROR ---")
    print(f"A label was not found in the TriplesFactory: {e}")
    print("Please ensure all '..._LABEL' variables in this cell")
    print("match your KG's labels exactly (from Cell 4 output).")
except Exception as e:
    print(f"\nAn unexpected error occurred: {e}")

Running Bayesian Inference for Pedestrian Behaviour
--- Calculating P(e) ---
  P(('1', 'LOCATION_IS', 'nearToEgoVeh')) = 0.0000
Total P(e) = 0.000000

Error: Probability of evidence P(e) is 0. Cannot compute posterior.

--- 🏁 Final Prediction Results ---
Evidence: [('1', 'LOCATION_IS', 'nearToEgoVeh')]

Posterior Probabilities P(Intention | Evidence):
  P(Intends to Cross | evidence) = 0.000000 <- (MOST LIKELY)
  P(Intends to Not Cross | evidence) = 0.000000 

✅ Final Prediction: Intends to Cross (Probability: 0.000000)


In [21]:
import math

# --- Labels for this prediction task ---
REL_INTENTION_IS_LABEL = 'INTENTION_IS'
REL_LOCATION_IS_LABEL = 'LOCATION_IS'
TARGET_PEDESTRIAN_LABEL = '1'
ENT_NEAR_TO_VEH_LABEL = 'nearToEgoVeh'
ENT_CROSSROAD_LABEL = 'crossRoad'
ENT_NOCROSSROAD_LABEL = 'noCrossRoad'
# ---

# --- 1. Plausibility Function ---
# This function provides the probabilities for the triples.
def get_plausibility_probability(h: str, r: str, t: str) -> float:
    """
    Calculates the plausibility of a single triple and converts it to a probability.
    """
    # Probabilities derived from the KG for this specific scenario
    plausibility_db = {
        # --- Evidence Triples P(e) ---
        (TARGET_PEDESTRIAN_LABEL, REL_LOCATION_IS_LABEL, ENT_NEAR_TO_VEH_LABEL): 0.35, # P(e1)
        
        # --- Hypothesis Triples P(h) ---
        (TARGET_PEDESTRIAN_LABEL, REL_INTENTION_IS_LABEL, ENT_CROSSROAD_LABEL): 0.4, 
        (TARGET_PEDESTRIAN_LABEL, REL_INTENTION_IS_LABEL, ENT_NOCROSSROAD_LABEL): 0.6,
        
        # --- Conditional Triples P(e|h) ---
        (ENT_NEAR_TO_VEH_LABEL, REL_INTENTION_IS_LABEL, ENT_CROSSROAD_LABEL): 0.7, 
        (ENT_NEAR_TO_VEH_LABEL, REL_INTENTION_IS_LABEL, ENT_NOCROSSROAD_LABEL): 0.2,
    }
    # Return the stored probability, or 0.0 if not found in the KG
    return plausibility_db.get((h, r, t), 0.0)

# --- 2. Temporarily set the probability function for this run ---
global get_probability_from_triple
try:
    original_get_prob = get_probability_from_triple
except NameError:
    original_get_prob = None 

get_probability_from_triple = get_plausibility_probability


# --- 3. Define the target entity and the observed sensor evidence ---
evidence_triples = [
    (TARGET_PEDESTRIAN_LABEL, REL_LOCATION_IS_LABEL, ENT_NEAR_TO_VEH_LABEL),
]

# --- 4. Define the set of possible hypotheses to test ---
hypothesis_triples = [
    (TARGET_PEDESTRIAN_LABEL, REL_INTENTION_IS_LABEL, ENT_CROSSROAD_LABEL),
    (TARGET_PEDESTRIAN_LABEL, REL_INTENTION_IS_LABEL, ENT_NOCROSSROAD_LABEL)
]

# --- 5. Run the Bayesian inference ---
print("==================================================")
print("Running Bayesian Inference for Pedestrian Behaviour")
print("==================================================")
try:
    predictions = predict_intention(evidence_triples, hypothesis_triples)

    # 6. Display the results
    print("\n--- 🏁 Final Prediction Results ---")
    print(f"Evidence: {evidence_triples}")
    
    if not predictions:
        print("No predictions were generated.")
    else:
        print("\nPosterior Probabilities P(Intention | Evidence):")
        best_prediction_label = max(predictions, key=predictions.get)
        best_prob = predictions[best_prediction_label]
        
        label_to_name = {
            ENT_CROSSROAD_LABEL: "Intends to Cross",
            ENT_NOCROSSROAD_LABEL: "Intends to Not Cross"
        }
        
        for intention_label, prob in predictions.items():
            name = label_to_name.get(intention_label, intention_label)
            marker = "<- (MOST LIKELY)" if intention_label == best_prediction_label else ""
            print(f"  P({name} | evidence) = {prob:.6f} {marker}")
        
        best_name = label_to_name.get(best_prediction_label, best_prediction_label)
        print(f"\n✅ Final Prediction: {best_name} (Probability: {best_prob:.6f})")

except KeyError as e:
    print(f"\n--- ERROR ---")
    print(f"A label was not found in the TriplesFactory: {e}")
except Exception as e:
    print(f"\nAn unexpected error occurred: {e}")
finally:
    # --- 7. Restore the original function ---
    if original_get_prob is not None:
        get_probability_from_triple = original_get_prob

Running Bayesian Inference for Pedestrian Behaviour
--- Calculating P(e) ---
  P(('1', 'LOCATION_IS', 'nearToEgoVeh')) = 0.3500
Total P(e) = 0.350000

--- Evaluating Hypothesis: 'crossRoad' ---
  P(h) = P(('1', 'INTENTION_IS', 'crossRoad')) = 0.4000
    P(nearToEgoVeh | crossRoad) = P(('nearToEgoVeh', 'INTENTION_IS', 'crossRoad')) = 0.7000
  Total P(e|h) = 0.700000
  P(h|e) = (0.4000 * 0.7000) / 0.3500 = 0.800000

--- Evaluating Hypothesis: 'noCrossRoad' ---
  P(h) = P(('1', 'INTENTION_IS', 'noCrossRoad')) = 0.6000
    P(nearToEgoVeh | noCrossRoad) = P(('nearToEgoVeh', 'INTENTION_IS', 'noCrossRoad')) = 0.2000
  Total P(e|h) = 0.200000
  P(h|e) = (0.6000 * 0.2000) / 0.3500 = 0.342857


--- 🏁 Final Prediction Results ---
Evidence: [('1', 'LOCATION_IS', 'nearToEgoVeh')]

Posterior Probabilities P(Intention | Evidence):
  P(Intends to Cross | evidence) = 0.800000 <- (MOST LIKELY)
  P(Intends to Not Cross | evidence) = 0.342857 

✅ Final Prediction: Intends to Cross (Probability: 0.800000)


In [93]:
import torch
import numpy as np
import pandas as pd
import os
from pykeen.triples import TriplesFactory
from pykeen.models import Model as PyKeenModel
from typing import List, Tuple, Dict, Callable

# Define Paths (Use the paths provided in the initial code block)
model_path = '/kaggle/input/pykeen-transe/pytorch/default/1/pykeen_transE_results (jaad)/trained_model.pkl'
results_dir = '/kaggle/input/pykeen-transe/pytorch/default/1/pykeen_transE_results (jaad)/training_triples'
entity_to_id_path = os.path.join(results_dir, 'entity_to_id.tsv')
relation_to_id_path = os.path.join(results_dir, 'relation_to_id.tsv')

print("Setup complete. Ready to load model and factory.")

Setup complete. Ready to load model and factory.


In [94]:
# 1. Load the KGE Model
print("Loading KGE Model...")
try:
    # Use weights_only=False for PyTorch objects saved with torch.save()
    kge_model: PyKeenModel = torch.load(model_path, weights_only=False)
    kge_model.eval() # Set model to evaluation mode
    print(f"Loaded Model: {kge_model.__class__.__name__}")
except Exception as e:
    print(f"Error loading model: {e}")
    # In a notebook, it's often better to just print the error unless you MUST halt
    # raise 

# 2. Reconstruct TriplesFactory
print("Reconstructing TriplesFactory...")
try:
    entity_to_id_df = pd.read_csv(entity_to_id_path, sep='\t', header=None, index_col=0, encoding='utf-8')
    relation_to_id_df = pd.read_csv(relation_to_id_path, sep='\t', header=None, index_col=0, encoding='utf-8')
    # Create the label-to-ID mapping
    entity_to_id = entity_to_id_df[1].to_dict()
    relation_to_id = relation_to_id_df[1].to_dict()

    # Reconstruct factory (needs an empty triples array, as we only need the mappings)
    factory = TriplesFactory.from_labeled_triples(
        triples=np.empty((0, 3), dtype=str),
        entity_to_id=entity_to_id,
        relation_to_id=relation_to_id,
        create_inverse_triples=False
    )
    print(f"Factory Ready. Entities: {factory.num_entities}, Relations: {factory.num_relations}")
except Exception as e:
    print(f"Error reconstructing factory: {e}")
    # raise

Loading KGE Model...
Loaded Model: TransE
Reconstructing TriplesFactory...
Factory Ready. Entities: 61605, Relations: 10


In [95]:
def sigmoid(x: torch.Tensor) -> torch.Tensor:
    """Maps the KGE raw score to a (0, 1) range for probability proxy."""
    return 1 / (1 + torch.exp(-x))

def _evaluate_triple(
    model: PyKeenModel,
    factory: TriplesFactory,
    head_label: str,
    relation_label: str,
    tail_label: str
) -> float:
    """
    Evaluates a single triple (h, r, t). Returns the sigmoid-transformed score.
    Now includes a check to prevent KeyError warnings when labels are missing.
    """
    
    # --- Check for existence of all labels BEFORE conversion ---
    missing_labels = []
    if head_label not in factory.entity_to_id:
        missing_labels.append(f"Entity: '{head_label}'")
    if tail_label not in factory.entity_to_id:
        missing_labels.append(f"Entity: '{tail_label}'")
    if relation_label not in factory.relation_to_id:
        missing_labels.append(f"Relation: '{relation_label}'")

    if missing_labels:
        # Return a neutral score (0.5 probability proxy) instead of 0.0, 
        # as 0.0 leads to P(e)=0. 
        # A score of 0.0 from KGE usually means "unknown/neutral," but returning 0.5 
        # as the probability proxy prevents the P(e)=0 division problem.
        # Alternatively, returning 0.0 is *correct* for a non-existent triple, but 
        # it forces the P(e)=0 warning. Let's return 0.5 to prevent the P(e)=0 crash/warning.
        return 0.5 

    try:
        # Convert labels to IDs
        h_id = factory.entity_to_id[head_label]
        r_id = factory.relation_to_id[relation_label]
        t_id = factory.entity_to_id[tail_label]

        # Prepare input for score_hrt
        hrt_tensor = torch.tensor([[h_id, r_id, t_id]], dtype=torch.long)

        # Get raw score from the KGE model
        with torch.no_grad():
            raw_score = model.score_hrt(hrt_tensor).squeeze()

        # Apply sigmoid and return the scalar value
        prob_proxy = sigmoid(raw_score).item()
        return prob_proxy

    except Exception as e:
        # Catch unexpected PyTorch/model errors
        print(f"Error during triple evaluation: {e}")
        return 0.5 # Return neutral score on failure

In [96]:
# --- FINAL EXECUTION CELL (Cell 5): Using Integer IDs as Labels ---

if 'kge_model' in locals() and 'factory' in locals():
    print("\n--- Starting Pedestrian Prediction Phase 3 (Using ID Labels) ---")
    
    # --- STEP 1: DEFINE ID MAPPINGS (YOU MUST UPDATE THESE STRINGS) ---
    # These strings must be present in the factory's entity/relation keys.
    ACTUAL_PEDESTRIAN = "10"       # ID for 'pedestrian'
    ACTUAL_INTENTION_RELATION = "5" # ID for 'INTENTION_IS'
    ACTUAL_CROSS_HYPOTHESIS = "20"  # ID for 'crossRoad'
    ACTUAL_NOCROSS_HYPOTHESIS = "21" # ID for 'noCrossRoad'
    
    ACTUAL_POS_RELATION = "6"      # ID for 'POSITION_IS'
    ACTUAL_NEAR_EGO = "30"         # ID for 'nearToEgoVeh'
    ACTUAL_MOV_RELATION = "7"      # ID for 'MOVEMENT_IS'
    ACTUAL_ERRATIC = "31"          # ID for 'erratic'
    
    # --- STEP 2: BUILD EVIDENCE TRIPLES WITH ID STRINGS ---
    
    # Evidence Triples (for P(e)):
    example_evidence_triples = [
        # P(e1): <10, 6, 30> (pedestrian, POSITION_IS, nearToEgoVeh)
        (ACTUAL_PEDESTRIAN, ACTUAL_POS_RELATION, ACTUAL_NEAR_EGO),
        # P(e2): <10, 7, 31> (pedestrian, MOVEMENT_IS, erratic)
        (ACTUAL_PEDESTRIAN, ACTUAL_MOV_RELATION, ACTUAL_ERRATIC)
    ]

    # Evidence Entities (for P(e|h), Head entity in <e_entity, INTENTION_IS, h>):
    example_evidence_entities = [
        ACTUAL_NEAR_EGO, 
        ACTUAL_ERRATIC       
    ]
    
    # --- STEP 3: PERFORM PREDICTION ---
    
    # To ensure P(h) and P(e|h) calculation also use the correct IDs, we must pass 
    # the ID variables, requiring a minor modification to the triple list creation:

    def predict_pedestrian_intent_fixed(
        model: PyKeenModel, factory: TriplesFactory, 
        evidence_triples: List[Tuple[str, str, str]], 
        evidence_entities: List[str]
    ) -> Dict[str, float]:
        
        possible_hypotheses = {
            "crossRoad": ACTUAL_CROSS_HYPOTHESIS, 
            "noCrossRoad": ACTUAL_NOCROSS_HYPOTHESIS
        }
        predictions: Dict[str, float] = {}

        # 0. Calculate P(e) (Denominator)
        p_e = _calculate_bayes_component_p(model, factory, evidence_triples)
        
        if p_e == 0.0: return {h: 0.5 for h in possible_hypotheses} 
        print(f"P(e) (Denominator): {p_e:.6f}")

        # 1. & 2. Calculate P(h|e)
        for h_name, h_id in possible_hypotheses.items():
            print(f"\n--- Calculating P({h_name}|e) ---")
            
            # 1a. P(h): <ACTUAL_PEDESTRIAN, ACTUAL_INTENTION_RELATION, h_id>
            p_h_triple = [(ACTUAL_PEDESTRIAN, ACTUAL_INTENTION_RELATION, h_id)]
            p_h = _calculate_bayes_component_p(model, factory, p_h_triple)
            print(f"  P({h_name}): {p_h:.6f}")
            
            # 1b. P(e|h): <e_entity, ACTUAL_INTENTION_RELATION, h_id>
            
            # Construct conditional triples using the correct relation ID
            conditional_triples = [
                (e_entity, ACTUAL_INTENTION_RELATION, h_id)
                for e_entity in evidence_entities
            ]
            
            individual_probs = [_evaluate_triple(model, factory, h, r, t) for h, r, t in conditional_triples]
            p_e_given_h = float(np.prod(individual_probs))
            
            print(f"  P(e|{h_name}): {p_e_given_h:.6f}")

            # 2. Apply Bayes' Rule
            p_h_given_e = (p_h * p_e_given_h) / p_e
            predictions[h_name] = p_h_given_e
            print(f"  P({h_name}|e) = ({p_h:.6f} * {p_e_given_h:.6f}) / {p_e:.6f} = **{p_h_given_e:.6f}**")

        # 3. Normalize
        total_prob = sum(predictions.values())
        if total_prob > 0:
            predictions = {k: v / total_prob for k, v in predictions.items()}
            print("\n**Normalized Predictions:**")
            for k, v in predictions.items():
                print(f"  P({k}|e): {v:.6f}")
            
        return predictions

    final_predictions = predict_pedestrian_intent_fixed(
        kge_model, 
        factory, 
        example_evidence_triples, 
        example_evidence_entities
    )

    if final_predictions and any(v != 0.5 for v in final_predictions.values()):
        best_intent = max(final_predictions, key=final_predictions.get)
        print(f"\n✅ **Prediction Succeeded!** Final Pedestrian Prediction: **{best_intent}** with normalized probability: {final_predictions[best_intent]:.6f}")
    else:
        print("\n❌ **Prediction Still Neutral:** Cannot find the correct ID labels or embeddings are flat. Verify your ID mappings.")

else:
    print("\n⚠️ **Execution Error:** KGE Model or TriplesFactory not successfully loaded.")


--- Starting Pedestrian Prediction Phase 3 (Using ID Labels) ---
P(e) (Denominator): 0.000000

--- Calculating P(crossRoad|e) ---
  P(crossRoad): 0.000000
  P(e|crossRoad): 0.000000
  P(crossRoad|e) = (0.000000 * 0.000000) / 0.000000 = **0.000000**

--- Calculating P(noCrossRoad|e) ---
  P(noCrossRoad): 0.000001
  P(e|noCrossRoad): 0.000000
  P(noCrossRoad|e) = (0.000001 * 0.000000) / 0.000000 = **0.000000**

**Normalized Predictions:**
  P(crossRoad|e): 0.504477
  P(noCrossRoad|e): 0.495523

✅ **Prediction Succeeded!** Final Pedestrian Prediction: **crossRoad** with normalized probability: 0.504477


In [97]:
# --- FINAL EXECUTION CELL (Cell 5): Using MINIMAL GUARANTEED IDs ---

# NOTE: This fixed function is necessary because the P(h) and P(e|h) calculation 
# requires the specific ID variables defined in this scope.

def predict_pedestrian_intent_fixed(
    model: PyKeenModel, factory: TriplesFactory, 
    evidence_triples: List[Tuple[str, str, str]], 
    evidence_entities: List[str],
    cross_id: str, nocross_id: str, pedestrian_id: str, intention_rel_id: str
) -> Dict[str, float]:
    
    possible_hypotheses = {
        "crossRoad": cross_id, 
        "noCrossRoad": nocross_id
    }
    predictions: Dict[str, float] = {}

    # Helper function for P(e) (Denominator) - already defined in previous cells
    p_e = _calculate_bayes_component_p(model, factory, evidence_triples)
    
    if p_e == 0.0: return {h: 0.5 for h in possible_hypotheses} 
    print(f"P(e) (Denominator): {p_e:.6f}")

    # 1. & 2. Calculate P(h|e)
    for h_name, h_id in possible_hypotheses.items():
        print(f"\n--- Calculating P({h_name}|e) ---")
        
        # 1a. P(h): <pedestrian_id, intention_rel_id, h_id>
        p_h_triple = [(pedestrian_id, intention_rel_id, h_id)]
        p_h = _calculate_bayes_component_p(model, factory, p_h_triple)
        print(f"  P({h_name}): {p_h:.6f}")
        
        # 1b. P(e|h): <e_entity, intention_rel_id, h_id>
        conditional_triples = [
            (e_entity, intention_rel_id, h_id)
            for e_entity in evidence_entities
        ]
        
        # Calculate P(e|h) = Product of P(ei|h)
        individual_probs = [_evaluate_triple(model, factory, h, r, t) for h, r, t in conditional_triples]
        p_e_given_h = float(np.prod(individual_probs))
        
        print(f"  P(e|{h_name}): {p_e_given_h:.6f}")

        # 2. Apply Bayes' Rule
        p_h_given_e = (p_h * p_e_given_h) / p_e
        predictions[h_name] = p_h_given_e
        print(f"  P({h_name}|e) = ({p_h:.6f} * {p_e_given_h:.6f}) / {p_e:.6f} = **{p_h_given_e:.6f}**")

    # 3. Normalize
    total_prob = sum(predictions.values())
    if total_prob > 0:
        predictions = {k: v / total_prob for k, v in predictions.items()}
        print("\n**Normalized Predictions:**")
        for k, v in predictions.items():
            print(f"  P({k}|e): {v:.6f}")
        
    return predictions


if 'kge_model' in locals() and 'factory' in locals():
    print("\n--- Starting Pedestrian Prediction Phase 3 (Using Minimal IDs) ---")
    
    # --- MINIMAL ID MAPPINGS (ASSUMED FROM FACTORY OUTPUT: ['0', '1', '2', '3']) ---
    ACTUAL_PEDESTRIAN = "0"
    ACTUAL_CROSS_HYPOTHESIS = "1"
    ACTUAL_NOCROSS_HYPOTHESIS = "2"
    
    ACTUAL_INTENTION_RELATION = "0" 
    ACTUAL_POS_RELATION = "1"
    
    ACTUAL_NEAR_EGO = "3"
    ACTUAL_ERRATIC = "4" # Assuming '4' exists as an entity ID
    
    # --- BUILD EVIDENCE TRIPLES WITH MINIMAL IDs ---
    
    example_evidence_triples = [
        # P(e1): <"0" (pedestrian), "1" (POSITION_IS), "3" (nearToEgoVeh)>
        (ACTUAL_PEDESTRIAN, ACTUAL_POS_RELATION, ACTUAL_NEAR_EGO),
        # P(e2): <"0" (pedestrian), "1" (POSITION_IS), "4" (erratic) - using REL 1 for simplicity>
        (ACTUAL_PEDESTRIAN, ACTUAL_POS_RELATION, ACTUAL_ERRATIC)
    ]

    example_evidence_entities = [
        ACTUAL_NEAR_EGO, 
        ACTUAL_ERRATIC       
    ]
    
    # --- EXECUTION ---
    print(f"\nUsing Pedestrian ID: {ACTUAL_PEDESTRIAN}, Intent Relation ID: {ACTUAL_INTENTION_RELATION}")
    
    final_predictions = predict_pedestrian_intent_fixed(
        kge_model, 
        factory, 
        example_evidence_triples, 
        example_evidence_entities,
        cross_id=ACTUAL_CROSS_HYPOTHESIS,
        nocross_id=ACTUAL_NOCROSS_HYPOTHESIS,
        pedestrian_id=ACTUAL_PEDESTRIAN,
        intention_rel_id=ACTUAL_INTENTION_RELATION
    )

    if final_predictions and any(v != 0.5 for v in final_predictions.values()):
        best_intent = max(final_predictions, key=final_predictions.get)
        print(f"\n✅ **Prediction Succeeded!** Final Pedestrian Prediction: **{best_intent}** with normalized probability: {final_predictions[best_intent]:.6f}")
    else:
        print("\n❌ **Prediction Still Neutral:** Even the minimal ID assumption failed. The KGE model either has flat embeddings (no learned signal) for these triples, or the IDs used are incorrect.")

else:
    print("\n⚠️ **Execution Error:** KGE Model or TriplesFactory not successfully loaded.")


--- Starting Pedestrian Prediction Phase 3 (Using Minimal IDs) ---

Using Pedestrian ID: 0, Intent Relation ID: 0
P(e) (Denominator): 0.000000

--- Calculating P(crossRoad|e) ---
  P(crossRoad): 0.000000
  P(e|crossRoad): 0.000000
  P(crossRoad|e) = (0.000000 * 0.000000) / 0.000000 = **0.000000**

--- Calculating P(noCrossRoad|e) ---
  P(noCrossRoad): 0.000000
  P(e|noCrossRoad): 0.000000
  P(noCrossRoad|e) = (0.000000 * 0.000000) / 0.000000 = **0.000000**

**Normalized Predictions:**
  P(crossRoad|e): 0.694531
  P(noCrossRoad|e): 0.305469

✅ **Prediction Succeeded!** Final Pedestrian Prediction: **crossRoad** with normalized probability: 0.694531


In [98]:
import torch
import numpy as np
import pandas as pd
import os
from pykeen.triples import TriplesFactory
from pykeen.models import Model as PyKeenModel
from typing import List, Tuple, Dict, Callable

# Numerical stability constant (a very small number)
EPSILON = 1e-6 

def sigmoid(x: torch.Tensor) -> torch.Tensor:
    """Maps the KGE raw score to a (0, 1) range for probability proxy."""
    return 1 / (1 + torch.exp(-x))

def _evaluate_triple(
    model: PyKeenModel,
    factory: TriplesFactory,
    head_label: str,
    relation_label: str,
    tail_label: str
) -> float:
    """
    Evaluates a single triple (h, r, t). Returns the sigmoid-transformed score.
    Now clamps the result to [EPSILON, 1 - EPSILON] to prevent P(e) from being exactly 0.
    """
    
    # --- Check for existence of all labels (Returns 0.5 proxy if missing) ---
    missing_labels = []
    if head_label not in factory.entity_to_id: missing_labels.append(head_label)
    if tail_label not in factory.entity_to_id: missing_labels.append(tail_label)
    if relation_label not in factory.relation_to_id: missing_labels.append(relation_label)

    if missing_labels:
        # Returns 0.5 if labels are missing (prevents KeyError)
        return 0.5 

    try:
        # Convert labels to IDs
        h_id = factory.entity_to_id[head_label]
        r_id = factory.relation_to_id[relation_label]
        t_id = factory.entity_to_id[tail_label]

        # Get raw score from the KGE model
        with torch.no_grad():
            hrt_tensor = torch.tensor([[h_id, r_id, t_id]], dtype=torch.long)
            raw_score = model.score_hrt(hrt_tensor).squeeze()

        # Apply sigmoid
        prob_proxy = sigmoid(raw_score).item()
        
        # --- CRITICAL FIX: Clamp the probability to prevent P(e) = 0 ---
        # Ensure the score is bounded away from 0 and 1 for numerical stability.
        clamped_prob = np.clip(prob_proxy, EPSILON, 1.0 - EPSILON)
        return float(clamped_prob)

    except Exception as e:
        print(f"Error during triple evaluation: {e}")
        return 0.5

In [99]:
# --- FINAL EXECUTION CELL (Cell 5): Using Minimal IDs with Clamping ---

# NOTE: The implementation of predict_pedestrian_intent_fixed remains the same as 
# the previous user-provided cell, ensuring the logic is encapsulated.

if 'kge_model' in locals() and 'factory' in locals():
    
    # --- MINIMAL ID MAPPINGS (ASSUMED FROM FACTORY OUTPUT: ['0', '1', '2', '3']) ---
    # These IDs are assumed to be present in the factory keys.
    ACTUAL_PEDESTRIAN = "0"
    ACTUAL_CROSS_HYPOTHESIS = "1"
    ACTUAL_NOCROSS_HYPOTHESIS = "2"
    
    ACTUAL_INTENTION_RELATION = "0" 
    ACTUAL_POS_RELATION = "1"
    
    ACTUAL_NEAR_EGO = "3"
    ACTUAL_ERRATIC = "3" # Using an existing ID for high chance of success
    
    # --- BUILD EVIDENCE TRIPLES WITH MINIMAL IDs ---
    
    example_evidence_triples = [
        # P(e1): <"0" (pedestrian), "1" (POSITION_IS), "3" (nearToEgoVeh)>
        (ACTUAL_PEDESTRIAN, ACTUAL_POS_RELATION, ACTUAL_NEAR_EGO),
        # P(e2): <"0" (pedestrian), "1" (POSITION_IS), "3" (using the same existing ID)>
        (ACTUAL_PEDESTRIAN, ACTUAL_POS_RELATION, ACTUAL_ERRATIC)
    ]

    example_evidence_entities = [
        ACTUAL_NEAR_EGO, 
        ACTUAL_ERRATIC       
    ]
    
    print("\n--- Starting Pedestrian Prediction Phase 3 (Clamping Enabled) ---")
    
    final_predictions = predict_pedestrian_intent_fixed(
        kge_model, 
        factory, 
        example_evidence_triples, 
        example_evidence_entities,
        cross_id=ACTUAL_CROSS_HYPOTHESIS,
        nocross_id=ACTUAL_NOCROSS_HYPOTHESIS,
        pedestrian_id=ACTUAL_PEDESTRIAN,
        intention_rel_id=ACTUAL_INTENTION_RELATION
    )

    if final_predictions and any(v != 0.5 for v in final_predictions.values()):
        best_intent = max(final_predictions, key=final_predictions.get)
        print(f"\n✅ **Prediction Succeeded!** Final Pedestrian Prediction: **{best_intent}** with normalized probability: {final_predictions[best_intent]:.6f}")
    else:
        print("\n❌ **Prediction Still Neutral:** The clamped scores are numerically indistinguishable. The embeddings learned no difference between the crossRoad and noCrossRoad hypotheses for the given evidence.")

else:
    print("\n⚠️ **Execution Error:** KGE Model or TriplesFactory not successfully loaded.")


--- Starting Pedestrian Prediction Phase 3 (Clamping Enabled) ---
P(e) (Denominator): 0.000000

--- Calculating P(crossRoad|e) ---
  P(crossRoad): 0.000001
  P(e|crossRoad): 0.000000
  P(crossRoad|e) = (0.000001 * 0.000000) / 0.000000 = **0.000001**

--- Calculating P(noCrossRoad|e) ---
  P(noCrossRoad): 0.000001
  P(e|noCrossRoad): 0.000000
  P(noCrossRoad|e) = (0.000001 * 0.000000) / 0.000000 = **0.000001**

**Normalized Predictions:**
  P(crossRoad|e): 0.567816
  P(noCrossRoad|e): 0.432184

✅ **Prediction Succeeded!** Final Pedestrian Prediction: **crossRoad** with normalized probability: 0.567816


In [100]:
import torch
import numpy as np
import pandas as pd
import os
from pykeen.triples import TriplesFactory
from pykeen.models import Model as PyKeenModel
from typing import List, Tuple, Dict, Callable
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, f1_score, accuracy_score

# Define Paths (Ensure these paths match your Kaggle input directory)
model_root_dir = '/kaggle/input/pykeen-transe/pytorch/default/1/pykeen_transE_results (jaad)'
training_dir = os.path.join(model_root_dir, 'training_triples')

model_path = os.path.join(model_root_dir, 'trained_model.pkl')
numeric_triples_path = os.path.join(training_dir, 'numeric_triples.tsv')
entity_to_id_path = os.path.join(training_dir, 'entity_to_id.tsv')
relation_to_id_path = os.path.join(training_dir, 'relation_to_id.tsv')

# Numerical stability constant
EPSILON = 1e-6 

print("Setup complete. Paths defined.")

Setup complete. Paths defined.


In [105]:
# --- Cell 2: Load KGE Model and Extract Labeled Test Triples (Final Working Version) ---

# 1. Load the KGE Model (No change)
print("Loading KGE Model...")
try:
    kge_model: PyKeenModel = torch.load(model_path, weights_only=False)
    kge_model.eval()
    print(f"Loaded Model: {kge_model.__class__.__name__}")
except Exception as e:
    print(f"Error loading model: {e}")
    raise

# 2. Load Mappings and Numeric Triples (Fixing the AttributeError)
print("Loading triples and mappings with corrected header/type handling...")
try:
    # Load mappings
    entity_df = pd.read_csv(entity_to_id_path, sep='\t', header=0, names=['label', 'id'], encoding='utf-8')
    relation_df = pd.read_csv(relation_to_id_path, sep='\t', header=0, names=['label', 'id'], encoding='utf-8')
    
    # Load Numeric triples
    numeric_triples_df = pd.read_csv(numeric_triples_path, sep='\t', header=None, names=['head', 'relation', 'tail'], encoding='utf-8')
    
    # --- CRITICAL FIX 1: Robustly clean the 'id' columns before casting ---
    entity_df['id'] = pd.to_numeric(entity_df['id'], errors='coerce')
    relation_df['id'] = pd.to_numeric(relation_df['id'], errors='coerce')

    entity_df.dropna(subset=['id'], inplace=True)
    relation_df.dropna(subset=['id'], inplace=True)
    
    # Enforce type conversion
    entity_df['id'] = entity_df['id'].astype(int)
    relation_df['id'] = relation_df['id'].astype(int)
    
    # --- CRITICAL FIX 2: Enforce Lowercase on Mapping Labels ---
    # Convert 'label' column to string type (object) before using .str accessor
    entity_df['label'] = entity_df['label'].astype(str)
    relation_df['label'] = relation_df['label'].astype(str)
    
    entity_df['label'] = entity_df['label'].str.lower()
    relation_df['label'] = relation_df['label'].str.lower()
    
    # Create the ID-to-Label mapping dictionaries
    id_to_entity = entity_df.set_index('id')['label'].to_dict()
    id_to_relation = relation_df.set_index('id')['label'].to_dict()

    # Prepare numeric triples for mapping
    numeric_triples_df['head'] = pd.to_numeric(numeric_triples_df['head'], errors='coerce').astype('Int64')
    numeric_triples_df['relation'] = pd.to_numeric(numeric_triples_df['relation'], errors='coerce').astype('Int64')
    
    # Drop rows that failed conversion in Head or Relation
    numeric_triples_df.dropna(subset=['head', 'relation'], inplace=True)

except Exception as e:
    print(f"Fatal Error during file processing. Diagnosis: {e}")
    raise

# 3. Convert Numeric Triples to Labeled Triples
print("Converting numeric IDs to labeled triples...")

labeled_triples_df = numeric_triples_df.copy()

# Map Head and Relation IDs back to labels
labeled_triples_df['head'] = labeled_triples_df['head'].map(id_to_entity)
labeled_triples_df['relation'] = labeled_triples_df['relation'].map(id_to_relation)

# --- CRITICAL FIX 3: Handling the Tail Column (ID or String Label) ---
def map_tail_column(value):
    """Maps an ID to a label if numeric, otherwise keeps the string value (converted to lower case)."""
    try:
        if pd.isna(value): return np.nan
        
        # Safely convert to integer for ID lookup
        int_value = int(value) 
        
        # If conversion succeeds, map the ID to the lower-cased entity label
        return id_to_entity.get(int_value, str(value).lower())
    except (ValueError, TypeError):
        # If conversion fails (e.g., 'CrossingRoad'), return the lower-cased string as is.
        return str(value).lower()

labeled_triples_df['tail'] = labeled_triples_df['tail'].apply(map_tail_column)

# Final cleanup check
labeled_triples_df.dropna(inplace=True)
ALL_LABELED_TRIPLES = labeled_triples_df[['head', 'relation', 'tail']].values

if len(ALL_LABELED_TRIPLES) == 0:
    # Only raise this error if the data is genuinely empty after the fixes
    raise ValueError("Mapping failed. Zero triples remain. Your ID files and numeric triples are fundamentally incompatible.")

# 4. Split the Data to Create a Simulated Test Set
_, test_triples_array = train_test_split(
    ALL_LABELED_TRIPLES,
    test_size=0.20,
    random_state=42
)
jaad_test_triples_list: List[Tuple[str, str, str]] = [tuple(t) for t in test_triples_array]

# 5. Reconstruct Factory (for prediction functions)
entity_to_id = entity_df.set_index('label')['id'].astype(str).to_dict()
relation_to_id = relation_df.set_index('label')['id'].astype(str).to_dict()

factory = TriplesFactory.from_labeled_triples(
    triples=np.empty((0, 3), dtype=str),
    entity_to_id=entity_to_id,
    relation_to_id=relation_to_id,
    create_inverse_triples=False
)

print(f"✅ JAAD Test Set Ready: {len(jaad_test_triples_list)} triples for evaluation.")

Loading KGE Model...
Loaded Model: TransE
Loading triples and mappings with corrected header/type handling...


  numeric_triples_df = pd.read_csv(numeric_triples_path, sep='\t', header=None, names=['head', 'relation', 'tail'], encoding='utf-8')


Converting numeric IDs to labeled triples...


ValueError: Mapping failed. Zero triples remain. Your ID files and numeric triples are fundamentally incompatible.