# Model Demo

Inputs must include the entity delimiters "[E1S]" and "[E1E]" around occurrences of the first entity, and "[E2S]" and "[E2E]" around occurrences of the second entity.

In [1]:
import json
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Set model paths
MODEL_PATH = "cnn/cnn_model.h5"
TOKENIZER_PATH = "cnn/cnn_tokenizer.json"
RELATION_TO_DESCRIPTION_PATH = "../dataset/pid2name_filtered.json"

# Load relation to description mapping.
# The encoding is pinned to UTF-8 so the file decodes the same way on every
# platform instead of depending on the locale's default encoding.
with open(RELATION_TO_DESCRIPTION_PATH, "r", encoding="utf-8") as f:
    relation_to_desc = json.load(f)

# Assign every relation key a class index following sorted key order, then
# build the inverse lookup (class index -> relation key).
# NOTE(review): this ordering presumably has to match the label encoding used
# when the model was trained - confirm against the training code.
relation_id_map = {relation: idx for idx, relation in enumerate(sorted(relation_to_desc.keys()))}
id_relation_map = {idx: relation for relation, idx in relation_id_map.items()}

# Load the saved vocabulary; the JSON content is used directly as the
# tokenizer's word_index below.
with open(TOKENIZER_PATH, "r", encoding="utf-8") as f:
    word_index = json.load(f)

# Rebuild a tokenizer around the saved vocabulary rather than re-fitting it.
# filters="" disables character filtering, so the bracketed entity markers are
# not stripped from the input text.
tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=10000, oov_token="<OOV>", filters="")
tokenizer.word_index = word_index

# Load trained model
print(f"Loading model from {MODEL_PATH}")
model = tf.keras.models.load_model(MODEL_PATH)
model.summary()



Loading model from cnn/cnn_model.h5


In [2]:
def predict_relation(input_text, top_k=10):
    """
    Predicts the relation between entities in the input sentence.

    The sentence is tokenized with the module-level ``tokenizer``, padded or
    truncated to 100 tokens, and scored by the module-level ``model``. The
    highest-scoring relations are printed, best first, one per line, with the
    score shown as a percentage.

    Args:
        input_text (str): The input sentence containing entity markers.
        top_k (int): Maximum number of top predictions to display. Values
            larger than the number of model outputs are clamped to that
            number; zero or negative values print only the header.
    Returns:
        None (prints results)
    """
    sequence = tokenizer.texts_to_sequences([input_text])
    # NOTE(review): maxlen=100 presumably mirrors the sequence length used at
    # training time - confirm before changing it.
    sequence_padded = pad_sequences(sequence, maxlen=100, padding="post")

    # The batch holds a single sentence, so keep only its row of scores
    predictions = model.predict(sequence_padded)[0]

    # Never request more entries than the model produces; otherwise the
    # printing loop would index past the end of the selected predictions.
    k = max(0, min(top_k, len(predictions)))

    # Indices of the k highest scores, sorted descending
    top_indices = np.argsort(predictions)[::-1][:k]
    top_probs = predictions[top_indices] * 100

    # Print results
    print("\nPredictions:")
    for relation_id, confidence in zip(top_indices, top_probs):
        # Each mapping value unpacks into a (name, description) pair
        relation_name, relation_desc = relation_to_desc[id_relation_map[int(relation_id)]]
        print(f"{confidence:.2f}% - {relation_name} ({relation_desc})")

In [3]:
# Interactive demo loop: read one sentence per iteration and print its
# predicted relations until the user submits an empty line.
while True:
    try:
        inp = input("\nInput (press Enter to exit): ")
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or an interrupt while waiting for input ends the
        # session cleanly instead of leaving a traceback in the cell output.
        break
    if inp.strip() == "":
        break

    # Predict and display results
    predict_relation(inp)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step

Predictions:
83.52% - member of (organization or club to which the subject belongs. Do not use for membership in ethnic or social groups, nor for holding a position such as a member of parliament (use P39 for that).)
10.93% - platform (platform for which a work was developed or released, or the specific platform version of a software product)
1.58% - sport (sport in which the subject participates or belongs to)
1.29% - operating system (operating system (OS) on which a software works or the OS installed on hardware)
0.55% - sports season of league or competition (property that shows the competition of which the item is a season. Use P5138 for "season of club or team".)
0.48% - competition class (official classification by a regulating body under which the subject (events, teams, participants, or equipment) qualifies for inclusion)
0.30% - member of political party (the political party of which this politician is o