In [None]:
!pip install -U datasets

Collecting datasets
  Downloading datasets-3.6.0-py3-none-any.whl.metadata (19 kB)
Collecting fsspec<=2025.3.0,>=2023.1.0 (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets)
  Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.6.0-py3-none-any.whl (491 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.5/491.5 kB[0m [31m31.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2025.3.0-py3-none-any.whl (193 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m193.6/193.6 kB[0m [31m19.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: fsspec, datasets
  Attempting uninstall: fsspec
    Found existing installation: fsspec 2025.3.2
    Uninstalling fsspec-2025.3.2:
      Successfully uninstalled fsspec-2025.3.2
  Attempting uninstall: datasets
    Found existing installation: datasets 2.14.4
    Uninstalling datasets-2.14.4:
      Successfully uninstalled datasets-2.14.4
[31mERROR: pip's dependency r

In [None]:
!pip install pykeen

Collecting pykeen
  Downloading pykeen-1.11.1-py3-none-any.whl.metadata (85 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/85.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.9/85.9 kB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting dataclasses-json (from pykeen)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting click_default_group (from pykeen)
  Downloading click_default_group-1.2.4-py2.py3-none-any.whl.metadata (2.8 kB)
Collecting optuna>=2.0.0 (from pykeen)
  Downloading optuna-4.3.0-py3-none-any.whl.metadata (17 kB)
Collecting more_click (from pykeen)
  Downloading more_click-0.1.2-py3-none-any.whl.metadata (4.3 kB)
Collecting pystow>=0.4.3 (from pykeen)
  Downloading pystow-0.7.0-py3-none-any.whl.metadata (17 kB)
Collecting docdata>=0.0.5 (from pykeen)
  Downloading docdata-0.0.5-py3-none-any.whl.metadata (13 kB)
Collecting class_resolver>=0.6.0 (from 

In [None]:
import pandas as pd
import numpy as np
from datasets import load_dataset
from sklearn.linear_model import LogisticRegression # Kept for reference, but not used for main models
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score, f1_score
import time # To measure training times
import torch # PyKEEN uses PyTorch
import os # For creating directories

# TensorFlow / Keras imports for LSTMs
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, LSTM, Bidirectional, Dense, Concatenate, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical

# PyKEEN imports
from pykeen.pipeline import pipeline
from pykeen.triples import TriplesFactory
from pykeen.models import TransE, RotatE, DistMult, ComplEx

# --- 0. Configuration & Setup ---
MODEL_SAVE_DIR = "saved_pykeen_models"
if not os.path.isdir(MODEL_SAVE_DIR):
    # exist_ok guards against the directory appearing between the check and the call.
    os.makedirs(MODEL_SAVE_DIR, exist_ok=True)
    print(f"Created directory: {MODEL_SAVE_DIR}")

# Seed Python's `random`, NumPy and TensorFlow in one call so the neural-network
# runs below are repeatable across kernel restarts.
RANDOM_SEED = 42
tf.keras.utils.set_random_seed(RANDOM_SEED)

# Text Preprocessing Config
MAX_NUM_WORDS = 20000  # Max words for tokenizer
MAX_SEQUENCE_LENGTH = 100 # Max length of text sequences
WORD_EMBEDDING_DIM = 100 # Dimension for word embeddings in LSTM/BiLSTM

# KGE Config
KGE_EMBEDDING_DIM = 100 # Dimension for KGE statement embeddings
KGE_EPOCHS = 30         # Epochs for KGE model training (low for demo)

# Neural Network Classifier Config
NN_EPOCHS = 10          # Epochs for training LSTM/BiLSTM models (low for demo)
NN_BATCH_SIZE = 64

# --- 1. Load LIAR Dataset ---
print("Step 1: Loading LIAR Dataset from Hugging Face...")
try:
    # NOTE(review): force_redownload re-fetches the dataset on every run; consider
    # dropping it once the local cache is known to be good.
    liar_dataset = load_dataset('liar', download_mode="force_redownload")
    train_hf = liar_dataset['train']
    valid_hf = liar_dataset['validation']
    test_hf = liar_dataset['test']

    train_df = pd.DataFrame(train_hf)
    valid_df = pd.DataFrame(valid_hf) # Will be used for NN validation
    test_df = pd.DataFrame(test_hf)
    print(f"Loaded {len(train_df)} training, {len(valid_df)} validation, and {len(test_df)} test statements.")
except Exception as e:
    print(f"Error loading dataset from Hugging Face: {e}")
    # Raise instead of calling exit(): inside a notebook kernel exit() asks the kernel
    # to shut down rather than reliably halting the cell, so the code below would
    # otherwise go on to fail with unrelated NameErrors.
    raise RuntimeError("Could not load the LIAR dataset; aborting.") from e

# --- 2. Prepare Text Data for LSTMs ---
print("\nStep 2: Preparing text data for LSTM/BiLSTM models...")

# Raw statement strings for each split, in train / validation / test order.
train_texts, valid_texts, test_texts = (
    split_df['statement'].astype(str).values
    for split_df in (train_df, valid_df, test_df)
)

# The vocabulary is fitted on the training split only; unseen words map to "<unk>".
tokenizer = Tokenizer(num_words=MAX_NUM_WORDS, oov_token="<unk>")
tokenizer.fit_on_texts(train_texts)

train_sequences, valid_sequences, test_sequences = map(
    tokenizer.texts_to_sequences, (train_texts, valid_texts, test_texts)
)

word_index = tokenizer.word_index
print(f"Found {len(word_index)} unique tokens.")

# Every split is padded/truncated at the end to the same fixed length.
_pad_options = dict(maxlen=MAX_SEQUENCE_LENGTH, padding='post', truncating='post')
X_train_text_pad = pad_sequences(train_sequences, **_pad_options)
X_valid_text_pad = pad_sequences(valid_sequences, **_pad_options)
X_test_text_pad = pad_sequences(test_sequences, **_pad_options)

for _split_label, _padded in (
    ("training", X_train_text_pad),
    ("validation", X_valid_text_pad),
    ("test", X_test_text_pad),
):
    print(f"Shape of padded {_split_label} text sequences: {_padded.shape}")


# --- 3. Construct Knowledge Graph Triplets (as before) ---
print("\nStep 3: Constructing Knowledge Graph triplets...")
# Accumulator for (head, relation, tail) string triples.
kg_triplets_list = []
# All three splits feed the graph so every statement id gets an entity.
combined_df_for_kg = pd.concat((train_df, valid_df, test_df), ignore_index=True)

def normalize_entity(entity_str, prefix=""):
    """Turn a raw metadata value into a canonical entity label.

    The value is trimmed, lower-cased, and has separators rewritten: spaces,
    hyphens and commas become underscores; apostrophes, periods, colons and
    semicolons are removed.

    Args:
        entity_str: Raw value. Anything that is missing (None/NaN), not a
            string, blank, or empty after clean-up is treated as unknown.
        prefix: Optional namespace prefix (e.g. "spkr_") prepended to the label.

    Returns:
        ``prefix + cleaned_value``, or ``prefix + "unknown"`` for unknown input.
    """
    prefix = prefix or ""
    unknown_label = f"{prefix}unknown"
    if pd.isna(entity_str) or not isinstance(entity_str, str):
        return unknown_label

    # Trim BEFORE rewriting separators; otherwise surrounding spaces are turned
    # into underscores first and can no longer be stripped.
    cleaned = entity_str.strip().lower()
    cleaned = cleaned.translate(str.maketrans({
        " ": "_", "-": "_", ",": "_",
        "'": None, ".": None, ":": None, ";": None,
    })).strip()

    # Punctuation-only input leaves nothing behind; report it as unknown rather
    # than returning a bare prefix that would become a junk entity.
    if not cleaned:
        return unknown_label
    return prefix + cleaned

def discretize_count(count):
    """Bucket a numeric count into "none", "low", "medium" or "high".

    Missing values and zero map to "none"; values up to 2 are "low", values up
    to 10 are "medium", and anything larger is "high".
    """
    if pd.isna(count) or count == 0:
        return "none"
    # Inclusive upper bounds, checked in ascending order.
    for upper_bound, bucket in ((2, "low"), (10, "medium")):
        if count <= upper_bound:
            return bucket
    return "high"

# Speaker attribute column -> relation name. Built once instead of on every row.
SPEAKER_ATTRIBUTE_RELATIONS = {
    'job_title': 'has_job_title', 'state_info': 'from_state', 'party_affiliation': 'affiliated_with_party'
}
# Speaker credit-history column -> relation name.
SPEAKER_PROFILE_RELATIONS = {
    'barely_true_counts': 'has_barely_true_profile', 'false_counts': 'has_false_profile',
    'half_true_counts': 'has_half_true_profile', 'mostly_true_counts': 'has_mostly_true_profile',
    'pants_on_fire_counts': 'has_pants_fire_profile'
}

for _, row in combined_df_for_kg.iterrows():
    stmt_id_entity = normalize_entity(str(row['id']), "stmt_")
    speaker_entity = normalize_entity(row['speaker'], "spkr_")
    speaker_known = speaker_entity != "spkr_unknown"
    if speaker_known:
        kg_triplets_list.append((stmt_id_entity, 'has_speaker', speaker_entity))

    # Check for a missing subject BEFORE converting to str: str(None)/str(nan) are
    # non-empty strings and would otherwise create bogus "subj_none"/"subj_nan" entities.
    raw_subject = row['subject']
    subject_entity_str = "" if pd.isna(raw_subject) else str(raw_subject)
    if subject_entity_str.strip() != "":
        # A statement can carry several comma-separated subjects.
        for sub in subject_entity_str.split(','):
            norm_sub = normalize_entity(sub.strip(), "subj_")
            if norm_sub != "subj_unknown" and norm_sub != "subj_":
                kg_triplets_list.append((stmt_id_entity, 'has_subject', norm_sub))

    context_entity = normalize_entity(row['context'], "ctx_")
    if context_entity != "ctx_unknown":
        kg_triplets_list.append((stmt_id_entity, 'stated_in_context', context_entity))

    # Speaker-level facts only make sense when the speaker itself is known.
    if speaker_known:
        for col, rel_name_base in SPEAKER_ATTRIBUTE_RELATIONS.items():
            # Entity prefix is the first word of the column name: job_, state_, party_.
            attr_prefix = f"{col.split('_')[0]}_"
            val_entity = normalize_entity(row.get(col), attr_prefix)
            if val_entity != f"{attr_prefix}unknown":
                kg_triplets_list.append((speaker_entity, rel_name_base, val_entity))
        for col, rel_name in SPEAKER_PROFILE_RELATIONS.items():
            if col in row and pd.notna(row[col]):
                profile_entity = normalize_entity(f"profile_{discretize_count(row[col])}", "ch_")
                kg_triplets_list.append((speaker_entity, rel_name, profile_entity))

# Deduplicate, then sort: plain set iteration order for strings can differ between
# interpreter sessions, which would change the triple order fed to training even
# though a fixed random seed is used later.
kg_triplets_np = np.array(sorted(set(kg_triplets_list)), dtype=str)
if kg_triplets_np.shape[0] == 0:
    # Raise instead of exit(): in a notebook kernel exit() does not reliably stop the cell.
    raise RuntimeError("No triplets generated. Exiting.")
print(f"Generated {len(kg_triplets_np)} unique KG triplets.")
tf_kg = TriplesFactory.from_labeled_triples(kg_triplets_np)


# --- 4. Prepare Labels for Neural Network (One-Hot Encoded) ---
y_train_labels_orig = train_df['label'].values
y_valid_labels_orig = valid_df['label'].values
y_test_labels_orig = test_df['label'].values

num_classes = len(np.unique(np.concatenate((y_train_labels_orig, y_valid_labels_orig, y_test_labels_orig))))
print(f"Number of classes: {num_classes}")

y_train_nn = to_categorical(y_train_labels_orig, num_classes=num_classes)
y_valid_nn = to_categorical(y_valid_labels_orig, num_classes=num_classes)
y_test_nn = to_categorical(y_test_labels_orig, num_classes=num_classes)

# Read the id -> name mapping from the dataset's own label feature instead of
# hard-coding it: a hand-written order that disagrees with the dataset's integer
# encoding silently mislabels every row of the classification report.
_label_names = getattr(train_hf.features['label'], 'names', None) or []
label_mapping = dict(enumerate(_label_names))
# Falls back to 'unknown_<i>' for any class id the feature does not name.
class_names_report = [label_mapping.get(i, f'unknown_{i}') for i in range(num_classes)]


# --- 5. Loop Through KGE Models, Train KGE, Extract KGE Embeddings, Build & Train Hybrid NN ---
kge_models_to_test_pykeen = {
    "TransE": TransE, "RotatE": RotatE, "DistMult": DistMult, "ComplEx": ComplEx
}
results_comparison = {}
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"PyKEEN will use device: {device}")

# Per-model loss / optimizer settings layered on top of the shared pipeline arguments.
KGE_PIPELINE_OVERRIDES = {
    "RotatE": {'loss': 'MarginRankingLoss', 'loss_kwargs': {'margin': 1.0}, 'optimizer_kwargs': {'lr': 0.00005}},
    "ComplEx": {'loss': 'NegativeLogLikelihood', 'optimizer_kwargs': {'lr': 0.0001}},
    "TransE": {'loss': 'MarginRankingLoss', 'loss_kwargs': {'margin': 1.0}, 'optimizer_kwargs': {'lr': 0.001}},
    "DistMult": {'loss': 'NegativeLogLikelihood', 'optimizer_kwargs': {'lr': 0.001}},
}


def build_hybrid_model(nn_type, kge_feature_dim):
    """Build and compile the two-input text + KGE classifier.

    Args:
        nn_type: "LSTM" for a unidirectional text encoder; any other value
            builds the bidirectional variant.
        kge_feature_dim: Width of the per-statement KGE feature vector.

    Returns:
        A compiled Keras Model with inputs [text_input, kge_input] and a
        softmax output over ``num_classes``.
    """
    text_input = Input(shape=(MAX_SEQUENCE_LENGTH,), name='text_input')
    # mask_zero=True: sequences are post-padded with 0, so without a mask the
    # recurrent layer keeps stepping through the trailing padding and its final
    # state is taken after the pad run instead of after the last real token.
    # (The deprecated `input_length` argument is dropped; the Input layer already
    # fixes the sequence length.)
    word_embed_layer = Embedding(input_dim=min(MAX_NUM_WORDS, len(word_index) + 1),
                                 output_dim=WORD_EMBEDDING_DIM,
                                 mask_zero=True,
                                 name='word_embedding')(text_input)

    if nn_type == "LSTM":
        text_features = LSTM(128, dropout=0.2, recurrent_dropout=0.2, name='lstm_layer')(word_embed_layer)
    else: # BiLSTM
        # Halved units so both directions together match the LSTM's output width.
        text_features = Bidirectional(LSTM(64, dropout=0.2, recurrent_dropout=0.2), name='bilstm_layer')(word_embed_layer)

    kge_input = Input(shape=(kge_feature_dim,), name='kge_input')

    concatenated_features = Concatenate(name='concat_layer')([text_features, kge_input])

    dense_layer = Dense(128, activation='relu', name='dense_1')(concatenated_features)
    dropout_layer = Dropout(0.5, name='dropout_1')(dense_layer)
    output_layer = Dense(num_classes, activation='softmax', name='output_layer')(dropout_layer)

    model = Model(inputs=[text_input, kge_input], outputs=output_layer)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model


for kge_model_name, KGEModelClass_pykeen in kge_models_to_test_pykeen.items():
    print(f"\n===== Processing KGE Model: {kge_model_name} (PyKEEN) =====")
    start_time_kge = time.time()
    print(f"Training {kge_model_name} with k={KGE_EMBEDDING_DIM}, epochs={KGE_EPOCHS}...")

    pipeline_kwargs = dict(model=KGEModelClass_pykeen, model_kwargs=dict(embedding_dim=KGE_EMBEDDING_DIM),
                           training_kwargs=dict(num_epochs=KGE_EPOCHS, batch_size=2048, use_tqdm_batch=False), # Increased batch size
                           random_seed=42, device=device)
    pipeline_kwargs.update(KGE_PIPELINE_OVERRIDES.get(kge_model_name, {}))

    try:
        # NOTE(review): the same factory is passed for training and testing, so the
        # pipeline's evaluation step scores the training triples themselves.
        pykeen_result = pipeline(training=tf_kg, testing=tf_kg, **pipeline_kwargs)
        trained_kge_model = pykeen_result.model
        model_save_path = os.path.join(MODEL_SAVE_DIR, f"{kge_model_name}_pykeen_model.pt")
        torch.save(trained_kge_model, model_save_path)
        print(f"Saved {kge_model_name} model to {model_save_path}")
    except Exception as e:
        # Best-effort: record the failure for both NN variants and move on to the next KGE model.
        print(f"Error training/saving {kge_model_name} with PyKEEN: {e}. Skipping.")
        results_comparison[f"{kge_model_name}_LSTM"] = {'accuracy': 'KGE Error', 'f1_macro': 'KGE Error', 'report': str(e)}
        results_comparison[f"{kge_model_name}_BiLSTM"] = {'accuracy': 'KGE Error', 'f1_macro': 'KGE Error', 'report': str(e)}
        continue
    kge_training_time = time.time() - start_time_kge
    print(f"{kge_model_name} (PyKEEN) training complete. Time: {kge_training_time:.2f}s.")

    # Extract KGE Embeddings
    entity_embedding_layer = trained_kge_model.entity_representations[0]
    raw_embeddings_tensor = entity_embedding_layer(indices=None).detach().cpu()
    if raw_embeddings_tensor.is_complex():
        # Keras layers need real-valued features: lay real and imaginary parts side by side.
        print(f"  {kge_model_name} complex embeddings: concatenating real/imag parts.")
        entity_representations_np = np.concatenate((raw_embeddings_tensor.real.numpy(), raw_embeddings_tensor.imag.numpy()), axis=1)
    else:
        entity_representations_np = raw_embeddings_tensor.numpy()
    # Take the width from the array itself so both branches stay consistent with the data.
    kge_feature_dim_for_nn = entity_representations_np.shape[1]

    entity_to_embedding_pykeen = {label: entity_representations_np[id_] for id_, label in tf_kg.entity_id_to_label.items()}
    # Zero vector used for statements whose entity is absent from the graph.
    default_kge_embedding = np.zeros(kge_feature_dim_for_nn)

    def get_statement_kge_embeddings(df, embeddings_dict):
        """Return one embedding row per statement in `df`, looked up by its "stmt_<id>" label."""
        return np.array([embeddings_dict.get(normalize_entity(str(id_val), "stmt_"), default_kge_embedding) for id_val in df['id']])

    X_train_kge = get_statement_kge_embeddings(train_df, entity_to_embedding_pykeen)
    X_valid_kge = get_statement_kge_embeddings(valid_df, entity_to_embedding_pykeen)
    X_test_kge = get_statement_kge_embeddings(test_df, entity_to_embedding_pykeen)
    print(f"Shape of KGE embeddings for NN training: {X_train_kge.shape}")


    # Loop for LSTM and BiLSTM variants
    for nn_type in ["LSTM", "BiLSTM"]:
        print(f"\n--- Training Hybrid {nn_type} model with {kge_model_name} embeddings ---")
        start_time_nn = time.time()

        hybrid_model = build_hybrid_model(nn_type, kge_feature_dim_for_nn)
        # hybrid_model.summary() # Optional: print model summary

        print(f"Fitting Hybrid {nn_type} + {kge_model_name} model...")
        history = hybrid_model.fit(
            [X_train_text_pad, X_train_kge], y_train_nn,
            validation_data=([X_valid_text_pad, X_valid_kge], y_valid_nn),
            epochs=NN_EPOCHS,
            batch_size=NN_BATCH_SIZE,
            verbose=1 # Set to 1 or 2 for training progress
        )

        nn_training_time = time.time() - start_time_nn
        print(f"Hybrid {nn_type} + {kge_model_name} training complete. Time: {nn_training_time:.2f}s.")

        # Evaluate
        loss, accuracy = hybrid_model.evaluate([X_test_text_pad, X_test_kge], y_test_nn, verbose=0)
        y_pred_proba_nn = hybrid_model.predict([X_test_text_pad, X_test_kge])
        y_pred_nn = np.argmax(y_pred_proba_nn, axis=1)

        f1_macro_nn = f1_score(y_test_labels_orig, y_pred_nn, average='macro', zero_division=0)
        report_nn = classification_report(y_test_labels_orig, y_pred_nn, target_names=class_names_report, zero_division=0)

        current_model_key = f"{kge_model_name}_{nn_type}"
        results_comparison[current_model_key] = {
            'accuracy': accuracy, 'f1_macro': f1_macro_nn, 'report': report_nn,
            'training_time_kge': kge_training_time if nn_type == "LSTM" else 0, # Avoid double counting KGE time
            'training_time_nn': nn_training_time
        }
        print(f"Results for {current_model_key}: Accuracy={accuracy:.4f}, F1-Macro={f1_macro_nn:.4f}")
        tf.keras.backend.clear_session() # Clear Keras session to free memory

# --- 6. Compare All Results ---
print("\n\n--- Overall Results Comparison (KGE + LSTM/BiLSTM) ---")
for config_name, config_metrics in results_comparison.items():
    print(f"\n--- {config_name} ---")
    accuracy_value = config_metrics['accuracy']
    # Failed KGE runs store a string marker in place of a numeric accuracy.
    if isinstance(accuracy_value, str) and "Error" in accuracy_value:
        print(f"  Error: {config_metrics['report']}")
        continue
    # KGE time is stored only on the first variant of each KGE model, so it prints once.
    if config_metrics.get('training_time_kge', 0) > 0:
        print(f"  KGE Training Time (common for LSTM/BiLSTM): {config_metrics['training_time_kge']:.2f} seconds")
    print(f"  NN Training Time: {config_metrics['training_time_nn']:.2f} seconds")
    print(f"  Test Accuracy: {accuracy_value:.4f}")
    print(f"  Test F1-Score (Macro): {config_metrics['f1_macro']:.4f}")
    print("  Classification Report on Test Set:\n", config_metrics['report'])


def _format_score(value):
    """Render float scores with four decimals; pass anything else through unchanged."""
    return f"{value:.4f}" if isinstance(value, float) else value


summary_data = []
for config_name, config_metrics in results_comparison.items():
    kge_seconds = config_metrics.get('training_time_kge', 0)
    nn_seconds = config_metrics.get('training_time_nn', 0)
    summary_data.append({
        'Model Configuration': config_name,
        'Accuracy': _format_score(config_metrics.get('accuracy', 'N/A')),
        'F1 (Macro)': _format_score(config_metrics.get('f1_macro', 'N/A')),
        'KGE Time (s)': f"{kge_seconds:.2f}" if kge_seconds > 0 else "-",
        'NN Time (s)': f"{nn_seconds:.2f}",
    })

summary_df = pd.DataFrame(summary_data)
print("\n--- Summary Table (KGE + LSTM/BiLSTM) ---")
try:
    print(summary_df.to_string(index=False))
except AttributeError:
    print(summary_df)

INFO:pykeen.utils:Using opt_einsum


Created directory: saved_pykeen_models
Step 1: Loading LIAR Dataset from Hugging Face...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/5.16k [00:00<?, ?B/s]

liar.py:   0%|          | 0.00/6.41k [00:00<?, ?B/s]

liar.py:   0%|          | 0.00/6.41k [00:00<?, ?B/s]

README.md:   0%|          | 0.00/5.16k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/1.01M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/10269 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1283 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/1284 [00:00<?, ? examples/s]

Loaded 10269 training, 1284 validation, and 1283 test statements.

Step 2: Preparing text data for LSTM/BiLSTM models...
Found 12348 unique tokens.
Shape of padded training text sequences: (10269, 100)
Shape of padded validation text sequences: (1284, 100)
Shape of padded test text sequences: (1283, 100)

Step 3: Constructing Knowledge Graph triplets...
Generated 77666 unique KG triplets.


INFO:pykeen.pipeline.api:Using device: cuda
INFO:pykeen.nn.representation:Inferred unique=False for Embedding()
INFO:pykeen.nn.representation:Inferred unique=False for Embedding()


Number of classes: 6
PyKEEN will use device: cuda

===== Processing KGE Model: TransE (PyKEEN) =====
Training TransE with k=100, epochs=30...


Training epochs on cuda:0:   0%|          | 0/30 [00:00<?, ?epoch/s]

Evaluating on cuda:0:   0%|          | 0.00/77.7k [00:00<?, ?triple/s]

INFO:pykeen.evaluation.evaluator:Evaluation took 14.56s seconds


Saved TransE model to saved_pykeen_models/TransE_pykeen_model.pt
TransE (PyKEEN) training complete. Time: 30.25s.
Shape of KGE embeddings for NN training: (10269, 100)

--- Training Hybrid LSTM model with TransE embeddings ---
Fitting Hybrid LSTM + TransE model...
Epoch 1/10
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 250ms/step - accuracy: 0.1902 - loss: 1.7757 - val_accuracy: 0.2079 - val_loss: 1.7670
Epoch 2/10
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 246ms/step - accuracy: 0.2043 - loss: 1.7582 - val_accuracy: 0.2002 - val_loss: 1.7683
Epoch 3/10
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 249ms/step - accuracy: 0.2123 - loss: 1.7504 - val_accuracy: 0.1916 - val_loss: 1.7647
Epoch 4/10
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 249ms/step - accuracy: 0.2317 - loss: 1.7464 - val_accuracy: 0.1861 - val_loss: 1.7642
Epoch 5/10
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[



[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 400ms/step - accuracy: 0.1950 - loss: 1.7696 - val_accuracy: 0.2375 - val_loss: 1.7313
Epoch 2/10
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 389ms/step - accuracy: 0.2583 - loss: 1.6954 - val_accuracy: 0.2531 - val_loss: 1.6995
Epoch 3/10
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 390ms/step - accuracy: 0.3405 - loss: 1.5399 - val_accuracy: 0.2243 - val_loss: 1.7427
Epoch 4/10
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 401ms/step - accuracy: 0.4867 - loss: 1.3041 - val_accuracy: 0.2313 - val_loss: 1.9053
Epoch 5/10
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 394ms/step - accuracy: 0.6220 - loss: 1.0236 - val_accuracy: 0.2274 - val_loss: 2.1951
Epoch 6/10
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 399ms/step - accuracy: 0.7223 - loss: 0.7932 - val_accuracy: 0.2235 - val_loss: 2.4884
Epoch 7/10
[1m161/16

INFO:pykeen.pipeline.api:Using device: cuda
INFO:pykeen.nn.representation:Inferred unique=False for Embedding()
INFO:pykeen.nn.representation:Inferred unique=False for Embedding()



===== Processing KGE Model: RotatE (PyKEEN) =====
Training RotatE with k=100, epochs=30...


Training epochs on cuda:0:   0%|          | 0/30 [00:00<?, ?epoch/s]

Evaluating on cuda:0:   0%|          | 0.00/77.7k [00:00<?, ?triple/s]

INFO:pykeen.evaluation.evaluator:Evaluation took 36.45s seconds


Saved RotatE model to saved_pykeen_models/RotatE_pykeen_model.pt
RotatE (PyKEEN) training complete. Time: 54.50s.
  RotatE complex embeddings: concatenating real/imag parts.
Shape of KGE embeddings for NN training: (10269, 200)

--- Training Hybrid LSTM model with RotatE embeddings ---
Fitting Hybrid LSTM + RotatE model...
Epoch 1/10
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 244ms/step - accuracy: 0.1925 - loss: 1.7759 - val_accuracy: 0.2157 - val_loss: 1.7618
Epoch 2/10
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 238ms/step - accuracy: 0.2153 - loss: 1.7562 - val_accuracy: 0.2048 - val_loss: 1.7649
Epoch 3/10
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 244ms/step - accuracy: 0.2281 - loss: 1.7469 - val_accuracy: 0.2064 - val_loss: 1.7679
Epoch 4/10
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 242ms/step - accuracy: 0.2516 - loss: 1.7307 - val_accuracy: 0.2025 - val_loss: 1.7681
Epoch 5/10
[1m



[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 403ms/step - accuracy: 0.2000 - loss: 1.7716 - val_accuracy: 0.2508 - val_loss: 1.7392
Epoch 2/10
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 397ms/step - accuracy: 0.2734 - loss: 1.6891 - val_accuracy: 0.2383 - val_loss: 1.7198
Epoch 3/10
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 401ms/step - accuracy: 0.3541 - loss: 1.5419 - val_accuracy: 0.2274 - val_loss: 1.7639
Epoch 4/10
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 398ms/step - accuracy: 0.4893 - loss: 1.2871 - val_accuracy: 0.2407 - val_loss: 1.8867
Epoch 5/10
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 400ms/step - accuracy: 0.6214 - loss: 1.0282 - val_accuracy: 0.2266 - val_loss: 2.0952
Epoch 6/10
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 399ms/step - accuracy: 0.7159 - loss: 0.8105 - val_accuracy: 0.2336 - val_loss: 2.3941
Epoch 7/10
[1m161/16

INFO:pykeen.pipeline.api:Using device: cuda
INFO:pykeen.nn.representation:Inferred unique=False for Embedding()
INFO:pykeen.nn.representation:Inferred unique=False for Embedding(
  (regularizer): LpRegularizer()
)



===== Processing KGE Model: DistMult (PyKEEN) =====
Training DistMult with k=100, epochs=30...


Training epochs on cuda:0:   0%|          | 0/30 [00:00<?, ?epoch/s]

Evaluating on cuda:0:   0%|          | 0.00/77.7k [00:00<?, ?triple/s]

INFO:pykeen.evaluation.evaluator:Evaluation took 14.79s seconds


Saved DistMult model to saved_pykeen_models/DistMult_pykeen_model.pt
DistMult (PyKEEN) training complete. Time: 33.47s.
Shape of KGE embeddings for NN training: (10269, 100)

--- Training Hybrid LSTM model with DistMult embeddings ---
Fitting Hybrid LSTM + DistMult model...
Epoch 1/10




[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 260ms/step - accuracy: 0.1820 - loss: 1.7766 - val_accuracy: 0.1986 - val_loss: 1.7656
Epoch 2/10
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 255ms/step - accuracy: 0.2081 - loss: 1.7653 - val_accuracy: 0.1900 - val_loss: 1.7663
Epoch 3/10
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 257ms/step - accuracy: 0.2180 - loss: 1.7551 - val_accuracy: 0.1877 - val_loss: 1.7678
Epoch 4/10
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 257ms/step - accuracy: 0.2333 - loss: 1.7405 - val_accuracy: 0.1861 - val_loss: 1.7690
Epoch 5/10
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 260ms/step - accuracy: 0.2431 - loss: 1.7396 - val_accuracy: 0.1986 - val_loss: 1.7728
Epoch 6/10
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 260ms/step - accuracy: 0.2405 - loss: 1.7320 - val_accuracy: 0.1861 - val_loss: 1.7771
Epoch 7/10
[1m161/16



[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 414ms/step - accuracy: 0.1932 - loss: 1.7719 - val_accuracy: 0.2383 - val_loss: 1.7383
Epoch 2/10
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 410ms/step - accuracy: 0.2670 - loss: 1.6971 - val_accuracy: 0.2547 - val_loss: 1.7061
Epoch 3/10
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 406ms/step - accuracy: 0.3496 - loss: 1.5363 - val_accuracy: 0.2344 - val_loss: 1.7927
Epoch 4/10
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 407ms/step - accuracy: 0.5068 - loss: 1.2747 - val_accuracy: 0.2422 - val_loss: 1.9914
Epoch 5/10
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 413ms/step - accuracy: 0.6506 - loss: 0.9711 - val_accuracy: 0.2344 - val_loss: 2.1758
Epoch 6/10
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 403ms/step - accuracy: 0.7411 - loss: 0.7496 - val_accuracy: 0.2165 - val_loss: 2.5126
Epoch 7/10
[1m161/16

INFO:pykeen.pipeline.api:Using device: cuda
INFO:pykeen.nn.representation:Inferred unique=False for Embedding(
  (regularizer): LpRegularizer()
)
INFO:pykeen.nn.representation:Inferred unique=False for Embedding(
  (regularizer): LpRegularizer()
)



===== Processing KGE Model: ComplEx (PyKEEN) =====
Training ComplEx with k=100, epochs=30...


Training epochs on cuda:0:   0%|          | 0/30 [00:00<?, ?epoch/s]

Evaluating on cuda:0:   0%|          | 0.00/77.7k [00:00<?, ?triple/s]

INFO:pykeen.evaluation.evaluator:Evaluation took 20.13s seconds


Saved ComplEx model to saved_pykeen_models/ComplEx_pykeen_model.pt
ComplEx (PyKEEN) training complete. Time: 43.08s.
  ComplEx complex embeddings: concatenating real/imag parts.
Shape of KGE embeddings for NN training: (10269, 200)

--- Training Hybrid LSTM model with ComplEx embeddings ---
Fitting Hybrid LSTM + ComplEx model...
Epoch 1/10
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 259ms/step - accuracy: 0.1850 - loss: 2.0400 - val_accuracy: 0.1877 - val_loss: 1.7665
Epoch 2/10
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 247ms/step - accuracy: 0.2026 - loss: 1.7634 - val_accuracy: 0.1885 - val_loss: 1.7676
Epoch 3/10
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 251ms/step - accuracy: 0.2206 - loss: 1.7497 - val_accuracy: 0.1900 - val_loss: 1.7642
Epoch 4/10
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 253ms/step - accuracy: 0.2237 - loss: 1.7409 - val_accuracy: 0.2002 - val_loss: 1.7659
Epoch 5/1



[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m73s[0m 410ms/step - accuracy: 0.1803 - loss: 2.0277 - val_accuracy: 0.1893 - val_loss: 1.7718
Epoch 2/10
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 400ms/step - accuracy: 0.2033 - loss: 1.7624 - val_accuracy: 0.1908 - val_loss: 1.7740
Epoch 3/10
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 404ms/step - accuracy: 0.2150 - loss: 1.7552 - val_accuracy: 0.1970 - val_loss: 1.7758
Epoch 4/10
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 404ms/step - accuracy: 0.2256 - loss: 1.7434 - val_accuracy: 0.2009 - val_loss: 1.7829
Epoch 5/10
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 405ms/step - accuracy: 0.2509 - loss: 1.7131 - val_accuracy: 0.1838 - val_loss: 1.7817
Epoch 6/10
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 403ms/step - accuracy: 0.2711 - loss: 1.6930 - val_accuracy: 0.2220 - val_loss: 1.7658
Epoch 7/10
[1m161/16