In [4]:
import json
import torch

import numpy as np
from sklearn.preprocessing import OneHotEncoder
from sklearn.manifold import TSNE
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier

# Directory containing the saved embedding dumps and the relations file.
DATA_PATH = "../data"
# Kept as its own constant so cells can open the relations file directly.
RELATIONS_JSON_PATH = f"{DATA_PATH}/relations.json"

# Embedding models to probe, in processing order. The loading cells expect
# one "<name with '/' replaced by '_'>.pt" dump per entry under DATA_PATH.
MODELS = [
    "Alibaba-NLP/gte-large-en-v1.5",
    "intfloat/multilingual-e5-large",
    "sentence-transformers/all-mpnet-base-v2",
    "sentence-transformers/all-MiniLM-L6-v2",
]

In [3]:
file_path = "../data/Alibaba-NLP_gte-large-en-v1.5.pt"

# Sanity check: load one embedding dump and show its layout.
#
# weights_only=False matches load_embeddings_for_model: the dump is a pickled
# list of dicts holding numpy arrays, which the restricted unpickler (the
# default from PyTorch 2.6 on) rejects.
# NOTE(security): this unpickles arbitrary objects; only load trusted files.
try:
    # map_location="cpu" loads on CPU to avoid GPU issues.
    data = torch.load(file_path, map_location="cpu", weights_only=False)
except Exception as e:
    # Best-effort cell: report the failure instead of aborting the notebook.
    print(f"Error loading file: {e}")
else:
    # Inspection runs outside the try so that a problem here is not
    # misreported as a load error.
    print("Data loaded successfully!")
    print("Loaded data type:", type(data))
    print("List length:", len(data))
    # Guard the peek so an empty dump does not raise IndexError.
    if len(data) > 0:
        print("First item:", data[0])

Data loaded successfully!
Loaded data type: <class 'list'>
List length: 142100
First item: {'sentence': 'The table is above the chair.', 'relation': 'above', 'subject': 'table', 'object': 'chair', 'embedding': array([ 0.32811892,  0.24800315,  0.28813255, ..., -0.2839055 ,
       -0.95515853, -0.04943762], shape=(1024,), dtype=float32)}


In [5]:
# Load the spatial relation definitions. JSON text is UTF-8, so the encoding
# is given explicitly instead of relying on the platform's locale default.
# The 'spatial_relations' value is later passed to get_relations_lookup,
# which iterates it as {category: [pair, ...]}.
with open(RELATIONS_JSON_PATH, 'r', encoding='utf-8') as f:
    relations = json.load(f)['spatial_relations']

In [6]:
def get_relations_lookup(relations):
    """Index every relation term by name.

    Args:
        relations: mapping of category name to an iterable of two-element
            pairs ``(first, second)`` of relation terms.

    Returns:
        dict mapping each term to a dict with keys ``'category'`` (the
        category the pair came from), ``'opposite'`` (the other term of the
        pair) and ``'position'`` (0 for the first term, 1 for the second).
        A term that occurs more than once keeps the entry written last.
    """
    lookup = {}
    for category_name, pairs in relations.items():
        for pair in pairs:
            # Unpacking enforces that each pair has exactly two members.
            lhs, rhs = pair
            ordered = ((lhs, rhs), (rhs, lhs))
            for slot, (term, counterpart) in enumerate(ordered):
                lookup[term] = {
                    'category': category_name,
                    'opposite': counterpart,
                    'position': slot,
                }
    return lookup

# Build the term -> {category, opposite, position} index once, from the
# relations read from RELATIONS_JSON_PATH.
relations_lookup = get_relations_lookup(relations)

In [7]:
def load_embeddings_for_model(model_name):
    """Load the saved embeddings and labels for one model.

    Reads ``{DATA_PATH}/<model_name with '/' replaced by '_'>.pt`` and expects
    an iterable of records with the keys ``'embedding'``, ``'subject'``,
    ``'object'`` and ``'relation'``.

    Args:
        model_name: model identifier, e.g. an entry of ``MODELS``.

    Returns:
        Tuple ``(embeddings, subjects, objects, relations)`` of numpy arrays.
        The three label arrays are reshaped to column vectors ``(n, 1)``.
        The embeddings array is whatever ``np.array`` builds from the
        per-record embeddings (presumably ``(n, dim)``; confirm for new dumps).
    """
    dump_file = f'{DATA_PATH}/{model_name.replace("/", "_")}.pt'
    # weights_only=False: the dump is unpickled in full, so only load trusted files.
    records = torch.load(dump_file, weights_only=False)

    # Single pass over the records, collecting each field into its own column.
    columns = {'embedding': [], 'subject': [], 'object': [], 'relation': []}
    for record in records:
        for field, values in columns.items():
            values.append(record[field])

    features = np.array(columns['embedding'])
    subjects = np.array(columns['subject']).reshape(-1, 1)
    objects = np.array(columns['object']).reshape(-1, 1)
    relation_names = np.array(columns['relation']).reshape(-1, 1)
    return features, subjects, objects, relation_names

In [8]:
# Load the first model's data and print its first record (embedding, subject,
# object, relation) as a quick check of the loader's output.
X, y_sub, y_ob, y_rel = load_embeddings_for_model(MODELS[0])
print(X[0],y_sub[0],y_ob[0],y_rel[0])

[ 0.32811892  0.24800315  0.28813255 ... -0.2839055  -0.95515853
 -0.04943762] ['table'] ['chair'] ['above']


In [11]:
# Fixed seed for the train/test split and the probe initialisation, so that
# re-running this cell reproduces the reported accuracies.
PROBE_SEED = 42


def _fit_probe(X_train, y_train, seed=PROBE_SEED):
    """Fit one MLP probe with identity hidden activation on one-hot targets."""
    probe = MLPClassifier(activation='identity', random_state=seed)
    probe.fit(X_train, y_train)
    return probe


def _exact_rows(y_pred, y_true):
    """Return a boolean mask of rows whose predicted indicator vector equals the target."""
    return np.all(np.asarray(y_pred) == np.asarray(y_true), axis=1)


# results[model_name] -> accuracy metrics;
# models[model_name]  -> (subject, object, relation) probes.
results = {}
models = {}
# One encoder is refit for every label set, so after the loop it only holds
# the categories of the last fit (relations of the last model).
one_hot_encoder = OneHotEncoder()

for model_name in MODELS:
    print(f"Training probe for {model_name}...")
    X, y_sub, y_ob, y_rel = load_embeddings_for_model(model_name)

    # .toarray() gives plain ndarrays; .todense() would give np.matrix, which
    # then has to be converted back before training.
    y_sub_encoded = one_hot_encoder.fit_transform(y_sub).toarray()
    y_ob_encoded = one_hot_encoder.fit_transform(y_ob).toarray()
    y_rel_encoded = one_hot_encoder.fit_transform(y_rel).toarray()

    # A single call keeps the rows of X and all three targets aligned.
    X_train, X_test, y_sub_train, y_sub_test, y_ob_train, y_ob_test, y_rel_train, y_rel_test = \
        train_test_split(X, y_sub_encoded, y_ob_encoded, y_rel_encoded,
                         test_size=0.2, random_state=PROBE_SEED)

    # Train probes
    clf_sub = _fit_probe(X_train, y_sub_train)
    clf_ob = _fit_probe(X_train, y_ob_train)
    clf_rel = _fit_probe(X_train, y_rel_train)
    models[model_name] = (clf_sub, clf_ob, clf_rel)

    y_sub_pred = clf_sub.predict(X_test)
    y_ob_pred = clf_ob.predict(X_test)
    y_rel_pred = clf_rel.predict(X_test)

    # With 2-D indicator targets accuracy_score is subset accuracy: a row
    # counts only if the whole predicted vector matches.
    accuracy_sub = accuracy_score(y_sub_test, y_sub_pred)
    accuracy_ob = accuracy_score(y_ob_test, y_ob_pred)
    accuracy_rel = accuracy_score(y_rel_test, y_rel_pred)

    # Per-example correctness uses the same exact-match criterion. argmax
    # would treat an all-zero prediction as "class 0" and could count it as
    # correct, making the joint score disagree with the accuracies above.
    sub_correct = _exact_rows(y_sub_pred, y_sub_test)
    ob_correct = _exact_rows(y_ob_pred, y_ob_test)
    rel_correct = _exact_rows(y_rel_pred, y_rel_test)

    # Joint accuracy: share of examples with all three components correct.
    joint_correct = sub_correct & ob_correct & rel_correct
    joint_accuracy = float(np.mean(joint_correct))

    results[model_name] = {"accuracy_sub": accuracy_sub, "accuracy_ob": accuracy_ob,
                           "accuracy_rel": accuracy_rel, "joint_accuracy": joint_accuracy}
    # Adjacent f-strings avoid the run of spaces that a backslash
    # continuation inside the literal puts into the printed line.
    print(
        f"Accuracy for {model_name}: Joint {joint_accuracy:.2f}, "
        f"Subject {accuracy_sub:.2f}, Object {accuracy_ob:.2f}, "
        f"Relation {accuracy_rel:.2f}"
    )

Training probe for Alibaba-NLP/gte-large-en-v1.5...
Accuracy for Alibaba-NLP/gte-large-en-v1.5: Joint 0.99, Subject 1.00,           Object 0.99, Relation 0.99
Training probe for intfloat/multilingual-e5-large...
Accuracy for intfloat/multilingual-e5-large: Joint 1.00, Subject 1.00,           Object 1.00, Relation 1.00
Training probe for sentence-transformers/all-mpnet-base-v2...
Accuracy for sentence-transformers/all-mpnet-base-v2: Joint 1.00, Subject 1.00,           Object 1.00, Relation 1.00
Training probe for sentence-transformers/all-MiniLM-L6-v2...
Accuracy for sentence-transformers/all-MiniLM-L6-v2: Joint 1.00, Subject 1.00,           Object 1.00, Relation 1.00
