In [1]:
%cd implicature

/gws/nopw/j04/ai4er/users/maiush/implicature


In [2]:
import numpy as np
import pandas as pd
import torch as t
from sklearn.linear_model import LogisticRegression

from constants import results_path

from typing import Tuple

In [3]:
models = ["gemma-2-2b", "llama-3.1-8b", "mistral-nemo-12b", "qwen-2-72b"]

In [4]:
for model_root in models:
    print(f"\n{model_root}")
    model_instruct = f"{model_root}-base"
    model_base = f"{model_root}-instruct"

    # load labels
    train_labels = pd.read_json("train_data.jsonl", orient="records", lines=True)["implicature"] == "yes"
    train_labels = (train_labels * 1).to_numpy()
    test_labels = pd.read_json("test_data.jsonl", orient="records", lines=True)["implicature"] == "yes"
    test_labels = (test_labels * 1).to_numpy()
    # double them up - since we train on data from two models
    train_len, test_len = len(train_labels), len(test_labels)
    train_labels = np.concatenate([train_labels, train_labels])
    test_labels = np.concatenate([test_labels, test_labels])

    # instantiate classifier
    lr = LogisticRegression(max_iter=1000)

    # load data
    x_train_yes = t.concat([
        t.load(f"{results_path}/{model_name}/harvest_yes_train.pt", weights_only=True)
        for model_name in [model_instruct, model_base]
    ], dim=0)
    x_train_no = t.concat([
        t.load(f"{results_path}/{model_name}/harvest_no_train.pt", weights_only=True)
        for model_name in [model_instruct, model_base]
    ], dim=0)
    x_test_yes = t.concat([
        t.load(f"{results_path}/{model_name}/harvest_yes_test.pt", weights_only=True)
        for model_name in [model_instruct, model_base]
    ], dim=0)
    x_test_no = t.concat([
        t.load(f"{results_path}/{model_name}/harvest_no_test.pt", weights_only=True)
        for model_name in [model_instruct, model_base]
    ], dim=0)

    # centering data
    # yes_mean = x_train_yes.mean(dim=0, keepdim=True)
    # no_mean = x_train_no.mean(dim=0, keepdim=True)
    # x_train_yes = x_train_yes - yes_mean
    # x_train_no = x_train_no - no_mean
    # x_test_yes = x_test_yes - yes_mean
    # x_test_no = x_test_no - no_mean

    # contrast pair differences
    x_train = (x_train_yes - x_train_no).float().numpy()
    x_test = (x_test_yes - x_test_no).float().numpy()

    # fit model
    lr.fit(x_train, train_labels)

    scores = (lr.score(x_train, train_labels), lr.score(x_test, test_labels))
    print(f"train accuracy: {scores[0]:.3f}")
    print(f"test accuracy: {scores[1]:.3f}")
    inst_acc = lr.score(x_test[:test_len], test_labels[:test_len])
    base_acc = lr.score(x_test[test_len:], test_labels[test_len:])
    print(f"test accuracy (instruct): {inst_acc:.3f}")
    print(f"test accuracy (base): {base_acc:.3f}")


gemma-2-2b
train accuracy: 1.000
test accuracy: 0.743
test accuracy (instruct): 0.712
test accuracy (base): 0.773

llama-3.1-8b
train accuracy: 1.000
test accuracy: 0.789
test accuracy (instruct): 0.780
test accuracy (base): 0.798

mistral-nemo-12b
train accuracy: 1.000
test accuracy: 0.816
test accuracy (instruct): 0.805
test accuracy (base): 0.827

qwen-2-72b
train accuracy: 1.000
test accuracy: 0.838
test accuracy (instruct): 0.840
test accuracy (base): 0.837
