This notebook loads the code from https://github.com/evandez/relations (described at https://lre.baulab.info/) and adapts it to the current use case.

## Setup

In [None]:
from google.colab import userdata
import os
import sys
import subprocess
# Create the personal access token here: https://github.com/settings/tokens?type=beta
# Add it to the colab's secrets with the name GITHU_PAC
# Restart the runtime after you add the secret.
personal_access_token = userdata.get('GITHUB_PAC')

owner = "mbmccoy"  # The owner of the repo
repo_name = "relations"
branch_name = "mbm-investigation"

original_pwd = "/content"
repo_path = f'{original_pwd}/{repo_name}'
repo_url = f'github.com/{owner}/{repo_name}.git'

if not os.path.exists(repo_path):
    # Clone the repo if it does not exist
    !git clone -b {branch_name} https://{personal_access_token}@{repo_url} {repo_path}
else:
    # Change directory and pull latest changes if repo already exists
    os.chdir(f"{repo_path}")
    !git pull
    os.chdir(f"{original_pwd}")  # Change back to the original directory if necessary

sys.path.append('..')
sys.path.append(f"{repo_path}")

# Get the current git commit for metadata at directory repo_path

# Get the current git commit for metadata at directory repo_path
os.chdir(f"{repo_path}")
commit_hash = subprocess.check_output(['git', 'rev-parse', 'HEAD']).strip().decode('utf-8')
os.chdir(f"{original_pwd}")
print(f"git commit: {commit_hash}")


In [None]:
# Installs: baukit (straight from GitHub) plus dataclasses-json, protobuf,
# types-requests and h5py, all in a single pip invocation.
!pip install git+https://github.com/davidbau/baukit dataclasses-json protobuf types-requests h5py
# The lines below are disabled alternatives/extras (a protobuf version pin and
# developer tooling); they are kept for reference and are not executed.
#!pip install protobuf #==3.20.0
#!pip install types-requests
#!pip install black
#!pip install isort
#!pip install mypy

In [None]:
#imports
import os
from google.colab import drive, files
import accelerate
import dataclasses_json
import huggingface_hub
import matplotlib
import numpy
import requests
import seaborn
import sentencepiece
import tqdm
import transformers
import torch
import plotly
#import black
#import isort
#import mypy
#import pytest
import sys
import logging
# Root logger, used for the notebook's own messages, and the logger of the
# repo's operators module: both are switched to DEBUG verbosity.
logger = logging.getLogger()
operators_logger = logging.getLogger("src.operators")
for configured_logger in (logger, operators_logger):
    configured_logger.setLevel(logging.DEBUG)


In [None]:
# Mount Google Drive at /content/drive3 (forcing a re-mount if it is already
# mounted); the Drive paths used later in this notebook start with this mount point.
drive.mount('/content/drive3', force_remount=True)
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules (e.g. the cloned repo) are picked up without restarting the runtime.
%load_ext autoreload
%autoreload 2

In [None]:
#paths
#sys.path.append('..')
#sys.path.append('/content/drive/Shareddrives/MoralLearning/RelationalComposition/relations/')


In [None]:
#import local packages
from src import models, data, lens, functional
from src.utils import experiment_utils
from baukit import Menu, show

In [None]:
#set up paths for repos & install via git (only needed once -- don't repeat since we are editing the repo code, this is for reference only)
# !cd '/content/drive/Shareddrives/MoralLearning/RelationalComposition/'
# !git clone https://github.com/evandez/relations/


In [None]:
# This shortcut was recommended but doesn't easily work with Colab -- instead we install each dependency individually.
# !pip install invoke
# import invoke
# os.chdir('/content/drive/Shareddrives/MoralLearning/RelationalComposition/relations/')
# !invoke install

In [None]:
# below packages install but do not import in colab - maybe they are not needed so let's hope.
# import protobuf
# import types_requests

In [None]:
# we can store tokens this way but for the repo, they assume we use Colab secrets so we can lean on that for now
# !pip install dotenv
# import dotenv
# dotenv.load_dotenv('/content/drive/Shareddrives/MoralLearning/RelationalComposition/vars.env')

Basic usage, copied from https://github.com/evandez/relations/blob/main/demo/demo.ipynb

In [None]:
# Load the model onto the first GPU; swap in the commented-out line below to use the CPU instead.
device = "cuda:0"
#device = "cpu"
# NB: technically we can use llama, but the repo is not quite functional for it and needs fixes.
# The main results are reported on gptj, so we continue with that.
# Loading this model takes some time; a GPU runtime is recommended.
mt = models.load_model("gptj", device=device, fp16=True)
# Report the loaded model's dtype, device and memory footprint as a sanity check.
print(f"dtype: {mt.model.dtype}, device: {mt.model.device}, memory: {mt.model.get_memory_footprint()}")

In [None]:
# Load the repo's default dataset (no path given): the set of relations tested
# in Hernandez et al. 2024. It is combined with the custom relation in a later cell.
dataset_orig = data.load_dataset()

In [None]:
from src.data import load_relation_dict, load_dataset, RelationDataset
from tempfile import NamedTemporaryFile
import json

# Custom relation definition stored on the mounted Drive.
json_file = "/content/drive3/Shareddrives/MoralLearning/RelationalComposition/has_color.json"
with open(json_file, encoding="utf-8") as f:
    relation_json = json.load(f)

# Inline copy of the relation, kept for reference only. Every line of it must
# stay commented out, otherwise the cell fails to parse.
#relation_json = json.loads("""
#{"name": "has_color", "prompt_templates": ["On the outside, the color of {} is"], "properties": {"relation_type": "commonsense_new", "domain_name": "objects", "range_name": "color", "symmetric": "false"}, "samples": [{"subject": "barn", "object": "red"}, {"subject": "barrel ", "object": "brown"}, {"subject": "bathrub", "object": "white"}, {"subject": "brick", "object": "red"}, {"subject": "emerald", "object": "green"}, {"subject": "spoon", "object": "silver"}, {"subject": "stone", "object": "grey"}, {"subject": "toilet", "object": "white"}, {"subject": "trumpet", "object": "gold"}, {"subject": "tuba", "object": "gold"}, {"subject": "wagon", "object": "red"}, {"subject": "cauliflower", "object": "white"}, {"subject": "carrot", "object": "orange"}, {"subject": "cauliflower", "object": "white"}, {"subject": "carrot", "object": "orange"}, {"subject": "celery", "object": "green"}, {"subject": "corn", "object": "yellow"}, {"subject": "cucumber", "object": "green"}, {"subject": "lettuce", "object": "green"}, {"subject": "pumpkin", "object": "orange"}, {"subject": "spinach", "object": "green"}, {"subject": "zucchini", "object": "green"}, {"subject": "cherry", "object": "red"}, {"subject": "cranberry", "object": "red"}, {"subject": "eggplant", "object": "purple"}, {"subject": "lemon", "object": "yellow"}, {"subject": "lime", "object": "green"}, {"subject": "mandarin", "object": "orange"}, {"subject": "pineapple", "object": "yellow"}, {"subject": "plum", "object": "purple"}, {"subject": "raspberry", "object": "red"}, {"subject": "strawberry", "object": "red"}, {"subject": "tomato", "object": "red"}, {"subject": "turtle", "object": "green"}, {"subject": "crocodile", "object": "green"}, {"subject": "frog", "object": "green"}, {"subject": "dove", "object": "white"}, {"subject": "canary", "object": "yellow"}, {"subject": "crow", "object": "black "}, {"subject": "flamingo", "object": "pink"}, {"subject": "beaver", "object": "brown"}, {"subject": "goat", "object": "white"}, 
#{"subject": "fox", "object": "red"}, {"subject": "panther", "object": "black "}]}
#""")

# Reuse the regular prompt templates when no "prompt_templates_zs" entry is present.
# (presumably load_dataset requires that key -- confirm against src.data)
if "prompt_templates_zs" not in relation_json:
    relation_json["prompt_templates_zs"] = relation_json["prompt_templates"]

# load_dataset is given a file name, so the (possibly patched) JSON is written to
# a temporary file first. flush() makes sure the content is on disk before the
# file is re-read by name; the file is removed when the with-block exits.
with NamedTemporaryFile(suffix=".json", mode="w") as f:
    json.dump(relation_json, f)
    f.flush()
    dataset_new = load_dataset(f.name)

# Custom relation(s) first, followed by the relations of the original dataset.
dataset = RelationDataset(dataset_new.relations + dataset_orig.relations)

In [None]:
import src.data as data
import src.functional as functional

from src.operators import JacobianIclMeanEstimator
from dataclasses import dataclass
from typing import List
import numpy as np
from typing import TypedDict

class SubjectFaithfulnessMetadata(TypedDict):
    """Faithfulness record for a single predicted subject.

    Keys:
        prediction_subject: Subject of the prediction.
        faithfulness: Faithfulness of the prediction.
    """

    prediction_subject: str
    faithfulness: List[float]


class FaithfulnessMetadata(TypedDict):
    """Faithfulness summary produced for one subject.

    Keys:
        faithfulness: Overall faithfulness.
        subject: Subject of the faithfulness.
        subject_faithfulness: Faithfulness of each subject.
    """

    faithfulness: List[float]
    subject: str
    subject_faithfulness: List[SubjectFaithfulnessMetadata]



def compute_faithfulness(
    operator: JacobianIclMeanEstimator,
    relation: data.Relation,
    examples: data.Relation,
    subject: str,
    num_preds: int = 5,
) -> FaithfulnessMetadata:
    """Score ``operator``'s top predictions against the samples of ``relation``.

    Every sample of ``relation`` is scored except the one whose subject equals
    ``subject`` and those whose subject also occurs in ``examples``. For each
    remaining sample the operator is asked for ``num_preds`` predictions, and
    the prediction at each rank counts as correct when
    ``functional.is_nontrivial_prefix`` accepts it against the sample's object.

    Args:
        operator: Called as ``operator(subject=..., k=num_preds)``; the result
            must expose ``.predictions`` whose items have ``.token`` and
            ``.prob``. ``operator.prompt_template`` is read for logging.
            NOTE(review): the annotation names the estimator class, but the
            call pattern looks like that of the operator an estimator
            produces -- confirm and adjust the annotation.
        relation: Relation whose samples are scored.
        examples: Relation whose sample subjects are excluded from scoring.
        subject: Subject to exclude from scoring; echoed in the result.
        num_preds: Number of prediction ranks to request and score.

    Returns:
        A dict with ``faithfulness`` (running sum over ranks of the per-rank
        fraction of correct predictions), ``subject`` (the argument, unchanged)
        and ``subject_faithfulness`` (one entry per scored sample). A rank that
        was never scored contributes 0.0 instead of raising ZeroDivisionError.
    """
    correct = [0] * num_preds
    wrong = [0] * num_preds
    subject_faithfulness = []
    # The excluded subjects do not change during the loop, so collect them once.
    example_subjects = {example.subject for example in examples.samples}

    for sample in relation.samples:
        if sample.subject == subject:
            logger.info(f"Skipping subject {sample}")
            continue
        if sample.subject in example_subjects:
            logger.info(f"Skipping example {sample}")
            continue

        logger.debug(f"Computing faithfulness for subject: {sample.subject}")
        logger.debug(f"prompt_template: {operator.prompt_template}")
        predictions = operator(subject=sample.subject, k=num_preds).predictions
        subject_correct = [0] * num_preds
        for i, prediction in enumerate(predictions):
            # Guard in case more than num_preds predictions come back.
            if i >= num_preds:
                break
            known_flag = functional.is_nontrivial_prefix(
                prediction=prediction.token, target=sample.object
            )
            logger.debug(f"{sample.subject=}, {sample.object=}, ")
            logger.debug(f'predicted="{functional.format_whitespace(prediction.token)}", (p={prediction.prob}), known=({functional.get_tick_marker(known_flag)})')

            correct[i] += known_flag
            wrong[i] += not known_flag
            subject_correct[i] = known_flag

        # NOTE(review): the per-subject curve divides the running count of
        # correct ranks by num_preds, which differs from how the overall curve
        # below is built -- confirm this is intended before comparing the two.
        subject_faithfulness.append(
            SubjectFaithfulnessMetadata(
                prediction_subject=sample.subject,
                faithfulness=(np.cumsum(subject_correct) / num_preds).tolist(),
            )
        )

    # A rank has no observations when every sample was skipped or fewer than
    # num_preds predictions were returned; report 0.0 there rather than
    # dividing by zero.
    per_rank_accuracy = [
        c / (c + w) if (c + w) else 0.0 for c, w in zip(correct, wrong)
    ]
    faithfulness = np.cumsum(per_rank_accuracy)

    logger.debug("------------------------------------------------------------")
    logger.debug(f"Faithfulness = {faithfulness}")
    logger.debug("------------------------------------------------------------")
    return {
        "faithfulness": faithfulness.tolist(),
        "subject": subject,
        "subject_faithfulness": subject_faithfulness,
    }

In [None]:
################### hparams ###################
# Passed as h_layer to the JacobianIclMeanEstimator built later in this cell.
layer = 5
# Passed as beta to the same estimator.
beta = 2.5
###############################################
from src.operators import JacobianIclMeanEstimator
from dataclasses import dataclass
#estimator = JacobianIclMeanEstimator(
#    mt = mt,
#    h_layer = layer,
#    beta = beta
#)

DEFAULT_METADATA_FILE_PATH = "/content/drive3/Shareddrives/MoralLearning/RelationalComposition/data/with-examples"

@dataclass(kw_only=True)
class MetadataFile:
    file_name: str
    file_path: str = DEFAULT_METADATA_FILE_PATH
    relation_to_metadata: dict = None

    @property
    def full_file_path(self):
        return os.path.join(self.file_path, self.file_name)

    @classmethod
    def get_metadata_from_file(
        cls,
        file_name:str,
        file_path: str = DEFAULT_METADATA_FILE_PATH
    ) -> "MetadataFile":
        metadata_file = os.path.join(file_path, file_name)

        if not os.path.exists(metadata_file):
            # Ensure the directory exists
            os.makedirs(os.path.dirname(metadata_file), exist_ok=True)
            print(f"File {metadata_file} does not exist; creating.")
            torch.save({}, metadata_file)
            relation_to_metadata = torch.load(metadata_file)

        relation_to_metadata = torch.load(metadata_file)
        return cls(
            file_name=file_name,
            file_path=file_path,
            relation_to_metadata=relation_to_metadata
        )

    def save_metadata_to_file(self):
        torch.save(self.relation_to_metadata, self.full_file_path)

    def __getitem__(self, key):
        return self.relation_to_metadata[key]

    def __setitem__(self, key, value):
        self.relation_to_metadata[key] = value

    def __contains__(self, key):
        return key in self.relation_to_metadata


# Estimator used below to build one operator per test sample.
estimator = JacobianIclMeanEstimator(
    mt=mt,
    h_layer=layer,
    beta=beta,
    rank=None,
)
# Names of the relations to run; every other relation in `dataset` is skipped.
relations_set = [
    "has_color",
    "fruit inside color",
    "object superclass",
    "fruit outside color",
    "word sentiment",
    "task done by tool",
    "substance phase of matter",
    "work location",
    "task person type",
]
for relation in dataset:
    key = relation.name
    # Filter first: skipped relations must not be seeded/split, otherwise the
    # `examples`/`test` globals left behind after the loop would belong to a
    # relation that was never processed.
    if key not in relations_set:
        logger.debug(f"skipping relation {key}: not in relations_set")
        continue

    # Set the seed to a constant right before splitting, for sampling consistency.
    experiment_utils.set_seed(12345)
    examples, test = relation.split(3)

    file_name = f"{key}_metadata.pt"
    # NOTE: despite its name this is a MetadataFile wrapper, not a plain dict.
    relation_to_metadata = MetadataFile.get_metadata_from_file(file_name)
    if key in relation_to_metadata:
        # Existing results are not skipped; they are recomputed and overwritten.
        logger.warning(f"Warning relation {key}: already in metadata")
    logger.debug("==========================")
    logger.debug(f"running on relation {key}")
    logger.debug(f"Examples: {examples}")
    metadata = []

    num_test_samples = len(test.samples)
    for i, sample in enumerate(test.samples):
        # Progress is reported against the samples actually iterated (test.samples).
        logger.info(f"running on sample {i+1} of {num_test_samples}: {sample}")
        # Build an operator from this single sample, with the shared examples.
        operator = estimator(
            relation.set(
                samples=[sample],
            ),
            examples=examples,
        )
        logger.debug(f"{sample}, {operator.weight.shape}")
        operator_metadata = operator.metadata.copy()
        faithfulness = compute_faithfulness(
            operator=operator,
            relation=relation,
            subject=sample.subject,
            examples=examples,
        )
        logger.debug(f"faithfulness = {faithfulness}")
        operator_metadata["faithfulness"] = faithfulness
        metadata.append(operator_metadata)
        relation_to_metadata[key] = list(metadata)
        relation_to_metadata["commit_hash"] = commit_hash

        # Save after every sample so partial results survive an interrupted run.
        logger.info(f"saving metadata for relation {key}")
        relation_to_metadata.save_metadata_to_file()



In [None]:
import gc
# Manual switch: change the condition to True and run this cell to drop the
# notebook's references to the operator, estimator and model, then run the
# garbage collector and empty the CUDA cache.
if False:
    operator = estimator = mt = None
    gc.collect()
    torch.cuda.empty_cache()

In [None]:
# Relies on `test` and `operator` left in the notebook namespace by the
# experiment loop in an earlier cell.
# Presumably keeps only the test samples the model handles with this few-shot
# prompt -- confirm against src.functional.
test = functional.filter_relation_samples_based_on_provided_fewshots(
    mt=mt, test_relation=test, prompt_template=operator.prompt_template, batch_size=4
)
# Inspect the first remaining sample (raises IndexError if none is left).
sample = test.samples[0]
print(sample)
# NOTE(review): the value of this expression is discarded -- only the final
# expression of a cell is auto-displayed. Wrap it in print()/display() if the
# predictions are meant to be shown.
operator(subject = sample.subject).predictions

# Compute hidden states for this subject at the operator's layer, then pick
# out the entry stored under the subject.
hs_and_zs = functional.compute_hs_and_zs(
    mt = mt,
    prompt_template = operator.prompt_template,
    subjects = [sample.subject],
    h_layer= operator.h_layer,
)
h = hs_and_zs.h_by_subj[sample.subject]

In [None]:
# Apply the operator by hand: beta-scaled linear map of h plus the bias.
z = operator.beta * (operator.weight @ h) + operator.bias

# Pass z through the repo's logit lens with get_proba=True
# (presumably decodes z into token probabilities -- confirm in src.lens).
lens.logit_lens(
    mt = mt,
    h = z,
    get_proba = True
)