This notebook converts a tab-separated text file into JSON that `dataclasses_json` can load as `Relation` objects. Each line of the input holds a subject, a tab, and one or more objects separated by `/`. The notebook is intended for use with BATS (the Bigger Analogy Test Set), which comprises forty analogy sets in four categories: morphological derivatives, morphological inflections, lexical semantics, and encyclopedic knowledge. Each analogy set can be interpreted as a relation. Where possible, three different prompt templates are provided for each relation.

In [4]:
import sys
sys.path.append('..')
from lre.data import Relation, RelationSample
from lre.operators import JacobianIclEstimator, Word2VecIclEstimator
import lre.functional as functional

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
def relation_from_path(path, relation_name, prompts):
    """Build a ``Relation`` from a tab-separated text file.

    Each non-blank line must have the form ``subject<TAB>obj1/obj2/...``:
    the text before the tab is the subject, and the text after it is split
    on ``/`` into the list of objects for that subject.

    Args:
        path: Path of the tab-separated file to read.
        relation_name: Passed to ``Relation`` as ``name``.
        prompts: Prompt templates; passed to ``Relation`` as both
            ``prompt_templates`` and ``prompt_templates_zs``.

    Returns:
        A ``Relation`` holding one ``RelationSample`` per non-blank line,
        in file order.

    Raises:
        ValueError: If a non-blank line does not contain exactly one tab.
    """
    samples = []

    # Explicit encoding so parsing does not depend on the platform default.
    with open(path, "r", encoding="utf-8") as f:
        for line_number, raw_line in enumerate(f, start=1):
            line = raw_line.rstrip("\n")

            # Blank lines (e.g. a trailing empty line at the end of the file)
            # carry no sample; skip them instead of failing on the unpack.
            if not line.strip():
                continue

            fields = line.split("\t")
            if len(fields) != 2:
                raise ValueError(
                    f"{path}:{line_number}: expected 'subject<TAB>objects', "
                    f"got {line!r}"
                )

            subject, objects = fields
            samples.append(RelationSample(subject, objects.split("/")))

    return Relation(
        name=relation_name,
        prompt_templates=prompts,
        # The same templates are reused for the zero-shot variant.
        prompt_templates_zs=prompts,
        samples=samples,
    )

In [6]:
# Each subject should be evaluated against any of its available objects.
import json

# Relation to convert; the name doubles as the relative path (without
# extension) of the input text file and of the output JSON file.
relation_name = 'lexsem/L10 [antonyms - binary]'
relation_path = f"text/{relation_name}.txt"
relation_path_json = f"json/{relation_name}.json"

# The tag is the text between the last "[" and the following "]".
relation_tag = relation_name.rpartition("[")[2].partition("]")[0]

# Prompt templates; "{}" marks where the subject is inserted.
prompts = [
    "The opposite of {} is",
    "The inverse of {} is",
    "Something that is {} is not",
]

relation = relation_from_path(relation_path, relation_tag, prompts)

# Initialised empty here; nothing in this cell fills them.
subjects, subject_object_pairs, all_pairs = [], [], []

# Plain-dict form of the relation, ready for JSON serialisation.
relation_json = relation.to_dict()

In [7]:
import os

# open(..., "w") does not create missing parent directories, and the output
# path includes a subdirectory taken from relation_name, so create it first.
output_dir = os.path.dirname(relation_path_json)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

# Write the serialised relation as indented JSON, with an explicit encoding
# so the output does not depend on the platform default.
with open(relation_path_json, "w", encoding="utf-8") as file:
    json.dump(relation_json, file, indent=4)