In [1]:
import wn
from typing import List, Dict

In [2]:
# Identifier of the wn lexicon that the synset lookups and the full export in
# this notebook are scoped to (passed as `lexicon=` to wn.synset / wn.synsets).
LEXICON_ID: str = "omw-en31"

In [42]:
# Relation names used as the `rel` value of exported records.
# REL_INSTANCE_HYPERNYM is not referenced by the code visible in this notebook.
REL_HYPERNYM: str = "hypernym"
REL_INSTANCE_HYPERNYM: str = "instance_hypernym"
REL_HOLONYM: str = "holonym"
REL_ANTONYM: str = "antonym"
# %-format template; export_hypernyms fills it with the recursion depth,
# e.g. REL_HYPERNYM_LEAP % 1 == "hypernym_leap_1".
REL_HYPERNYM_LEAP: str = "hypernym_leap_%s"

# Names for relations that extract_relations derives itself: pairs of senses
# inside one synset (synonym) and pairs of hyponyms of one synset (co_hyponym).
REL_SYNONYM: str = "synonym"
REL_COHYPONYM: str = "co_hyponym"


# Intentionally left without entries: every synset-level relation listed in the
# comments below is produced by dedicated code in extract_relations instead.
# This list is not referenced by the code visible in this notebook.
SYNSET_RELATIONS: List[str] = [
    # Produced by walking synset.hyponyms() (see export_hypernyms)
    # REL_HYPERNYM,
    # REL_INSTANCE_HYPERNYM,

    # Produced computationally
    # Also hypernym_leap_1, hypernym_leap_2...

    # Produced by walking synset.meronyms()
    # REL_HOLONYM,

    # Produced computationally
    # Also synonym
    # Also co_hyponym
]

# Synset-synset stats:
# NOTE(review): the cell that produced these counts is not in this notebook, and
# the names below look like sense-level relations; confirm whether this heading
# should read "Sense-sense stats".
# [('derivation', 50397),
#  ('pertainym', 7920),
#  ('antonym', 7772),
#  ('is_exemplified_by', 390),
#  ('also', 324),
#  ('domain_region', 98),
#  ('participle', 73),
#  ('domain_topic', 12),
#  ('has_domain_topic', 11),
#  ('exemplifies', 8),
#  ('has_domain_region', 4),
#  ('similar', 2)]

# Relation names passed to sense.relations(...) in extract_relations; only
# antonyms are exported out of the sense-level relations counted above.
SENSE_RELATIONS: List[str] = [
    REL_ANTONYM,
]

In [99]:
# Sanity check of the leap-name template: %-formatting with a depth of 1.
REL_HYPERNYM_LEAP % 1

'hypernym_leap_1'

In [70]:
from collections import namedtuple

# Hand-picked synsets for interactive experiments.
# `root` is the synset whose lemma is "entity" (see the extract_relations(root)
# output further down in this notebook).
root: wn.Synset = wn.synset(id="omw-en31-00001740-n", lexicon=LEXICON_ID)
# NOTE(review): `sample` and `sample2` are not used by any cell visible in this
# notebook; presumably leftovers from exploration — confirm before removing.
sample: wn.Synset = wn.synset(id="omw-en31-05990115-n", lexicon=LEXICON_ID)
sample2: wn.Synset = wn.synset(id="omw-en31-07961030-n", lexicon=LEXICON_ID)

# One exported row: a named relation between a "left" sense and a "right" sense.
# Field order matters: records may be built positionally, and the CSV export
# takes its column order from Relation._fields.
Relation = namedtuple(
    typename="Relation",
    field_names=(
        # Synsets the two senses belong to.
        "synset_id_left",
        "synset_id_right",
        # The two senses themselves.
        "sense_id_left",
        "sense_id_right",
        # Part of speech, taken from each sense's synset.
        "pos_left",
        "pos_right",
        # Relation name, e.g. "hypernym", "synonym", "hypernym_leap_1".
        "rel",
        # Lemma of the word behind each sense.
        "lemma_left",
        "lemma_right",
        # Min length of the path between the two synsets on the
        # hypernym/hyponym tree.
        "path_len",
    ),
)

In [120]:
from itertools import combinations, product


def get_relation_record(
    sense_left: wn.Sense, sense_right: wn.Sense, rel_type: str
) -> Relation:
    """Build the export row for one ``rel_type`` relation between two senses.

    Args:
        sense_left: Sense that fills the ``*_left`` fields.
        sense_right: Sense that fills the ``*_right`` fields.
        rel_type: Relation name stored verbatim in the ``rel`` field.

    Returns:
        A ``Relation`` whose synset ids and parts of speech come from each
        sense's synset, and whose lemmas come from each sense's word.
        ``path_len`` is always 0 (see the note in the body).
    """
    left_synset: wn.Synset = sense_left.synset()
    right_synset: wn.Synset = sense_right.synset()

    fields = {
        "synset_id_left": left_synset.id,
        "synset_id_right": right_synset.id,
        "sense_id_left": sense_left.id,
        "sense_id_right": sense_right.id,
        "pos_left": left_synset.pos,
        "pos_right": right_synset.pos,
        "rel": rel_type,
        "lemma_left": sense_left.word().lemma(),
        "lemma_right": sense_right.word().lemma(),
        # Path distance is switched off; the disabled computation was
        # len(synset_left.shortest_path(synset_right)).
        "path_len": 0,
    }
    return Relation(**fields)


def export_hypernyms(
    hypernym: wn.Synset, hyponym: wn.Synset, curr_depth: int, max_depth: int
) -> List[Relation]:
    """Emit hypernym records from ``hypernym`` to ``hyponym`` and its descendants.

    Every (hypernym sense, hyponym sense) pair yields one record. At depth 0
    the relation is named ``REL_HYPERNYM``; deeper levels use
    ``REL_HYPERNYM_LEAP`` formatted with the current depth. The function then
    recurses into the hyponyms of ``hyponym``, keeping ``hypernym`` fixed, for
    as long as ``curr_depth < max_depth - 1``.

    Args:
        hypernym: Synset on the left side of every record produced.
        hyponym: Synset on the right side of the records at this level.
        curr_depth: Current recursion level; callers start at 0.
        max_depth: Number of levels to export (2 means depths 0 and 1).

    Returns:
        Records for this level followed by the records of each descendant
        subtree, in the order ``hyponym.hyponyms()`` yields them.
    """
    rel_name: str = (
        REL_HYPERNYM if curr_depth == 0 else REL_HYPERNYM_LEAP % curr_depth
    )

    records: List[Relation] = [
        get_relation_record(sense_left=upper, sense_right=lower, rel_type=rel_name)
        for upper, lower in product(hypernym.senses(), hyponym.senses())
    ]

    # Deepest requested level reached: do not look at further descendants.
    if curr_depth >= max_depth - 1:
        return records

    for descendant in hyponym.hyponyms():
        records.extend(
            export_hypernyms(
                hypernym=hypernym,
                hyponym=descendant,
                curr_depth=curr_depth + 1,
                max_depth=max_depth,
            )
        )

    return records


def extract_relations(synset: wn.Synset, hypernym_depth: int = 2) -> List[Relation]:
    """Collect all exported relations anchored at ``synset``.

    Records are appended in this order:

    1. ``REL_SYNONYM`` for each unordered pair of the synset's own senses.
    2. Hypernym / hypernym-leap records towards each hyponym subtree
       (delegated to ``export_hypernyms``).
    3. ``REL_HOLONYM`` for each (own sense, meronym sense) pair.
    4. ``REL_COHYPONYM`` for each sense pair taken from two different
       hyponyms of the synset.
    5. The sense-level relations named in ``SENSE_RELATIONS``, labelled with
       the relation name reported by ``sense.relations``.

    Args:
        synset: Synset whose relations are extracted.
        hypernym_depth: Passed to ``export_hypernyms`` as ``max_depth``.

    Returns:
        A list of ``Relation`` records; pairs are emitted in one direction only.
    """
    # Fetch once and reuse: the own senses feed the synonym, holonym and
    # sense-relation steps; the hyponyms feed the hypernym and co-hyponym steps.
    own_senses: List[wn.Sense] = synset.senses()
    hyponyms: List[wn.Synset] = synset.hyponyms()
    res: List[Relation] = []

    # Synonyms
    for a, b in combinations(own_senses, 2):
        res.append(
            # TODO: add reverse relation?
            get_relation_record(sense_left=a, sense_right=b, rel_type=REL_SYNONYM)
        )

    # hypernyms:
    for hyponym in hyponyms:
        res += export_hypernyms(
            hypernym=synset, hyponym=hyponym, curr_depth=0, max_depth=hypernym_depth
        )

    # holonyms:
    for meronym in synset.meronyms():
        for a, b in product(own_senses, meronym.senses()):
            res.append(
                get_relation_record(sense_left=a, sense_right=b, rel_type=REL_HOLONYM)
            )

    # cohyponyms:
    for hyp1, hyp2 in combinations(hyponyms, 2):
        # TODO: check for reverse relations?
        for a, b in product(hyp1.senses(), hyp2.senses()):
            res.append(
                get_relation_record(sense_left=a, sense_right=b, rel_type=REL_COHYPONYM)
            )

    # Sense 2 Sense relations
    for sense in own_senses:
        for rel, related_senses in sense.relations(*SENSE_RELATIONS).items():
            for related_sense in related_senses:
                res.append(
                    get_relation_record(
                        sense_left=sense, sense_right=related_sense, rel_type=rel
                    )
                )

    return res

In [121]:
# Smoke test: run the extractor on the hand-picked `root` synset and display
# the resulting records.
extract_relations(root)

[Relation(synset_id_left='omw-en31-00001740-n', synset_id_right='omw-en31-00001930-n', sense_id_left='omw-en31-entity-00001740-n', sense_id_right='omw-en31-physical_entity-00001930-n', pos_left='n', pos_right='n', rel='hypernym', lemma_left='entity', lemma_right='physical entity', path_len=0),
 Relation(synset_id_left='omw-en31-00001740-n', synset_id_right='omw-en31-00002452-n', sense_id_left='omw-en31-entity-00001740-n', sense_id_right='omw-en31-thing-00002452-n', pos_left='n', pos_right='n', rel='hypernym_leap_1', lemma_left='entity', lemma_right='thing', path_len=0),
 Relation(synset_id_left='omw-en31-00001740-n', synset_id_right='omw-en31-00002684-n', sense_id_left='omw-en31-entity-00001740-n', sense_id_right='omw-en31-object-00002684-n', pos_left='n', pos_right='n', rel='hypernym_leap_1', lemma_left='entity', lemma_right='object', path_len=0),
 Relation(synset_id_left='omw-en31-00001740-n', synset_id_right='omw-en31-00002684-n', sense_id_left='omw-en31-entity-00001740-n', sense_id

In [122]:
# tqdm is imported in this cell because no earlier cell of the notebook imports
# it; without this line the loop raises NameError on a fresh kernel.
from tqdm import tqdm

# Accumulates the records of every synset in the lexicon.
all_relations_raw: List[Relation] = []

# Full pass over the lexicon; the recorded run of this cell took about
# 30 minutes for 117791 synsets.
for synset in tqdm(wn.synsets(lexicon=LEXICON_ID)):
    all_relations_raw.extend(extract_relations(synset, hypernym_depth=2))

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 117791/117791 [30:42<00:00, 63.93it/s]


In [128]:
# Peek at the first record as a dict; this is the row shape the CSV export writes.
all_relations_raw[0]._asdict()

{'synset_id_left': 'omw-en31-00001740-n',
 'synset_id_right': 'omw-en31-00001930-n',
 'sense_id_left': 'omw-en31-entity-00001740-n',
 'sense_id_right': 'omw-en31-physical_entity-00001930-n',
 'pos_left': 'n',
 'pos_right': 'n',
 'rel': 'hypernym',
 'lemma_left': 'entity',
 'lemma_right': 'physical entity',
 'path_len': 0}

In [130]:
import csv

import smart_open
from tqdm import tqdm

# smart_open.open picks the bz2 codec from the ".bz2" suffix. The builtin
# open() would write plain, uncompressed text under a misleading file name.
# newline="" hands line-ending control to the csv module, and the explicit
# encoding keeps the output independent of the machine's locale.
with smart_open.open(
    "all_relations_raw.depth2.no_distance.csv.bz2",
    "wt",
    encoding="utf-8",
    newline="",
) as fp_out:
    # Column order follows the field order of the Relation namedtuple.
    w = csv.DictWriter(fp_out, fieldnames=Relation._fields)
    w.writeheader()
    for rel in tqdm(all_relations_raw):
        w.writerow(rel._asdict())

100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 8520327/8520327 [01:43<00:00, 82184.21it/s]
