In [1]:
# Notebook purpose: interactively explore the graph's relations to decide
# where to make concept splits.
# autoreload (mode 2) re-imports edited modules before each cell runs, so
# changes to project code are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import irt2

from irt2.graph import Graph
from irt2.graph import load_graph
from irt2.types import RID

from ktz.collections import ryaml

# from pprint import pprint

# Read the dataset-creation configuration from both yaml files, passed in
# this order (presumably the later file refines the earlier one -- confirm
# against ktz's ryaml).
config = ryaml(
    *(irt2.ENV.DIR.CONF / 'create' / fname for fname in ('cde.yaml', 'cde-l.yaml'))
)

# Loader name, graph name and loader arguments all come from the config;
# every path-like loader argument is resolved relative to the project root.
graph = load_graph(
    config['graph loader'],
    config['graph name'],
    *(irt2.ENV.DIR.ROOT / path for path in config['graph loader args']),
    **{
        key: irt2.ENV.DIR.ROOT / path
        for key, path in config['graph loader kwargs'].items()
    },
)

# pprint(config)
# print()
# print(graph.description)

In [3]:
from irt2.graph import Relation

# Build the relation objects for the loaded graph and report how many exist.
relations = Relation.from_graph(graph)
print(f'got {len(relations)} relations')

CodEx-M


got 51 relations


In [4]:
from tabulate import tabulate


def relation_table(relations):
    """Render the given relations as a plain-text table.

    Each relation contributes one row: a running number (starting at 1),
    its name, rid and ratio, and the sizes of its head, tail and triple
    collections.

    Returns:
        The table as produced by ``tabulate``.
    """
    columns = ('#', 'name', 'rid', 'ratio', '#heads', '#tails', '#triples')
    body = [
        (
            index,
            relation.name,
            relation.rid,
            relation.ratio,
            len(relation.heads),
            len(relation.tails),
            len(relation.triples),
        )
        for index, relation in enumerate(relations, start=1)
    ]
    return tabulate(body, headers=columns)


# Overview of all relations, one row per relation.
print(relation_table(relations))

  #  name                                                       rid       ratio    #heads    #tails    #triples
---  -------------------------------------------------------  -----  ----------  --------  --------  ----------
  1  P1412:languages spoken, written, or signed                  11  0.00631622      9816        62       12584
  2  P1303:instrument                                            14  0.0104914       3622        38        6076
  3  P140:religion                                               25  0.015873        2520        40        2651
  4  P27:country of citizenship                                   3  0.0168763      13036       220       16828
  5  P30:continent                                               19  0.01983          353         7         391
  6  P509:cause of death                                          8  0.0201889       3071        62        3210
  7  P172:ethnic group                                            5  0.0248385       2013        50     

In [5]:
from itertools import islice

def print_relation_examples(graph, relations, n: int = 5):
    """Print up to ``n`` example triples for every relation.

    For each relation a header line with its ratio, name and rid is
    printed, followed by indented ``head → tail`` lines.

    Args:
        graph: Graph providing ``find(edges=...)`` and ``source.ents``,
            a mapping from entity id to a label such as
            ``"Q22670:Friedrich Schiller"``.
        relations: Iterable of relations exposing ``ratio``, ``name`` and
            ``rid``.
        n: Maximum number of triples shown per relation.
    """

    def label(eid):
        # Strip only the leading "<id>:" prefix. Splitting on every ':'
        # would cut off names that contain a colon themselves.
        raw = graph.source.ents[eid]
        _, sep, text = raw.partition(':')
        # Labels without any prefix are shown unchanged instead of failing.
        return text if sep else raw

    for rel in relations:
        print(f'{rel.ratio:2.3f} {rel.name} (rid={rel.rid})')
        for h, t, _ in islice(graph.find(edges={rel.rid}), n):
            print(' ' * 4 + f'{label(h)} → {label(t)}')

    
# Show example triples (up to the default n=5) for every relation.
print_relation_examples(graph, relations)

0.006 P1412:languages spoken, written, or signed (rid=11)
    Pedro Antonio de Alarcón → Spanish
    Tamara Bunke → Russian
    George Fenton → English
    Giorgio Gaber → Italian
    Louis Garrel → French
0.010 P1303:instrument (rid=14)
    Alexander Ludwig → voice
    Ferruccio Busoni → piano
    Karel Vacek → clarinet
    Siegfried Palm → cello
    Tito Gobbi → voice
0.016 P140:religion (rid=25)
    Errico Malatesta → atheism
    Oriana Fallaci → atheism
    Tarja Halonen → Lutheranism
    John Keats → Anglicanism
    Paul Hindemith → Lutheranism
0.017 P27:country of citizenship (rid=3)
    Cardinal Richelieu → France
    Wolfgang Golther → Germany
    Robert Siewert → Germany
    Hanns-Josef Ortheil → Germany
    Heinrich von Wild → Switzerland
0.020 P30:continent (rid=19)
    Honduras → North America
    North Macedonia → Europe
    Bhutan → Asia
    Wales → Europe
    South Sudan → Africa
0.020 P509:cause of death (rid=8)
    John Balance → fall
    Ronnie Van Zant → traffic coll

In [12]:
def print_relation(graph, relations, rid: RID, maxn: int = None):
    """Print one relation's summary followed by a table of its triples.

    Args:
        graph: Graph providing ``tabulate_triples``.
        relations: Iterable of relations exposing ``rid`` and ``triples``.
        rid: Id of the relation to print; the first match is used.
        maxn: Maximum number of triples to show, or ``None`` for all.

    Raises:
        IndexError: If no relation in ``relations`` has the given ``rid``.
    """
    # Stop at the first match instead of materialising every candidate.
    rel = next((rel for rel in relations if rel.rid == rid), None)
    if rel is None:
        # Same exception type as before, but with a message naming the rid.
        raise IndexError(f'no relation with rid={rid}')

    print(str(rel))

    if maxn is None:
        triples = rel.triples
        print('showing all triples:')
    else:
        triples = list(rel.triples)[:maxn]
        # Report what is actually shown: the relation may hold fewer
        # than maxn triples.
        print(f'showing {len(triples)} triples:')

    print(graph.tabulate_triples(triples))


# these relations are excluded from entry in irt2
# (a tuple, not a set, so the print order is well-defined)
for rid in (43, 46, 47, 48, 49, 50):
    print('\n', '-' * 80, '\n')
    print_relation(graph=graph, relations=relations, rid=rid)


 -------------------------------------------------------------------------------- 

P50:author (43): ratio=0.23529 (heads=4, tails=17) 17 triples
showing all triples:
       head                             tail                                      relation
-----  ------------------------  -----  ------------------------------------  --  ----------
 5974  Q26162388:Duch Německa     9718  Q22670:Friedrich Schiller             43  P50:author
 5974  Q26162388:Duch Německa    12531  Q62432:August von Platen-Hallermünde  43  P50:author
 5974  Q26162388:Duch Německa     1613  Q61456:Felix Dahn                     43  P50:author
 5974  Q26162388:Duch Německa    11571  Q77492:Friedrich Wilhelm Weber        43  P50:author
 5974  Q26162388:Duch Německa     4977  Q213675:Gottfried August Bürger       43  P50:author
 5974  Q26162388:Duch Německa     5448  Q44403:Heinrich Heine                 43  P50:author
 1760  Q190050:Fight Club         7611  Q268181:Chuck Palahniuk               43  P50:auth