In [1]:
import os
import json
from typing import Dict, List, Tuple
import UDLib as udlib

In [7]:
# Input locations, given relative to the notebook's working directory.
# CONLLU_PATH: directory containing the treebank .conllu files.
CONLLU_PATH = '../conllu'
# SNACS_PATH: directory containing one annotation file per data split
# (read line by line as JSON; presumably SNACS tagger output — confirm).
SNACS_PATH = '../snacs-output/ewt'

In [4]:
def get_annotations(path: str) -> List[Dict[str, List[str]]]:
    """Read a JSON Lines file and return one parsed record per line.

    Args:
        path: Path to a UTF-8 text file holding one JSON document per line.

    Returns:
        The parsed records, in file order. Blank or whitespace-only lines
        (for example a stray empty line at the end of the file) are skipped
        instead of being handed to the JSON parser.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(path, 'r', encoding='utf-8') as inp:
        # ``line.strip()`` is falsy for empty/whitespace-only lines, which
        # ``json.loads`` would otherwise reject.
        return [json.loads(line) for line in inp if line.strip()]

In [12]:
def get_obliques_with_types(
        t: udlib.UDTree,
        annotation: Dict[str, List[str]]) -> List[Tuple[str, str]]:
    """Collect every ``obl`` dependent of a tree together with a type label.

    The tree is walked depth-first starting from its root. For each visited
    node whose DEPREL is exactly ``'obl'``:

    * if the node has a child with DEPREL ``'case'``, the first such child
      (in the order of the node's edge list) yields the pair
      ``('<case FORM> <obl FORM>', <tag of the case token>)``;
    * otherwise the pair is ``('<obl FORM>', 'caseless obl')``.

    Args:
        t: Tree object exposing ``keys`` (node keys), ``nodes`` (key -> node
            with ``FORM`` and ``DEPREL``) and ``graph`` (key -> list of edges
            with ``head`` and ``directionality``).
        annotation: Record with ``'tokens'`` and ``'tags'`` lists. Tags are
            paired with nodes positionally via ``t.keys``.

    Returns:
        A list of ``(phrase, label)`` pairs in the order the nodes are popped
        from the DFS stack (not necessarily sentence order). The list is
        empty when no edge leaves the artificial node ``'0'``.

    Raises:
        AssertionError: If the number of node keys differs from the number
            of annotated tokens.
    """
    assert len(t.keys) == len(annotation['tokens'])
    tag_dict = dict(zip(t.keys, annotation['tags']))
    result = []

    # The artificial node '0' points at the real root. If several edges
    # leave '0', the last one wins (unchanged from the earlier behaviour).
    root = None
    for edge in t.graph['0']:
        root = edge.head
    if root is None:
        # No root edge (e.g. an empty tree): nothing to traverse. Without
        # this guard ``root`` would be unbound below.
        return result

    stack = [root]
    while stack:
        current_node = stack.pop()
        # For a 'down' edge, ``head`` is used as the key of the child node.
        children = [edge.head for edge in t.graph[current_node]
                    if edge.directionality == 'down']
        if t.nodes[current_node].DEPREL == 'obl':
            # First 'case' child, or None when the oblique has no case marker.
            case_child = next(
                (child for child in children
                 if t.nodes[child].DEPREL == 'case'),
                None)
            if case_child is None:
                result.append((t.nodes[current_node].FORM, 'caseless obl'))
            else:
                result.append((t.nodes[case_child].FORM + ' ' + t.nodes[current_node].FORM,
                               tag_dict[case_child]))
        # Continue the DFS below this node regardless of its relation.
        stack.extend(children)
    return result

In [8]:
# Load the dev split: dependency trees parsed from the CoNLL-U file, and the
# annotation records read from the matching JSON Lines file.
# NOTE(review): the two lists are indexed in parallel further down, so they
# are assumed to be sentence-aligned — confirm, e.g. by comparing lengths.
dev_trees = udlib.conllu2trees(os.path.join(CONLLU_PATH, 'en_ewt-ud-dev.conllu'))
dev_annotations = get_annotations(os.path.join(SNACS_PATH, 'dev'))

In [13]:
# Spot check on the first dev sentence: show its obliques and their labels.
get_obliques_with_types(dev_trees[0], dev_annotations[0])