In [1]:
# Reload edited modules automatically before executing each cell.
%load_ext autoreload
%autoreload 2

In [None]:
# Unpack the bundled evidencegraph sources.
# -n: never overwrite existing files, so re-running this cell does not stop at
#     unzip's interactive "replace?" prompt; -q: keep the cell output short.
! unzip -q -n evidencegraph.zip

In [None]:
# The upstream repository is intentionally NOT cloned; an updated version is used instead.
# ! git clone https://github.com/peldszus/evidencegraph

In [None]:
%%writefile evidencegraph/run_preparations.sh
# Prepare the evidencegraph checkout: system packages, Python requirements,
# spaCy data, corpora, the test run, and a Jupyter kernel for the project env.
# Must be run from inside the evidencegraph directory (./env is relative).

# Abort on the first failing step instead of continuing with a broken setup.
set -e

# apt-get instead of apt: apt does not offer a stable CLI for use in scripts.
apt-get install -y libxml2-dev libxslt1-dev libpng-dev libfreetype6-dev graphviz
make install-requirements
make download-spacy-data-en
make download-corpora
make test

# Register the project environment as the "Py38-evidencegraph" Jupyter kernel.
./env/bin/pip install ipykernel
./env/bin/python -m ipykernel install --name "Py38-evidencegraph"

In [None]:
# Run the preparation script from inside the evidencegraph directory
# (the script uses paths relative to it, e.g. ./env/bin/pip).
! cd evidencegraph && sh run_preparations.sh

#### For the rest of this notebook, switch to the `Py38-evidencegraph` kernel (the Python 3.8 environment from evidencegraph)!

### Reproduce the experiments with the EG method on the whole dataset

In [3]:
# Collect the data: run the corpus collection script with the interpreter
# from the evidencegraph environment (evidencegraph/env).
! evidencegraph/env/bin/python collect_corpus_for_eg.py

Saved evidencegraph/data/corpus/en_112
Saved evidencegraph/data/corpus/ru_112
Saved evidencegraph/data/corpus/en_full
Saved evidencegraph/data/corpus/ru_full
Saved evidencegraph/data/corpus/en2ru_full
Saved evidencegraph/data/corpus/ru2en_full


### Training

In [8]:
# Uncomment to download the spaCy models into the evidencegraph environment
# (presumably only needed once).
# ! evidencegraph/env/bin/python -m spacy download en_core_web_lg
# ! evidencegraph/env/bin/python -m spacy download ru_core_news_lg

# Start the cross-validation in the background. Use the evidencegraph
# environment's interpreter (as for the other steps) rather than whatever
# `python` is on PATH, and keep a log instead of discarding all output so
# that failures of the background job can be diagnosed.
! nohup evidencegraph/env/bin/python crossval_eg.py > crossval_eg.log 2>&1 &

### Evaluation

This step assumes that the predictions have already been collected as ``data/eg_results/*.json``.

In [7]:
from evidencegraph.argtree import RELATION_SETS_BY_NAME
from evidencegraph.corpus import CORPORA
from evidencegraph.evaluation import evaluate_setting
import os

# Register the additional corpora with evidencegraph. Every corpus lives in
# its own sub-directory of `base_corpora_dir`, named after the corpus id.
base_corpora_dir = os.path.join('evidencegraph', 'data', 'corpus')

# (corpus id, language) pairs, in registration order.
corpus_languages = (
    ('en_112', 'en'),
    ('ru_112', 'ru'),
    ('en_full', 'en'),
    ('ru_full', 'ru'),
    ('ru2en_full', 'en'),
    ('en2ru_full', 'ru'),
)

CORPORA.update({
    corpus_id: {
        'language': corpus_language,
        'path': os.path.join(base_corpora_dir, corpus_id),
    }
    for corpus_id, corpus_language in corpus_languages
})


# Settings to evaluate, in the form
#     (segmentation, relation set name, language): [condition, corpus name]
# `condition` presumably matches the basename of a predictions file in
# data/eg_results/ -- TODO confirm against evaluate_setting.
#
# NOTE: dictionary keys must be unique. Several of the alternatives below
# share the same key, so enabling more than one of them at a time silently
# keeps only the last one. Enable one setting per key and re-run the cell.
settings = {
    # ("adu", "SIMPLE_RELATION_SET", "en"): [
    #     "en_full-features_all", "en_full"
    # ],
    # ("adu", "SIMPLE_RELATION_SET", "en"): [
    #     "en_full-features_-BC", "en_full"
    # ],
    # ("adu", "SIMPLE_RELATION_SET", "en"): [
    #     "en_full-features_-Cues", "en_full"
    # ],
    # ("adu", "SIMPLE_RELATION_SET", "en"): [
    #     "en_full-features_-BC_-Cues", "en_full"
    # ],
    # NOTE(review): 'ru2en_full' is registered in CORPORA with language 'en',
    # but this setting uses "ru" -- confirm which one is intended.
    # ("adu", "SIMPLE_RELATION_SET", "ru"): [
    #     "ru2en_full-features_-BC_-Cues", "ru2en_full"
    # ],
    # ("adu", "SIMPLE_RELATION_SET", "ru"): [
    #     "ru_full-features_-BC_-Cues", "ru_full"
    # ],
    ("adu", "SIMPLE_RELATION_SET", "ru"): [
        "en2ru_full-features_-BC_-Cues", "en2ru_full"
    ],
}

for (segmentation, relationset_name, language), (condition, corpus_name) in settings.items():
    # Resolve the relation set by name and fail loudly on a typo, instead of
    # passing None on to evaluate_setting.
    relationset = RELATION_SETS_BY_NAME.get(relationset_name)
    if relationset is None:
        raise KeyError(f"Unknown relation set name: {relationset_name!r}")
    evaluate_setting(
        language,
        segmentation,
        relationset,
        [condition],
        corpus_id=corpus_name,
        predictions_path='data/eg_results/'
    )



EVALUATING SETTING ru, adu, ['cc', 'sup', 'same-arg', 'att']:

# Metric: Macro avg. F1
level	en2ru_full-features_-BC_-Cues
cc	87.27 (+- 6.76)
ro	73.81 (+- 6.42)
fu	73.78 (+- 6.25)
at	73.82 (+- 2.90)

# Metric: Unlabelled attachment score
level	en2ru_full-features_-BC_-Cues
uat	61.80 (+- 5.67)

# Metric: Labelled attachment score
level	en2ru_full-features_-BC_-Cues
lat	54.69 (+- 5.46)

# Classwise scores (P, R, F1) for level cc
condition	en2ru_full-features_-BC_-Cues
0	95.057 (± 2.5)	95.1 (± 2.5)	95.1 (± 2.5)
1	79.486 (± 10.3)	79.5 (± 10.3)	79.5 (± 10.3)

# Classwise scores (P, R, F1) for level ro
condition	en2ru_full-features_-BC_-Cues
0	88.374 (± 2.4)	89.1 (± 5.7)	88.7 (± 3.5)
1	62.040 (± 13.4)	57.7 (± 9.1)	58.9 (± 9.0)

# Classwise scores (P, R, F1) for level fu
condition	en2ru_full-features_-BC_-Cues
0	79.486 (± 10.3)	79.5 (± 10.3)	79.5 (± 10.3)
1	75.581 (± 4.8)	79.4 (± 4.9)	77.4 (± 4.2)
2	68.605 (± 8.2)	61.4 (± 6.8)	64.5 (± 5.4)

# Classwise scores (P, R, F1) for level at
conditi

In [2]:
# from evidencegraph.evaluation import load_predictions

# predictions = load_predictions('data/eg_results/en2ru_full-features_-BC_-Cues.json')