# Main

Using the notebook lets you load each model once and reuse it as many times as you
want. It also makes the workflow more resilient to errors such as a bad path, since a
failed cell can be fixed and re-run without reloading the model.

In [None]:
from pathlib import Path

# Dataset/run identifier; every stage directory below ends with this tag.
INFO_TAG = "persuasive_essays_paragraph"
# Root folder that holds the data of all pipeline stages.
BASE_DATA = Path("data")

# Language pair passed to the pipelines in the cells below.
SOURCE_LANGUAGE = "english"
TARGET_LANGUAGE = "spanish"

# Corpus Projection: one directory per stage, all scoped to INFO_TAG.
CORPUS = BASE_DATA.joinpath("corpus", INFO_TAG)
PROCESSED_CORPUS = BASE_DATA.joinpath("parsed_to_conll", INFO_TAG)
SENTENCE_ALIGN = BASE_DATA.joinpath("sentence_alignment", INFO_TAG)
BIDIRECTIONAL_ALIGN = BASE_DATA.joinpath("bidirectional_alignment", INFO_TAG)
PROJECTION = BASE_DATA.joinpath("projection", INFO_TAG)

# Link Prediction: input texts, segmenter output and link-prediction output.
TO_PROCESS = BASE_DATA.joinpath("to_process", INFO_TAG)
SEGMENTER = BASE_DATA.joinpath("segmenter_processed", INFO_TAG)
LINK_PREDICTION = BASE_DATA.joinpath("link_prediction_processed", INFO_TAG)

# Export to Brat: lives under brat/data, not under BASE_DATA.
BRAT = Path("brat") / "data" / INFO_TAG


# Corpus Projection

In [None]:
from pipelines.corpus_pipelines import full_corpus_processing_pipeline

from aligner.aligner import AwesomeAlignAligner as Aligner
from corpus_parser.unified_parser import UnifiedParser as Parser
from projector.projector import CrossLingualAnnotationProjector as Projector
from sentence_aligner.sentence_aligner import SentenceAligner
from sentence_aligner.translator import GoogleDeepTranslator as Translator

# Directories handed to the corpus-projection pipeline: the raw corpus plus
# one destination directory per stage.
projection_dirs = dict(
    corpus_dir=CORPUS,
    standard_corpus_dest_dir=PROCESSED_CORPUS,
    sentence_alignment_dest_dir=SENTENCE_ALIGN,
    bidirectional_alignment_dest_dir=BIDIRECTIONAL_ALIGN,
    projection_dest_dir=PROJECTION,
)

# Pluggable components of the pipeline; the sentence aligner wraps a translator.
projection_components = dict(
    corpus_parser=Parser(),
    sentence_aligner=SentenceAligner(Translator()),
    aligner=Aligner(),
    projector=Projector(),
)

# Run the whole corpus-projection pipeline for the configured language pair.
full_corpus_processing_pipeline(
    **projection_dirs,
    **projection_components,
    source_language=SOURCE_LANGUAGE,
    target_language=TARGET_LANGUAGE,
)

# Link Prediction

In [None]:
from pipelines.segmenter_pipelines import perform_full_inference_pipeline, perform_segmentation_pipeline, perform_link_prediction_pipeline


In [None]:
from segmenter.tf_segmenter import TensorflowArgumentSegmenter as Segmenter

# Build the segmenter once in its own cell so later cells can reuse the same
# instance without constructing it again.
# NOTE(review): INFO_TAG and TARGET_LANGUAGE presumably select which trained
# model is loaded -- confirm against TensorflowArgumentSegmenter.
segmenter = Segmenter(INFO_TAG, TARGET_LANGUAGE)


In [None]:
from link_prediction.tf_link_predictor import TensorflowLinkPredictor as LinkPredictor

# Build the link predictor once in its own cell so later cells can reuse the
# same instance without constructing it again.
# NOTE(review): INFO_TAG and TARGET_LANGUAGE presumably select which trained
# model is loaded -- confirm against TensorflowLinkPredictor.
link_predictor = LinkPredictor(INFO_TAG, TARGET_LANGUAGE)

In [None]:

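# Step-by-step alternative (disabled): run segmentation and link prediction as
# two separate calls instead of the combined pipeline call at the end of this cell.
# perform_segmentation_pipeline(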
#     segmenter=segmenter,
#     source_dir=TO_PROCESS,
#     destination_dir=SEGMENTER,
# )

# perform_link_prediction_pipeline(
#     link_predictor=link_predictor,
#     source_dir=SEGMENTER,
#     destination_dir=LINK_PREDICTION,
#     source_language=TARGET_LANGUAGE
# )

# Directories used by the inference run: input texts, the segmenter's output
# directory and the final link-prediction output directory.
inference_dirs = dict(
    source_dir=TO_PROCESS,
    segmenter_destination_dir=SEGMENTER,
    destination_dir=LINK_PREDICTION,
)

# Run the full inference pipeline with the already-built models.
# NOTE(review): source_language is TARGET_LANGUAGE here, the same language the
# segmenter and link predictor were constructed with -- confirm this is intended.
perform_full_inference_pipeline(
    segmenter=segmenter,
    link_predictor=link_predictor,
    **inference_dirs,
    source_language=TARGET_LANGUAGE,
)

# Export to Brat

In [None]:

from corpus_parser.brat_parser import BratParser
from corpus_parser.conll_parser import ConllParser

# Parse the link-prediction output directory with a BIOES-enabled CoNLL parser.
conll_parser = ConllParser(bioes=True)
dataframes_dict = conll_parser.parse_dir(LINK_PREDICTION)

# Export the parsed dataframes to the Brat data directory.
brat_parser = BratParser()
brat_parser.export_from_dataframes(BRAT, dataframes_dict)


In [1]:
!pip freeze

absl-py==1.0.0
argon2-cffi==21.3.0
argon2-cffi-bindings==21.2.0
asttokens==2.0.5
astunparse==1.6.3
attrs==21.4.0
awesome-align==0.1.7
backcall==0.2.0
beautifulsoup4==4.11.1
bleach==5.0.0
boto3==1.24.71
botocore==1.27.71
cachetools==5.1.0
certifi==2022.5.18.1
cffi==1.15.0
charset-normalizer==2.0.12
click==8.1.3
cycler==0.11.0
debugpy==1.6.0
decorator==5.1.1
defusedxml==0.7.1
entrypoints==0.4
executing==0.8.3
fastjsonschema==2.15.3
filelock==3.8.0
flatbuffers==1.12
fonttools==4.33.3
gast==0.4.0
google-auth==2.6.6
google-auth-oauthlib==0.4.6
google-pasta==0.2.0
grpcio==1.46.3
h5py==3.6.0
idna==3.3
importlib-metadata==4.11.4
importlib-resources==5.7.1
ipykernel==5.1.1
ipython==8.3.0
ipython-genutils==0.2.0
ipywidgets==7.7.0
jedi==0.17.2
Jinja2==3.1.2
jmespath==1.0.1
joblib==1.1.0
jsonschema==4.5.1
jupyter==1.0.0
jupyter-client==7.3.1
jupyter-console==6.4.3
jupyter-core==4.10.0
jupyter-http-over-ws==0.0.8
jupyterlab-pygments==0.2.2
jupyterl