## REConverter

This converts files from the SSA-Format (PERIN-Format) to the format for the Relation Classifier.

In [1]:
# imports
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GroupShuffleSplit
import numpy as np
import os
import pandas as pd
import json
import uuid

In [2]:
# makedirs if not exist
os.makedirs("../../etl/data/processed/REConverter", exist_ok=True)

In [3]:
os.makedirs("../../etl/data/raw/REConverter", exist_ok=True)

In [4]:
# parameters
# Input data in SSA format (in JSON)
INPUT_DATA_SSA_TRAIN="../../etl/data/processed/Perin_Preprocessing/01_train.json"
INPUT_DATA_SSA_VAL="../../etl/data/processed/Perin_Preprocessing/01_val.json"
INPUT_DATA_SSA_TEST="../../etl/data/processed/Perin_Preprocessing/01_test.json"

RE_OUTPUT_PATH_TRAIN="../../etl/data/processed/REConverter/01_train_re.json"
RE_OUTPUT_PATH_VAL="../../etl/data/processed/REConverter/01_val_re.json"
RE_OUTPUT_PATH_TEST="../../etl/data/processed/REConverter/01_test_re.json"

RANDOM_STATE=42

In [5]:
def read_json(INP_FILE):
    with open(INP_FILE, encoding="utf-8") as f:
        data = json.load(f)
        return data

ssa_data_train = read_json(INPUT_DATA_SSA_TRAIN)
ssa_data_val = read_json(INPUT_DATA_SSA_VAL)
ssa_data_test = read_json(INPUT_DATA_SSA_TEST)

In [6]:
import json

def list_to_file(sents, filepath):
    with open(filepath, 'w', encoding="utf-8") as f:
        for item in sents:
            # f.write("%s" % (json.dumps(item) + "\n"))
            json.dump(item, f, ensure_ascii=False)
            f.write("\n")

In [7]:
# validate the correctness of the parsing
ssa_data_train[0]['opinions'][0]['Source'][0][0]

'Oberländische'

In [8]:
# check input format
ssa_data_train

[{'sent_id': '0',
  'text': 'Im Kanton Bern etwa haben die Organisatoren des Emmentalischen bereits versichert , das Fest auch unter Ausschluss der Öffentlichkeit durchzuführen , das Oberländische findet ohne Zuschauer auf dem Brünigpass statt .',
  'opinions': [{'Source': [['Oberländische'], ['154:167']],
    'Target': [['Brünigpass'], ['198:208']],
    'Polar_expression': [['findet'], ['168:174']],
    'Polarity': 'Neutral',
    'Intensity': 'Average'}]},
 {'sent_id': '1',
  'text': 'Ende 2016 hatten die SBB das Projekt angekündigt , mit einem Drittel der Wohnungen im gemeinnützigen Segment .',
  'opinions': [{'Source': [['SBB'], ['21:24']],
    'Target': [['Projekt'], ['29:36']],
    'Polar_expression': [['angekündigt'], ['37:48']],
    'Polarity': 'Positive',
    'Intensity': 'Average'}]},
 {'sent_id': '2',
  'text': 'Knapp ein Vierteljahrhundert später wirken diese Vorstellungen naiv .',
  'opinions': [{'Source': [['Vorstellungen'], ['49:62']],
    'Target': [['naiv'], ['63:67']],

In [9]:
# simplified_training_data
import re
import copy

def convert_ssa_to_re(data: list):
    output = []
    for i, sent_dict in enumerate(data):
        # validate that...
        # has 1 opinion
        sent = sent_dict["text"]
        try:
            assert len(sent_dict['opinions']) == 1, "Sentences for annotation can currently only contain a single opinion."
            for opinion in sent_dict['opinions']:
                for k, v in opinion.items():
                    print(k, v)
                # from IPython.core.debugger import Pdb; Pdb().set_trace()
                # for each opinion we mask the holder
                source = opinion["Source"][0][0]
                # for each opinion we mask the target
                target = opinion["Target"][0][0]
                # replace polar expression
                pexp = opinion["Polar_expression"][0][0]
                
                tags = ["<eSOURCE>", "</eSOURCE>", "<eTARGET>", "</eTARGET>", "<ePEXP>", "</ePEXP>"]
                
                assert source and target and pexp, "Incomplete polar profile, skipping."

                if source:
                    res = re.search(f"{source}", sent)
                    e1_s, e1_e = res.span()
                    sent = sent[0:max(0, e1_s - 1)] + " <eSOURCE> " + sent[e1_s:min(len(sent), e1_e + 1)] + "</eSOURCE> " + sent[min(len(sent), e1_e + 1):]

                if target:
                    res = re.search(f"{target}", sent)
                    e2_s, e2_e = res.span()
                    sent = sent[0:max(0, e2_s - 1)] + " <eTARGET> " + sent[e2_s:min(len(sent), e2_e + 1)] + "</eTARGET> " + sent[min(len(sent), e2_e + 1):]

                if pexp:
                    res = re.search(f"{pexp}", sent)
                    e3_s, e3_e = res.span()
                    sent = sent[0:max(0, e3_s - 1)] + " <ePEXP> " + sent[e3_s:min(len(sent), e3_e + 1)] + "</ePEXP> " + sent[min(len(sent), e3_e + 1):]

                tokens = sent.split()

                assert all([tag in tokens for tag in tags]), f"There is a problem with this sentence, skipped: { sent }"

                label = opinion["Polarity"]

                element = {
                      "tokens": tokens,
                      "label": label
                  }
        except Exception as e:
            print(f"Skipped converting to ORL, {e}")
            continue

        output.append(element)
    return output

In [10]:
"Hello World...".find("kllk")

-1

In [11]:
orl_data_train = convert_ssa_to_re(ssa_data_train)

Source [['Oberländische'], ['154:167']]
Target [['Brünigpass'], ['198:208']]
Polar_expression [['findet'], ['168:174']]
Polarity Neutral
Intensity Average
Source [['SBB'], ['21:24']]
Target [['Projekt'], ['29:36']]
Polar_expression [['angekündigt'], ['37:48']]
Polarity Positive
Intensity Average
Source [['Vorstellungen'], ['49:62']]
Target [['naiv'], ['63:67']]
Polar_expression [['wirken'], ['36:42']]
Polarity Neutral
Intensity Average
Source [['Grinch'], ['4:10']]
Target [['Getränkeladen'], ['26:39']]
Polar_expression [['raubt'], ['11:16']]
Polarity Positive
Intensity Average
Source [['Orban-Regierung'], ['4:19']]
Target [['Milliardär'], ['51:61']]
Polar_expression [['unterstellt'], ['20:31']]
Polarity Negative
Intensity Average
Source [['Mann'], ['135:139']]
Target [['Frau'], ['115:119']]
Polar_expression [['getötet'], ['157:164']]
Polarity Negative
Intensity Average
Source [['Gericht'], ['21:28']]
Target [['Ausschaffung'], ['89:101']]
Polar_expression [['absehen'], ['102:109']]
Pola

Source [['Erzählung'], ['208:217']]
Target [['Keller'], ['229:235']]
Polar_expression [['bezeichnet'], ['218:228']]
Polarity Neutral
Intensity Average
Source [['Polizei'], ['27:34']]
Target [['Künstler'], ['49:57']]
Polar_expression [['heimgesucht'], ['58:69']]
Polarity Negative
Intensity Average
Source [['Hoffnungsschimmer'], ['23:40']]
Target [['Bundesliga-Spitzenreiter'], ['73:97']]
Polar_expression [['erfreut'], ['41:48']]
Polarity Positive
Intensity Average
Source [['Sozialleben'], ['54:65']]
Target [['Training'], ['86:94']]
Polar_expression [['findet'], ['66:72']]
Polarity Neutral
Intensity Average
Source [['KGB-Spitze'], ['4:14']]
Target [['Gordijewski'], ['22:33']]
Polar_expression [['rühmte'], ['15:21']]
Polarity Positive
Intensity Average
Source [['Erwartungen'], ['135:146']]
Target [['New'], ['0:3']]
Polar_expression [['enttäuscht'], ['147:157']]
Polarity Negative
Intensity Average
Source [['Bella'], ['17:22']]
Target [['Sprengkraft'], ['35:46']]
Polar_expression [['beibehal

Source [['Anlagen'], ['8:15']]
Target [['Probleme'], ['28:36']]
Polar_expression [['lösen'], ['16:21']]
Polarity Neutral
Intensity Average
Source [['Firstbird'], ['16:25']]
Target [['Millionen'], ['64:73']]
Polar_expression [['aufgenommen'], ['90:101']]
Polarity Positive
Intensity Average
Source [['Eltern'], ['88:94']]
Target [['Kredit'], ['105:111']]
Polar_expression [['aufnehmen'], ['112:121']]
Polarity Neutral
Intensity Average
Source [['Kurator'], ['4:11']]
Target [['Stadtpalais'], ['139:150']]
Polar_expression [['sorgte'], ['31:37']]
Polarity Neutral
Intensity Average
Source [['Parlament'], ['4:13']]
Target [['Moskau'], ['35:41']]
Polar_expression [['bestrafen'], ['114:123']]
Polarity Negative
Intensity Average
Source [['Anwohner'], ['174:182']]
Target [['Erhalt'], ['192:198']]
Polar_expression [['sorgen'], ['155:161']]
Polarity Positive
Intensity Average
Source [['Eistüte'], ['171:178']]
Target [['Jahrzehnte'], ['23:33']]
Polar_expression [['entstanden'], ['3:13']]
Polarity Neutr

Source [['Brasilien'], ['0:9']]
Target [['Präsidenten'], ['54:65']]
Polar_expression [['stürzen'], ['42:49']]
Polarity Negative
Intensity Average
Source [['Klassiker'], ['50:59']]
Target [['Auftritt'], ['32:40']]
Polar_expression [['fehlen'], ['66:72']]
Polarity Neutral
Intensity Average
Source [['Grossverteiler'], ['9:23']]
Target [['Bio-'], ['79:83']]
Polar_expression [['anzupreisen'], ['103:114']]
Polarity Positive
Intensity Average
Source [['Täter'], ['4:9']]
Target [['Gäste'], ['47:52']]
Polar_expression [['geschossen'], ['53:63']]
Polarity Negative
Intensity Average
Source [['Polizei'], ['22:29']]
Target [['Protestierende'], ['54:68']]
Polar_expression [['geschossen'], ['69:79']]
Polarity Negative
Intensity Average
Source [['Grüne'], ['18:23']]
Target [['Verhandlungen'], ['105:118']]
Polar_expression [['aufgenommen'], ['170:181']]
Polarity Positive
Intensity Average
Source [['Schweiz'], ['27:34']]
Target [['Winter'], ['48:54']]
Polar_expression [['irrt'], ['7:11']]
Polarity Neutr

Source [['Tesla'], ['92:97']]
Target [['VW'], ['20:22']]
Polar_expression [['enttäuscht'], ['5:15']]
Polarity Negative
Intensity Average
Source [['Premierminister'], ['15:30']]
Target [['Vorfall'], ['111:118']]
Polar_expression [['bezeichnete'], ['95:106']]
Polarity Neutral
Intensity Average
Source [['Uno-Hochkommissar'], ['37:54']]
Target [['Seenotrettung'], ['134:147']]
Polar_expression [['aufzunehmen'], ['174:185']]
Polarity Positive
Intensity Average
Source [['Zürcher'], ['19:26']]
Target [['Danach'], ['0:6']]
Polar_expression [['stürzte'], ['7:14']]
Polarity Neutral
Intensity Average
Source [['Hauseigentümerverband'], ['20:41']]
Target [['Auflagen'], ['92:100']]
Polar_expression [['beschwerte'], ['58:68']]
Polarity Negative
Intensity Average
Source [['Art'], ['31:34']]
Target [['Antoine'], ['50:57']]
Polar_expression [['bewahrt'], ['96:103']]
Polarity Positive
Intensity Average
Source [['Bombardierungen'], ['9:24']]
Target [['Schwabinger'], ['57:68']]
Polar_expression [['zerstört'

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [12]:
orl_data_val = convert_ssa_to_re(ssa_data_val)

Source [['Anlässe'], ['58:65']]
Target [['Generierung'], ['4:15']]
Polar_expression [['beitragen'], ['66:75']]
Polarity Neutral
Intensity Average
Source [['Deutsche'], ['33:41']]
Target [['Aktion'], ['9:15']]
Polar_expression [['stellte'], ['16:23']]
Polarity Positive
Intensity Average
Source [['Unternehmen'], ['21:32']]
Target [['Dazu'], ['0:4']]
Polar_expression [['stellt'], ['5:11']]
Polarity Positive
Intensity Average
Source [['Salihamidzic'], ['55:67']]
Target [['Nagelsmann'], ['104:114']]
Polar_expression [['freut'], ['68:73']]
Polarity Positive
Intensity Average
Source [['Hans'], ['10:14']]
Target [['Freitag'], ['37:44']]
Polar_expression [['freut'], ['22:27']]
Polarity Positive
Intensity Average
Source [['Stadtzürcher'], ['13:25']]
Target [['Quartier-Tankstelle'], ['35:54']]
Polar_expression [['freuen'], ['55:61']]
Polarity Positive
Intensity Average
Source [['Gosteli'], ['0:7']]
Target [['Jux'], ['53:56']]
Polar_expression [['verstanden'], ['57:67']]
Polarity Neutral
Intensity

Source [['Erfolge'], ['154:161']]
Target [['auch'], ['108:112']]
Polar_expression [['stellten'], ['136:144']]
Polarity Positive
Intensity Average
Source [['Schweiz'], ['25:32']]
Target [['Steuerprivilegien'], ['37:54']]
Polar_expression [['abschaffen'], ['101:111']]
Polarity Negative
Intensity Average
Source [['Bundesgericht'], ['11:24']]
Target [['Beschwerde'], ['33:43']]
Polar_expression [['zurückgewiesen'], ['57:71']]
Polarity Negative
Intensity Average
Source [['Thuner'], ['74:80']]
Target [['Nicola'], ['93:99']]
Polar_expression [['jubeln'], ['81:87']]
Polarity Positive
Intensity Average
Source [['Frage'], ['62:67']]
Target [['deshalb'], ['68:75']]
Polar_expression [['stellt'], ['86:92']]
Polarity Negative
Intensity Average
Source [['Frage'], ['6:11']]
Target [['Ankündigung'], ['33:44']]
Polar_expression [['stellt'], ['12:18']]
Polarity Positive
Intensity Average
Source [['Rebellen'], ['0:8']]
Target [['Polizeiposten'], ['61:74']]
Polar_expression [['überfielen'], ['50:60']]
Polar

Source [['Bolsonaro'], ['23:32']]
Target [['Umweltschutz'], ['37:49']]
Polar_expression [['abgeschafft'], ['73:84']]
Polarity Negative
Intensity Average
Source [['Geschäftsleitung'], ['155:171']]
Target [['Vermögen'], ['202:210']]
Polar_expression [['haftet'], ['172:178']]
Polarity Neutral
Intensity Average
Source [['Bautätigkeit'], ['20:32']]
Target [['Schwung'], ['52:59']]
Polar_expression [['gewonnen'], ['60:68']]
Polarity Neutral
Intensity Average
Source [['Parlament'], ['29:38']]
Target [['Mindeststrafe'], ['48:61']]
Polar_expression [['abschaffen'], ['62:72']]
Polarity Negative
Intensity Average
Source [['Gut-Behrami'], ['94:105']]
Target [['Premiere'], ['115:123']]
Polar_expression [['freuen'], ['124:130']]
Polarity Positive
Intensity Average
Source [['Pädagogen'], ['213:222']]
Target [['Lerncomputer'], ['273:285']]
Polar_expression [['schwärmen'], ['223:232']]
Polarity Neutral
Intensity Average
Source [['Problem'], ['36:43']]
Target [['Knaben'], ['13:19']]
Polar_expression [['s

Source [['Bundesrat'], ['4:13']]
Target [['Verhandlungen'], ['23:36']]
Polar_expression [['abgebrochen'], ['49:60']]
Polarity Negative
Intensity Average
Source [['Branche'], ['26:33']]
Target [['Einbussen'], ['48:57']]
Polar_expression [['stellt'], ['10:16']]
Polarity Positive
Intensity Average
Source [['Polizei'], ['4:11']]
Target [['Ehepaar'], ['37:44']]
Polar_expression [['verhaftete'], ['12:22']]
Polarity Negative
Intensity Average
Source [['Deal'], ['74:78']]
Target [['kurz'], ['35:39']]
Polar_expression [['scheitert'], ['60:69']]
Polarity Neutral
Intensity Average
Source [['SRF-Moderator'], ['46:59']]
Target [['Interview'], ['92:101']]
Polar_expression [['abbrechen'], ['126:135']]
Polarity Negative
Intensity Average
Source [['Wawrinka'], ['0:8']]
Target [['Breakball'], ['47:56']]
Polar_expression [['wehrte'], ['9:15']]
Polarity Negative
Intensity Average
Source [['Krzysztof'], ['0:9']]
Target [['Besuch'], ['61:67']]
Polar_expression [['freut'], ['16:21']]
Polarity Positive
Intens

In [13]:
orl_data_test = convert_ssa_to_re(ssa_data_test)

Source [['Schweiz'], ['4:11']]
Target [['EU'], ['41:43']]
Polar_expression [['unterwerfen'], ['47:58']]
Polarity Negative
Intensity Average
Source [['S.'], ['18:20']]
Target [['Worten'], ['58:64']]
Polar_expression [['stellte'], ['37:44']]
Polarity Positive
Intensity Average
Source [['Placierungen'], ['28:40']]
Target [['Rekord'], ['6:12']]
Polar_expression [['erreichten'], ['13:23']]
Polarity Neutral
Intensity Average
Source [['Wengener'], ['60:68']]
Target [['Verweis'], ['24:31']]
Polar_expression [['verstehen'], ['89:98']]
Polarity Neutral
Intensity Average
Source [['SBB-Manager'], ['14:25']]
Target [['Verfügung'], ['111:120']]
Polar_expression [['stellen'], ['121:128']]
Polarity Positive
Intensity Average
Source [['Personen'], ['12:20']]
Target [['China'], ['71:76']]
Polar_expression [['fürchten'], ['80:88']]
Polarity Negative
Intensity Average
Source [['Nachricht'], ['4:13']]
Target [['Ecclestone'], ['20:30']]
Polar_expression [['freut'], ['14:19']]
Polarity Positive
Intensity Ave

Source [['Frau'], ['101:105']]
Target [['Euro'], ['151:155']]
Polar_expression [['gewonnen'], ['165:173']]
Polarity Neutral
Intensity Average
Source [['Problem'], ['7:14']]
Target [['Vaduz'], ['28:33']]
Polar_expression [['stellen'], ['40:47']]
Polarity Negative
Intensity Average
Source [['Unternehmen'], ['26:37']]
Target [['Kommunikation'], ['68:81']]
Polar_expression [['versagt'], ['82:89']]
Polarity Neutral
Intensity Average
Source [['Gemässigten'], ['9:20']]
Target [['Seite'], ['70:75']]
Polar_expression [['stellten'], ['48:56']]
Polarity Positive
Intensity Average
Source [['Karaivaz'], ['26:34']]
Target [['Minister'], ['49:57']]
Polar_expression [['verklagt'], ['134:142']]
Polarity Negative
Intensity Average
Source [['Machtelite'], ['17:27']]
Target [['Unterredungen'], ['43:56']]
Polar_expression [['missfielen'], ['28:38']]
Polarity Negative
Intensity Average
Source [['Ausgang'], ['21:28']]
Target [['Del'], ['0:3']]
Polar_expression [['freute'], ['10:16']]
Polarity Positive
Intens

Source [['…'], ['59:60']]
Target [['Olympiagold'], ['92:103']]
Polar_expression [['freut'], ['71:76']]
Polarity Positive
Intensity Average
Source [['Partner'], ['82:89']]
Target [['Frauen'], ['14:20']]
Polar_expression [['ermordet'], ['90:98']]
Polarity Negative
Intensity Average
Source [['Forscher'], ['39:47']]
Target [['Mechanismen'], ['104:115']]
Polar_expression [['verstehen'], ['142:151']]
Polarity Neutral
Intensity Average
Source [['EVP-Nationalrat'], ['11:26']]
Target [['Motion'], ['73:79']]
Polar_expression [['gewonnen'], ['80:88']]
Polarity Positive
Intensity Average
Source [['Kickl'], ['105:110']]
Target [['Spitze'], ['118:124']]
Polar_expression [['gestellt'], ['139:147']]
Polarity Positive
Intensity Average
Source [['Lager'], ['31:36']]
Target [['Beitragsabschaffung'], ['66:85']]
Polar_expression [['freut'], ['37:42']]
Polarity Positive
Intensity Average
Source [['Bund'], ['19:23']]
Target [['Rechtshilfegesuche'], ['44:62']]
Polar_expression [['zurückgewiesen'], ['63:77']]


Polar_expression [['profitieren'], ['4:15']]
Polarity Neutral
Intensity Average
Source [['Behörden'], ['4:12']]
Target [['Schweizer'], ['29:38']]
Polar_expression [['verhafteten'], ['13:24']]
Polarity Negative
Intensity Average
Source [['Unbekannter'], ['61:72']]
Target [['Frau'], ['84:88']]
Polar_expression [['überfällt'], ['42:51']]
Polarity Negative
Intensity Average
Source [['Corona-Krise'], ['52:64']]
Target [['Lage'], ['74:78']]
Polar_expression [['verschlimmert'], ['84:97']]
Polarity Negative
Intensity Average
Source [['Amir'], ['38:42']]
Target [['Szenario'], ['109:117']]
Polar_expression [['abtun'], ['124:129']]
Polarity Positive
Intensity Average
Source [['Haas'], ['0:4']]
Target [['darauf'], ['22:28']]
Polar_expression [['freut'], ['5:10']]
Polarity Positive
Intensity Average
Source [['Anlagestiftung'], ['13:27']]
Target [['Managerlöhne'], ['56:68']]
Polar_expression [['stellt'], ['34:40']]
Polarity Positive
Intensity Average
Source [['Rom'], ['151:154']]
Target [['Euro'], [

Source [['Preis'], ['35:40']]
Target [['Krisen'], ['14:20']]
Polar_expression [['profitiert'], ['41:51']]
Polarity Neutral
Intensity Average
Skipped converting to ORL, There is a problem with this sentence, skipped: Gold gilt als <eTARGET> Krisenw</eTARGET> ährung , sein <eSOURCE> Preis </eSOURCE> <ePEXP> profitiert </ePEXP> oftmals von geopolitischen Krisen und Unsicherheit an den Finanzmärkten .
Source [['Inficon-Führung'], ['60:75']]
Target [['Aussagen'], ['100:108']]
Polar_expression [['hütet'], ['45:50']]
Polarity Negative
Intensity Average
Source [['Mann'], ['22:26']]
Target [['darüber'], ['38:45']]
Polar_expression [['freut'], ['27:32']]
Polarity Positive
Intensity Average
Source [['Modelle'], ['50:57']]
Target [['Elektro-Boost'], ['147:160']]
Polar_expression [['verfügen'], ['95:103']]
Polarity Neutral
Intensity Average
Source [['Frage'], ['75:80']]
Target [['Machtübernahme'], ['9:23']]
Polar_expression [['stellt'], ['55:61']]
Polarity Positive
Intensity Average
Source [['Vorsi

In [14]:
orl_data_train

[{'tokens': ['Im',
   'Kanton',
   'Bern',
   'etwa',
   'haben',
   'die',
   'Organisatoren',
   'des',
   'Emmentalischen',
   'bereits',
   'versichert',
   ',',
   'das',
   'Fest',
   'auch',
   'unter',
   'Ausschluss',
   'der',
   'Öffentlichkeit',
   'durchzuführen',
   ',',
   'das',
   '<eSOURCE>',
   'Oberländische',
   '</eSOURCE>',
   '<ePEXP>',
   'findet',
   '</ePEXP>',
   'ohne',
   'Zuschauer',
   'auf',
   'dem',
   '<eTARGET>',
   'Brünigpass',
   '</eTARGET>',
   'statt',
   '.'],
  'label': 'Neutral'},
 {'tokens': ['Ende',
   '2016',
   'hatten',
   'die',
   '<eSOURCE>',
   'SBB',
   '</eSOURCE>',
   'das',
   '<eTARGET>',
   'Projekt',
   '</eTARGET>',
   '<ePEXP>',
   'angekündigt',
   '</ePEXP>',
   ',',
   'mit',
   'einem',
   'Drittel',
   'der',
   'Wohnungen',
   'im',
   'gemeinnützigen',
   'Segment',
   '.'],
  'label': 'Positive'},
 {'tokens': ['Knapp',
   'ein',
   'Vierteljahrhundert',
   'später',
   '<ePEXP>',
   'wirken',
   '</ePEXP>',
   'die

In [15]:
# save as txt file
list_to_file(orl_data_train, RE_OUTPUT_PATH_TRAIN)
list_to_file(orl_data_val, RE_OUTPUT_PATH_VAL)
list_to_file(orl_data_test, RE_OUTPUT_PATH_TEST)