From 66980b287eb96fc361140bc81180893e0850bcf2 Mon Sep 17 00:00:00 2001
From: Nicolay Rusnachenko <kolyarus@yandex.ru>
Date: Sat, 7 Jan 2023 10:49:15 +0000
Subject: [PATCH 1/6] Provide AREnets link

---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index 541a5974..a45cc7f3 100644
--- a/README.md
+++ b/README.md
@@ -55,6 +55,8 @@ Please follows th
 
 ## Applications
 
+* **AREnets** [[github]](https://github.com/nicolay-r/AREnets)
+  * is an OpenNRE-like project whose kernel is based on the TensorFlow library, with neural network implementations built on top of it, designed for Attitude and Relation Extraction tasks
 * **ARElight** [[site]](https://nicolay-r.github.io/arelight-page/) [[github]](https://github.com/nicolay-r/ARElight)
     * **Infer attitudes** from large Mass-media documents or **sample texts** for your Machine Learning models applications
 

From 35fd8ae083d01887eaf4cc9dcb71de3170fb632b Mon Sep 17 00:00:00 2001
From: Nicolay Rusnachenko <kolyarus@yandex.ru>
Date: Thu, 12 Jan 2023 12:20:23 +0000
Subject: [PATCH 2/6] #429 related sync

---
 arekit/contrib/utils/data/readers/jsonl.py     | 15 +++++++++++++++
 .../contrib/utils/data/storages/jsonl_based.py | 18 ++++++++++++++++++
 2 files changed, 33 insertions(+)
 create mode 100644 arekit/contrib/utils/data/readers/jsonl.py
 create mode 100644 arekit/contrib/utils/data/storages/jsonl_based.py

diff --git a/arekit/contrib/utils/data/readers/jsonl.py b/arekit/contrib/utils/data/readers/jsonl.py
new file mode 100644
index 00000000..ca9c4923
--- /dev/null
+++ b/arekit/contrib/utils/data/readers/jsonl.py
@@ -0,0 +1,15 @@
+from arekit.contrib.utils.data.readers.base import BaseReader
+from arekit.contrib.utils.data.storages.jsonl_based import JsonlBasedRowsStorage
+
+
+class JsonlReader(BaseReader):
+
+    def read(self, target):
+        rows = []
+        with open(target, "r") as f:
+            for line in f.readlines():
+                rows.append(line)
+        return JsonlBasedRowsStorage(rows)
+
+    def target_extension(self):
+        return ".jsonl"
diff --git a/arekit/contrib/utils/data/storages/jsonl_based.py b/arekit/contrib/utils/data/storages/jsonl_based.py
new file mode 100644
index 00000000..bc32e269
--- /dev/null
+++ b/arekit/contrib/utils/data/storages/jsonl_based.py
@@ -0,0 +1,18 @@
+import json
+
+from arekit.common.data.storages.base import BaseRowsStorage
+
+
+class JsonlBasedRowsStorage(BaseRowsStorage):
+
+    def __init__(self, rows):
+        assert(isinstance(rows, list))
+        self.__rows = rows
+
+    def _iter_rows(self):
+        for row_index, row in enumerate(self.__rows):
+            assert(isinstance(row, str))
+            yield row_index, json.loads(row)
+
+    def _get_rows_count(self):
+        return len(self.__rows)

From ea26d166716cfae0c272de3a1d152394f41eca8a Mon Sep 17 00:00:00 2001
From: Nicolay Rusnachenko <kolyarus@yandex.ru>
Date: Thu, 12 Jan 2023 12:21:56 +0000
Subject: [PATCH 3/6] #429 sync. Fix #427

---
 arekit/contrib/networks/input/const.py       |  1 +
 arekit/contrib/networks/input/rows_parser.py | 51 +++++++++++++-------
 2 files changed, 34 insertions(+), 18 deletions(-)

diff --git a/arekit/contrib/networks/input/const.py b/arekit/contrib/networks/input/const.py
index 888c338a..0f39a327 100644
--- a/arekit/contrib/networks/input/const.py
+++ b/arekit/contrib/networks/input/const.py
@@ -4,5 +4,6 @@
 SynonymObject = "syn_objs"
 SynonymSubject = "syn_subjs"
 PosTags = "pos_tags"
+Text = "text"
 
 ArgsSep = ','
diff --git a/arekit/contrib/networks/input/rows_parser.py b/arekit/contrib/networks/input/rows_parser.py
index 9f8a9ba8..10dad525 100644
--- a/arekit/contrib/networks/input/rows_parser.py
+++ b/arekit/contrib/networks/input/rows_parser.py
@@ -1,24 +1,35 @@
-import pandas as pd
-
 from arekit.common.data import const
 from arekit.common.utils import filter_whitespaces, split_by_whitespaces
-from . import const as network_input_const
+
+import arekit.contrib.networks.input.const as network_input_const
 
 empty_list = []
 
 
+def no_value():
+    return None
+
+
 def __process_values_list(value):
     return value.split(network_input_const.ArgsSep)
 
 
 def __process_indices_list(value):
-    return [int(v) for v in str(value).split(network_input_const.ArgsSep)]
+    return no_value() if not value else [int(v) for v in str(value).split(network_input_const.ArgsSep)]
 
 
 def __process_int_values_list(value):
     return __process_indices_list(value)
 
 
+def __handle_text(value):
+    """ The core method of the input text processing.
+    """
+    assert(isinstance(value, str) or isinstance(value, list))
+    return filter_whitespaces([term for term in split_by_whitespaces(value)]
+                              if isinstance(value, str) else value)
+
+
 parse_value = {
     const.ID: lambda value: value,
     const.DOC_ID: lambda value: int(value),
@@ -35,18 +46,19 @@ def __process_int_values_list(value):
     network_input_const.SynonymObject: lambda value: __process_indices_list(value),
     network_input_const.SynonymSubject: lambda value: __process_indices_list(value),
     network_input_const.PosTags: lambda value: __process_int_values_list(value),
-    "text_a": lambda value: filter_whitespaces([term for term in split_by_whitespaces(value)])
+    network_input_const.Text: lambda value: __handle_text(value)
 }
 
 
 class ParsedSampleRow(object):
-    """
-    Provides a parsed information for a sample row.
-    TODO. Use this class as API
+    """ Provides a parsed information for a sample row.
     """
 
     def __init__(self, row):
-        assert(isinstance(row, pd.Series))
+        """ row: dict
+                dict of the pairs ("field_name", value)
+        """
+        assert(isinstance(row, dict))
 
         self.__uint_label = None
         self.__params = {}
@@ -64,13 +76,16 @@ def __init__(self, row):
 
             self.__params[key] = parse_value[key](value)
 
+    def __value_or_none(self, key):
+        return self.__params[key] if key in self.__params else no_value()
+
     @property
     def SampleID(self):
         return self.__params[const.ID]
     
     @property
     def Terms(self):
-        return self.__params["text_a"]
+        return self.__params[network_input_const.Text]
 
     @property
     def SubjectIndex(self):
@@ -86,33 +101,33 @@ def UintLabel(self):
 
     @property
     def PartOfSpeechTags(self):
-        return self.__params[network_input_const.PosTags]
+        return self.__value_or_none(network_input_const.PosTags)
 
     @property
     def TextFrameVariantIndices(self):
-        return self.__params[network_input_const.FrameVariantIndices]
+        return self.__value_or_none(network_input_const.FrameVariantIndices)
 
     @property
     def TextFrameConnotations(self):
-        return self.__params[network_input_const.FrameConnotations]
+        return self.__value_or_none(network_input_const.FrameConnotations)
 
     @property
     def EntityInds(self):
-        return self.__params[const.ENTITIES]
+        return self.__value_or_none(const.ENTITIES)
 
     @property
     def SynonymObjectInds(self):
-        return self.__params[network_input_const.SynonymObject]
+        return self.__value_or_none(network_input_const.SynonymObject)
 
     @property
     def SynonymSubjectInds(self):
-        return self.__params[network_input_const.SynonymSubject]
+        return self.__value_or_none(network_input_const.SynonymSubject)
 
     def __getitem__(self, item):
         assert (isinstance(item, str) or item is None)
         if item not in self.__params:
-            return None
-        return self.__params[item] if item is not None else None
+            return no_value()
+        return self.__params[item] if item is not None else no_value()
 
     @classmethod
     def parse(cls, row):

From fbcb15fb3fec5d973a02db5edb671efd735585eb Mon Sep 17 00:00:00 2001
From: Nicolay Rusnachenko <kolyarus@yandex.ru>
Date: Thu, 12 Jan 2023 12:27:06 +0000
Subject: [PATCH 4/6] refactoring

---
 arekit/contrib/utils/evaluation/analyze_errors.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/arekit/contrib/utils/evaluation/analyze_errors.py b/arekit/contrib/utils/evaluation/analyze_errors.py
index 13fba1e6..5d6f96ab 100644
--- a/arekit/contrib/utils/evaluation/analyze_errors.py
+++ b/arekit/contrib/utils/evaluation/analyze_errors.py
@@ -131,13 +131,12 @@ def extract_errors(eval_result, test_samples_filepath, etalon_samples_filepath,
         for sample_col in columns_to_copy:
             eval_errors_df.at[row_id, sample_col] = sample_row[sample_col]
 
-        text_terms =__post_text_processing(sample_row=sample_row, source_ind=source_ind, target_ind=target_ind)
+        text_terms = __post_text_processing(sample_row=sample_row, source_ind=source_ind, target_ind=target_ind)
         cropped_text = __crop_text_terms(source_ind=source_ind, target_ind=target_ind, text_terms=text_terms)
 
         eval_errors_df.at[row_id, BaseSingleTextProvider.TEXT_A] = cropped_text
 
-        # Replace with the values instead of indices.
-        entity_inds = __get_entity_inds(sample_row)
+        # Replace the source and target indices with the corresponding term values.
         eval_errors_df.at[row_id, const.S_IND] = text_terms[source_ind]
         eval_errors_df.at[row_id, const.T_IND] = text_terms[target_ind]
 

From 730d535213507dfdeffa1e3d7b903a4d1d778fd0 Mon Sep 17 00:00:00 2001
From: Nicolay Rusnachenko <kolyarus@yandex.ru>
Date: Thu, 12 Jan 2023 12:28:59 +0000
Subject: [PATCH 5/6] #436 fixed

---
 .../pipelines/sources/ruattitudes/extract_text_opinions.py      | 1 -
 arekit/contrib/utils/pipelines/sources/ruattitudes/utils.py     | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/arekit/contrib/utils/pipelines/sources/ruattitudes/extract_text_opinions.py b/arekit/contrib/utils/pipelines/sources/ruattitudes/extract_text_opinions.py
index c2002254..96101feb 100644
--- a/arekit/contrib/utils/pipelines/sources/ruattitudes/extract_text_opinions.py
+++ b/arekit/contrib/utils/pipelines/sources/ruattitudes/extract_text_opinions.py
@@ -44,7 +44,6 @@ def create_text_opinion_extraction_pipeline(text_parser,
         version=version,
         doc_id_func=lambda doc_id: doc_id,
         keep_doc_ids_only=False,
-        label_scaler=label_scaler,
         limit=limit)
 
     doc_ops = DictionaryBasedDocumentOperations(ru_attitudes)
diff --git a/arekit/contrib/utils/pipelines/sources/ruattitudes/utils.py b/arekit/contrib/utils/pipelines/sources/ruattitudes/utils.py
index 65f3271f..1420856d 100644
--- a/arekit/contrib/utils/pipelines/sources/ruattitudes/utils.py
+++ b/arekit/contrib/utils/pipelines/sources/ruattitudes/utils.py
@@ -18,7 +18,7 @@ def get_doc(self, doc_id):
         return self.__ru_attitudes[doc_id]
 
 
-def read_ruattitudes_to_brat_in_memory(version, keep_doc_ids_only, doc_id_func, label_scaler, limit=None):
+def read_ruattitudes_to_brat_in_memory(version, keep_doc_ids_only, doc_id_func, limit=None):
     """ Performs reading of RuAttitude formatted documents and
         selection according to 'doc_ids_set' parameter.
     """

From 395671faf1c788fdf7e2faac41bcf58a252d890b Mon Sep 17 00:00:00 2001
From: Nicolay Rusnachenko <kolyarus@yandex.ru>
Date: Mon, 16 Jan 2023 13:06:40 +0000
Subject: [PATCH 6/6] #437 -- refactored. Improved rows parser. #415 -- removed
 case of mentioned `,` in value.

---
 arekit/contrib/source/brat/annot.py | 37 +++++++++++++++++------------
 1 file changed, 22 insertions(+), 15 deletions(-)

diff --git a/arekit/contrib/source/brat/annot.py b/arekit/contrib/source/brat/annot.py
index 959ff2f0..2749e7a0 100644
--- a/arekit/contrib/source/brat/annot.py
+++ b/arekit/contrib/source/brat/annot.py
@@ -14,33 +14,40 @@ def __non_prefixed_id(value):
 
     @staticmethod
     def handle_entity(args):
+        """ T2	Location 10 23	South America
+            T1	Location 0 5;16 23	North America
+        """
+        assert(len(args) == 3)
 
-        if len(args) < 4:
-            return None
+        e_id = int(BratAnnotationParser.__non_prefixed_id(args[0]))
+        entity_params = args[1].split()
 
-        if not str.isdigit(args[2]) or not str.isdigit(args[3]):
+        if len(entity_params) > 3:
+            # Non-contiguous (discontinuous) entity mentions are not supported.
             return None
 
-        e_id = int(BratAnnotationParser.__non_prefixed_id(args[0]))
-        e_str_type = args[1]
-        e_begin = int(args[2])
-        e_end = int(args[3])
-        e_value = " ".join([arg.strip().replace(',', '') for arg in args[4:]])
+        e_str_type, e_begin, e_end = entity_params
 
         return BratEntity(id_in_doc=e_id,
                           e_type=e_str_type,
-                          index_begin=e_begin,
-                          index_end=e_end,
-                          value=e_value)
+                          index_begin=int(e_begin),
+                          index_end=int(e_end),
+                          value=args[2].strip())
 
     @staticmethod
     def handle_relation(args):
+        """ Example:
+            R1	Origin Arg1:T3 Arg2:T4
+        """
 
+        # Parse identifier index.
         e_id = args[0][1:]
 
-        rel_type = args[1]
-        source_id = args[2].split(':')[1]
-        target_id = args[3].split(':')[1]
+        # Parse relation arguments.
+        rel_type, source, target = args[1].split()
+
+        source_id = source.split(':')[1]
+        target_id = target.split(':')[1]
 
         return BratRelation(id_in_doc=e_id,
                             source_id=int(BratAnnotationParser.__non_prefixed_id(source_id)),
@@ -57,7 +64,7 @@ def parse_annotations(input_file, encoding='utf-8'):
         for line in input_file.readlines():
             line = line.decode(encoding)
 
-            args = line.split()
+            args = line.split('\t')
 
             record_type = args[0][0]