Commit

Added supertype closes #87
Deleted class DuplicateHeader closes
Added saving context positions and link positions closes #91
Added type checks in classes in link_analysis.models closes #89
Updated converting CleanLink in JSON closes #74
navolotsky committed Oct 25, 2018
1 parent 991c065 commit 15d4789
Showing 6 changed files with 196 additions and 320 deletions.
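
For orientation only (not part of the commit): the models.py changes themselves are in one of the files not shown on this page. The sketch below is a hypothetical illustration of what the new supertype (#87), the type checks in the model classes (#89), and the saved context and link positions (#91) might look like. Only the names DocumentHeader, Header and Positions come from imports visible in the diffs below; every attribute and signature here is an assumption, not the committed code.

class DocumentHeader:
    """Assumed supertype for the header classes (issue #87)."""

    def __init__(self, docID):
        if not isinstance(docID, str):            # type check in the spirit of issue #89
            raise TypeError("docID must be str")
        self.doc_id = docID


class Header(DocumentHeader):
    """Assumed header of a single court decision."""

    def __init__(self, docID, docType, title, date, sourceUrl):
        super().__init__(docID)
        if not isinstance(title, str):
            raise TypeError("title must be str")
        self.doc_type = docType
        self.title = title
        self.date = date
        self.source_url = sourceUrl


class Positions:
    """Assumed container for context and link positions saved together (issue #91)."""

    def __init__(self, contextStart, contextEnd, linkStart, linkEnd):
        for value in (contextStart, contextEnd, linkStart, linkEnd):
            if not isinstance(value, int):
                raise TypeError("position values must be int")
        self.context_start = contextStart
        self.context_end = contextEnd
        self.link_start = linkStart
        self.link_end = linkEnd

In this sketch the isinstance checks simply raise TypeError on wrong argument types, which is one common way to implement the checks issue #89 asks for.
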
1 change: 1 addition & 0 deletions .gitignore
@@ -3,6 +3,7 @@ ksrf_temp_folder/
 TestResults/
 link_analysis/json_to_pickle_converter.py
 link_analysis/my_funs.py
+link_analysis/archive.py
 run.cmd
 #Decision Files
 Decision files0/
11 changes: 8 additions & 3 deletions link_analysis/api_module.py
@@ -330,8 +330,8 @@ def start_process_with(
 if __name__ == "__main__":
     import time
     start_time = time.time()
-    process_period("18.06.1980", "18.07.2020", showPicture=False,
-                   isNeedReloadHeaders=False, includeIsolatedNodes=False)
+    # process_period("18.06.1980", "18.07.2020", showPicture=False,
+    #                isNeedReloadHeaders=False, includeIsolatedNodes=False)
     # process_period("18.06.1980", "18.07.2020", showPicture=False,
     #                isNeedReloadHeaders=False, includeIsolatedNodes=False)
     # process_period(
@@ -352,7 +352,7 @@ def start_process_with(
 
     # start_process_with(decisionID='КСРФ/1-П/2015', depth=3)
 
-    load_and_visualize()
+    # load_and_visualize()
 
     # start_process_with(
     #     decisionID='КСРФ/1-П/2015', depth=10,
@@ -369,5 +369,10 @@
     #     showPicture=True, isNeedReloadHeaders=False)
     # source = web_crawler.Crawler.get_data_source('LocalFileStorage')
     # text=source.get_data('КСРФ/19-П/2014', web_crawler.DataType.DOCUMENT_TEXT)
+
+    # process_period("18.09.2018", "18.07.2020", showPicture=True,
+    #                isNeedReloadHeaders=False, includeIsolatedNodes=True)
+    import my_funs
+    my_funs.saving_all_clean_links()
     print(f"Headers collection spent {time.time()-start_time} seconds.")
     input('press any key...')
2 changes: 1 addition & 1 deletion link_analysis/converters.py
@@ -3,7 +3,7 @@
 import os
 from typing import Dict, Iterable, TypeVar, Type, List, Union, Any
 
-from models import Header, DuplicateHeader, DocumentHeader
+from models import Header, DocumentHeader
 from final_analysis import CleanLink
 
 # Don't forget to add to this place new classes where implemented
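
The converters.py hunk above only shows the import dropping DuplicateHeader; the updated CleanLink-to-JSON conversion that closes #74 is further down the file and not visible here. As a rough, hypothetical illustration of that kind of conversion, assuming CleanLink exposes citations_number and a positions_list alongside the header_to attribute seen in the final_analysis.py hunk below (none of these attribute names is confirmed by this page):

import json


def clean_link_to_json(cleanLink):
    # Hypothetical helper, not the committed converters.py code: doc_id,
    # citations_number and positions_list are assumed attributes.
    return json.dumps({
        'doc_id_to': cleanLink.header_to.doc_id,
        'citations_number': cleanLink.citations_number,
        'positions': [vars(p) for p in cleanLink.positions_list],
    }, ensure_ascii=False)

ensure_ascii=False keeps Cyrillic document IDs such as 'КСРФ/1-П/2015' readable in the resulting JSON.
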
13 changes: 3 additions & 10 deletions link_analysis/final_analysis.py
@@ -1,5 +1,5 @@
 import re
-from models import Header, DuplicateHeader, CleanLink
+from models import Header, CleanLink, Positions
 from models import LinkGraph
 from rough_analysis import RoughLink
 from typing import Dict, Tuple, List, Union
@@ -10,7 +10,7 @@
 
 def get_clean_links(
         collectedLinks: Dict[Header, List[RoughLink]],
-        courtSiteContent: Dict[str, Union[Header, DuplicateHeader]],
+        courtSiteContent: Dict[str, Header],
         courtPrefix: str='КСРФ/') -> Tuple[Dict[Header, List[CleanLink]],
                                            Dict[Header, List[RoughLink]]]:
     '''
@@ -35,17 +35,10 @@ def get_clean_links(
                     gottenID = (courtPrefix + number[0].upper() +
                                 '/' + years.pop())
                     if gottenID in courtSiteContent:
-                        try:
-                            if isinstance(courtSiteContent[gottenID],
-                                          DuplicateHeader):
-                                raise TypeError("It links on duplicating "
-                                                "document")
-                        except TypeError:
-                            break
                         eggs = True
                         years.clear()
                         headerTo = courtSiteContent[gottenID]
-                        positionAndContext = (link.position, link.context)
+                        positionAndContext = link.positions
                         cleanLink = None
                         for cl in checkedLinks[headerFrom]:
                             if cl.header_to == headerTo:
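
Not part of the commit, just a reading aid for the changed line above: the rough link now hands over a single positions object (closing #91) instead of a (position, context) tuple, so the clean-link side can store it unchanged. Only link.positions, checkedLinks, headerFrom, headerTo and cl.header_to appear in the visible hunk; the helper name and the CleanLink attributes below are assumptions.

def attach_positions(checkedLinks, headerFrom, headerTo, link):
    # Hypothetical helper mirroring the loop in the hunk above; positions_list and
    # citations_number are assumed CleanLink attributes, not confirmed by the diff.
    positionAndContext = link.positions          # one object instead of (position, context)
    for cl in checkedLinks[headerFrom]:
        if cl.header_to == headerTo:             # these two documents are already linked
            cl.citations_number += 1
            cl.positions_list.append(positionAndContext)
            return True
    return False                                 # caller would create a new CleanLink here
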
