Skip to content
This repository has been archived by the owner on Jun 1, 2023. It is now read-only.

Commit

Permalink
Fixed general domain bug
Browse files Browse the repository at this point in the history
  • Loading branch information
kevinlu1248 committed Jun 28, 2021
1 parent 49c4390 commit c52eed9
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 17 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setup(
name="pyate",
version="0.5.1", # Start with a small number and increase it with every change you make
version="0.5.2", # Start with a small number and increase it with every change you make
license="MIT", # Choose a license from here: https://help.github.com/articles/licensing-a-repository
description="PYthon Automated Term Extraction", # Give a short description about your library
long_description=long_description,
Expand Down
39 changes: 23 additions & 16 deletions src/pyate/term_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ def get_general_domain(language: str = None, size: int = None):
pkg_resources.resource_stream(
__name__, f"default_general_domain.{language}.csv"),
nrows=size,
)
)["SECTION_TEXT"]
return TermExtraction.DEFAULT_GENERAL_DOMAINS[(language, size)]

@staticmethod
Expand Down Expand Up @@ -251,7 +251,8 @@ def add_to_counter(matcher, doc, i, matches):

def count_terms_from_documents(self,
seperate: bool = False,
verbose: bool = False):
verbose: bool = False,
doAsync: bool = True):
"""
This is the main purpose of this class. Counts terms from the documents and returns a pandas Series.
If self.corpus is a string, then it is identical to count_terms_from_document.
Expand Down Expand Up @@ -288,21 +289,27 @@ def callback(counter_dict):
term_counter[term] += frequency

def error_callback(e):
print(e)

P = Pool()

for document in self.corpus:
P.apply_async(
self.count_terms_from_document,
[document],
callback=callback,
error_callback=error_callback,
)
P.close()
P.join()
print("Error: " + e)

if doAsync:
P = Pool()

for document in self.corpus:
P.apply_async(
self.count_terms_from_document,
[document],
callback=callback,
error_callback=error_callback,
)
P.close()
P.join()

P.terminate()
else:
for document in self.corpus:
counts = self.count_terms_from_document(document)
callback(counts)

P.terminate()
if verbose:
pbar.close()
else:
Expand Down

0 comments on commit c52eed9

Please sign in to comment.