Fixed compatibility problems and removed an old option.

scastlara · Oct 10, 2018 · 17f9f9e · 17f9f9e
1 parent 9eeba00
commit 17f9f9e
Show file tree

Hide file tree

Showing 3 changed files with 24 additions and 16 deletions.
diff --git a/bin/ppaxe b/bin/ppaxe
@@ -154,16 +154,13 @@ def main():
 
     # START THE PROGRAM
     pmids = read_identifiers(options.pmids)
-    if options.mode == "ppi":
-        stats = get_ppi(options, start_time, pmids)
-        log.info("Total articles analyzed: %s", stats['total_articles'])
-        log.info("Total sentences analyzed: %s", stats['total_sentences'])
-        log.info("Total candidates found: %s", stats['total_candidates'])
-        log.info("Total interactions retrieved: %s", stats['total_interacts'])
-        log.info("Total time: ~%s seconds", round(time.time() - start_time))
-        log.info("Program finished: %s", str(datetime.now()))
-    else:
-        print("Perform Gene/Protein symbol analysis")
+    stats = get_ppi(options, start_time, pmids)
+    log.info("Total articles analyzed: %s", stats['total_articles'])
+    log.info("Total sentences analyzed: %s", stats['total_sentences'])
+    log.info("Total candidates found: %s", stats['total_candidates'])
+    log.info("Total interactions retrieved: %s", stats['total_interacts'])
+    log.info("Total time: ~%s seconds", round(time.time() - start_time))
+    log.info("Program finished: %s", str(datetime.now()))
 
 
 if __name__ == "__main__":

diff --git a/ppaxe/core.py b/ppaxe/core.py
@@ -27,8 +27,8 @@
     sys.setdefaultencoding('utf8')
 except:
     # For python 3
+    import html
     import _pickle as pickle
-    from html.parser import HTMLParser
     from importlib import reload
 
 
@@ -377,10 +377,13 @@ def extract_sentences(self, mode="split", source="fulltext"):
             text = text.replace("<prd>",".")
             sentences = text.split("<stop>")
             #sentences = sentences[:-1]
-            sentences = [s.strip() for s in sentences]
-            h = HTMLParser()
+            sentences = [ s.strip() for s in sentences ]
+            try:
+                html = HTMLParser()
+            except:
+                import html
             for sentence in sentences:
-                sentence = str(h.unescape(sentence))
+                sentence = str(html.unescape(sentence))
                 if not sentence.strip() or not isinstance(sentence, str):
                     continue
                 self.sentences.append(Sentence(originaltext=sentence))
@@ -426,9 +429,8 @@ class Protein(object):
     '''
     GENEDICT = dict()
     GENEDICTFILE = pkg_resources.resource_filename('ppaxe', 'data/HGNC_gene_dictionary.txt')
-
     try:
-        with open(GENEDICTFILE, 'rb') as f:
+        with open(GENEDICTFILE, 'r') as f:
             for line in f:
                 line = line.strip()
                 cols = line.split("\t")

diff --git a/setup.py b/setup.py
@@ -1,5 +1,13 @@
 import setuptools
 
+requires = [
+    'pycorenlp',
+    'scipy',
+    'sklearn',
+    'requests',
+    'matplotlib'
+    ]
+
 setuptools.setup(name='ppaxe',
       version='0.2',
       description='PPI extractor from PubMed articles',
@@ -10,6 +18,7 @@
       scripts=['bin/ppaxe'],
       include_package_data=True,
       packages=setuptools.find_packages(),
+      install_requires=requires,
       package_data = { 'ppaxe' : ['data/RF_scikit.pkl', 'data/HGNC_gene_dictionary.txt', 'data/cytoscape_template.js', 'data/style.css']},
       zip_safe=False)