Skip to content

Commit

Permalink
Problems with compatibility and old option removed.
Browse files Browse the repository at this point in the history
  • Loading branch information
scastlara committed Oct 10, 2018
1 parent 9eeba00 commit 17f9f9e
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 16 deletions.
17 changes: 7 additions & 10 deletions bin/ppaxe
Original file line number Diff line number Diff line change
Expand Up @@ -154,16 +154,13 @@ def main():

# START THE PROGRAM
pmids = read_identifiers(options.pmids)
if options.mode == "ppi":
stats = get_ppi(options, start_time, pmids)
log.info("Total articles analyzed: %s", stats['total_articles'])
log.info("Total sentences analyzed: %s", stats['total_sentences'])
log.info("Total candidates found: %s", stats['total_candidates'])
log.info("Total interactions retrieved: %s", stats['total_interacts'])
log.info("Total time: ~%s seconds", round(time.time() - start_time))
log.info("Program finished: %s", str(datetime.now()))
else:
print("Perform Gene/Protein symbol analysis")
stats = get_ppi(options, start_time, pmids)
log.info("Total articles analyzed: %s", stats['total_articles'])
log.info("Total sentences analyzed: %s", stats['total_sentences'])
log.info("Total candidates found: %s", stats['total_candidates'])
log.info("Total interactions retrieved: %s", stats['total_interacts'])
log.info("Total time: ~%s seconds", round(time.time() - start_time))
log.info("Program finished: %s", str(datetime.now()))


if __name__ == "__main__":
Expand Down
14 changes: 8 additions & 6 deletions ppaxe/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@
sys.setdefaultencoding('utf8')
except:
# For python 3
import html
import _pickle as pickle
from html.parser import HTMLParser
from importlib import reload


Expand Down Expand Up @@ -377,10 +377,13 @@ def extract_sentences(self, mode="split", source="fulltext"):
text = text.replace("<prd>",".")
sentences = text.split("<stop>")
#sentences = sentences[:-1]
sentences = [s.strip() for s in sentences]
h = HTMLParser()
sentences = [ s.strip() for s in sentences ]
try:
html = HTMLParser()
except:
import html
for sentence in sentences:
sentence = str(h.unescape(sentence))
sentence = str(html.unescape(sentence))
if not sentence.strip() or not isinstance(sentence, str):
continue
self.sentences.append(Sentence(originaltext=sentence))
Expand Down Expand Up @@ -426,9 +429,8 @@ class Protein(object):
'''
GENEDICT = dict()
GENEDICTFILE = pkg_resources.resource_filename('ppaxe', 'data/HGNC_gene_dictionary.txt')

try:
with open(GENEDICTFILE, 'rb') as f:
with open(GENEDICTFILE, 'r') as f:
for line in f:
line = line.strip()
cols = line.split("\t")
Expand Down
9 changes: 9 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
import setuptools

# Runtime dependencies of the package; this list is handed to
# ``install_requires`` in the setuptools.setup() call.
requires = [
    'pycorenlp',
    'scipy',
    # The PyPI distribution is named "scikit-learn"; "sklearn" is only the
    # import name, and the deprecated "sklearn" placeholder project on PyPI
    # fails on install.
    'scikit-learn',
    'requests',
    'matplotlib',
]

setuptools.setup(name='ppaxe',
version='0.2',
description='PPI extractor from PubMed articles',
Expand All @@ -10,6 +18,7 @@
scripts=['bin/ppaxe'],
include_package_data=True,
packages=setuptools.find_packages(),
install_requires=requires,
package_data = { 'ppaxe' : ['data/RF_scikit.pkl', 'data/HGNC_gene_dictionary.txt', 'data/cytoscape_template.js', 'data/style.css']},
zip_safe=False)

0 comments on commit 17f9f9e

Please sign in to comment.