Skip to content

Commit

Permalink
Fix parsing structured abstract with multiple AbstractText sections
Browse files: browse the repository at this point in the history
  • Loading branch information
titipata committed Jul 5, 2017
1 parent 23729b6 commit b59e150
Showing 1 changed file with 6 additions and 1 deletion.
7 changes: 6 additions & 1 deletion pubmed_parser/medline_parser.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -270,7 +270,12 @@ def parse_article_info(medline, year_info_only):
title = ''

if article.find('Abstract/AbstractText') is not None:
abstract = stringify_children(article.find('Abstract/AbstractText')).strip() or ''
# structured abstract
if len(article.findall('Abstract/AbstractText')) > 1:
abstract_list = [stringify_children(abstract).strip() for abstract in article.findall('Abstract/AbstractText')]
abstract = '\n'.join(abstract_list)
else:
abstract = stringify_children(article.find('Abstract/AbstractText')).strip() or ''
elif article.find('Abstract') is not None:
abstract = stringify_children(article.find('Abstract')).strip() or ''
else:
Expand Down

0 comments on commit b59e150

Please sign in to comment.