diff --git a/processors/ictrp/extractors.py b/processors/ictrp/extractors.py
index 60c0afc..04792c9 100644
--- a/processors/ictrp/extractors.py
+++ b/processors/ictrp/extractors.py
@@ -5,8 +5,12 @@
 from __future__ import unicode_literals
 
 import re
+import logging
+from datetime import datetime
 from .. import base
 
+logger = logging.getLogger(__name__)
+
 
 # Module API
 
@@ -110,6 +114,23 @@ def extract_trial(record):
     # Get has_published_results
     has_published_results = None
 
+    # Registration date: try each known format in turn; None if none match
+    registration_date = None
+    date_of_registration = record.get('date_of_registration')
+    if date_of_registration:
+        date_formats = [
+            '%d/%m/%Y',
+            '%Y-%m-%d',
+        ]
+        for fmt in date_formats:
+            try:
+                registration_date = datetime.strptime(date_of_registration, fmt).date()
+                break
+            except ValueError:
+                pass
+        if not registration_date:
+            logger.warning("Failed to parse date '%s'", date_of_registration)
+
     trial = {
         'identifiers': identifiers,
         'public_title': public_title,
@@ -125,6 +146,7 @@ def extract_trial(record):
         'secondary_outcomes': record['secondary_outcomes'],
         'gender': gender,
         'has_published_results': has_published_results,
+        'registration_date': registration_date,
     }
 
     return trial
diff --git a/tests/processors/ictrp/test_extractors.py b/tests/processors/ictrp/test_extractors.py
new file mode 100644
index 0000000..46c7f24
--- /dev/null
+++ b/tests/processors/ictrp/test_extractors.py
@@ -0,0 +1,42 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import copy
+import datetime
+import pytest
+import processors.ictrp.extractors as extractors
+
+
+class TestICTRPExtractors(object):
+    STUB_RECORD = {
+        'register': 'ClinicalTrials.gov',
+        'main_id': 'NCT0000000',
+        'public_title': 'Public title',
+        'scientific_title': 'Scientific title',
+        'target_sample_size': 100,
+        'study_type': 'study_type',
+        'study_design': 'study design',
+        'study_phase': 'study phase',
+        'primary_outcomes': 'primary outcomes',
+        'secondary_outcomes': 'secondary outcomes',
+        'key_inclusion_exclusion_criteria': 'key inclusion exclusion criteria',
+    }
+
+    @pytest.mark.parametrize('date_str,expected_date', [
+        ('2012-12-31', datetime.date(2012, 12, 31)),
+        ('31/12/2012', datetime.date(2012, 12, 31)),
+        ('2012-05-01', datetime.date(2012, 5, 1)),
+        ('01/05/2012', datetime.date(2012, 5, 1)),
+        ('invalid', None),
+        ('', None),
+    ])
+    def test_extract_trial_handles_dates(self, date_str, expected_date):
+        record = copy.deepcopy(self.STUB_RECORD)
+        record['date_of_registration'] = date_str
+
+        trial = extractors.extract_trial(record)
+
+        assert trial.get('registration_date') == expected_date