Skip to content
This repository has been archived by the owner on Jan 29, 2022. It is now read-only.

[#280] Parse registration_date from ICTRP #51

Merged
merged 1 commit into from Sep 2, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
22 changes: 22 additions & 0 deletions processors/ictrp/extractors.py
Expand Up @@ -5,8 +5,12 @@
from __future__ import unicode_literals

import re
import logging
from datetime import datetime
from .. import base

logger = logging.getLogger(__name__)


# Module API

Expand Down Expand Up @@ -110,6 +114,23 @@ def extract_trial(record):
# Get has_published_results
has_published_results = None

# Registration date
registration_date = None
date_of_registration = record.get('date_of_registration')
if date_of_registration:
date_formats = [
'%d/%m/%Y',
'%Y-%m-%d',
]
for fmt in date_formats:
try:
registration_date = datetime.strptime(date_of_registration, fmt).date()
break
except ValueError:
pass
if not registration_date:
logger.warn("Failed to parse date '%s'" % date_of_registration)

trial = {
'identifiers': identifiers,
'public_title': public_title,
Expand All @@ -125,6 +146,7 @@ def extract_trial(record):
'secondary_outcomes': record['secondary_outcomes'],
'gender': gender,
'has_published_results': has_published_results,
'registration_date': registration_date,
}
return trial

Expand Down
42 changes: 42 additions & 0 deletions tests/processors/ictrp/test_extractors.py
@@ -0,0 +1,42 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import copy
import datetime
import pytest
import processors.ictrp.extractors as extractors


class TestICTRPExtractors(object):
    """Tests for the ICTRP ``extract_trial`` extractor."""

    # Baseline input record; every test works on its own deep copy so the
    # shared class attribute is never mutated.
    # NOTE(review): presumably these are the fields extract_trial reads
    # unconditionally -- confirm against the extractor.
    STUB_RECORD = {
        'register': 'ClinicalTrials.gov',
        'main_id': 'NCT0000000',
        'public_title': 'Public title',
        'scientific_title': 'Scientific title',
        'target_sample_size': 100,
        'study_type': 'study_type',
        'study_design': 'study design',
        'study_phase': 'study phase',
        'primary_outcomes': 'primary outcomes',
        'secondary_outcomes': 'secondary outcomes',
        'key_inclusion_exclusion_criteria': 'key inclusion exclusion criteria',
    }

    @pytest.mark.parametrize('date_str,expected_date', [
        # Year-month-day and day/month/year spellings of the same dates.
        ('2012-12-31', datetime.date(2012, 12, 31)),
        ('31/12/2012', datetime.date(2012, 12, 31)),
        ('2012-05-01', datetime.date(2012, 5, 1)),
        ('01/05/2012', datetime.date(2012, 5, 1)),
        # Unparseable or empty input is expected to yield no date.
        ('invalid', None),
        ('', None),
    ])
    def test_extract_trial_handles_dates(self, date_str, expected_date):
        """Check ``registration_date`` for each ``date_of_registration`` input."""
        stub = copy.deepcopy(self.STUB_RECORD)
        stub.update({'date_of_registration': date_str})

        extracted = extractors.extract_trial(stub)
        parsed_date = extracted.get('registration_date')

        assert parsed_date == expected_date