Skip to content
This repository has been archived by the owner on Jan 29, 2022. It is now read-only.

Commit

Permalink
[#280] Parse registration_date from ICTRP (#51)
Browse files Browse the repository at this point in the history
  • Loading branch information
vitorbaptista committed Sep 2, 2016
1 parent 6b15ccf commit 868ccbc
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 0 deletions.
22 changes: 22 additions & 0 deletions processors/ictrp/extractors.py
Expand Up @@ -5,8 +5,12 @@
from __future__ import unicode_literals

import re
import logging
from datetime import datetime
from .. import base

logger = logging.getLogger(__name__)


# Module API

Expand Down Expand Up @@ -110,6 +114,23 @@ def extract_trial(record):
# Get has_published_results
has_published_results = None

# Registration date
registration_date = None
date_of_registration = record.get('date_of_registration')
if date_of_registration:
date_formats = [
'%d/%m/%Y',
'%Y-%m-%d',
]
for fmt in date_formats:
try:
registration_date = datetime.strptime(date_of_registration, fmt).date()
break
except ValueError:
pass
if not registration_date:
logger.warn("Failed to parse date '%s'" % date_of_registration)

trial = {
'identifiers': identifiers,
'public_title': public_title,
Expand All @@ -125,6 +146,7 @@ def extract_trial(record):
'secondary_outcomes': record['secondary_outcomes'],
'gender': gender,
'has_published_results': has_published_results,
'registration_date': registration_date,
}
return trial

Expand Down
42 changes: 42 additions & 0 deletions tests/processors/ictrp/test_extractors.py
@@ -0,0 +1,42 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import copy
import datetime
import pytest
import processors.ictrp.extractors as extractors


class TestICTRPExtractors(object):
    """Tests for how ``extractors.extract_trial`` handles registration dates."""

    # Baseline record shared by the tests; each test works on its own copy
    # and adds the fields it wants to exercise.
    STUB_RECORD = {
        'register': 'ClinicalTrials.gov',
        'main_id': 'NCT0000000',
        'public_title': 'Public title',
        'scientific_title': 'Scientific title',
        'target_sample_size': 100,
        'study_type': 'study_type',
        'study_design': 'study design',
        'study_phase': 'study phase',
        'primary_outcomes': 'primary outcomes',
        'secondary_outcomes': 'secondary outcomes',
        'key_inclusion_exclusion_criteria': 'key inclusion exclusion criteria',
    }

    @pytest.mark.parametrize('date_str,expected_date', [
        # Year-month-day and day/month/year spellings of the same dates.
        ('2012-12-31', datetime.date(2012, 12, 31)),
        ('31/12/2012', datetime.date(2012, 12, 31)),
        ('2012-05-01', datetime.date(2012, 5, 1)),
        ('01/05/2012', datetime.date(2012, 5, 1)),
        # Unparseable or empty input is expected to produce no date.
        ('invalid', None),
        ('', None),
    ])
    def test_extract_trial_handles_dates(self, date_str, expected_date):
        """Check the ``registration_date`` extracted for ``date_str``.

        A private copy of ``STUB_RECORD`` gets ``date_of_registration`` set
        to ``date_str``; the trial returned by ``extract_trial`` must then
        carry ``expected_date`` under ``registration_date``.
        """
        # Deep-copy so the class-level stub is never modified by a test run.
        stub = copy.deepcopy(self.STUB_RECORD)
        stub.update({'date_of_registration': date_str})

        extracted = extractors.extract_trial(stub)
        parsed_date = extracted.get('registration_date')

        assert parsed_date == expected_date

0 comments on commit 868ccbc

Please sign in to comment.