[#280] Parse registration_date from ICTRP (#51)

Fixes opentrials/opentrials#280
opentrials · Sep 2, 2016 · 868ccbc
1 parent 6b15ccf
commit 868ccbc
Show file tree

Hide file tree

Showing 2 changed files with 64 additions and 0 deletions.
diff --git a/processors/ictrp/extractors.py b/processors/ictrp/extractors.py
@@ -5,8 +5,12 @@
 from __future__ import unicode_literals
 
 import re
+import logging
+from datetime import datetime
 from .. import base
 
+logger = logging.getLogger(__name__)
+
 
 # Module API
 
@@ -110,6 +114,23 @@ def extract_trial(record):
     # Get has_published_results
     has_published_results = None
 
+    # Registration date
+    registration_date = None
+    date_of_registration = record.get('date_of_registration')
+    if date_of_registration:
+        date_formats = [
+            '%d/%m/%Y',
+            '%Y-%m-%d',
+        ]
+        for fmt in date_formats:
+            try:
+                registration_date = datetime.strptime(date_of_registration, fmt).date()
+                break
+            except ValueError:
+                pass
+        if not registration_date:
+            logger.warn("Failed to parse date '%s'" % date_of_registration)
+
     trial = {
         'identifiers': identifiers,
         'public_title': public_title,
@@ -125,6 +146,7 @@ def extract_trial(record):
         'secondary_outcomes': record['secondary_outcomes'],
         'gender': gender,
         'has_published_results': has_published_results,
+        'registration_date': registration_date,
     }
     return trial
 

diff --git a/tests/processors/ictrp/test_extractors.py b/tests/processors/ictrp/test_extractors.py
@@ -0,0 +1,42 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import copy
+import datetime
+import pytest
+import processors.ictrp.extractors as extractors
+
+
+class TestICTRPExtractors(object):
+    STUB_RECORD = {
+        'register': 'ClinicalTrials.gov',
+        'main_id': 'NCT0000000',
+        'public_title': 'Public title',
+        'scientific_title': 'Scientific title',
+        'target_sample_size': 100,
+        'study_type': 'study_type',
+        'study_design': 'study design',
+        'study_phase': 'study phase',
+        'primary_outcomes': 'primary outcomes',
+        'secondary_outcomes': 'secondary outcomes',
+        'key_inclusion_exclusion_criteria': 'key inclusion exclusion criteria',
+    }
+
+    @pytest.mark.parametrize('date_str,expected_date', [
+        ('2012-12-31', datetime.date(2012, 12, 31)),
+        ('31/12/2012', datetime.date(2012, 12, 31)),
+        ('2012-05-01', datetime.date(2012, 5, 1)),
+        ('01/05/2012', datetime.date(2012, 5, 1)),
+        ('invalid', None),
+        ('', None),
+    ])
+    def test_extract_trial_handles_dates(self, date_str, expected_date):
+        record = copy.deepcopy(self.STUB_RECORD)
+        record['date_of_registration'] = date_str
+
+        trial = extractors.extract_trial(record)
+
+        assert trial.get('registration_date') == expected_date