In [7]:
## Public try uploads directly the translated_cleaned.json file
import sys
import json


try:
    sys.path.insert(0,'/groups/dso/meijie/melanoma_data/Patient_history/patient-history/Publication/extraction_funs_pub')
    sys.path.insert(0,'/groups/dso/meijie/melanoma_data/fhir_data/Top2000Melanomexport-2022-04-30T10:49:34')
    sys.path.insert(0,'/groups/dso/meijie/melanoma_data/fhir_data_pub')
except ImportError:
    sys.path.insert(0,'extraction_funs_pub')

from medication_extractor import *
from tnm_extractors import *
from extraction_baseclass import extractor, tryPath
from group_and_merge import groupResources, combineResources
from procedure_extractors import *
from condition_extractor import *
from careplan_extractor import *
from os import listdir
from itertools import chain

ptnm_extractor = getExtractorPTNM()
ctnm_extractor = getExtractorCTNM()
medi_extractor = getMedicationAdministration()
radiotherapy_ext = getRadioTherapy()
surgeries_ext = getOperation()
examinations_ext = getExaminations()
progress_extractor = getExtractorProgress()
tproperties_extractor = getExtractorPropertiesPrimary()

json_path_to_cleaned_patient = '/groups/dso/meijie/melanoma_data/fhir_data/translated_cleaned.json'
with open(json_path_to_cleaned_patient) as f:
    data = json.load(f)

resources = json.load(open(json_path_to_cleaned_patient))['entry']
resources

[{'resource': {'resourceType': 'condition',
   'id': 'bd9cd822-0b58-4f6f-9db4-de257b41426d',
   'identifier': [{'system': 'https://hospital.org/TumorDocumentation/PrimaryTumorDiagnosis'}],
   'verificationStatus': {'extension': [{'url': 'https://hospital.org/TumorDocumentation/PrimaryTumorDiagnosis/ConfirmationOfDiagnosis',
      'valueCoding': {'system': 'https://hospital.org/TumorDocumentation/PrimaryTumorDiagnosis/ConfirmationOfDiagnosis',
       'code': 'H',
       'display': 'Histology of the primary tumor'}}],
    'coding': [{'system': 'http://terminology.hl7.org/CodeSystem/condition-ver-status',
      'code': 'confirmed',
      'display': 'Confirmed'}]},
   'code': {'coding': [{'system': 'http://fhir.de/CodeSystem/dimdi/icd-10-gm',
      'version': 'ICD10_2014',
      'code': 'C43.7',
      'display': 'Malignant melanoma of the lower limb, including the hip'}]},
   'bodySite': [{'coding': [{'system': 'http://snomed.info/sct',
       'code': '24028007',
       'display': 'Right'}

In [8]:
## Date perturbation
from perturbDates import dateChanger
dc = dateChanger(dateFields=[
    ['resource','effectiveDateTime'],
    ['resource','onsetDateTime'],
    ['resource','performedDateTime'],
    ['resource','performedPeriod','start'],
    ['resource','performedPeriod','end'],
    ['resources','effectivePeriod','start'],
    ['resource','effectivePeriod','end']
    
    
    ]) # create an instance of dateChanger
dc.addRandomNoise(resources)
resources

In [None]:
# adds a field to a dictionary and returns the dictionary
def addField(x, addfun, fieldName):
    x[fieldName] = addfun(x)
    return x

In [None]:
## STAGES
stages = [addField(x, lambda x: "p","p_or_c") for x in ptnm_extractor.extract(resources)]
stages += [addField(x, lambda x: "c","p_or_c") for x in ctnm_extractor.extract(resources)]
stages.sort(key = lambda x: x['dt_record'])

stages = groupResources(stages, "dt_record")

# we only want to keep one resource per date
def keepHighestPriority(resources, field, priorities):
    highestPrio = len(priorities) + 10
    for resource in resources:
        currentPrio = priorities.index(resource[field])
        if currentPrio < highestPrio:
             highestPrio = currentPrio
    highestPrio = priorities[highestPrio]
    return [x for x in resources if x[field] == highestPrio]

stages = [keepHighestPriority(x, 'p_or_c', ['p','c']) for x in stages]
stages = [keepHighestPriority(x, 'cat_version', ['AJCC2017','AJCC2009',None]) for x in stages]

# after checking the resources, we can safely combine them
stages = [combineResources(x) for x in stages]

# Some functions to make the output beautiful

# -> put in a resource with stage information -> this function outputs a string easy to read for doctors
# -> will be used later
def printStage(x):
    t, n, m = x.get('tstage',' k.A. '), x.get('nstage',' k.A. '), x.get('mstage',' k.A. ')
    tnm = x.get('tnm_stage','')
    ver = x.get('cat_version')
    if ver is None:
        ver = "k.A."
    porc = x.get('p_or_c')
    r = x.get('residual_state','')
    senPos = x.get('snodes_postive',"")
    senTest = x.get('snodes_examined',"?")
    renPos = x.get('rnodes_positive',"")
    renTest = x.get('rnodes_examined',"?")
    
    res = f"Version: {ver}, {porc}{tnm} T{t}N{n}M{m}{r}"
    if senPos != "":
        res += f", Sentinel: {senPos}+/{senTest}"
    if renPos != "":
        res += f", Regional Lymphnodes: {renPos}+/{renTest}"
    return res


# add nice field to print and the next date as stop date
stages = [addField(x, printStage, "val_print") for x in stages]
for i in range(len(stages)-1):
    stages[i]['dt_end'] = stages[i+1]['dt_record']
stages[-1]['dt_end'] = "2019-06-22" #  2019-06-21 is the date he deceased

# Add code to include code information on the stages
json_pat = open(json_path_to_cleaned_patient)
json_pat = json.load(json_pat)
primary_stage = getPrimaryTumor(json_pat)
cancer_code = primary_stage[0]['ICD10_2019_cancer_code']

def add_to_beginning(my_list, entry):
    my_list.insert(0, entry)
    return my_list

stages = add_to_beginning(stages, {'ICD10_2019_cancer_code':cancer_code})

stages

[{'ICD10_2019_cancer_code': 'C43.7'},
 {'patid': 'Patient/02b4f296-3cfc-4e2b-a35c-70c83712160b',
  'dt_record': '2014-04-14',
  'cat_version': None,
  'tnm_stage': 'III',
  'tstage': '2 B',
  'nstage': '0',
  'p_or_c': 'p',
  'val_print': 'Version: k.A., pIII T2 BN0M k.A. ',
  'dt_end': '2014-05-03'},
 {'patid': 'Patient/02b4f296-3cfc-4e2b-a35c-70c83712160b',
  'dt_record': '2014-05-03',
  'tnm_stage': 'IIIB',
  'cat_version': 'AJCC2017',
  'tstage': '2 B',
  'nstage': '1a',
  'mstage': '0',
  'p_or_c': 'c',
  'val_print': 'Version: AJCC2017, cIIIB T2 BN1aM0',
  'dt_end': '2014-05-23'},
 {'patid': 'Patient/02b4f296-3cfc-4e2b-a35c-70c83712160b',
  'dt_record': '2014-05-23',
  'cat_version': None,
  'tnm_stage': 'IIIB',
  'tstage': '2 B',
  'nstage': '1a',
  'snodes_postive': 1,
  'snodes_examined': 1,
  'p_or_c': 'p',
  'val_print': 'Version: k.A., pIIIB T2 BN1aM k.A. , Sentinel: 1+/1',
  'dt_end': '2015-02-20'},
 {'patid': 'Patient/02b4f296-3cfc-4e2b-a35c-70c83712160b',
  'dt_record': 

In [None]:
examinations = examinations_ext.extract(resources)
examinations = groupResources(examinations, "dt_record")
examinations = [combineResources(x) for x in examinations]
examinations

[{'patid': 'Patient/02b4f296-3cfc-4e2b-a35c-70c83712160b',
  'dt_record': '2014-06-23',
  'cat_examination_type': ['Laboratory procedure', 'physical exam'],
  'cat_reasons': ['Initial presentation', 'Treatment planning']},
 {'patid': 'Patient/02b4f296-3cfc-4e2b-a35c-70c83712160b',
  'dt_record': '2014-09-08',
  'cat_examination_type': ['Laboratory procedure',
   'Ultrasonography',
   'physical exam'],
  'cat_reasons': 'Follow-up'},
 {'patid': 'Patient/02b4f296-3cfc-4e2b-a35c-70c83712160b',
  'dt_record': '2014-12-28',
  'cat_examination_type': ['Laboratory procedure',
   'Ultrasonography',
   'computed tomography',
   'physical exam'],
  'cat_reasons': ['Diagnostic or staging procedure', 'Follow-up']},
 {'patid': 'Patient/02b4f296-3cfc-4e2b-a35c-70c83712160b',
  'dt_record': '2015-02-14',
  'cat_examination_type': ['Laboratory procedure',
   'Ultrasonography',
   'computed tomography',
   'physical exam'],
  'cat_reasons': ['Diagnostic or staging procedure', 'Follow-up']},
 {'patid': '

In [None]:
radio_therapy = radiotherapy_ext.extract(resources)
radio_therapy = [combineResources(x) for x in groupResources(radio_therapy, "dt_start")] # deletes one duplicate
radio_therapy

[{'patid': 'Patient/02b4f296-3cfc-4e2b-a35c-70c83712160b',
  'dt_start': '2016-10-17',
  'dt_end': '2016-11-08',
  'cat_intention': ['palliative', 'adjuvant'],
  'cat_status': 'completed',
  'cat_reason_end': 'regular ending'},
 {'patid': 'Patient/02b4f296-3cfc-4e2b-a35c-70c83712160b',
  'dt_start': '2017-07-10',
  'dt_end': '2017-07-18',
  'cat_intention': 'palliative',
  'cat_status': 'completed',
  'cat_reason_end': 'regular ending'},
 {'patid': 'Patient/02b4f296-3cfc-4e2b-a35c-70c83712160b',
  'dt_start': '2019-07-31',
  'dt_end': '2019-07-31',
  'cat_intention': 'palliative',
  'cat_status': 'completed',
  'cat_reason_end': 'Miscellaneous'}]

In [None]:
progresses = progress_extractor.extract(resources)
progresses = [combineResources(x) for x in groupResources(progresses, "dt_record")]
progresses

[{'patid': 'Patient/02b4f296-3cfc-4e2b-a35c-70c83712160b',
  'dt_record': '2014-09-08',
  'cat_progress_nodes': 'No lymph node involvement detectable'},
 {'patid': 'Patient/02b4f296-3cfc-4e2b-a35c-70c83712160b',
  'dt_record': '2014-12-28',
  'cat_progress_overall': 'Not applicable, as treatment is part of a multimodal concept and this is not yet completed',
  'cat_progress_metastases': 'questionable finding',
  'cat_progress_nodes': 'questionable finding'},
 {'patid': 'Patient/02b4f296-3cfc-4e2b-a35c-70c83712160b',
  'dt_record': '2015-02-14',
  'cat_progress_nodes': 'questionable finding'},
 {'patid': 'Patient/02b4f296-3cfc-4e2b-a35c-70c83712160b',
  'dt_record': '2015-03-14',
  'cat_progress_overall': 'Not applicable, as treatment is part of a multimodal concept and this is not yet completed',
  'cat_progress_metastases': 'No distant metastases detectable'},
 {'patid': 'Patient/02b4f296-3cfc-4e2b-a35c-70c83712160b',
  'dt_record': '2015-06-24',
  'cat_progress_nodes': 'No lymph node

In [None]:
medis = medi_extractor.extract(resources)
medis.sort(key=lambda x: x['dt_start'])
medis



[{'patid': 'Patient/02b4f296-3cfc-4e2b-a35c-70c83712160b',
  'cat_drugtype': 'Unknown',
  'dt_start': '2016-07-13T19:41:57+00:00',
  'dt_end': '2017-03-07',
  'cat_intention': 'palliative',
  'cat_status': 'stopped',
  'cat_reason_end': 'Discontinuation due to side effects',
  'num_quantity': 5},
 {'patid': 'Patient/02b4f296-3cfc-4e2b-a35c-70c83712160b',
  'cat_drugtype': 'Unknown',
  'dt_start': '2017-06-06T19:41:57+00:00',
  'dt_end': '2019-10-27',
  'cat_intention': 'palliative',
  'cat_status': 'stopped',
  'cat_reason_end': 'Miscellaneous'}]

In [None]:
# unknown is stupid for demonstration -> change it to Nivo+Ipi and Pembro
medis[0]['cat_drugtype'] = "Nivo+Ipi"
medis[1]['cat_drugtype'] = "Pembrolizumab"
medis

[{'patid': 'Patient/02b4f296-3cfc-4e2b-a35c-70c83712160b',
  'cat_drugtype': 'Nivo+Ipi',
  'dt_start': '2016-07-13T19:41:57+00:00',
  'dt_end': '2017-03-07',
  'cat_intention': 'palliative',
  'cat_status': 'stopped',
  'cat_reason_end': 'Discontinuation due to side effects',
  'num_quantity': 5},
 {'patid': 'Patient/02b4f296-3cfc-4e2b-a35c-70c83712160b',
  'cat_drugtype': 'Pembrolizumab',
  'dt_start': '2017-06-06T19:41:57+00:00',
  'dt_end': '2019-10-27',
  'cat_intention': 'palliative',
  'cat_status': 'stopped',
  'cat_reason_end': 'Miscellaneous'}]

In [21]:
properties_prim = tproperties_extractor.extract(resources)
properties_prim = [combineResources(x) for x in groupResources(properties_prim, "dt_record")]
properties_prim

[{'patid': 'Patient/02b4f296-3cfc-4e2b-a35c-70c83712160b',
  'dt_record': '2014-04-14',
  'flg_ulcerated': True,
  'flg_regression': False,
  'no_tumor_thickness': 1.9},
 {'patid': 'Patient/02b4f296-3cfc-4e2b-a35c-70c83712160b',
  'dt_record': '2015-02-20',
  'flg_transcapsular': True},
 {'patid': 'Patient/02b4f296-3cfc-4e2b-a35c-70c83712160b',
  'dt_record': '2016-10-17',
  'flg_transcapsular': True}]

In [22]:
oncogenes = getExtractorOncogenes().extract(resources)
oncogenes = [x for x in oncogenes if x['cat_gene'] in ['TERT result','BRAF result',
                                                       'NRAS result','NF1 result']] # should be the most relevant

# make outcome nice
def printGene(x):
    res = x['cat_gene'][:-8]
    if x['flg_mutated']:
        res += 'mutation'
        freq = x.get('num_frequency',-1)
        if freq > 0:
            res += " (Frequency " + str(freq) + ")"
    else:
        res += 'wildtype'
    return res
        
oncogenes = [addField(x, printGene, 'val_print') for x in oncogenes]
for onco in oncogenes:
    onco.pop('cat_gene')
    onco.pop('flg_mutated')
    onco.pop('num_frequency', None)
oncogenes = [combineResources(x) for x in groupResources(oncogenes, 'dt_record')]
oncogenes

[{'patid': 'Patient/02b4f296-3cfc-4e2b-a35c-70c83712160b',
  'dt_record': '2013-05-05',
  'val_print': ['BRAwildtype', 'NRAwildtype']},
 {'patid': 'Patient/02b4f296-3cfc-4e2b-a35c-70c83712160b',
  'dt_record': '2016-05-02',
  'val_print': ['NFwildtype', 'BRAwildtype', 'TERwildtype', 'NRAwildtype']}]

In [23]:
patientinfo = [x for x in resources if x['resource']['resourceType'] == 'patient']
patientinfo = patientinfo[0]['resource']
patientinfo.keys()
patientinfo.pop('meta', None)
patientinfo.pop('managingOrganization', None)
patientinfo.pop('identifier', None)
patientinfo

{'resourceType': 'patient',
 'id': '02b4f296-3cfc-4e2b-a35c-70c83712160b',
 'gender': 'female',
 'birthDate': '1961-05-30',
 'deceasedDateTime': '2019-06-21T19:41:57.025+02:00'}

In [24]:
# Make the Careplan entry
careplan_simp = getCareplan(json_pat)
careplan_simp 

[{'patid': 'Patient/02b4f296-3cfc-4e2b-a35c-70c83712160b',
  'time_of_careplan': '2019-02-06',
  'careplan_name': 'radiotherapy',
  'careplan_intention': 'palliative',
  'careplan_type': 'recurrence therapy'},
 {'patid': 'Patient/02b4f296-3cfc-4e2b-a35c-70c83712160b',
  'time_of_careplan': '2019-02-06',
  'careplan_name': 'Targeted Substances',
  'careplan_intention': 'palliative',
  'careplan_type': 'recurrence therapy'},
 {'patid': 'Patient/02b4f296-3cfc-4e2b-a35c-70c83712160b',
  'time_of_careplan': '2018-02-21',
  'careplan_name': 'Targeted Substances',
  'careplan_intention': 'palliative',
  'careplan_type': 'recurrence therapy'},
 {'patid': 'Patient/02b4f296-3cfc-4e2b-a35c-70c83712160b',
  'time_of_careplan': '2017-11-08',
  'careplan_name': 'Targeted Substances',
  'careplan_intention': 'palliative',
  'careplan_type': 'recurrence therapy'},
 {'patid': 'Patient/02b4f296-3cfc-4e2b-a35c-70c83712160b',
  'time_of_careplan': '2017-08-30',
  'careplan_name': 'Targeted Substances',
  

In [25]:
json4demonstration = {
    'patient_info': patientinfo,
    'stages': stages,
    'examinations': examinations,
    'radiotherapy': radio_therapy,
    'progresses': progresses,
    'medication': medis,
    'properties_primary': properties_prim,
    'oncogenes': oncogenes,
    'careplans': careplan_simp
    
}

In [26]:
with open('example_patient_pub.json', 'w', encoding='utf-8') as f:
    json.dump(json4demonstration, f, ensure_ascii=False, indent=4)

### We should double check again but now in the example_patient json file all the green-highlighted data are presented. That means we have the pipeline for fhir data --> simplified json
