In [None]:
from copy import deepcopy
import json

import pandas as pd

# Directory (relative to the working directory) that holds the input files read below
DATA_DIR = 'data'

In [None]:
# Shared skeleton for every CodeSystem built in this notebook.
# Each code system starts from a deep copy of this payload, so the template
# itself (including its empty `concept` list) is never mutated.
CS_TEMPLATE = dict(
    resourceType='CodeSystem',
    status='draft',
    experimental=False,
    hierarchyMeaning='is-a',
    compositional=False,
    content='fragment',
    concept=[],
)

# 1. PCGC

## 1.1 Phenotype

### 1.1.1 HP

In [None]:
# Start from an independent copy of the shared template
cs_hp = deepcopy(CS_TEMPLATE)

# Attach the HPO-specific metadata in one step
# NOTE(review): `name` mirrors the canonical URL here; FHIR describes `name`
# as a computer-friendly identifier - confirm this is intended.
cs_hp.update({
    'id': 'hp',
    'url': 'http://purl.obolibrary.org/obo/hp.owl',
    'name': 'http://purl.obolibrary.org/obo/hp.owl',
    'title': 'Human Phenotype Ontology',
})

In [None]:
# Load the tab-separated PCGC phenotype code mapping
file_path = f'{DATA_DIR}/pcgc_ph_codes.tsv'
ph_codes = pd.read_table(file_path, sep='\t')

In [None]:
# Populate concept
# FHIR requires every code within a CodeSystem to be unique, so remember the
# codes that are already present. Seeding the set from the existing concepts
# also makes this cell safe to re-run without appending everything twice.
seen_codes = {concept['code'] for concept in cs_hp['concept']}

for code, display in zip(
        ph_codes['hpo_id_phenotype'],
        ph_codes['source_text_phenotype']
    ):
    # Skip rows without a usable HPO ID. Blank cells are read by pandas as
    # NaN, which json.dump would otherwise write as the non-standard token NaN.
    if pd.isna(code) or code == 'No Match':
        continue
    # Several source texts can map to the same HPO ID; keep the first one.
    if code in seen_codes:
        continue
    seen_codes.add(code)

    concept = {'code': code}
    # `display` is optional; omit it rather than emit NaN for a blank text.
    if not pd.isna(display):
        concept['display'] = display
    cs_hp['concept'].append(concept)

cs_hp['count'] = len(cs_hp['concept'])

In [None]:
# Serialise the HPO CodeSystem and write it next to the notebook
with open('CodeSystem-hp.json', 'w') as out_file:
    out_file.write(json.dumps(cs_hp, indent=2))

## 1.2 Diagnosis

In [None]:
# Load the tab-separated PCGC diagnosis code mapping
file_path = f'{DATA_DIR}/pcgc_dg_codes.tsv'
dg_codes = pd.read_table(file_path, sep='\t')

### 1.2.1 MONDO

In [None]:
# Start from an independent copy of the shared template
cs_mondo = deepcopy(CS_TEMPLATE)

# Attach the Mondo-specific metadata in one step
# NOTE(review): `name` mirrors the canonical URL here; FHIR describes `name`
# as a computer-friendly identifier - confirm this is intended.
cs_mondo.update({
    'id': 'mondo',
    'url': 'http://purl.obolibrary.org/obo/mondo.owl',
    'name': 'http://purl.obolibrary.org/obo/mondo.owl',
    'title': 'Mondo Disease Ontology',
})

In [None]:
# Populate concept
# FHIR requires every code within a CodeSystem to be unique, so remember the
# codes that are already present. Seeding the set from the existing concepts
# also makes this cell safe to re-run without appending everything twice.
seen_codes = {concept['code'] for concept in cs_mondo['concept']}

for code, display in zip(
        dg_codes['mondo_id_diagnosis'],
        dg_codes['source_text_diagnosis']
    ):
    # Skip rows without a usable Mondo ID. Blank cells are read by pandas as
    # NaN, which json.dump would otherwise write as the non-standard token NaN.
    if pd.isna(code) or code == 'No Match':
        continue
    # Several source texts can map to the same Mondo ID; keep the first one.
    if code in seen_codes:
        continue
    seen_codes.add(code)

    concept = {'code': code}
    # `display` is optional; omit it rather than emit NaN for a blank text.
    if not pd.isna(display):
        concept['display'] = display
    cs_mondo['concept'].append(concept)

cs_mondo['count'] = len(cs_mondo['concept'])

In [None]:
# Serialise the Mondo CodeSystem and write it next to the notebook
with open('CodeSystem-mondo.json', 'w') as out_file:
    out_file.write(json.dumps(cs_mondo, indent=2))

### 1.2.2 NCIt

In [None]:
# Start from an independent copy of the shared template
cs_ncit = deepcopy(CS_TEMPLATE)

# Attach the NCIt-specific metadata in one step
# NOTE(review): `name` mirrors the canonical URL here; FHIR describes `name`
# as a computer-friendly identifier - confirm this is intended.
cs_ncit.update({
    'id': 'ncit',
    'url': 'http://purl.obolibrary.org/obo/ncit.owl',
    'name': 'http://purl.obolibrary.org/obo/ncit.owl',
    'title': 'NCI Thesaurus',
})

In [None]:
# Populate concept
# FHIR requires every code within a CodeSystem to be unique, so remember the
# codes that are already present. Seeding the set from the existing concepts
# also makes this cell safe to re-run without appending everything twice.
seen_codes = {concept['code'] for concept in cs_ncit['concept']}

for code, display in zip(
        dg_codes['ncit_id_diagnosis'],
        dg_codes['source_text_diagnosis']
    ):
    # Skip rows without a usable NCIt ID. Blank cells are read by pandas as
    # NaN, which json.dump would otherwise write as the non-standard token NaN.
    if pd.isna(code) or code == 'No Match':
        continue
    # Several source texts can map to the same NCIt ID; keep the first one.
    if code in seen_codes:
        continue
    seen_codes.add(code)

    concept = {'code': code}
    # `display` is optional; omit it rather than emit NaN for a blank text.
    if not pd.isna(display):
        concept['display'] = display
    cs_ncit['concept'].append(concept)

cs_ncit['count'] = len(cs_ncit['concept'])

In [None]:
# Output to JSON
with open('CodeSystem-ncit.json', 'w') as out_file:
    # Serialise first, then write the finished text in a single call
    out_file.write(json.dumps(cs_ncit, indent=2))

## 1.3 Vital Status

### 1.3.1 SNOMED CT

In [None]:
# Start from an independent copy of the shared template
cs_sct = deepcopy(CS_TEMPLATE)

# Attach the SNOMED CT-specific metadata in one step
# NOTE(review): `name` mirrors the canonical URL here; FHIR describes `name`
# as a computer-friendly identifier - confirm this is intended.
cs_sct.update({
    'id': 'sct',
    'url': 'http://snomed.info/sct',
    'name': 'http://snomed.info/sct',
    'title': 'SNOMED CT',
})

In [None]:
# Add the vital-status concepts used by PCGC.
vital_status_concepts = [
    {
        'code': '438949009',
        'display': 'Alive'
    },
    {
        'code': '419099009',
        'display': 'Dead'
    }
]

# Only append codes that are not already present: codes within a CodeSystem
# must be unique, and this keeps the cell idempotent when it is re-run.
known_codes = {concept['code'] for concept in cs_sct['concept']}
cs_sct['concept'] = cs_sct['concept'] + [
    concept for concept in vital_status_concepts
    if concept['code'] not in known_codes
]
cs_sct['count'] = len(cs_sct['concept'])

In [None]:
# Output to JSON
with open('CodeSystem-sct.json', 'w') as out_file:
    # Serialise first, then write the finished text in a single call
    out_file.write(json.dumps(cs_sct, indent=2))

# 2. Synthea

## 2.1 SNOMED CT

In [None]:
# Load the Synthea SNOMED CT concepts.
# Assumes the file holds a list of concept dicts, each with a 'code' key -
# TODO confirm against data/sct.json.
with open(f'{DATA_DIR}/sct.json') as f:
    concept_sct = json.load(f)

# Merge into the existing SNOMED CT code system, skipping codes that are
# already present. Codes within a CodeSystem must be unique, and the guard
# keeps this cell idempotent (a plain `+=` would append the whole file again
# on every re-run).
known_codes = {concept['code'] for concept in cs_sct['concept']}
for concept in concept_sct:
    if concept['code'] in known_codes:
        continue
    known_codes.add(concept['code'])
    cs_sct['concept'].append(concept)

cs_sct['count'] = len(cs_sct['concept'])

In [None]:
# Output to JSON
# Overwrites the file with the current contents of cs_sct
with open('CodeSystem-sct.json', 'w') as out_file:
    out_file.write(json.dumps(cs_sct, indent=2))

## 2.2 LOINC

In [None]:
# Start from an independent copy of the shared template
cs_loinc = deepcopy(CS_TEMPLATE)

# Attach the LOINC-specific metadata in one step
# NOTE(review): `name` mirrors the canonical URL here; FHIR describes `name`
# as a computer-friendly identifier - confirm this is intended.
cs_loinc.update({
    'id': 'loinc',
    'url': 'http://loinc.org',
    'name': 'http://loinc.org',
    'title': 'LOINC',
})

In [None]:
# Load the Synthea LOINC concepts.
# Assumes the file holds a list of concept dicts, each with a 'code' key -
# TODO confirm against data/loinc.json.
with open(f'{DATA_DIR}/loinc.json') as f:
    concept_loinc = json.load(f)

# Append only codes that are not already present. Codes within a CodeSystem
# must be unique, and the guard keeps this cell idempotent (a plain `+=`
# would append the whole file again on every re-run).
known_codes = {concept['code'] for concept in cs_loinc['concept']}
for concept in concept_loinc:
    if concept['code'] in known_codes:
        continue
    known_codes.add(concept['code'])
    cs_loinc['concept'].append(concept)

cs_loinc['count'] = len(cs_loinc['concept'])

In [None]:
# Output to JSON
with open('CodeSystem-loinc.json', 'w') as out_file:
    # Serialise first, then write the finished text in a single call
    out_file.write(json.dumps(cs_loinc, indent=2))