In [37]:
import os
import glob
import json
import re
import numpy as np
import pandas as pd

# Make ~/data/omop (with "~" expanded to the current user's home directory)
# the working directory. The relative paths used later in this notebook --
# the "*.tsv" glob and the "omop.json" output file -- resolve against it.
os.chdir(os.path.expanduser("~/data/omop"))

In [2]:
# Load tables into dataframes
tables = {re.findall(r"(.*?)_OMOP.*?", f)[0]: 
          pd.read_table(f, index_col=0, dtype={"MRN": str}).round(5)
          for f in glob.glob("*.tsv")}

In [3]:
# List all the tables we found. The keys are the table names parsed from the
# TSV file names when the tables were loaded.
tables.keys()

dict_keys(['condition_occurrence', 'drug_exposure', 'procedure_occurrence', 'death', 'person'])

In [4]:
# Organize by patient mrn
# Result shape: {mrn: {table_name: data}}. A table is included for a patient
# only when the mrn appears in that table's index.
#   * "person"     -> a single {column: value} dict for the patient's row.
#   * other tables -> a list of {column: value} dicts, one per row.
# The list-valued tables are selected with `table.loc[[mrn]]` (a list
# selector), which always yields a DataFrame. A scalar `table.loc[mrn]`
# collapses to a Series when the patient has exactly one row, and
# Series.to_dict("records") then raises TypeError.
# Only the first person (index[0:1]) is processed here.
patients = {mrn: {name: (table.loc[mrn].to_dict() if name == "person"
                         else table.loc[[mrn]].to_dict("records"))
                  for name, table in tables.items() if mrn in table.index}
            for mrn in tables["person"].index[0:1]}

In [None]:
# Manually add CGT ID for one patient to generate an example omop.json, use a table later
# TODO: this cell is unfinished and was never executed. The expression below
# only looks up the empty-string key on the patient's dict; it assigns nothing.
# NOTE(review): none of the table names listed earlier is "", so this
# presumably raises KeyError if run -- fill in the intended key and value.
patients["D3689FA5A3F1D2"][""]

In [91]:
# Dump out a single patient
class NumpyEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy values to plain Python equivalents.

    The standard encoder rejects NumPy scalar types that do not subclass a
    Python builtin (e.g. ``np.int64``, ``np.int32``, ``np.float32``,
    ``np.bool_``) as well as ``np.ndarray``. This encoder maps them to
    ``int``, ``float``, ``bool`` and ``list`` respectively.
    """

    def default(self, obj):
        """Return a JSON-serializable stand-in for ``obj``.

        Called by the base encoder only for objects it cannot serialize
        itself. Raises TypeError (from the base class) for anything that is
        not a supported NumPy value.
        """
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.ndarray):
            # tolist() also converts the elements to Python scalars.
            return obj.tolist()
        return super().default(obj)
    
# Render the sample patient as indented, key-sorted JSON text and show it.
# `omop` is reused further down when the text is handed to the browser.
omop = json.dumps(
    patients["D3689FA5A3F1D2"],
    indent=4,
    sort_keys=True,
    cls=NumpyEncoder,
)
print(omop)

{
    "condition_occurrence": [
        {
            "condition_concept_id": 35206165,
            "condition_start_in_days": 20672,
            "condition_type_concept_id": 646370677
        },
        {
            "condition_concept_id": 35206165,
            "condition_start_in_days": 20674,
            "condition_type_concept_id": 646370678
        },
        {
            "condition_concept_id": 35206165,
            "condition_start_in_days": 20623,
            "condition_type_concept_id": 38000245
        },
        {
            "condition_concept_id": 35206165,
            "condition_start_in_days": 20623,
            "condition_type_concept_id": 646370874
        },
        {
            "condition_concept_id": 44827328,
            "condition_start_in_days": 20623,
            "condition_type_concept_id": 646370676
        },
        {
            "condition_concept_id": 44828730,
            "condition_start_in_days": 20623,
            "condition_type_concept_id": 380002

In [117]:
# Save as sample omop.json
# The file holds the compact (no indentation), key-sorted JSON for the sample
# patient and is written relative to the current working directory.
with open("omop.json", "w") as f:
    f.write(
        json.dumps(
            patients["D3689FA5A3F1D2"],
            sort_keys=True,
            cls=NumpyEncoder,
        )
    )

In [103]:
# Verify round trip
# A is the serialized sample patient; B is what we get after parsing A back
# into Python objects and serializing again. With sorted keys the two JSON
# strings must be identical.
A = json.dumps(
    patients["D3689FA5A3F1D2"],
    sort_keys=True,
    cls=NumpyEncoder,
)
B = json.dumps(
    json.loads(A),
    sort_keys=True,
    cls=NumpyEncoder,
)
assert B == A

## Validate JavaScript Reading

Open the debug console in your browser to view the output of the following cells and verify that the generated JSON can simply be eval'ed back into a JavaScript object. The console should show an expandable JavaScript object with the same values as the Python dictionary and the source JSON string.

In [111]:
# Pass the omop string into the browser via window variable
# `omop` is already JSON text, so json.dumps(omop) turns it into a quoted and
# escaped string literal: window.omop ends up holding the JSON *text*, not a
# parsed object. The Javascript object is the cell's last expression so that
# the notebook displays (and thereby runs) it.
from IPython.display import Javascript
Javascript("window.omop=" + json.dumps(omop) + ";")

<IPython.core.display.Javascript object>

In [115]:
%%javascript
// Evaluate the JSON text stored in window.omop and log the resulting value to
// the browser console. The surrounding parentheses make eval parse the text
// as an expression, so a leading "{" is read as an object literal rather
// than as the start of a block statement.
// NOTE(review): eval executes whatever code the string contains. For data
// that is not fully trusted, JSON.parse(window.omop) is the safer choice --
// confirm first that the text has no NaN/Infinity tokens, which Python's
// json.dumps can emit and JSON.parse rejects.
console.log(eval("(" + window.omop + ")"))

<IPython.core.display.Javascript object>