# Inquiry into Kids First to FHIR model mapping

## Build a ship of pyrates

In [1]:
from fhir_pyrate import Pirate, Ahoy
import pandas as pd

# Display every column and keep wide frames on one line when they are rendered.
pd.set_option('display.max_columns', None)
pd.set_option('display.expand_frame_repr', False)

# Base URLs of the Kids First FHIR services used in this notebook.
# NOTE(review): QA_URL has no trailing slash while the other two do — confirm
# whether that difference matters before normalising it.
PROD_URL = 'https://kf-api-fhir-service.kidsfirstdrc.org/'
DEV_URL = 'https://kf-api-fhir-service-dev.kidsfirstdrc.org/'
QA_URL = 'https://kf-api-fhir-service-qa.kidsfirstdrc.org'

# Label -> base URL lookup, built from the constants above so each URL is
# written only once. Insertion order is PROD, QA, DEV.
ENDPOINT_URLS = dict(
    PROD_URL=PROD_URL,
    QA_URL=QA_URL,
    DEV_URL=DEV_URL,
)

def get_pirate(url, username=None):
    """Build a FHIR-PYrate ``Pirate`` client for a single FHIR endpoint.

    Args:
        url: Base URL of the FHIR server. It is passed both as the ``Ahoy``
            ``auth_url`` and as the ``Pirate`` ``base_url``.
        username: Optional user name handed to ``Ahoy``. When omitted, the
            value of the ``FHIR_USER`` environment variable is used (``None``
            if it is unset), so no personal account is stored in the notebook.

    Returns:
        A ``Pirate`` created with ``num_processes=1`` and
        ``print_request_url=False``.
    """
    import os  # local import keeps this definition self-contained

    if username is None:
        username = os.environ.get("FHIR_USER")

    # NOTE(review): auth_method=None presumably means Ahoy performs no login,
    # which would make the user name informational only — confirm against the
    # fhir_pyrate documentation.
    auth = Ahoy(
        username=username,
        auth_method=None,
        auth_url=url,
    )

    return Pirate(
        auth=auth,
        base_url=url,
        print_request_url=False,
        num_processes=1,
    )

# One Pirate client per configured endpoint, keyed by the endpoint label.
ship = dict(zip(ENDPOINT_URLS, map(get_pirate, ENDPOINT_URLS.values())))

  from .autonotebook import tqdm as notebook_tqdm


## Check Pyrate Booty

In [2]:
import requests

def get_studies(pirate: Pirate):
    """List the study IDs found on the ResearchStudy resources of an endpoint.

    The ID is read from ``meta.tag[0].code`` of each ResearchStudy.

    Args:
        pirate: Client used to query the endpoint.

    Returns:
        A list with one entry per returned row, or an empty list when the
        query raises an HTTP error, does not produce a DataFrame, or the
        DataFrame has no ``study_ids`` column.
    """
    try:
        result = pirate.steal_bundles_to_dataframe(
            resource_type='ResearchStudy',
            fhir_paths=[('study_ids', 'meta.tag[0].code')],
        )
    except requests.exceptions.HTTPError as http_err:
        # Best effort: report the failing response and fall back to "no studies".
        print(http_err.response)
        return []

    if not isinstance(result, pd.DataFrame):
        return []
    if 'study_ids' not in result.columns:
        return []
    return result['study_ids'].to_list()

# Study IDs available on each endpoint, keyed by the endpoint label.
endpoint_studies = dict(
    (label, get_studies(ship[label]))
    for label in ship
)

endpoint_studies

Query & Build DF (ResearchStudy): 100%|██████████| 1/1 [00:00<00:00, 448.59it/s]
Query & Build DF (ResearchStudy): 100%|██████████| 1/1 [00:00<00:00, 450.66it/s]

Failed to reach: https://kf-api-fhir-service-dev.kidsfirstdrc.org/ResearchStudy?





{'PROD_URL': ['SD_HGHFVPFD',
  'SD_1P41Z782',
  'SD_9PYZAHHE',
  'SD_7NQ9151J',
  'SD_B8X3C1MX',
  'SD_DK0KRWK8',
  'SD_NMVV8A1Y',
  'SD_YNSSAPHE',
  'SD_W0V965XZ',
  'SD_R0EPRSGS',
  'SD_RM8AFW0R',
  'SD_DZ4GPQX6',
  'SD_ZFGDG5YS',
  'SD_0TYVY1TW',
  'SD_VTTSHWV4',
  'SD_DZTB5HRR',
  'SD_YGVA0E1C',
  'SD_6FPYJQBR',
  'SD_8Y99QZJJ',
  'SD_P445ACHV',
  'SD_46RR9ZR6',
  'SD_PET7Q6F2',
  'SD_46SK55A3',
  'SD_ZXJFFMEF',
  'SD_PREASA7S',
  'SD_BHJXBDQK',
  'SD_Z6MWD3H0',
  'SD_JWS3V24D',
  'SD_FFVQ3T38',
  'SD_DYPMEHHF',
  'SD_T8VSYRSG',
  'SD_FYCR78W0',
  'SD_Y6VRG6MD',
  'SD_65064P2Z'],
 'QA_URL': ['SD_Z6MWD3H0',
  'SD_BHJXBDQK',
  'SD_DYPMEHHF',
  'SD_Y6VRG6MD',
  'SD_65064P2Z',
  'SD_T8VSYRSG',
  'SD_FYCR78W0',
  'SD_PREASA7S',
  'SD_FFVQ3T38',
  'SD_JWS3V24D',
  'SD_ZXJFFMEF',
  'SD_46SK55A3',
  'SD_PET7Q6F2',
  'SD_46RR9ZR6',
  'SD_P445ACHV',
  'SD_8Y99QZJJ',
  'SD_6FPYJQBR',
  'SD_YGVA0E1C',
  'SD_DZTB5HRR',
  'SD_VTTSHWV4',
  'SD_0TYVY1TW',
  'SD_ZFGDG5YS',
  'SD_DZ4GPQX6',
  'SD_RM

## Check Studies in Pyrate Booty

In [16]:
# Study-level metadata from the PROD endpoint. Keys are the output column
# names; values are the paths read from each ResearchStudy resource.
metadata = ship['PROD_URL'].steal_bundles_to_dataframe(
    resource_type='ResearchStudy',
    fhir_paths=list({
        'kf_id': 'meta.tag[0].code',
        'program': 'keyword[0].text',
        'name': 'title',
        'short_name': 'keyword[1].coding[0].code',
    }.items()),
)

# Preview the first ten studies.
metadata.head(10)

Query & Build DF (ResearchStudy): 100%|██████████| 1/1 [00:00<00:00, 131.13it/s]


Unnamed: 0,kf_id,program,name,short_name
0,SD_HGHFVPFD,Kids First,Common Fund (CF) Genotype-Tissue Expression Pr...,KF-GTEX
1,SD_1P41Z782,ICR,OpenDIPG: ICR London,ICR-DIPG
2,SD_9PYZAHHE,Kids First,Genomic Studies of Orofacial Cleft Birth Defects,KF-OCEA
3,SD_7NQ9151J,Kids First,Genome-wide Sequencing to Identify the Genes R...,KF-ED
4,SD_B8X3C1MX,Kids First,Kids First: Craniofacial Microsomia: Genetic C...,KF-CM
5,SD_DK0KRWK8,Kids First,Whole Genome Sequencing of African and Asian O...,KF-OFCAA
6,SD_NMVV8A1Y,Kids First,Kids First: Genetics of Structural Defects of ...,KF-KUT
7,SD_YNSSAPHE,TARGET,TARGET: Neuroblastoma (NBL),TARGET-NB
8,SD_W0V965XZ,Kids First,Genomic Analysis of Familial Leukemia,KF-FALL
9,SD_R0EPRSGS,Kids First,Kids First: Genomics of Orofacial Cleft Birth ...,KF-OFCLA


## Noteworthy results
1. Creation time not identified
2. No column identified for age_at_enrollment

# KF FHIR Patient &rarr; C2M2 Subject
| C2M2 Field | FHIR Field |
| ---------- | ---------- |
| local_id   | Patient.identifier[].value |
| project_local_id | Patient.meta.tag[].code |
| sex | Patient.gender |
| ethnicity | Patient.extension[1].extension[0].valueString |
 


In [4]:
# Patient fields used for the C2M2 Subject mapping, pulled from the PROD
# endpoint. Keys are the output column names; values are the paths read from
# each Patient resource. The recorded run of this cell took about a minute
# and a half.
patient_df = ship['PROD_URL'].steal_bundles_to_dataframe(
    resource_type="Patient",
    fhir_paths=list({
        "kf_id": "identifier[0].value",
        "study_id": "meta.tag[0].code",
        "sex": "gender",
        "ethnicity": "extension[1].extension[0].valueString",
    }.items()),
)

patient_df.head()

Query & Build DF (Patient): 645it [01:36,  6.68it/s]


Unnamed: 0,kf_id,study_id,sex,ethnicity
0,PT_TMQWP2F8,SD_65064P2Z,female,Not Reported
1,PT_ESANSF5N,SD_65064P2Z,female,Not Hispanic or Latino
2,PT_QV8RJTJY,SD_65064P2Z,female,Not Hispanic or Latino
3,PT_FV1C37JJ,SD_65064P2Z,male,Not Hispanic or Latino
4,PT_F2YJRHS0,SD_65064P2Z,female,Not Hispanic or Latino
