<a href="https://colab.research.google.com/github/parmitamishra/CancerHawkColabFiles/blob/main/Exploratory_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')


# Querying Metadata
Python can be used with the GDC API to retrieve metadata that is indexed in the GDC Database.

example: passes some basic parameters (fields, format, size) to the cases endpoint and prints the results in a tab-delimited format.

In [2]:
import requests
import json
#passes some basic parameters (fields, format, size) to the cases endpoint
#and prints the results in a tab-delimited format
#fields parameter needs to be a string comprising comma-delimited field names.


cases_endpt = 'https://api.gdc.cancer.gov/cases'

# The 'fields' parameter is passed as a comma-separated string of single names.
fields = [
    "submitter_id",
    "case_id",
    "primary_site",
    "disease_type",
    "diagnoses.vital_status"
    ]

fields = ','.join(fields)

params = {
    "fields": fields,
    "format": "TSV",
    "size": "100"
    }

response = requests.get(cases_endpt, params = params)

print(response.content)




b'case_id\tdisease_type\tid\tprimary_site\tsubmitter_id\r\n69eced5b-1e76-45c9-bc9c-2aa71a921c57\tGliomas\t69eced5b-1e76-45c9-bc9c-2aa71a921c57\tBrain\tHCM-BROD-0011-C71\r\ne3b32485-b204-43a7-93a5-601408fcdf96\tDuctal and Lobular Neoplasms\te3b32485-b204-43a7-93a5-601408fcdf96\tPancreas\tHCM-CSHL-0074-C25\r\n4829dd8c-5445-41b3-ae37-bbcc333e8c9e\tAdenomas and Adenocarcinomas\t4829dd8c-5445-41b3-ae37-bbcc333e8c9e\tRectum\tHCM-BROD-0001-C18\r\nd420e653-3fb2-432b-9e81-81232a80264d\tGliomas\td420e653-3fb2-432b-9e81-81232a80264d\tBrain\tHCM-BROD-0210-C71\r\nbfe15f44-e1dd-46ed-b429-908822d0a781\tAdenomas and Adenocarcinomas\tbfe15f44-e1dd-46ed-b429-908822d0a781\tColon\tHCM-CSHL-0056-C18\r\n8b3b1f24-419e-4043-82be-2bd41268bb0e\tAdenomas and Adenocarcinomas\t8b3b1f24-419e-4043-82be-2bd41268bb0e\tRectum\tHCM-CSHL-0062-C18\r\n7717ccee-02fc-42aa-9951-e28ecb689ed5\tAdenomas and Adenocarcinomas\t7717ccee-02fc-42aa-9951-e28ecb689ed5\tRectum\tHCM-SANG-0266-C20\r\nf6824107-f92f-47fc-930a-cbe3bfe58895\tD

a filters parameter is added to the script. This parameter is passed as a Python dictionary object. The filter used in this example will only display cases that come from a neural disease study (primary_site: Brain)

In [4]:
fields = [
    "submitter_id",
    "case_id",
    "primary_site",
    "disease_type",
    "diagnoses.vital_status"
    ]

fields = ",".join(fields)

cases_endpt = "https://api.gdc.cancer.gov/cases"

filters = {
    "op": "in",
    "content":{
        "field": "primary_site",
        "value": ["Brain"]
        }
    }

# With a GET request, the filters parameter needs to be converted
# from a dictionary to JSON-formatted string

params = {
    "filters": json.dumps(filters),
    "fields": fields,
    "format": "TSV",
    "size": "100"
    }

response = requests.get(cases_endpt, params = params)

print(response.content)

b'case_id\tdisease_type\tid\tprimary_site\tsubmitter_id\r\n69eced5b-1e76-45c9-bc9c-2aa71a921c57\tGliomas\t69eced5b-1e76-45c9-bc9c-2aa71a921c57\tBrain\tHCM-BROD-0011-C71\r\nd420e653-3fb2-432b-9e81-81232a80264d\tGliomas\td420e653-3fb2-432b-9e81-81232a80264d\tBrain\tHCM-BROD-0210-C71\r\n149a8565-e0c5-4474-a693-d44f1b445c0c\tGliomas\t149a8565-e0c5-4474-a693-d44f1b445c0c\tBrain\tHCM-BROD-0199-C71\r\n5c5ef9c4-5896-4553-bbbd-afe4e4b52a6a\tGliomas\t5c5ef9c4-5896-4553-bbbd-afe4e4b52a6a\tBrain\tHCM-BROD-0214-C71\r\n2c636721-d11e-463e-8f77-a23501dd3d60\tGliomas\t2c636721-d11e-463e-8f77-a23501dd3d60\tBrain\tHCM-BROD-0209-C71\r\nafa149e0-edb7-4f8e-9f56-6e1c0ac78287\tGliomas\tafa149e0-edb7-4f8e-9f56-6e1c0ac78287\tBrain\tHCM-BROD-0106-C71\r\n1aad065b-567d-4f08-9cf0-bcc57e8ef496\tGliomas\t1aad065b-567d-4f08-9cf0-bcc57e8ef496\tBrain\tHCM-BROD-0103-C71\r\ncad9953c-7293-4699-9c47-3e24bc592f9a\tGliomas\tcad9953c-7293-4699-9c47-3e24bc592f9a\tBrain\tHCM-BROD-0104-C71\r\nc811d6dd-992f-435a-80ec-b282a2e38aad\