In [1]:
! pip install requests
! pip install pprint
! pip install pandas
! pip install plotly



In [2]:
import requests
import icdcQueries as icdc #This is a file with various GraphQL queries
import pprint
import pandas as pd
import plotly
import plotly.express as px

In [12]:
#General query interface
def runQuery(endpoint, query, variables, timeout=30):
    """Send a GraphQL query to one of the Canine Commons tiers and return the decoded JSON.

    Args:
        endpoint: Tier name; one of "stage", "qa", "dev" or "prod".
        query: GraphQL query string.
        variables: Variables for the query, or None when the query takes none.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body decoded from JSON.

    Raises:
        KeyError: If endpoint is not one of the known tiers.
        Exception: If the server answers with a status code other than 200.
    """
    tiers = { "stage" : "https://caninecommons-stage.cancer.gov/v1/graphql/",
    "qa" : "https://caninecommons-qa.cancer.gov/v1/graphql/",
    "dev" : "https://caninecommons-dev.cancer.gov/v1/graphql/",
    "prod" : "https://caninecommons.cancer.gov/v1/graphql/"}

    # Check the tier before any network work so a typo fails with a message
    # that lists the valid choices.
    if endpoint not in tiers:
        raise KeyError("Unknown endpoint {!r}; expected one of {}".format(endpoint, sorted(tiers)))

    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.post(
        tiers[endpoint],
        json={'query': query, 'variables': variables},
        timeout=timeout,
    )
    if response.status_code == 200:
        return response.json()
    raise Exception("Query failed code {}. {}".format(response.status_code,query))

In [13]:
#Flatten a JSON object into a single-level dict (one table row)
def flattenJSON(jsondata):
    """Collapse nested dicts and lists into one flat dict.

    Each leaf value is stored under a key built from the path leading to it:
    every dict key or list index on the way is appended, followed by '_'.
    For example {"a": {"b": [7]}} becomes {"a_b_0_": 7}. Empty dicts and
    empty lists contribute no entries; a bare scalar is stored under ''.
    """

    def leaves(node, prefix):
        # Exact type checks: only plain dict and list are descended into,
        # anything else (including tuples) is treated as a leaf value.
        node_type = type(node)
        if node_type is dict:
            for key, value in node.items():
                yield from leaves(value, prefix + key + '_')
        elif node_type is list:
            for position, item in enumerate(node):
                yield from leaves(item, prefix + str(position) + '_')
        else:
            yield prefix, node

    return dict(leaves(jsondata, ''))

In [14]:
def getDataframe(query, endpoint, variables):
    """Run a GraphQL query and return the result as a pandas dataframe.

    Assumes a case-based query: the cases are read from the response under
    'data' -> 'case', and each one is flattened into a single row.
    """
    response = runQuery(endpoint, query, variables)

    # One flat dict per case; pandas turns the list of dicts into rows.
    rows = [flattenJSON(record) for record in response['data']['case']]
    return pd.DataFrame(rows)

In [15]:
def scatterPlot(dataframe, xaxis, yaxis):
    """Return a Plotly Express scatter figure of column xaxis against column yaxis."""
    return px.scatter(dataframe, x=xaxis, y=yaxis)

In [27]:
def main(args,xaxis,yaxis):
    """Fetch the demo query results from production, then print or plot them.

    args selects the action: 'print' pretty-prints the dataframe, 'graph'
    shows a scatter plot of column xaxis against column yaxis. Any other
    value fetches the data and does nothing further.

    Example axis columns: xaxis='demographic_breed_',
    yaxis='diagnoses_0_primary_disease_site_'.
    """
    #Load the queries file
    icdc.init()

    #Example query against the production system, with no variables
    cases = getDataframe(icdc.demo_query, 'prod', None)

    #Quick look at the table
    if args == 'print':
        pprint.pprint(cases)

    #Scatter plot of the two requested columns
    if args =='graph':
        scatterPlot(cases, xaxis, yaxis).show()
    

In [28]:
# 'print' mode only pretty-prints the dataframe; the axis arguments are unused, so pass None
main('print', None, None)

                 case_id_ cohort_cohort_description_ cohort_cohort_dose_  \
0            COTC007B0201     NSC 725776; 3mg/m2/day          3mg/m2/day   
1            COTC007B0501     NSC 725776; 3mg/m2/day          3mg/m2/day   
2            COTC007B0901     NSC 743400; 8mg/m2/day          8mg/m2/day   
3            COTC007B0502     NSC 725776; 3mg/m2/day          3mg/m2/day   
4            COTC007B0503     NSC 725776; 3mg/m2/day          3mg/m2/day   
..                    ...                        ...                 ...   
139  NCATS-COP01CCB070020        Pulmonary Neoplasms                       
140  NCATS-COP01CCB070034        Pulmonary Neoplasms                       
141  NCATS-COP01CCB070102                   Melanoma                       
142  NCATS-COP01CCB080012                   Melanoma                       
143  NCATS-COP01CCB080018                   Melanoma                       

    diagnoses_0_stage_of_disease_ diagnoses_0_concurrent_disease_  \
0                 

In [30]:
# 'graph' mode: scatter plot with diagnoses_0_primary_disease_site_ on x and demographic_sex_ on y
main('graph', "diagnoses_0_primary_disease_site_", "demographic_sex_" )