In [None]:
import requests
import json

In [None]:
# Ping the cBioPortal health endpoint and show the JSON payload followed by
# the HTTP status code, one per line.
health_url = "https://www.cbioportal.org/api/health"
r = requests.get(url=health_url)
print(r.json(), r.status_code, sep="\n")

Part 1: Retrieve and display patient data. User must provide a valid patient and study Id. 
Example uses
patient: P-0000004 
study: msk_impact_2017

In [None]:
# Example identifiers; change these to look up a different patient.
study_id = "msk_impact_2017"
patient_id = "P-0000004"

# Fetch the patient's clinical data and display the parsed JSON.
r = requests.get(
    url=f"https://www.cbioportal.org/api/studies/{study_id}/patients/{patient_id}/clinical-data"
)
x = r.json()
print(x)

Part 2: Find patients similar to a specified patient. Searches within the same study using a single criterion. 
Example uses
patient: P-0000004 
study: msk_impact_2017
attribute (not recorded across all studies): SMOKING_HISTORY

In [None]:
# Get the reference patient's value for the attribute.
r = requests.get(url="https://www.cbioportal.org/api/studies/msk_impact_2017/patients/P-0000004/clinical-data?attributeId=SMOKING_HISTORY&projection=SUMMARY&pageSize=10000000&pageNumber=0&direction=ASC")
# Fail loudly on an HTTP error instead of indexing into an error payload.
r.raise_for_status()
# The response is a list of key/value records; the record has to be taken
# out of the list before its fields can be read.
x = r.json()
if not x:
    # Without this guard a patient lacking the attribute fails with a bare IndexError.
    raise ValueError("No SMOKING_HISTORY value recorded for patient P-0000004 in msk_impact_2017")
y = x[0]
pval = y['value']
print(y['clinicalAttributeId'])
print(pval)

# Get the same attribute for the patients in the study and print the
# patientId of every record whose value equals the reference value.
print("matching patients")
# pageSize matches the other requests in this notebook so that a study with
# more than 20000 patients is not silently truncated.
rs = requests.get(url="https://www.cbioportal.org/api/studies/msk_impact_2017/clinical-data?attributeId=SMOKING_HISTORY&clinicalDataType=PATIENT&projection=SUMMARY&pageSize=10000000&pageNumber=0&direction=ASC")
rs.raise_for_status()
clist = rs.json()
for a in clist:
    if a['value'] == pval:
        print(a['patientId'])

Part 3: Find patients similar to a specified patient across multiple studies. Uses one criterion and needs to check whether that criterion is available in the other studies. If the criterion is not available in all studies, there may be attributes with similar names or purposes (e.g. SMOKING_HISTORY vs SMOKE STATUS vs SMOKING_STATUS) that will not be picked up.
https://www.cbioportal.org/study/summary?id=nsclc_tcga_broad_2016 and https://www.cbioportal.org/study/summary?id=msk_impact_2017 both have SMOKING_HISTORY, but the response types are different. This requires parsing sample data instead of patient data.
Example uses
patient:  P-0000004
studies: msk_impact_2017, metastatic_solid_tumors_mich_2017, 
attribute: CANCER_TYPE


In [51]:
# Request the list of the reference patient's samples and take the first sampleId.
r = requests.get(url="https://www.cbioportal.org/api/studies/msk_impact_2017/patients/P-0000004/samples?projection=SUMMARY&pageSize=10&pageNumber=0&direction=ASC")
r.raise_for_status()
pPatientData = r.json()
if not pPatientData:
    raise ValueError("Patient P-0000004 has no samples in msk_impact_2017")
pSample = pPatientData[0]
pSampleId = pSample['sampleId']
print(pPatientData)
print(pSampleId)

# Get clinical data from the first sample. The sampleId fetched above is used
# here so the lookup keeps working if the reference patient is changed.
s = requests.get(url=f"https://www.cbioportal.org/api/studies/msk_impact_2017/samples/{pSampleId}/clinical-data?attributeId=CANCER_TYPE&projection=SUMMARY&pageSize=10000000&pageNumber=0&direction=ASC")
s.raise_for_status()
pAttributeList = s.json()
if not pAttributeList:
    raise ValueError(f"No CANCER_TYPE value recorded for sample {pSampleId}")
pAttribute = pAttributeList[0]
pAttributeValue = pAttribute['value']

# Initialize the list of studies to search.
studyList = ["msk_impact_2017", "metastatic_solid_tumors_mich_2017"]

for study in studyList:
    # Check whether the attribute is recorded in this study; if not, skip the study.
    t = requests.get(url=f"https://www.cbioportal.org/api/studies/{study}/clinical-attributes?projection=SUMMARY&pageSize=10000000&pageNumber=0&direction=ASC")
    t.raise_for_status()
    listOfAttributes = t.json()
    print(listOfAttributes)
    if not any(attribute['clinicalAttributeId'] == "CANCER_TYPE" for attribute in listOfAttributes):
        continue

    # Get the list of patients in the study.
    u = requests.get(url=f"https://www.cbioportal.org/api/studies/{study}/patients?projection=SUMMARY&pageSize=10000000&pageNumber=0&direction=ASC")
    u.raise_for_status()
    patientIdList = u.json()

    # Loop through patients, requesting their samples and then using the
    # sample Ids to look up the attribute (same approach as for the reference patient).
    for Id in patientIdList:
        currentPatientId = Id['patientId']
        v = requests.get(url=f"https://www.cbioportal.org/api/studies/{study}/patients/{currentPatientId}/samples?projection=SUMMARY&pageSize=10000000&pageNumber=0&direction=ASC")
        v.raise_for_status()
        currentSamples = v.json()

        # Request each sample's attribute value and print the patient on a match.
        for sample in currentSamples:
            currentSampleId = sample['sampleId']
            w = requests.get(url=f"https://www.cbioportal.org/api/studies/{study}/samples/{currentSampleId}/clinical-data?attributeId=CANCER_TYPE&projection=SUMMARY&pageSize=10000000&pageNumber=0&direction=ASC")
            w.raise_for_status()
            currentSampleData = w.json()
            if not currentSampleData:
                # This sample has no value for the attribute; skip it rather
                # than aborting the whole scan with an IndexError.
                continue
            currentSample = currentSampleData[0]
            currentSampleValue = currentSample['value']
            if currentSampleValue == pAttributeValue:
                print(currentPatientId)
                    




[{'uniqueSampleKey': 'UC0wMDAwMDA0LVQwMS1JTTM6bXNrX2ltcGFjdF8yMDE3', 'uniquePatientKey': 'UC0wMDAwMDA0Om1za19pbXBhY3RfMjAxNw', 'sampleType': 'Primary Solid Tumor', 'sampleId': 'P-0000004-T01-IM3', 'patientId': 'P-0000004', 'studyId': 'msk_impact_2017'}]
P-0000004-T01-IM3
[{'displayName': 'Cancer Type', 'description': 'Cancer Type', 'datatype': 'STRING', 'patientAttribute': False, 'priority': '1', 'clinicalAttributeId': 'CANCER_TYPE', 'studyId': 'msk_impact_2017'}, {'displayName': 'Cancer Type Detailed', 'description': 'Cancer Type Detailed', 'datatype': 'STRING', 'patientAttribute': False, 'priority': '1', 'clinicalAttributeId': 'CANCER_TYPE_DETAILED', 'studyId': 'msk_impact_2017'}, {'displayName': 'DNA Input', 'description': 'DNA Input', 'datatype': 'NUMBER', 'patientAttribute': False, 'priority': '1', 'clinicalAttributeId': 'DNA_INPUT', 'studyId': 'msk_impact_2017'}, {'displayName': 'Fraction Genome Altered', 'description': 'Fraction Genome Altered', 'datatype': 'NUMBER', 'patientAtt