## How do I get unavailable metadata?
This is an **alpha** feature using the SPARQL endpoint.

In [1]:
# from defs.apimethods import *
import SPARQLWrapper as spark
import urllib as urllib

## Make the Query

In [13]:
# 0) Check that the SPARQL endpoint is operational before issuing any query.
# NOTE(review): urllib.urlopen is the Python 2 API. Under Python 3 the call
# raises AttributeError, which the handler below also turns into rc = 0, so
# the "not responding" message would be printed even if the endpoint is up.
try:
    rc = urllib.urlopen("https://opensparql.sbgenomics.com").getcode()
except Exception:
    # Any failure to obtain a status code is treated as "endpoint down".
    rc = 0
if rc != 200:
    print("""script relies on sparql endpoint (https://opensparql.sbgenomics.com/) which is currently not
    responding. Can not continue, exiting.""")
    # Abort the cell. The exception name must be spelled correctly: a
    # misspelled name is undefined and would surface as NameError instead.
    raise KeyboardInterrupt

# Build the SPARQL wrapper that every query in this notebook goes through
sparql_endpoint = "https://opensparql.sbgenomics.com/bigdata/namespace/tcga_metadata_kb/sparql"
sparql = spark.SPARQLWrapper(sparql_endpoint)

# Non-survivors: the pattern below requires tcga:hasDaysToDeath on each case,
# and keeps only open-access RNA-Seq files of 'Breast Invasive Carcinoma' cases.
query = """
    prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    prefix tcga: <https://www.sbgenomics.com/ontologies/2014/11/tcga#>

    select distinct ?case_id ?file_name ?path ?days_to_death ?vital_status
    where
    {
      ?case a tcga:Case .
      ?case rdfs:label ?case_id .
      ?case tcga:hasDiseaseType ?dt .
      ?dt rdfs:label 'Breast Invasive Carcinoma' .

      ?case tcga:hasDaysToDeath ?days_to_death .
      # ?case tcga:hasDaysToLastFollowUp ?days_to_follow .
      ?case tcga:hasVitalStatus ?vs .
      ?vs rdfs:label ?vital_status .
      ?case tcga:hasFile ?file .
      
      ?file rdfs:label ?file_name .
      ?file tcga:hasStoragePath ?path .
      
      ?file tcga:hasAccessLevel ?ac .
      ?ac rdfs:label 'Open' .
      
      ?file tcga:hasExperimentalStrategy ?es .
      ?es rdfs:label 'RNA-Seq'
    }
"""

sparql.setReturnFormat(spark.JSON)  # ask the server to answer in JSON
sparql.setQuery(query)              # attach the query text to the wrapper
results = sparql.query().convert()  # run the query and convert the reply to a Python object

# Walk the result rows once and collect one value per row into four parallel
# lists: storage path, case label, vital status label, and days to death.
bindings = results['results']['bindings']
filelist = []
uuid_list = []
vital_list = []
day_to_death_list = []
for result in bindings:
    filelist.append(result['path']['value'])
    uuid_list.append(result['case_id']['value'])
    vital_list.append(result['vital_status']['value'])
    day_to_death_list.append(result['days_to_death']['value'])

In [14]:
# Preview the first ten entries of the days-to-death and vital-status lists
print(day_to_death_list[:10])
print(vital_list[:10])

[u'426', u'426', u'426', u'2534', u'2534', u'2534', u'2534', u'2534', u'538', u'538']
[u'Dead', u'Dead', u'Dead', u'Dead', u'Dead', u'Dead', u'Dead', u'Dead', u'Dead', u'Dead']


In [6]:
# Survivors: same pattern as the non-survivor query, but the case must carry
# tcga:hasDaysToLastFollowUp instead of tcga:hasDaysToDeath.
# NOTE(review): nothing in the pattern filters on ?vital_status; presumably
# only living cases have a last-follow-up value - confirm against the data.
query = """
    prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    prefix tcga: <https://www.sbgenomics.com/ontologies/2014/11/tcga#>

    select distinct ?case_id ?file_name ?path ?days_to_follow ?vital_status
    where
    {
      ?case a tcga:Case .
      ?case rdfs:label ?case_id .
      ?case tcga:hasDiseaseType ?dt .
      ?dt rdfs:label 'Breast Invasive Carcinoma' .

      # ?case tcga:hasDaysToDeath ?days_to_death .
      ?case tcga:hasDaysToLastFollowUp ?days_to_follow.
      ?case tcga:hasVitalStatus ?vs .
      ?vs rdfs:label ?vital_status .
      ?case tcga:hasFile ?file .
      
      ?file rdfs:label ?file_name .
      ?file tcga:hasStoragePath ?path .
      
      ?file tcga:hasAccessLevel ?ac .
      ?ac rdfs:label 'Open' .
      
      ?file tcga:hasExperimentalStrategy ?es .
      ?es rdfs:label 'RNA-Seq'
    }
"""

sparql.setReturnFormat(spark.JSON)  # ask the server to answer in JSON
sparql.setQuery(query)              # attach the query text to the wrapper
results = sparql.query().convert()  # run the query and convert the reply to a Python object

# Walk the result rows once and collect one value per row into parallel lists.
# filelist, uuid_list and vital_list are rebound here, replacing the values
# assigned by the non-survivor query cell.
bindings = results['results']['bindings']
filelist = []
uuid_list = []
vital_list = []
day_to_follow_list = []
for result in bindings:
    filelist.append(result['path']['value'])
    uuid_list.append(result['case_id']['value'])
    vital_list.append(result['vital_status']['value'])
    day_to_follow_list.append(result['days_to_follow']['value'])

In [10]:
# Preview the first ten case labels and their vital-status labels
print(uuid_list[:10])
print(vital_list[:10])

[u'01F50ABF-FC15-446E-9D07-EDEECC545A32', u'01F50ABF-FC15-446E-9D07-EDEECC545A32', u'029CE650-5E5A-4100-8596-CD94300E7EF5', u'029CE650-5E5A-4100-8596-CD94300E7EF5', u'02BBB632-0F7F-439D-B8F0-C86A06237424', u'02BBB632-0F7F-439D-B8F0-C86A06237424', u'02BBB632-0F7F-439D-B8F0-C86A06237424', u'02BBB632-0F7F-439D-B8F0-C86A06237424', u'02BBB632-0F7F-439D-B8F0-C86A06237424', u'02BBB632-0F7F-439D-B8F0-C86A06237424']
[u'Alive', u'Alive', u'Alive', u'Alive', u'Alive', u'Alive', u'Alive', u'Alive', u'Alive', u'Alive']
