## How do I get unavailable metadata?
This is an **alpha** feature using the SPARQL endpoint.

In [1]:
# from defs.apimethods import *
import SPARQLWrapper as spark
import urllib as urllib

## Make the Query

In [4]:
# 0) Check that the SPARQL endpoint is operational before running any query.
# `urlopen` lives in `urllib` on Python 2 and in `urllib.request` on Python 3.
# Resolve it explicitly so that, on Python 3, the check does not fail with a
# swallowed AttributeError and wrongly report the endpoint as down.
try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib import urlopen          # Python 2

try:
    rc = urlopen("https://opensparql.sbgenomics.com").getcode()
except Exception:
    # Any exception raised while contacting the server is treated as
    # "endpoint unavailable"; 0 can never match the expected 200 below.
    rc = 0
if rc != 200:
    print("""script relies on sparql endpoint (https://opensparql.sbgenomics.com/) which is currently not
    responding. Can not continue, exiting.""")
    # Abort the cell: everything that follows needs a live endpoint.
    # (The original misspelled this name, which raised NameError instead.)
    raise KeyboardInterrupt

# Connect to the endpoint: build a SPARQLWrapper client bound to the
# tcga_metadata_kb namespace URL below.
sparql_endpoint = "https://opensparql.sbgenomics.com/bigdata/namespace/tcga_metadata_kb/sparql"
sparql = spark.SPARQLWrapper(sparql_endpoint)   # Initialize the SPARQL wrapper with the endpoint

# Query (assuming we are looking at BRCA cases).
# Selects distinct rows of: case_uuid, disease_type, file_name, exp_strat,
# data_format, sample_id, sample_source, aliquot_uuid.
# Two filters narrow the result:
#   * disease_type must equal 'Breast Invasive Carcinoma'
#   * data_format must equal "BAM"
# The graph pattern walks case -> file (with its experimental strategy and
# data format) and case -> sample -> portion -> analyte -> aliquot, reading
# the rdfs:label of each node as the human-readable value.
# NOTE(review): ?case_id is bound via tcga:hasProgram but is only mentioned in
# the commented-out tail of the select line - confirm the pattern is intended.
query = """
prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
prefix tcga: <https://www.sbgenomics.com/ontologies/2014/11/tcga#>

select distinct ?case_uuid ?disease_type ?file_name ?exp_strat ?data_format ?sample_id ?sample_source ?aliquot_uuid # ?case_id ?sample_id ?aliquot_id

where
    {
      ?case a tcga:Case .
      ?case rdfs:label ?case_uuid .
      ?case tcga:hasProgram ?case_id .
      ?case tcga:hasDiseaseType ?dt .
      ?dt rdfs:label ?disease_type .
      filter (?disease_type='Breast Invasive Carcinoma') 

      ?case tcga:hasFile ?file .
      ?file rdfs:label ?file_name .
      ?file tcga:hasExperimentalStrategy ?es .
      ?es rdfs:label ?exp_strat .
      
      ?file tcga:hasDataFormat ?data .
      ?data rdfs:label ?data_format .
      filter (?data_format="BAM").
              
      ?case tcga:hasSample ?s .
      ?s rdfs:label ?sample_id .
      ?s tcga:hasSampleType ?ss .
      ?ss rdfs:label ?sample_source .
      
      ?s tcga:hasPortion ?p .
      ?p tcga:hasAnalyte ?al .
      ?al tcga:hasAliquot ?aq .
      ?aq rdfs:label ?aliquot_uuid
      }
"""

# Hand the query text to the wrapper and ask the server for a JSON response,
# then execute it and decode the response into plain Python objects.
sparql.setQuery(query)
sparql.setReturnFormat(spark.JSON)
results = sparql.query().convert()

# Every entry of results['results']['bindings'] is one result row; each
# selected variable maps to a dict whose 'value' field holds its text.
# Pull three of those columns out as parallel lists (file names, case UUIDs,
# experimental strategies).
rows = results['results']['bindings']
namelist, uuid_list, expstrat_list = (
    [row[column]['value'] for row in rows]
    for column in ('file_name', 'case_uuid', 'exp_strat')
)

In [5]:
# Preview the first ten experimental strategies, then the first ten case UUIDs.
for preview in (expstrat_list, uuid_list):
    print(preview[:10])

[u'miRNA-Seq', u'miRNA-Seq', u'miRNA-Seq', u'RNA-Seq', u'RNA-Seq', u'RNA-Seq', u'WXS', u'WXS', u'WXS', u'WXS']
[u'3144F1FB-4342-4079-BFE8-940DA4BFD88E', u'3144F1FB-4342-4079-BFE8-940DA4BFD88E', u'3144F1FB-4342-4079-BFE8-940DA4BFD88E', u'3144F1FB-4342-4079-BFE8-940DA4BFD88E', u'3144F1FB-4342-4079-BFE8-940DA4BFD88E', u'3144F1FB-4342-4079-BFE8-940DA4BFD88E', u'3144F1FB-4342-4079-BFE8-940DA4BFD88E', u'3144F1FB-4342-4079-BFE8-940DA4BFD88E', u'3144F1FB-4342-4079-BFE8-940DA4BFD88E', u'3144F1FB-4342-4079-BFE8-940DA4BFD88E']
