In [None]:
from SPARQLWrapper import SPARQLWrapper, JSON
import json

# Look up phenotypes and diseases for the top-ranked genes on the Monarch
# SPARQL endpoint. `results` must already be defined by an earlier cell; each
# row is read through its 'gene' field.
top_genes = [str(row['gene']) for row in results]
print("Top 10 genes:", top_genes)
monarch_endpoint = "https://monarchinitiative.org/sparql"
sparql = SPARQLWrapper(monarch_endpoint)


def _sparql_string_literal(text):
    """Return `text` as a double-quoted SPARQL string literal.

    Backslashes, double quotes and line breaks are escaped so that a gene
    identifier can never terminate the literal early and alter the query.
    """
    escaped = (
        text.replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )
    return f'"{escaped}"'


# One case-insensitive CONTAINS clause per gene, OR-ed together. The string is
# empty when there are no genes; in that case the query is never sent (see
# the guard below) because `FILTER()` is not valid SPARQL.
gene_filter = " || ".join(
    f"CONTAINS(LCASE(STR(?gene)), LCASE({_sparql_string_literal(gene)}))"
    for gene in top_genes
)

# Relations used (or considered) in the query:
# http://purl.obolibrary.org/obo/RO_0002200 - has phenotype
# http://purl.obolibrary.org/obo/RO_0002331 - involved in
# http://purl.obolibrary.org/obo/RO_0003303 - causes condition
# http://purl.obolibrary.org/obo/RO_0002327 - enables
#
# The label variables are projected alongside the IRIs because the printing
# loop below reads geneName / phenotypeName / diseaseName from each binding;
# without them in the SELECT clause every row would print as "N/A".
monarch_query = f"""
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX obo: <http://purl.obolibrary.org/obo/>

    SELECT DISTINCT ?gene ?geneName ?phenotype ?phenotypeName ?disease ?diseaseName
    WHERE {{
        ?gene rdfs:label ?geneName ;
              obo:RO_0002200 ?phenotype .
        ?phenotype rdfs:label ?phenotypeName .
        OPTIONAL {{
            ?gene obo:RO_0003303 ?disease .
            ?disease rdfs:label ?diseaseName .
        }}
        FILTER({gene_filter})
    }}
    LIMIT 10
"""

# Bound up front so the error handler can always print it, even when the
# request itself fails before anything is assigned.
monarch_results = None

if not top_genes:
    print("No genes to look up; skipping the Monarch query.")
else:
    sparql.setQuery(monarch_query)
    sparql.setReturnFormat(JSON)

    try:
        monarch_results = sparql.query().convert()

        # convert() can hand back the raw bytes payload instead of a parsed
        # dict; decode it here so the dict access below does not fail.
        if isinstance(monarch_results, bytes):
            monarch_results = json.loads(monarch_results.decode('utf-8'))

        print("\nResults from Monarch Initiative:")
        if "results" in monarch_results and "bindings" in monarch_results["results"]:
            for result in monarch_results["results"]["bindings"]:
                # Optional variables are simply absent from a binding, hence
                # the "N/A" fallbacks.
                gene = result.get("geneName", {}).get("value", "N/A")
                phenotype = result.get("phenotypeName", {}).get("value", "N/A")
                disease = result.get("diseaseName", {}).get("value", "N/A")
                print(f"Gene: {gene} | Phenotype: {phenotype} | Disease: {disease}")
        else:
            print("No results found or unexpected result format.")
            print("Raw results:", monarch_results)
    except Exception as e:
        # Deliberately best-effort: report the problem and whatever was
        # received so far, and let the rest of the notebook keep running.
        print(f"An error occurred: {str(e)}")
        print("Raw results:", monarch_results)

In [None]:
"""monarch_results = sparql.query().convert()
print("Gene | Phenotype | Disease")
print("-----|-----------|--------")
for row in monarch_results:
    print(row)    
"""    
    
"""gene = str(row['gene'])
    geneName = str(row['geneName']) 
    phenotype = str(row['phenotype']) 
    phenotypeName = str(row['phenotypeName']) 
    disease = str(row['disease']) 
    diseaseName = str(row['diseaseName'])
    print(f"{gene} | {geneName} | {phenotype} | {phenotypeName} | {disease} | {diseaseName}")
"""

"""print("\nResults from Monarch Initiative:")
for result in monarch_results["results"]["bindings"]:
    gene = result["geneName"]["value"]
    phenotype = result["phenotypeName"]["value"]
    disease = result.get("diseaseName", {}).get("value", "N/A")
    print(f"Gene: {gene} | Phenotype: {phenotype} | Disease: {disease}")"""

In [None]:
# All of this cell's imports are grouped here so a missing dependency is
# reported before any network request is made.
import json

import rdflib
from SPARQLWrapper import SPARQLWrapper, JSON

# SPARQL endpoint queried by this cell.
endpoint_url = "https://monarchinitiative.org/sparql/"

# Predicate linking a gene to a phenotype. It is shared by the query and by
# the graph built below so the two cannot drift apart.
HAS_PHENOTYPE_IRI = "https://monarchinitiative.org/vocabulary/has_phenotype"

sparql = SPARQLWrapper(endpoint_url)

# Up to ten (gene, phenotype) pairs, with labels, for genes whose label
# contains "sox4" (case-insensitive).
query = f"""
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX monarch: <https://monarchinitiative.org/gene/>

SELECT ?gene ?geneName ?phenotype ?phenotypeName
WHERE {{
  ?gene rdfs:label ?geneName ;
        <{HAS_PHENOTYPE_IRI}> ?phenotype .
  ?phenotype rdfs:label ?phenotypeName .
  FILTER(CONTAINS(LCASE(STR(?geneName)), "sox4"))
}}
LIMIT 10
"""

sparql.setQuery(query)
sparql.setReturnFormat(JSON)

# NOTE(review): this rebinds `results`, which the first query cell iterates as
# rows (`row['gene']`); re-running that cell after this one will fail unless
# `results` is recomputed first. The name is kept in case later cells use it.
results = sparql.query().convert()

# convert() can hand back the raw bytes payload instead of a parsed dict;
# decode it the same way the first query cell does.
if isinstance(results, bytes):
    results = json.loads(results.decode("utf-8"))

# Tolerate a payload without the usual results/bindings nesting instead of
# raising KeyError.
bindings = results.get("results", {}).get("bindings", [])
if not bindings:
    print("No results found or unexpected result format.")

# Report each gene/phenotype pair with its label and IRI.
for result in bindings:
    gene = result["gene"]["value"]
    gene_name = result["geneName"]["value"]
    phenotype = result["phenotype"]["value"]
    phenotype_name = result["phenotypeName"]["value"]
    print(f"Gene: {gene_name} ({gene})")
    print(f"Phenotype: {phenotype_name} ({phenotype})")
    print("---")

# Mirror the same pairs into a local RDF graph as
# (gene, has_phenotype, phenotype) triples.
g = rdflib.Graph()
has_phenotype = rdflib.URIRef(HAS_PHENOTYPE_IRI)  # built once, reused per triple

for result in bindings:
    gene = rdflib.URIRef(result["gene"]["value"])
    phenotype = rdflib.URIRef(result["phenotype"]["value"])
    g.add((gene, has_phenotype, phenotype))