In [14]:
import json
from pprint import pprint
from SPARQLWrapper import SPARQLWrapper, JSON
import pandas as pd

In [7]:
def generate_query():
    """Return the SPARQL query text used to pull exposure records.

    The query selects the English labels of ?exposure, ?chemical, ?symptom
    and ?exposure_route together with ?chebi and ?reference_URL, for every
    ?exposure that is ``wdt:P279`` of ``wd:Q21167512``.

    NOTE(review): ?exposure_statement is matched through ``p:P636`` but is not
    tied back to the selected ?exposure_route value (no ``ps:P636`` triple), so
    a reference URL may be paired with every route of the same exposure --
    confirm whether that cross-join is intended.

    Returns:
        str: the query text, including its leading/trailing whitespace.
    """
    return '''
        SELECT DISTINCT ?exposureLabel ?chemicalLabel 
        ?chebi ?symptomLabel 
        ?exposure_routeLabel ?reference_URL WHERE {
        ?exposure wdt:P279 wd:Q21167512;
                    wdt:P828 ?chemical;
                    wdt:P780 ?symptom;
                    wdt:P636 ?exposure_route.
          ?chemical wdt:P683 ?chebi.
          ?exposure p:P636 ?exposure_statement.
          ?exposure_statement prov:wasDerivedFrom/pr:P854 ?reference_URL.

          SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . }
        } 
        '''

def query_wikidata(query):
    """Run ``query`` against the Wikidata SPARQL endpoint hard-coded below.

    Args:
        query: SPARQL query text.

    Returns:
        Whatever ``execute_query`` returns for that endpoint and query.
    """
    return execute_query(query=query, endpoint='https://query.wikidata.org/sparql')

def execute_query(endpoint, query):
    """Send ``query`` to the SPARQL service at ``endpoint`` and return its rows.

    Args:
        endpoint: URL of the SPARQL endpoint.
        query: SPARQL query text.

    Returns:
        The ``['results']['bindings']`` entry of the converted JSON response.
    """
    client = SPARQLWrapper(endpoint)
    client.setQuery(query)
    # Ask for JSON so that convert() yields a subscriptable structure.
    client.setReturnFormat(JSON)
    response = client.query().convert()
    return response['results']['bindings']

def parse_object(obj):
    """Flatten one result binding into a plain ``{field: value}`` dict.

    Args:
        obj: mapping in which each of the fields listed below maps to a
            mapping that has a ``'value'`` entry.

    Returns:
        dict with the keys ``chebi``, ``chemicalLabel``,
        ``exposure_routeLabel``, ``reference_URL`` and ``symptomLabel``.

    Raises:
        KeyError: if any of those fields, or its ``'value'``, is missing.
    """
    wanted_fields = (
        'chebi',
        'chemicalLabel',
        'exposure_routeLabel',
        'reference_URL',
        'symptomLabel',
    )
    return {field: obj[field]['value'] for field in wanted_fields}

In [8]:
# Build the query, run it against Wikidata, and flatten every returned
# binding into a plain dict.
query1 = generate_query()
q1r = query_wikidata(query1)
results = [parse_object(binding) for binding in q1r]

In [9]:
# One row per flattened binding; columns come from the dict keys in `results`.
csv_df = pd.DataFrame(results)

In [13]:
# Persist the table. Despite the .csv extension the file is tab-separated,
# and the DataFrame index is written as the first column (to_csv default).
csv_df.to_csv(path_or_buf='wikidata_exposure_symptoms.csv', sep='\t')

In [15]:
# Build a label -> id lookup from ../data/ecto.json. The document is expected
# to have a top-level 'graphs' list whose entries each carry a 'nodes' list.
ecto_lookup = dict()
# Decode explicitly as UTF-8 (the JSON interchange encoding) instead of the
# platform default, and give the handle its own name so it is not shadowed
# by the parsed document.
with open('../data/ecto.json', 'r', encoding='utf-8') as ecto_file:
    ecto = json.load(ecto_file)
# The file is only needed for parsing; process the data after it is closed.
graphs = ecto['graphs']
for graph in graphs:
    for node in graph['nodes']:
        # Keep only labelled nodes whose id contains 'ECTO'. If two nodes
        # share a label, the later one overwrites the earlier entry.
        if 'lbl' in node and 'ECTO' in node['id']:
            ecto_lookup[node['lbl']] = node['id']
pprint(ecto_lookup)

{'abdominal radiography  related exposure': 'http://purl.obolibrary.org/obo/ECTO_0001121',
 'activity related exposure': 'http://purl.obolibrary.org/obo/ECTO_0002032',
 'alcohol abuse related exposure': 'http://purl.obolibrary.org/obo/ECTO_0001095',
 'alcohol consumption related exposure': 'http://purl.obolibrary.org/obo/ECTO_0001082',
 'asbestos mining related exposure': 'http://purl.obolibrary.org/obo/ECTO_0001154',
 'axotomy of saphenous nerve': 'http://purl.obolibrary.org/obo/ECTO_0002044',
 'axotomy of sciatic nerve': 'http://purl.obolibrary.org/obo/ECTO_0002045',
 'behavior related exposure': 'http://purl.obolibrary.org/obo/ECTO_0001974',
 'bronchography  related exposure': 'http://purl.obolibrary.org/obo/ECTO_0001118',
 'chest radiography  related exposure': 'http://purl.obolibrary.org/obo/ECTO_0001122',
 'cholangiography  related exposure': 'http://purl.obolibrary.org/obo/ECTO_0001123',
 'cigar smoking related exposure': 'http://purl.obolibrary.org/obo/ECTO_0001092',
 'cigarett

 'exposure to fenvalerate': 'http://purl.obolibrary.org/obo/ECTO_0001757',
 'exposure to ferbam': 'http://purl.obolibrary.org/obo/ECTO_0001758',
 'exposure to ferric oxide': 'http://purl.obolibrary.org/obo/ECTO_0002178',
 'exposure to ferrocene': 'http://purl.obolibrary.org/obo/ECTO_0002130',
 'exposure to fibrin modulating drug': 'http://purl.obolibrary.org/obo/ECTO_0000671',
 'exposure to fine respirable suspended particulate matter': 'http://purl.obolibrary.org/obo/ECTO_0000113',
 'exposure to fipronil': 'http://purl.obolibrary.org/obo/ECTO_0001351',
 'exposure to first generation antipsychotic': 'http://purl.obolibrary.org/obo/ECTO_0000823',
 'exposure to flame retardant': 'http://purl.obolibrary.org/obo/ECTO_0002283',
 'exposure to flash flooding': 'http://purl.obolibrary.org/obo/ECTO_0000090',
 'exposure to flavouring agent': 'http://purl.obolibrary.org/obo/ECTO_0000589',
 'exposure to flonicamid': 'http://purl.obolibrary.org/obo/ECTO_0001309',
 'exposure to flooding': 'http://pu

 'exposure to sarin': 'http://purl.obolibrary.org/obo/ECTO_0002281',
 'exposure to scabicide': 'http://purl.obolibrary.org/obo/ECTO_0001051',
 'exposure to schistosomicide drug': 'http://purl.obolibrary.org/obo/ECTO_0000648',
 'exposure to sebuthylazine': 'http://purl.obolibrary.org/obo/ECTO_0001484',
 'exposure to secbumeton': 'http://purl.obolibrary.org/obo/ECTO_0001462',
 'exposure to second generation antipsychotic': 'http://purl.obolibrary.org/obo/ECTO_0000824',
 'exposure to secondary sludge': 'http://purl.obolibrary.org/obo/ECTO_0000033',
 'exposure to sedative': 'http://purl.obolibrary.org/obo/ECTO_0000599',
 'exposure to selane': 'http://purl.obolibrary.org/obo/ECTO_0002348',
 'exposure to selenium': 'http://purl.obolibrary.org/obo/ECTO_0001574',
 'exposure to selenium hexafluoride': 'http://purl.obolibrary.org/obo/ECTO_0002373',
 'exposure to selenium ion': 'http://purl.obolibrary.org/obo/ECTO_0000157',
 'exposure to serine protease inhibitor': 'http://purl.obolibrary.org/obo

In [29]:
from collections import defaultdict

# Count how many result rows carry each exposure-route label.
exp_terms = defaultdict(int)
for term in results:
    exp_terms[term['exposure_routeLabel']] += 1

In [30]:
# Display the per-route row counts accumulated in exp_terms.
exp_terms

defaultdict(int,
            {'Transdermal patch': 19,
             'eye contact': 4935,
             'ingestion': 4568,
             'inhalation': 39,
             'inhalation administration': 5156,
             'intramuscular injection': 19,
             'intravenous infusion': 91,
             'skin absorption': 5026})