In [5]:
import requests
import wikidataintegrator as wdi
import pandas as pd

In [6]:
def sparql_map(prop_nr):
    """Build a lookup from the values of one Wikidata property to item URIs.

    A SPARQL query selecting every ``?c wdt:<prop_nr> ?id`` pair is sent
    through WikidataIntegrator, and the bindings are folded into a dict.

    Args:
        prop_nr: Property identifier inserted after ``wdt:`` in the query
            (for example ``'P665'``).

    Returns:
        dict: maps the ``?id`` value of each binding to the ``?c`` value of
        that binding. If the same ``?id`` value appears in several bindings,
        the last binding in the result wins.
    """
    sparql = '''
    SELECT DISTINCT * WHERE {{
        ?c wdt:{} ?id .
    }}
    '''.format(prop_nr)

    response = wdi.wdi_core.WDItemEngine.execute_sparql_query(sparql)
    bindings = response['results']['bindings']

    # Later bindings overwrite earlier ones for a repeated id value.
    return {row['id']['value']: row['c']['value'] for row in bindings}

In [7]:
def generate_gene_protein_map():
    """Map each ``?entrez`` value to the set of ``?protein`` items it links to.

    The SPARQL query selects items ``?c`` that have a ``wdt:P351`` value
    (bound to ``?entrez``), have ``wdt:P703`` set to ``wd:Q15978631``, and
    point via ``wdt:P688`` to a ``?protein`` that itself has a ``wdt:P352``
    value. The ``?uniprot`` binding only restricts the result set; its value
    is not used.

    Returns:
        dict: ``?entrez`` value -> set of ``?protein`` values (as returned by
        the query, i.e. strings).
    """
    sparql = '''
    SELECT DISTINCT * WHERE {
      ?c wdt:P351 ?entrez.
      ?c wdt:P703 wd:Q15978631.
      ?c wdt:P688 ?protein.
      ?protein wdt:P352 ?uniprot .
    }
    '''

    response = wdi.wdi_core.WDItemEngine.execute_sparql_query(sparql)

    proteins_by_entrez = {}
    for row in response['results']['bindings']:
        gene_id = row['entrez']['value']
        # A gene may map to several proteins, so the proteins are collected in a set.
        proteins_by_entrez.setdefault(gene_id, set()).add(row['protein']['value'])

    return proteins_by_entrez

# Run the query once when this cell executes; the cell that builds
# drug_target_list looks gene ids up in this mapping.
gene_protein_map = generate_gene_protein_map()

In [12]:
# Tab-separated prediction tables to scan; each is read for its 'drug' and
# 'protein' columns.
# NOTE(review): these are absolute, machine-specific paths, so the cell only
# runs where the files exist at exactly these locations.
files = [
    '/home/sebastian/jupyter-notebooks/paper_data/Sawada et al 2015 data PMID:26580494/pred-DPI_phen.txt/pred-DPI_phen.txt',
    '/home/sebastian/jupyter-notebooks/paper_data/Sawada et al 2015 data PMID:26580494/pred-DPI_chem.txt/pred-DPI_chem.txt'
]

drug_target_list = []

# Lookup from Wikidata property P665 values to item URIs, used to resolve the
# 'drug' column.
kegg_map = sparql_map('P665')

for ff in files:
    data = pd.read_csv(ff, sep='\t', header=0)

    for row_idx, row in data.iterrows():
        drug_id = row['drug']
        # Rows whose drug id has no entry in kegg_map are skipped entirely.
        if drug_id not in kegg_map:
            continue
        drug_item = kegg_map[drug_id]

        # Drop the first four characters of the 'protein' value.
        # NOTE(review): presumably this strips an organism prefix such as
        # 'hsa:' to leave the bare gene id — confirm against the data files.
        gene_id = row['protein'][4:]

        # One record per protein item linked to the gene; gene ids missing
        # from gene_protein_map contribute nothing.
        drug_target_list.extend(
            {'drug_qid': drug_item, 'target_qid': protein_item}
            for protein_item in gene_protein_map.get(gene_id, ())
        )

print(len(drug_target_list))

12501


In [13]:
# Spot-check: show the second entry (index 1) of the collected drug/target records.
print(drug_target_list[1])

{'drug_qid': 'http://www.wikidata.org/entity/Q22075909', 'target_qid': 'http://www.wikidata.org/entity/Q21111224'}


In [18]:

# Flatten each {'drug_qid': ..., 'target_qid': ...} record into a
# [drug, target] row so the frame gets exactly two columns in that order.
tmp_list = [[pair['drug_qid'], pair['target_qid']] for pair in drug_target_list]

drug_target_table = pd.DataFrame(tmp_list, columns=['drug', 'target'])

# Preview the first rows as the cell output.
drug_target_table.head()

Unnamed: 0,drug,target
0,http://www.wikidata.org/entity/Q22075909,http://www.wikidata.org/entity/Q408575
1,http://www.wikidata.org/entity/Q22075909,http://www.wikidata.org/entity/Q21111224
2,http://www.wikidata.org/entity/Q22075909,http://www.wikidata.org/entity/Q1920577
3,http://www.wikidata.org/entity/Q22075909,http://www.wikidata.org/entity/Q21130485
4,http://www.wikidata.org/entity/Q22075909,http://www.wikidata.org/entity/Q21149579


In [19]:
# Write the drug/target table to 'sawada.csv' (relative to the working directory).
# NOTE(review): to_csv is called with its defaults, so the row index is also
# written as an unnamed first column — pass index=False if that is not wanted.
drug_target_table.to_csv('sawada.csv')