In [3]:
import requests
import pprint
import pandas as pd
import wikidataintegrator as wdi

# HTTP session object created up front.
# NOTE(review): `s` is not referenced by any other visible cell — confirm it is still needed.
s = requests.Session()


def sparql_map(prop_nr):
    """Map the values of one property to the items that carry them.

    Sends a SPARQL query selecting every ``?c wdt:<prop_nr> ?id`` pair through
    ``WDItemEngine.execute_sparql_query`` and folds the bindings into a dict.

    :param prop_nr: property number placed after ``wdt:`` in the query
        (e.g. ``'P662'``)
    :return: dict keyed by the ``?id`` binding value, with the ``?c`` binding
        value as the entry; when the same id occurs in several bindings the
        last one returned wins
    """
    template = '''
    SELECT DISTINCT * WHERE {{
        ?c wdt:{} ?id .
    }}
    '''
    response = wdi.wdi_core.WDItemEngine.execute_sparql_query(template.format(prop_nr))

    # A repeated id simply overwrites its earlier entry, as a plain dict does.
    return {
        binding['id']['value']: binding['c']['value']
        for binding in response['results']['bindings']
    }

def generate_gene_protein_map():
    """Collect, per ``?hgnc`` value, the set of ``?protein`` items linked to it.

    The query selects items ``?c`` that have a P353 value (``?hgnc``) and a
    P688 link to ``?protein``, where ``?protein`` itself has a P352 value
    (``?uniprot``). The ``?uniprot`` binding only restricts the result set; it
    is not stored.

    :return: dict mapping each ``?hgnc`` binding value to a ``set`` of
        ``?protein`` binding values
    """
    sparql = '''
    SELECT DISTINCT * WHERE {
      ?c wdt:P353 ?hgnc.
      ?c wdt:P688 ?protein.
      ?protein wdt:P352 ?uniprot .
    }
    '''
    response = wdi.wdi_core.WDItemEngine.execute_sparql_query(sparql)

    proteins_by_hgnc = {}
    for row in response['results']['bindings']:
        hgnc_value = row['hgnc']['value']
        protein_value = row['protein']['value']
        # One gene may encode several proteins, hence a set per key.
        proteins_by_hgnc.setdefault(hgnc_value, set()).add(protein_value)

    return proteins_by_hgnc

# ?hgnc value -> set of protein item URIs; fetched once via SPARQL when this cell runs.
gene_protein_map = generate_gene_protein_map()



# P662 value (as a string) -> item URI.
# Presumably P662 is the PubChem CID property, judging by the variable name — TODO confirm.
pubchem_map = sparql_map('P662')



In [4]:
# Location of the input CSV from the Fu et al. 2016 supplementary data.
# NOTE: this is an absolute, machine-specific path; adjust it before re-running elsewhere.
FU_POSITIVE_CPI_CSV = '/home/sebastian/jupyter-notebooks/paper_data/Fu et al 2016 data PMID:27071755/12859_2016_1005_MOESM5_ESM(1)/evolving_network_prediction/positive_CPI_test_with_slaprawscore.csv'

# The file has no header row, so pandas labels the columns 0, 1, 2.
fu_data = pd.read_csv(FU_POSITIVE_CPI_CSV, header=None)


    

In [5]:
# Preview the first rows; the file was read with header=None, so columns are labelled 0, 1, 2.
fu_data.head()

Unnamed: 0,0,1,2
0,660379,GMNN,0.0
1,44587447,NAT1,0.0
2,20760215,FXA,23.9
3,44407823,TFA,0.0
4,44414763,HTT,0.0


In [16]:
# Build drug -> protein-target pairs.
# For every row of fu_data, column 0 is looked up in pubchem_map (keys are
# strings) and column 1 in gene_protein_map; rows whose column-0 value has no
# entry in pubchem_map are skipped, and rows whose column-1 value has no entry
# in gene_protein_map contribute nothing.
drug_target_list = []

# Iterate the two columns directly rather than via DataFrame.iterrows():
# iterrows builds one Series per row and does not preserve per-column dtypes,
# so an integer id can be upcast to float and stringify as e.g. '660379.0',
# which would silently miss every pubchem_map lookup.
for cid, hgnc in zip(fu_data[0], fu_data[1]):
    pubchem = str(cid)

    if pubchem not in pubchem_map:
        continue
    pubchem_qid = pubchem_map[pubchem]

    if hgnc in gene_protein_map:
        # gene_protein_map values are sets; sort them so the resulting list
        # (and anything written from it) has the same order on every run.
        for protein_qid in sorted(gene_protein_map[hgnc]):
            drug_target_list.append({'target_qid': protein_qid, 'drug_qid': pubchem_qid})


print(len(drug_target_list))
# Show one sample pair, but only if it exists — indexing [1] on a shorter list
# would raise IndexError and abort the cell.
if len(drug_target_list) > 1:
    print(drug_target_list[1])

8151
{'drug_qid': 'http://www.wikidata.org/entity/Q2268463', 'target_qid': 'http://www.wikidata.org/entity/Q5401858'}


In [17]:
# Flatten each {'drug_qid': ..., 'target_qid': ...} dict into a [drug, target] row.
tmp_list = [[pair['drug_qid'], pair['target_qid']] for pair in drug_target_list]

# Two-column table of the pairs, in the same order as drug_target_list.
drug_target_table = pd.DataFrame(data=tmp_list, columns=['drug', 'target'])

drug_target_table.head()

Unnamed: 0,drug,target
0,http://www.wikidata.org/entity/Q417884,http://www.wikidata.org/entity/Q4734884
1,http://www.wikidata.org/entity/Q2268463,http://www.wikidata.org/entity/Q5401858
2,http://www.wikidata.org/entity/Q417824,http://www.wikidata.org/entity/Q21111564
3,http://www.wikidata.org/entity/Q5957181,http://www.wikidata.org/entity/Q21130913
4,http://www.wikidata.org/entity/Q425119,http://www.wikidata.org/entity/Q4847910


In [18]:
# Write the drug/target pairs to fu.csv, resolved relative to the current working directory.
# to_csv is called with its defaults, so the DataFrame index is written as an extra first column.
drug_target_table.to_csv('fu.csv')