In this notebook I'll get every subclass of neurological disorder, together with its respective parent class, from the Disease Ontology. The entries are then reconciled in OpenRefine and submitted to Wikidata as part of the Neurological Disorders task in WikiProject Neuroscience.

In [1]:
from owlready2 import *
import pandas as pd
from pronto import Ontology

In [2]:
# Load the Human Disease Ontology OWL file with owlready2, straight from the
# DiseaseOntology GitHub repository.
do = get_ontology(
    "https://raw.githubusercontent.com/DiseaseOntology/HumanDiseaseOntology/master/src/ontology/HumanDO.owl"
).load()

# Namespace handle so that terms can be addressed as attributes, e.g. obo.DOID_863.
obo = do.get_namespace("http://purl.obolibrary.org/obo/")

In [3]:
# DOID:863 is the root of the branch of interest; the search result is the
# list of classes that the later cells turn into a table.
nervous_system_root = obo.DOID_863
neural = do.search(subclass_of=nervous_system_root)

The next few cells contain a bit of spaghetti code, because I don't really know how to work with these ontology packages (particularly how to get the parent class name using owlready2, which is why I used pronto for that). The documentation of each package is lacking in different aspects; anyone is welcome to fix it :P

In [4]:
# Load the same OWL file a second time, now with pronto; this copy is only
# used to look up the human-readable name of a parent term by its identifier.
pr = Ontology(
    "https://raw.githubusercontent.com/DiseaseOntology/HumanDiseaseOntology/master/src/ontology/HumanDO.owl"
)

In [17]:
# Build one record per class found by the search: the owlready2 class itself,
# its first label, and the name of its first named parent class (looked up in
# the pronto copy of the ontology). The table is then saved for OpenRefine.
disorder_records = []

for disord in neural:
    # `is_a` may also contain class constructs (e.g. restrictions) that have no
    # identifier to look up, so only named classes are considered as parents.
    # ThingClass comes from the notebook's `from owlready2 import *`.
    named_parents = [parent for parent in disord.is_a if isinstance(parent, ThingClass)]

    if named_parents:
        # owlready2 names look like "DOID_863"; pronto is keyed by "DOID:863".
        parent_id = named_parents[0].name.replace("_", ":")
        parent_name = pr[parent_id].name
    else:
        # No named parent: keep the row, leave the parent empty.
        parent_name = None

    disorder_records.append({
        'doid': disord,
        # A class without any label gets an empty cell instead of an IndexError.
        'label': disord.label[0] if disord.label else None,
        'subclass_of': parent_name,
    })

# Explicit column list keeps the layout stable even when the search is empty.
neural_disorders = pd.DataFrame(disorder_records, columns=['doid', 'label', 'subclass_of'])
neural_disorders.to_csv("../../data/disorders/do_disorders.csv")
neural_disorders

Unnamed: 0,doid,label,subclass_of
0,obo.DOID_863,nervous system disease,disease of anatomical entity
1,obo.DOID_0050155,sensory system disease,nervous system disease
2,obo.DOID_331,central nervous system disease,nervous system disease
3,obo.DOID_0060695,hyperekplexia,nervous system disease
4,obo.DOID_0090115,spinocerebellar ataxia type 1 with axonal neur...,nervous system disease
...,...,...,...
1909,obo.DOID_0111695,familial adult myoclonic epilepsy 3,familial adult myoclonic epilepsy
1910,obo.DOID_0111696,familial adult myoclonic epilepsy 6,familial adult myoclonic epilepsy
1911,obo.DOID_0111448,progressive myoclonus epilepsy 1B,Unverricht-Lundborg syndrome
1912,obo.DOID_0111452,progressive myoclonus epilepsy 1A,Unverricht-Lundborg syndrome


Now the saved data will be reconciled in OpenRefine

#### Loading reconciled data and creating quickstatements

In [8]:
# Load the table exported from OpenRefine after reconciliation and normalise
# the identifiers from the owlready2 form to the usual one,
# e.g. "obo.DOID_863" -> "DOID:863".
recon = pd.read_csv("../../data/disorders/do_disorders_final.csv")

# regex=False: both patterns are literal text. Without it, older pandas
# versions treat the "." in 'obo.' as a regex wildcard, and the default
# differs between pandas releases.
recon['doid'] = (
    recon['doid']
    .str.replace('obo.', '', regex=False)
    .str.replace('_', ':', regex=False)
)
recon

Unnamed: 0,Column,doid,label,disorder_qid,subclass_of,subclass_of_qid
0,0,DOID:863,neurological disorder,Q3339235,disease of anatomical entity,Q18557436
1,1,DOID:0050155,sensory system disease,Q18553219,neurological disorder,Q3339235
2,2,DOID:331,central nervous system disease,Q5062122,neurological disorder,Q3339235
3,3,DOID:0060695,hyperekplexia,Q1781802,neurological disorder,Q3339235
4,4,DOID:0090115,spinocerebellar ataxia type 1 with axonal neur...,Q30989322,neurological disorder,Q3339235
...,...,...,...,...,...,...
1244,1897,DOID:9388,lens-induced iridocyclitis,Q18558117,iridocyclitis,Q145790
1245,1898,DOID:9389,infectious anterior uveitis,Q18558118,iridocyclitis,Q145790
1246,1899,DOID:0111324,juvenile absence epilepsy,Q3726905,juvenile absence epilepsy,Q3726905
1247,1902,DOID:0111327,juvenile myoclonic epilepsy,Q543517,juvenile myoclonic epilepsy,Q543517


Generating QuickStatements for each reconciled disorder: subclass of (P279) its respective parent class, instance of (P31) disease (Q12136), and Disease Ontology ID (P699).

In [22]:
# Print pipe-separated QuickStatements commands for every reconciled row:
#   <disorder>|P279|<parent item>   subclass of
#   <disorder>|P31|Q12136           instance of disease
#   <disorder>|P699|"<DOID>"        Disease Ontology ID
for _, row in recon.iterrows():
    disorder_qid = row['disorder_qid']
    parent_qid = row['subclass_of_qid']

    # A row without a reconciled item (NaN) has nothing to attach statements to,
    # and concatenating NaN with a string would raise a TypeError.
    if pd.isna(disorder_qid):
        continue

    commands = []

    # Skip the parent statement when the parent is missing, or when disorder
    # and parent were reconciled to the same item: "X|P279|X" would declare an
    # item a subclass of itself.
    if pd.notna(parent_qid) and parent_qid != disorder_qid:
        commands.append(disorder_qid + "|P279|" + parent_qid)

    commands.append(disorder_qid + "|P31|" + "Q12136")
    commands.append(disorder_qid + "|P699|" + '"' + row['doid'] + '"')

    print('\n'.join(commands))

Q3339235|P279|Q18557436
Q3339235|P31|Q12136
Q3339235|P699|"DOID:863"
Q18553219|P279|Q3339235
Q18553219|P31|Q12136
Q18553219|P699|"DOID:0050155"
Q5062122|P279|Q3339235
Q5062122|P31|Q12136
Q5062122|P699|"DOID:331"
Q1781802|P279|Q3339235
Q1781802|P31|Q12136
Q1781802|P699|"DOID:0060695"
Q30989322|P279|Q3339235
Q30989322|P31|Q12136
Q30989322|P699|"DOID:0090115"
Q708165|P279|Q3339235
Q708165|P31|Q12136
Q708165|P699|"DOID:0090124"
Q814547|P279|Q3339235
Q814547|P31|Q12136
Q814547|P699|"DOID:0111580"
Q945238|P279|Q3339235
Q945238|P31|Q12136
Q945238|P699|"DOID:574"
Q794457|P279|Q3339235
Q794457|P31|Q12136
Q794457|P699|"DOID:12697"
Q1424956|P279|Q3339235
Q1424956|P31|Q12136
Q1424956|P699|"DOID:14464"
Q7830379|P279|Q3339235
Q7830379|P31|Q12136
Q7830379|P699|"DOID:3602"
Q1515119|P279|Q3339235
Q1515119|P31|Q12136
Q1515119|P699|"DOID:4969"
Q18557980|P279|Q3339235
Q18557980|P31|Q12136
Q18557980|P699|"DOID:862"
Q5325633|P279|Q18553219
Q5325633|P31|Q12136
Q5325633|P699|"DOID:2742"
Q3041498|P279|Q1855321

Q7204901|P699|"DOID:3688"
Q18558090|P279|Q1755122
Q18558090|P31|Q12136
Q18558090|P699|"DOID:9277"
Q5509169|P279|Q48143
Q5509169|P31|Q12136
Q5509169|P699|"DOID:11608"
Q3678510|P279|Q48143
Q3678510|P31|Q12136
Q3678510|P699|"DOID:9470"
Q3301664|P279|Q48143
Q3301664|P31|Q12136
Q3301664|P699|"DOID:10310"
Q18553843|P279|Q48143
Q18553843|P31|Q12136
Q18553843|P699|"DOID:10341"
Q2669284|P279|Q48143
Q2669284|P31|Q12136
Q2669284|P699|"DOID:12156"
Q4804182|P279|Q48143
Q4804182|P31|Q12136
Q4804182|P699|"DOID:12157"
Q386346|P279|Q2303951
Q386346|P31|Q12136
Q386346|P699|"DOID:1089"
Q17232521|P279|Q2303951
Q17232521|P31|Q12136
Q17232521|P699|"DOID:320"
Q551085|P279|Q2303951
Q551085|P31|Q12136
Q551085|P699|"DOID:322"
Q1112977|P279|Q2303951
Q1112977|P31|Q12136
Q1112977|P699|"DOID:327"
Q991037|P279|Q1049655
Q991037|P31|Q12136
Q991037|P699|"DOID:606"
Q424242|P279|Q1634879
Q424242|P31|Q12136
Q424242|P699|"DOID:639"
Q3702898|P279|Q945238
Q3702898|P31|Q12136
Q3702898|P699|"DOID:0050548"
Q2246789|P279|Q945238

Q127724|P699|"DOID:83"
Q50349814|P279|Q18553990
Q50349814|P31|Q12136
Q50349814|P699|"DOID:0111148"
Q18554068|P279|Q18553990
Q18554068|P31|Q12136
Q18554068|P699|"DOID:11364"
Q18554069|P279|Q18553990
Q18554069|P31|Q12136
Q18554069|P699|"DOID:11367"
Q3043268|P279|Q18558225
Q3043268|P31|Q12136
Q3043268|P699|"DOID:8466"
Q55345684|P279|Q18558225
Q55345684|P31|Q12136
Q55345684|P699|"DOID:0111570"
Q18553789|P279|Q18558225
Q18553789|P31|Q12136
Q18553789|P699|"DOID:10139"
Q18554311|P279|Q18558225
Q18554311|P31|Q12136
Q18554311|P699|"DOID:1237"
Q18554164|P279|Q18558225
Q18554164|P31|Q12136
Q18554164|P699|"DOID:11754"
Q1862805|P279|Q18558225
Q1862805|P31|Q12136
Q1862805|P699|"DOID:11772"
Q18554170|P279|Q18558225
Q18554170|P31|Q12136
Q18554170|P699|"DOID:11776"
Q18558226|P279|Q18558225
Q18558226|P31|Q12136
Q18558226|P699|"DOID:980"
Q1147596|P279|Q18553766
Q1147596|P31|Q12136
Q1147596|P699|"DOID:10033"
Q18554023|P279|Q18553766
Q18554023|P31|Q12136
Q18554023|P699|"DOID:11177"
Q7573981|P279|Q18553766


Q18553724|P699|"DOID:0060207"
Q18553726|P279|Q206901
Q18553726|P31|Q12136
Q18553726|P699|"DOID:0060209"
Q18553727|P279|Q206901
Q18553727|P31|Q12136
Q18553727|P699|"DOID:0060210"
Q18553728|P279|Q206901
Q18553728|P31|Q12136
Q18553728|P699|"DOID:0060211"
Q18553730|P279|Q206901
Q18553730|P31|Q12136
Q18553730|P699|"DOID:0060213"
Q18553731|P279|Q206901
Q18553731|P31|Q12136
Q18553731|P699|"DOID:0060214"
Q21124521|P279|Q206901
Q21124521|P31|Q12136
Q21124521|P699|"DOID:0060355"
Q53660471|P279|Q206901
Q53660471|P31|Q12136
Q53660471|P699|"DOID:0080225"
Q27674812|P279|Q206901
Q27674812|P31|Q12136
Q27674812|P699|"DOID:0110067"
Q27164433|P279|Q206901
Q27164433|P31|Q12136
Q27164433|P699|"DOID:0110068"
Q27164434|P279|Q206901
Q27164434|P31|Q12136
Q27164434|P699|"DOID:0110069"
Q1995327|P279|Q580290
Q1995327|P31|Q12136
Q1995327|P699|"DOID:0060161"
Q7577467|P279|Q580290
Q7577467|P31|Q12136
Q7577467|P699|"DOID:0070348"
Q55345681|P279|Q580290
Q55345681|P31|Q12136
Q55345681|P699|"DOID:0111194"
Q55345735|P279