In [1]:
from owlready2 import *
import pandas as pd
import json

In [2]:
# Загружаем CSV
df_pl = pd.read_csv("plant_taxonomy4.csv")

In [3]:
import types

plant_dict = {}
pat_dict={}


taxon_rank={1:"kingdom",2:"phylum",3:"class",4:"category",5:"order",6:"family",7:"subfamily",8:"genus",9:"species"}

# Создаём онтологию 
onto = get_ontology("http://example.org/taxonomy.owl")

In [4]:
def add_taxon(df,taxon_id,taxon_dict):
    print(f"Adding taxon {taxon_id}")
    row = df[df["id"] == taxon_id].iloc[0]
    latin_name = row["name"].replace(" ", "_")
    if latin_name in taxon_dict:
        return  # Already created

    row = df[df["id"] == taxon_id].iloc[0]
    parent = row["parent"]

    if pd.isna(parent):
        parent_class = Taxon
    else:
        parent = int(parent)
        if parent == taxon_id:
            raise ValueError(f"Taxon {taxon_id} cannot be its own parent.")
        if parent not in taxon_dict:
            add_taxon(df,parent,taxon_dict)
        parent_class = taxon_dict[parent]

    # Prevent inheritance cycle: do not allow a class to inherit from itself
    if parent_class.__name__ == latin_name:
        raise TypeError(f"Inheritance cycle detected for {latin_name}")

    taxon_class = types.new_class(latin_name, (parent_class,))
    taxon_class.label = row["name"]

    # Annotations
    if pd.notna(row["q"]):
        taxon_class.has_wikidata_code.append(str(row["q"]))
    if pd.notna(row["eppo"]):
        taxon_class.has_eppo_code.append(str(row["eppo"]))
    if pd.notna(row["level"]):
        rank_cls = next(
    (cls for cls in Rank.subclasses() if cls.name.lower() == str(taxon_rank[row["level"]]).lower()),
    None
)
        taxon_class.has_rank_level.append(rank_cls)

    # Save to dict
    taxon_dict[row["id"]] = taxon_class

In [5]:
with onto: 
    # Базовый класс для всех таксонов 
    class Taxon(Thing): 
        pass 

    # Создание классов

    PlantDisease = types.new_class("PlantDisease", (Thing,))
    Symptom = types.new_class("Symptom", (Thing,))

    Rank = types.new_class("Rank", (Thing,))
    Kingdom=types.new_class("kingdom", (Rank,))
    Phylum=types.new_class("phylum", (Rank,))
    RankClass=types.new_class("class", (Rank,))
    Category=types.new_class("category", (Rank,))
    Order=types.new_class("order", (Rank,))
    Family=types.new_class("family", (Rank,))
    Subfamily=types.new_class("subfamily", (Rank,))
    Genus=types.new_class("genus", (Rank,))
    Species=types.new_class("species", (Rank,))

    External=types.new_class("external", (Thing,))

    class has_wikidata_code(AnnotationProperty): 
        pass 
    
    class has_eppo_code(AnnotationProperty): 
        pass 
    
    class has_rank_level(ObjectProperty): 
        domain = [Taxon]; range = [Rank]

    Plantae = types.new_class("Plantae", (Taxon,))
    Plantae.has_eppo_code.append("1PLAK")
    Plantae.has_rank_level.append(Kingdom)

    Bacteria = types.new_class("Bacteria", (Taxon,))
    Bacteria.has_eppo_code.append("1BACTK")
    Bacteria.has_rank_level.append(Kingdom)

    Fungi = types.new_class("Fungi", (Taxon,))
    Fungi.has_wikidata_code.append("Q53818215")
    Fungi.has_eppo_code.append("1FUNGK")
    Fungi.has_rank_level.append(Kingdom)

    Chromista = types.new_class("Chromista", (Taxon,))
    Chromista.has_eppo_code.append("1CHROK")
    Chromista.has_rank_level.append(Kingdom)    

    Animalia = types.new_class("Animalia", (Taxon,))
    Animalia.has_eppo_code.append("1ANIMK")
    Animalia.has_rank_level.append(Kingdom)   

    Viruses = types.new_class("Viruses_and_viroids", (Taxon,))
    Viruses.has_eppo_code.append("1VIRUK")
    Viruses.has_rank_level.append(Kingdom)   

    Protista = types.new_class("Protista", (Taxon,))
    Protista.has_eppo_code.append("1PROT")
    Protista.has_rank_level.append(Kingdom)   

    plant_dict[2096] = Plantae
    
    pat_dict[582]=Bacteria
    pat_dict[551]=Fungi
    pat_dict[569]=Viruses
    pat_dict[561]=Animalia
    pat_dict[552]=Chromista
    pat_dict[927]=Protista


    # hasQ = types.new_class("hasQ", (DataProperty, FunctionalProperty)) # идентификатор из Wikidata
    # hasQ.domain = [Thing]
    # hasQ.range = [str]

    # hasEPPOcode = types.new_class("hasEPPOcode", (DataProperty, FunctionalProperty))
    # hasEPPOcode.domain = [Taxon]
    # hasEPPOcode.range = [str]


    afflict = types.new_class("afflict", (ObjectProperty,))
    afflict.domain = [PlantDisease]
    afflict.range = [Plantae]

    hasCause = types.new_class("hasCause", (ObjectProperty,)) #P828
    hasCause.domain = [PlantDisease, Symptom]
    hasCause.range = [Bacteria,Fungi,Viruses,Animalia,Chromista,Protista]

    #hasCause = types.new_class("hasCause", (ObjectProperty,))
    #hasCause.domain = [Symptom]
    #hasCause.range = [PlantDisease]

    hasEffect = types.new_class("hasEffect", (ObjectProperty,))
    hasEffect.domain = [Bacteria,Fungi,Viruses,Animalia,Chromista,Protista,Symptom]
    hasEffect.range = [PlantDisease]

    hasCharacteristic = types.new_class("hasCharacteristic", (ObjectProperty,)) #P1552
    hasCharacteristic.domain = [PlantDisease]
    hasCharacteristic.range = [Symptom]

    host = types.new_class("host", (ObjectProperty,))
    host.domain = [Bacteria,Fungi,Viruses,Animalia,Chromista,Protista]
    host.range = [Plantae]


    # Create taxa
    for _, row in df_pl.iterrows():
        add_taxon(df_pl,row["id"],plant_dict)

# Save OWL
onto.save("taxonomy.owl")
print("Ontology saved to taxonomy.owl")


Adding taxon 0
Adding taxon 2101
Adding taxon 2100
Adding taxon 2099
Adding taxon 2098
Adding taxon 2097
Adding taxon 1
Adding taxon 68
Adding taxon 2103
Adding taxon 2102
Adding taxon 69
Adding taxon 2
Adding taxon 2107
Adding taxon 2106
Adding taxon 2105
Adding taxon 2104
Adding taxon 3
Adding taxon 4
Adding taxon 2111
Adding taxon 2110
Adding taxon 2109
Adding taxon 2108
Adding taxon 5
Adding taxon 2113
Adding taxon 2112
Adding taxon 6
Adding taxon 2116
Adding taxon 2115
Adding taxon 2114
Adding taxon 7
Adding taxon 2120
Adding taxon 2119
Adding taxon 2118
Adding taxon 2117
Adding taxon 8
Adding taxon 2121
Adding taxon 9
Adding taxon 22
Adding taxon 2122
Adding taxon 10
Adding taxon 30
Adding taxon 2123
Adding taxon 11
Adding taxon 12
Adding taxon 2126
Adding taxon 19
Adding taxon 2125
Adding taxon 2124
Adding taxon 13
Adding taxon 2128
Adding taxon 2127
Adding taxon 14
Adding taxon 15
Adding taxon 2130
Adding taxon 2129
Adding taxon 16
Adding taxon 17
Adding taxon 2134
Adding taxon

In [6]:
def add_taxonDisease(df,taxon_id,taxon_dict):
    print(f"Adding taxon {taxon_id}")
    row = df[df["id"] == taxon_id].iloc[0]
    latin_name = row["name"].replace(" ", "_")
    if latin_name in taxon_dict:
        return  # Already created

    row = df[df["id"] == taxon_id].iloc[0]
    parent = row["parent"]

    if pd.isna(parent):
        parent_class = Taxon
    else:
        parent = int(parent)
        if parent == taxon_id:
            raise ValueError(f"Taxon {taxon_id} cannot be its own parent.")
        if parent not in taxon_dict:
            add_taxon(df,parent,taxon_dict)
        parent_class = taxon_dict[parent]

    # Prevent inheritance cycle: do not allow a class to inherit from itself
    if parent_class.__name__ == latin_name:
        raise TypeError(f"Inheritance cycle detected for {latin_name}")

    taxon_class = types.new_class(latin_name, (parent_class,))
    taxon_class.label = row["name"]

    # Annotations
    if pd.notna(row["q"]):
        taxon_class.has_wikidata_code.append(str(row["q"]))
    if pd.notna(row["eppo"]):
        taxon_class.has_eppo_code.append(str(row["eppo"]))
    if pd.notna(row["level"]):
        rank_cls = next(
    (cls for cls in Rank.subclasses() if cls.name.lower() == str(taxon_rank[row["level"]]).lower()),
    None
)
        taxon_class.has_rank_level.append(rank_cls)

    for h in row['hosts_ix']:
        if h is not None:
            if h not in plant_dict:
                taxon_class.host.append(plant_dict[h])
            else:
                print(h)
    for h in row['corr_deseases']:
        if h is not None:
            if h[0] not in d:
                disease=types.new_class(h[0].replace(" ", "_"),(PlantDisease,))
                d.append(h[0])
                try:
                    if pd.notna(h[1]):
                        disease.has_wikidata_code.append(h[1])
                except IndexError:
                    print(IndexError)                    
                taxon_class.hasEffect.append(disease)
                disease.hasCause.append(taxon_class)
            else:
                print(h)

    # Save to dict
    taxon_dict[row["id"]] = taxon_class

In [7]:
df_pat= pd.read_csv("a_taxon15.csv")

df_pat.hosts = df_pat.hosts.apply(json.loads)
df_pat.hosts_ix = df_pat.hosts_ix.apply(json.loads)
df_pat.corr_deseases = df_pat.corr_deseases.apply(json.loads)


In [8]:
df_pat[df_pat["level"] == 1]

Unnamed: 0,id,level,parent,name,q,eppo,hosts,hosts_ix,corr_deseases
551,551,1.0,,Fungi,Q53818215,1FUNGK,[],[],[]
552,552,1.0,,Chromista,,1CHROK,[],[],[]
561,561,1.0,,Animalia,,1ANIMK,[],[],[]
569,569,1.0,,Viruses and viroids,,1VIRUK,[],[],[]
582,582,1.0,,Bacteria,,1BACTK,[],[],[]
927,927,1.0,,Protista,,1PROTK,[],[],[]


In [9]:
for _, row in df_pat.iterrows():
    print(row["corr_deseases"])

[["witch's broom", 'Q755981']]
[['downy mildew', 'Q1394849']]
[['black wood-vessel disease', 'Q3382633'], ['Phoma blight', 'Q65306828']]
[['Bark beetle infestation in Europe', 'Q109019501']]
[['rust', 'Q4273292']]
[['Q114352465', 'Q114352465']]
[['Black bundle disease']]
[['crown gall disease', 'Q12425880'], ['Crown gall']]
[['White rust']]
[['White rust']]
[['Brown spot'], ['Fruit rot']]
[['black spot disease', 'Q14789656']]
[['Alternaria blotch']]
[['Leaf spot']]
[['Brown spot']]
[['Stackburn']]
[['Purple blotch']]
[['Black rot']]
[['early blight', 'Q14789614']]
[['Alternaria leaf spot', 'Q85741043']]
[['Aphanomyces root rot', 'Q10726070']]
[['Black root']]
[['apple mosaic disease', 'Q11775187']]
[['Q24573126', 'Q24573126']]
[['Leaf spot']]
[['ascochytosis', 'Q99013460']]
[['Leaf spot'], ['Black spot']]
[['Q132126199', 'Q132126199'], ['Q132126194', 'Q132126194']]
[['Q117081539', 'Q117081539']]
[['Q11775269', 'Q11775269']]
[['Bean mild mosaic']]
[['rhizomania', 'Q25457666']]
[['Neck r

In [10]:
df_pat["level"] = df_pat["level"].replace(10, 9)

In [11]:
d=[]

with onto:

    # Create taxa
    for _, row in df_pat.iterrows():
        add_taxonDisease(df_pat,row["id"],pat_dict)

# Save OWL
onto.save("taxonomy.owl")
print("Ontology saved to taxonomy.owl")

Adding taxon 0
Adding taxon 1
Adding taxon 554
Adding taxon 553
Adding taxon 2
Adding taxon 560
Adding taxon 559
Adding taxon 558
Adding taxon 557
Adding taxon 556
Adding taxon 555
Adding taxon 3
Adding taxon 566
Adding taxon 565
Adding taxon 564
Adding taxon 563
Adding taxon 562
Adding taxon 4
Adding taxon 568
Adding taxon 567
Adding taxon 5
Adding taxon 576
Adding taxon 575
Adding taxon 574
Adding taxon 573
Adding taxon 572
Adding taxon 571
Adding taxon 570
547
1526
1521
1551
1529
1603
1171
1165
1522
507
549
647
1531
770
1516
1558
1912
1154
1153
1597
1563
209
1167
1554
1600
1169
1517
1231
Adding taxon 6
Adding taxon 581
Adding taxon 580
Adding taxon 579
Adding taxon 578
Adding taxon 577
2088
<class 'IndexError'>
Adding taxon 7
Adding taxon 587
Adding taxon 586
Adding taxon 585
Adding taxon 584
Adding taxon 583
642
869
1521
12
1551
100
324
1529
34
674
787
1674
996
937
319
1939
2069
96
1306
72
341
1790
647
1219
22
711
1290
1531
58
1238
81
1516
7
1005
45
15
281
9
255
1200
506
911
54
972

In [12]:
# Загружаем онтологию (например Plant Ontology в owl)
onto_po = get_ontology("plant-ontology.obo.owl").load()

# Находим класс Plant anatomical entity
plant_anatomical_entity = onto_po.search_one(label="plant anatomical entity")

# Рекурсивно обходим подклассы
def print_subclasses(cls, level=0):
    print("  " * level + cls.name)
    for sub in cls.subclasses():
        print_subclasses(sub, level+1)

print_subclasses(plant_anatomical_entity)

PO_0025131
  PO_0009011
    PO_0025099
      PO_0000001
      PO_0020110
      PO_0020108
      PO_0025028
      PO_0025233
    PO_0000003
      PO_0009009
        PO_0000010
          PO_0000011
            PO_0025305
          PO_0025304
        PO_0025302
          PO_0000011
            PO_0025305
          PO_0004537
        PO_0025303
          PO_0025304
      PO_0008037
      PO_0025017
        PO_0020048
        PO_0020019
          PO_0000244
          PO_0000245
      PO_0025280
        PO_0025281
        PO_0025283
      PO_0025279
        PO_0025074
        PO_0025282
      PO_0030027
    PO_0000004
      PO_0000005
        PO_0000006
        PO_0000007
        PO_0000008
      PO_0000009
        PO_0006091
        PO_0006090
      PO_0000010
        PO_0000011
          PO_0025305
        PO_0025304
    PO_0009002
      PO_0000005
        PO_0000006
        PO_0000007
        PO_0000008
      PO_0025606
        PO_0025026
          PO_0000038
          PO_0000191
        

In [13]:
def collect_labels(cls, labels=None):
    if labels is None:
        labels = {}
    for sub in cls.subclasses():
        if hasattr(sub, "label") and sub.label:
            for l in sub.label:
                labels[l.lower()] = sub
        else:
            # fallback на техническое имя
            labels[sub.name.lower()] = sub
        collect_labels(sub, labels)
    return labels

ontology_labels = collect_labels(plant_anatomical_entity)

In [14]:
with open("query Q20011319.json", "r", encoding="utf-8") as f:
    plant_structure_data = json.load(f)

with open("query Q24060707.json", "r", encoding="utf-8") as f:
    plant_organs_data = json.load(f)

In [15]:
def extract_labels(data):
    labels = {}
    for entry in data:
        label = entry["itemLabel"].lower()
        iri = entry["item"]
        labels[label] = iri
    return labels

wikidata_labels = {}
wikidata_labels.update(extract_labels(plant_structure_data))
wikidata_labels.update(extract_labels(plant_organs_data))

In [16]:
# Совпадения
matches = {}
not_in_onto = {}
not_in_wikidata = {}

for label, iri in wikidata_labels.items():
    if label in ontology_labels:
        matches[label] = (ontology_labels[label], iri)
    else:
        not_in_onto[label] = iri

for label in ontology_labels:
    if label not in wikidata_labels:
        not_in_wikidata[label] = ontology_labels[label]

In [17]:
def contains_match(cls):
    """Проверяем, есть ли в этом классе или у его потомков совпадение с matches"""
    label = cls.label[0] if cls.label else cls.name
    if label in matches:
        return True
    for sub in cls.subclasses():
        if contains_match(sub):
            return True
    return False

In [18]:
def add_class_to_ontology(cls, parent_onto_class=None):
    """
    Добавляем класс cls и его потомков в онтологию, если они есть в matches
    или ведут к ним.
    """
    label = cls.label[0] if cls.label else cls.name


    if contains_match(cls):
        # Создаём класс в онтологии
        with onto:
            base = (parent_onto_class,) if parent_onto_class else (Thing,)
            NewClass = types.new_class(cls.label[0].replace(" ", "_"),base)
            #cls.is_a.append(External)
            NewClass.equivalent_to = [cls]
            NewClass.label = [cls.label[0]]  # добавляем label как аннотацию

            if label in matches:
                #q=types.new_class(matches[label][1].replace("http://www.wikidata.org/entity/",""), (External,))
                NewClass.has_wikidata_code.append(matches[label][1].replace("http://www.wikidata.org/entity/",""))
                #NewClass.equivalent_to = [class_q]
                print(NewClass)
        # Рекурсивно добавляем потомков
        for sub in cls.subclasses():
            add_class_to_ontology(sub, parent_onto_class=NewClass)

with onto:
    
    # Стартуем с корневого класса
    add_class_to_ontology(plant_anatomical_entity)

# Сохраняем онтологию
onto.save("taxonomy.owl")

taxonomy.plant_structure
taxonomy.thallus
taxonomy.sieve_tube_element
taxonomy.tracheid
taxonomy.prickle
taxonomy.phloem
taxonomy.xylem
taxonomy.sapwood
taxonomy.vascular_bundle
taxonomy.endosperm
taxonomy.shoot_apical_meristem
taxonomy.cork_cambium
taxonomy.vascular_cambium
taxonomy.procambium
taxonomy.root_meristem
taxonomy.apical_meristem
taxonomy.shoot_apical_meristem
taxonomy.cortex
taxonomy.parenchyma
taxonomy.chlorenchyma
taxonomy.mesophyll
taxonomy.aerenchyma
taxonomy.collenchyma
taxonomy.sclerenchyma
taxonomy.stereome
taxonomy.bark
taxonomy.nectary
taxonomy.seed_coat
taxonomy.perisperm
taxonomy.gemma
taxonomy.hypocotyl
taxonomy.epicotyl
taxonomy.leaflet
taxonomy.style
taxonomy.leaf_sheath
taxonomy.petiole
taxonomy.stipule
taxonomy.elaiosome
taxonomy.ligule
taxonomy.leaf_margin
taxonomy.lamina
taxonomy.plant_ovary
taxonomy.fruit
taxonomy.plant_organ
taxonomy.root
taxonomy.lateral_root
taxonomy.pneumatophore
taxonomy.prop_root
taxonomy.tuberous_root
taxonomy.branch
taxonomy.stol