In [1]:
import os
import re

import pandas as pd
from owlready2 import *



In [2]:
# Location of the CdS ontology file. The default is the original author's local
# copy; set the CDS_ONTOLOGY_PATH environment variable to load the ontology from
# another location without editing the notebook.
ONTOLOGY_PATH = os.environ.get(
    "CDS_ONTOLOGY_PATH",
    "/Users/sarahreb/Desktop/local/protege_files/cds-ontology.owl",
)
onto = get_ontology(ONTOLOGY_PATH).load()

# Namespaces attached to `onto`. Individuals created further down are placed in
# one of these so that their IRI is the namespace's base IRI plus their name.

# Used for senders and for addressees that have a GND identifier.
gndo = Namespace(
    world_or_ontology=onto,
    base_iri="https://d-nb.info/gnd/",
    name="gndo"
)

# Used for the letters themselves.
cds_docs = Namespace(
    world_or_ontology=onto,
    base_iri="https://constance-de-salm.de/archiv/#/document/",
    name="cds_docs"
)

# GeoNames namespace; not referenced by the cells visible in this notebook section.
gn = Namespace(
    world_or_ontology=onto,
    base_iri="https://www.geonames.org/",
    name="gn"
)

# Used for addressees that only have a VIAF identifier.
viaf = Namespace(
    world_or_ontology=onto,
    base_iri="https://viaf.org/viaf/",
    name="viaf"
)

# Used for topics that have a Wikidata entry in the subject index.
wikidata = Namespace(
    world_or_ontology=onto,
    base_iri="https://www.wikidata.org/wiki/",
    name="wikidata"
)



In [None]:
"""Individuals are instances in ontologies. They are created as any other Python instances. The first parameter is the name (or identifier) of the Individual; it corresponds to the .name attribute in Owlready2. If not given, the name is automatically generated from the Class name and a number.
"""

In [None]:
# Load the filtered CdS letter metadata that the individuals are built from.
# Missing cells are replaced by 0, which the import loop below tests with `== 0`
# to detect "no value".
df = pd.read_csv('../data/retrieved/filtered_cds_data.csv').fillna(0)
df

In [None]:
# Load the semicolon-separated subject index, drop the 'Unnamed: 0' column
# (presumably a previously saved row index; confirm) and replace missing cells
# by 0 so that a missing Wikidata entry can be detected with `== 0`.
sachindex = (
    pd.read_csv('../data/retrieved/sachindex_additional_data_completed-v2.csv', sep=";")
    .drop(columns='Unnamed: 0')
    .fillna(0)
)
sachindex

In [None]:
# Adding instances of the CdS letters with senders and addressees, topics and places of exposition.
#
# One Letter individual is created per row of `df`. Missing CSV cells were replaced
# by 0 when `df` was loaded, so `== 0` / `!= 0` below means "no value" / "has value".
# `index` is used as a row label; this assumes `df` still has the default 0..n-1
# index produced by read_csv.
for index, url in enumerate(df['URL']):
    # The letter is named after its archive URL with the cds_docs base IRI removed.
    new_letter = onto.Letter(re.sub('https://constance-de-salm.de/archiv/#/document/', '', url), namespace=cds_docs)
    new_letter.fud_key = df.at[index, 'FuD-Key']
    new_letter.has_year.append(int(df.at[index, 'year']))
    new_letter.has_decade.append(int(df.at[index, 'decade']))
    new_letter.has_date.append(df.at[index, 'Datierung (JJJJ-MM-TT)'])

    # Sender: always identified through the GND identifier.
    # NOTE(review): a row without a sender GND (placeholder 0) makes re.sub raise
    # TypeError; this assumes the filtered data always carries one. Confirm.
    new_sender = onto.Sender(re.sub('http://d-nb.info/gnd/', '', df.at[index, 'GND (Verfasser)']), namespace=gndo)
    new_sender.label = df.at[index, 'Verfasser']
    sender_viaf = df.at[index, 'VIAF (Verfasser)']
    if sender_viaf != 0:
        # Only store a real VIAF link, never the 0 placeholder (same rule as for addressees).
        new_sender.has_viaf = sender_viaf
    new_letter.has_sender.append(new_sender)

    # Addressee: prefer the GND identifier, fall back to VIAF, and as a last
    # resort use the row number as the individual's name inside `onto`.
    addressee_gnd = df.at[index, 'GND (Empfänger)']
    addressee_viaf = df.at[index, 'VIAF (Empfänger)']
    if addressee_gnd != 0:
        new_addressee = onto.Addressee(re.sub('http://d-nb.info/gnd/', '', addressee_gnd), namespace=gndo)
        if addressee_viaf != 0:
            new_addressee.has_viaf = addressee_viaf
    elif addressee_viaf != 0:
        new_addressee = onto.Addressee(re.sub('http://viaf.org/viaf/', '', addressee_viaf), namespace=viaf)
    else:
        new_addressee = onto.Addressee(str(index), namespace=onto)
    new_addressee.label = df.at[index, 'Empfänger']
    new_letter.has_addressee.append(new_addressee)

    # Topics: the keyword cell holds ';'-separated entries, each of which may
    # contain several '/'-separated words that are looked up in the subject index.
    keywords = df.at[index, 'Schlagwörter']
    if keywords == 0:
        # No keywords at all: attach the shared placeholder topic.
        empty_topic = onto.Topic("t0", namespace=onto)
        empty_topic.label = "Empty Topic"
        new_letter.has_topic.append(empty_topic)
    else:
        for keyword in keywords.split(";"):
            for word in (part.strip() for part in keyword.split('/')):
                try:
                    matches = sachindex.loc[sachindex['Deutsch'] == word]
                    # Raises IndexError when the word is not in the subject index.
                    entry = matches['Wikidata'].values[0]
                    if entry == 0:
                        # No Wikidata entry: name the topic after its row label in the subject index.
                        new_topic = onto.Topic("t_" + str(int(matches.index[0])), namespace=onto)
                    else:
                        new_topic = onto.Topic(re.sub('https://www.wikidata.org/wiki/', '', entry),
                                               namespace=wikidata)
                    new_topic.label = word
                    new_letter.has_topic.append(new_topic)
                except (ValueError, IndexError):
                    # Best effort: skip words that cannot be resolved. A tuple is
                    # required here; `ValueError and IndexError` only caught IndexError.
                    continue

In [None]:
# Inspect the result: one line per letter with its date, FuD key, decade, year,
# sender(s), addressee(s) and topic(s).
for letter in onto.Letter.instances():
    print(
        letter,
        letter.has_date,
        letter.fud_key,
        letter.has_decade,
        letter.has_year,
        letter.has_sender,
        letter.has_addressee,
        letter.has_topic,
    )


# One line per topic together with its label.
for topic in onto.Topic.instances():
    print(topic, topic.label)