In [98]:
import pandas as pd
import regex as re

In [99]:
# Destination path of the generated Turtle (.ttl) file
outfile = "../aop150_from_script.ttl"

# Load the AOP EC table.
# Alternative input kept for reference: "../aop_ke_ec.csv"
AOP_EC_table = pd.read_csv("../aop_150_ke_ec.csv")


In [100]:
# Keep only the rows whose "AOP" column is exactly "Aop:150".
is_aop_150 = AOP_EC_table["AOP"].eq("Aop:150")
AOP150 = AOP_EC_table.loc[is_aop_150]


In [101]:
# Write the Turtle header: prefix declarations followed by an anonymous
# owl:Ontology node that imports the GO and RO releases listed below.
header = '''@prefix : <http://www.semanticweb.org/mmandal/ontologies/2022/4/untitled-ontology-76#> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@base <http://www.w3.org/2002/07/owl#> .

[ rdf:type owl:Ontology ;
   owl:imports <http://purl.obolibrary.org/obo/go/releases/2022-01-13/go.owl> ,
               <http://purl.obolibrary.org/obo/ro/releases/2022-01-20/ro.owl>
 ] .
'''

# "w" truncates/creates the file, so re-running this cell starts a fresh
# document (the file is never read back here, so "w+" was unnecessary).
# Turtle documents are UTF-8, so the encoding is set explicitly instead of
# depending on the platform's locale default.
with open(outfile, "w", encoding="utf-8") as f:
    f.write(header)

In [102]:
def process_term(input_id, name):
    """
    Takes a term and id and returns ttl class and instance statements.

    Parameters
    ----------
    input_id : str
        Term identifier, e.g. "GO:0004874" or "UBERON:0004535". The first
        ":" is replaced by "_" to form the last path segment of the
        http://purl.obolibrary.org/obo/ IRI. The prefix may be of any
        length. Identifiers without ":" are used unchanged.
    name : str
        Human-readable term name. Spaces are replaced by "_" to form the
        fragment of the individual's IRI.

    Returns
    -------
    tuple of (str, str)
        ``(class_statement, instance_statement)``. The class statement is a
        complete Turtle statement. The instance statement is deliberately
        left open (no terminating "."), so the caller can append further
        predicate/object pairs before closing it.
    """
    # Replace only the separator between prefix and local id. Slicing at
    # fixed positions would corrupt prefixes that are not two characters long.
    input_id_str = input_id.replace(":", "_", 1)
    name = name.replace(" ", "_")

    class_statement = f'''\n###  http://purl.obolibrary.org/obo/{input_id_str}\n\t<http://purl.obolibrary.org/obo/{input_id_str}> rdf:type owl:Class .\n\n'''
    instance_statement = f'''###  http://www.co-ode.org/ontologies/ont.owl#{name}\n<http://www.co-ode.org/ontologies/ont.owl#{name}> rdf:type owl:NamedIndividual ,\n\t<http://purl.obolibrary.org/obo/{input_id_str}> '''

    return class_statement, instance_statement

def get_action_id(act, source_1=None, id_1=None, term_1=None, source_2=None, id_2=None, term_2=None):
    """
    Takes an action word and returns an RO relationship ID
    simplified version

    This single definition replaces two same-named functions, of which the
    later one silently shadowed the earlier. It accepts both call shapes:
    ``get_action_id(act)`` and the seven-argument form.

    Parameters
    ----------
    act : str
        Action word, e.g. "increased" or "decreased".
    source_1, id_1, term_1, source_2, id_2, term_2 : optional
        Accepted for compatibility with the seven-argument call form.
        They are not used by this simplified lookup.

    Returns
    -------
    str
        "RO_0000057" for "increased", "RO_0000058" for "decreased", and
        "" for any other action, so that unrecognised actions are not
        reported as "decreased".
    """
    action_ids = {
        "increased": "RO_0000057",
        "decreased": "RO_0000058",
    }
    return action_ids.get(act, "")

    
def get_action_id_steps(act, source_1, id_1, term_1, source_2, id_2, term_2):
    """
    Placeholder relationship lookup.

    Echoes all seven arguments to stdout (space separated, one line) and
    returns the fixed relationship ID "RO_0000057", whatever the input.
    """
    received = (act, source_1, id_1, term_1, source_2, id_2, term_2)
    print(*received)

    # TODO: an earlier, disabled sketch chose the ID from the two step
    # sources and the action word (and returned "" when nothing matched).
    # Until that is implemented every call yields the same ID.
    relation_id = "RO_0000057"
    return relation_id
    
def add_action(act, source_1, id_1, term_1, source_2, id_2, term_2):
    """
    Build the Turtle fragment that links the current individual to the
    individual named by ``term_2``.

    Parameters
    ----------
    act, source_1, id_1, term_1, source_2, id_2 :
        Forwarded unchanged to ``get_action_id_steps``, which decides the
        relationship ID.
    term_2 : str
        Name of the target individual. Spaces are replaced by "_", which
        is the same rule ``process_term`` uses when it names individuals.
        Without this the IRI would contain spaces and would not match the
        individual that was declared.

    Returns
    -------
    str
        ``";\\n\\t<relation IRI> <target IRI> "``, ready to be appended to an
        open instance statement, or "" when no relationship ID is returned.
    """
    action_id = get_action_id_steps(act, source_1, id_1, term_1, source_2, id_2, term_2)
    if action_id == "":
        return ""

    target_name = str(term_2).replace(" ", "_")
    return f";\n\t<http://purl.obolibrary.org/obo/{action_id}> <http://www.co-ode.org/ontologies/ont.owl#{target_name}> "

    
# def add_action(action, obj_name, proc_phen_name):
def add_step_action(step1_ao, step1_source, step2_ao, step2_source, pp_term_1, pp_term_2):
    """
    Build the Turtle fragment that links a step-1 individual to the step-2
    individual named by ``pp_term_2``.

    Parameters
    ----------
    step1_ao, step1_source, step2_source, pp_term_1 :
        Forwarded to ``get_action_id_steps`` (see the note in the body).
    step2_ao :
        Accepted for interface compatibility; not forwarded at present.
    pp_term_2 : str
        Name of the target individual. Spaces are replaced by "_", which
        is the same rule ``process_term`` uses when it names individuals.

    Returns
    -------
    str
        ``";\\n\\t<relation IRI> <target IRI> "`` or "" when no relationship
        ID is returned.
    """
    # get_action_id_steps takes
    # (act, source_1, id_1, term_1, source_2, id_2, term_2).
    # This function receives no term IDs, so both ID slots are passed as None.
    # NOTE(review): step1_ao is passed as the action and step2_ao is dropped,
    # because the helper accepts only one action -- confirm intended mapping.
    action_id = get_action_id_steps(
        step1_ao, step1_source, None, pp_term_1, step2_source, None, pp_term_2
    )
    if action_id == "":
        return ""

    target_name = str(pp_term_2).replace(" ", "_")
    return f";\n\t<http://purl.obolibrary.org/obo/{action_id}> <http://www.co-ode.org/ontologies/ont.owl#{target_name}> "

In [103]:
# classes   : term id -> Turtle class statement
# instances : term id -> open Turtle instance statement (closed with "." on write)
# steps     : step number -> list of row tuples, in the order
#             (action, process/phenotype source, id, term, object id, object source, object term)
classes = {}
instances = {}
steps = {}

# Normalise the column labels once for the whole frame (lower-case, with
# whitespace and "/" turned into "_") so every row exposes attribute-style
# fields such as `row.process_phenotype_id`.
aop_rows = AOP150.rename(columns=lambda label: re.sub(r"[\s/]", "_", label.lower()))

# Cycle through rows and create classes and instances
for index, row in aop_rows.iterrows():
    # Register the object term and the process/phenotype term once each;
    # rows with a missing id contribute no class or instance.
    for term_id, term_name in (
        (row.object_id, row.object_term),
        (row.process_phenotype_id, row.process_phenotype_term),
    ):
        if pd.isna(term_id) or term_id in classes:
            continue
        class_statement, instance_statement = process_term(term_id, term_name)
        classes[term_id] = class_statement
        instances[term_id] = instance_statement

    steps.setdefault(int(row.step), []).append(
        (
            row.action,
            row.process_phenotype_source,
            row.process_phenotype_id,
            row.process_phenotype_term,
            row.object_id,
            row.object_source,
            row.object_term,
        )
    )
print(steps)

# Attach relationship statements to each process/phenotype instance.
# Step numbers may have gaps, so only the steps that actually exist are
# visited, and a step is linked to the next one only when `step + 1` is
# present. A gap therefore produces no link instead of a KeyError.
for step in sorted(steps):
    next_items = steps.get(step + 1, [])
    for act_1, prph_source_1, prph_id_1, prph_term_1, obj_id_1, obj_source_1, obj_term_1 in steps[step]:
        if pd.isna(prph_id_1):
            # No instance was created for this row, so there is nothing to extend.
            continue

        # Row-level action: process/phenotype -> its object (when present).
        # Arguments follow add_action's (source, id, term) order.
        if not pd.isna(obj_id_1):
            instances[prph_id_1] += add_action(
                act_1, prph_source_1, prph_id_1, prph_term_1,
                obj_source_1, obj_id_1, obj_term_1,
            )

        # Step-level action: process/phenotype -> each process/phenotype of the next step.
        for _, prph_source_2, prph_id_2, prph_term_2, _, _, _ in next_items:
            if pd.isna(prph_id_2):
                continue
            instances[prph_id_1] += add_action(
                "", prph_source_1, prph_id_1, prph_term_1,
                prph_source_2, prph_id_2, prph_term_2,
            )

# Append both sections in a single pass over the output file. Turtle is
# UTF-8, so the encoding is explicit. The file is opened in append mode:
# running this cell again without recreating the file first appends the
# sections a second time.
with open(outfile, "a", encoding="utf-8") as f:
    f.write("\n\n#################################################################")
    f.write("\n#   Classes")
    f.write("\n#################################################################\n\n")
    for class_statement in classes.values():
        f.write(class_statement)

    f.write("\n\n#################################################################")
    f.write("\n#   Instances")
    f.write("\n#################################################################\n\n")
    for instance_statement in instances.values():
        # Instance statements are kept open while actions are appended; close them here.
        f.write(instance_statement + ".\n\n")


{1: [('increased', 'GO', 'GO:0004874', 'aryl hydrocarbon receptor activity', 'PR:000003858', 'PR', 'aryl hydrocarbon receptor')], 2: [('increased', 'GO', 'GO:0046983', 'protein dimerization activity', 'PR:000003858', 'PR', 'aryl hydrocarbon receptor'), ('increased', 'GO', 'GO:0046983', 'protein dimerization activity', 'PR:000004303', 'PR', 'aryl hydrocarbon receptor nuclear translocator')], 3: [('decreased', 'GO', 'GO:0046983', 'protein dimerization activity', 'PR:000008555', 'PR', 'hypoxia-inducible factor 1-alpha'), ('decreased', 'GO', 'GO:0046983', 'protein dimerization activity', 'PR:000004303', 'PR', 'aryl hydrocarbon receptor nuclear translocator')], 5: [('abnormal', 'GO', 'GO:0003158', 'endothelium development', nan, nan, nan)], 6: [('morphological change', 'MP', 'MP:0001544', 'abnormal cardiovascular system physiology', nan, nan, nan), ('abnormal', 'GO', 'GO:0072358', 'cardiovascular system development', 'UBERON:0004535', 'UBERON', 'cardiovascular system')], 7: [('increased', '