In [1]:
import pandas as pd
import regex as re

In [4]:
# AOP to export; the later cells filter every table on "Aop:<AOP_num>".
AOP_num = 6

# Turtle file that the following cells create and append to.
outfile = f"../aop{AOP_num}_from_script.ttl"

# Load the three source tables from the aop_wiki_tables directory.
# (An earlier layout kept the same files one level up:
#  ../aop_ke_ec.csv, ../aop_ke_mie_ao.tsv and ../aop_ke_ker.tsv.)
AOP_EC_table = pd.read_csv("../../aop_wiki_tables/aop_ke_ec.csv")
AOP_KE_table = pd.read_csv("../../aop_wiki_tables/aop_ke_mie_ao.tsv", sep="\t")
AOP_KER_table = pd.read_csv("../../aop_wiki_tables/aop_ke_ker.tsv", sep="\t")

print(AOP_num)

6


In [None]:
def _event_number(label):
    """Return the integer part of an ``Event:<n>`` label, e.g. ``"Event:55"`` -> ``55``."""
    return int(label.replace("Event:", ""))


# Label used in the "AOP" column of all three tables for the selected AOP.
aop_label = f"Aop:{AOP_num}"

# Each subset is taken with .copy() so that adding/overwriting columns below
# works on an independent frame instead of a slice of the full table (this is
# what triggers pandas' SettingWithCopyWarning and silently fails under
# copy-on-write).

# Event-component rows of this AOP, with the key event as an integer in "KE".
AOP_EC = AOP_EC_table[AOP_EC_table["AOP"] == aop_label].copy()
AOP_EC["KE"] = [_event_number(x) for x in AOP_EC["Key Event"]]

# Key-event rows of this AOP, with the key event as an integer in "KE".
AOP_KE = AOP_KE_table[AOP_KE_table["AOP"] == aop_label].copy()
AOP_KE["KE"] = [_event_number(x) for x in AOP_KE["Key Event"]]

# KE number -> adverse-outcome text. Commas are replaced by semicolons because
# the text is later written into a comma-separated row. A missing value maps
# to "" so that the later string join does not fail on NaN.
AO_dict = {
    ke: "" if pd.isna(outcome) else str(outcome).replace(",", ";")
    for ke, outcome in zip(AOP_KE["KE"], AOP_KE["Adverse Outcome"])
}

# Key-event relationships of this AOP with both ends converted to integers.
AOP_KER = AOP_KER_table[AOP_KER_table["AOP"] == aop_label].copy()
AOP_KER["Event1"] = [_event_number(x) for x in AOP_KER["Event1"]]
AOP_KER["Event2"] = [_event_number(x) for x in AOP_KER["Event2"]]

# (upstream KE, downstream KE) for every relationship, in table order.
# All relationships are kept; filtering on the "adjacent" column was
# considered in an earlier version but is not applied.
KE_pairs = list(zip(AOP_KER["Event1"], AOP_KER["Event2"]))

print(KE_pairs)

In [None]:
# Turtle preamble: namespace prefixes, the base IRI, and an anonymous
# owl:Ontology node that imports pinned releases of GO and RO.
header = '''@prefix : <http://www.semanticweb.org/mmandal/ontologies/2022/4/untitled-ontology-76#> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@base <http://www.w3.org/2002/07/owl#> .

[ rdf:type owl:Ontology ;
   owl:imports <http://purl.obolibrary.org/obo/go/releases/2022-01-13/go.owl> ,
               <http://purl.obolibrary.org/obo/ro/releases/2022-01-20/ro.owl>
 ] .
'''

# Create (or truncate) the output file so it starts with the preamble only;
# later cells append to it.
with open(outfile, "w") as ttl_file:
    ttl_file.write(header)

In [None]:
def process_term(input_id, name):
    """
    Build the Turtle class and individual statements for one ontology term.

    Parameters
    ----------
    input_id : str
        Term identifier, either a CURIE such as ``GO:0006915`` /
        ``CHEBI:15377`` or an id that already uses an underscore
        (``GO_0006915``), which is used unchanged.
    name : str
        Term label. Spaces are replaced by underscores to form the fragment
        of the individual's IRI.

    Returns
    -------
    tuple of (str, str)
        ``(class_statement, instance_statement)``. The class statement is a
        complete ``owl:Class`` declaration. The instance statement declares
        an ``owl:NamedIndividual`` typed with the class and is deliberately
        left without a closing ``.`` so the caller can terminate it.
    """
    # Replace only the prefix separator. Slicing at fixed positions would
    # assume a two-letter prefix and mangle ids such as "CHEBI:15377".
    input_id_str = input_id.replace(":", "_", 1)
    name = name.replace(" ", "_")

    class_iri = f"http://purl.obolibrary.org/obo/{input_id_str}"
    instance_iri = f"http://www.co-ode.org/ontologies/ont.owl#{name}"

    class_statement = f"\n###  {class_iri}\n\t<{class_iri}> rdf:type owl:Class .\n\n"
    instance_statement = (
        f"###  {instance_iri}\n"
        f"<{instance_iri}> rdf:type owl:NamedIndividual ,\n"
        f"\t<{class_iri}> "
    )

    return class_statement, instance_statement

def get_action_id(act, source_1, id_1, term_1, source_2, id_2, term_2):
    """
    Return the relation id used for an action.

    Only ``act`` is consulted: ``"increased"`` maps to ``RO_0000057`` and any
    other value maps to ``RO_0000058``. The remaining parameters mirror the
    arguments of ``add_action`` and are currently ignored.
    """
    return "RO_0000057" if act == "increased" else "RO_0000058"


def add_action(act, source_1, id_1, term_1, source_2, id_2, term_2, outfile):
    """
    Build the Turtle predicate/object fragment that links to ``term_2``.

    Parameters
    ----------
    act : str
        Action label (e.g. ``"increased"``). An empty or missing (NaN/None)
        action yields no statement.
    source_1, id_1, term_1, source_2, id_2 :
        Passed through to ``get_action_id``; not otherwise used here.
    term_2 : str
        Label of the target individual. Spaces are replaced by underscores so
        the IRI matches the individual IRIs produced by ``process_term``.
    outfile :
        Unused; kept so existing callers do not break.

    Returns
    -------
    str
        ``";\\n\\t<relation> <target> "`` ready to be appended to an
        unterminated individual statement, or ``""`` when there is no action
        or no target term.
    """
    # Missing values read from the table arrive as float NaN, not "".
    if not isinstance(act, str) or act == "":
        return ""
    # Without a target label there is nothing to point the relation at.
    if not isinstance(term_2, str) or term_2 == "":
        return ""

    action_id = get_action_id(act, source_1, id_1, term_1, source_2, id_2, term_2)
    target = term_2.replace(" ", "_")
    return (
        f";\n\t<http://purl.obolibrary.org/obo/{action_id}> "
        f"<http://www.co-ode.org/ontologies/ont.owl#{target}> "
    )


In [None]:
classes = {}    # term id -> Turtle owl:Class statement
instances = {}  # term id -> Turtle individual statement (no closing ".")
steps = {}      # not populated in this cell
# CSV file written by the export cell further down.
outfile_csv = f"../outfile_09122022_AOP{AOP_num}.csv"
KE_dict = {}    # KE number -> list of event-component tuples, in table order


def _register_term(term_id, term_name):
    """Create the class/instance statements for a term unless it is missing or already known."""
    if pd.isna(term_id) or term_id in classes:
        return
    classes[term_id], instances[term_id] = process_term(term_id, term_name)


# Normalise the column labels once (lower-case, whitespace and "/" -> "_") so
# rows can be read with attribute access; AOP_EC itself is left untouched.
AOP_EC_rows = AOP_EC.rename(columns=lambda label: re.sub(r"[\s/]", "_", label.lower()))

# Cycle through rows and create classes and instances
for _, row in AOP_EC_rows.iterrows():
    _register_term(row.object_id, row.object_term)
    _register_term(row.process_phenotype_id, row.process_phenotype_term)

    # NOTE(review): this fragment is computed but never written anywhere. It
    # looks like it was meant to be appended to the object's instance
    # statement before the closing "." -- confirm before wiring it in.
    row_action_statement = add_action(
        row.action,
        row.object_source, row.object_id, row.object_term,
        row.process_phenotype_source, row.process_phenotype_id, row.process_phenotype_term,
        outfile_csv,
    )

    component = (
        row.action,
        row.object_source, row.object_id, row.object_term,
        row.process_phenotype_source, row.process_phenotype_id, row.process_phenotype_term,
    )
    # setdefault creates the list on first sight of a KE; no exception
    # handling is needed (a bare except here would also hide real errors).
    KE_dict.setdefault(row.ke, []).append(component)

print(KE_dict)

# Append both sections to the Turtle file in one pass: a banner, then every
# class statement; a banner, then every instance statement closed with ".".
with open(outfile, "a") as f:
    f.write("\n\n#################################################################")
    f.write("\n#   Classes")
    f.write("\n#################################################################\n\n")
    for statement in classes.values():
        f.write(statement)

    f.write("\n\n#################################################################")
    f.write("\n#   Instances")
    f.write("\n#################################################################\n\n")
    for statement in instances.values():
        f.write(statement + ".\n\n")


In [None]:
def _blank_missing(values):
    """Return ``values`` as a list of strings with missing entries (NaN/None) replaced by ""."""
    return ["" if pd.isna(v) else str(v) for v in values]


# Write the CSV in a single pass: the header, then for every KE pair
#   * the component rows of the upstream KE (skipped when that KE was the
#     downstream KE of the previous pair, because it was just written),
#   * and, per component of the downstream KE, a "between" row followed by
#     the component row itself.
# NOTE(review): fields are joined with ", " without quoting, so a term that
# itself contains a comma will shift columns.
with open(outfile_csv, "w") as f:
    f.write("action,source_1,id_1,term_1,source_2,id_2,term_2,ke(s), adverse_outcome\n")

    prev_ke = ""
    for ke_1, ke_2 in KE_pairs:
        # Start from a blank 7-field component so a KE without any event
        # components still yields well-formed "between" rows.
        row = [""] * 7
        for e in KE_dict.get(ke_1, []):
            # Always refresh `row` from the current upstream KE. Only updating
            # it when the row is written would leave the previous pair's
            # upstream component in place for chained pairs (A->B, B->C).
            row = _blank_missing(e)
            if ke_1 != prev_ke:
                f.write(', '.join(row + [str(ke_1), AO_dict.get(ke_1, "")]) + '\n')

        for e2 in KE_dict.get(ke_2, []):
            row2 = _blank_missing(e2)
            # Between row: blank action, then fields 4..6 (source_2, id_2,
            # term_2) of the last upstream component and of this downstream
            # component, labelled "<ke_1>_<ke_2>".
            row_between = row[4:] + row2[4:]
            f.write(', '.join([""] + row_between + [f"{ke_1}_{ke_2}", ""]) + '\n')
            f.write(', '.join(row2 + [str(ke_2), AO_dict.get(ke_2, "")]) + '\n')

        prev_ke = ke_2



In [None]:
# Show the KE number -> adverse-outcome text mapping for a final visual check.
print(AO_dict)