In [5]:
import os
import re

import pandas as pd
from google.colab import drive
from rdflib import Graph, URIRef

def parse_predicate_and_object(predicate_obj_str):
    """Split an atom of the form ``predicate(obj1,obj2)`` into its three parts.

    Args:
        predicate_obj_str: Text such as ``"hasParent(X, Y)"``. Leading and
            trailing whitespace is ignored.

    Returns:
        A ``(predicate, obj1, obj2)`` tuple of whitespace-stripped strings.

    Raises:
        ValueError: If the text does not look like ``predicate(obj1,obj2)``.
            The message includes the offending input so the bad entry can be
            located.
    """
    # The predicate group is lazy, so it ends at the first "(" that lets the
    # rest match. The first-argument group is greedy, so when the argument
    # list holds several commas the split falls on the last usable one.
    match = re.match(r"(.+?)\((.+),(.+)\)", predicate_obj_str.strip())
    if match is None:
        raise ValueError(
            "Unexpected format for predicate and object extraction: "
            f"{predicate_obj_str!r}"
        )
    predicate_str, obj1_str, obj2_str = (part.strip() for part in match.groups())
    return predicate_str, obj1_str, obj2_str

# Mount Google Drive first; both input files below are read from it.
drive.mount('/content/drive')

# Input locations on the mounted drive.
rules_file = "/content/drive/My Drive/pos-to-neg Experiment results/family-ontology-data/rules-1000-f"
owl_file = "/content/drive/My Drive/pos-to-neg Experiment results/family-ontology-data/family-2.owl"

# The rules file is read as tab-separated text with no header row, so the
# column names are supplied here.
rule_columns = ["score", "frequency", "probability", "rule"]
f_df = pd.read_csv(rules_file, sep='\t', header=None, names=rule_columns)

# Load the ontology (RDF/XML) into an rdflib graph. A parsing failure is
# printed rather than raised, so the cell keeps running in that case.
g = Graph()
print("Parsing file:", owl_file)
try:
    g.parse(owl_file, format="application/rdf+xml")
    print(f"Parsed {len(g)} triples.")
except Exception as e:
    print(f"Error parsing OWL file: {e}")


# Probability buckets as (lower, upper) pairs. Buckets are half-open,
# [lower, upper), so a boundary value such as 0.90 falls into exactly one
# bucket. The last bucket additionally includes its upper bound; otherwise
# rules whose probability equals it (1.0 here) would match no bucket and be
# left out of every export.
ranges = [
    (0.85, 0.90), (0.90, 0.95), (0.95, 1.0)
]

probability = f_df["probability"]
for position, (lower, upper) in enumerate(ranges):
    is_last_range = position == len(ranges) - 1
    within_upper = probability <= upper if is_last_range else probability < upper
    range_df = f_df[(probability >= lower) & within_upper].copy()

    print(f"Range ({lower}, {upper}): {len(range_df)} entries")

    # File names follow the pattern output_<upper>-<lower>.csv.
    output_file_path = f'/pos-to-neg-rules-family/output ranges/output_{upper}-{lower}.csv'

    # to_csv does not create missing folders, so make sure the target exists.
    os.makedirs(os.path.dirname(output_file_path), exist_ok=True)

    # Export the range DataFrame as tab-separated text without the index column.
    range_df.to_csv(output_file_path, sep='\t', index=False)
    print(f"Exported {output_file_path} with {len(range_df)} rows.")


Parsing file: /home/jovyan/work/pos-to-neg-rules/family-2.owl
Parsed 2371 triples.
Range (0.85, 0.9): 18 entries
Exported /home/jovyan/work/pos-to-neg-rules-family/output ranges/output_0.9-0.85.csv with 18 rows.
Range (0.9, 0.95): 22 entries
Exported /home/jovyan/work/pos-to-neg-rules-family/output ranges/output_0.95-0.9.csv with 22 rows.
Range (0.95, 1.0): 28 entries
Exported /home/jovyan/work/pos-to-neg-rules-family/output ranges/output_1.0-0.95.csv with 28 rows.


In [6]:
import pandas as pd
import re

# Rule-confidence buckets as (lower, upper) pairs.
ranges = [
    (0.85, 0.90),
    (0.90, 0.95),
    (0.95, 1.0),
]

# Text file that receives the collected head relation names.
output_file = '/pos-to-neg-rules-family/output ranges/unique_head_relations.txt'

# Holds each distinct head relation name once.
unique_relations = set()

# Matches "#<name>(" and captures <name>, where <name> is a run of word characters.
_HEAD_RELATION_PATTERN = re.compile(r'#(\w+)\(')


def extract_relation(rule):
    """Return the relation name found in a rule's head, or None.

    The head is the text before the first "<=". The relation is the first run
    of word characters in the head that sits between a "#" and a "(".
    Any exception raised while parsing is printed and None is returned.
    """
    try:
        head_text = rule.split("<=")[0].strip()
        found = _HEAD_RELATION_PATTERN.search(head_text)
        return found.group(1) if found else None
    except Exception as e:
        print(f"Error parsing rule: {rule}, Error: {e}")
        return None

# Gather head relations from each per-range export. A file that cannot be
# processed is reported and the loop moves on to the next range.
for lower, upper in ranges:
    file_path = f'/pos-to-neg-rules-family/output ranges/output_{upper}-{lower}.csv'
    try:
        df = pd.read_csv(file_path, sep='\t')

        if 'rule' not in df.columns:
            print(f"No 'rule' column found in {file_path}")
            continue

        # extract_relation yields None for rules it cannot parse; skip those.
        unique_relations.update(
            name for name in map(extract_relation, df['rule']) if name
        )
    except Exception as e:
        print(f"Error processing file {file_path}: {e}")

# Write the collected names in sorted order, one per line.
try:
    with open(output_file, 'w') as f:
        f.writelines(f"{relation}\n" for relation in sorted(unique_relations))
    print(f"Unique head relations saved to {output_file}")
except Exception as e:
    print(f"Error writing to output file: {e}")


Unique head relations saved to /home/jovyan/work/pos-to-neg-rules-family/output ranges/unique_head_relations.txt


In [7]:
input_file = "/pos-to-neg-rules-family/output ranges/unique_head_relations.txt"
output_file = "/pos-to-neg-rules-family/output ranges/sibling_output.txt"

# Read one relation name per line. Blank or whitespace-only lines are skipped
# so they cannot produce an empty "Rule_constant:" block or an empty candidate.
with open(input_file, "r") as infile:
    relations = [name for name in (line.strip() for line in infile) if name]

# Create relation - candidate relation blocks for LLM step: each block names
# one relation, then lists every other relation as comma-separated candidates,
# followed by a blank line.
with open(output_file, "w") as outfile:
    for rule_constant in relations:
        remaining_relations = [rel for rel in relations if rel != rule_constant]
        outfile.write(f"Rule_constant: {rule_constant}\n")
        outfile.write(", ".join(remaining_relations) + "\n\n")

print(f"Relations blocks saved to: {output_file}")


Relations blocks saved to: /home/jovyan/work/pos-to-neg-rules-family/output ranges/sibling_output.txt
