# Experimentation Mainframe

This notebook automates the synthesis of SHACL constraints from natural language using LLMs, validates their syntax, and evaluates them through scenario-based mutation testing.

## Setup and Dependencies

This section imports all libraries and modules required by the experimentation framework: standard library modules, third-party packages for graph processing and validation, and local utilities from the `src` directory.

In [None]:
# Standard library imports
import datetime
import os
import yaml

# Third-party imports
import pandas as pd
from pyshacl import validate
from rdflib import Graph, Namespace
from tqdm.auto import tqdm

# Local imports
from src.llm_utils import GeminiExhaustedException
from src.pipeline_core import run_main_pipeline
from src.testing_utils import apply_mutations, flush_context_to_csv, parse_validation_report

## Configuration and Context Initialization

Here, we define the experiment parameters and a function to set up the initial context for each run. The context tracks metadata, artifacts and results throughout the pipeline.

In [None]:
# --- Experiment settings ---------------------------------------------------
DOCUMENT_NAME = "parental_leave"
PROMPT_VERSION = "Default"
GEMINI_MODEL = "gemini-2.5-flash"
CSV_FILE = "Master_Results.csv"
ARTIFACT_DIRECTORY = "Testing Artifacts"
NUM_RUNS = 1

# --- Results file bootstrap ------------------------------------------------
# On first use, write a header-only CSV so later reads and appends have a
# file with the expected columns to work against.
if not os.path.exists(CSV_FILE):
    CSV_HEADERS = [
        # Run metadata
        "Run ID",
        "Timestamp",
        "Document Name",
        "Prompts",
        "Model Name",
        # Pipeline artifacts
        "Service Graph Hash",
        "SHACL Graph Hash",
        "SHACL Valid Syntax",
        "SHACL Error Type",
        "SHACL Error Message",
        "Execution Time",
        # Scenario details and outcome
        "Scenario ID",
        "Scenario Description",
        "Expected Violation Count",
        "Actual Violation Count",
        "Violated Shapes",
        "Violation Messages",
        # Execution status
        "Successfully Executed",
    ]
    pd.DataFrame(columns=CSV_HEADERS).to_csv(CSV_FILE, index=False)

# --- Run numbering -----------------------------------------------------------
# Only the first column (the run id) is read; numbering resumes after the
# largest id already recorded, or starts from 0 when the file has no rows.
df = pd.read_csv(CSV_FILE, usecols=[0])
last_run_id = 0 if df.empty else df.iloc[:, 0].max()

# Function to initialize the context dictionary for a single experiment run
def initialize_run_context(run_id, doc_name, model_name, *, prompt_version=None):
    """Build the fresh context dictionary for one experiment run.

    The returned dict carries the run metadata supplied by the caller and
    fills every artifact/scenario field with the placeholder "N/A". Its keys
    are the result column names, in column order.

    Args:
        run_id: Identifier stored under "Run ID".
        doc_name: Stored under "Document Name".
        model_name: Stored under "Model Name".
        prompt_version: Stored under "Prompts". When omitted (None), the
            notebook-level PROMPT_VERSION setting is used, so existing
            three-argument calls behave exactly as before.

    Returns:
        dict: A new context with "Successfully Executed" set to False and a
        "Timestamp" of the current local time ("YYYY-MM-DD HH:MM:SS").
    """
    if prompt_version is None:
        # Resolved at call time so a later change to the global is picked up.
        prompt_version = PROMPT_VERSION

    return {
        # Metadata section
        "Run ID": run_id,
        "Timestamp": datetime.datetime.now().isoformat(sep=" ", timespec="seconds"),
        "Document Name": doc_name,
        "Prompts": prompt_version,
        "Model Name": model_name,

        # Pipeline artifacts placeholders
        "Service Graph Hash": "N/A",
        "SHACL Graph Hash": "N/A",
        "SHACL Valid Syntax": "N/A",
        "SHACL Error Type": "N/A",
        "SHACL Error Message": "N/A",
        "Execution Time": "N/A",

        # Scenario specifics (will be overwritten per scenario)
        "Scenario ID": "N/A",
        "Scenario Description": "N/A",
        "Expected Violation Count": "N/A",
        "Actual Violation Count": "N/A",
        "Violated Shapes": "N/A",
        "Violation Messages": "N/A",

        # Execution stats
        "Successfully Executed": False,
    }

## Main Experiment Execution

The core loop of the experiment runs one or more iterations. Each iteration generates SHACL shapes, applies the mutations defined in each predefined scenario to the baseline data graph, and validates every mutated graph against the generated shapes.

In [None]:
# Main experiment loop.
# For each run: create an artifact directory, run the generation pipeline,
# then (if the generated SHACL parsed) replay every scenario against the
# baseline graph and append one CSV row per scenario.
with tqdm(total=NUM_RUNS, desc="Initializing...") as pbar:
    for run_id in range(last_run_id+1, last_run_id+NUM_RUNS+1):
        # Build the context BEFORE entering the try block. Both the generic
        # exception handler and the finally clause read ctx, so it must exist
        # and belong to this run even when the directory check below raises.
        ctx = initialize_run_context(run_id, DOCUMENT_NAME, GEMINI_MODEL)
        try:
            # Create artifact directory for this run; refuse to reuse an existing one
            artifact_dir = f"{ARTIFACT_DIRECTORY}/RUN_{run_id}_{DOCUMENT_NAME}"
            if os.path.exists(artifact_dir):
                raise FileExistsError(f"Artifact directory {artifact_dir} already exists. Aborting to prevent overwriting.")
            os.makedirs(artifact_dir)

            # Run the main pipeline, creating artifacts and updating context
            ctx = run_main_pipeline(ctx, artifact_dir, pbar, DOCUMENT_NAME, PROMPT_VERSION, GEMINI_MODEL, run_id)

            # Begin scenario testing only if SHACL syntax is valid.
            # NOTE(review): this is a truthiness test, so the initial "N/A"
            # placeholder would also pass - confirm run_main_pipeline always
            # overwrites "SHACL Valid Syntax" with a boolean.
            if ctx["SHACL Valid Syntax"]:

                # Load the Golden Citizen (Baseline) graph
                golden_ttl = f"Citizens/{DOCUMENT_NAME} eligible.ttl"
                golden_graph = Graph()
                golden_graph.parse(golden_ttl, format="turtle")
                golden_graph.bind("", Namespace("http://example.org/schema#"))

                # Load SHACL Shapes Graph
                shacl_ttl = f"{artifact_dir}/{DOCUMENT_NAME} shacl shapes.ttl"
                shacl_graph = Graph()
                shacl_graph.parse(shacl_ttl, format="turtle")
                shacl_graph.bind("", Namespace("http://example.org/schema#"))

                # Load the Scenarios from YAML. The encoding is pinned so the
                # file decodes the same way regardless of the platform default.
                with open(f"Citizens/{DOCUMENT_NAME} scenarios.yaml", "r", encoding="utf-8") as f:
                    scenarios = yaml.safe_load(f)

                # Iterate through each scenario
                for scn in scenarios:
                    # Set scenario details in context
                    ctx["Scenario ID"] = scn['id']
                    ctx["Scenario Description"] = scn['description']
                    ctx["Expected Violation Count"] = scn['expected_violation_count']

                    # Clear the previous scenario's outcome so that, if this
                    # scenario fails midway, the error row does not carry
                    # another scenario's violation data.
                    ctx["Actual Violation Count"] = "N/A"
                    ctx["Violated Shapes"] = "N/A"
                    ctx["Violation Messages"] = "N/A"
                    ctx["Successfully Executed"] = False

                    # Apply mutations to create a new mutated graph (leaving golden_graph untouched)
                    mutated_graph = apply_mutations(golden_graph, scn['actions'])

                    # Validate the mutated graph against SHACL shapes
                    conforms, results_graph, results_text = validate(
                        data_graph=mutated_graph,
                        shacl_graph=shacl_graph,
                        inference='rdfs',
                    )

                    # Parse the validation report
                    parse_result = parse_validation_report(conforms, results_graph, results_text, shacl_graph)
                    ctx["Actual Violation Count"] = parse_result["violation_count"]
                    ctx["Violated Shapes"] = parse_result["failed_shapes"]
                    ctx["Violation Messages"] = parse_result["messages"]

                    # Mark scenario as successfully executed if we made it this far
                    ctx["Successfully Executed"] = True
                    flush_context_to_csv(ctx, CSV_FILE)
            else:
                # SHACL syntax was invalid, log and move on to the next run
                flush_context_to_csv(ctx, CSV_FILE)

        except GeminiExhaustedException:
            # Handle API exhaustion error (the finally clause below still runs)
            print("🛑 FATAL: Gemini exhausted too many times. Check your API usage limits.")
            break # Exit the entire run loop

        except Exception as e:
            # Handle unexpected errors: the status column stores the
            # single-line, truncated error text instead of a boolean.
            ctx["Successfully Executed"] = str(e).replace("\n", " ")[:250] # Truncate long error messages
            flush_context_to_csv(ctx, CSV_FILE)

        finally:
            # Report end of the run to console. "is True" matters here because
            # an error string stored in the status field is also truthy.
            status_msg = "Everything went well!" if ctx["Successfully Executed"] is True else "Exited with some errors."
            pbar.write(f"Logged Run {ctx['Run ID']} to CSV. {status_msg}")
            pbar.update(1) # Increment progress bar by 1