In [25]:
import ollama
import re
from pyparsing import Word, alphas, Suppress, Group, Forward, OneOrMore, opAssoc, infixNotation
from pyparsing import Keyword, CaselessKeyword, printables
from pyparsing import restOfLine, quotedString
from collections import deque
from nltk.translate.bleu_score import sentence_bleu

from sympy import Symbol
from sympy.logic.boolalg import Equivalent

import nltk
# nltk.download('punkt')
# nltk.download('punkt_tab')
from nltk.tokenize import sent_tokenize

In [26]:
# Add a function to check for logical equivalence (conceptual)
def are_logically_equivalent(logic1: str, logic2: str, var_map: dict) -> bool:
    """
    CONCEPTUAL FUNCTION:
    Checks if two boolean logic strings are semantically equivalent.
    This would be implemented using a formal logic library like pyeda or sympy.logic.
    For now, it returns True only if the strings are identical, which is a weak check.
    """
    # In a real implementation:
    # 1. Parse logic1 and logic2 into ASTs.
    # 2. Normalize variables using var_map.
    # 3. Use pyeda.expr.expr('logic_string').equivalent(pyeda.expr.expr('other_logic_string'))
    # For this conceptual implementation, we'll just do a strict string comparison.
    return logic1.strip() == logic2.strip()

In [27]:
def ask_llm_for_boolean_logic(natural_language_requirement: str, bool_vars:list) -> str:
    prompt = f"""
    You are an expert in formal logic and system specifications.
    Your task is to translate natural language design requirements into Boolean logic expressions.

    Rules for Boolean Logic:
    - Use 'AND' for logical conjunction.
    - Use 'OR' for logical disjunction.
    - Use 'NOT' for logical negation.
    - Use 'IMPLIES' for logical implication (A IMPLIES B).
    - Use parentheses for grouping everywhere so that its neatly understandable.
    - Identify the clauses properly with more precision in the sentence and convert them into boolean specifications properly.
    - Identify the main subject (e.g., 'software installation', 'system activation', 'alarm sounding') and include it as a variable in the final Boolean logic expression.
    - Do not include any explanations, preamble, or additional text. Only output the Boolean logic expression.
    - When you see 'only if' (not 'if') in the requirement like A only if B it is 'A implies B' not 'B implies A'
    - When you see 'if and only if' (not 'only if') in the requirement like A if and only if B it is 'A implies B AND B implies A'

    Use these boolean variables : "{bool_vars}"

    Natural Language Requirement: "{natural_language_requirement}"

    Boolean Logic:
    """
    try:
        response = ollama.chat(
            model='llama3',
            messages=[{'role': 'user', 'content': prompt}],
            options={'temperature': 0.1, 'num_predict': 128}
        )
        llm_output = response['message']['content'].strip()
        # print(f"LLM Raw Output:\n{llm_output}\n")
        lines = [line.strip() for line in llm_output.split('\n') if line.strip()]
        if lines:
            return lines[0]
        else:
            return "Error: LLM returned empty response."
    except Exception as e:
        return f"Error communicating with Ollama: {e}"

In [28]:
def measure_completeness(original_requirement: str, full_logic: str):
    """
    Measures LLM completeness by removing sentences and checking if the logic changes.
    """
    print("\n--- Measuring Completeness via Sentence Deletion ---")
    
    # 1. Split the original requirement into sentences
    sentences = sent_tokenize(original_requirement)
    if len(sentences) <= 1:
        print("  Skipping completeness check: Requirement is a single sentence.")
        return 0, 0 # Returns (sentences_incorporated, total_sentences)
    
    sentences_incorporated = 0
    total_sentences = len(sentences)
    
    for i, sentence_to_remove in enumerate(sentences):
        # 2. Create a modified requirement by removing one sentence
        modified_sentences = [s for j, s in enumerate(sentences) if i != j]
        modified_requirement = " ".join(modified_sentences)
        
        print(f"\n  --- Deleting Sentence #{i+1} ---")
        print(f"  Sentence Removed: '{sentence_to_remove}'")
        print(f"  Modified Requirement: '{modified_requirement}'")
        
        # 3. Get the LLM's new logic for the modified requirement
        try:
            modified_logic = ask_llm_for_boolean_logic(modified_requirement)
        except Exception as e:
            print(f"  Error getting modified logic from LLM: {e}")
            continue

        print(f"  Modified Logic: '{modified_logic}'")
        print(f"  Full Logic:     '{full_logic}'")

        # 4. Check for logical equivalence (Conceptual)
        # This part requires a real equivalence checker. For now, we'll just do a strict string match.
        is_equivalent = are_logically_equivalent(full_logic, modified_logic, {})
        
        if not is_equivalent:
            print("  -> LOGIC CHANGED. Sentence was successfully incorporated.")
            sentences_incorporated += 1
        else:
            print("  -> LOGIC DID NOT CHANGE. Sentence was NOT incorporated (possible incompleteness).")
            # Note: This could also mean the sentence was redundant.
            
    completeness_score = sentences_incorporated / total_sentences if total_sentences > 0 else 0
    print(f"\nCompleteness Score: {sentences_incorporated}/{total_sentences} ({completeness_score:.2f})")
    return sentences_incorporated, total_sentences

In [29]:
# --- Pyparsing Grammar and Helper Functions ---
def parse_boolean_logic(expression_string: str):
    AND = CaselessKeyword("AND")
    OR = CaselessKeyword("OR")
    NOT = CaselessKeyword("NOT")
    IMPLIES = CaselessKeyword("IMPLIES")
    variable_name = Word(alphas, alphas + "_")
    operand = Group(variable_name | Suppress("(") + Forward().setResultsName("nested_expr") + Suppress(")"))
    boolean_expression = Forward()
    boolean_expression <<= infixNotation(
    operand,
    [
        (NOT, 1, opAssoc.RIGHT),  # Highest precedence
        (AND, 2, opAssoc.LEFT),
        (OR, 2, opAssoc.LEFT),
        (IMPLIES, 2, opAssoc.RIGHT),  # Lowest precedence
    ]
    )
    try:
        parsed_expression = boolean_expression.parseString(expression_string, parseAll=True)
        return parsed_expression[0]
    except Exception as e:
        print(f"Error parsing expression: {e}")
        return None

In [30]:
def _get_operator_and_operands(node):
    if not isinstance(node, list):
        return None, node
    if len(node) == 2 and isinstance(node[0], str) and node[0].upper() == 'NOT':
        return node[0].upper(), [node[1]]
    if len(node) > 3 and all(isinstance(node[i], str) and node[i].upper() == node[1].upper() for i in range(1, len(node), 2)):
        current_op = node[1].upper()
        last_op_index = -1
        for i in range(len(node) - 1, 0, -1):
            if isinstance(node[i], str) and node[i].upper() == current_op:
                last_op_index = i
                break
        if last_op_index != -1:
            left_subtree_node = node[0:last_op_index]
            right_operand_node = node[last_op_index+1]
            return current_op, [left_subtree_node, right_operand_node]
    elif len(node) == 3 and isinstance(node[1], str) and node[1].upper() in ['AND', 'OR', 'IMPLIES']:
        return node[1].upper(), [node[0], node[2]]
    elif len(node) == 1:
        return _get_operator_and_operands(node[0])
    return None, None

In [31]:
def visualize_tree(node, level=0, prefix="Node: "):
    indent = "  " * level
    op, operands = _get_operator_and_operands(node)
    if op:
        print(f"{indent}{prefix}{op}")
        if op == 'NOT':
            visualize_tree(operands[0], level + 1, "Operand: ")
        elif op in ['AND', 'OR', 'IMPLIES']:
            visualize_tree(operands[0], level + 1, "Left: ")
            visualize_tree(operands[1], level + 1, "Right: ")
    elif operands is not None:
        print(f"{indent}{prefix}{operands}")
    else:
        print(f"{indent}{prefix}UNEXPECTED NODE (Please report): {node} (Type: {type(node).__name__})")
        if isinstance(node, list):
            for item in node:
                visualize_tree(item, level + 1, "Item: ")

In [32]:
def pretty_print_tree(node, indent=0):
    prefix = "  " * indent
    op, operands = _get_operator_and_operands(node)
    if op:
        print(f"{prefix}{op}")
        for operand in operands:
            pretty_print_tree(operand, indent + 1)
    elif operands is not None:
        print(f"{prefix}{operands}")
    else:
        print(f"{prefix}UNEXPECTED NODE (Please report): {node} (Type: {type(node).__name__})")
        if isinstance(node, list):
            for item in node:
                pretty_print_tree(item, indent + 1)

In [33]:
def extract_variables(node, variables: set):
    op, operands = _get_operator_and_operands(node)
    if op:
        for operand in operands:
            extract_variables(operand, variables)
    elif operands is not None:
        variables.add(operands)

In [34]:
# --- New Function for Semantic Equivalence Mapping ---
def get_semantic_equivalents(variable_name: str) -> list[str]:
    """
    Asks the LLM for a list of semantically equivalent terms for a given variable.
    """
    prompt = f"""
    Given the following concept expressed as a capitalized variable name, provide a comma-separated list of 
    semantically equivalent or synonymous terms. The terms should also be in capitalized, underscore-separated format.
    Do not include any explanations, preamble, or the original term itself.
    
    Example:
    'TEMPERATURE_HIGH' -> 'HIGH_TEMPERATURE,TEMP_EXCEEDS_THRESHOLD'
    'USER_AUTHENTICATED' -> 'AUTHENTICATED_USER,USER_LOGGED_IN'
    
    Input Variable: '{variable_name}'
    
    Equivalent Terms:
    """
    try:
        response = ollama.chat(
            model='llama3',
            messages=[{'role': 'user', 'content': prompt}],
            options={'temperature': 0.1, 'num_predict': 128}
        )
        llm_output = response['message']['content'].strip()
        terms = [term.strip() for term in llm_output.split(',') if term.strip()]
        return terms
    except Exception as e:
        print(f"Error getting semantic equivalents for '{variable_name}': {e}")
        return []

In [35]:
# --- Your original evaluation metrics (commented out for now) ---
def evaluate_metrics(llm_outputs: list[str], ground_truths: list[str]):
    """
    Evaluates a batch of LLM outputs against their corresponding ground truths.
    Calculates overall accuracy and false positive rate.
    """
    assert len(llm_outputs) == len(ground_truths), "Mismatched list lengths"
    R = W = 0
    GT = RS = len(ground_truths)
    for llm_output, ground_truth in zip(llm_outputs, ground_truths):
        llm_clean = llm_output.strip().replace(" ", "")
        gt_clean = ground_truth.strip().replace(" ", "")
        if llm_clean == gt_clean:
            R += 1
        else:
            W += 1
    accuracy = R / GT if GT > 0 else 0
    false_positive = W / RS if RS > 0 else 0
    return accuracy, false_positive, R, W, GT, RS

In [36]:
def calculate_bleu_score(llm_output: str, ground_truth: str) -> float:
    reference = [ground_truth.split()]
    candidate = llm_output.split()
    return sentence_bleu(reference, candidate)

In [37]:
# This function sends a prompt to the LLM to perform deconstruction.
def ask_llm_for_deconstruction(natural_language_requirement: str) -> str:
    prompt = f"""
    You are an expert in formal logic and system specifications.
    Your task is to break down a complex natural language requirement into a list of simple, atomic sentences.
    Each sentence should represent a single, clear logical proposition.
    Ensure that the output is concise and free of repetitive or redundant sentences; if similar sentences occur, only include one version.

    Rules for Output:
    - Return a comma-separated list of the unique simple sentences.
    - Do not include any explanations, preamble, or additional text.
    - Do not use any boolean logic keywords (AND, OR, NOT, IMPLIES).
    
    Example:
    Input: "If a fire is detected, an alert must activate, and the system should only be active during business hours."
    Output: "a fire is detected, an alert must activate, the system should only be active during business hours"

    Natural Language Requirement: "{natural_language_requirement}"

    Simple Sentences:
    """
    try:
        response = ollama.chat(
            model='llama3',
            messages=[{'role': 'user', 'content': prompt}],
            options={'temperature': 0.1, 'num_predict': 256}
        )
        llm_output = response['message']['content'].strip()
        return llm_output
    except Exception as e:
        return f"Error communicating with Ollama: {e}"

In [38]:
# This is the new `deconstruct_requirement` function.
def deconstruct_requirement(natural_language: str) -> list[str]:
    """
    Asks an LLM to deconstruct a single, complex requirement into a list of simple sentences.
    """
    print("DEBUG: Asking LLM to deconstruct the requirement.")
    llm_output_string = ask_llm_for_deconstruction(natural_language)
    
    # Parse the comma-separated list from the LLM output.
    simple_sentences = [s.strip() for s in llm_output_string.split(',') if s.strip()]
    
    return simple_sentences

In [39]:
def are_logically_equivalent_conceptual(logic1: str, logic2: str) -> bool:
    """
    CONCEPTUAL IMPLEMENTATION: Checks if two boolean logic strings are semantically equivalent
    using a formal logic library. This code is for demonstration and will not run
    without the 'sympy' library installed.
    """
    try:
        normalized_logic1 = logic1
        normalized_logic2 = logic2
        
        parsed_expr1 = parse_boolean_expression_to_sympy(normalized_logic1)
        parsed_expr2 = parse_boolean_expression_to_sympy(normalized_logic2)
        
        return bool(Equivalent(parsed_expr1, parsed_expr2))

    except Exception as e:
        print(f"Error during logical equivalence check: {e}")
        return False
    
def parse_boolean_expression_to_sympy(expression: str):

    expression = expression.replace('AND', '&').replace('OR', '|').replace('NOT', '~').replace('IMPLIES', '>>')
    variables = re.findall(r'\b[A-Z_]+\b', expression)
    symbols = {var: Symbol(var) for var in variables}

    return expression

In [40]:
def check_logic(c: str, s_list: list[str], d: str, pos: int, n: int, logic_main: str) -> None:
    if(pos==n-1):
        prompt_and_s = f"{d} Additionally, {s_list[pos]}."
        print(f"\n  Testing Another Case: ")
        print(f"  Input Prompt: '{prompt_and_s}'")
        
        logic_and_s = ask_llm_for_boolean_logic(prompt_and_s)
        print(f"  LLM Output: {logic_and_s}")
        s_boolean = ask_llm_for_boolean_logic(s_list[pos])
        curr_logic = f"{logic_main} AND ({s_boolean})"

        are_they = are_logically_equivalent_conceptual(logic_and_s, curr_logic)
        if(are_they):
            print("Consistent!!!!!!")
        else:
            print("Not Consistent!!!!!")

        print("\n------------------------------------")

        prompt_and_not_s = f"{d} However, it is not the case that {s_list[pos]}."
        print(f"\n  Testing Another Case: ")
        print(f"  Input Prompt: '{prompt_and_not_s}'")
        logic_and_not_s = ask_llm_for_boolean_logic(prompt_and_not_s)
        print(f"  LLM Output: {logic_and_not_s}")
        not_logic = f"It is not the case that {s_list[pos]}"
        s_boolean = ask_llm_for_boolean_logic(not_logic)
        curr_logic = f"{logic_main} AND ({s_boolean})"

        are_they = are_logically_equivalent_conceptual(logic_and_not_s, curr_logic)
        if(are_they):
            print("Consistent!!!!!!")
        else:
            print("Not Consistent!!!!!")

        print("\n------------------------------------")

        prompt = f"{d}"
        print(f"\n  Testing Another Case: ")
        print(f"  Input Prompt: '{prompt}'")
        logic = ask_llm_for_boolean_logic(prompt)
        print(f"  LLM Output: {logic}")

        are_they = are_logically_equivalent_conceptual(logic, logic_main)
        if(are_they):
            print("Consistent!!!!!!")
        else:
            print("Not Consistent!!!!!")

        print("\n------------------------------------")
    else:
        prompt_and_s = f"{d} Additionally, {s_list[pos]}."
        s_boolean = ask_llm_for_boolean_logic(s_list[pos])
        curr_logic = f"{logic_main} AND ({s_boolean})"
        check_logic(c,s_list,prompt_and_s,pos+1,n,curr_logic)

        prompt_and_not_s = f"{d} However, it is not the case that {s_list[pos]}."
        not_logic = f"It is not the case that {s_list[pos]}"
        s_boolean = ask_llm_for_boolean_logic(not_logic)
        curr_logic = f"{logic_main} AND ({s_boolean})"
        check_logic(c,s_list,prompt_and_not_s,pos+1,n,curr_logic)
        
        prompt = f"{d}"
        check_logic(c,s_list,prompt,pos+1,n,logic_main)

In [41]:
def evaluate_compositional_logic(c: str, s_list: list[str], logic: str) -> None:
    """
    Evaluates the LLM's ability to handle compositional logic (c AND s) and (c AND (NOT s)).
    """
    print("\n--- Evaluating Compositional Logic ---")
    
    n = len(s_list)
    check_logic(c,s_list,c,0,n,logic)

In [42]:
def ask_llm_direct(prompt: str) -> str:
    """
    Sends the exact prompt string to the LLM without any additional formatting.
    Returns the raw LLM response.
    """
    try:
        response = ollama.chat(
            model='llama3',
            messages=[{'role': 'user', 'content': prompt}],
            options={'temperature': 0.1, 'num_predict': 128}
        )
        return response['message']['content'].strip()
    except Exception as e:
        return f"Error communicating with Ollama: {e}"

def manual_prompt_testing(natural_language_requirement: str = None, llm_boolean_logic_output: str = None):
    """
    Allows you to manually test different prompts and count attempts.
    """
    attempts = 0
    results = []
    
    print("Manual Prompt Testing Mode - Type your prompts directly")
    print("Type 'quit' to exit\n")
    
    while True:
        user_prompt = input("Enter your prompt: ").strip()
        
        if user_prompt.lower() == 'quit':
            break

        prompt = f"""
                    You are an expert in formal logic and system specifications.
                    Your task is to translate natural language design requirements into Boolean logic expressions.

    Rules for Boolean Logic:
    - Use 'AND' for logical conjunction.
    - Use 'OR' for logical disjunction.
    - Use 'NOT' for logical negation.
    - Use 'IMPLIES' for logical implication (A IMPLIES B).
    - Use parentheses for grouping everywhere so that its neatly understandable.
    - Identify the clauses properly with more precision in the sentence and convert them into boolean specifications properly.
    - Variables should be capitalized single words (e.g., 'DOOR_OPEN', 'ALARM_ACTIVE').
    - Identify the main subject (e.g., 'software installation', 'system activation', 'alarm sounding') and include it as a variable in the final Boolean logic expression.
    - Do not include any explanations, preamble, or additional text. Only output the Boolean logic expression.
    - When you see 'only if' (not 'if') in the requirement like A only if B it is 'A implies B' not 'B implies A'
    - When you see 'if and only if' (not 'only if') in the requirement like A if and only if B it is 'A implies B AND B implies A'

                    Natural Language Requirement: "{natural_language_requirement}"

                    LLM Boolean Logic Output: "{llm_boolean_logic_output}"
                    The issue: {user_prompt}
                    
                    Generate the appropriate boolean logic expression for the above problem.
                    Give only the boolean logic expression as output.
                    """
            
        attempts += 1
        llm_response = ask_llm_direct(prompt)
        
        print(f"\nAttempt {attempts}:")
        print(f"Your prompt: '{prompt}'")
        print(f"LLM response: {llm_response}\n")
        
        results.append({
            'attempt': attempts,
            'prompt': user_prompt,
            'response': llm_response
        })
        llm_boolean_logic_output = llm_response
    
    print(f"\nTesting completed. Total attempts: {attempts}")
    return results, attempts


In [43]:
def ask_llm_for_boolean_variables(simple_sentences: list):
    """
    Breaks a requirement into simple sentences and asks the LLM to
    convert each sentence into a single Boolean variable name.
    """
    
    variable_map = {}

    for sentence in enumerate(simple_sentences, start=1):
        prompt = f"""
        You are an expert in formal logic and system specifications.
        Convert the following simple requirement sentence into a Boolean variable name.

        Rules:
        - Use CAPITALIZED words with underscores (e.g., DOOR_OPEN, SYSTEM_ACTIVE).
        - The variable should directly represent the condition or event described in the sentence.
        - Do NOT write Boolean operators (AND, OR, NOT, IMPLIES).
        - If the sentence expresses a negative condition (e.g., contains "not", "no", etc.), generate the variable name representing the positive form. For example, "Notifications will not appear" should become "NOTIFICATIONS_APPEARS".
        - Output only the variable name, nothing else.

        Sentence: "{sentence}"

        Boolean Variable:
        """
        try:
            response = ollama.chat(
                model="llama3",
                messages=[{"role": "user", "content": prompt}],
                options={"temperature": 0.1, "num_predict": 32}
            )
            var_name = response["message"]["content"].strip().split()[0]
            variable_map[sentence] = var_name
        except Exception as e:
            variable_map[sentence] = f"Error: {e}"

    return variable_map


In [44]:
def check_semantic_consistency(requirement: str, boolean_logic: str, bool_vars: list) -> str:
    """
    Asks the LLM to check if the Boolean logic is semantically consistent 
    with the natural language requirement.
    Returns 'consistent', 'inconsistent', or a descriptive correction.
    """
    prompt = f"""
    Requirement: "{requirement}"
    Boolean Logic: "{boolean_logic}"
    Allowed Boolean Variables: {bool_vars}

    Task:
    - Check if the Boolean logic correctly represents the requirement.
    - If consistent, reply exactly: CONSISTENT
    - If inconsistent, reply with the corrected boolean logic. Give only the boolean logic expression as output.

    """

    try:
        response = ollama.chat(
            model="llama3",
            messages=[{"role": "user", "content": prompt}],
            options={"temperature": 0.2, "num_predict": 256}
        )
        return response["message"]["content"].strip()
    except Exception as e:
        return f"Error checking consistency: {e}"

In [45]:
# Robust comparator that works with SymPy exprs, pyparsing ParseResults, and nested lists/tuples.
from sympy import sympify, Symbol
import re
try:
    from pyparsing import ParseResults
except Exception:
    class ParseResults:  # fallback dummy if pyparsing not available at import time
        pass

VAR_TOKEN_RE = re.compile(r"\b[A-Z][A-Z0-9_]*\b")  # heuristic for variables like SERVICE_STARTS_AUTOMATICALLY

def is_sympy_expr(obj):
    # ensure 'atoms' is callable (to avoid objects with an 'atoms' attribute that's not the method)
    return hasattr(obj, 'atoms') and callable(getattr(obj, 'atoms')) and hasattr(obj, 'args')

def is_parse_results(obj):
    return isinstance(obj, ParseResults)

def normalize_expr_if_str(expr_input):
    """If expr_input is a string using NOT/AND/OR/IF tokens, try to sympify, otherwise return original."""
    if expr_input is None:
        return None
    if isinstance(expr_input, str):
        s = expr_input.upper()
        s = re.sub(r"\bNOT\b", "~", s)
        s = re.sub(r"\bAND\b", "&", s)
        s = re.sub(r"\bOR\b", "|", s)
        s = re.sub(r"\bIF\b", ">>", s)
        try:
            return sympify(s, evaluate=False)
        except Exception:
            try:
                return sympify(s)
            except Exception:
                # not a sympy parseable string, keep original string (we'll parse tree-style later)
                return expr_input
    return expr_input

def walk_obj_nodes(node):
    """
    Generic walker: yields (node_obj, parent_obj) for all sub-nodes in the parsed structure.
    Supports:
      - SymPy expressions: yields the sympy nodes
      - pyparsing.ParseResults: yields each ParseResults and its children
      - lists / tuples / dicts: yields each element
      - strings / atoms: yields themselves as leaves
    """
    seen = set()
    def rec(n, parent=None):
        nid = id(n)
        if nid in seen:
            return
        seen.add(nid)
        yield n, parent
        # SymPy: iterate .args
        if is_sympy_expr(n):
            for a in getattr(n, 'args', ()):
                yield from rec(a, n)
        # ParseResults: iterate like list/dict
        elif is_parse_results(n):
            for a in list(n):
                yield from rec(a, n)
            # ParseResults can have named results too
            try:
                for k in n.keys():
                    try:
                        v = n.get(k)
                        yield from rec(v, n)
                    except Exception:
                        pass
            except Exception:
                pass
        # list/tuple/set
        elif isinstance(n, (list, tuple, set)):
            for a in n:
                yield from rec(a, n)
        # dict
        elif isinstance(n, dict):
            for k, v in n.items():
                yield from rec(k, n)
                yield from rec(v, n)
        # strings or leaves: no further descend
        else:
            return
    yield from rec(node)

def atoms_from_node(node):
    """
    Return a sorted list of candidate atom names (variable names) found in the node.
    - For SymPy expressions use .atoms(Symbol) safely (guard against unexpected attributes)
    - For other nodes, heuristically extract tokens matching VAR_TOKEN_RE from str(node)
    """
    if node is None:
        return []
    # Safe SymPy path
    if is_sympy_expr(node):
        try:
            sym_atoms = node.atoms(Symbol)
            return sorted({str(a) for a in sym_atoms if isinstance(a, Symbol)})
        except Exception:
            # fallback to textual heuristics
            pass
    # If ParseResults or other object, try string heuristics
    s = str(node)
    found = VAR_TOKEN_RE.findall(s)
    # filter out operator-like tokens if any (e.g., AND/OR/NOT might be uppercase; remove them)
    found = [f for f in found if f not in ("AND","OR","NOT","IF","IMPLIES","TRUE","FALSE")]
    return sorted(set(found))

def node_type_name(node):
    """Readable type name for reporting."""
    try:
        return type(node).__name__
    except Exception:
        return str(type(node))

def detect_nl_exception_structure(nl_text):
    # be defensive: accept non-string nl_text by casting to str
    if nl_text is None:
        s = ""
    else:
        s = str(nl_text).lower()
    flags = {"has_unless_like": False, "exception_connective": None, "implication_like": False}
    if re.search(r"\b(unless|but not if|except if|except when|but not when)\b", s):
        flags["has_unless_like"] = True
    if re.search(r"\bif\b", s):
        flags["implication_like"] = True
    m = re.search(r"(but not if|unless|except if|except when)(.*)", s)
    if m:
        part = m.group(2)
        if ' or ' in part:
            flags["exception_connective"] = 'or'
        elif ' and ' in part:
            flags["exception_connective"] = 'and'
    else:
        if re.search(r"\bnot\b.*\bor\b", s) or re.search(r"\bor\b.*\bnot\b", s):
            flags["exception_connective"] = 'or'
        elif ' and ' in s and 'not' in s:
            flags["exception_connective"] = 'and'
    return flags

def compare_subtrees_to_nl_general(parsed_tree, nl_requirement, semantic_map=None):
    """
    General comparator that accepts:
      - SymPy expressions (or strings that sympify)
      - pyparsing.ParseResults
      - nested lists/tuples/dicts
    semantic_map: optional dict mapping variable name -> NL fragment
    Returns: list of issue dicts. Each issue includes:
      - 'node' : repr(node)  (string repr for human reading)
      - 'node_type': actual type name
      - 'vars': vars found in that node (list)
      - 'issue_type', 'message'
    """
    # Normalize only if it's a string that looks like SymPy-friendly boolean expression
    parsed = normalize_expr_if_str(parsed_tree)

    nl_flags = detect_nl_exception_structure(nl_requirement)
    issues = []

    # Build set of mapped variables that are actually mentioned in NL (verbatim)
    nl_mentions = set()
    if semantic_map:
        for v, frag in semantic_map.items():
            if frag and frag.lower() in str(nl_requirement).lower():
                nl_mentions.add(v)

    # Walk the nodes
    for node, parent in walk_obj_nodes(parsed):
        node_repr = repr(node)
        node_str = str(node)
        node_tname = node_type_name(node)
        vars_here = atoms_from_node(node)

        # Heuristic: unexpected negation (detect if node is a negation node in SymPy or if its string begins with 'Not(' or '~' or 'NOT ')
        is_negation_node = False
        if is_sympy_expr(node) and hasattr(node, 'func') and getattr(node, 'func').__name__.lower() == "not":
            is_negation_node = True
        else:
            # textual heuristics for parse trees: start with NOT/NOT( or prefix '~'
            if isinstance(node, str):
                if re.match(r'^\s*(NOT\b|~|NOT\()', node_str, flags=re.IGNORECASE):
                    is_negation_node = True
            else:
                # look at repr for common Not indicators
                if re.search(r'\bNot\b|\bNOT\b|~', node_repr):
                    # but be conservative: ensure it contains a single variable or small expression
                    is_negation_node = True

        if is_negation_node and len(vars_here) == 1:
            atom = vars_here[0]
            frag = semantic_map.get(atom, "") if semantic_map else ""
            # If mapping exists and NL doesn't contain a negation for that fragment -> warn
            if frag and ('not' not in frag.lower()) and (frag.lower() not in str(nl_requirement).lower()):
                issues.append({
                    "node": node_repr,
                    "node_type": node_tname,
                    "vars": vars_here,
                    "issue_type": "unexpected_negation",
                    "message": f"Node appears to be a negation of {atom}, but NL fragment mapped to {atom!r} is not negative in requirement."
                })

        # Heuristic: connective mismatch for exception phrases
        # Identify connective type of this node if possible: 'And' or 'Or'
        node_connective = None
        if is_sympy_expr(node) and hasattr(node, 'func'):
            node_connective = getattr(node, 'func').__name__ if hasattr(node, 'func') else None
        else:
            # textual heuristics
            if re.search(r'\bAND\b', node_str, flags=re.IGNORECASE) or re.search(r'&', node_str):
                node_connective = "And"
            elif re.search(r'\bOR\b', node_str, flags=re.IGNORECASE) or re.search(r'\|', node_str):
                node_connective = "Or"

        if nl_flags["has_unless_like"] and len(vars_here) >= 2 and node_connective in ("And", "Or"):
            if nl_flags["exception_connective"] == 'or' and node_connective == "And":
                issues.append({
                    "node": node_repr,
                    "node_type": node_tname,
                    "vars": vars_here,
                    "issue_type": "connective_mismatch",
                    "message": f"NL suggests 'or' between exceptions, but node uses AND: {node_str}"
                })
            if nl_flags["exception_connective"] == 'and' and node_connective == "Or":
                issues.append({
                    "node": node_repr,
                    "node_type": node_tname,
                    "vars": vars_here,
                    "issue_type": "connective_mismatch",
                    "message": f"NL suggests 'and' between exceptions, but node uses OR: {node_str}"
                })

        # Heuristic: implication translated as disjunction
        # If NL implication-like and top-level object is an OR that contains negation of primary atom
        # We'll only perform this check on the root node (i.e., parent is None)
        if nl_flags["implication_like"] and parent is None:
            # consider current node as root
            root_str = str(parsed)
            # detect OR at top level heuristically
            root_is_or = False
            if is_sympy_expr(parsed) and getattr(parsed, 'func', None) is not None and getattr(parsed, 'func').__name__ == "Or":
                root_is_or = True
            else:
                if re.search(r'\bOR\b', root_str) or '|' in root_str:
                    root_is_or = True
            if root_is_or:
                # find primary candidates from semantic_map
                primary_candidates = []
                if semantic_map:
                    for v, frag in semantic_map.items():
                        if frag and re.search(r"\b(start|enable|allow|open|activate|authenticate|login|startup)\b", frag.lower()):
                            primary_candidates.append(v)
                if not primary_candidates:
                    primary_candidates = [v for v,f in (semantic_map or {}).items() if f and f.lower() in str(nl_requirement).lower()]
                if not primary_candidates:
                    primary_candidates = atoms_from_node(parsed)
                for cand in primary_candidates:
                    if re.search(r'(~|Not\()' + re.escape(cand), root_str) or f"~{cand}" in root_str or f"Not({cand})" in root_str:
                        issues.append({
                            "node": repr(parsed),
                            "node_type": node_type_name(parsed),
                            "vars": atoms_from_node(parsed),
                            "issue_type": "implication_translated_as_disjunction",
                            "message": f"NL suggests an implication/exception, but root expression is an OR containing negation of primary atom ({cand}): {root_str}"
                        })
                        break

        # Heuristic: unmapped variable checks (var appears but its mapped NL fragment not present verbatim in requirement)
        if semantic_map:
            for v in vars_here:
                if semantic_map.get(v) and semantic_map[v].lower() not in str(nl_requirement).lower():
                    issues.append({
                        "node": node_repr,
                        "node_type": node_tname,
                        "vars": vars_here,
                        "issue_type": "unmapped_variable",
                        "message": f"Variable {v} appears in node but its mapped NL fragment is not found verbatim in requirement. Mapped fragment: {semantic_map[v]!r}"
                    })

    # de-duplicate issues by (node, issue_type, message) for clarity
    unique = []
    seen = set()
    for it in issues:
        key = (it.get('node'), it.get('issue_type'), it.get('message'))
        if key in seen:
            continue
        seen.add(key)
        unique.append(it)
    return unique

# Convenience wrapper that prints human-readable info (like run_comparator)
def run_comparator_general(parsed_tree, nl_requirement, semantic_map=None, pretty_print=True):
    issues = compare_subtrees_to_nl_general(parsed_tree, nl_requirement, semantic_map)
    if pretty_print:
        if not issues:
            print("No issues detected.")
            return issues
        print("Issues detected:")
        for i, it in enumerate(issues, 1):
            # print node type and a short repr to inspect actual parsed node object
            node_short = (it['node'][:400] + '...') if len(it['node']) > 400 else it['node']
            print(f"{i}. [{it['issue_type']}] {it['message']}")
            print(f"    node_type: {it['node_type']}  node_repr: {node_short}")
            print(f"    vars: {it.get('vars')}\n")
    return issues


In [46]:
def ask_llm_for_boolean_logic_revised(natural_language_requirement: str, bool_vars:list, issues, llm_boolean_logic_output) -> str:
    prompt = f"""
    You are an expert in formal logic and system specifications.
    Your task is to translate natural language design requirements into Boolean logic expressions.

    Rules for Boolean Logic:
    - Use 'AND' for logical conjunction.
    - Use 'OR' for logical disjunction.
    - Use 'NOT' for logical negation.
    - Use 'IMPLIES' for logical implication (A IMPLIES B).
    - Use parentheses for grouping everywhere so that its neatly understandable.
    - Identify the clauses properly with more precision in the sentence and convert them into boolean specifications properly.
    - Identify the main subject (e.g., 'software installation', 'system activation', 'alarm sounding') and include it as a variable in the final Boolean logic expression.
    - Do not include any explanations, preamble, or additional text. Only output the Boolean logic expression.
    - When you see 'only if' (not 'if') in the requirement like A only if B it is 'A implies B' not 'B implies A'
    - When you see 'if and only if' (not 'only if') in the requirement like A if and only if B it is 'A implies B AND B implies A'

    Use these boolean variables : "{bool_vars}"

    For this Natural Language Requirement: "{natural_language_requirement}", the boolean logic output LLM gave was 
    Boolean Logic: "{llm_boolean_logic_output}"

    But there are some issues here and they are,
    Issues: "{issues}"

    Fix the boolean logic and give me just the new corrected boolean logic.

    Corrected Boolean Logic:
    """
    try:
        response = ollama.chat(
            model='llama3',
            messages=[{'role': 'user', 'content': prompt}],
            options={'temperature': 0.1, 'num_predict': 128}
        )
        llm_output = response['message']['content'].strip()
        # print(f"LLM Raw Output:\n{llm_output}\n")
        lines = [line.strip() for line in llm_output.split('\n') if line.strip()]
        if lines:
            return lines[0]
        else:
            return "Error: LLM returned empty response."
    except Exception as e:
        return f"Error communicating with Ollama: {e}"

In [47]:
def ask_llm_to_review_boolean_logic(natural_language_requirement: str, bool_vars:list, llm_boolean_logic_output) -> str:
    prompt = f"""
    You are an expert in formal logic and system specifications.
    Your task is to review the Boolean logic expressions obtained from translate natural language design requirements.

    Rules for Boolean Logic:
    - Use 'AND' for logical conjunction.
    - Use 'OR' for logical disjunction.
    - Use 'NOT' for logical negation.
    - Use 'IMPLIES' for logical implication (A IMPLIES B).
    - Use parentheses for grouping everywhere so that its neatly understandable.
    - Identify the clauses properly with more precision in the sentence and convert them into boolean specifications properly.
    - Identify the main subject (e.g., 'software installation', 'system activation', 'alarm sounding') and include it as a variable in the final Boolean logic expression.
    - Do not include any explanations, preamble, or additional text. Only output the Boolean logic expression.
    - When you see 'only if' (not 'if') in the requirement like A only if B it is 'A implies B' not 'B implies A'
    - When you see 'if and only if' (not 'only if') in the requirement like A if and only if B it is 'A implies B AND B implies A'

    Used these boolean variables : "{bool_vars}"

    For this Natural Language Requirement: "{natural_language_requirement}", the boolean logic output LLM gave was 
    Boolean Logic: "{llm_boolean_logic_output}"

    Give me the issues point wise.

    Issues: 
    """
    try:
        response = ollama.chat(
            model='llama3',
            messages=[{'role': 'user', 'content': prompt}],
            options={'temperature': 0.1, 'num_predict': 128}
        )
        llm_output = response['message']['content'].strip()
        # print(f"LLM Raw Output:\n{llm_output}\n")
        lines = [line.strip() for line in llm_output.split('\n') if line.strip()]
        if lines:
            return lines[0]
        else:
            return "Error: LLM returned empty response."
    except Exception as e:
        return f"Error communicating with Ollama: {e}"

In [None]:
import pandas as pd
import csv

if __name__ == "__main__":
    llm_outputs = []
    ground_truths = []

    df = pd.read_csv('testreq.csv', dtype={'llm_output': str})

    # Fill any NaN values with empty strings
    df['llm_output'] = df['llm_output'].fillna('')

    results = []
    attempts = 0
    nreq=0

    for i, req_data in df.iterrows():
        req = req_data["natural_language"]
        ground_truth_req = req_data["ground_truth"]
        
        print(f"\n--- Requirement {i+1} ---")
        print(f"Natural Language: \"{req}\"")
        print(f"Ground Truth Boolean Logic: {ground_truth_req}\n")
        
        simple_sentences = deconstruct_requirement(req)
        for i in simple_sentences:
            print(i)
        bool_vars = ask_llm_for_boolean_variables(simple_sentences)
        vars = []
        cnt=1
        for sentence in enumerate(simple_sentences, start=1):
            print(cnt,":", bool_vars[sentence])
            vars.append(bool_vars[sentence])
        
        llm_boolean_logic_output = ask_llm_for_boolean_logic(req,vars)
        # print(f"LLM-generated Boolean Logic: {llm_boolean_logic_output}")
        # conistency = check_semantic_consistency(req, llm_boolean_logic_output, vars)
        # print(conistency)
        # continue
        # result,attempt = manual_prompt_testing(natural_language_requirement=req, llm_boolean_logic_output=llm_boolean_logic_output)
        # results.extend(result)
        # attempts += attempt
        # nreq+=1
        # Update the specific row with the LLM output
        df.at[i, 'llm_output'] = llm_boolean_logic_output
        
        # Write the updated DataFrame back to CSV after each iteration
        # df.to_csv('testreq.csv', index=False)
        # continue
        # evaluate_compositional_logic(req, simple_sentences, llm_boolean_logic_output)
        
        llm_outputs.append(llm_boolean_logic_output)
        ground_truths.append(ground_truth_req)

        parsed_tree = parse_boolean_logic(llm_boolean_logic_output)

        issues = run_comparator_general(parsed_tree, req, bool_vars)
        new_llm_boolean_logic_output = llm_boolean_logic_output
        new_parsed_tree = parsed_tree
        while issues!=[]:
            new_llm_boolean_logic_output = ask_llm_for_boolean_logic_revised(req, vars, issues, new_llm_boolean_logic_output)
            new_parsed_tree = parse_boolean_logic(new_llm_boolean_logic_output)   
            issues = run_comparator_general(new_parsed_tree, req, bool_vars)
            print(f"New Boolean Logic: {new_llm_boolean_logic_output}")
        print(f"Final Boolean Logic: {new_llm_boolean_logic_output}")
        conistency = check_semantic_consistency(req, new_llm_boolean_logic_output, vars)
        issues = ask_llm_to_review_boolean_logic(req, vars, new_llm_boolean_logic_output)
        while issues!=[]:
            new_llm_boolean_logic_output = ask_llm_for_boolean_logic_revised(req, vars, issues, new_llm_boolean_logic_output)
            new_parsed_tree = parse_boolean_logic(new_llm_boolean_logic_output) 
            issues = ask_llm_to_review_boolean_logic(req, vars, new_llm_boolean_logic_output)
            print(f"New Boolean Logic: {new_llm_boolean_logic_output}")
        print(f"Final Boolean Logic: {new_llm_boolean_logic_output}")
        continue
        
        if parsed_tree:
            print("--- Pyparsing ParseResults Object (Raw) ---")
            print(parsed_tree)
            print("\n--- Visualizing the Parse Tree (Simplified Method) ---")
            visualize_tree(parsed_tree.asList())
            print("\n--- Pretty Printing the Parse Tree (More Structured Method) ---")
            pretty_print_tree(parsed_tree.asList())
            # print(parsed_tree.asList())
            print("\n--- Transforming 'IMPLIES' to 'OR NOT' (Conceptual) ---")
            def transform_implies(node):
                op, operands = _get_operator_and_operands(node)
                # print(op, operands)
                if op == 'IMPLIES':
                    # print(f"Transforming 'IMPLIES' node: {node}")
                    antecedent = transform_implies(operands[0])
                    consequent = transform_implies(operands[1])
                    return [['NOT', antecedent], 'OR' , consequent]
                elif op:
                    transformed_operands = [transform_implies(o) for o in operands]
                    # print(transformed_operands, "sdgk")
                    if op == 'NOT':
                        return ['NOT', transformed_operands[0]]
                    elif op in ['AND', 'OR']:
                        return [[transformed_operands[0]], op, [transformed_operands[1]]]
                elif operands is not None:
                    return operands
                else:
                    print(f"WARNING: transform_implies encountered unhandled node: {node}")
                    return node
            transformed_tree = transform_implies(parsed_tree.asList())
            # print("MEOW")
            # print(parsed_tree.asList())
            # print("meowwww")
            # print(transformed_tree) 
            print("Transformed tree for 'IMPLIES' (Conceptual):")
            pretty_print_tree(transformed_tree)
            print("\n" + "="*70 + "\n")
            # continue
            # print("\n" + "="*50 + "\n")
            
            print("\n--- Processing LLM Output ---")
            
            llm_variables = set()
            extract_variables(parsed_tree.asList(), llm_variables)
            print(f"Extracted Variables from LLM: {llm_variables}")
            
            semantic_map = {}
            for var in llm_variables:
                equivalents = get_semantic_equivalents(var)
                print(f" -> Found equivalents for '{var}': {equivalents}")
                semantic_map[var] = [var] + equivalents
            
            print("\n--- Final Semantic Mapping Dictionary ---")
            print(semantic_map)
            
            parsed_human_tree = parse_boolean_logic(ground_truth_req)
            human_variables = set()
            if parsed_human_tree:
                 extract_variables(parsed_human_tree.asList(), human_variables)
                 print(f"\nExtracted Variables from Human Gold Standard: {human_variables}")
        
            original_req_text = req_data["natural_language"]
            full_logic = llm_boolean_logic_output
            
            sentences_incorporated, total_sentences = measure_completeness(original_req_text, full_logic)
            print("\n" + "="*70 + "\n")
        else:
            print("Skipping processing due to parsing error.")
            print("\n" + "="*70 + "\n") 
    print(f"\nAll requirements processed. Total requirements: {nreq}, Total attempts: {attempts}")
    print(f"Average attempts per requirement: {attempts/nreq if nreq>0 else 0}")


--- Requirement 1 ---
Natural Language: "The system will install updates, except if there is not enough disk space or the update is not available."
Ground Truth Boolean Logic: SOFTWARE_INSTALL IF ENOUGH_DISK_SPACE AND UPDATE_AVAILABLE

DEBUG: Asking LLM to deconstruct the requirement.
the system will install updates
there is not enough disk space
the update is not available
1 : SYSTEM_UPDATES_INSTALLED
1 : DISK_SPACE_AVAILABLE
1 : UPDATE_AVAILABLE
No issues detected.
Final Boolean Logic: (NOT SYSTEM_UPDATES_INSTALLED) IMPLIES ((NOT DISK_SPACE_AVAILABLE) OR (NOT UPDATE_AVAILABLE))
New Boolean Logic: SYSTEM_UPDATES_INSTALLED IMPLIES ((DISK_SPACE_AVAILABLE AND UPDATE_AVAILABLE) OR NOT SYSTEM_UPDATES_INSTALLED)
New Boolean Logic: NOT (SYSTEM_UPDATES_INSTALLED IMPLIES NOT (DISK_SPACE_AVAILABLE OR UPDATE_AVAILABLE))
New Boolean Logic: NOT (UPDATE_AVAILABLE IMPLIES (SYSTEM_UPDATES_INSTALLED AND DISK_SPACE_AVAILABLE))
New Boolean Logic: NOT (UPDATE_AVAILABLE OR NOT DISK_SPACE_AVAILABLE) IMPLI