In [83]:
import requests
import pandas as pd
import re
import random
from typing import List, Dict, Optional


In [84]:
# SPARQL endpoint URL that execute_sparql_query sends its GET requests to
# (a localhost address on port 8890).
VIRTUOSO_URL = "http://localhost:8890/sparql"

def execute_sparql_query(query: str) -> Optional[str]:
    """
    Run one SPARQL query against VIRTUOSO_URL and return a single scalar answer.

    Only the first variable of the first result row is returned; any further
    rows or variables in the response are ignored.

    Returns:
        - the "value" of the first binding in the first row, when rows exist;
        - None when the result set is empty or the response cannot be parsed;
        - the sentinel string 'VIRTUOSO_CONNECTION_ERROR' for any
          requests-level failure (this includes HTTP error statuses, because
          raise_for_status() raises a RequestException subclass).
    """
    request_params = {"query": query, "format": "application/sparql-results+json"}

    try:
        reply = requests.get(VIRTUOSO_URL, params=request_params, timeout=5)
        reply.raise_for_status()

        bindings = reply.json()["results"]["bindings"]
        if bindings:
            first_row = bindings[0]
            # Take whichever variable comes first in the row.
            return list(first_row.values())[0]["value"]
        return None

    except requests.exceptions.RequestException:
        return "VIRTUOSO_CONNECTION_ERROR"
    except Exception:
        # Best-effort: an unexpected payload shape is treated as "no answer".
        return None


In [85]:
# Lower-cased surface form (as it appears in a question) -> entity local name.
# Insertion order matters: lookups that scan this dict stop at the first hit.
ENTITY_MAP = {
    "aave": "Aave",
    "sushiswap": "SushiSwap",
    "uniswap": "Uniswap",
    "1inch": "1inch",
    "balancer": "Balancer",
    "makerdao": "MakerDAO",
    "compound": "Compound",
    "lido": "Lido",
    "curve finance": "CurveFinance",
    "yearn finance": "YearnFinance",
    "certik": "CertiK",
    "quantstamp": "Quantstamp",
    "trailofbits": "TrailOfBits",
    "peckshield": "PeckShield",
    "a16z": "a16z",
    "paradigm": "Paradigm",
    "binance labs": "BinanceLabs",
    "figment": "Figment",
}

# Regex over the normalised question -> predicate local name.
# Patterns are tried in insertion order and the first match wins.
PREDICATE_MAP = {
    r"audited by|auditor for|auditor of|audits": "AUDITED_BY",
    r"tvl|total value locked": "HAS_TVL",
    r"gave a grant to|funded|provided a grant to|granted funding to": "RECEIVED_GRANT_FROM",
    r"collaborates with|partner does.+have|partner has": "COLLABORATES_WITH",
    r"funding amount for": "HAS_FUNDING_AMOUNT",
    # Inverse P-O
    r"projects did (.+) fund": "FUNDED",
    r"protocols collaborating with (.+)": "COLLABORATES_WITH",
    r"who did (.+) audit": "AUDITED_BY",
}

def get_uri(entity: str) -> str:
    """
    Wrap an entity or term name in the angle-bracketed URI used by the queries.

    Names in the known list below are placed under the /resource/ namespace;
    anything else (matching is case-sensitive) falls under /def/.
    """
    known_resources = (
        "Aave", "SushiSwap", "Uniswap", "1inch", "Balancer", "MakerDAO",
        "Compound", "Lido", "CurveFinance", "YearnFinance",
        "CertiK", "Quantstamp", "TrailOfBits", "PeckShield",
        "a16z", "Paradigm", "BinanceLabs", "Figment",
    )
    namespace = "resource" if entity in known_resources else "def"
    return f"<http://defi-kg.org/{namespace}/{entity}>"


In [86]:
def nl_to_sparql_with_basic_llm_logic(nl_question: str, reference_queries: List[Dict]) -> Optional[str]:
    """
    Simulates LLM/GraphRAG query generation with a lookup plus hand-written rules.

    Resolution order (first hit wins):
      1. Case-insensitive exact match of the question against `reference_queries`.
      2. TVL comparison ("tvl above|below|>|< N billion usd").
      3. Negation ("audited A but not B").
      4. Inverse patterns ("protocols collaborating with X",
         "projects did X fund", "who did X audit").
      5. Plain S-P-O from the first entity / first predicate pattern found.

    Args:
        nl_question: The natural-language question.
        reference_queries: Dicts carrying at least "question" and "sparql" keys.

    Returns:
        A SPARQL query string, or None when no rule applies (including when
        the question is not a string or is empty after normalisation).

    Note:
        Subject detection is a substring test over ENTITY_MAP in insertion
        order, so with several entities in one question the one listed first
        in ENTITY_MAP is chosen, not the one that appears first in the text.
    """
    if not isinstance(nl_question, str):
        return None

    # 1. Exact match check against the reference dataset.
    lowered = nl_question.lower()
    for item in reference_queries or []:
        if item["question"].lower() == lowered:
            return item["sparql"]

    # Normalise: drop question marks and trailing punctuation so that
    # "… with Figment." resolves the same entity as "… with Figment?".
    norm_q = lowered.replace('?', '').strip().rstrip('.!,;: ')
    if not norm_q:
        return None

    prefix = "PREFIX ex: <http://defi-kg.org/def/> "

    # First entity (in ENTITY_MAP order) whose surface form occurs in the question.
    subject_entity = next(
        (entity_name for nl_term, entity_name in ENTITY_MAP.items() if nl_term in norm_q),
        None,
    )
    # First predicate pattern (in PREDICATE_MAP order) that matches.
    predicate = next(
        (pred for nl_pattern, pred in PREDICATE_MAP.items() if re.search(nl_pattern, norm_q)),
        None,
    )

    # 2. TVL comparison filters. xsd: is declared explicitly so the query does
    # not rely on the endpoint pre-defining that prefix.
    tvl_match = re.search(r"tvl\s+(above|below|>|<)\s*(\d+\.?\d*)\s*billion usd", norm_q)
    if tvl_match:
        operator = ">" if tvl_match.group(1) in ("above", ">") else "<"
        value = float(tvl_match.group(2))
        return (
            f"{prefix}PREFIX xsd: <http://www.w3.org/2001/XMLSchema#> "
            f"SELECT ?x WHERE {{ ?x ex:HAS_TVL ?t . "
            f"FILTER(xsd:decimal(REPLACE(?t, ' Billion USD', '')) {operator} {value}) }}"
        )

    # 3. Negation: auditors of the first entity that did not audit the second.
    negation_match = re.search(r"audited (.+) but not (.+)", norm_q)
    if negation_match:
        entity1 = ENTITY_MAP.get(negation_match.group(1).strip())
        entity2 = ENTITY_MAP.get(negation_match.group(2).strip())
        if entity1 and entity2:
            return (
                f"{prefix}SELECT ?a WHERE {{ {get_uri(entity1)} ex:AUDITED_BY ?a . "
                f"FILTER NOT EXISTS {{ {get_uri(entity2)} ex:AUDITED_BY ?a }} }}"
            )

    # 4. Specific inverse patterns. Each branch fixes its own predicate rather
    # than reusing `predicate`, which may have been set by an unrelated pattern
    # that happens to come earlier in PREDICATE_MAP.
    collab_match = re.search(r"protocols collaborating with (.+)", norm_q)
    if collab_match:
        obj = ENTITY_MAP.get(collab_match.group(1).strip())
        if obj:
            return f"{prefix}SELECT ?x WHERE {{ ?x ex:COLLABORATES_WITH {get_uri(obj)} }}"

    funder_match = re.search(r"projects did (.+) fund", norm_q)
    if funder_match:
        sub = ENTITY_MAP.get(funder_match.group(1).strip())
        if sub:
            return f"{prefix}SELECT ?x WHERE {{ {get_uri(sub)} ex:FUNDED ?x }}"

    auditor_match = re.search(r"who did (.+) audit", norm_q)
    if auditor_match:
        obj = ENTITY_MAP.get(auditor_match.group(1).strip())
        if obj:
            return f"{prefix}SELECT ?x WHERE {{ ?x ex:AUDITED_BY {get_uri(obj)} }}"

    # 5. Generic subject-predicate-object fallback.
    if subject_entity and predicate:
        return f"{prefix}SELECT ?x WHERE {{ {get_uri(subject_entity)} ex:{predicate} ?x }}"

    return None


In [87]:
# Baseline ("expected conditions") benchmark: 20 questions, each with its gold
# SPARQL query and the single answer value the evaluation compares against.
# All entity URIs live under http://defi-kg.org/resource/; predicates use the
# ex: (http://defi-kg.org/def/) prefix.
questions_expected = [
    # Auditors (5)
    {"question": "Which organizations audited AAVE?", "sparql": "PREFIX ex: <http://defi-kg.org/def/> SELECT ?x WHERE { <http://defi-kg.org/resource/Aave> ex:AUDITED_BY ?x }", "expected_answer": "http://defi-kg.org/resource/CertiK"},
    {"question": "Who audited SushiSwap?", "sparql": "PREFIX ex: <http://defi-kg.org/def/> SELECT ?x WHERE { <http://defi-kg.org/resource/SushiSwap> ex:AUDITED_BY ?x }", "expected_answer": "http://defi-kg.org/resource/Quantstamp"},
    # Subject is a resource URI like every other entity (was mistakenly under /def/).
    {"question": "Auditor for Uniswap?", "sparql": "PREFIX ex: <http://defi-kg.org/def/> SELECT ?x WHERE { <http://defi-kg.org/resource/Uniswap> ex:AUDITED_BY ?x }", "expected_answer": "http://defi-kg.org/resource/TrailOfBits"},
    {"question": "Auditor of 1inch?", "sparql": "PREFIX ex: <http://defi-kg.org/def/> SELECT ?x WHERE { <http://defi-kg.org/resource/1inch> ex:AUDITED_BY ?x }", "expected_answer": "http://defi-kg.org/resource/PeckShield"},
    {"question": "Who is the auditor of Balancer?", "sparql": "PREFIX ex: <http://defi-kg.org/def/> SELECT ?x WHERE { <http://defi-kg.org/resource/Balancer> ex:AUDITED_BY ?x }", "expected_answer": "http://defi-kg.org/resource/CertiK"},

    # TVL (5)
    {"question": "TVL of AAVE?", "sparql": "PREFIX ex: <http://defi-kg.org/def/> SELECT ?t WHERE { <http://defi-kg.org/resource/Aave> ex:HAS_TVL ?t }", "expected_answer": "5.0 Billion USD"},
    {"question": "TVL of Uniswap?", "sparql": "PREFIX ex: <http://defi-kg.org/def/> SELECT ?t WHERE { <http://defi-kg.org/resource/Uniswap> ex:HAS_TVL ?t }", "expected_answer": "4.5 Billion USD"},
    {"question": "TVL of MakerDAO?", "sparql": "PREFIX ex: <http://defi-kg.org/def/> SELECT ?t WHERE { <http://defi-kg.org/resource/MakerDAO> ex:HAS_TVL ?t }", "expected_answer": "6.0 Billion USD"},
    {"question": "TVL of Compound?", "sparql": "PREFIX ex: <http://defi-kg.org/def/> SELECT ?t WHERE { <http://defi-kg.org/resource/Compound> ex:HAS_TVL ?t }", "expected_answer": "4.0 Billion USD"},
    {"question": "TVL of Lido?", "sparql": "PREFIX ex: <http://defi-kg.org/def/> SELECT ?t WHERE { <http://defi-kg.org/resource/Lido> ex:HAS_TVL ?t }", "expected_answer": "1.2 Billion USD"},

    # Grants (5)
    {"question": "Who gave a grant to SushiSwap?", "sparql": "PREFIX ex: <http://defi-kg.org/def/> SELECT ?x WHERE { <http://defi-kg.org/resource/SushiSwap> ex:RECEIVED_GRANT_FROM ?x }", "expected_answer": "http://defi-kg.org/resource/a16z"},
    {"question": "Who funded Balancer?", "sparql": "PREFIX ex: <http://defi-kg.org/def/> SELECT ?x WHERE { <http://defi-kg.org/resource/Balancer> ex:RECEIVED_GRANT_FROM ?x }", "expected_answer": "http://defi-kg.org/resource/Paradigm"},
    {"question": "Who provided a grant to Compound?", "sparql": "PREFIX ex: <http://defi-kg.org/def/> SELECT ?x WHERE { <http://defi-kg.org/resource/Compound> ex:RECEIVED_GRANT_FROM ?x }", "expected_answer": "http://defi-kg.org/resource/a16z"},
    {"question": "Who granted funding to MakerDAO?", "sparql": "PREFIX ex: <http://defi-kg.org/def/> SELECT ?x WHERE { <http://defi-kg.org/resource/MakerDAO> ex:RECEIVED_GRANT_FROM ?x }", "expected_answer": "http://defi-kg.org/resource/a16z"},
    {"question": "Who funded Curve Finance?", "sparql": "PREFIX ex: <http://defi-kg.org/def/> SELECT ?x WHERE { <http://defi-kg.org/resource/CurveFinance> ex:RECEIVED_GRANT_FROM ?x }", "expected_answer": "http://defi-kg.org/resource/BinanceLabs"},

    # Collaborations (5)
    {"question": "Who does Aave collaborate with?", "sparql": "PREFIX ex: <http://defi-kg.org/def/> SELECT ?x WHERE { <http://defi-kg.org/resource/Aave> ex:COLLABORATES_WITH ?x }", "expected_answer": "http://defi-kg.org/resource/Figment"},
    {"question": "Who does Compound collaborate with?", "sparql": "PREFIX ex: <http://defi-kg.org/def/> SELECT ?x WHERE { <http://defi-kg.org/resource/Compound> ex:COLLABORATES_WITH ?x }", "expected_answer": "http://defi-kg.org/resource/Figment"},
    {"question": "Which partner does Lido have?", "sparql": "PREFIX ex: <http://defi-kg.org/def/> SELECT ?x WHERE { <http://defi-kg.org/resource/Lido> ex:COLLABORATES_WITH ?x }", "expected_answer": "http://defi-kg.org/resource/Figment"},
    {"question": "Who collaborates with 1inch?", "sparql": "PREFIX ex: <http://defi-kg.org/def/> SELECT ?x WHERE { <http://defi-kg.org/resource/1inch> ex:COLLABORATES_WITH ?x }", "expected_answer": "http://defi-kg.org/resource/Figment"},
    # ?x must be bound inside the pattern, otherwise the SELECT yields no value
    # for it and the expected answer can never be returned.
    {"question": "Does Yearn Finance collaborate with Figment?", "sparql": "PREFIX ex: <http://defi-kg.org/def/> SELECT ?x WHERE { <http://defi-kg.org/resource/YearnFinance> ex:COLLABORATES_WITH ?x . FILTER(?x = <http://defi-kg.org/resource/Figment>) }", "expected_answer": "http://defi-kg.org/resource/Figment"},
]

# Adversarial: 20 sample questions demonstrating robustness challenges.
# Each entry has the same keys as questions_expected plus a "phenomenon" label
# that the summary at the end of the notebook groups accuracy by.
# NOTE(review): execute_sparql_query keeps only the first row of a result, and
# none of these queries has an ORDER BY, so for queries that can match several
# rows (the inverse and TVL-comparison ones) the expected answer presumably
# depends on the row order the store happens to return - confirm.
questions_adversarial = [
    # Paraphrasing/Synonym (NL change, SPARQL is same)
    {"question": "Who is the auditor of Aave?", "phenomenon": "Paraphrasing", "sparql": "PREFIX ex: <http://defi-kg.org/def/> SELECT ?x WHERE { <http://defi-kg.org/resource/Aave> ex:AUDITED_BY ?x }", "expected_answer": "http://defi-kg.org/resource/CertiK"},
    {"question": "Which firm audits SushiSwap?", "phenomenon": "Synonym", "sparql": "PREFIX ex: <http://defi-kg.org/def/> SELECT ?x WHERE { <http://defi-kg.org/resource/SushiSwap> ex:AUDITED_BY ?x }", "expected_answer": "http://defi-kg.org/resource/Quantstamp"},
    {"question": "Tell me Uniswap TVL.", "phenomenon": "Shortened Query", "sparql": "PREFIX ex: <http://defi-kg.org/def/> SELECT ?t WHERE { <http://defi-kg.org/resource/Uniswap> ex:HAS_TVL ?t }", "expected_answer": "4.5 Billion USD"},
    {"question": "Which org collaborated with Lido?", "phenomenon": "Paraphrasing", "sparql": "PREFIX ex: <http://defi-kg.org/def/> SELECT ?x WHERE { <http://defi-kg.org/resource/Lido> ex:COLLABORATES_WITH ?x }", "expected_answer": "http://defi-kg.org/resource/Figment"},
    {"question": "Who granted Balancer?", "phenomenon": "Paraphrasing", "sparql": "PREFIX ex: <http://defi-kg.org/def/> SELECT ?x WHERE { <http://defi-kg.org/resource/Balancer> ex:RECEIVED_GRANT_FROM ?x }", "expected_answer": "http://defi-kg.org/resource/Paradigm"},

    # Abbreviation/Formal (NL change, SPARQL is same)
    # NOTE(review): "Who funded Curve Finance?" below is word-for-word the same
    # question as one in questions_expected - confirm it is meant to be here.
    {"question": "Uniswap Total Value Locked?", "phenomenon": "Abbreviation", "sparql": "PREFIX ex: <http://defi-kg.org/def/> SELECT ?t WHERE { <http://defi-kg.org/resource/Uniswap> ex:HAS_TVL ?t }", "expected_answer": "4.5 Billion USD"},
    {"question": "Funding org for AAVE?", "phenomenon": "Abbreviation", "sparql": "PREFIX ex: <http://defi-kg.org/def/> SELECT ?x WHERE { <http://defi-kg.org/resource/Aave> ex:RECEIVED_GRANT_FROM ?x }", "expected_answer": "http://defi-kg.org/resource/a16z"},
    {"question": "What is the TVL of MakerDAO?", "phenomenon": "Formal Variation", "sparql": "PREFIX ex: <http://defi-kg.org/def/> SELECT ?t WHERE { <http://defi-kg.org/resource/MakerDAO> ex:HAS_TVL ?t }", "expected_answer": "6.0 Billion USD"},
    {"question": "MakerDAO audited by?", "phenomenon": "Voice/Intent Change", "sparql": "PREFIX ex: <http://defi-kg.org/def/> SELECT ?x WHERE { <http://defi-kg.org/resource/MakerDAO> ex:AUDITED_BY ?x }", "expected_answer": "http://defi-kg.org/resource/CertiK"},
    {"question": "Who funded Curve Finance?", "phenomenon": "Synonym", "sparql": "PREFIX ex: <http://defi-kg.org/def/> SELECT ?x WHERE { <http://defi-kg.org/resource/CurveFinance> ex:RECEIVED_GRANT_FROM ?x }", "expected_answer": "http://defi-kg.org/resource/BinanceLabs"},

    # Inversion/S-P-O changes (NL change, SPARQL is different or inverted)
    # NOTE(review): the last two entries of this group ("Out-of-Scope Detail",
    # "Rewriting") use plain S-P-O queries rather than inverted ones.
    # NOTE(review): the BinanceLabs FUNDED answer is SushiSwap, while
    # questions_expected records a16z as SushiSwap's grant giver and
    # BinanceLabs as CurveFinance's - verify against the graph data.
    {"question": "Which projects did Binance Labs fund?", "phenomenon": "Inverse (P-O)", "sparql": "PREFIX ex: <http://defi-kg.org/def/> SELECT ?x WHERE { <http://defi-kg.org/resource/BinanceLabs> ex:FUNDED ?x }", "expected_answer": "http://defi-kg.org/resource/SushiSwap"},
    {"question": "Protocols collaborating with Figment?", "phenomenon": "Inverse (O-P)", "sparql": "PREFIX ex: <http://defi-kg.org/def/> SELECT ?x WHERE { ?x ex:COLLABORATES_WITH <http://defi-kg.org/resource/Figment> }", "expected_answer": "http://defi-kg.org/resource/Lido"},
    {"question": "Who did CertiK audit?", "phenomenon": "Inversion", "sparql": "PREFIX ex: <http://defi-kg.org/def/> SELECT ?x WHERE { ?x ex:AUDITED_BY <http://defi-kg.org/resource/CertiK> }", "expected_answer": "http://defi-kg.org/resource/Aave"},
    {"question": "Give me the funding amount for Aave.", "phenomenon": "Out-of-Scope Detail", "sparql": "PREFIX ex: <http://defi-kg.org/def/> SELECT ?a WHERE { <http://defi-kg.org/resource/Aave> ex:HAS_FUNDING_AMOUNT ?a }", "expected_answer": "100 Million USD"},
    {"question": "Curve Finance auditor?", "phenomenon": "Rewriting", "sparql": "PREFIX ex: <http://defi-kg.org/def/> SELECT ?x WHERE { <http://defi-kg.org/resource/CurveFinance> ex:AUDITED_BY ?x }", "expected_answer": "http://defi-kg.org/resource/Quantstamp"},

    # Ambiguity/Unrecognized Entities (NL or Entity is vague/incorrect)
    # The two comparison queries use xsd:decimal without declaring an xsd:
    # prefix, so they rely on the endpoint resolving it - TODO confirm.
    # NOTE(review): the "Typo Attack" question spells the entity correctly and
    # only appends "(typo)"; the "Redundancy" question differs from the first
    # questions_expected entry only in letter case.
    {"question": "TVL above 4 Billion USD?", "phenomenon": "Ambiguous (Comparison)", "sparql": "PREFIX ex: <http://defi-kg.org/def/> SELECT ?t WHERE { ?x ex:HAS_TVL ?t FILTER(xsd:decimal(REPLACE(?t, ' Billion USD', '')) > 4.0) }", "expected_answer": "5.0 Billion USD"},
    {"question": "Protocol TVL <1 Billion USD?", "phenomenon": "Ambiguous (Comparison)", "sparql": "PREFIX ex: <http://defi-kg.org/def/> SELECT ?t WHERE { ?x ex:HAS_TVL ?t FILTER(xsd:decimal(REPLACE(?t, ' Billion USD', '')) < 1.0) }", "expected_answer": "0.8 Billion USD"},
    {"question": "Who audited AAVE but not Uniswap?", "phenomenon": "Complex Negation", "sparql": "PREFIX ex: <http://defi-kg.org/def/> SELECT ?a WHERE { <http://defi-kg.org/resource/Aave> ex:AUDITED_BY ?a . FILTER NOT EXISTS { <http://defi-kg.org/resource/Uniswap> ex:AUDITED_BY ?a } }", "expected_answer": "http://defi-kg.org/resource/CertiK"},
    {"question": "Auditor of Aave (typo)?", "phenomenon": "Typo Attack", "sparql": "PREFIX ex: <http://defi-kg.org/def/> SELECT ?x WHERE { <http://defi-kg.org/resource/Aave> ex:AUDITED_BY ?x }", "expected_answer": "http://defi-kg.org/resource/CertiK"},
    {"question": "Which organizations audited Aave?", "phenomenon": "Redundancy", "sparql": "PREFIX ex: <http://defi-kg.org/def/> SELECT ?x WHERE { <http://defi-kg.org/resource/Aave> ex:AUDITED_BY ?x }", "expected_answer": "http://defi-kg.org/resource/CertiK"},
]

In [88]:
def run_evaluation(questions: List[Dict], name: str, reference_data: List[Dict]) -> pd.DataFrame:
    """
    Executes the evaluation loop for one question set.

    For every question a SPARQL query is generated, executed, and the single
    returned value is compared for exact equality with the expected answer.

    Args:
        questions: Dicts with "question" and "expected_answer" keys; an
            optional "phenomenon" key is copied into the output row.
        name: Label of the dataset. Accepted for the caller's convenience;
            it is not used in the computation.
        reference_data: Reference (question, sparql) entries handed to the
            query generator for its exact-match lookup.

    Returns:
        A DataFrame with one row per question (id, question, expected_answer,
        sparql_generated, predicted, is_accurate, optionally phenomenon) and
        an "Overall Accuracy" column holding the mean of is_accurate on every
        row. For an empty `questions` list an empty frame with those columns
        (including phenomenon) is returned instead of raising.
    """
    results = []

    for idx, q in enumerate(questions, start=1):
        sparql_query = nl_to_sparql_with_basic_llm_logic(q["question"], reference_data)
        # No query could be generated -> nothing to execute, prediction is None.
        pred = execute_sparql_query(sparql_query) if sparql_query else None

        row = {
            "id": idx,
            "question": q["question"],
            "expected_answer": q["expected_answer"],
            "sparql_generated": sparql_query if sparql_query else "NL->SPARQL FAILED",
            "predicted": pred,
            "is_accurate": pred == q["expected_answer"],
        }
        if "phenomenon" in q:
            row["phenomenon"] = q["phenomenon"]
        results.append(row)

    if not results:
        # pd.DataFrame([]) has no columns at all, so the accuracy lookup below
        # would raise KeyError; hand back a well-formed empty frame instead.
        return pd.DataFrame(columns=[
            "id", "question", "expected_answer", "sparql_generated",
            "predicted", "is_accurate", "phenomenon", "Overall Accuracy",
        ])

    df = pd.DataFrame(results)
    # Scalar broadcast: every row carries the same dataset-level accuracy.
    df["Overall Accuracy"] = df["is_accurate"].mean()
    return df


In [None]:
# Evaluate both question sets and print the per-question tables plus a
# per-phenomenon accuracy summary for the adversarial set.
# NOTE(review): each run passes questions_expected + questions_adversarial as
# reference data, so every evaluated question is itself in the reference pool
# and is resolved by the generator's exact-match lookup - confirm that this is
# the intended experiment design.
print("Running Expected Conditions..")
df_expected = run_evaluation(
    questions_expected,
    "Expected",
    questions_expected + questions_adversarial,
)
print(df_expected[["id", "question", "predicted", "expected_answer", "is_accurate", "Overall Accuracy"]])

print(f"\n{'=' * 80}\n")

print("Running Adversarial Conditions Evaluation..")
df_adv = run_evaluation(
    questions_adversarial,
    "Adversarial",
    questions_expected + questions_adversarial,
)
print(df_adv[["id", "question", "phenomenon", "predicted", "expected_answer", "is_accurate", "Overall Accuracy"]])

print("\nSummary of Adversarial Robustness by Phenomenon")
grouped_accuracy = (
    df_adv.groupby("phenomenon")["is_accurate"]
    .mean()
    .reset_index()
    .rename(columns={"phenomenon": "Phenomenon", "is_accurate": "Accuracy"})
)
print(grouped_accuracy.sort_values(by="Accuracy", ascending=False))


Running Expected Conditions..
