In [2]:
import sqlite3
import subprocess
import re
from pathlib import Path

from RAG import *
from PromptGenerator import *

In [3]:
# Schema of the table that stores one row per (workload, policy) experiment.
# IF NOT EXISTS keeps this cell safe to re-run against an existing database.
create_experiments_sql = '''
    CREATE TABLE IF NOT EXISTS experiments (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        workload TEXT NOT NULL,
        policy TEXT NOT NULL,
        policy_description TEXT NOT NULL,
        workload_description TEXT NOT NULL,
        cpp_file_path TEXT NOT NULL,
        cache_hit_rate REAL NOT NULL,
        score REAL NOT NULL
    )
'''

# Create a database connection (creates the file if it doesn't exist)
conn = sqlite3.connect('funsearch.db')
try:
    c = conn.cursor()
    c.execute(create_experiments_sql)
    conn.commit()
finally:
    # Close the handle even when the DDL fails so the database file is never left open.
    conn.close()

In [4]:
# Workload under study and its free-text description (stored with every row).
workload = "Astar"
workload_description = (
    "astar is derived from a portable 2D path-finding library that is used in game's AI. "
    "This library implements three different path-finding algorithms: "
    "First is the well known A* algorithm for maps with passable and non-passable terrain types. "
    "Second is a modification of the A* path finding algorithm for maps with different terrain types and different move speed. "
    "Third is an implementation of A* algorithm for graphs. "
    "This is formed by map regions with neighborhood relationship. "
    "The library also includes pseudo-intellectual functions for map region determination."
)

# One row per baseline policy: (name, description, C++ source path, cache hit rate).
baseline_policies = [
    (
        "LRU",
        "Evicts the least recently used cache line",
        "./ChampSim_CRC2/example/lru.cc",
        0.4545,
    ),
    (
        "Hawkeye",
        "Hawkeye uses a machine learning-based prediction model to foresee future memory accesses and prioritizes the caching of those that are more likely to be reused. It aims to maximize cache hit rates by improving the decision-making process of which items to retain",
        "./ChampSim_CRC2/example/hawkeye_final.cc",
        0.3574,
    ),
    (
        "Less is More",
        "This policy advocates for keeping fewer entries in the cache but with higher predictability and utility. The idea is that sometimes it's better to maintain fewer, more relevant items, rather than overloading the cache with too many, potentially irrelevant, entries.",
        "./ChampSim_CRC2/example/lime.cc",
        0.3382,
    ),
    (
        "Multiperspective",
        "This policy evaluates cache replacement decisions from multiple angles or prespectives, such as spatial locality, temporal locality, or access frequency. By combining different criteria, it aims to optimize cache hits across varying usage patterns.",
        "./ChampSim_CRC2/example/dancrc2.cc",
        0.371,
    ),
    (
        "Reordering-based Cache Replacement",
        "ReD reorganizes memory access sequences in the cache to exploit temporal locality more effectively. By maintaining a more efficient ordering of memory accesses, it improves cache hit rates and reduces the need to evict useful data.",
        "./ChampSim_CRC2/example/red.cc",
        0.4110,
    ),
    (
        "Ship++",
        "Ship++ is an extension of the Ship policy, designed to evaluate and optimize cache replacement decisions. It uses predictive models and heuristics to decide which data to cache, based on observed patterns in a specific workload.",
        "./ChampSim_CRC2/example/ship++.cc",
        0.3382,
    ),
]

# Parallel lists consumed by the insert cell; index i refers to the same policy in each.
policy = [row[0] for row in baseline_policies]
policy_description = [row[1] for row in baseline_policies]
file_path = [row[2] for row in baseline_policies]
cache_hit_rates = [row[3] for row in baseline_policies]
# The score of each baseline is numerically equal to its cache hit rate.
scores = list(cache_hit_rates)

In [5]:
# Seed the experiments table with one row per baseline policy.
#
# The five lists are parallel (index i describes the same policy in each), so
# refuse to insert anything if their lengths disagree.
seed_columns = (policy, policy_description, file_path, cache_hit_rates, scores)
if len({len(column) for column in seed_columns}) != 1:
    raise ValueError(
        "policy, policy_description, file_path, cache_hit_rates and scores "
        "must all have the same length"
    )

# Each row carries the 7 column values followed by the (workload, policy) pair
# used by the NOT EXISTS guard below.
seed_rows = [
    (
        workload,               # workload
        name,                   # policy
        description,            # policy description
        workload_description,   # workload description (this is a string)
        path,                   # C++ file path
        hit_rate,               # cache hit rate
        score_value,            # funsearch score
        workload,
        name,
    )
    for name, description, path, hit_rate, score_value in zip(*seed_columns)
]

conn = sqlite3.connect('funsearch.db')
try:
    c = conn.cursor()
    # The table has no uniqueness constraint, so a plain INSERT would add a
    # duplicate set of baseline rows every time this cell is re-run. Only
    # insert a (workload, policy) pair that is not already present.
    c.executemany('''
        INSERT INTO experiments (workload, policy, policy_description, workload_description, cpp_file_path, cache_hit_rate, score)
        SELECT ?, ?, ?, ?, ?, ?, ?
        WHERE NOT EXISTS (
            SELECT 1 FROM experiments WHERE workload = ? AND policy = ?
        )
    ''', seed_rows)
    conn.commit()
finally:
    # Close the connection even if an insert fails.
    conn.close()

print("Data has been successfully inserted into the funsearch.db database!")

Data has been successfully inserted into the funsearch.db database!


## Note: Run only up to the cell above. The remaining code below has been integrated into run_loop.py.

In [1]:
from RAG import ExperimentRAG

In [3]:
rag = ExperimentRAG('funsearch.db')

try:
    workload_name = "Astar"

    # Example 1: print the response generated for the workload
    response = rag.generate_response(workload_name)
    print(response)

    # Example 2: Get raw policy data for further processing
    top_policies = rag.get_top_policies_by_cache_hit(workload_name)
    print("\nRaw policy data:")
    # The loop variable is deliberately NOT called `policy`: that name is the
    # module-level list of baseline policy names used by the seeding cell, and
    # rebinding it here would break that cell on a later re-run.
    for policy_row in top_policies:
        print(f"Policy: {policy_row['policy']}")
        print(f"Policy Description: {policy_row['policy_description']}")
        print(f"Workload Description: {policy_row['workload_description']}")
        print(f"Cache hit rate: {policy_row['cache_hit_rate']:.2%}")
        print(f"CPP file: {policy_row['cpp_file_path']}\n")

finally:
    # Ensure the connection is closed
    rag.close()

Workload: Astar
Description: astar is derived from a portable 2D path-finding library that is used in game's AI. This library implements three different path-finding algorithms: First is the well known A* algorithm for maps with passable and non-passable terrain types. Second is a modification of the A* path finding algorithm for maps with different terrain types and different move speed. Third is an implementation of A* algorithm for graphs. This is formed by map regions with neighborhood relationship. The library also includes pseudo-intellectual functions for map region determination.

Top 2 policies by cache hit rate:

1. Policy: LRU
   Description: Evicts the least recently used cache line
   Cache Hit Rate: 45.45%
   CPP File Path: C:/GenAI_Project/ChampSim_CRC2/example/lru.cc

2. Policy: Reordering-based Cache Replacement
   Description: ReD reorganizes memory access sequences in the cache to exploit temporal locality more effectively. By maintaining a more efficient ordering of

In [4]:
# Build the LLM prompt for the "Astar" workload.
generator = PolicyPromptGenerator('funsearch.db')
prompt = ""
try:
    prompt = generator.generate_prompt("Astar")
    # Header line; the prompt body itself is printed after the generator is closed.
    print("Generated Prompt:")
finally:
    # Always release the generator, whether or not prompt generation succeeded.
    generator.close()

print(prompt)

Generated Prompt:
You are a cache policy design expert. Analyze the workload and top policies, then create a new improved policy.

# Workload
Name: Astar
Description: astar is derived from a portable 2D path-finding library that is used in game's AI. This library implements three different path-finding algorithms: First is the well known A* algorithm for maps with passable and non-passable terrain types. Second is a modification of the A* path finding algorithm for maps with different terrain types and different move speed. Third is an implementation of A* algorithm for graphs. This is formed by map regions with neighborhood relationship. The library also includes pseudo-intellectual functions for map region determination.

# Examples
## Policy 1
Name: LRU
Description: Evicts the least recently used cache line
Cache Hit Rate: 45.45%
Implementation:
```cpp
#include "champsim_crc2.h"

#define NUM_CORE 1
#define LLC_SETS NUM_CORE*2048
#define LLC_WAYS 16

uint32_t lru[LLC_SETS][LLC_WAYS];

In [None]:
# `os` is needed for os.getenv below and is not imported anywhere else in the
# notebook, so import it here alongside the other cell-local imports.
import os

from dotenv import load_dotenv
from openai import OpenAI

# Read settings from a local .env file; override=False keeps any value that is
# already set in the process environment.
load_dotenv(dotenv_path=Path(".env"), override=False)
client = OpenAI(
        api_key=os.getenv("OPENAI_API_KEY"),
)

# Send the generated prompt as a single user message.
resp = client.chat.completions.create(
    model="o4-mini",
    store=True,
    messages=[{"role": "user", "content": prompt}],
    # temperature=0.3,
)

In [None]:
# Pull the message content out of the first returned choice and display it.
first_choice = resp.choices[0]
text = first_choice.message.content
print(text)

In [5]:
# Load the saved prompt text. The encoding is given explicitly so the result
# does not depend on the platform's default locale encoding, matching the
# utf-8 reads and writes used elsewhere in this notebook.
prompt_file = Path("prompts/policy_prompt.txt")
prompt_text = prompt_file.read_text(encoding="utf-8")

In [12]:
import re
from pathlib import Path
from typing import Optional, Tuple

# ───────────────────────── helpers ──────────────────────────
def _extract(pattern: str, text: str) -> Optional[str]:
    """Return the first capture group of ``pattern`` in ``text``, stripped.

    The search is case-insensitive and ``.`` also matches newlines.
    Returns ``None`` when the pattern does not match.
    """
    m = re.search(pattern, text, flags=re.DOTALL | re.IGNORECASE)
    return m.group(1).strip() if m else None

# ───────────────────────── parser ───────────────────────────
def parse_policy_content(file_path: str) -> Tuple[Optional[str], ...]:
    """Parse a generated-policy text file into its four components.

    Args:
        file_path: Path of a UTF-8 text file containing a ``## Policy Name``
            section, a ``## Policy Description`` section and a fenced C++
            code block.

    Returns:
        ``(policy_name, policy_desc, cpp_code, class_name)``. Each element is
        ``None`` when the corresponding part cannot be found. ``class_name``
        is searched for only inside the extracted code.

    Raises:
        OSError: If the file cannot be read.
    """
    content = Path(file_path).read_text(encoding="utf-8")

    newline = r'(?:\r\n|\n|\r)'        # tolerate any line ending

    # The name is the single line after the heading; accept end-of-text as a
    # terminator so a name on the very last line (no trailing newline) is found.
    name_pat = rf'##\s*Policy\s*Name\s*{newline}(.*?)(?:{newline}|\Z)'

    # The description runs until the next `##` heading, the next code fence, or
    # end-of-text, whichever comes first. Stopping at a fence keeps the C++
    # source out of the description when no heading separates the two, and the
    # end-of-text case keeps a trailing description from being lost.
    desc_pat = (
        rf'##\s*Policy\s*Description\s*{newline}'
        rf'(.*?)(?={newline}(?:##|```)|\Z)'
    )

    # Accept the common fence labels for C++ (cpp, c++, cxx, cc).
    code_pat = r'```(?:cpp|c\+\+|cxx|cc)\s*(.*?)\s*```'
    class_pat = r'class\s+(\w+)\s*(?:[:{])'       # name followed by `:` or `{`

    policy_name = _extract(name_pat,  content)
    policy_desc = _extract(desc_pat,  content)
    cpp_code    = _extract(code_pat,   content)
    class_name  = _extract(class_pat,  cpp_code or "")  # search only in code

    return policy_name, policy_desc, cpp_code, class_name


In [13]:
policy_name, policy_desc, cpp_code, class_name = parse_policy_content("generated_policies/generated_policy2.txt")

print("Name :", policy_name)
# parse_policy_content returns None for any part it cannot find; fall back to
# an empty string so the 80-character preview slice cannot raise TypeError.
print("Desc :", (policy_desc or "")[:80], "…")
print("Code :", cpp_code)
print("Class:", class_name)

Name : ASTAR-DSRRIP
Desc : ASTAR-DSRRIP combines signature-based SHiP feedback with Dynamic Static Re-Refer …
Code : #include <vector>
#include <cstdint>
#include <iostream>
#include "../inc/champsim_crc2.h"

#define NUM_CORE    1
#define LLC_SETS    (NUM_CORE * 2048)
#define LLC_WAYS    16

// SRRIP parameters
static const uint8_t  maxRRPV    = 3;

// Signature History Counter Table parameters
static const uint32_t SHCT_SIZE  = 1024;
static const uint8_t  SHCT_MAX   = 3;

// Replacement state
static uint8_t   rrpv[LLC_SETS][LLC_WAYS];
static uint16_t  blockSig[LLC_SETS][LLC_WAYS];
static uint8_t   SHCT[SHCT_SIZE];

// Eviction feedback
static uint16_t  pendingSig[LLC_SETS];
static uint8_t   pendingRRPV[LLC_SETS];
static bool      pendingValid[LLC_SETS];

// Statistics
static uint64_t total_accesses;
static uint64_t total_hits;
static uint64_t total_misses;
static uint64_t bypass_count;

// Initialize replacement state
void InitReplacementState() {
    std::cout << "Initialize ASTAR-DS

In [14]:
test_cc = Path("./ChampSim_CRC2/example/test.cc")

# cpp_code is None when no C++ code fence was found in the generated policy;
# fail here with a clear message instead of an opaque TypeError from write_text.
if cpp_code is None:
    raise ValueError("No C++ code block was parsed; nothing to write to test.cc")

test_cc.write_text(cpp_code, encoding="utf-8")

4896

In [15]:
# Compile the generated policy against the ChampSim CRC2 library.
# An argument list is used (no shell=True) to match the simulation cell and
# avoid shell parsing of the command line.
compile_cmd = [
    "g++", "-Wall", "-std=c++17",
    "-o", "ChampSim_CRC2/example/test.out",
    "ChampSim_CRC2/example/test.cc",
    "ChampSim_CRC2/lib/config1.a",
]
result = subprocess.run(compile_cmd, capture_output=True, text=True)

# Always rebind stderr_log (empty string on a clean build) so a previous run's
# warnings or errors can never be mistaken for this build's output.
stderr_log = result.stderr

# Stop on a failed build: otherwise the next cell would silently simulate a
# stale test.out left over from an earlier policy.
if result.returncode != 0:
    raise RuntimeError(
        f"g++ failed with exit code {result.returncode}:\n{stderr_log}"
    )

In [17]:
# Assemble the simulator command line: binary first, then flag/value pairs.
run_cmd = ["ChampSim_CRC2/example/test.out"]
run_cmd += ["-warmup_instructions", "2000000"]
run_cmd += ["-simulation_instructions", "10000000"]
run_cmd += ["-traces", "ChampSim_CRC2/trace/astar_313B.trace.gz"]

# check=True raises CalledProcessError if the simulator exits with a non-zero status.
result = subprocess.run(run_cmd, capture_output=True, text=True, check=True)

# Keep the captured stdout for the statistics parsing that follows.
output = result.stdout

In [18]:
# Show the simulator's captured stdout in full for manual inspection.
print(output)


*** ChampSim Multicore Out-of-Order Simulator ***

Warmup Instructions: 2000000
Simulation Instructions: 10000000
Configuration: 1
Number of CPUs: 1
LLC sets: 2048
LLC ways: 16
Off-chip DRAM Channels: 1 Width: 64-bit Data Rate: 1600 MT/s

CPU 0 runs ChampSim_CRC2/trace/astar_313B.trace.gz
Initialize ASTAR-DSRRIP replacement state
Heartbeat CPU 0 instructions: 1000001 cycles: 678651 heartbeat IPC: 1.47351 cumulative IPC: 1.47351 (Simulation time: 0 hr 0 min 6 sec) 
[Heartbeat] accesses=27181 hits=5724 misses=16637 bypasses=10544
Heartbeat CPU 0 instructions: 2000003 cycles: 1269477 heartbeat IPC: 1.69255 cumulative IPC: 1.57545 (Simulation time: 0 hr 0 min 12 sec) 
[Heartbeat] accesses=169010 hits=29679 misses=81521 bypasses=87489

Warmup complete CPU 0 instructions: 2000003 cycles: 1269477 (Simulation time: 0 hr 0 min 12 sec) 

Heartbeat CPU 0 instructions: 3000003 cycles: 8678335 heartbeat IPC: 0.134974 cumulative IPC: 0.134974 (Simulation time: 0 hr 0 min 18 sec) 
[Heartbeat] access

In [19]:
# Locate the "LLC TOTAL" summary line in the simulator output and capture its
# ACCESS / HIT / MISS counters.
m = re.search(
    r"LLC TOTAL\s+ACCESS:\s+(\d+)\s+HIT:\s+(\d+)\s+MISS:\s+(\d+)",
    output
)
if not m:
    raise RuntimeError("Failed to find LLC TOTAL statistics in Champsim output")

accesses, hits, misses = (int(group) for group in m.groups())

# Guard against a run that reports zero LLC accesses, which would otherwise
# raise ZeroDivisionError; such a run is scored as a 0% hit rate.
hit_rate = hits / accesses if accesses else 0.0   # e.g. 10423 / 33934 ≈ 0.3073
print(f"Measured LLC hit rate: {hit_rate:.2%}")

Measured LLC hit rate: 17.66%


In [34]:
# Values recorded for this experiment. policy_name and policy_desc come from
# parse_policy_content; workload_description is the string defined with the
# seed data; hit_rate is the measured LLC hit rate.
workload             = "Astar"
cpp_file_path        = str(test_cc)               # always this path
score                = hit_rate                   # or your custom scoring

db = sqlite3.connect("funsearch.db")
try:
    c = db.cursor()
    c.execute(
        """
    INSERT INTO experiments
      ( workload,
        policy,
        policy_description,
        workload_description,
        cpp_file_path,
        cache_hit_rate,
        score
      )
    VALUES (?, ?, ?, ?, ?, ?, ?)
    """,
        (
            workload,
            policy_name,
            # Store the description parsed for THIS policy. The previous code used
            # policy_description[-1], i.e. the last seeded baseline description,
            # and rebound policy_description so a re-run stored a single character.
            policy_desc,
            workload_description,
            cpp_file_path,
            hit_rate,
            score
        )
    )
    db.commit()
finally:
    # Close the connection even if the insert fails (e.g. a NOT NULL violation
    # when a parsed field is missing).
    db.close()

print("✅ New experiment recorded in funsearch.db")

✅ New experiment recorded in funsearch.db
