In [None]:
!pip install datasets
!pip install transformers
!pip install huggingface_hub
!pip install openrouter
!pip install python-dotenv
!pip install sentence_transformers
!pip install scikit-learn
!pip install vllm


In [None]:
import os
import random
import json
import pandas as pd
import time
import torch
from typing import Tuple, Dict, List
from tqdm import tqdm
from vllm import LLM
from vllm.sampling_params import SamplingParams
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
from sentence_transformers import SentenceTransformer, util

## Feature Extraction

In [None]:
def parse_cnf(filename):
    """Parse a DIMACS CNF file.

    Clauses are read as a stream of integer literals terminated by ``0``, so
    several clauses on one text line are kept separate and a clause that
    spans several lines is kept together.

    Args:
        filename: Path of the CNF file to read.

    Returns:
        A tuple ``(num_vars, num_clauses, clauses)``. ``num_vars`` and
        ``num_clauses`` are the values declared on the ``p`` header line
        (both 0 if no header is present); ``clauses`` is a list of non-empty
        lists of non-zero integer literals.

    Raises:
        IndexError, ValueError: if the ``p`` header line is malformed.
    """
    num_vars, num_clauses = 0, 0
    clauses = []
    pending = []  # literals of the clause whose terminating 0 has not been seen yet
    with open(filename) as f:
        for raw_line in f:
            # Strip first so indented header/comment lines are still recognised.
            line = raw_line.strip()
            if not line or line.startswith('c'):
                continue
            if line.startswith('p'):
                parts = line.split()
                num_vars = int(parts[2])
                num_clauses = int(parts[3])
                continue
            try:
                literals = [int(token) for token in line.split()]
            except ValueError:
                # Skip lines that cannot be parsed as integers
                # (for example a '%' end-of-data marker).
                continue
            for literal in literals:
                if literal != 0:
                    pending.append(literal)
                elif pending:
                    # 0 closes the current clause; a 0 with nothing pending is ignored.
                    clauses.append(pending)
                    pending = []
    if pending:
        # Keep a final clause that is missing its terminating 0.
        clauses.append(pending)
    return num_vars, num_clauses, clauses

def compute_features(filename):
    """Compute summary statistics for one CNF file.

    Args:
        filename: Path of the CNF file, passed on to ``parse_cnf``.

    Returns:
        A dict with the file's base name, the variable and clause counts
        returned by ``parse_cnf``, the clause/variable ratio, and the
        fractions of clauses that have exactly two literals, exactly three
        literals, or at most one positive literal (Horn). Each fraction is
        taken over ``num_clauses`` and is 0 when that count is 0; the ratio
        is 0 when ``num_vars`` is 0.
    """
    num_vars, num_clauses, clauses = parse_cnf(filename)

    binary_count = 0
    ternary_count = 0
    horn_count = 0
    for clause in clauses:
        size = len(clause)
        if size == 2:
            binary_count += 1
        elif size == 3:
            ternary_count += 1
        # A Horn clause has at most one positive literal.
        positives = len([lit for lit in clause if lit > 0])
        if positives <= 1:
            horn_count += 1

    def _per_clause(count):
        # Share of the declared clause count; guards against division by zero.
        if num_clauses:
            return count / num_clauses
        return 0

    if num_vars:
        ratio = num_clauses / num_vars
    else:
        ratio = 0

    return {
        'filename': os.path.basename(filename),
        'num_vars': num_vars,
        'num_clauses': num_clauses,
        'clause_var_ratio': ratio,
        'fraction_binary': _per_clause(binary_count),
        'fraction_ternary': _per_clause(ternary_count),
        'fraction_horn': _per_clause(horn_count),
    }

### Sanity Check


In [None]:
# Smoke test: run feature extraction on a single CNF file and print the result.
# NOTE(review): the absolute /content path looks Colab-specific — the file must
# already exist there for this cell to run.
test_cnf = "/content/uf20-04.cnf.txt"
features = compute_features(test_cnf)
print(features)

## Strategy Analysis

In [None]:
# Strategy-analysis model, wrapped in a transformers text-generation pipeline.
# Substitution note: DeepSeek-R1 (paper) --> DeepSeek-R1-Distill-Qwen-32B.
# The checkpoint actually loaded below is DeepSeek-V2-Lite; the distilled 32B
# name is kept only as a commented-out alternative on the same line.
model_name_strategy = "deepseek-ai/DeepSeek-V2-Lite" # "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
tokenizer_strategy = AutoTokenizer.from_pretrained(model_name_strategy)
model_strategy = AutoModelForCausalLM.from_pretrained(
    model_name_strategy,
    device_map="auto",  # device placement is delegated to the library
    dtype=torch.float16,  # load weights in half precision
    offload_folder="offload_dir",  # NOTE(review): presumably used for weights that do not fit in memory — confirm
)
# Pipeline object called by generate_strategy_patch.
strategy_generator = pipeline(
    "text-generation",
    model=model_strategy,
    tokenizer=tokenizer_strategy,
)

## Code Patch Generation

In [None]:
# Code-generation model, wrapped in a transformers text-generation pipeline.
# Substitution note: Claude-3.7 (paper) --> Qwen3-32B.
# The checkpoint actually loaded below is Qwen2.5-Coder-14B; the Qwen3-32B name
# is kept only as a commented-out alternative on the same line.
model_name_code = "Qwen/Qwen2.5-Coder-14B" # "Qwen/Qwen3-32B"
tokenizer_code = AutoTokenizer.from_pretrained(model_name_code)
# Half-precision weights; device placement is delegated to the library.
model_code = AutoModelForCausalLM.from_pretrained(model_name_code, device_map="auto", dtype=torch.float16)
# Pipeline object called by generate_code_patch.
code_generator = pipeline(
    "text-generation",
    model=model_code,
    tokenizer=tokenizer_code,
)

### LLM Sanity Check

In [None]:
# Smoke test: load each model with vLLM and ask one trivial question.
# NOTE(review): each LLM(...) asks for 90% of GPU memory and nothing is freed
# between iterations or from the transformers models loaded in earlier cells —
# confirm the target hardware can hold all of these at once.
# model_ids = ["deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", "Qwen/Qwen2.5-32B"]
model_ids = ["deepseek-ai/DeepSeek-V2-Lite", "Qwen/Qwen2.5-Coder-14B"]

prompt = "What is the capital of France?"

for model_id in model_ids:
    print(f"Testing model: {model_id}")

    llm = LLM(model=model_id, gpu_memory_utilization=0.9, max_model_len=2048, dtype="float16")
    tokenizer = AutoTokenizer.from_pretrained(model_id)

    if getattr(tokenizer, "chat_template", None):
        # add_generation_prompt=True ends the prompt with the assistant-turn
        # header, so the model answers instead of continuing the user message.
        input_text = tokenizer.apply_chat_template(
            [{"role": "user", "content": prompt}],
            tokenize=False,
            add_generation_prompt=True,
        )
    else:
        # Tokenizers without a chat template cannot format messages; send the raw prompt.
        input_text = prompt
    sampling_params = SamplingParams(temperature=0.7, max_tokens=32)
    result = llm.generate(input_text, sampling_params)
    output = [request_output.outputs[0].text for request_output in result]
    print(f"Model response: {output[0]}\n")


## Prompting & LLM Wrapper

In [None]:
def prepare_strategy_prompt(features: dict) -> str:
    """Build the prompt sent to the strategy model.

    Args:
        features: Mapping that provides the numeric entries
            ``clause_var_ratio``, ``fraction_binary``, ``fraction_ternary``
            and ``fraction_horn``; other keys are ignored.

    Returns:
        A multi-line prompt listing the four values with two decimals and
        asking for a modification of the restart function in Kissat.

    Raises:
        KeyError: if one of the four entries is missing.
    """
    ratio = features['clause_var_ratio']
    binary = features['fraction_binary']
    ternary = features['fraction_ternary']
    horn = features['fraction_horn']

    pieces = [
        "You are a researcher optimizing the heuristics of a SAT Solver. Given these observed features about the formula:\n",
        f"Clause/Variable ratio: {ratio:.2f}\n",
        f"Fraction of binary clauses: {binary:.2f}\n",
        f"Fraction of ternary clauses: {ternary:.2f}\n",
        f"Fraction of Horn clauses: {horn:.2f}\n\n",
        "Suggest an effective heuristic modification for the restart function in Kissat.\n",
        "Explain briefly why this would improve solver performance.",
    ]
    return "".join(pieces)

def generate_strategy_patch(features: dict, max_new_tokens: int = 512) -> str:
    """Ask the strategy model for a restart-heuristic suggestion.

    Args:
        features: Formula features, as accepted by ``prepare_strategy_prompt``.
        max_new_tokens: Upper bound on the number of generated tokens. The
            prompt does not count against this budget.

    Returns:
        The text generated by ``strategy_generator``, without the prompt.
    """
    prompt = prepare_strategy_prompt(features)
    print("\n[INFO] Sending prompt to strategy LLM. Prompt preview:")
    print(prompt[:300] + "..." if len(prompt) > 300 else prompt)
    st = time.time()
    response = strategy_generator(
        prompt,
        # max_length would also count the prompt tokens, shrinking the room
        # left for the answer; bound only the newly generated tokens.
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=0.6,
        # Return only the completion; by default the pipeline echoes the
        # prompt, which then leaked into the code prompt and the log.
        return_full_text=False,
    )
    et = time.time()
    print(f"[INFO] Strategy LLM responded in {et-st:.2f} seconds.")
    return response[0]['generated_text']

def prepare_code_prompt(strategy_text: str) -> str:
    """Build the prompt sent to the code model.

    Args:
        strategy_text: Analysis and suggested improvements to embed in the prompt.

    Returns:
        A prompt that introduces the task, quotes ``strategy_text`` and asks
        for a concise C code patch.
    """
    header = (
        "You are a developer optimizing the restart heuristic of a SAT solver.\n"
        "Based on the following analysis and suggested improvements:\n"
    )
    request = "Please write a concise C code patch implementing the suggested heuristic."
    return header + f"{strategy_text}\n\n" + request

def generate_code_patch(strategy_text: str, max_new_tokens: int = 1024) -> str:
    """Ask the code model for a C patch implementing a suggested heuristic.

    Args:
        strategy_text: Strategy suggestion to embed via ``prepare_code_prompt``.
        max_new_tokens: Upper bound on the number of generated tokens. The
            prompt does not count against this budget.

    Returns:
        The text generated by ``code_generator``, without the prompt.
    """
    prompt = prepare_code_prompt(strategy_text)
    print("\n[INFO] Sending prompt to code LLM. Prompt preview:")
    print(prompt[:300] + "..." if len(prompt) > 300 else prompt)
    st = time.time()
    response = code_generator(
        prompt,
        # The prompt embeds the whole strategy text, so a max_length cap
        # (prompt + output) could leave little or no room for the patch.
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=0.6,
        # Return only the completion rather than prompt + completion.
        return_full_text=False,
    )
    et = time.time()
    print(f"[INFO] Code LLM responded in {et-st:.2f} seconds.")
    return response[0]['generated_text']

def generate_chatgpt_patch(prompt: str, model: str = "gpt-5.0-turbo") -> str:
    """Send one user prompt to the OpenAI chat API and return the reply text.

    Works with both generations of the ``openai`` package: the client-based
    API (``openai.OpenAI``) when it is available, otherwise the legacy
    module-level ``openai.ChatCompletion`` API.

    Args:
        prompt: Text sent as a single user message.
        model: Model identifier passed to the API.
            NOTE(review): the default is kept from the original code —
            confirm it names a model your account can access.

    Returns:
        The content of the first choice in the API response.

    Raises:
        ImportError: if the ``openai`` package is not installed.
    """
    # Imported here because the notebook's import cell never imports `openai`;
    # the original body failed with NameError on first use.
    import openai

    print("\n[INFO] Sending prompt to ChatGPT. Prompt preview:")
    print(prompt[:300] + "..." if len(prompt) > 300 else prompt)
    messages = [{"role": "user", "content": prompt}]
    st = time.time()
    if hasattr(openai, "OpenAI"):
        # openai>=1.0: openai.ChatCompletion was removed; use a client object.
        client = openai.OpenAI()
        response = client.chat.completions.create(
            model=model,
            messages=messages,
            max_tokens=512,
            temperature=0.7
        )
        content = response.choices[0].message.content
    else:
        # Legacy openai<1.0 module-level API.
        response = openai.ChatCompletion.create(
            model=model,
            messages=messages,
            max_tokens=512,
            temperature=0.7
        )
        content = response['choices'][0]['message']['content']
    et = time.time()
    print(f"[INFO] ChatGPT responded in {et-st:.2f} seconds.")
    return content

### Example Run

In [None]:
# End-to-end example: features -> strategy suggestion -> code patch -> ChatGPT
# refinement, with every prompt and response recorded in the `logs` dataframe.
import datetime

# Prepare log dataframe
log_columns = ['timestamp', 'cnf_file', 'features', 'strategy_prompt',
               'strategy_response', 'code_prompt', 'code_response',
               'chatgpt_prompt', 'chatgpt_response']
logs = pd.DataFrame(columns=log_columns)

cnf_path = "/content/uf50-05.cnf.txt"
print("[INFO] Starting feature extraction for:", cnf_path)
features = compute_features(cnf_path)
print("[INFO] Finished computing features:", features)

print("[INFO] Generating strategy analysis suggestion...")
strategy_resp = generate_strategy_patch(features)
print("[INFO] Finished generating strategy patch.")

print("[INFO] Generating code patch...")
code_resp = generate_code_patch(strategy_resp)
print("[INFO] Finished generating code patch.")

chatgpt_prompt = (f"Refine and improve this heuristic code patch:\n{code_resp}")
print("[INFO] Generating refinement via ChatGPT (if available)...")
chatgpt_resp = generate_chatgpt_patch(chatgpt_prompt)
print("[INFO] Finished generating ChatGPT refinement.")

# Log the whole flow
log_entry = {
    'timestamp': datetime.datetime.now(),
    'cnf_file': cnf_path,
    'features': features,
    'strategy_prompt': prepare_strategy_prompt(features),
    'strategy_response': strategy_resp,
    'code_prompt': prepare_code_prompt(strategy_resp),
    'code_response': code_resp,
    'chatgpt_prompt': chatgpt_prompt,
    'chatgpt_response': chatgpt_resp
}

# DataFrame.append was removed in pandas 2.0; build a one-row frame and
# concatenate instead. Concatenation is skipped while `logs` is still empty so
# the empty frame does not influence the resulting column dtypes.
log_row = pd.DataFrame([log_entry], columns=log_columns)
logs = log_row if logs.empty else pd.concat([logs, log_row], ignore_index=True)

print("Strategy Suggestion:\n", strategy_resp)
print("\nCode Patch:\n", code_resp)
print("\nChatGPT Refinement:\n", chatgpt_resp)


In [None]:
# Persist the generation log. to_csv does not create missing directories, so
# make sure the target folder exists first.
os.makedirs("logs", exist_ok=True)
logs.to_csv("logs/generation_log.csv", index=False)
print("Logs saved to logs/generation_log.csv")
