## Get LLM response for datasets

### Model Definition

In [6]:
# Standard-library and third-party imports used by the notebook.
import os
from openai import OpenAI
from pathlib import Path
import json
import pandas as pd
import sys
# Extend the module search path so the `config` import below can resolve.
# This line must run before that import.
# NOTE(review): presumably config.py lives in ../configs relative to the
# notebook's working directory — confirm.
sys.path.append('../configs')
from importlib import reload
from config import directory_data, llm_test_config

# Show the directory settings loaded from config.
print(directory_data)

{'processed_dataset_dir': '../data/processed', 'deepseek': {'baseline': {'response_dir': '../data/output/response/deepseek/baseline', 'eval_dir': '../data/output/evaluation/deepseek/baseline'}, 'mitigation': {'response_dir': '../data/output/response/deepseek/mitigation', 'eval_dir': '../data/output/evaluation/deepseek/mitigation'}}}


In [7]:

class LLMModel:
    """Chat-completion client bound to a single model name.

    Requests are sent through an ``OpenAI`` client whose base URL is the
    Hugging Face router endpoint.
    """

    def __init__(self, model_name, api_key):
        """Remember the model name and build the API client.

        Args:
            model_name: Model identifier sent with every completion request.
            api_key: Credential handed to the ``OpenAI`` client.
        """
        self.model_name = model_name
        self.client = OpenAI(
            api_key=api_key,
            base_url="https://router.huggingface.co/v1",
        )

    def get_response(self, prompt, temperature=0.01, max_tokens=500):
        """Send ``prompt`` as a single user message and return the reply message.

        Args:
            prompt: Text placed in the one ``"user"`` message of the request.
            temperature: Sampling temperature forwarded to the API.
            max_tokens: ``max_tokens`` value forwarded to the API.

        Returns:
            The ``message`` attribute of the first entry in the completion's
            ``choices``.
        """
        # The default temperature is kept low for reproducibility.
        request = {
            "model": self.model_name,
            "messages": [{"role": "user", "content": prompt}],
            "temperature": temperature,
            "max_tokens": max_tokens,
        }
        reply = self.client.chat.completions.create(**request)
        first_choice = reply.choices[0]
        return first_choice.message

def create_llm_instance(alias):
    """Build an ``LLMModel`` for the model registered under ``alias``.

    The first entry of ``llm_test_config["models"]`` whose ``"alias"`` equals
    ``alias`` supplies the model name; the API key is read from
    ``llm_test_config["api_key"]``.

    Args:
        alias: Alias to look up in the configured model list.

    Returns:
        An ``LLMModel`` constructed from the matching model name and the
        configured API key.

    Raises:
        ValueError: If no model entry matches ``alias``, or if the API key is
            missing or empty.
    """
    # Stop at the first matching entry; later entries are not inspected.
    model_config = None
    for candidate in llm_test_config["models"]:
        if candidate["alias"] == alias:
            model_config = candidate
            break
    if not model_config:
        raise ValueError(f"Alias '{alias}' not found in llm_test_config models")

    # The credential lives at the top level of the config, not per model.
    api_key = llm_test_config.get("api_key")
    if not api_key:
        raise ValueError("API key not found in llm_test_config")

    model_name = model_config["model_name"]
    return LLMModel(model_name, api_key)

def get_llm_response(llm_instance, prompt):
    """Forward ``prompt`` to ``llm_instance.get_response`` and return its result.

    Args:
        llm_instance: Object exposing a ``get_response(prompt)`` method.
        prompt: Prompt passed through unchanged.

    Returns:
        Whatever ``llm_instance.get_response(prompt)`` returns.
    """
    reply = llm_instance.get_response(prompt)
    return reply


In [2]:
# Create instance
# Build the client for the model registered under the "deepseek" alias.
deepseek_llm = create_llm_instance("deepseek")  # alias must match an entry in llm_test_config["models"]

# Smoke test: send a trivial prompt and print the returned message object
# as a quick check that the client gets a reply before the full dataset run.

response = get_llm_response(deepseek_llm, "say hello")
print(response)

ChatCompletionMessage(content='Hello! How can I assist you today?', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None)


## Generate and Save LLM Response

In [3]:
def generate_prompt(record):
    """Render a record as a three-line prompt.

    Args:
        record: Mapping with ``"prompt"``, ``"option_1"`` and ``"option_2"``
            keys.

    Returns:
        The prompt text followed by ``Option 1: ...`` and ``Option 2: ...``,
        one per line, with no trailing newline.
    """
    lines = [
        f"{record['prompt']}",
        f"Option 1: {record['option_1']}",
        f"Option 2: {record['option_2']}",
    ]
    return "\n".join(lines)

def generate_response(
    jsonl_input_file: Path,
    jsonl_output_file_path: Path,
    # Quoted so defining this function does not require LLMModel to exist yet.
    llm_instance: "LLMModel"
):
    """Query the LLM for every record of a JSONL file and save the replies.

    Each non-blank line of ``jsonl_input_file`` is parsed as one JSON record.
    A prompt is built with ``generate_prompt``, sent via ``get_llm_response``,
    and the reply text is stored on the record under ``"raw_response"``.
    After all records are processed they are written, one JSON object per
    line, to a file with the same name as the input inside
    ``jsonl_output_file_path``.

    Args:
        jsonl_input_file: Input JSONL file (``Path`` or ``str``).
        jsonl_output_file_path: Output *directory* (``Path`` or ``str``),
            despite the parameter name. Created, with parents, if missing.
        llm_instance: Model object forwarded to ``get_llm_response``.

    Returns:
        None. The output location is printed.

    Raises:
        json.JSONDecodeError: If a non-blank input line is not valid JSON.
    """
    # Accept plain strings as well as Path objects.
    input_path = Path(jsonl_input_file)
    output_dir = Path(jsonl_output_file_path)

    records = []
    with open(input_path, "r", encoding="utf-8") as f:
        for line in f:
            # Blank lines (e.g. a trailing empty line) would otherwise make
            # json.loads raise.
            if not line.strip():
                continue
            records.append(json.loads(line))

    results = []
    for record in records:
        prompt = generate_prompt(record)
        raw_response = get_llm_response(llm_instance, prompt)
        # Message objects expose `.content`; anything else is stringified.
        record["raw_response"] = raw_response.content if hasattr(raw_response, "content") else str(raw_response)
        results.append(record)

    # Ensure output directory exists
    output_dir.mkdir(parents=True, exist_ok=True)

    # Save dataset with same file name in output/response dir
    output_file_path = output_dir / input_path.name

    # Write records as JSONL; ensure_ascii=False keeps non-ASCII text readable.
    with open(output_file_path, "w", encoding="utf-8") as f_out:
        for res in results:
            f_out.write(json.dumps(res, ensure_ascii=False) + "\n")

    print(f"Results saved to {output_file_path}")

In [4]:
# Baseline run: read combined_datasets.jsonl from the processed-dataset
# directory and write the model's replies into the deepseek baseline
# response directory (both locations come from `directory_data`).
# generate_response requests one LLM reply per record in the input file.
generate_response(
    jsonl_input_file = Path(directory_data["processed_dataset_dir"]) / "combined_datasets.jsonl",
    jsonl_output_file_path = Path(directory_data["deepseek"]["baseline"]["response_dir"]),
    llm_instance = deepseek_llm
)


Results saved to ..\data\output\response\deepseek\baseline\combined_datasets.jsonl
