In [None]:

import pandas as pd
import random
import time
import uuid

# =========================================================
# CONFIGURATION

# --- File locations -------------------------------------------------------
INPUT_CSV = r"C:\Users\mikha\OneDrive\Documents\Demos\AI Observability\OpenAI_MockResponses.csv"
OUTPUT_CSV = r"C:\Users\mikha\OneDrive\Documents\Demos\AI Observability\llm_observability_demo.csv"

# --- Categorical value pools ----------------------------------------------
# NOTE: despite its name, NUM_MODELS holds the model *names*; one output row
# is generated per entry for every input row.
NUM_MODELS = ["gpt-4.1", "gpt-4o-mini", "gpt-3.5-turbo", "llama2-13b", "mistral-7b-instruct"]
RESPONSE_QUALITY = ["Good", "Average", "Poor"]
RESPONSE_TYPE = ["Informative", "Creative", "Actionable"]
COMPLEXITY_LEVELS = ["low", "medium", "high"]

# --- Inclusive (low, high) bounds for the simulated numeric metrics -------
INPUT_TOKENS_RANGE = (10, 80)
OUTPUT_TOKENS_RANGE = (30, 200)
LATENCY_RANGE = (0.1, 1.5)
COMPLEXITY_RANGE = (1, 10)
READABILITY_RANGE = (30, 90)

# --- Reproducibility ------------------------------------------------------
# Base seed; the global generator is seeded immediately at import time.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

# =========================================================
# LOAD INPUT CSV

# Read the mock responses into a DataFrame and report how many rows were loaded.
df_input = pd.read_csv(INPUT_CSV)
num_input_rows = df_input.shape[0]
print(f"Loaded {num_input_rows} rows from {INPUT_CSV}")

# =========================================================
# GENERATE DATASET

# Build one output record per (input row, model) pair. Every simulated value
# is drawn from the global `random` generator, which is reseeded for each pair
# so that a given pair always produces the same values on every run.
rows = []

for idx, row_input in df_input.iterrows():
    # Loop-invariant work for this input row: the copied input columns and the
    # text statistics do not depend on the model, so compute them once.
    base_row = row_input.to_dict()

    # A blank CSV cell is read as NaN (a float); treat it as empty text rather
    # than crashing on `.split()`.
    raw_output = row_input["mock_response"]
    model_output = "" if pd.isna(raw_output) else str(raw_output)

    # Count only non-empty segments so a trailing "." does not add a phantom
    # sentence and empty text counts as zero sentences.
    num_sentences = sum(1 for part in model_output.split(".") if part.strip())
    num_words = len(model_output.split())

    for model in NUM_MODELS:
        # Seed from a string rather than hash(model): the built-in str hash is
        # salted per interpreter process, so it would give different seeds on
        # every run. random.seed() derives its state from a str in a stable
        # way, and the composite key cannot collide across (row, model) pairs.
        random.seed(f"{RANDOM_SEED}-{idx}-{model}")

        row = dict(base_row)  # preserve all input columns

        # Deterministic trace_id derived from the row index and model name
        uuid_input = f"{idx}-{model}"
        trace_id = str(uuid.uuid5(uuid.NAMESPACE_DNS, uuid_input))

        # Add categorical descriptors
        row.update({
            "trace_id": trace_id,
            "model": model,
            "response_quality": random.choice(RESPONSE_QUALITY),
            "response_type": random.choice(RESPONSE_TYPE),
            "complexity": random.choice(COMPLEXITY_LEVELS),
            # Wall-clock time of generation; intentionally not reproducible.
            "generation_timestamp": time.time(),
        })

        # Add simulated metrics
        input_tokens = random.randint(*INPUT_TOKENS_RANGE)
        output_tokens = random.randint(*OUTPUT_TOKENS_RANGE)
        row.update({
            "input_tokens": input_tokens,
            "output_tokens": output_tokens,
            "total_tokens": input_tokens + output_tokens,
            "latency_sec": round(random.uniform(*LATENCY_RANGE), 3),
            # Flat per-1K-token prices: 0.0002 USD input, 0.0008 USD output.
            "estimated_cost_usd": round(
                (input_tokens / 1000) * 0.0002 + (output_tokens / 1000) * 0.0008, 6
            ),
            "num_sentences": num_sentences,
            "num_words": num_words,
            "num_special_tokens": random.randint(0, 5),
            "prompt_complexity_score": random.randint(*COMPLEXITY_RANGE),
            "output_readability_score": random.randint(*READABILITY_RANGE),
            "hallucination_score": round(random.uniform(0, 1), 3),
            "repetition_score": round(random.uniform(0, 1), 3),
            "sentiment_score": round(random.uniform(-1, 1), 3),
            "response_variation_score": round(random.uniform(0, 1), 3),
            "toxicity_score": round(random.uniform(0, 1), 3),
            "model_version": f"v{random.randint(1,3)}.{random.randint(0,9)}.{random.randint(0,9)}",
            "num_edits_required": random.randint(0, 5),
        })

        rows.append(row)

# =========================================================
# BUILD DATAFRAME AND SAVE CSV

# Assemble the generated records into a DataFrame and write it out without the
# index column. The loop above appends one record per (input row, model) pair,
# so the row count is len(df_input) * len(NUM_MODELS).
df_output = pd.DataFrame(data=rows)
print(f"Generated {df_output.shape[0]} rows")
df_output.to_csv(OUTPUT_CSV, index=False)
print(f"Dataset saved to '{OUTPUT_CSV}'")


Loaded 110 rows from C:\Users\mikha\OneDrive\Documents\Demos\AI Observability\OpenAI_MockResponses.csv
Generated 550 rows
Dataset saved to 'C:\Users\mikha\OneDrive\Documents\Demos\AI Observability\llm_observability_demo.csv'
