In [69]:
import os
import asyncio
import aiohttp
import json
import re
from random import shuffle
import random
import pandas as pd
from dataclasses import dataclass

In [70]:
from card_utils import read_all_game_data, Card, PlayerCards, PlayerStats

In [71]:
random.seed(42)

In [72]:
from deck_rate_fetcher import fetch_deck_rating

In [73]:
import dotenv
dotenv.load_dotenv()

import nest_asyncio
nest_asyncio.apply()

In [74]:
from langchain_openai import AzureChatOpenAI
from openai import AsyncAzureOpenAI
from langchain_core.messages import SystemMessage, HumanMessage

In [75]:
game_cards_list, player_cards_list, player_stats, player_tags_list = read_all_game_data()

In [76]:
top_decks_per_card = pd.read_json("data/top_decks_per_card.json").to_dict()

In [77]:
for card_name in top_decks_per_card:
    for key in top_decks_per_card[card_name].keys():
        deck_list = top_decks_per_card[card_name][key].split(',')
        shuffle(deck_list)
        top_decks_per_card[card_name][key] = deck_list

In [78]:
@dataclass
class LLMModel:
    name: str
    instance: AzureChatOpenAI | AsyncAzureOpenAI

In [79]:
llm_gpt3 = LLMModel(
    name="gpt-35-turbo",
    instance=AzureChatOpenAI(
        model_name="gpt-35-turbo",
        temperature=0,
        max_tokens=None,
        timeout=None,
        max_retries=2,
    )
)

llm_gpt4o = LLMModel(
    name="gpt-4o",
    instance=AzureChatOpenAI(
        model_name="gpt-4o",
        temperature=0,
        max_tokens=None,
        timeout=None,
        max_retries=2,
    )
)

llm_gpt5 = LLMModel(
    name="gpt-5-chat",
    instance=AzureChatOpenAI(
        model_name="gpt-5-chat",
        temperature=0,
        max_tokens=None,
        timeout=None,
        max_retries=2,
    )
)

In [80]:
class SimpleAIMessage:
    """Clase simple para empaquetar la respuesta con un atributo .content"""
    def __init__(self, content, finish_reason=None):
        self.content = content
        self.finish_reason = finish_reason

class OpenAIClientAdapter:
    def __init__(
        self,
        client: AsyncAzureOpenAI,
        model_name: str,
        temperature: float = 0,
        max_tokens: int = 2048,
        request_timeout: int = 300
    ):
        self.client = client
        self.model_name = model_name
        self.temperature = temperature
        self.max_tokens = max_tokens
        self.request_timeout = request_timeout
        print(f"Adaptador creado para el modelo: {self.model_name}")

    def _convert_lc_messages_to_dict(self, messages):
        output = []
        for msg in messages:
            role = msg.type
            if role == "human":
                role = "user"
            elif role == "ai":
                role = "assistant"
            output.append({"role": role, "content": msg.content})
        return output

    async def ainvoke(self, messages):
        try:
            dict_messages = self._convert_lc_messages_to_dict(messages)
            
            resp = await self.client.chat.completions.create(
                model=self.model_name,
                messages=dict_messages,
                temperature=self.temperature,
                max_tokens=self.max_tokens,
                extra_body={"max_output_tokens": self.max_tokens},
                stop=None,
                timeout=self.request_timeout,
            )

            choice = resp.choices[0]
            content = choice.message.content
            finish_reason = getattr(choice, "finish_reason", None)

            if finish_reason and finish_reason != "stop":
                print(f"[{self.model_name}] finish_reason={finish_reason}")

            return SimpleAIMessage(content=content, finish_reason=finish_reason)

        except Exception as e:
            print(f"Error en el adaptador de OpenAI ({self.model_name}): {e}")
            return SimpleAIMessage(content=None)


In [81]:
print("Creando clientes y adaptadores...")

async_openai_client = AsyncAzureOpenAI(
    azure_endpoint=dotenv.get_key(dotenv.find_dotenv(), "AZURE_OPENAI_ENDPOINT"),
    api_key=dotenv.get_key(dotenv.find_dotenv(), "AZURE_OPENAI_API_KEY"),
    api_version="2024-05-01-preview"
)

llm_grok_adapter = LLMModel(
    name="grok-4-fast-non-reasoning",
    instance=OpenAIClientAdapter(
        client=async_openai_client,
        model_name="grok-4-fast-non-reasoning",
        temperature=0
    )
)

llm_deepseek_adapter = LLMModel(
    name="DeepSeek-V3.1",
    instance=OpenAIClientAdapter(
        client=async_openai_client,
        model_name="DeepSeek-V3.1",
        temperature=0
    )
)

llm_llama_adapter = LLMModel(
    name="Llama-3.3-70B-Instruct",
    instance=OpenAIClientAdapter(
        client=async_openai_client,
        model_name="Llama-3.3-70B-Instruct",
        temperature=0
    )
)

Creando clientes y adaptadores...
Adaptador creado para el modelo: grok-4-fast-non-reasoning
Adaptador creado para el modelo: DeepSeek-V3.1
Adaptador creado para el modelo: Llama-3.3-70B-Instruct


### Test

In [82]:
import logging
import time
import sys

LOG_LEVEL = logging.INFO 

logging.basicConfig(
    level=LOG_LEVEL,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler("logs/app_run{}.log".format(time.strftime('%Y%m%d_%H%M%S'))), 
        logging.StreamHandler(sys.stdout) 
    ],
    force=True 
)

logging.info("Logger configurado exitosamente.")

2025-11-28 01:33:24,140 - root - INFO - Logger configurado exitosamente.


In [83]:
import os

def process_template_text(text):
    """
    Aplica la lógica de formateo específica de tu notebook:
    1. Escapa las llaves originales para que no rompan el .format().
    2. Aplana los saltos de línea y escapa comillas.
    3. Reemplaza los marcadores especiales $ y % por llaves de formato reales.
    """
    text = text.replace("{", "{{").replace("}", "}}")
    text = text.replace("\n", " ").replace('"', '\\"')
    text = text.replace("$", "{").replace("%", "}")
    return text

def load_prompts_from_dir(directory="prompts"):
    prompts_dict = {}
    
    if not os.path.exists(directory):
        print(f"Advertencia: El directorio '{directory}' no existe.")
        return prompts_dict

    print(f"Cargando prompts desde '{directory}'...")
    
    files = [f for f in os.listdir(directory) if f.endswith(".txt")]
    
    for filename in files:
        # El nombre de la clave será el nombre del archivo sin extensión
        # Ej: "human_prompt_context_lite.txt" -> "human_prompt_context_lite"
        key_name = os.path.splitext(filename)[0]
        file_path = os.path.join(directory, filename)
        
        with open(file_path, "r", encoding="utf-8") as f:
            content = f.read()
            
        # Mantenemos la lógica original:
        # Si el archivo tiene "system" en el nombre, se carga crudo.
        # Si no (es un human/template), se procesa para .format().
        if "system" in key_name:
            prompts_dict[key_name] = content
        else:
            prompts_dict[key_name] = process_template_text(content)
            
        print(f" - Cargado: {key_name}")

    return prompts_dict

# Ejecución
prompts_data = load_prompts_from_dir("prompts")

# Extracción de variables individuales (Opcional, por compatibilidad hacia atrás)
# Si tu código más abajo usa explícitamente estas variables, puedes asignarlas así:
system_prompt = prompts_data.get("system_prompt_english", "")

Cargando prompts desde 'prompts'...
 - Cargado: human_prompt_context_multistep
 - Cargado: system_prompt
 - Cargado: human_prompt_no_context_english
 - Cargado: human_prompt_context_semi_lite
 - Cargado: human_prompt_context_lite_english
 - Cargado: human_prompt_no_context
 - Cargado: human_prompt_no_context_lite_english
 - Cargado: human_prompt_no_context_lite
 - Cargado: human_prompt_context_multistep_english
 - Cargado: human_prompt_context_english
 - Cargado: human_prompt_context_lite
 - Cargado: system_prompt_english
 - Cargado: human_prompt_no_context_semi_lite
 - Cargado: human_prompt_no_context_semi_lite_english
 - Cargado: human_prompt_context
 - Cargado: human_prompt_context_semi_lite_english


In [84]:
import uuid
import logging
from dataclasses import dataclass, field
from typing import List, Dict, Any, Optional

# --- Estructuras de Datos ---

@dataclass
class PromptConfig:
    name: str  # ej: "no_context_lite"
    template: str
    requires_context: bool

@dataclass
class TaskPayload:
    execution_id: str
    user_id: str
    llm_name: str
    prompt_name: str
    prompt_text: str
    deck_original: List[str]      # Nombres de cartas originales (las 4 fijas)
    cards_available: List[str]    # Pool disponible
    cards_deleted: List[str]      # Las 4 que borramos (para validar si el LLM las adivina)

# --- Utilidad de Parseo de JSON (Solo extracción) ---

def extract_json_payload(text: str) -> Optional[Dict]:
    """Intenta extraer y parsear un bloque JSON de un string sucio."""
    if not text:
        return None
    try:
        # Intento 1: Buscar bloques de código markdown ```json ... ```
        m = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", text, flags=re.IGNORECASE)
        candidate = m.group(1).strip() if m else text.strip()

        # Intento 2: Buscar los límites de llaves {} o corchetes []
        first_brace = candidate.find("{")
        first_bracket = candidate.find("[")
        starts = [i for i in (first_brace, first_bracket) if i != -1]
        
        if not starts:
            return None

        start = min(starts)
        end = max(candidate.rfind("}"), candidate.rfind("]"))
        
        if end == -1 or end < start:
            return None
            
        candidate = candidate[start:end+1]
        return json.loads(candidate)
    except Exception:
        return None

In [85]:
def write_prompt_file(string, user_id):
    with open(f"final_prompts_lite/user_{user_id}.txt", "w", encoding="utf-8") as f:
        f.write(string)

In [86]:
def generate_human_prompt(human_prompt_template, top_decks, available_cards, selected_cards):
    rendered_human_prompt = human_prompt_template.format(
        TOP_DECKS=top_decks,
        CARTAS_DISPONIBLES=available_cards,
        CARTAS_SELECCIONADAS=selected_cards
    )
    return rendered_human_prompt

In [87]:
# --- Función Auxiliar: Barajar y Borrar Cartas ---
def shuffle_and_remove_cards(player_cards_list, num_to_remove=4):
    updated_player_cards_list = []
    for player_cards in player_cards_list:
        available_copy = list(player_cards.available_cards)
        selected_copy = list(player_cards.deck_cards)

        shuffle(available_copy)
        shuffle(selected_copy)

        to_delete = selected_copy[-num_to_remove:]
        selected_for_prompt = selected_copy[:-num_to_remove]

        updated_player_cards = PlayerCards(
            tag=player_cards.tag,
            available_cards=available_copy,
            deck_cards=selected_for_prompt,
            deleted_cards=to_delete
        )
        updated_player_cards_list.append(updated_player_cards)
    return updated_player_cards_list

# --- Lógica 1: Construcción del Prompt ---
def build_prompt_text(config: PromptConfig, player_cards: Any, top_decks_map: Dict) -> str:
    available_json = json.dumps([c.name for c in player_cards.available_cards])
    deck_json = json.dumps([c.name for c in player_cards.deck_cards])
    
    if config.requires_context:
        # Construcción del contexto XML
        string_top_decks = "<TopDecks>\n"
        for card in player_cards.deck_cards:
            card_name = card.name
            string_top_decks += f'  <CardGroup name="{card_name}">\n'
            decks = top_decks_map.get(card_name, {})
            for d_key, d_list in decks.items():
                string_top_decks += "    <Deck>\n"
                for c_name in d_list:
                    string_top_decks += f'      <Card>{c_name.strip()}</Card>\n'
                string_top_decks += "    </Deck>\n"
            string_top_decks += "  </CardGroup>\n"
        string_top_decks += "</TopDecks>"
        
        return config.template.format(
            TOP_DECKS=string_top_decks,
            CARTAS_DISPONIBLES=available_json,
            CARTAS_SELECCIONADAS=deck_json
        )
    else:
        return config.template.format(
            CARTAS_DISPONIBLES=available_json,
            CARTAS_SELECCIONADAS=deck_json
        )

# --- Lógica 2: Procesamiento de una Tarea (LLM Only) ---
async def process_single_task(llm_instance: Any, task: TaskPayload, file_lock: asyncio.Lock, output_file: str):
    messages = [
        SystemMessage(content=system_prompt), 
        HumanMessage(content=task.prompt_text)
    ]
    
    response_content = None
    parsed_json = None
    error_msg = None
    
    try:
        ai_msg = await llm_instance.instance.ainvoke(messages)
        response_content = ai_msg.content
        
        parsed_data = extract_json_payload(response_content)
        
        # Normalizar respuesta a {"seleccion": [...]}
        if parsed_data:
            if isinstance(parsed_data, list):
                 parsed_json = {"seleccion": parsed_data}
            elif isinstance(parsed_data, dict):
                 # Si viene {"data": ...} o {"seleccion": ...}
                 if "seleccion" in parsed_data:
                     parsed_json = parsed_data
                 elif "data" in parsed_data:
                     parsed_json = {"seleccion": parsed_data["data"]}
                 else:
                     parsed_json = {"seleccion": parsed_data} # Fallback
            else:
                 parsed_json = {"seleccion": []} # Formato irreconocible

    except Exception as e:
        error_msg = str(e)
        logging.error(f"Error en tarea {task.execution_id}: {e}")

    # Registro final
    result_record = {
        "execution_id": task.execution_id,
        "timestamp": pd.Timestamp.now().isoformat(),
        "user_id": task.user_id,
        "llm": task.llm_name,
        "prompt_type": task.prompt_name,
        "original_deck": task.deck_original,
        "deleted_cards": task.cards_deleted,
        "raw_response": response_content,
        "parsed_selection": parsed_json.get("seleccion") if parsed_json else None,
        "is_parsed": parsed_json is not None,
        "error": error_msg
    }

    async with file_lock:
        with open(output_file, 'a', encoding='utf-8') as f:
            f.write(json.dumps(result_record, ensure_ascii=False) + '\n')

# --- Lógica 3: Pipeline Principal de Generación ---
async def run_generation_pipeline(
    player_cards_list: List[Any],
    models: List[Any],
    prompts_config: List[PromptConfig],
    user_range: tuple = (0, None), 
    batch_size: int = 10,
    output_filename: str = "generation_results.jsonl"
):
    # 1. Preparar Usuarios (Shuffle + Slice Customizado)
    start, end = user_range
    active_players = shuffle_and_remove_cards(player_cards_list, num_to_remove=4)
    
    if end is None:
        target_players = active_players[start:]
    else:
        target_players = active_players[start:end]
        
    logging.info(f"Procesando {len(target_players)} usuarios (Rango: {start}-{end if end else 'End'}).")

    # 2. Generar Tareas (Usuario x Modelo x Prompt)
    all_tasks = []
    
    for player in target_players:
        player_tag = player.tag[1:]
        # Pre-computar prompt text para guardarlo si fuera necesario debugging
        current_deck_names = [c.name for c in player.deck_cards]
        deleted_names = [c.name for c in player.deleted_cards]
        available_names = [c.name for c in player.available_cards]
        
        for llm in models:
            for prompt_cfg in prompts_config:
                
                try:
                    p_text = build_prompt_text(prompt_cfg, player, top_decks_per_card)
                    
                    task = TaskPayload(
                        execution_id=uuid.uuid4().hex,
                        user_id=player_tag,
                        llm_name=llm.name,
                        prompt_name=prompt_cfg.name,
                        prompt_text=p_text,
                        deck_original=current_deck_names,
                        cards_available=available_names,
                        cards_deleted=deleted_names
                    )
                    all_tasks.append((llm, task))
                except Exception as e:
                    logging.error(f"Error construyendo prompt para {player_tag}: {e}")

    # 3. Barajar para alternar prompts/modelos
    random.shuffle(all_tasks)
    logging.info(f"Total de tareas generadas: {len(all_tasks)}")
    
    # 4. Ejecutar
    file_lock = asyncio.Lock()
    total_batches = (len(all_tasks) + batch_size - 1) // batch_size
    
    for i in range(0, len(all_tasks), batch_size):
        batch = all_tasks[i:i + batch_size]
        logging.info(f"--- Batch Generación {i//batch_size + 1}/{total_batches} (Size: {len(batch)}) ---")
        
        coroutines = [
            process_single_task(llm, task, file_lock, output_filename) 
            for (llm, task) in batch
        ]
        
        await asyncio.gather(*coroutines, return_exceptions=True)
        # Breve pausa para no saturar APIs
        await asyncio.sleep(0.5)

    logging.info(f"Pipeline de generación finalizado. Resultados en: {output_filename}")
    return output_filename

In [88]:
# --- Utilidades de Rating (Re-declarar si se borraron arriba) ---
score_mapping = {
    "RIP": 0, "Bad": 1, "Mediocre": 2, "Good": 3, "Great!": 4, "Godly!": 5,
}

def process_deck_rating(rating: str) -> dict:
    if not rating: return {}
    parts = rating.strip().split(" ")
    # Manejo de error simple por si el string no tiene el formato esperado
    try:
        return {
            "Attack": score_mapping.get(parts[1], 0),
            "Defense": score_mapping.get(parts[3], 0),
            "Synergy": score_mapping.get(parts[5], 0),
            "Versatility": score_mapping.get(parts[7], 0),
            "F2P score": score_mapping.get(parts[10], 0),
        }
    except:
        return {}

# --- Pipeline de Evaluación ---
async def run_rating_pipeline(input_filename: str, output_filename: str = "rating_results.jsonl"):
    logging.info(f"Iniciando evaluación de decks desde {input_filename}...")
    
    # 1. Cargar registros previos
    records = []
    try:
        with open(input_filename, 'r', encoding='utf-8') as f:
            for line in f:
                records.append(json.loads(line))
    except FileNotFoundError:
        logging.error("No existe el archivo de generación.")
        return

    # 2. Verificar procesados para no repetir si se corre de nuevo
    processed_ids = set()
    try:
        with open(output_filename, 'r', encoding='utf-8') as f:
             for line in f:
                processed_ids.add(json.loads(line).get("execution_id"))
    except FileNotFoundError:
        pass

    # 3. Función de evaluación individual
    sem = asyncio.Semaphore(5) # Concurrencia limitada para DeckShop
    
    async def rate_entry(record):
        eid = record["execution_id"]
        if eid in processed_ids: return

        async with sem:
            result = {"execution_id": eid, "was_improved": False}
            
            # Solo evaluamos si hubo parseo exitoso
            if record.get("is_parsed") and record.get("parsed_selection"):
                try:
                    # Reconstruir mazos completos
                    original_full = record["original_deck"] + record["deleted_cards"]
                    
                    selection = record["parsed_selection"]
                    # Asumiendo que selección son las cartas faltantes. Si el LLM devuelve todo, ajustar aquí.
                    new_full = record["original_deck"] + (selection if isinstance(selection, list) else [])

                    if len(new_full) == 8:
                        # Llamadas a DeckShop (bloqueantes -> thread pool)
                        loop = asyncio.get_running_loop()
                        
                        r_orig = await loop.run_in_executor(None, fetch_deck_rating, original_full)
                        r_new = await loop.run_in_executor(None, fetch_deck_rating, new_full)
                        
                        scores_orig = process_deck_rating(r_orig)
                        scores_new = process_deck_rating(r_new)
                        
                        total_orig = sum(scores_orig.values())
                        total_new = sum(scores_new.values())
                        
                        result.update({
                            "rating_status": "success",
                            "scores_original": scores_orig,
                            "scores_new": scores_new,
                            "total_original": total_orig,
                            "total_new": total_new,
                            "was_improved": total_new >= total_orig,
                            "correct_selection_count": sum(1 for c in record["deleted_cards"] if c in selection)
                        })
                    else:
                        result["rating_status"] = "invalid_deck_len"
                except Exception as e:
                    result["rating_status"] = f"error: {str(e)}"
            else:
                result["rating_status"] = "skipped_no_parse"

            # Escribir append
            with open(output_filename, 'a', encoding='utf-8') as f_out:
                f_out.write(json.dumps(result, ensure_ascii=False) + '\n')

    # 4. Ejecutar por lotes
    tasks = [rate_entry(rec) for rec in records]
    chunk_size = 10
    total = len(tasks)
    
    for i in range(0, total, chunk_size):
        await asyncio.gather(*tasks[i:i+chunk_size])
        logging.info(f"Evaluados {min(i+chunk_size, total)}/{total}")
        await asyncio.sleep(1) # Pausa amigable

    logging.info("Evaluación finalizada.")

In [89]:
import time

# --- A. Configuración de Ejecución ---

# 1. Define qué prompts quieres evaluar
prompts_to_run = [
    PromptConfig(name="context_multistep_english", template=prompts_data["human_prompt_context_multistep_english"], requires_context=True),
    # PromptConfig(name="semi_lite", template=prompts_data["human_prompt_context_semi_lite"], requires_context=True),
]

# 2. Define qué modelos usar
models_active = [
    llm_gpt3,
    llm_gpt4o,
    llm_gpt5,
    #llm_grok_adapter,
    llm_deepseek_adapter,
    llm_llama_adapter,
] 

# --- B. FASE 1: Generación (LLM) ---
timestamp = time.strftime('%Y%m%d_%H%M%S')
gen_file = f"results/generations_{timestamp}.jsonl"

logging.info(">>> INICIANDO FASE 1: GENERACIÓN LLM")

# AQUÍ defines el rango: (0, 10) procesa los primeros 10. (50, 100) procesa del 50 al 100.
await run_generation_pipeline(
    player_cards_list=player_cards_list,
    models=models_active,
    prompts_config=prompts_to_run,
    user_range=(0, 2),   # <--- MODIFICAR RANGO AQUI
    batch_size=10,       # Tamaño del lote asíncrono
    output_filename=gen_file
)

# --- C. FASE 2: Evaluación (DeckShop) ---
rating_file = f"results/ratings_{timestamp}.jsonl"

logging.info(">>> INICIANDO FASE 2: EVALUACIÓN DECKSHOP")

await run_rating_pipeline(
    input_filename=gen_file,
    output_filename=rating_file
)

logging.info(f"Proceso completo. \nGeneración: {gen_file} \nRatings: {rating_file}")

2025-11-28 01:33:24,196 - root - INFO - >>> INICIANDO FASE 1: GENERACIÓN LLM
2025-11-28 01:33:25,200 - root - INFO - Procesando 2 usuarios (Rango: 0-2).
2025-11-28 01:33:25,201 - root - INFO - Total de tareas generadas: 10
2025-11-28 01:33:25,201 - root - INFO - --- Batch Generación 1/1 (Size: 10) ---
2025-11-28 01:33:32,130 - httpx - INFO - HTTP Request: POST https://victo-mhcmsfx4-eastus2.cognitiveservices.azure.com/openai/deployments/gpt-5-chat/chat/completions?api-version=2025-01-01-preview "HTTP/1.1 200 OK"
2025-11-28 01:33:32,134 - httpx - INFO - HTTP Request: POST https://victo-mhcmsfx4-eastus2.cognitiveservices.azure.com/openai/deployments/gpt-5-chat/chat/completions?api-version=2025-01-01-preview "HTTP/1.1 200 OK"
2025-11-28 01:33:32,866 - httpx - INFO - HTTP Request: POST https://victo-mhcmsfx4-eastus2.cognitiveservices.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2025-01-01-preview "HTTP/1.1 200 OK"
2025-11-28 01:33:33,668 - httpx - INFO - HTTP Request: P

In [90]:
df = pd.read_json(output_filename, orient="records", lines=True)

NameError: name 'output_filename' is not defined

In [None]:
df['correct_selection_count'] = df.apply(
    lambda row: sum(1 for item in row['deleted'] if item in row['seleccion']),
    axis=1
)
df.head()

Unnamed: 0,seleccion,user_id,deleted,original,llm,with_context,original_deck_rating,selected_deck_rating,total_original_deck_rating,total_selected_deck_rating,was_improved,correct_selection_count
0,"[Goblin Barrel, The Log, Inferno Tower, Minions]",2G22QVP89,"[Royal Recruits, Goblin Cage, Zappies, Arrows]","[Electro Spirit, Flying Machine, Golden Knight...",DeepSeek-V3.1,False,"{'Attack': 3, 'Defense': 5, 'Synergy': 1, 'Ver...","{'Attack': 4, 'Defense': 5, 'Synergy': 2, 'Ver...",15,18,True,0
1,"[Tornado, Ice Golem, Skeletons, Cannon]",LU2QQJU0Y,"[Musketeer, Minions, Mega Knight, Firecracker]","[Electro Wizard, Hog Rider, Executioner, The Log]",DeepSeek-V3.1,False,"{'Attack': 4, 'Defense': 5, 'Synergy': 4, 'Ver...","{'Attack': 2, 'Defense': 5, 'Synergy': 2, 'Ver...",20,14,False,0
2,"[Poison, Skeletons, The Log, Goblin Gang]",999QV8RJU,"[The Log, Goblin Gang, Zap, Mega Knight]","[Wall Breakers, Miner, Cannon, Bats]",DeepSeek-V3.1,False,"{'Attack': 4, 'Defense': 5, 'Synergy': 5, 'Ver...","{'Attack': 3, 'Defense': 5, 'Synergy': 4, 'Ver...",20,17,False,2
3,"[Miner, Poison, Inferno Dragon, Goblin Gang]",VL0QCY9R8,"[Zap, Mega Knight, Bandit, Spear Goblins]","[Firecracker, Bats, Skeletons, Skeleton Barrel]",DeepSeek-V3.1,True,"{'Attack': 3, 'Defense': 5, 'Synergy': 4, 'Ver...","{'Attack': 4, 'Defense': 5, 'Synergy': 5, 'Ver...",20,22,True,0
4,"[Miner, Poison, Goblin Gang, Zap]",L0092LLGP,"[Bandit, Skeleton Barrel, Zap, Firecracker]","[Mega Knight, Spear Goblins, Bats, Skeletons]",DeepSeek-V3.1,True,"{'Attack': 3, 'Defense': 5, 'Synergy': 4, 'Ver...","{'Attack': 3, 'Defense': 5, 'Synergy': 5, 'Ver...",20,19,False,1


In [None]:
df['correct_selection_count'].groupby(df['llm']).value_counts()

llm                     correct_selection_count
DeepSeek-V3.1           0                          7
                        1                          1
                        2                          1
                        3                          1
Llama-3.3-70B-Instruct  0                          6
                        1                          1
Name: count, dtype: int64

In [None]:
res = (
    df.assign(wi=lambda d: d["was_improved"].astype(bool))
      .groupby("llm", as_index=False)
      .agg(total_mazos=("llm", "size"),
           mazos_mejorados=("wi", "sum"))
)
# Agregamos porcentaje de mazos mejorados
res['porcentaje_mejorados'] = (res['mazos_mejorados'] / res['total_mazos']) * 100
print(res)

                      llm  total_mazos  mazos_mejorados  porcentaje_mejorados
0           DeepSeek-V3.1           10                5             50.000000
1  Llama-3.3-70B-Instruct            7                2             28.571429


In [None]:
df_raw = pd.read_json(raw_filename, orient="records", lines=True)

In [None]:
# contamos cuantos validos e invalidos hay en raw_data para cada llm
df_raw['is_valid'] = df_raw['parsed_successfully'].astype(bool)
df_raw['llm'] = df_raw['llm'].astype('category')
df_raw['llm'].cat.categories
df_raw['is_valid'].groupby(df_raw['llm']).value_counts()
# Ponemos el porcentaje de validos por llm
df_raw.groupby('llm')['is_valid'].value_counts(normalize=True).mul(100)

  df_raw['is_valid'].groupby(df_raw['llm']).value_counts()
  df_raw.groupby('llm')['is_valid'].value_counts(normalize=True).mul(100)


llm                     is_valid
DeepSeek-V3.1           False       50.0
                        True        50.0
Llama-3.3-70B-Instruct  False       65.0
                        True        35.0
Name: proportion, dtype: float64

In [None]:
hola_serie = df_raw['raw_response'][(df_raw['user_id'] == '999QV8RJU') & (df_raw['llm'] == 'Llama-3.3-70B-Instruct')]

if not hola_serie.empty:
    texto_completo = hola_serie.iloc[0]
    
    print(texto_completo)
else:
    print("No se encontró esa fila.")

```json
{
  "seleccion": [
    "Zap",
    "Goblin Gang",
    "Mega Knight"
  ]
}
```
