In [None]:
import sys
import os
sys.path.append(os.path.abspath("../afasia_llm"))

import itertools, random, time, warnings, re, json, os, math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from afasia_llm.generate import (
    generate_exercises_local,
    generate_exercises_llm,
    is_valid_exercise,
    EXERCISE_BANK,
)

# Silence every warning for the rest of the session. This keeps cell output
# clean but also hides deprecation/runtime warnings raised by later cells.
warnings.filterwarnings("ignore")
# Start from matplotlib's default style, then select seaborn's "husl" palette.
# NOTE(review): presumably the palette must be set after the style call so the
# style does not overwrite the colour cycle -- confirm before reordering.
plt.style.use("default")
sns.set_palette("husl")

In [None]:
# Flatten the exercise bank: EXERCISE_BANK is walked as a two-level mapping
# whose innermost values are iterables of exercises; every exercise found is
# collected into one set for fast membership checks.
BANK_SET = set()
for sev_dict in EXERCISE_BANK.values():
    for lst in sev_dict.values():
        BANK_SET.update(lst)


def novelty_ratio(generated: list[str], bank: "set[str] | None" = None) -> float:
    """Return the fraction (0.0-1.0) of exercises that are NOT in the bank.

    Parameters
    ----------
    generated : list[str]
        Exercise texts to check. Matching is by exact string membership.
    bank : set[str] | None, optional
        Reference collection of known exercises. When omitted, the
        module-level ``BANK_SET`` is used, which preserves the original
        behaviour for existing callers.

    Returns
    -------
    float
        Share of ``generated`` items absent from the bank, as a ratio (not a
        percentage). Returns 0.0 for an empty ``generated`` list.
    """
    if not generated:
        return 0.0
    # Resolve the global lazily so callers that pass `bank` never touch it.
    reference = BANK_SET if bank is None else bank
    novel_count = sum(1 for exercise in generated if exercise not in reference)
    return novel_count / len(generated)

In [7]:
# Fixed seed so the synthetic cohort is identical on every run.
random.seed(42)

APHASIA_TYPES = ["Broca", "Wernicke", "TransMotor"]
SEVERITIES = ["Severe", "Moderate", "Mild"]

# All (type, severity) pairings; 30 of them are drawn with replacement and
# each draw becomes one synthetic patient with a sequential id.
_profiles = list(itertools.product(APHASIA_TYPES, SEVERITIES))
patients = []
for idx, (aphasia_type, severity) in enumerate(random.choices(_profiles, k=30)):
    patients.append({"id": idx, "type": aphasia_type, "sev": severity})

df_patients = pd.DataFrame(patients)

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Use the GPU when torch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
print("Usando dispositivo:", DEVICE)

# Load the seq2seq checkpoint and its tokenizer, moving the model to DEVICE.
LLM_NAME = "google/flan-t5-base"
LLM_MODEL = AutoModelForSeq2SeqLM.from_pretrained(LLM_NAME)
LLM_MODEL = LLM_MODEL.to(DEVICE)
LLM_TOKENIZER = AutoTokenizer.from_pretrained(LLM_NAME)

In [8]:
def _timed_call(fn, *args, **kwargs):
    """Call ``fn`` and return ``(result, elapsed_seconds)``.

    Uses ``time.perf_counter`` (monotonic, high resolution) rather than the
    wall clock, so system clock adjustments cannot distort the latencies.
    """
    start = time.perf_counter()
    result = fn(*args, **kwargs)
    return result, time.perf_counter() - start


def _mean_word_count(exercises):
    """Mean number of whitespace-separated words per exercise (NaN if empty)."""
    if not exercises:
        return float("nan")
    return float(np.mean([len(e.split()) for e in exercises]))


# One record per synthetic patient, comparing the LLM generator against the
# template ("local") generator on validity, novelty, length and latency.
rows = []

for p in patients:
    pid, typ, sev = p["id"], p["type"], p["sev"]

    # LLM generator
    ex_llm, t_llm = _timed_call(
        generate_exercises_llm, sev, typ, "daily activities", n=5, max_tries=3
    )

    # Template generator
    ex_loc, t_loc = _timed_call(
        generate_exercises_local, sev, typ, "daily activities", n=5
    )

    rows.append(dict(
        id=pid,
        type=typ,
        sev=sev,
        # Count of exercises accepted by is_valid_exercise.
        llm_valid=sum(map(is_valid_exercise, ex_llm)),
        loc_valid=sum(map(is_valid_exercise, ex_loc)),
        llm_novel=novelty_ratio(ex_llm),
        loc_novel=novelty_ratio(ex_loc),
        llm_len=_mean_word_count(ex_llm),
        loc_len=_mean_word_count(ex_loc),
        t_llm=t_llm,
        t_loc=t_loc,
    ))

df = pd.DataFrame(rows)
print(f"Analizados {len(df)} pacientes sinteticos")

KeyboardInterrupt: 

In [None]:
# Summary statistics for the validity, novelty and latency columns,
# transposed so that each metric is a row.
summary_cols = ["llm_valid", "loc_valid", "llm_novel", "loc_novel", "t_llm", "t_loc"]
metrics = df[summary_cols].describe().T.round(2)
metrics

In [None]:
fig, axes = plt.subplots(1, 3, figsize=(15, 4))

# One boxplot per metric, comparing the LLM column against the template one.
# Each entry: (columns to compare, melted value name, title, y-label override).
panels = [
    (["llm_valid", "loc_valid"], "valid", "% válidos (5=max)", None),
    (["llm_novel", "loc_novel"], "novel", "Novedad", "Ratio"),
    (["t_llm", "t_loc"], "time", "Tiempo (s)", None),
]

for panel_ax, (cols, value_name, title, ylabel) in zip(axes, panels):
    long_df = df.melt(value_vars=cols, var_name="mode", value_name=value_name)
    sns.boxplot(data=long_df, x="mode", y=value_name, ax=panel_ax)
    panel_ax.set_title(title)
    if ylabel is not None:
        panel_ax.set_ylabel(ylabel)

plt.tight_layout()
plt.show()

In [None]:
# Headline KPIs for the LLM generator.
# llm_valid is a count of valid exercises per patient; with 5 exercises
# requested per patient, multiplying the mean by 20 expresses it as a percent.
llm_valid_pct = df["llm_valid"].mean() * 20

kpi_rows = [
    ("Validez LLM (%)", llm_valid_pct),
    ("Novedad LLM (%)", df["llm_novel"].mean() * 100),
    ("Tiempo LLM p50 (s)", df["t_llm"].median()),
    ("Tiempo LLM p95 (s)", df["t_llm"].quantile(.95)),
    # Reported here as the complement of the validity percentage.
    ("Fallback rate (%)", 100 - llm_valid_pct),
]

kpis = pd.DataFrame(kpi_rows, columns=["Metric", "Value"]).round(2)

kpis

In [None]:
from nltk.translate.bleu_score import sentence_bleu
from sentence_transformers import SentenceTransformer, util

# Sentence-embedding model; extra_metrics() below uses it to encode each exercise.
mdl_emb = SentenceTransformer("all-MiniLM-L6-v2")

def extra_metrics(batch: list[str]) -> dict[str, float]:
    """Compute lexical and semantic diversity metrics for a batch of texts.

    Parameters
    ----------
    batch : list[str]
        Exercise texts. Tokens are obtained by whitespace splitting.

    Returns
    -------
    dict[str, float]
        ``length_tok``: mean number of tokens per exercise.
        ``type_token``: unique tokens / total tokens over the batch, in percent.
        ``embed_div``: 1 minus the mean pairwise cosine similarity of the
        ``mdl_emb`` embeddings, in percent.
        ``self_bleu``: mean sentence BLEU of every exercise scored against
        every other exercise as its single reference, in percent.

    Notes
    -----
    An empty batch yields all zeros (there is nothing to average, and
    tokens-per-exercise would otherwise divide by zero). With a single item
    there are no pairs, so both pairwise metrics are reported as 0.0 rather
    than NaN.
    """
    if not batch:
        return dict(length_tok=0.0, type_token=0.0, embed_div=0.0, self_bleu=0.0)

    toks = [w for ex in batch for w in ex.split()]
    ttr = len(set(toks)) / len(toks) if toks else 0.0

    # Pairwise cosine similarity; only the strict upper triangle holds the
    # distinct unordered pairs (the diagonal is each text against itself).
    embs = mdl_emb.encode(batch, convert_to_tensor=True)
    sims = util.cos_sim(embs, embs).cpu().numpy()
    upper = sims[np.triu_indices(len(batch), k=1)]
    div = float(1 - upper.mean()) if len(upper) else 0.0

    # Ordered pairs (hypothesis, reference) over distinct positions.
    pair_scores = [
        sentence_bleu([ref.split()], hyp.split())
        for hyp, ref in itertools.permutations(batch, 2)
    ]
    sb = float(np.mean(pair_scores)) if pair_scores else 0.0

    return dict(
        length_tok=len(toks) / len(batch),
        type_token=ttr * 100,
        embed_div=div * 100,
        self_bleu=sb * 100
    )

# Generate a fresh batch of LLM exercises for the diversity metrics.
# The [:60] slice is only an upper bound on how many patients are used.
llm_items = [
    exercise
    for p in patients[:60]
    for exercise in generate_exercises_llm(p["sev"], p["type"], "daily activities", n=5)
]

extra = extra_metrics(llm_items)

kpis_extra = pd.DataFrame({
    "Metric": ["Tokens / ex", "Type–token (%)", "Embed diversity (%)", "Self-BLEU (%)"],
    "Value": [round(extra["length_tok"], 2),
              round(extra["type_token"], 1),
              round(extra["embed_div"], 1),
              round(extra["self_bleu"], 1)]
})

# Drop any rows for these metrics left by a previous run of this cell, so
# re-executing it replaces them instead of appending duplicates.
kpis = pd.concat(
    [kpis[~kpis["Metric"].isin(kpis_extra["Metric"])], kpis_extra],
    ignore_index=True,
)
display(kpis)

In [None]:
# Display names for the per-group summary columns.
NAME_VALID = "Validez LLM (%)"
NAME_NOVEL = "Novedad LLM (%)"
NAME_LAT_P95 = "Tiempo LLM p95 (s)"

# Per-aphasia-type summary of the LLM generator.
# Named aggregation must be passed as keyword arguments; because the output
# names are not valid identifiers they are supplied by unpacking a dict.
# (A plain dict would be read as {existing_column: func} and raise KeyError.)
pretty = (
    df.groupby("type")
    .agg(**{
        # llm_valid is a count per patient; x20 turns the mean into a percent
        # given 5 exercises requested per patient.
        NAME_VALID: ("llm_valid", lambda x: x.mean() * 20),
        NAME_NOVEL: ("llm_novel", lambda x: x.mean() * 100),
        NAME_LAT_P95: ("t_llm", lambda x: x.quantile(.95)),
    })
    .round(1)
    .rename_axis("Tipo")
    .reset_index()
)

display(pretty)

# Validity and novelty per aphasia type, as grouped bars.
by_type = pretty.set_index("Tipo")[[NAME_VALID, NAME_NOVEL]]
type_ax = by_type.plot(kind="bar", figsize=(6, 4), color=["#2D7142", "#C9A42F"])
type_ax.set_ylabel("%")
type_ax.set_title("Validez / Novedad por tipo de afasia")
type_ax.set_ylim(0, 100)
plt.tight_layout()
plt.show()

# Validity and novelty per severity level.
# The display names are not columns of df, so they are bound to their source
# columns through named aggregation (keyword form) instead of a plain dict,
# which would raise KeyError.
sev_stats = (
    df.groupby("sev")
    .agg(**{
        NAME_VALID: ("llm_valid", "mean"),
        NAME_NOVEL: ("llm_novel", "mean"),
    })
    # Column-wise scaling, in the order declared above:
    # valid count (out of 5) -> percent, novelty ratio -> percent.
    .mul([20, 100])
    .round(1)
    .rename_axis("Severidad")
)

sev_stats[[NAME_VALID, NAME_NOVEL]].plot(
    kind="bar", figsize=(6, 4), color=["#4E95D9", "#C9A42F"]
)
plt.ylabel("%")
plt.title("Validez / Novedad por severidad")
plt.ylim(0, 100)
plt.tight_layout()
plt.show()

In [None]:
# Marginal distributions of the synthetic cohort: one count plot per attribute.
fig, axes = plt.subplots(1, 2, figsize=(10, 4))
type_panel, sev_panel = axes

sns.countplot(data=df_patients, x="type", ax=type_panel)
type_panel.set_title("Distribución por tipo de afasia")

sns.countplot(
    data=df_patients,
    x="sev",
    order=["Severe", "Moderate", "Mild"],
    ax=sev_panel,
)
sev_panel.set_title("Distribución por severidad")

plt.tight_layout()
plt.show()

# Joint distribution (type x severity) as stacked bars.
joint_counts = pd.crosstab(df_patients["type"], df_patients["sev"])
joint_ax = joint_counts.plot(kind="bar", stacked=True, colormap="Set2", figsize=(8, 4))
joint_ax.set_title("Distribución conjunta (tipo × severidad)")
joint_ax.set_ylabel("Nº pacientes")
joint_ax.set_xlabel("Tipo de afasia")
plt.tight_layout()
plt.show()