In [3]:
pip install gradio

Collecting gradio
  Downloading gradio-5.24.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.8.0 (from gradio)
  Downloading gradio_client-1.8.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting orjson~=3.0 (from gradio)
  Downloading orjson-3.10.16-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (41 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from g

In [1]:
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from safetensors.torch import load_model

# Configuración
MODEL_PATH = "./modelo"  # Ruta donde están tus archivos
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Cargar tokenizer y config
tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token  # Necesario para GPT-2

# Cargar modelo desde safetensors
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    local_files_only=True,
    torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
).to(DEVICE)
model.eval()

def comentar_codigo(snippet):
    # Plantilla de prompt (ajusta según tu entrenamiento)
    prompt = f"Comenta este código Python:\n```python\n{snippet}\n```\nCódigo comentado:\n```python\n"
    
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        max_length=512
    ).to(DEVICE)
    
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=300,
            temperature=0.7,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id
        )
    
    # Decodificar y limpiar la salida
    full_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
    codigo_comentado = full_output.split("Código comentado:")[-1].strip()
    
    # Eliminar el cierre del code block si existe
    return codigo_comentado.replace("```", "").strip()

# Interfaz mejorada
css = """
.code-output {
    font-family: monospace !important;
    white-space: pre;
}
"""
with gr.Blocks(css=css, title="Comentador GPT-2") as demo:
    gr.Markdown("## 💻 Comentador Automático de Código (GPT-2)")
    
    with gr.Row():
        input_code = gr.Code(
            label="Tu código",
            language="python",
            lines=15,
            elem_classes="code-output"
        )
        output_code = gr.Code(
            label="Código Comentado",
            language="python",
            lines=15,
            elem_classes="code-output"
        )
    
    btn = gr.Button("Generar Comentarios", variant="primary")
    
    # Ejemplos prácticos
    examples = [
        ["def factorial(n):\n    if n == 0:\n        return 1\n    return n * factorial(n-1)"],
        ["class Calculadora:\n    def __init__(self):\n        self.resultado = 0\n\n    def suma(self, a, b):\n        return a + b"],
        ["for i in range(10):\n    print(f'Valor: {i}')"]
    ]
    
    gr.Examples(examples=examples, inputs=input_code, label="Ejemplos")
    
    btn.click(
        fn=comentar_codigo,
        inputs=input_code,
        outputs=output_code
    )

demo.launch(
    server_name="0.0.0.0",
    share=False  # True para enlace público temporal
)

* Running on local URL:  http://0.0.0.0:7860
* To create a public link, set `share=True` in `launch()`.




# Métricas

In [2]:
import evaluate
import pandas as pd
import nltk
nltk.download("punkt")  # Requerido para METEOR


[nltk_data] Downloading package punkt to /home/frank/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [3]:
import evaluate
import nltk
import pandas as pd
from datasets import load_dataset
nltk.download("punkt")  # Necesario para METEOR

# ✅ Paso 1: Cargar el dataset
data_files = {
    'train': 'dataset_train.jsonl',
    'validation': 'dataset_valid.jsonl',
    'test': 'dataset_test.jsonl'
}

raw_datasets = load_dataset('json', data_files=data_files)
test_dataset = raw_datasets["test"]


[nltk_data] Downloading package punkt to /home/frank/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [4]:
print(test_dataset)

Dataset({
    features: ['repo', 'path', 'func_name', 'original_string', 'language', 'code', 'code_tokens', 'docstring', 'docstring_tokens', 'sha', 'url', 'partition', 'code_clean'],
    num_rows: 19916
})


In [5]:

# ✅ Paso 2: Función para generar comentarios con tu modelo
def comentar_codigo(snippet):
    prompt = f"Comenta este código Python:\n```python\n{snippet}\n```\nCódigo comentado:\n```python\n"
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        max_length=512
    ).to(DEVICE)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=512,
            temperature=0.7,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id
        )

    full_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
    codigo_comentado = full_output.split("Código comentado:")[-1].strip()
    return codigo_comentado.replace("```", "").strip()



In [6]:

# ✅ Paso 3: Generar predicciones
preds = []
refs = []

for example in test_dataset:
    entrada = example['code_clean']
    salida_esperada = example["code"]
    comentario_generado = comentar_codigo(entrada)
    
    preds.append(comentario_generado)
    refs.append(salida_esperada)

# ✅ Paso 4: Cargar métricas
bleu = evaluate.load("bleu")
rouge = evaluate.load("rouge")
meteor = evaluate.load("meteor")

# ✅ Paso 5: Evaluar
bleu_score = bleu.compute(predictions=preds, references=refs)
rouge_score = rouge.compute(predictions=preds, references=refs)
meteor_score = meteor.compute(predictions=preds, references=refs)

# ✅ Paso 6: Mostrar resultados
print("\n BLEU:", bleu_score)
print("\n ROUGE:", rouge_score)
print("\n METEOR:", meteor_score)



KeyboardInterrupt: 

# Otra forma

In [30]:
pip install evaluate datasets


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Note: you may need to restart the kernel to use updated packages.


In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from datasets import load_dataset
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from evaluate import load as load_metric
import numpy as np
from tqdm import tqdm


In [2]:
# Configuración
MODEL_PATH = "./modelo"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    local_files_only=True,
    torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
).to(DEVICE)
model.eval()


GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D(nf=2304, nx=768)
          (c_proj): Conv1D(nf=768, nx=768)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=3072, nx=768)
          (c_proj): Conv1D(nf=768, nx=3072)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [3]:
# Cargar dataset completo
data_files = {
    'train': 'dataset_train_filtrado.jsonl',
    'validation': 'dataset_valid_filtrado.jsonl',
    'test': 'dataset_test_filtrado.jsonl'
}
raw_datasets = load_dataset('json', data_files=data_files)
test_dataset = raw_datasets["test"]

# Tomar solo la tercera parte
subset_size = len(test_dataset) // 10
test_subset = test_dataset.select(range(subset_size))


In [4]:
def comentar_codigo(snippet):
    prompt = f"Comenta este código Python:\n```python\n{snippet}\n```\nCódigo comentado:\n```python\n"
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512).to(DEVICE)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=512,
            temperature=0.7,
            do_sample=False,
            pad_token_id=tokenizer.eos_token_id
        )
    full_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
    comentado = full_output.split("Código comentado:")[-1].strip()
    return comentado.replace("```", "").strip()


In [5]:
rouge = load_metric("rouge")
meteor = load_metric("meteor")


[nltk_data] Downloading package wordnet to /home/frank/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt_tab to /home/frank/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /home/frank/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


In [6]:
refs, preds = [], []

for item in tqdm(test_subset):
    codigo_limpio = item["code_clean"]
    codigo_comentado_ref = item["code"]

    codigo_comentado_pred = comentar_codigo(codigo_limpio)

    refs.append(codigo_comentado_ref)
    preds.append(codigo_comentado_pred)


  0%|                                                  | 0/1991 [00:00<?, ?it/s]The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
  0%|                                        | 1/1991 [00:02<1:22:47,  2.50s/it]The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
  0%|                                        | 2/1991 [00:04<1:13:47,  2.23s/it]The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
  0%|                                          | 3/1991 [00:05<51:30,  1.55s/it]The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
  0%|                                          | 4/1991 [00:07<58:17,  1.76s/it]The following generation flags are not valid and may be ignored:

In [7]:
# BLEU
smoothie = SmoothingFunction().method4
bleu_scores = [
    sentence_bleu(
        [ref.split()],
        pred.split(),
        smoothing_function=smoothie
    ) for ref, pred in zip(refs, preds)
]
bleu_avg = np.mean(bleu_scores)

# ROUGE
rouge_result = rouge.compute(predictions=preds, references=refs, use_stemmer=True)

# METEOR
meteor_result = meteor.compute(predictions=preds, references=refs)

# Mostrar resultados
print(f"\nBLEU score promedio: {bleu_avg:.4f}")
print(f"METEOR: {meteor_result['meteor']:.4f}")
print(f"ROUGE-L: {rouge_result['rougeL']:.4f}")




BLEU score promedio: 0.1075
METEOR: 0.2438
ROUGE-L: 0.3532
