In [None]:
#!/usr/bin/env python3

# Mount Google Drive at /content/drive so the dataset CSV stored under
# "My Drive" can be opened by the code below.
from google.colab import drive
drive.mount('/content/drive')

import csv
import torch
from torch.utils.data import Dataset 
from transformers import (
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
    Seq2SeqTrainingArguments,
    Seq2SeqTrainer,
    DataCollatorForSeq2Seq
)
import numpy as np
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM


class EnigmaDataset(Dataset):
    """Lazily tokenized (scrambled text -> plain text) pairs for seq2seq training.

    Args:
        texts: Target strings the model should produce.
        scrambles: Source strings fed to the encoder; must be the same length
            as ``texts`` (item ``i`` of one pairs with item ``i`` of the other).
        tokenizer: Callable accepting ``truncation``, ``padding``,
            ``max_length`` and ``return_tensors`` keyword arguments and
            returning a mapping with ``input_ids`` and ``attention_mask``
            tensors that carry a leading batch axis of size 1.
        max_length: Every sequence is truncated / padded to this many tokens.

    Raises:
        ValueError: If ``texts`` and ``scrambles`` differ in length.
    """

    # Label value that the seq2seq loss skips; used to blank out padding.
    IGNORE_INDEX = -100

    def __init__(self, texts, scrambles, tokenizer, max_length=512):
        if len(texts) != len(scrambles):
            raise ValueError(
                f"texts and scrambles must be the same length "
                f"(got {len(texts)} and {len(scrambles)})"
            )
        self.tokenizer = tokenizer
        self.input_texts = scrambles
        self.target_texts = texts
        self.max_length = max_length

    def __len__(self):
        return len(self.input_texts)

    def _encode(self, text):
        """Tokenize one string to a fixed-length, batch-of-one encoding."""
        return self.tokenizer(
            text,
            truncation=True,
            padding="max_length",
            max_length=self.max_length,
            return_tensors="pt"
        )

    def __getitem__(self, idx):
        """Return ``input_ids``, ``attention_mask`` and ``labels`` for pair ``idx``."""
        input_encoding = self._encode(self.input_texts[idx])
        target_encoding = self._encode(self.target_texts[idx])

        # squeeze(0) removes only the batch axis; a bare squeeze() would also
        # collapse the sequence axis when max_length == 1.
        labels = target_encoding["input_ids"].squeeze(0)

        # Padding positions must not contribute to the loss, otherwise the
        # model is mostly trained to emit pad tokens.
        pad_id = getattr(self.tokenizer, "pad_token_id", None)
        if pad_id is not None:
            labels = labels.masked_fill(labels == pad_id, self.IGNORE_INDEX)

        return {
            "input_ids": input_encoding["input_ids"].squeeze(0),
            "attention_mask": input_encoding["attention_mask"].squeeze(0),
            "labels": labels,
        }

def compute_metrics(eval_pred, pad_token_id=0):
    """Token-level accuracy of predicted ids against reference labels.

    Only positions that hold a real label token are scored: entries equal to
    -100 (ignored by the loss) or to ``pad_token_id`` are excluded from both
    the match count and the total, so the result always lies in [0, 1].

    Args:
        eval_pred: Pair ``(predictions, labels)`` of 2-D integer arrays
            indexed as (example, position). ``predictions`` may also be a
            tuple whose first element is that array.
        pad_token_id: Token id used for padding in ``labels``.

    Returns:
        ``{"accuracy": float}``; 0.0 when no position is scored.
    """
    predictions, labels = eval_pred

    if isinstance(predictions, tuple):
        predictions = predictions[0]

    # Work on array views/copies so the caller's label array is never edited.
    predictions = np.asarray(predictions)
    labels = np.asarray(labels)

    # The two arrays can differ in width (generation length vs. label length);
    # compare only the prefix both of them cover.
    width = min(predictions.shape[1], labels.shape[1])
    predictions = predictions[:, :width]
    labels = labels[:, :width]

    # NOTE(review): if the generated ids start with a decoder-start token the
    # predictions are shifted one position relative to the labels — confirm
    # against the trainer's output and strip that column if so.
    scored = (labels != -100) & (labels != pad_token_id)
    total = int(scored.sum())
    if total == 0:
        return {"accuracy": 0.0}

    correct = int(((predictions == labels) & scored).sum())
    return {"accuracy": correct / total}

def _read_enigma_pairs(csv_path):
    """Read lower-cased ``(texts, scrambles)`` lists from a two-column CSV.

    The first row is treated as a header and skipped. Rows with fewer than two
    columns (for example blank lines) are ignored instead of raising.
    """
    texts, scrambles = [], []
    with open(csv_path, newline='', encoding='utf-8') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        next(reader, None)  # header row
        for row in reader:
            if len(row) < 2:
                continue
            texts.append(row[0].lower())
            scrambles.append(row[1].lower())
    return texts, scrambles


def main():
    """Fine-tune ``google/byt5-small`` to map scrambled text back to plain text.

    Reads (text, scramble) pairs from the Drive CSV, keeps the first 10% of
    the rows, splits that subset 80/20 into train / validation, fine-tunes for
    10 epochs, saves the model and tokenizer to ``OUTPUT_DIR`` and prints the
    final evaluation metrics.

    Raises:
        ValueError: If the CSV yields too few rows to form non-empty training
            and validation splits.
    """
    torch.cuda.empty_cache()

    text, scrambles = _read_enigma_pairs(
        '/content/drive/My Drive/Colab Notebooks/enigma_processed.csv'
    )
    line_count = len(text)

    training_cap = int(0.1 * line_count)
    split_point = int(0.8 * training_cap)
    if split_point == 0 or split_point == training_cap:
        raise ValueError(
            f"Not enough rows ({line_count}) to build non-empty train and validation splits."
        )

    training_text = text[:split_point]
    training_scrambles = scrambles[:split_point]
    validation_text = text[split_point:training_cap]
    validation_scrambles = scrambles[split_point:training_cap]

    MODEL_NAME = "google/byt5-small"
    OUTPUT_DIR = "byt5_enigma_finetuned"

    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)

    train_dataset = EnigmaDataset(training_text, training_scrambles, tokenizer)
    eval_dataset = EnigmaDataset(validation_text, validation_scrambles, tokenizer)

    # fp16 mixed precision overflows with T5-family checkpoints (the earlier
    # fp16 run logged losses around 1e20 and a NaN eval loss). Use bf16 when
    # the GPU supports it and plain fp32 otherwise.
    use_bf16 = torch.cuda.is_available() and torch.cuda.is_bf16_supported()

    # NOTE(review): the earlier run logged only ~300 optimizer steps, fewer
    # than warmup_steps=500, so the learning rate never finished warming up —
    # consider a smaller warmup.
    training_args = Seq2SeqTrainingArguments(
        output_dir=OUTPUT_DIR,
        num_train_epochs=10,
        per_device_train_batch_size=4,
        per_device_eval_batch_size=4,
        warmup_steps=500,
        weight_decay=0.01,
        logging_dir='./logs',
        logging_steps=100,
        save_strategy="epoch",
        fp16=False,
        bf16=use_bf16,
        predict_with_generate=True,
        report_to='none',
        gradient_accumulation_steps=16
    )

    # Any label padding added by the collator must use -100 so the loss skips
    # it; padding with the tokenizer's pad id would be scored as a real target.
    data_collator = DataCollatorForSeq2Seq(
        tokenizer, model=model, padding="longest",
        label_pad_token_id=-100
    )

    trainer = Seq2SeqTrainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        tokenizer=tokenizer,
        data_collator=data_collator,
        compute_metrics=compute_metrics,
    )

    print("Starting Fine-tuning...")
    trainer.train()

    trainer.save_model(OUTPUT_DIR)
    tokenizer.save_pretrained(OUTPUT_DIR)
    print(f"Fine-tuning complete. Model saved to {OUTPUT_DIR}")

    results = trainer.evaluate()
    print(f"Final Evaluation Results: {results}")

if __name__ == '__main__':
    main()

Mounted at /content/drive


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/698 [00:00<?, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.20G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

  trainer = Seq2SeqTrainer(


Starting Fine-tuning...


  batch["labels"] = torch.tensor(batch["labels"], dtype=torch.int64)


Step,Training Loss
100,20370014863.36
200,2.2255909749162768e+17
300,5.978330797310691e+20


Fine-tuning complete. Model saved to byt5_enigma_finetuned


Final Evaluation Results: {'eval_loss': nan, 'eval_accuracy': 0.04450782127733126, 'eval_runtime': 57.9696, 'eval_samples_per_second': 9.505, 'eval_steps_per_second': 2.381, 'epoch': 10.0}


In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import time
from peft import PeftModel

# Directory the training cell saved the fine-tuned model and tokenizer to.
MODEL_DIR = "byt5_enigma_finetuned"
# Run on the GPU when torch can see one, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# NOTE(review): not referenced anywhere in the visible code — confirm whether
# this is still needed.
MAX_LENGTH = 150 

def load_and_test_model(model_dir, device):
    """Load the fine-tuned tokenizer and model stored at ``model_dir``.

    A PEFT (LoRA) load on top of ``google/byt5-small`` is attempted only when
    ``model_dir`` could actually hold an adapter: either it contains an
    ``adapter_config.json`` or it is not a local directory (so its contents
    cannot be inspected up front). In every other case, or when the PEFT load
    fails, ``model_dir`` is loaded as a full seq2seq checkpoint.

    Args:
        model_dir: Path (or model identifier) of the saved model.
        device: Device string the model is moved to, e.g. ``"cuda"`` or ``"cpu"``.

    Returns:
        ``(model, tokenizer)`` with the model in eval mode on ``device``.
    """
    import os  # local import: only needed for the adapter-file check below

    print(f"Loading tokenizer from: {model_dir}...")
    tokenizer = AutoTokenizer.from_pretrained(model_dir)

    # Weights are kept in float32: fp16 overflows in T5-family models and
    # produces unusable losses / generations.
    load_dtype = torch.float32

    is_local_dir = os.path.isdir(model_dir)
    has_adapter = os.path.isfile(os.path.join(model_dir, "adapter_config.json"))

    model = None
    if has_adapter or not is_local_dir:
        BASE_MODEL_NAME = "google/byt5-small"
        base_model = None
        try:
            print(f"Loading base model ({BASE_MODEL_NAME}) for PEFT inference...")
            base_model = AutoModelForSeq2SeqLM.from_pretrained(
                BASE_MODEL_NAME,
                torch_dtype=load_dtype
            ).to(device)

            model = PeftModel.from_pretrained(base_model, model_dir).eval()
            print("Successfully loaded PEFT (LoRA) model.")
        except Exception as e:
            # Report why the adapter path failed instead of hiding it, then
            # drop the base model so it does not keep occupying device memory.
            print(f"PEFT loading error: {type(e).__name__}: {e}")
            model = None
            base_model = None
            if device == "cuda":
                torch.cuda.empty_cache()

    if model is None:
        print(f"PEFT loading failed or was skipped. Loading full model from: {model_dir}...")
        model = AutoModelForSeq2SeqLM.from_pretrained(
            model_dir,
            torch_dtype=load_dtype
        ).to(device).eval()
        print("Successfully loaded full fine-tuned model.")

    return model, tokenizer

# Load the fine-tuned model once for the evaluation code below.
try:
    fine_tuned_model, fine_tuned_tokenizer = load_and_test_model(MODEL_DIR, DEVICE)
except Exception as e:
    print(f"\nFATAL ERROR: Could not load model from {MODEL_DIR}. Ensure the training cell completed successfully and saved the files.")
    print(f"Error detail: {e}")
    # Re-raise rather than calling exit(): inside a notebook exit() shuts the
    # kernel down and loses the traceback, while raising just stops this cell.
    raise


# Re-read the (text, scramble) pairs and evaluate the loaded model on a slice
# of them.
text = []
scrambles = []

with open('/content/drive/My Drive/Colab Notebooks/enigma_processed.csv', newline='', encoding='utf-8') as csvfile:
    lines = csv.reader(csvfile, delimiter=',')
    next(lines, None)  # header row
    for line in lines:
        # Blank or short rows would otherwise raise IndexError.
        if len(line) < 2:
            continue
        text.append(line[0].lower())
        scrambles.append(line[1].lower())

line_count = len(text)

# Use the first 50% of the rows; the last fifth of that subset is evaluated.
training_cap = int(0.5 * line_count)
split_point = int(0.8 * training_cap)

training_text = text[:split_point]
training_scrambles = scrambles[:split_point]
validation_text = text[split_point:training_cap]
validation_scrambles = scrambles[split_point:training_cap]

train_dataset = EnigmaDataset(training_text, training_scrambles, fine_tuned_tokenizer)
eval_dataset = EnigmaDataset(validation_text, validation_scrambles, fine_tuned_tokenizer)

training_args = Seq2SeqTrainingArguments(
    # Explicit directory so this cell does not depend on what the library
    # does when output_dir is None.
    output_dir="byt5_enigma_eval",
    num_train_epochs=3,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=100,
    save_strategy="epoch",
    # No fp16 autocast: T5-family models overflow in fp16, and a hard-coded
    # fp16=True also fails on a CPU-only runtime. Evaluation runs in whatever
    # dtype the model was loaded with.
    fp16=False,
    predict_with_generate=True,
    report_to='none',
    gradient_accumulation_steps=16
)

# Label padding added by the collator must be -100 so it is ignored by the
# loss rather than scored as a real target token.
data_collator = DataCollatorForSeq2Seq(
    fine_tuned_tokenizer, model=fine_tuned_model, padding="longest",
    label_pad_token_id=-100
)

trainer = Seq2SeqTrainer(
    model=fine_tuned_model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=fine_tuned_tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

results = trainer.evaluate()
print(f"Final Evaluation Results: {results}")

Loading tokenizer from: byt5_enigma_finetuned...
Loading base model (google/byt5-small) for PEFT inference...
PEFT loading failed or was skipped. Loading full model from: byt5_enigma_finetuned...
Successfully loaded full fine-tuned model.


  trainer = Seq2SeqTrainer(


Final Evaluation Results: {'eval_loss': 32.05694580078125, 'eval_model_preparation_time': 0.012, 'eval_accuracy': 0.025963018698216662, 'eval_runtime': 358.2855, 'eval_samples_per_second': 7.684, 'eval_steps_per_second': 1.923}
