In [None]:
!pip -q install -U transformers datasets evaluate sacrebleu accelerate sentencepiece

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/10.4 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.6/10.4 MB[0m [31m107.6 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m10.4/10.4 MB[0m [31m184.3 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.4/10.4 MB[0m [31m108.9 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/515.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m515.2/515.2 kB[0m [31m45.2 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/84.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━

In [None]:
!pip -q install sacremoses

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/897.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m890.9/897.5 kB[0m [31m32.0 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m897.5/897.5 kB[0m [31m15.5 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
# Mount Google Drive at /content/drive; the dataset and results folders used by
# this notebook (DATA_ROOT / OUT_ROOT) live under /content/drive/MyDrive.
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
import json
import os
import time

import evaluate
import numpy as np
import pandas as pd
import torch
from datasets import DatasetDict, load_dataset
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    DataCollatorForSeq2Seq,
    Seq2SeqTrainer,
    Seq2SeqTrainingArguments,
)

In [None]:
# Input/output locations. The defaults point at the mounted Google Drive; set the
# MARIANMT_DATA_ROOT / MARIANMT_OUT_ROOT environment variables to run the notebook
# against another location without editing this cell.
DATA_ROOT = os.environ.get(
    "MARIANMT_DATA_ROOT", "/content/drive/MyDrive/dataset_splits_opus100_10k"
)
OUT_ROOT = os.environ.get(
    "MARIANMT_OUT_ROOT", "/content/drive/MyDrive/results_marianmt"
)
# Every run writes into a sub-folder of OUT_ROOT, so make sure it exists up front.
os.makedirs(OUT_ROOT, exist_ok=True)

In [None]:
# Corpus-level metrics shared by every run: `bleu` is the "sacrebleu" metric and
# `chrf` is the "chrf" metric from the `evaluate` library (loaded in that order).
bleu, chrf = [evaluate.load(metric_id) for metric_id in ("sacrebleu", "chrf")]

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading builder script: 0.00B [00:00, ?B/s]

Downloading builder script: 0.00B [00:00, ?B/s]

In [None]:
# Maps a (source_language, target_language) pair to the pretrained checkpoint
# that is evaluated and fine-tuned for that direction.
# NOTE(review): en->ko is the only direction using a "tc-big" checkpoint, and the
# outputs recorded in this notebook for it look broken (baseline BLEU 0.04 and
# nonsensical sample translations) -- verify that this checkpoint loads correctly
# with the installed transformers version before trusting the en->ko numbers.
PAIR_TO_MODEL = {
    (src, tgt): checkpoint
    for src, tgt, checkpoint in (
        ("en", "id", "Helsinki-NLP/opus-mt-en-id"),
        ("id", "en", "Helsinki-NLP/opus-mt-id-en"),
        ("en", "vi", "Helsinki-NLP/opus-mt-en-vi"),
        ("vi", "en", "Helsinki-NLP/opus-mt-vi-en"),
        ("en", "ko", "Helsinki-NLP/opus-mt-tc-big-en-ko"),
        ("ko", "en", "Helsinki-NLP/opus-mt-ko-en"),
    )
}


In [None]:
# Maps a (source_language, target_language) pair to the dataset folder under
# DATA_ROOT. Both directions of a language pair share one "en_<lang>" folder.
PAIR_TO_FOLDER = {
    pair: f"en_{lang}"
    for lang in ("id", "vi", "ko")
    for pair in (("en", lang), (lang, "en"))
}

In [None]:
def load_frozen_split(folder_name: str) -> DatasetDict:
    """Load the pre-split CSV files of one language pair.

    Reads ``train.csv``, ``val.csv`` and ``test.csv`` from
    ``DATA_ROOT/<folder_name>`` and returns them as a ``DatasetDict`` with the
    keys ``train``, ``validation`` and ``test``.
    """
    csv_for_split = {"train": "train.csv", "validation": "val.csv", "test": "test.csv"}

    loaded = {}
    for split_name, csv_name in csv_for_split.items():
        csv_path = os.path.join(DATA_ROOT, folder_name, csv_name)
        # A single CSV file is always exposed by the "csv" loader as its "train" split.
        loaded[split_name] = load_dataset("csv", data_files=csv_path, split="train")

    return DatasetDict(loaded)

In [None]:
def maybe_swap_columns(ds: DatasetDict, reverse: bool) -> DatasetDict:
    """Optionally exchange the ``source`` and ``target`` columns of every split.

    With ``reverse`` false the input is returned untouched. Otherwise a new
    ``DatasetDict`` (train / validation / test) is built in which each row's
    ``source`` holds the former ``target`` and vice versa.
    """
    if reverse:
        def _flip(row):
            return {"source": row["target"], "target": row["source"]}

        return DatasetDict(
            {split: ds[split].map(_flip) for split in ("train", "validation", "test")}
        )
    return ds

In [None]:
def tokenize_dataset(ds: DatasetDict, tokenizer, max_len=128) -> DatasetDict:
    """Tokenize the ``source`` / ``target`` text columns of every split.

    Each split is mapped in batches: ``source`` is encoded as model input and
    ``target`` (via ``text_target``) as ``labels``, both truncated to
    ``max_len`` tokens. The original columns are dropped from the result.
    """
    def _encode(rows):
        encoded = tokenizer(rows["source"], max_length=max_len, truncation=True)
        target_encoding = tokenizer(
            text_target=rows["target"],
            max_length=max_len,
            truncation=True,
        )
        encoded["labels"] = target_encoding["input_ids"]
        return encoded

    tokenized = {}
    for split in ("train", "validation", "test"):
        raw_split = ds[split]
        tokenized[split] = raw_split.map(
            _encode, batched=True, remove_columns=raw_split.column_names
        )
    return DatasetDict(tokenized)

In [None]:
def generate_predictions(model, tokenizer, texts, max_len=128, batch_size=16, **generate_kwargs):
    """Translate ``texts`` in mini-batches and return the decoded strings.

    Args:
        model: Model exposing ``eval()``, ``device`` and ``generate(...)``.
        tokenizer: Callable tokenizer that also provides ``batch_decode``.
        texts: Iterable of input strings (list, tuple, dataset column, ...).
        max_len: Truncation length for the inputs and default ``max_length``
            passed to ``generate``.
        batch_size: Number of texts per ``generate`` call; must be >= 1.
        **generate_kwargs: Extra keyword arguments forwarded to
            ``model.generate`` (e.g. ``num_beams=4``). A ``max_length`` given
            here takes precedence over ``max_len`` for generation.

    Returns:
        A list with one decoded prediction per input text, in input order.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1 (a negative value would
            otherwise yield an empty result without any error).
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    # Materialise once so any iterable can be sliced into plain list batches.
    texts = list(texts)
    generate_kwargs.setdefault("max_length", max_len)

    model.eval()
    preds = []
    with torch.no_grad():
        for start in range(0, len(texts), batch_size):
            batch = texts[start:start + batch_size]
            enc = tokenizer(batch, return_tensors="pt", padding=True, truncation=True, max_length=max_len)
            # Move every encoded tensor to the device the model lives on.
            enc = {name: tensor.to(model.device) for name, tensor in enc.items()}
            out = model.generate(**enc, **generate_kwargs)
            preds.extend(tokenizer.batch_decode(out, skip_special_tokens=True))
    return preds

In [None]:
def eval_bleu_chrf(preds, refs):
    """Score predictions against single references with BLEU and chrF.

    Args:
        preds: Iterable of predicted strings.
        refs: Iterable of reference strings, aligned with ``preds``.

    Returns:
        ``(bleu_score, chrf_score)`` as plain floats, taken from the ``"score"``
        entry of the module-level ``bleu`` and ``chrf`` metrics.
    """
    # Materialise both inputs once: each is consumed by two metric calls, so a
    # one-shot iterable (e.g. a generator) would leave the second call empty.
    predictions = list(preds)
    # Both metrics are given a list of references per prediction (here: exactly one).
    references = [[ref] for ref in refs]

    bleu_score = bleu.compute(predictions=predictions, references=references)["score"]
    chrf_score = chrf.compute(predictions=predictions, references=references)["score"]
    return float(bleu_score), float(chrf_score)

import torch

In [None]:
def _timed_predictions(model, tokenizer, texts, max_len, batch_size):
    """Translate ``texts`` via ``generate_predictions``; return ``(predictions, elapsed_seconds)``."""
    started = time.time()
    preds = generate_predictions(model, tokenizer, texts, max_len=max_len, batch_size=batch_size)
    return preds, time.time() - started


def run_marianmt_direction(src_lang: str, tgt_lang: str,
                          epochs=2, batch_size=16, lr=5e-5,
                          max_len=128, sample_n=10):
    """Evaluate, fine-tune and re-evaluate one translation direction.

    Steps: load the CSV splits for the pair, score the pretrained checkpoint on
    the test split (BLEU / chrF), fine-tune it on the train split with
    ``Seq2SeqTrainer``, score the fine-tuned model on the same test split, then
    write ``metrics.json``, ``samples_before_after.csv`` and the final
    model/tokenizer under ``OUT_ROOT/<src_lang>_to_<tgt_lang>_marianmt``.

    Args:
        src_lang: Source language code.
        tgt_lang: Target language code. ``(src_lang, tgt_lang)`` must be a key
            of ``PAIR_TO_MODEL``.
        epochs: Number of fine-tuning epochs.
        batch_size: Per-device batch size for training, evaluation and the
            test-set generation passes.
        lr: Learning rate for fine-tuning.
        max_len: Maximum token length used for tokenization and generation.
        sample_n: Upper bound on the number of evenly spaced test examples
            saved to the before/after CSV.

    Returns:
        The metrics dictionary that is also written to ``metrics.json``.

    Raises:
        AssertionError: If the language pair is not in ``PAIR_TO_MODEL``.
    """
    assert (src_lang, tgt_lang) in PAIR_TO_MODEL, (
        f"Unsupported direction {src_lang}->{tgt_lang}; expected one of {sorted(PAIR_TO_MODEL)}"
    )

    model_name = PAIR_TO_MODEL[(src_lang, tgt_lang)]
    folder_name = PAIR_TO_FOLDER[(src_lang, tgt_lang)]
    reverse = (src_lang != "en")  # because saved datasets are EN->X canonical

    run_id = f"{src_lang}_to_{tgt_lang}_marianmt"
    out_dir = os.path.join(OUT_ROOT, run_id)
    os.makedirs(out_dir, exist_ok=True)

    # ---- Load frozen data ----
    ds = load_frozen_split(folder_name)
    ds = maybe_swap_columns(ds, reverse=reverse)

    # ---- Load model/tokenizer ----
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    model = model.float()
    # NOTE(review): presumably set for training; confirm it does not slow down
    # generate() during the timed inference passes below.
    model.config.use_cache = False

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)

    # ---- Baseline inference on test set ----
    # Materialise the columns as plain lists so slicing/indexing below always
    # yields Python strings, whatever column type the datasets library returns.
    test_src = list(ds["test"]["source"])
    test_ref = list(ds["test"]["target"])

    baseline_preds, baseline_time = _timed_predictions(
        model, tokenizer, test_src, max_len, batch_size
    )
    baseline_bleu, baseline_chrf = eval_bleu_chrf(baseline_preds, test_ref)

    # ---- Tokenize for fine-tuning ----
    tok_ds = tokenize_dataset(ds, tokenizer, max_len=max_len)
    data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)

    # ---- Trainer ----
    args = Seq2SeqTrainingArguments(
        output_dir=os.path.join(out_dir, "checkpoints"),
        # Evaluate and checkpoint once per epoch; reload the checkpoint with the
        # lowest validation loss when training ends and keep only one on disk.
        eval_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
        metric_for_best_model="eval_loss",
        greater_is_better=False,
        save_total_limit=1,

        learning_rate=lr,
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        num_train_epochs=epochs,
        # Full-precision training (matches model.float() above).
        fp16=False,
        bf16=False,
        report_to="none",
        seed=42,

        label_smoothing_factor=0.1,
    )

    trainer = Seq2SeqTrainer(
        model=model,
        args=args,
        train_dataset=tok_ds["train"],
        eval_dataset=tok_ds["validation"],
        data_collator=data_collator,
    )

    # ---- Fine-tune ----
    train_t0 = time.time()
    trainer.train()
    train_time = time.time() - train_t0

    model = trainer.model
    model.eval()

    # ---- After-FT inference on test set ----
    finetuned_preds, finetuned_time = _timed_predictions(
        model, tokenizer, test_src, max_len, batch_size
    )
    finetuned_bleu, finetuned_chrf = eval_bleu_chrf(finetuned_preds, test_ref)

    # ---- Save metrics ----
    gpu_name = torch.cuda.get_device_name(0) if torch.cuda.is_available() else "cpu"
    n_params = sum(p.numel() for p in model.parameters())

    metrics = {
        "direction": f"{src_lang}->{tgt_lang}",
        "model": model_name,
        "dataset_folder": folder_name,
        "reverse_columns_used": bool(reverse),
        "max_len": max_len,
        "epochs": epochs,
        "batch_size": batch_size,
        "learning_rate": lr,
        "gpu": gpu_name,
        "n_params": int(n_params),
        "baseline": {
            "bleu": baseline_bleu,
            "chrf": baseline_chrf,
            "inference_time_sec": float(baseline_time),
        },
        "finetuned": {
            "bleu": finetuned_bleu,
            "chrf": finetuned_chrf,
            "inference_time_sec": float(finetuned_time),
        },
        "train_time_sec": float(train_time),
        "trainer_log_history": trainer.state.log_history,
    }

    with open(os.path.join(out_dir, "metrics.json"), "w", encoding="utf-8") as f:
        json.dump(metrics, f, indent=2)

    # ---- Save sample outputs for error analysis ----
    # Evenly spaced test indices (at most sample_n of them).
    idx = np.linspace(0, len(test_src)-1, num=min(sample_n, len(test_src)), dtype=int).tolist()
    samples = [
        {
            "source": test_src[k],
            "reference": test_ref[k],
            "before_ft": baseline_preds[k],
            "after_ft": finetuned_preds[k],
        }
        for k in idx
    ]

    pd.DataFrame(samples).to_csv(os.path.join(out_dir, "samples_before_after.csv"), index=False, encoding="utf-8")

    # Save final model (optional, but useful)
    model.save_pretrained(os.path.join(out_dir, "final_model"))
    tokenizer.save_pretrained(os.path.join(out_dir, "final_model"))

    print(f"Done {src_lang}->{tgt_lang}")
    print(f"Baseline:  BLEU={baseline_bleu:.2f}, chrF={baseline_chrf:.2f}")
    print(f"Fine-tuned: BLEU={finetuned_bleu:.2f}, chrF={finetuned_chrf:.2f}")
    print(f"Saved to: {out_dir}")

    return metrics

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Smoke test: translate a few English sentences with the en->ko checkpoint to
# eyeball its raw output before the full evaluation / fine-tuning runs.
# NOTE(review): the output recorded for this cell does not look like Korean
# translations of the inputs -- verify the checkpoint loads correctly.
model_name = "Helsinki-NLP/opus-mt-tc-big-en-ko"
# Fall back to CPU so the cell also runs on a runtime without a GPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device).eval()

texts = [
    "Are you seeing anyone?",
    "We need to find cover now.",
    "I can't believe you did that.",
    "This is not what I expected.",
    "Where are you going tonight?"
]

enc = tokenizer(texts, return_tensors="pt", padding=True, truncation=True, max_length=96).to(device)
with torch.no_grad():
    out = model.generate(**enc, max_length=96, num_beams=4)
print(tokenizer.batch_decode(out, skip_special_tokens=True))

config.json: 0.00B [00:00, ?B/s]

tokenizer_config.json:   0%|          | 0.00/341 [00:00<?, ?B/s]



source.spm:   0%|          | 0.00/790k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/815k [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/418M [00:00<?, ?B/s]

Loading weights:   0%|          | 0/254 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

['메종 제너럴 for', '코펜하겐 심장 Rockwell KC.', 'US Greece Hotel From 일반적인 연속 값.', 'PI 인기는 결국 US.', '1:26 신뢰할 수 있는 generalfolk 에 대 한']


In [None]:
# Run every translation direction: for each non-English language, first
# en->lang and then lang->en. One metrics dict is collected per direction.
directions = [
    pair
    for lang in ("id", "vi", "ko")
    for pair in (("en", lang), (lang, "en"))
]

all_metrics = []
for src, tgt in directions:
    direction_metrics = run_marianmt_direction(
        src,
        tgt,
        epochs=1,
        batch_size=8,
        lr=1e-5,
        max_len=96,
    )
    all_metrics.append(direction_metrics)

Generating train split: 0 examples [00:00, ? examples/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating train split: 0 examples [00:00, ? examples/s]

config.json: 0.00B [00:00, ?B/s]

tokenizer_config.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/796k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/801k [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/291M [00:00<?, ?B/s]

Loading weights:   0%|          | 0/258 [00:00<?, ?it/s]



model.safetensors:   0%|          | 0.00/291M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

Map:   0%|          | 0/8000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss
1,2.519278,2.506184


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

There were missing keys in the checkpoint model loaded: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight', 'lm_head.weight'].


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Done en->id
Baseline:  BLEU=36.83, chrF=59.49
Fine-tuned: BLEU=37.08, chrF=60.25
Saved to: /content/drive/MyDrive/results_marianmt/en_to_id_marianmt


Map:   0%|          | 0/8000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

config.json: 0.00B [00:00, ?B/s]

tokenizer_config.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/801k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/796k [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/291M [00:00<?, ?B/s]

Loading weights:   0%|          | 0/258 [00:00<?, ?it/s]

model.safetensors:   0%|          | 0.00/291M [00:00<?, ?B/s]



generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

Map:   0%|          | 0/8000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss
1,2.455448,2.393497


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

There were missing keys in the checkpoint model loaded: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight', 'lm_head.weight'].


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Done id->en
Baseline:  BLEU=40.12, chrF=56.62
Fine-tuned: BLEU=39.79, chrF=56.29
Saved to: /content/drive/MyDrive/results_marianmt/id_to_en_marianmt


Generating train split: 0 examples [00:00, ? examples/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating train split: 0 examples [00:00, ? examples/s]

config.json: 0.00B [00:00, ?B/s]

tokenizer_config.json:   0%|          | 0.00/44.0 [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/809k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/756k [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/289M [00:00<?, ?B/s]

Loading weights:   0%|          | 0/258 [00:00<?, ?it/s]

model.safetensors:   0%|          | 0.00/289M [00:00<?, ?B/s]



generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

Map:   0%|          | 0/8000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss
1,2.975439,2.835734


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

There were missing keys in the checkpoint model loaded: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight', 'lm_head.weight'].


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Done en->vi
Baseline:  BLEU=23.29, chrF=39.46
Fine-tuned: BLEU=27.38, chrF=43.96
Saved to: /content/drive/MyDrive/results_marianmt/en_to_vi_marianmt


Map:   0%|          | 0/8000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

config.json: 0.00B [00:00, ?B/s]

tokenizer_config.json:   0%|          | 0.00/44.0 [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/756k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/809k [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/289M [00:00<?, ?B/s]

Loading weights:   0%|          | 0/258 [00:00<?, ?it/s]



model.safetensors:   0%|          | 0.00/289M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

Map:   0%|          | 0/8000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss
1,2.778486,2.657877


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

There were missing keys in the checkpoint model loaded: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight', 'lm_head.weight'].


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Done vi->en
Baseline:  BLEU=31.68, chrF=48.56
Fine-tuned: BLEU=31.60, chrF=48.51
Saved to: /content/drive/MyDrive/results_marianmt/vi_to_en_marianmt


Generating train split: 0 examples [00:00, ? examples/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating train split: 0 examples [00:00, ? examples/s]

Loading weights:   0%|          | 0/254 [00:00<?, ?it/s]

Map:   0%|          | 0/8000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss
1,5.369438,5.168222


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

There were missing keys in the checkpoint model loaded: ['model.encoder.embed_tokens.weight', 'model.encoder.embed_positions.weight', 'model.decoder.embed_tokens.weight', 'model.decoder.embed_positions.weight', 'lm_head.weight'].


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Done en->ko
Baseline:  BLEU=0.04, chrF=1.08
Fine-tuned: BLEU=0.26, chrF=2.57
Saved to: /content/drive/MyDrive/results_marianmt/en_to_ko_marianmt


Map:   0%|          | 0/8000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

config.json: 0.00B [00:00, ?B/s]

tokenizer_config.json:   0%|          | 0.00/44.0 [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/842k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/813k [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/312M [00:00<?, ?B/s]

Loading weights:   0%|          | 0/258 [00:00<?, ?it/s]

model.safetensors:   0%|          | 0.00/312M [00:00<?, ?B/s]



generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

Map:   0%|          | 0/8000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss
1,3.195761,3.048954


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

There were missing keys in the checkpoint model loaded: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight', 'lm_head.weight'].


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Done ko->en
Baseline:  BLEU=20.81, chrF=39.07
Fine-tuned: BLEU=21.48, chrF=39.35
Saved to: /content/drive/MyDrive/results_marianmt/ko_to_en_marianmt
