# SEG-PEFT

In [None]:
%%capture
!git clone https://github.com/rossoc/SEG-PEFT
%cd SEG-PEFT
!pip install evaluate

In [1]:
import torch
from transformers import TrainingArguments, Trainer, EarlyStoppingCallback
from src.segpeft import kvasir_dataset, compute_metrics_fn, segformer, set_seed, Metrics
import time
import yaml
import pandas as pd
import os
import zipfile
from peft import get_peft_model, LoraConfig

# Fix the random seed once, before any dataset split or model initialisation.
# NOTE(review): set_seed is imported from src.segpeft; presumably it seeds the
# Python, NumPy and torch RNGs — confirm against its implementation.
set_seed(42)

  from .autonotebook import tqdm as notebook_tqdm


## Dataset
The dataset is described on the
[Kvasir-SEG](https://datasets.simula.no/kvasir-seg/) page. The next cell
downloads the archive and extracts it into `data/`.

In [4]:
dataset_dir = "data"
os.makedirs(dataset_dir, exist_ok=True)
!wget --no-check-certificate https://datasets.simula.no/downloads/kvasir-seg.zip -O kvasir-seg.zip

with zipfile.ZipFile("kvasir-seg.zip", "r") as zip_ref:
    zip_ref.extractall(dataset_dir)

--2025-11-05 09:01:02--  https://datasets.simula.no/downloads/kvasir-seg.zip
Resolving datasets.simula.no (datasets.simula.no)... 128.39.36.14
Connecting to datasets.simula.no (datasets.simula.no)|128.39.36.14|:443... connected.
  Unable to locally verify the issuer's authority.
HTTP request sent, awaiting response... 200 OK
Length: 46227172 (44M) [application/zip]
Saving to: ‘kvasir-seg.zip’


2025-11-05 09:01:08 (7.24 MB/s) - ‘kvasir-seg.zip’ saved [46227172/46227172]



## Train [SegFormer](https://huggingface.co/docs/transformers/model_doc/segformer) with full fine-tuning (FFT)

In [None]:
# Module-level training knobs.
# NOTE(review): train_segformer_fft (defined in this cell) does not read any of
# these values — it hard-codes a per-device batch size of 1. They are looked up
# as globals by the LoRA training function later in the notebook, where the same
# assignments are repeated.
batch_size = 64
gradient_accumulation_steps = 4
use_bf16 = True
dataloader_num_workers = 8


def train_segformer_fft(epochs, lr, save_dir):
    """Fully fine-tune SegFormer and persist metrics, curves and weights.

    Args:
        epochs: Maximum number of training epochs; early stopping (patience of
            5 evaluations) may end the run sooner.
        lr: Learning rate handed to ``TrainingArguments``.
        save_dir: Sub-directory of ``./outputs/`` that receives checkpoints,
            logs, ``all_metrics.json``, ``training_history.csv`` and ``final``.

    Returns:
        The ``Trainer`` used for the run, after training and evaluation.
    """
    # Informational only: the Trainer performs its own device placement.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    test_size = 0.2
    model, model_name, _ = segformer()
    train_dataset, test_dataset = kvasir_dataset(model_name, test_size)

    output_dir = "./outputs/" + save_dir
    # Make sure the target exists even if training never wrote a checkpoint.
    os.makedirs(output_dir, exist_ok=True)

    training_args = TrainingArguments(
        output_dir=output_dir,
        num_train_epochs=epochs,
        per_device_train_batch_size=1,
        per_device_eval_batch_size=1,
        learning_rate=lr,
        logging_steps=3,
        save_total_limit=2,
        prediction_loss_only=False,
        remove_unused_columns=True,
        push_to_hub=False,
        report_to="none",
        eval_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
        logging_dir=f"{output_dir}/logs",
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=test_dataset,
        compute_metrics=compute_metrics_fn(model_name),  # type: ignore
        callbacks=[EarlyStoppingCallback(early_stopping_patience=5)],
    )

    print("Starting training...")
    start_time = time.time()
    trainer.train()
    end_time = time.time() - start_time

    all_metrics = {
        "training_history": trainer.state.log_history,
        "final_evaluation": trainer.evaluate(),
        "training_time": end_time,
    }

    # NOTE: despite the .json extension the file is serialised with yaml.dump.
    with open(f"{output_dir}/all_metrics.json", "w") as f:
        yaml.dump(all_metrics, f, indent=2)

    df = pd.DataFrame(trainer.state.log_history)
    df.to_csv(f"{output_dir}/training_history.csv", index=False)
    trainer.save_model(f"{output_dir}/final")

    # Snapshot the history first, then score the training split under its own
    # metric prefix. With the default "eval" prefix this pass would be recorded
    # in trainer.state.log_history as an extra `eval_loss` entry and be mistaken
    # for a validation point by anything that filters the history on that key.
    log = trainer.state.log_history.copy()
    final_train_metrics = trainer.evaluate(
        eval_dataset=train_dataset, metric_key_prefix="final_train"
    )
    # Use the epoch actually reached: early stopping can end before `epochs`.
    log.append(
        {
            "epoch": trainer.state.epoch,
            "loss": final_train_metrics["final_train_loss"],
        }
    )
    metrics = Metrics(f"{output_dir}/")
    metrics.plot_curves(log)
    return trainer

In [6]:
# Hyper-parameters for the full fine-tuning run.
epochs = 5
learning_rate = 5e-5
save_dir = "test_transformer_fft"  # results are written under ./outputs/<save_dir>

In [7]:
# Run the full fine-tuning baseline and keep the Trainer for the inspection cells below.
fft_trainer = train_segformer_fft(epochs, learning_rate, save_dir)

Using device: cpu


Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/segformer-b0-finetuned-ade-512-512 and are newly initialized because the shapes did not match:
- decode_head.classifier.bias: found shape torch.Size([150]) in the checkpoint and torch.Size([2]) in the model instantiated
- decode_head.classifier.weight: found shape torch.Size([150, 256, 1, 1]) in the checkpoint and torch.Size([2, 256, 1, 1]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting training...


Step,Training Loss,Validation Loss,Mean Iou,Mean Dice,Per Class Dice,Mean Accuracy,Overall Accuracy,Per Class Iou
20,0.5905,0.470265,0.497639,0.5263,"[0.9200389719269012, 0.13256068058526355]",0.995277,0.995277,"[0.9952770408627537, 0.0]"
40,0.4957,0.393427,0.478947,0.757264,"[0.9353344769741262, 0.5791935132470973]",0.957894,0.957894,"[0.9578938190646098, 0.0]"
60,0.3939,0.324189,0.482137,0.786589,"[0.9427006459757015, 0.6304780082612608]",0.964273,0.964273,"[0.9642733440459877, 0.0]"
80,0.3936,0.310534,0.48309,0.805431,"[0.9469213705386227, 0.6639414397124116]",0.966181,0.966181,"[0.9661806595588401, 0.0]"
100,0.3936,0.306328,0.468681,0.812506,"[0.9411266527151112, 0.683884644034503]",0.937363,0.937363,"[0.9373627237394877, 0.0]"
120,0.3224,0.29556,0.487348,0.802763,"[0.9485910039150358, 0.6569345447980733]",0.974696,0.974696,"[0.974696277858126, 0.0]"
140,0.3039,0.290858,0.474242,0.816873,"[0.9449101238106131, 0.6888361051154688]",0.948484,0.948484,"[0.9484835290121221, 0.0]"
160,0.2828,0.27494,0.484718,0.815874,"[0.9498557054713086, 0.6818919811220986]",0.969436,0.969436,"[0.9694356052450692, 0.0]"
180,0.3157,0.269942,0.481566,0.817951,"[0.9487430587045138, 0.6871598368368448]",0.963133,0.963133,"[0.9631326099549804, 0.0]"
200,0.2833,0.258419,0.482396,0.820033,"[0.9495868722429958, 0.6904791945137954]",0.964793,0.964793,"[0.9647925786210747, 0.0]"


  acc = total_area_intersect / total_area_label
  acc = total_area_intersect / total_area_label
  acc = total_area_intersect / total_area_label
  acc = total_area_intersect / total_area_label
  acc = total_area_intersect / total_area_label
  acc = total_area_intersect / total_area_label
  acc = total_area_intersect / total_area_label
  acc = total_area_intersect / total_area_label
  acc = total_area_intersect / total_area_label
  acc = total_area_intersect / total_area_label


  acc = total_area_intersect / total_area_label


In [None]:
# Display every log record the Trainer accumulated (a list of dict-like entries).
fft_trainer.state.log_history

In [None]:
# Collect (eval_loss, epoch) pairs from every log record that carries an
# evaluation loss; records without that key are skipped.
Y = {
    "Evaluation": [
        (record["eval_loss"], record["epoch"])
        for record in fft_trainer.state.log_history
        if "eval_loss" in record
    ],
}

## Train [SegFormer](https://huggingface.co/docs/transformers/model_doc/segformer) with LoRA

We use [PEFT](https://github.com/huggingface/peft) to implement LoRA.

In [11]:
# Module-level training knobs for the LoRA run; train_segformer_lora looks up
# names such as batch_size, use_bf16 and dataloader_num_workers as globals.
# NOTE(review): these repeat the assignments made before train_segformer_fft —
# consider defining them once in a single configuration cell.
batch_size = 64
gradient_accumulation_steps = 4
use_bf16 = True
dataloader_num_workers = 8


def train_segformer_lora(epochs, lr, r, lora_alpha, lora_dropout, save_dir):
    """Fine-tune SegFormer with LoRA adapters and persist metrics, curves and weights.

    Reads the module-level ``batch_size``, ``gradient_accumulation_steps``,
    ``use_bf16`` and ``dataloader_num_workers`` settings.

    Args:
        epochs: Maximum number of training epochs; early stopping (patience of
            5 evaluations) may end the run sooner.
        lr: Peak learning rate (cosine schedule with 10% warm-up).
        r: LoRA rank.
        lora_alpha: LoRA scaling factor.
        lora_dropout: Dropout applied inside the LoRA layers.
        save_dir: Sub-directory of ``./outputs/`` that receives checkpoints,
            logs, ``all_metrics.json``, ``training_history.csv`` and ``final``.

    Returns:
        The ``Trainer`` used for the run, after training and evaluation.
    """
    # Informational only: the Trainer performs its own device placement.
    on_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if on_cuda else "cpu")
    print(f"Using device: {device}")

    test_size = 0.2
    model, model_name, modules = segformer()

    peft_config = LoraConfig(
        r=r,
        lora_alpha=lora_alpha,
        lora_dropout=lora_dropout,
        target_modules=modules,
    )
    # NOTE(review): only the LoRA matrices are trainable here. If the classifier
    # head is freshly initialised for this task (as the checkpoint-loading
    # warning suggests), it stays frozen at its random values unless it is listed
    # in LoraConfig(modules_to_save=...) — confirm whether that is intended.
    model = get_peft_model(model, peft_config)

    model.print_trainable_parameters()

    train_dataset, test_dataset = kvasir_dataset(model_name, test_size)

    output_dir = "./outputs/" + save_dir
    # Make sure the target exists even if training never wrote a checkpoint.
    os.makedirs(output_dir, exist_ok=True)

    # bf16 needs a CUDA device that actually supports it; requesting it on
    # unsupported hardware makes TrainingArguments fail.
    bf16_enabled = bool(use_bf16 and on_cuda and torch.cuda.is_bf16_supported())

    # Fix: the arguments object was previously built but never bound to a name,
    # so the Trainer construction below failed with a NameError.
    training_args = TrainingArguments(
        output_dir=output_dir,
        num_train_epochs=epochs,
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size * 2,
        # Fix: use the configured accumulation factor, not the batch size.
        gradient_accumulation_steps=gradient_accumulation_steps,
        bf16=bf16_enabled,
        bf16_full_eval=bf16_enabled,
        dataloader_num_workers=dataloader_num_workers,
        dataloader_pin_memory=True,
        # prefetch_factor is only valid when worker processes are used.
        dataloader_prefetch_factor=2 if dataloader_num_workers > 0 else None,
        logging_steps=1,
        learning_rate=lr,
        save_total_limit=2,
        prediction_loss_only=False,
        remove_unused_columns=True,
        push_to_hub=False,
        report_to="none",
        # Fix: the valid strategy name is "epoch" ("epochs" is rejected).
        # save_steps / eval_steps are not used by the epoch strategy and were dropped.
        eval_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
        logging_dir=f"{output_dir}/logs",
        optim="adamw_torch_fused" if on_cuda else "adamw_torch",
        warmup_ratio=0.1,
        lr_scheduler_type="cosine",
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=test_dataset,
        compute_metrics=compute_metrics_fn(model_name),  # type: ignore
        # Patience counts evaluation rounds (one per epoch here), not samples;
        # 5 matches the full fine-tuning run.
        callbacks=[EarlyStoppingCallback(early_stopping_patience=5)],
    )

    print("Starting training...")
    start_time = time.time()
    trainer.train()
    end_time = time.time() - start_time

    all_metrics = {
        "training_history": trainer.state.log_history,
        "final_evaluation": trainer.evaluate(),
        "training_time": end_time,
    }

    # NOTE: despite the .json extension the file is serialised with yaml.dump.
    with open(f"{output_dir}/all_metrics.json", "w") as f:
        yaml.dump(all_metrics, f, indent=2)

    df = pd.DataFrame(trainer.state.log_history)
    df.to_csv(f"{output_dir}/training_history.csv", index=False)
    trainer.save_model(f"{output_dir}/final")

    metrics = Metrics(f"{output_dir}/")
    metrics.plot_curves(trainer.state.log_history)
    return trainer

In [12]:
# Hyper-parameters for the LoRA run.
# NOTE: this re-binds epochs, learning_rate and save_dir that were set for the
# full fine-tuning run earlier in the notebook.
epochs = 30
learning_rate = 5e-4
rank = 8  # passed as `r` to train_segformer_lora
lora_alpha = 32
lora_dropout = 0.05
save_dir = "test_transformer_lora"  # results are written under ./outputs/<save_dir>

In [None]:
# Keep the LoRA run in its own variable so the full fine-tuning trainer
# (`fft_trainer`) is not overwritten and both runs stay available for comparison.
lora_trainer = train_segformer_lora(
    epochs, learning_rate, rank, lora_alpha, lora_dropout, save_dir
)

Using device: cpu


Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/segformer-b0-finetuned-ade-512-512 and are newly initialized because the shapes did not match:
- decode_head.classifier.bias: found shape torch.Size([150]) in the checkpoint and torch.Size([2]) in the model instantiated
- decode_head.classifier.weight: found shape torch.Size([150, 256, 1, 1]) in the checkpoint and torch.Size([2, 256, 1, 1]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 65,536 || all params: 3,780,194 || trainable%: 1.7337


  image_processor = cls(**image_processor_dict)


Starting training...




Step,Training Loss,Validation Loss,Mean Iou,Mean Dice,Per Class Dice,Mean Accuracy,Overall Accuracy,Per Class Iou
800,0.5339,0.424841,0.469631,0.853864,"[0.9503281164107954, 0.7574001067861984]",0.939262,0.939262,"[0.9392618230303632, 0.0]"


  acc = total_area_intersect / total_area_label
