In [1]:
import os
from recruitair.modeling.custom_qwen import customize_qwen_model, freeze_custom_qwen_backbone
import torch
from torch import nn
import contextlib
from transformers import AutoModelForCausalLM, AutoTokenizer
import time
import pathlib
from codecarbon import EmissionsTracker
from tqdm import tqdm
from recruitair.modeling.tokenize import ResumeAndCriteriaTokenizer
import mlflow
from tempfile import TemporaryDirectory

# Debugging switches, off by default. Uncomment a line to turn it on:
# os.environ["STORE_INPUT_TENSORS"] = "true"  # save each batch's inputs to disk so the ones that cause OOM errors can be inspected
# os.environ["STORE_MEMORY_SNAPSHOTS"] = "true"  # store CUDA memory snapshots (also on OOM errors)
# The memory snapshots can be analyzed with https://docs.pytorch.org/memory_viz
os.environ.update(
    {
        "MLFLOW_ENABLE_SYSTEM_METRICS_LOGGING": "true",
        "codecarbon_log_level": "WARNING",  # Disable most of the loggings
    }
)

# Point MLflow at the tracking server and experiment, and enable PyTorch autologging.
mlflow.set_tracking_uri("http://nattech.fib.upc.edu:40380/")
mlflow.set_experiment("criteria-evaluation/custom-qwen-finetune")
mlflow.pytorch.autolog()

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Run configuration: everything a reader might want to tune lives in this cell.
NUM_EPOCHS = 1  # Number of passes over the training set in the training loop
ORIGINAL_MODEL_NAME = "Qwen/Qwen3-0.6B"  # Identifier passed to `from_pretrained` for both the model and the tokenizer
BATCH_SIZE = 8  # Batch size shared by the training and validation DataLoaders
ADAM_LEARNING_RATE = 1e-3  # `lr` given to torch.optim.Adam
ADAM_WEIGHT_DECAY = 1e-4  # `weight_decay` given to torch.optim.Adam

In [3]:
# Load the pretrained causal LM on the CPU together with its tokenizer; the customized model is
# moved to `device` in a later cell.
# `dtype` replaces the deprecated `torch_dtype` keyword (the installed transformers version warns
# "`torch_dtype` is deprecated! Use `dtype` instead!").
original_model = AutoModelForCausalLM.from_pretrained(ORIGINAL_MODEL_NAME, dtype="auto", device_map="cpu")
tokenizer = AutoTokenizer.from_pretrained(ORIGINAL_MODEL_NAME)

# Apply the project's customization to the pretrained model and freeze its backbone.
# NOTE(review): presumably freezing disables gradients for the backbone parameters only, leaving
# the head trainable; confirm in recruitair.modeling.custom_qwen.
model = customize_qwen_model(original_model)
freeze_custom_qwen_backbone(model)

`torch_dtype` is deprecated! Use `dtype` instead!


In [4]:
# Load the training and validation datasets from "../data/processed/train.jsonl" and "../data/processed/validation.jsonl"
import pandas as pd

# Both splits are JSON Lines files (one record per line) encoded as UTF-8; paths are relative to
# the current working directory.
_JSONL_READ_OPTIONS = {"lines": True, "encoding": "utf-8"}
train_df = pd.read_json("../data/processed/train.jsonl", **_JSONL_READ_OPTIONS)
val_df = pd.read_json("../data/processed/validation.jsonl", **_JSONL_READ_OPTIONS)

# Convert the DataFrames to PyTorch Datasets
from torch.utils.data import Dataset, DataLoader


class CriteriaDataset(Dataset):
    """Map-style dataset over a DataFrame with "resume", "criteria" and "score" columns."""

    def __init__(self, dataframe):
        """Keep a reference to `dataframe`; rows are read lazily in `__getitem__`."""
        self.dataframe = dataframe

    def __len__(self):
        """Return the number of rows in the wrapped DataFrame."""
        return self.dataframe.shape[0]

    def __getitem__(self, idx):
        """Return `(resume, criteria, score / 10.0)` for the row at position `idx`."""
        record = self.dataframe.iloc[idx]
        resume_text = record["resume"]
        criteria_text = record["criteria"]
        # Normalize score to [0, 1] (assumes raw scores lie in [0, 10] — TODO confirm)
        normalized_score = record["score"] / 10.0
        return resume_text, criteria_text, normalized_score


# Wrap each split in a dataset; only the training loader shuffles its samples.
train_dataset, val_dataset = CriteriaDataset(train_df), CriteriaDataset(val_df)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=BATCH_SIZE)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=BATCH_SIZE)

In [5]:
# Move the model to the target device, then optimize only the parameters that still require
# gradients (presumably just the head, since the backbone was frozen earlier).
model.to(device)
trainable_params = [param for param in model.parameters() if param.requires_grad]
optimizer = torch.optim.Adam(
    trainable_params,
    lr=ADAM_LEARNING_RATE,
    weight_decay=ADAM_WEIGHT_DECAY,
)

# Mean squared error between predictions and target scores.
criterion = nn.MSELoss()

# Project tokenizer wrapper, called with a resume batch and a criteria batch in the training loop.
custom_tokenizer = ResumeAndCriteriaTokenizer(tokenizer)

In [None]:
# Debugging switches, read once instead of on every batch.
#   STORE_INPUT_TENSORS:    save each batch's inputs to disk before the forward pass.
#   STORE_MEMORY_SNAPSHOTS: record CUDA memory history and dump snapshots (needs a CUDA device).
store_input_tensors = os.getenv("STORE_INPUT_TENSORS", "false").lower() == "true"
store_memory_snapshots = (
    os.getenv("STORE_MEMORY_SNAPSHOTS", "false").lower() == "true" and torch.cuda.is_available()
)

if store_input_tensors:
    pathlib.Path("../data/raw/input-data").mkdir(parents=True, exist_ok=True)
if store_memory_snapshots:
    # The snapshot dumps below write into this directory, so it must exist beforehand.
    pathlib.Path("../data/raw/cuda-mem-snapshots").mkdir(parents=True, exist_ok=True)


def _prepare_batch(resume_batch, criteria_batch, score_batch):
    """Tokenize one (resume, criteria) batch and move the inputs and float32 targets to `device`."""
    encoded_inputs = custom_tokenizer(
        resume_batch, criteria_batch, padding=True, return_tensors="pt", padding_side="left"
    ).to(device)
    # Targets become a float32 column so they line up with the predictions passed to the loss.
    targets = score_batch.type(torch.float32).unsqueeze(1).to(device)
    return encoded_inputs, targets


with mlflow.start_run(run_name="custom-qwen-finetune") as run:
    mlflow.log_params(
        {
            "original_model": ORIGINAL_MODEL_NAME,
            "num_epochs": NUM_EPOCHS,
            "optimizer": "Adam",
            "optimizer/Adam/learning_rate": ADAM_LEARNING_RATE,
            "optimizer/Adam/weight_decay": ADAM_WEIGHT_DECAY,
            "criterion": "MSELoss",
            "batch_size": BATCH_SIZE,
            # Same value as "original_model" under a hyphenated key; kept so the logged params are unchanged.
            "original-model": ORIGINAL_MODEL_NAME,
        }
    )

    train_start = time.monotonic()

    tracker = EmissionsTracker(measure_power_secs=1, tracking_mode="process", save_to_file=False)
    if store_memory_snapshots:
        # Only pay for allocation tracking when snapshots will actually be written.
        torch.cuda.memory._record_memory_history()
    tracker.start()
    try:
        for epoch in range(NUM_EPOCHS):
            epoch_start = time.monotonic()
            # Train mode overall, but the backbone is put back in eval mode and only the head trains.
            model.train()
            model.backbone.eval()
            model.head.train()
            running_loss = 0.0
            bar = tqdm(total=len(train_loader), desc=f"Epoch {epoch+1}/{NUM_EPOCHS}", leave=False, unit="batch")
            for resume_batch, criteria_batch, score_batch in train_loader:
                encoded_inputs, targets = _prepare_batch(resume_batch, criteria_batch, score_batch)

                optimizer.zero_grad()

                if store_input_tensors:
                    # Saved before the forward pass for every batch, so the inputs of a batch that
                    # fails are already on disk for debugging.
                    torch.save(
                        {
                            "encoded_inputs": encoded_inputs,
                            "score_batch": targets,
                            "resume_batch": resume_batch,
                            "criteria_batch": criteria_batch,
                            "epoch": epoch,
                            "batch_index": bar.n,
                        },
                        f"../data/raw/input-data/epoch{epoch+1}-batch{bar.n + 1}-inputs.pth",
                    )

                try:
                    preds = model(**encoded_inputs)
                except Exception:
                    if store_memory_snapshots:
                        # Best effort: a failing dump must not mask the original error.
                        with contextlib.suppress(Exception):
                            torch.cuda.memory._dump_snapshot(
                                f"../data/raw/cuda-mem-snapshots/epoch{epoch+1}-batch{bar.n + 1}-error.pickle"
                            )
                    raise

                loss = criterion(preds, targets)
                loss.backward()
                optimizer.step()
                batch_loss = loss.item()
                # Weight by batch size so the epoch loss is a per-sample mean even with a short last batch.
                running_loss += batch_loss * len(resume_batch)
                # Update progress bar's description with current loss
                bar.set_postfix(loss=batch_loss)
                bar.update(1)
                if store_memory_snapshots:
                    try:
                        torch.cuda.memory._dump_snapshot(
                            f"../data/raw/cuda-mem-snapshots/epoch{epoch+1}-batch{bar.n}.pickle"
                        )
                    except Exception as e:
                        print(f"Could not dump CUDA memory snapshot: {e}")
            bar.close()
            epoch_loss = running_loss / len(train_loader.dataset)
            mlflow.log_metric("epoch_duration_seconds", time.monotonic() - epoch_start, step=epoch)
            mlflow.log_metric("train_loss", epoch_loss, step=epoch)
            print(f"Epoch {epoch+1}/{NUM_EPOCHS} loss: {epoch_loss:.4f}")

            # Validation
            validation_start = time.monotonic()
            model.eval()
            val_loss = 0.0
            bar = tqdm(total=len(val_loader), desc=f"Validation {epoch+1}/{NUM_EPOCHS}", leave=False, unit="batch")
            with torch.no_grad():
                for resume_batch, criteria_batch, score_batch in val_loader:
                    encoded_inputs, targets = _prepare_batch(resume_batch, criteria_batch, score_batch)

                    preds = model(**encoded_inputs)
                    batch_loss = criterion(preds, targets).item()
                    val_loss += batch_loss * len(resume_batch)
                    bar.set_postfix(loss=batch_loss)
                    bar.update(1)
            bar.close()
            val_epoch_loss = val_loss / len(val_loader.dataset)
            print(f"Validation loss: {val_epoch_loss:.4f}")
            mlflow.log_metric("validation_loss", val_epoch_loss, step=epoch)
            mlflow.log_metric("validation_duration_seconds", time.monotonic() - validation_start, step=epoch)

        mlflow.log_metric("total_training_duration_seconds", time.monotonic() - train_start)
    finally:
        # Stop the emissions tracker even when training fails part-way through.
        tracker.stop()

    # Forward the numeric fields of the emissions report to MLflow.
    all_metrics = tracker.final_emissions_data.values
    num_metrics = {f"emissions-tracker/{k}": v for k, v in all_metrics.items() if isinstance(v, (int, float))}
    mlflow.log_metrics(num_metrics, run_id=run.info.run_id)
    # Log the model and the tokenizer
    with TemporaryDirectory() as tmpdir:
        tokenizer_dir = os.path.join(tmpdir, "tokenizer")
        custom_tokenizer.save_pretrained(tokenizer_dir)
        mlflow.pytorch.log_model(
            model,
            artifact_path="model",
            registered_model_name="custom-qwen-finetuned",
            step=epoch,
            extra_files=[tokenizer_dir],
            run_id=run.info.run_id,
        )

2025/10/13 01:09:50 INFO mlflow.system_metrics.system_metrics_monitor: Started monitoring system metrics.
 Windows OS detected: Please install Intel Power Gadget to measure CPU

                                                                         

Epoch 1/1 loss: 0.0005


                                                                             

Validation loss: 0.0036


Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 747.04it/s] 
Registered model 'custom-qwen-finetuned' already exists. Creating a new version of this model...
2025/10/13 01:10:19 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: custom-qwen-finetuned, version 15
Created version '15' of model 'custom-qwen-finetuned'.
2025/10/13 01:10:19 INFO mlflow.system_metrics.system_metrics_monitor: Stopping system metrics monitoring...
2025/10/13 01:10:19 INFO mlflow.system_metrics.system_metrics_monitor: Successfully terminated system metrics monitoring!


🏃 View run custom-qwen-finetune at: http://nattech.fib.upc.edu:40380/#/experiments/2/runs/ae57ddc8ff964449ba2466934a1162de
🧪 View experiment at: http://nattech.fib.upc.edu:40380/#/experiments/2
