In [1]:
import os
from recruitair.modeling.custom_qwen import customize_qwen_model, freeze_custom_qwen_backbone
import torch
from torch import nn
from transformers import AutoModelForCausalLM, AutoTokenizer
import time
from tqdm import tqdm
from recruitair.modeling.tokenize import ResumeAndCriteriaTokenizer
import mlflow

# Enable MLflow system-metrics logging; set here, before any run is started below.
os.environ["MLFLOW_ENABLE_SYSTEM_METRICS_LOGGING"] = "true"
# Point the MLflow client at the tracking server and select the experiment for this notebook's runs.
mlflow.set_tracking_uri("http://nattech.fib.upc.edu:40380/")
mlflow.set_experiment("criteria-evaluation/custom-qwen-finetune")
# Turn on MLflow's PyTorch autologging integration.
mlflow.pytorch.autolog()

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the pretrained Qwen checkpoint and its tokenizer.
# The weights are loaded on the CPU first; the customized model is moved to `device` in a later cell.
model_name = "Qwen/Qwen3-0.6B"
# `dtype` replaces the deprecated `torch_dtype` keyword (see the deprecation warning this cell used to emit).
original_model = AutoModelForCausalLM.from_pretrained(model_name, dtype="auto", device_map="cpu")
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Wrap the causal LM into the project's custom model (exposes `.backbone` and `.head`, used by the
# training cell), then freeze the backbone so that only the remaining parameters require gradients.
# NOTE(review): exact semantics live in recruitair.modeling.custom_qwen — confirm there.
model = customize_qwen_model(original_model)
freeze_custom_qwen_backbone(model)

`torch_dtype` is deprecated! Use `dtype` instead!


In [3]:
# Load the training and validation splits from "../data/processed/train.jsonl" and
# "../data/processed/validation.jsonl" (JSON Lines format: one record per line).
import pandas as pd

# The frames are consumed by CriteriaDataset, which reads their "resume", "criteria" and "score" columns.
train_df = pd.read_json("../data/processed/train.jsonl", lines=True, encoding="utf-8")
val_df = pd.read_json("../data/processed/validation.jsonl", lines=True, encoding="utf-8")

# Convert the DataFrames to PyTorch Datasets
from torch.utils.data import Dataset, DataLoader


class CriteriaDataset(Dataset):
    """Map-style dataset yielding ``(resume, criteria, normalized_score)`` triples.

    The three columns are copied out of the DataFrame once at construction time, so
    item access is a plain list lookup instead of building a pandas Series per sample.
    Later changes to ``dataframe`` are therefore not reflected in the dataset.

    Args:
        dataframe: pandas DataFrame with "resume", "criteria" and "score" columns.
        max_score: Value each raw score is divided by. With the default of 5.0 the
            result lies in [0, 1] provided the raw scores lie in [0, 5].

    Raises:
        KeyError: If any of the required columns is missing from ``dataframe``.
        ValueError: If ``max_score`` is not strictly positive.
    """

    REQUIRED_COLUMNS = ("resume", "criteria", "score")

    def __init__(self, dataframe, max_score=5.0):
        # Fail fast here instead of with a KeyError in the middle of the first epoch.
        missing = [column for column in self.REQUIRED_COLUMNS if column not in dataframe.columns]
        if missing:
            raise KeyError(f"dataframe is missing required column(s): {missing}")
        if max_score <= 0:
            raise ValueError(f"max_score must be positive, got {max_score!r}")

        self.dataframe = dataframe  # kept for callers that inspect the underlying frame
        self.max_score = float(max_score)
        self._resumes = dataframe["resume"].tolist()
        self._criteria = dataframe["criteria"].tolist()
        self._scores = dataframe["score"].tolist()

    def __len__(self):
        """Return the number of samples (rows of the source DataFrame)."""
        return len(self._scores)

    def __getitem__(self, idx):
        """Return ``(resume, criteria, score / max_score)`` for the row at position ``idx``."""
        return self._resumes[idx], self._criteria[idx], self._scores[idx] / self.max_score


# Wrap each split in a dataset, then batch it: training batches are reshuffled every epoch,
# validation batches keep the file order.
train_dataset, val_dataset = CriteriaDataset(train_df), CriteriaDataset(val_df)
train_loader = DataLoader(dataset=train_dataset, batch_size=8, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=8, shuffle=False)

In [4]:
# Move the model to the selected device, then optimize only the parameters that still
# require gradients (the backbone was frozen when the model was built).
model.to(device)
optimizer = torch.optim.Adam(
    (param for param in model.parameters() if param.requires_grad),
    lr=1e-3,
    weight_decay=1e-4,
)
# Regression objective: mean squared error between predictions and target scores.
criterion = nn.MSELoss()

# Project tokenizer wrapper; the training loop calls it with a batch of resumes and a batch of criteria.
custom_tokenizer = ResumeAndCriteriaTokenizer(tokenizer)

In [None]:
num_epochs = 5


def _batch_loss(resume_batch, criteria_batch, score_batch):
    """Tokenize one batch, run the model on it and return the loss tensor.

    Shared by the training and validation loops so both feed the model identically.
    Gradient tracking is left to the caller (validation wraps the call in ``torch.no_grad()``).
    """
    padded_input_tokens, attention_mask = custom_tokenizer(resume_batch, criteria_batch)
    padded_input_tokens = padded_input_tokens.to(device)
    attention_mask = attention_mask.to(device)
    # Cast targets to float32 and add a trailing dimension
    # (presumably to match the head's output shape — confirm against the model).
    targets = score_batch.type(torch.float32).unsqueeze(1).to(device)
    preds = model(input_ids=padded_input_tokens, attention_mask=attention_mask)
    return criterion(preds, targets)


with mlflow.start_run(run_name="custom-qwen-finetune"):
    # Log the hyperparameters actually in use, read from the live objects, so the run
    # record cannot drift from the optimizer / loader configured in earlier cells.
    optimizer_name = type(optimizer).__name__
    optimizer_settings = optimizer.param_groups[0]
    mlflow.log_params(
        {
            "num_epochs": num_epochs,
            "optimizer": optimizer_name,
            f"optimizer/{optimizer_name}/learning_rate": optimizer_settings["lr"],
            f"optimizer/{optimizer_name}/weight_decay": optimizer_settings["weight_decay"],
            "criterion": type(criterion).__name__,
            "batch_size": train_loader.batch_size,
            "original-model": model_name,
        }
    )

    train_start = time.monotonic()

    for epoch in range(num_epochs):
        epoch_start = time.monotonic()
        # Only the head is trained: keep the frozen backbone in eval mode.
        model.train()
        model.backbone.eval()
        model.head.train()
        running_loss = 0.0
        # The context manager closes the bar even if the epoch is interrupted or raises.
        with tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", leave=False, unit="batch") as bar:
            for resume_batch, criteria_batch, score_batch in bar:
                optimizer.zero_grad()
                loss = _batch_loss(resume_batch, criteria_batch, score_batch)
                loss.backward()
                optimizer.step()
                batch_loss = loss.item()  # single host sync per batch
                # Weight by batch size so a smaller final batch does not skew the epoch mean.
                running_loss += batch_loss * len(resume_batch)
                bar.set_postfix(loss=batch_loss)
        epoch_loss = running_loss / len(train_loader.dataset)
        mlflow.log_metric("epoch_duration_seconds", time.monotonic() - epoch_start, step=epoch)
        mlflow.log_metric("train_loss", epoch_loss, step=epoch)
        print(f"Epoch {epoch+1}/{num_epochs} loss: {epoch_loss:.4f}")

        # Validation
        # NOTE(review): each epoch opens a new nested run, so validation metrics are recorded on
        # per-epoch child runs rather than on the parent run — confirm this is intended.
        with mlflow.start_run(run_name="custom-qwen-finetune-validation", nested=True):
            validation_start = time.monotonic()
            model.eval()
            val_loss = 0.0
            with torch.no_grad(), tqdm(
                val_loader, desc=f"Validation {epoch+1}/{num_epochs}", leave=False, unit="batch"
            ) as bar:
                for resume_batch, criteria_batch, score_batch in bar:
                    batch_loss = _batch_loss(resume_batch, criteria_batch, score_batch).item()
                    val_loss += batch_loss * len(resume_batch)
                    bar.set_postfix(loss=batch_loss)
            val_epoch_loss = val_loss / len(val_loader.dataset)
            print(f"Validation loss: {val_epoch_loss:.4f}")
            mlflow.log_metric("validation_loss", val_epoch_loss, step=epoch)
            mlflow.log_metric("validation_duration_seconds", time.monotonic() - validation_start, step=epoch)

        # Checkpoint the model after every epoch, tagged with the epoch as its step.
        mlflow.pytorch.log_model(
            model, artifact_path="model", registered_model_name="custom-qwen-finetuned", step=epoch
        )
    mlflow.log_metric("total_training_duration_seconds", time.monotonic() - train_start)

2025/10/09 00:55:57 INFO mlflow.system_metrics.system_metrics_monitor: Skip logging GPU metrics. Set logger level to DEBUG for more details.
2025/10/09 00:55:57 INFO mlflow.system_metrics.system_metrics_monitor: Started monitoring system metrics.
Epoch 1/5:   2%|▏         | 7/381 [00:34<32:55,  5.28s/batch, loss=0.38] 