# AI-Powered Tag Recommendation for Stack Overflow using Transfer Learning
This notebook fine-tunes a pre-trained transformer model (DistilBERT) for multi-label tag classification.


# Install required packages

In [1]:
!pip install transformers accelerate torchmetrics

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.wh

In [2]:
import os
import pickle
from timeit import default_timer as timer

import pandas as pd
import torch
import torch.nn as nn
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader
from torchmetrics.classification import MultilabelF1Score, MultilabelRecall, MultilabelPrecision
from torchmetrics.classification import MultilabelAccuracy, MultilabelExactMatch
from tqdm.auto import tqdm
from transformers import AutoTokenizer, AutoModel, AutoConfig, get_linear_schedule_with_warmup


In [3]:
# Seed PyTorch's global RNG so that the randomly initialised classifier head,
# dropout masks and DataLoader shuffling start from the same state on every
# "Restart & Run All" (GPU kernels may still add small nondeterminism).
SEED = 42
torch.manual_seed(SEED)

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

# Mount Google Drive

In [4]:
# from google.colab import drive
# drive.mount('/content/drive')

# Navigate to project directory

In [5]:
# %cd /content/drive/MyDrive/DS_PROJECTS/AI_Powered_Tag_Recommendation_for_Stack_Overflow

In [6]:
# Record the kernel's current working directory; the relative "model" output
# folder written later in the notebook resolves against it.
cwd = os.getcwd()
cwd

'/kaggle/working'

# Load data

In [7]:
# Locations of the pre-split CSV files.
train_data_file_path = '/kaggle/input/data-files/train.csv'
val_data_file_path = '/kaggle/input/data-files/val.csv'
test_data_file_path = '/kaggle/input/data-files/test.csv'

# Read the three splits in train -> val -> test order, one DataFrame each.
train_df, val_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in (train_data_file_path, val_data_file_path, test_data_file_path)
)

print(f"Train shape: {train_df.shape}, Val shape: {val_df.shape}, Test shape: {test_df.shape}")


Train shape: (179903, 101), Val shape: (20015, 101), Test shape: (10738, 101)


# Extract texts and labels

In [8]:
# Every column after the first one is treated as a label column.
label_cols = train_df.columns[1:]
num_labels = len(label_cols)

# Show the label count and a short preview of the label names.
labels_preview = list(label_cols[:10])
print(f"Number of labels: {num_labels}")
print(f"Labels: {labels_preview}...")

Number of labels: 100
Labels: ['.net', 'ajax', 'algorithm', 'amazon-web-services', 'android', 'android-studio', 'angular', 'angularjs', 'arrays', 'asp.net']...


# Initialize tokenizer (using DistilBERT for efficiency)

In [9]:
# The same checkpoint name is used for the tokenizer here and for the model
# built later in the notebook, so both come from the same pre-trained checkpoint.
MODEL_NAME = "distilbert-base-uncased"  # Can also use "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
tokenizer

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

DistilBertTokenizerFast(name_or_path='distilbert-base-uncased', vocab_size=30522, model_max_length=512, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]', 'mask_token': '[MASK]'}, clean_up_tokenization_spaces=False, added_tokens_decoder={
	0: AddedToken("[PAD]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	100: AddedToken("[UNK]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	101: AddedToken("[CLS]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	102: AddedToken("[SEP]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	103: AddedToken("[MASK]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
}
)

# Custom Dataset for Transformer

In [10]:
class TransformerTextDataset(Dataset):
  """
  Dataset that tokenizes one text per lookup for a transformer model.

  Args:
      texts (list): Text entries, one per sample (each is passed through `str`).
      labels (torch.Tensor): Label rows, indexed in step with `texts`.
      tokenizer: Hugging Face tokenizer (called once per requested sample).
      max_length (int): Length each sample is padded/truncated to.

  Each item is a dict with keys 'input_ids', 'attention_mask' (both flattened
  to 1-D) and 'labels' (the label row for that index).
  """
  def __init__(self, texts, labels, tokenizer, max_length=128):
    super().__init__()
    self.texts, self.labels = texts, labels
    self.tokenizer, self.max_length = tokenizer, max_length

  def __len__(self):
    # One sample per text entry.
    return len(self.texts)

  def __getitem__(self, idx):
    raw_text = str(self.texts[idx])  # coerce non-string entries to text
    target = self.labels[idx]

    # Fixed-length encoding: pad short texts and truncate long ones.
    tokenizer_kwargs = dict(
      add_special_tokens=True,
      max_length=self.max_length,
      padding='max_length',
      truncation=True,
      return_attention_mask=True,
      return_tensors='pt',
    )
    encoded = self.tokenizer(raw_text, **tokenizer_kwargs)

    # The tokenizer returns tensors with a leading batch axis; flatten to 1-D.
    sample = {name: encoded[name].flatten() for name in ('input_ids', 'attention_mask')}
    sample['labels'] = target
    return sample

# Prepare datasets

In [11]:
def _texts_and_labels(df):
  """Return (list of question texts, float32 label tensor) for one data split."""
  texts = df['question_summary'].tolist()
  labels = torch.tensor(df[label_cols].values, dtype=torch.float32)
  return texts, labels


# Same extraction for every split, in train -> val -> test order.
train_texts, train_labels = _texts_and_labels(train_df)
val_texts, val_labels = _texts_and_labels(val_df)
test_texts, test_labels = _texts_and_labels(test_df)


# Create datasets

In [12]:
MAX_LENGTH = 128

# Wrap each split's texts and labels in a dataset sharing one tokenizer and length.
train_dataset, val_dataset, test_dataset = (
    TransformerTextDataset(split_texts, split_labels, tokenizer, MAX_LENGTH)
    for split_texts, split_labels in (
        (train_texts, train_labels),
        (val_texts, val_labels),
        (test_texts, test_labels),
    )
)

# Create dataloaders

In [13]:
BATCH_SIZE = 256

# Only the training split is shuffled; evaluation splits keep their order.
train_dataloader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_dataloader, test_dataloader = (
    DataLoader(dataset=eval_split, batch_size=BATCH_SIZE, shuffle=False)
    for eval_split in (val_dataset, test_dataset)
)
print(f"Train batches: {len(train_dataloader)}, Val batches: {len(val_dataloader)}")

Train batches: 703, Val batches: 79


# Define Transformer-based Model

In [14]:
class AiStackOverflowTagRecommendation(nn.Module):
    """
    Pre-trained transformer encoder followed by a small MLP head that emits
    one logit per label.

    Args:
        model_name (str): Hugging Face checkpoint name to load.
        num_labels (int): Number of output logits.
        dropout (float): Dropout probability, applied to the pooled vector
            and again inside the classification head.
    """
    def __init__(self, model_name, num_labels, dropout=0.3):
        super().__init__()
        self.config = AutoConfig.from_pretrained(model_name, output_hidden_states=False)
        self.transformer = AutoModel.from_pretrained(model_name, config=self.config)
        self.dropout = nn.Dropout(dropout)

        # The head is sized from the loaded encoder's hidden width.
        width = self.transformer.config.hidden_size
        head_layers = [
            nn.Linear(width, width),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(width, num_labels),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, input_ids, attention_mask):
        """Return raw (pre-sigmoid) logits for a batch of token ids and masks."""
        encoder_out = self.transformer(input_ids=input_ids, attention_mask=attention_mask)

        # Pool by taking the final hidden state at the first position ([CLS]).
        cls_vector = encoder_out.last_hidden_state[:, 0, :]
        return self.classifier(self.dropout(cls_vector))

In [15]:
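# NOTE: disabled (commented-out) variant of the classifier with a single linear head and dropout=0.1.
# class AiStackOverflowTagRecommendation(nn.Module):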
#   """
#   Multi-label classifier using pre-trained transformer model.

#   Args:
#       model_name (str): Name of pre-trained model from Hugging Face.
#       num_labels (int): Number of output labels.
#       dropout (float): Dropout probability.
#   """
#   def __init__(self, model_name, num_labels, dropout=0.1):
#       super().__init__()
#       self.config = AutoConfig.from_pretrained(model_name, output_hidden_states=False)
#       self.backbone = AutoModel.from_pretrained(model_name, config=self.config)
#       hidden_size = self.backbone.config.hidden_size
#       self.dropout = nn.Dropout(dropout)
#       self.classifier = nn.Linear(hidden_size, num_labels)

#   def forward(self, input_ids, attention_mask):
#       outputs = self.backbone(input_ids=input_ids, attention_mask=attention_mask)
#       pooled = outputs.last_hidden_state[:, 0, :]  # Use [CLS] token
#       x = self.dropout(pooled)
#       logits = self.classifier(x)
#       return logits

# Initialize model

In [16]:
# Build the classifier and move its weights to the selected device.
model = AiStackOverflowTagRecommendation(model_name=MODEL_NAME, num_labels=num_labels, dropout=0.3)
model = model.to(device)

# Count all parameters (encoder + classification head).
param_count = sum(weights.numel() for weights in model.parameters())
print(f"Model initialized with {param_count:,} parameters")
model

2025-11-25 17:39:10.504462: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1764092350.666903      47 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1764092350.722792      47 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Model initialized with 67,030,372 parameters


AiStackOverflowTagRecommendation(
  (transformer): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): DistilBertSdpaAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
  

# Training function

In [17]:
def train_step(model, dataloader, loss_fn, optimizer, scheduler, device, num_labels):
  """Train `model` for one epoch and return the epoch's loss and metrics.

  Args:
      model: Module called as `model(input_ids, attention_mask)`, returning logits.
      dataloader: Yields dict batches with 'input_ids', 'attention_mask', 'labels'.
      loss_fn: Loss called as `loss_fn(logits, labels)`.
      optimizer: Optimizer stepped once per batch.
      scheduler: Learning-rate scheduler stepped once per batch, after the optimizer.
      device: Device the batches and metric states are moved to.
      num_labels (int): Number of labels, used to configure the metrics.

  Returns:
      dict: 'train_loss' (mean of per-batch losses) plus 'train_exact_match_acc',
      'train_micro_f1', 'train_macro_f1', 'train_micro_recall' and
      'train_micro_precision', all Python floats.
  """
  model.train()
  train_loss = 0.0
  THRESHOLD = 0.5

  # Fresh metric objects per call, so nothing carries over between epochs.
  metrics = {
      "exact_match_acc": MultilabelExactMatch(num_labels=num_labels, threshold=THRESHOLD),
      "micro_f1": MultilabelF1Score(num_labels=num_labels, threshold=THRESHOLD, average='micro'),
      "macro_f1": MultilabelF1Score(num_labels=num_labels, threshold=THRESHOLD, average='macro'),
      "micro_recall": MultilabelRecall(num_labels=num_labels, threshold=THRESHOLD, average='micro'),
      "micro_precision": MultilabelPrecision(num_labels=num_labels, threshold=THRESHOLD, average='micro'),
  }
  metrics = {name: metric.to(device) for name, metric in metrics.items()}

  for batch in tqdm(dataloader, desc="Training", leave=False):
    input_ids = batch['input_ids'].to(device)
    attention_mask = batch['attention_mask'].to(device)
    labels = batch['labels'].to(device)

    # Forward pass
    logits = model(input_ids, attention_mask)
    loss = loss_fn(logits, labels)
    train_loss += loss.item()

    # Backward pass
    optimizer.zero_grad()
    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)  # Gradient clipping
    optimizer.step()
    scheduler.step()

    # Feed probabilities, not raw logits, to the metrics. torchmetrics only
    # applies a sigmoid itself when some value in the batch falls outside
    # [0, 1], so a batch whose logits all lie in that range would be
    # thresholded as if it already held probabilities.
    probs = torch.sigmoid(logits.detach())
    for metric in metrics.values():
      metric.update(probs, labels)

  # Average the per-batch losses; guard against an empty dataloader.
  num_batches = max(len(dataloader), 1)
  results = {"train_loss": train_loss / num_batches}
  results.update({f"train_{name}": metric.compute().item() for name, metric in metrics.items()})

  # The metric objects are local to this call, so no explicit reset is needed.
  return results

# Validation function

In [18]:
def test_step(model, dataloader, loss_fn, device, num_labels):
  """Evaluate `model` on a validation/test dataloader without updating weights.

  Args:
      model: Module called as `model(input_ids, attention_mask)`, returning logits.
      dataloader: Yields dict batches with 'input_ids', 'attention_mask', 'labels'.
      loss_fn: Loss called as `loss_fn(logits, labels)`.
      device: Device the batches and metric states are moved to.
      num_labels (int): Number of labels, used to configure the metrics.

  Returns:
      dict: 'test_loss' (mean of per-batch losses) plus 'test_exact_match_acc',
      'test_micro_f1', 'test_macro_f1', 'test_micro_recall' and
      'test_micro_precision', all Python floats. The 'test_' prefix is used
      regardless of which split the dataloader holds.
  """
  model.eval()
  test_loss = 0.0
  THRESHOLD = 0.5

  # Fresh metric objects per call, so nothing carries over between evaluations.
  metrics = {
      "exact_match_acc": MultilabelExactMatch(num_labels=num_labels, threshold=THRESHOLD),
      "micro_f1": MultilabelF1Score(num_labels=num_labels, threshold=THRESHOLD, average='micro'),
      "macro_f1": MultilabelF1Score(num_labels=num_labels, threshold=THRESHOLD, average='macro'),
      "micro_recall": MultilabelRecall(num_labels=num_labels, threshold=THRESHOLD, average='micro'),
      "micro_precision": MultilabelPrecision(num_labels=num_labels, threshold=THRESHOLD, average='micro'),
  }
  metrics = {name: metric.to(device) for name, metric in metrics.items()}

  with torch.no_grad():
    for batch in tqdm(dataloader, desc="Evaluating", leave=False):
      input_ids = batch['input_ids'].to(device)
      attention_mask = batch['attention_mask'].to(device)
      labels = batch['labels'].to(device)

      # Forward pass
      logits = model(input_ids, attention_mask)
      loss = loss_fn(logits, labels)
      test_loss += loss.item()

      # Feed probabilities, not raw logits, to the metrics. torchmetrics only
      # applies a sigmoid itself when some value in the batch falls outside
      # [0, 1], so a batch whose logits all lie in that range would be
      # thresholded as if it already held probabilities.
      probs = torch.sigmoid(logits)
      for metric in metrics.values():
        metric.update(probs, labels)

  # Average the per-batch losses; guard against an empty dataloader.
  num_batches = max(len(dataloader), 1)
  results = {"test_loss": test_loss / num_batches}
  results.update({f"test_{name}": metric.compute().item() for name, metric in metrics.items()})

  # The metric objects are local to this call, so no explicit reset is needed.
  return results


# Setup training

In [19]:
# Fine-tuning hyperparameters. EPOCHS also fixes the length of the learning-rate
# schedule built later in the notebook (batches per epoch * EPOCHS steps).
EPOCHS = 10   # Start with fewer epochs for transformers
LEARNING_RATE = 3e-5  # Common learning rate for fine-tuning transformers


# Loss function and optimizer

In [20]:
# Loss is computed directly on the model's raw logits (sigmoid is folded into the loss).
loss_fn = nn.BCEWithLogitsLoss()

# AdamW is imported with the other dependencies in the import cell at the top
# of the notebook. All model parameters (encoder and head) are optimised.
optimizer = AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=0.01)

# Learning rate scheduler

In [21]:
# The scheduler is stepped once per training batch, so the schedule spans
# every batch of every epoch.
steps_per_epoch = len(train_dataloader)
total_steps = steps_per_epoch * EPOCHS
warmup_steps = int(0.1 * total_steps)  # 10% warmup

scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=warmup_steps,
    num_training_steps=total_steps,
)

print(f"Training for {EPOCHS} epochs with learning rate {LEARNING_RATE}")
print(f"Total training steps: {total_steps}")

Training for 10 epochs with learning rate 3e-05
Total training steps: 7030


# Training loop

In [22]:
def _format_epoch_metrics(split, metrics, prefix):
  """Build the one-line epoch summary for `split` from a metrics dict.

  `prefix` is the key prefix used in `metrics` ('train' for train_step
  results, 'test' for test_step results).
  """
  loss = metrics[f"{prefix}_loss"]
  exact_match = metrics[f"{prefix}_exact_match_acc"]
  macro_f1 = metrics[f"{prefix}_macro_f1"]
  micro_f1 = metrics[f"{prefix}_micro_f1"]
  recall = metrics[f"{prefix}_micro_recall"]
  precision = metrics[f"{prefix}_micro_precision"]
  # Recall/precision use the same 4-decimal format as the F1 scores.
  return (f"{split} Loss: {loss:.5f} | "
          f"{split} Exact Match Acc: {exact_match*100:.2f}% | "
          f"{split} Macro F1/Micro F1: {macro_f1:.4f}/{micro_f1:.4f} | "
          f"{split} Micro Recall/Precision: {recall:.4f}/{precision:.4f}")


train_start_time = timer()

for epoch in range(EPOCHS):
  print(f"\nEpoch {epoch + 1}/{EPOCHS}")
  print("-" * 50)

  # Train
  train_metrics = train_step(
    model=model,
    dataloader=train_dataloader,
    loss_fn=loss_fn,
    optimizer=optimizer,
    scheduler=scheduler,
    device=device,
    num_labels=num_labels
  )
  print(_format_epoch_metrics("Train", train_metrics, "train"))

  # Validate (test_step reports its results under 'test_'-prefixed keys)
  val_metrics = test_step(
    model=model,
    dataloader=val_dataloader,
    loss_fn=loss_fn,
    device=device,
    num_labels=num_labels
  )
  print(_format_epoch_metrics("Val", val_metrics, "test"))

train_end_time = timer()
total_time = train_end_time - train_start_time
print(f"\nTotal training time: {total_time:.2f} seconds ({total_time/60:.2f} minutes)")



Epoch 1/10
--------------------------------------------------


Training:   0%|          | 0/703 [00:00<?, ?it/s]

Train Loss: 0.23755 | Train Exact Match Acc: 0.04% | Train Macro F1/Micro F1: 0.0191/0.0267 | Train Micro Recall/Precision: 0.0599282868206501/0.017169035971164703


Evaluating:   0%|          | 0/79 [00:00<?, ?it/s]

Val Loss: 0.07089 | Val Exact Match Acc: 0.00% | Val Macro F1/Micro F1: 0.0000/0.0000 | Val Micro Recall/Precision: 0.0/0.0

Epoch 2/10
--------------------------------------------------


Training:   0%|          | 0/703 [00:00<?, ?it/s]

Train Loss: 0.05088 | Train Exact Match Acc: 17.93% | Train Macro F1/Micro F1: 0.1006/0.3618 | Train Micro Recall/Precision: 0.23015011847019196/0.8457943797111511


Evaluating:   0%|          | 0/79 [00:00<?, ?it/s]

Val Loss: 0.03270 | Val Exact Match Acc: 34.47% | Val Macro F1/Micro F1: 0.2179/0.5826 | Val Micro Recall/Precision: 0.43441715836524963/0.8841726183891296

Epoch 3/10
--------------------------------------------------


Training:   0%|          | 0/703 [00:00<?, ?it/s]

Train Loss: 0.03092 | Train Exact Match Acc: 38.46% | Train Macro F1/Micro F1: 0.3831/0.6319 | Train Micro Recall/Precision: 0.5031946897506714/0.8491642475128174


Evaluating:   0%|          | 0/79 [00:00<?, ?it/s]

Val Loss: 0.02429 | Val Exact Match Acc: 47.95% | Val Macro F1/Micro F1: 0.5433/0.7217 | Val Micro Recall/Precision: 0.6130123138427734/0.8771929740905762

Epoch 4/10
--------------------------------------------------


Training:   0%|          | 0/703 [00:00<?, ?it/s]

Train Loss: 0.02539 | Train Exact Match Acc: 45.91% | Train Macro F1/Micro F1: 0.5479/0.7084 | Train Micro Recall/Precision: 0.6074404716491699/0.8495620489120483


Evaluating:   0%|          | 0/79 [00:00<?, ?it/s]

Val Loss: 0.02095 | Val Exact Match Acc: 52.34% | Val Macro F1/Micro F1: 0.6402/0.7634 | Val Micro Recall/Precision: 0.6825923323631287/0.8658463358879089

Epoch 5/10
--------------------------------------------------


Training:   0%|          | 0/703 [00:00<?, ?it/s]

Train Loss: 0.02270 | Train Exact Match Acc: 49.19% | Train Macro F1/Micro F1: 0.6137/0.7392 | Train Micro Recall/Precision: 0.6531809568405151/0.8512917757034302


Evaluating:   0%|          | 0/79 [00:00<?, ?it/s]

Val Loss: 0.01912 | Val Exact Match Acc: 54.77% | Val Macro F1/Micro F1: 0.6756/0.7809 | Val Micro Recall/Precision: 0.7080608010292053/0.8703826069831848

Epoch 6/10
--------------------------------------------------


Training:   0%|          | 0/703 [00:00<?, ?it/s]

Train Loss: 0.02103 | Train Exact Match Acc: 51.10% | Train Macro F1/Micro F1: 0.6467/0.7563 | Train Micro Recall/Precision: 0.6792635321617126/0.8530901670455933


Evaluating:   0%|          | 0/79 [00:00<?, ?it/s]

Val Loss: 0.01801 | Val Exact Match Acc: 55.99% | Val Macro F1/Micro F1: 0.6945/0.7892 | Val Micro Recall/Precision: 0.7183935046195984/0.8756017684936523

Epoch 7/10
--------------------------------------------------


Training:   0%|          | 0/703 [00:00<?, ?it/s]

Train Loss: 0.01982 | Train Exact Match Acc: 52.48% | Train Macro F1/Micro F1: 0.6690/0.7687 | Train Micro Recall/Precision: 0.6982945203781128/0.8548021912574768


Evaluating:   0%|          | 0/79 [00:00<?, ?it/s]

Val Loss: 0.01711 | Val Exact Match Acc: 57.51% | Val Macro F1/Micro F1: 0.7139/0.8011 | Val Micro Recall/Precision: 0.7417764663696289/0.8707021474838257

Epoch 8/10
--------------------------------------------------


Training:   0%|          | 0/703 [00:00<?, ?it/s]

Train Loss: 0.01894 | Train Exact Match Acc: 53.66% | Train Macro F1/Micro F1: 0.6841/0.7779 | Train Micro Recall/Precision: 0.7109782695770264/0.8586079478263855


Evaluating:   0%|          | 0/79 [00:00<?, ?it/s]

Val Loss: 0.01647 | Val Exact Match Acc: 58.51% | Val Macro F1/Micro F1: 0.7213/0.8080 | Val Micro Recall/Precision: 0.7483173608779907/0.8780867457389832

Epoch 9/10
--------------------------------------------------


Training:   0%|          | 0/703 [00:00<?, ?it/s]

Train Loss: 0.01834 | Train Exact Match Acc: 54.64% | Train Macro F1/Micro F1: 0.6944/0.7845 | Train Micro Recall/Precision: 0.720150351524353/0.8614144921302795


Evaluating:   0%|          | 0/79 [00:00<?, ?it/s]

Val Loss: 0.01612 | Val Exact Match Acc: 59.08% | Val Macro F1/Micro F1: 0.7338/0.8134 | Val Micro Recall/Precision: 0.7606092095375061/0.8740377426147461

Epoch 10/10
--------------------------------------------------


Training:   0%|          | 0/703 [00:00<?, ?it/s]

Train Loss: 0.01791 | Train Exact Match Acc: 55.13% | Train Macro F1/Micro F1: 0.7007/0.7891 | Train Micro Recall/Precision: 0.7266982793807983/0.8631482124328613


Evaluating:   0%|          | 0/79 [00:00<?, ?it/s]

Val Loss: 0.01598 | Val Exact Match Acc: 59.22% | Val Macro F1/Micro F1: 0.7344/0.8144 | Val Micro Recall/Precision: 0.7630423307418823/0.8732190728187561

Total training time: 11054.37 seconds (184.24 minutes)


# Save model

In [23]:
# Make sure the output folder exists before anything is written into it.
os.makedirs("model", exist_ok=True)

# Checkpoint: the weights plus the settings stored alongside them
# (checkpoint name, label count, tokenization length).
model_save_path = 'model/transformer_stackoverflow_tag_recommendation.pth'
checkpoint = {
    'model_state_dict': model.state_dict(),
    'model_name': MODEL_NAME,
    'num_labels': num_labels,
    'max_length': MAX_LENGTH
}
torch.save(checkpoint, model_save_path)
print(f"Model saved to {model_save_path}")

# Save label columns as a plain list, in column order.
labels_save_path = 'model/transformer_labels.pkl'
label_names = label_cols.tolist()
with open(labels_save_path, 'wb') as labels_file:
    pickle.dump(label_names, labels_file)
print(f"Labels saved to {labels_save_path}")

Model saved to model/transformer_stackoverflow_tag_recommendation.pth
Labels saved to model/transformer_labels.pkl


# Evaluate on test set

In [24]:
banner = "=" * 50
print("\n" + banner)
print("EVALUATING ON TEST SET")
print(banner)

# Single pass over the held-out test split with the trained model.
test_metrics = test_step(model, test_dataloader, loss_fn, device, num_labels)

print("\nFinal Test Results:")
print(f"Test Loss: {test_metrics['test_loss']:.5f}")
print(f"Test Exact Match Acc: {test_metrics['test_exact_match_acc']*100:.2f}%")
print(f"Test Macro F1: {test_metrics['test_macro_f1']:.4f}")
print(f"Test Micro F1: {test_metrics['test_micro_f1']:.4f}")
print(f"Test Micro Recall: {test_metrics['test_micro_recall']:.4f}")
print(f"Test Micro Precision: {test_metrics['test_micro_precision']:.4f}")



EVALUATING ON TEST SET


Evaluating:   0%|          | 0/42 [00:00<?, ?it/s]


Final Test Results:
Test Loss: 0.01595
Test Exact Match Acc: 59.15%
Test Macro F1: 0.7378
Test Micro F1: 0.8162
Test Micro Recall: 0.7617
Test Micro Precision: 0.8790


# Inference example

In [25]:
def predict_tags(text, model, tokenizer, label_cols, max_length=128, threshold=0.5):
  """Predict tags for a single text.

  Args:
      text (str): Text to classify.
      model: Module called as `model(input_ids, attention_mask)`, returning logits.
      tokenizer: Hugging Face tokenizer used to encode `text`.
      label_cols: Indexable sequence of label names, aligned with the logits.
      max_length (int): Length the text is padded/truncated to.
      threshold (float): A label is predicted when its probability is above this.

  Returns:
      tuple: (list of predicted label names, NumPy array of per-label probabilities).

  Note:
      Switches `model` to eval mode and leaves it there.
  """
  model.eval()

  # Run on the device the model's weights live on, rather than on a
  # notebook-level `device` global that may be missing or out of sync.
  first_param = next(model.parameters(), None)
  target_device = first_param.device if first_param is not None else torch.device("cpu")

  # Tokenize
  encoding = tokenizer(
    text,
    add_special_tokens=True,
    max_length=max_length,
    padding='max_length',
    truncation=True,
    return_attention_mask=True,
    return_tensors='pt'
  )

  input_ids = encoding['input_ids'].to(target_device)
  attention_mask = encoding['attention_mask'].to(target_device)

  # Predict: the batch holds a single text, so keep only row 0.
  with torch.no_grad():
    logits = model(input_ids, attention_mask)
    probs = torch.sigmoid(logits)[0]

  # Get predicted tags
  predicted_indices = torch.nonzero(probs > threshold).flatten().tolist()
  predicted_tags = [label_cols[idx] for idx in predicted_indices]

  return predicted_tags, probs.cpu().numpy()


# Test inference on sample

In [26]:
banner = "=" * 50
print("\n" + banner)
print("TESTING INFERENCE")
print(banner)

# Fixed random_state so the same five test questions are shown on every run.
sample_questions = test_df.sample(n=5, random_state=42)

for _, row in sample_questions.iterrows():
  question = row['question_summary']
  # Ground-truth tags are the label columns whose value is 1 in this row.
  true_tags = [tag for tag, flag in zip(label_cols, row[label_cols]) if flag == 1]

  predicted_tags, _probs = predict_tags(question, model, tokenizer, label_cols, MAX_LENGTH)

  print(f"\nQuestion: {question[:100]}...")
  print(f"True Tags: {true_tags}")
  print(f"Predicted Tags: {predicted_tags}")
  print("-" * 50)


TESTING INFERENCE

Question: Python: access class property from string [duplicate] I have a class like the following: class User:...
True Tags: ['python']
Predicted Tags: ['python']
--------------------------------------------------

Question: MySQL PHP - SELECT WHERE id = array()? [duplicate] Possible Duplicate: MySQL query using an array Pa...
True Tags: ['arrays', 'html', 'mysql', 'php']
Predicted Tags: ['mysql', 'php']
--------------------------------------------------

Question: Position fixed doesn't work when using -webkit-transform I am using -webkit-transform (and -moz-tran...
True Tags: ['css', 'html']
Predicted Tags: ['css']
--------------------------------------------------

Question: Logging values of variables in Android native ndk I set up logging with C++ in Android NDK. I can pr...
True Tags: ['android', 'c++', 'debugging']
Predicted Tags: ['android', 'c++']
--------------------------------------------------

Question: How can I access a static property of type T in a