In [1]:
# ===== 1. Imports =====
import ast
import numpy as np
import pandas as pd
import torch
from sklearn.metrics import f1_score, roc_auc_score, accuracy_score
from transformers import EvalPrediction

from datasets import Dataset
from transformers import (
    AutoTokenizer,
    AutoConfig,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments,
)


In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [2]:
# ===== 2. Load CSV and build multi-label targets =====
df = pd.read_csv("train_df.csv")
test_df = pd.read_csv("test_df.csv")

# Tags we focus on
TARGET_TAGS = ['math', 'graphs', 'strings', 'number theory',
               'trees', 'geometry', 'games', 'probabilities']


In [None]:
!pip install -U wandb



In [None]:
import wandb

In [None]:
!wandb login

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit: 
[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mjradi-ahmed[0m ([33mjradi-ahmed-none[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [None]:
sweep_config = {
    "method": "bayes",
    "metric": {
        "name": "eval_micro_f1",
        "goal": "maximize",
    },
    "parameters": {
        "learning_rate": {
            "values": [1e-5, 2e-5, 3e-5, 5e-5],
        },
        "train_batch_size": {
            "values": [8, 16],
        },
        "eval_batch_size": {
            "values": [16, 32],
        },
        "num_epochs": {
            "values": [5, 8, 12],
        },
        "weight_decay": {
            "values": [0.0, 0.01, 0.1],
        },
    },
}


In [None]:
import pprint
pprint.pprint(sweep_config)

{'method': 'bayes',
 'metric': {'goal': 'maximize', 'name': 'eval_micro_f1'},
 'parameters': {'eval_batch_size': {'values': [16, 32]},
                'learning_rate': {'values': [1e-05, 2e-05, 3e-05, 5e-05]},
                'num_epochs': {'values': [5, 8, 12]},
                'train_batch_size': {'values': [8, 16]},
                'weight_decay': {'values': [0.0, 0.01, 0.1]}}}


In [None]:
sweep_id = wandb.sweep(sweep_config, project="test_technique")

  | |_| | '_ \/ _` / _` |  _/ -_)


Create sweep with ID: lxcfaqiu
Sweep URL: https://wandb.ai/jradi-ahmed-none/test_technique/sweeps/lxcfaqiu


In [3]:

def parse_tags(x):
    if isinstance(x, str):
        try:
            return ast.literal_eval(x)
        except Exception:
            return []
    elif isinstance(x, list):
        return x
    else:
        return []

# If you already have train_df and test_df separately, make sure BOTH have tag_list and tag columns:

for df_part in [df, test_df]:
    df_part["tag_list"] = df_part["tags_filtered"].apply(parse_tags)
    for tag in TARGET_TAGS:
        df_part[tag] = df_part["tag_list"].apply(lambda tags: 1 if tag in tags else 0)


In [4]:
# ===== 3. Build labels matrix and compute pos_weight =====

labels_np = df[TARGET_TAGS].values.astype(np.float32)  # shape (N, 8)
num_samples, num_labels = labels_np.shape
print("Labels shape:", labels_np.shape)

pos_counts = labels_np.sum(axis=0)                  # positives per label
neg_counts = num_samples - pos_counts               # negatives per label

# Avoid division by zero
pos_counts_safe = np.where(pos_counts == 0, 1, pos_counts)

pos_weight_np = neg_counts / pos_counts_safe        # shape (8,)
print("pos_counts:", pos_counts)
print("neg_counts:", neg_counts)
print("pos_weight_np:", pos_weight_np)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
pos_weight = torch.tensor(pos_weight_np, dtype=torch.float32).to(device)

Labels shape: (2147, 8)
pos_counts: [1126.  434.  338.  280.  259.  133.   84.   74.]
neg_counts: [1021. 1713. 1809. 1867. 1888. 2014. 2063. 2073.]
pos_weight_np: [ 0.90674955  3.9470046   5.352071    6.667857    7.289575   15.142858
 24.559525   28.013514  ]


In [5]:
!pip install iterative-stratification



In [6]:
# ===== 4. Convert to Hugging Face Dataset and split =====
from iterstrat.ml_stratifiers import MultilabelStratifiedShuffleSplit
# Keep only cols we need + labels
keep_cols = ["description_clean", "code_clean"] + TARGET_TAGS
df_hf = df[keep_cols].copy()


# X = features (we just use the text columns as placeholders)
X = df_hf[["description_clean", "code_clean"]].values
# y = multi-label targets
y = df_hf[TARGET_TAGS].values.astype(int)

msss = MultilabelStratifiedShuffleSplit(
    n_splits=1,
    test_size=0.2,
    random_state=42,
)

for train_idx, test_idx in msss.split(X, y):
    df_train = df_hf.iloc[train_idx].reset_index(drop=True)
    df_test  = df_hf.iloc[test_idx].reset_index(drop=True)

print("Train label sums:", df_train[TARGET_TAGS].sum().values)
print("Test  label sums:", df_test[TARGET_TAGS].sum().values)

Train label sums: [900 347 270 224 207 106  67  59]
Test  label sums: [226  87  68  56  52  27  17  15]


In [7]:
train_ds = Dataset.from_pandas(df_train)
eval_ds  = Dataset.from_pandas(df_test)
train_full_ds = Dataset.from_pandas(df_hf)

# Add "labels" field = list of 8 ints/floats
def add_labels(example):
    example["labels"] = [example[tag] for tag in TARGET_TAGS]
    return example

train_ds = train_ds.map(add_labels)
eval_ds = eval_ds.map(add_labels)
train_full_ds = train_full_ds.map(add_labels)

Map:   0%|          | 0/1726 [00:00<?, ? examples/s]

Map:   0%|          | 0/421 [00:00<?, ? examples/s]

Map:   0%|          | 0/2147 [00:00<?, ? examples/s]

In [8]:
from huggingface_hub import login
login(new_session=False)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [9]:
# ===== 5. Tokenizer & tokenization =====

model_name = "Mallard74/codebert-xcode-tags-classification"  # you can switch to another model
tokenizer = AutoTokenizer.from_pretrained(model_name)

def tokenize_fn(batch):
    texts = [
        (d or "") + "\n\n" + (c or "")
        for d, c in zip(batch["description_clean"], batch["code_clean"])
    ]
    return tokenizer(
        texts,
        truncation=True,
        max_length=512,
        padding="max_length",
        return_tensors="pt",
    )

train_ds = train_ds.map(tokenize_fn, batched=True)
eval_ds  = eval_ds.map(tokenize_fn, batched=True)
train_full_ds = train_full_ds.map(tokenize_fn, batched=True)

# Set PyTorch format
train_ds.set_format(
    type="torch",
    columns=["input_ids", "attention_mask", "labels"],
)
eval_ds.set_format(
    type="torch",
    columns=["input_ids", "attention_mask", "labels"],
)
train_full_ds.set_format(
    type="torch",
    columns=["input_ids", "attention_mask", "labels"],
)

Map:   0%|          | 0/1726 [00:00<?, ? examples/s]

Map:   0%|          | 0/421 [00:00<?, ? examples/s]

Map:   0%|          | 0/2147 [00:00<?, ? examples/s]

In [10]:
# ===== 6. Model definition =====

config = AutoConfig.from_pretrained(
    model_name,
    num_labels=num_labels,
    problem_type="multi_label_classification",
)

def model_init():
    return AutoModelForSequenceClassification.from_pretrained(
        model_name,
        config=config,
    )

In [11]:
# ===== 7. Custom Trainer with BCEWithLogitsLoss + pos_weight =====

class MultilabelTrainer(Trainer):
    def __init__(self, pos_weight=None, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.pos_weight = pos_weight

    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.logits

        loss_fct = torch.nn.BCEWithLogitsLoss(pos_weight=self.pos_weight)
        loss = loss_fct(logits, labels.float())

        return (loss, outputs) if return_outputs else loss


In [12]:
def compute_metrics(eval_pred):
        logits, labels = eval_pred
        logits = torch.tensor(logits)
        labels = torch.tensor(labels)

        probs = torch.sigmoid(logits)
        preds = (probs > 0.5).int().cpu().numpy()
        labels = labels.int().cpu().numpy()

        micro_f1 = f1_score(labels, preds, average="micro", zero_division=0)
        macro_f1 = f1_score(labels, preds, average="macro", zero_division=0)

        return {"micro_f1": micro_f1, "macro_f1": macro_f1}

In [13]:
from transformers import EarlyStoppingCallback

In [None]:
def train():
    with wandb.init():
        # Get sweep config from wandb
        sweep_config = wandb.config

        training_args = TrainingArguments(
            output_dir="/kaggle/working/finetune/",
            per_device_train_batch_size=sweep_config.train_batch_size,
            per_device_eval_batch_size=sweep_config.eval_batch_size,
            learning_rate=sweep_config.learning_rate,
            num_train_epochs=sweep_config.num_epochs,
            weight_decay=sweep_config.weight_decay,
            eval_strategy="epoch",
            save_strategy="epoch",
            report_to="wandb",
            logging_dir="/kaggle/working/tb_logs",
            load_best_model_at_end=True,
            logging_steps=10,
            run_name="test_study",
            metric_for_best_model="eval_micro_f1",
            greater_is_better=True,
        )

        trainer = MultilabelTrainer(
            model_init=model_init,
            args=training_args,
            train_dataset=train_ds,
            eval_dataset=eval_ds,
            tokenizer=tokenizer,
            pos_weight=pos_weight,
            compute_metrics=compute_metrics,
            callbacks=[EarlyStoppingCallback(early_stopping_patience=5)],
        )

        # Train the model
        trainer.train()


In [None]:
wandb.agent(sweep_id, train, count=10)

[34m[1mwandb[0m: Agent Starting Run: doe5escr with config:
[34m[1mwandb[0m: 	eval_batch_size: 32
[34m[1mwandb[0m: 	learning_rate: 2e-05
[34m[1mwandb[0m: 	num_epochs: 8
[34m[1mwandb[0m: 	train_batch_size: 16
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: Currently logged in as: [33mjradi-ahmed[0m ([33mjradi-ahmed-none[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


  super().__init__(*args, **kwargs)


model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]



Epoch,Training Loss,Validation Loss,Micro F1,Macro F1
1,0.6024,0.732088,0.688172,0.595392
2,0.7377,0.628445,0.668567,0.623518
3,0.5858,0.599766,0.689757,0.649585
4,0.4256,0.619518,0.705058,0.663326
5,0.3557,0.559841,0.736146,0.68485
6,0.3532,0.603288,0.747725,0.700333
7,0.2761,0.617007,0.770026,0.724974
8,0.2697,0.613871,0.761905,0.712277


0,1
eval/loss,█▄▃▃▁▃▃▃
eval/macro_f1,▁▃▄▅▆▇█▇
eval/micro_f1,▂▁▂▄▆▆█▇
eval/runtime,█▁▄▄▃▄▄▄
eval/samples_per_second,▁█▅▅▆▅▅▅
eval/steps_per_second,▁█▅▅▆▅▄▅
train/epoch,▁▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
train/global_step,▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇██
train/grad_norm,▂▂▃▄▁▃▃▂▂▂▂▂▆▁▁▁▃▂▂▂▁▆▁▂▃▂▅▃▁▂█▃▂▂▁▇▂▁▃▄
train/learning_rate,████▇▇▇▇▆▆▆▆▆▆▆▅▅▄▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁

0,1
eval/loss,0.61387
eval/macro_f1,0.71228
eval/micro_f1,0.7619
eval/runtime,11.624
eval/samples_per_second,36.218
eval/steps_per_second,1.204
total_flos,3633233169874944.0
train/epoch,8
train/global_step,864
train/grad_norm,6.37961


[34m[1mwandb[0m: Agent Starting Run: v9whx478 with config:
[34m[1mwandb[0m: 	eval_batch_size: 16
[34m[1mwandb[0m: 	learning_rate: 3e-05
[34m[1mwandb[0m: 	num_epochs: 5
[34m[1mwandb[0m: 	train_batch_size: 16
[34m[1mwandb[0m: 	weight_decay: 0


  super().__init__(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Micro F1,Macro F1
1,0.6097,0.722014,0.692432,0.60724
2,0.694,0.654611,0.66967,0.613925
3,0.5569,0.628512,0.692595,0.653849
4,0.4528,0.618948,0.722311,0.674795
5,0.331,0.604354,0.744646,0.697644


0,1
eval/loss,█▄▂▂▁
eval/macro_f1,▁▂▅▆█
eval/micro_f1,▃▁▃▆█
eval/runtime,▁▁█▆▂
eval/samples_per_second,██▁▃▇
eval/steps_per_second,▇█▁▃▇
train/epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇█████
train/global_step,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇████
train/grad_norm,▂▂▂▂▂▂▃▁▂▃▅▁▄▂▂▃▃▂▁▂▂▃▂▁█▁▄▁▂▅▃▂▂▄▅▅▃▂▃▂
train/learning_rate,████▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▁▁▁

0,1
eval/loss,0.60435
eval/macro_f1,0.69764
eval/micro_f1,0.74465
eval/runtime,11.3779
eval/samples_per_second,37.002
eval/steps_per_second,2.373
total_flos,2270770731171840.0
train/epoch,5
train/global_step,540
train/grad_norm,3.96135


[34m[1mwandb[0m: Agent Starting Run: oxyjkl5l with config:
[34m[1mwandb[0m: 	eval_batch_size: 32
[34m[1mwandb[0m: 	learning_rate: 5e-05
[34m[1mwandb[0m: 	num_epochs: 12
[34m[1mwandb[0m: 	train_batch_size: 8
[34m[1mwandb[0m: 	weight_decay: 0.01


  super().__init__(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Micro F1,Macro F1
1,0.7072,0.868347,0.603199,0.53593
2,0.7108,0.71986,0.640371,0.582686
3,0.7025,0.732244,0.697042,0.624844
4,0.4121,0.773668,0.724876,0.653523
5,0.3248,0.908472,0.730187,0.687056
6,0.4185,0.747727,0.756364,0.720321
7,0.1957,0.904813,0.76639,0.728699
8,0.1679,0.978601,0.759963,0.715014
9,0.1715,0.939869,0.782931,0.733337
10,0.1033,1.025785,0.761194,0.724641


0,1
eval/loss,▄▁▁▂▅▂▅▆▅▇██
eval/macro_f1,▁▃▄▅▆██▇████
eval/micro_f1,▁▂▅▆▆▇▇▇█▇█▇
eval/runtime,▃▃█▆▃▆▇▆▅▁▁█
eval/samples_per_second,▆▆▁▃▆▃▂▃▄██▁
eval/steps_per_second,▆▆▁▃▆▃▃▃▄██▂
train/epoch,▁▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇▇██
train/global_step,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████
train/grad_norm,▁▁▁▁▂▂▁▁▃▁▂▂▁▂▁▂▁▃▁█▁▁▁▁▁▁▁▂▂▁▁▁▁▁▁▁▁▁▁▁
train/learning_rate,███▇▇▇▇▇▆▆▆▆▆▅▅▅▅▄▄▄▄▄▄▄▄▄▃▃▃▃▃▃▃▃▃▂▂▂▁▁

0,1
eval/loss,1.06752
eval/macro_f1,0.71978
eval/micro_f1,0.76852
eval/runtime,11.632
eval/samples_per_second,36.193
eval/steps_per_second,1.204
total_flos,5449849754812416.0
train/epoch,12
train/global_step,2592
train/grad_norm,0.91228


[34m[1mwandb[0m: Agent Starting Run: vupdbjsu with config:
[34m[1mwandb[0m: 	eval_batch_size: 32
[34m[1mwandb[0m: 	learning_rate: 2e-05
[34m[1mwandb[0m: 	num_epochs: 5
[34m[1mwandb[0m: 	train_batch_size: 8
[34m[1mwandb[0m: 	weight_decay: 0.1


  super().__init__(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Micro F1,Macro F1
1,0.6558,0.722242,0.640807,0.582529
2,0.6042,0.631973,0.682964,0.637146
3,0.6138,0.611139,0.741935,0.712183
4,0.3659,0.614413,0.739454,0.693501
5,0.3633,0.588517,0.762869,0.722966


0,1
eval/loss,█▃▂▂▁
eval/macro_f1,▁▄▇▇█
eval/micro_f1,▁▃▇▇█
eval/runtime,█▆▅▇▁
eval/samples_per_second,▁▃▄▂█
eval/steps_per_second,▁▃▃▁█
train/epoch,▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇█████
train/global_step,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇███
train/grad_norm,▂▂▂▁▄█▁▁▁▁▁▁▃▁▁▂▂▂▁▁▂▂▂▂▂▂▂▁▁▁▄▁▂▂▃▂▂▂▁▃
train/learning_rate,████▇▇▇▇▇▇▆▆▆▆▆▅▅▅▅▄▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▁▁▁

0,1
eval/loss,0.58852
eval/macro_f1,0.72297
eval/micro_f1,0.76287
eval/runtime,11.55
eval/samples_per_second,36.45
eval/steps_per_second,1.212
total_flos,2270770731171840.0
train/epoch,5
train/global_step,1080
train/grad_norm,10.23387


[34m[1mwandb[0m: Agent Starting Run: zyk9wao7 with config:
[34m[1mwandb[0m: 	eval_batch_size: 32
[34m[1mwandb[0m: 	learning_rate: 2e-05
[34m[1mwandb[0m: 	num_epochs: 12
[34m[1mwandb[0m: 	train_batch_size: 8
[34m[1mwandb[0m: 	weight_decay: 0.01


  super().__init__(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Micro F1,Macro F1
1,0.6533,0.735554,0.626468,0.575669
2,0.6263,0.630396,0.661314,0.612846
3,0.6265,0.650804,0.730364,0.694409
4,0.3696,0.726776,0.72549,0.660382
5,0.3097,0.718994,0.76087,0.720217
6,0.3868,0.700635,0.761989,0.732574
7,0.169,0.82263,0.767098,0.727585
8,0.2897,0.868748,0.770659,0.726222
9,0.1823,0.883343,0.767808,0.712981
10,0.1042,0.8944,0.764977,0.725189


0,1
eval/loss,▃▁▁▃▃▂▅▆▆▆▇█
eval/macro_f1,▁▃▆▅▇███▇█▇█
eval/micro_f1,▁▃▆▆████████
eval/runtime,▆▃▃▃▃▃▅█▄▄▂▁
eval/samples_per_second,▃▆▆▆▆▆▄▁▅▅▇█
eval/steps_per_second,▂▇▆▇▆▆▅▁▅▅▇█
train/epoch,▁▁▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇█
train/global_step,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇███
train/grad_norm,▂▁▁▂▇▂▁▁▆▁▂▁▁▁▂▁▁▃▂▁▃▃▁▁▁▃▁█▂▁▁▁▁▁▁▁▁▃▁▁
train/learning_rate,████▇▇▇▇▇▇▆▆▆▅▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▁▁▁▁

0,1
eval/loss,0.96805
eval/macro_f1,0.72222
eval/micro_f1,0.77095
eval/runtime,11.5967
eval/samples_per_second,36.303
eval/steps_per_second,1.207
total_flos,5449849754812416.0
train/epoch,12
train/global_step,2592
train/grad_norm,2.35834


[34m[1mwandb[0m: Agent Starting Run: uf2nvf05 with config:
[34m[1mwandb[0m: 	eval_batch_size: 32
[34m[1mwandb[0m: 	learning_rate: 1e-05
[34m[1mwandb[0m: 	num_epochs: 8
[34m[1mwandb[0m: 	train_batch_size: 8
[34m[1mwandb[0m: 	weight_decay: 0.01


  super().__init__(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Micro F1,Macro F1
1,0.7383,0.734724,0.640059,0.564244
2,0.6304,0.663635,0.696049,0.626211
3,0.8346,0.613474,0.706422,0.657937
4,0.4503,0.59769,0.719436,0.681104
5,0.4149,0.588075,0.759796,0.714872
6,0.5118,0.600949,0.752724,0.720095
7,0.3363,0.587442,0.766387,0.731568
8,0.4282,0.585148,0.777114,0.739246


0,1
eval/loss,█▅▂▂▁▂▁▁
eval/macro_f1,▁▃▅▆▇▇██
eval/micro_f1,▁▄▄▅▇▇▇█
eval/runtime,▄▁▅█▅▆▄▃
eval/samples_per_second,▅█▄▁▄▃▅▆
eval/steps_per_second,▅█▄▁▄▃▆▆
train/epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▆▆▆▆▇▇▇▇▇████
train/global_step,▁▁▁▂▂▂▂▂▂▂▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▅▅▅▆▆▆▆▇▇▇▇███
train/grad_norm,▂▁▁▁▁▁▁▂█▂▂▄▂▂▆▁▃▂▁▃▁▁▁▂▁▁▁▁▁▆▃▃▂▂▂▁▁▆▁▁
train/learning_rate,███▇▇▇▆▆▆▅▅▄▄▄▄▄▄▄▄▃▃▃▃▃▃▃▃▃▃▂▂▂▂▂▁▁▁▁▁▁

0,1
eval/loss,0.58515
eval/macro_f1,0.73925
eval/micro_f1,0.77711
eval/runtime,11.5924
eval/samples_per_second,36.317
eval/steps_per_second,1.208
total_flos,3633233169874944.0
train/epoch,8
train/global_step,1728
train/grad_norm,4.7566


[34m[1mwandb[0m: Agent Starting Run: n6k9oyl4 with config:
[34m[1mwandb[0m: 	eval_batch_size: 16
[34m[1mwandb[0m: 	learning_rate: 1e-05
[34m[1mwandb[0m: 	num_epochs: 12
[34m[1mwandb[0m: 	train_batch_size: 8
[34m[1mwandb[0m: 	weight_decay: 0


  super().__init__(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Micro F1,Macro F1
1,0.7357,0.734666,0.639053,0.565482
2,0.6279,0.660966,0.695982,0.627108
3,0.8372,0.608056,0.709579,0.665166
4,0.427,0.614664,0.720764,0.670215
5,0.4053,0.601752,0.763889,0.72419
6,0.4957,0.618989,0.757576,0.722713
7,0.3178,0.584588,0.772842,0.732637
8,0.4074,0.607972,0.78169,0.744387
9,0.277,0.635153,0.780919,0.742548
10,0.2132,0.682662,0.775,0.739535


0,1
eval/loss,█▅▂▂▂▃▁▂▃▆▃▄
eval/macro_f1,▁▃▅▅▇▇████▇█
eval/micro_f1,▁▄▄▅▇▇██████
eval/runtime,▆▄▆▆▇▅▅▅█▃▁▆
eval/samples_per_second,▃▅▃▃▂▄▄▄▁▆█▃
eval/steps_per_second,▃▄▃▃▂▄▄▄▁▆█▃
train/epoch,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇█████
train/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▄▄▄▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇██
train/grad_norm,▂▁▁▁▁▂▁▁▃▂▂▁▁▃▁▂▃█▂▁▂▁▄▄▂▄▂▄▁▁▁▂█▁▂▂▂▂▂▄
train/learning_rate,███████▇▇▇▆▆▆▅▅▅▅▅▅▄▄▄▄▃▃▃▃▃▃▃▂▂▂▂▂▂▁▁▁▁

0,1
eval/loss,0.6554
eval/macro_f1,0.73788
eval/micro_f1,0.77966
eval/runtime,11.4055
eval/samples_per_second,36.912
eval/steps_per_second,2.367
total_flos,5449849754812416.0
train/epoch,12
train/global_step,2592
train/grad_norm,3.40311


[34m[1mwandb[0m: Agent Starting Run: ri0pscgv with config:
[34m[1mwandb[0m: 	eval_batch_size: 32
[34m[1mwandb[0m: 	learning_rate: 5e-05
[34m[1mwandb[0m: 	num_epochs: 5
[34m[1mwandb[0m: 	train_batch_size: 8
[34m[1mwandb[0m: 	weight_decay: 0.1


  super().__init__(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Micro F1,Macro F1
1,0.6779,0.817749,0.664557,0.579222
2,0.7232,0.66854,0.67433,0.60756
3,0.7827,0.608231,0.734219,0.672127
4,0.3813,0.661802,0.74958,0.707565
5,0.2779,0.615204,0.771403,0.735459


0,1
eval/loss,█▃▁▃▁
eval/macro_f1,▁▂▅▇█
eval/micro_f1,▁▂▆▇█
eval/runtime,▇▁▅█▂
eval/samples_per_second,▂█▄▁▇
eval/steps_per_second,▁█▄▁▇
train/epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████
train/global_step,▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇████
train/grad_norm,▁▁▄▁▂▁▂▂▂▂▂▃▁▁█▂▂▁▂▃▁▃▁▂▂▂▂▃▂▂▁▁▁▁▁▂▁▂▂▂
train/learning_rate,█████▇▇▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▂▂▂▂▁

0,1
eval/loss,0.6152
eval/macro_f1,0.73546
eval/micro_f1,0.7714
eval/runtime,11.5698
eval/samples_per_second,36.388
eval/steps_per_second,1.21
total_flos,2270770731171840.0
train/epoch,5
train/global_step,1080
train/grad_norm,5.63742


[34m[1mwandb[0m: Agent Starting Run: napxce5n with config:
[34m[1mwandb[0m: 	eval_batch_size: 16
[34m[1mwandb[0m: 	learning_rate: 2e-05
[34m[1mwandb[0m: 	num_epochs: 12
[34m[1mwandb[0m: 	train_batch_size: 16
[34m[1mwandb[0m: 	weight_decay: 0.1


  super().__init__(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Micro F1,Macro F1
1,0.6026,0.732828,0.688172,0.589894
2,0.7348,0.624283,0.660071,0.616945
3,0.5862,0.616794,0.685841,0.644772
4,0.4156,0.628702,0.700935,0.659694
5,0.3538,0.548618,0.738617,0.689942
6,0.3441,0.603932,0.735895,0.700032
7,0.2559,0.66897,0.771403,0.723093
8,0.2325,0.695339,0.784878,0.748127
9,0.2252,0.690551,0.774368,0.731239
10,0.228,0.708138,0.77484,0.737971


[34m[1mwandb[0m: [32m[41mERROR[0m Error while calling W&B API: An internal error occurred. Please contact support. (<Response [500]>)
[34m[1mwandb[0m: [32m[41mERROR[0m Error while calling W&B API: An internal error occurred. Please contact support. (<Response [500]>)
[34m[1mwandb[0m: [32m[41mERROR[0m Error while calling W&B API: An internal error occurred. Please contact support. (<Response [500]>)
[34m[1mwandb[0m: Network error (HTTPError), entering retry loop.
[34m[1mwandb[0m: [32m[41mERROR[0m Error while calling W&B API: An internal error occurred. Please contact support. (<Response [500]>)
[34m[1mwandb[0m: [32m[41mERROR[0m Error while calling W&B API: An internal error occurred. Please contact support. (<Response [500]>)


[34m[1mwandb[0m: [32m[41mERROR[0m Error while calling W&B API: An internal error occurred. Please contact support. (<Response [500]>)
[34m[1mwandb[0m: [32m[41mERROR[0m Error while calling W&B API: An internal error occurred. Please contact support. (<Response [500]>)
[34m[1mwandb[0m: [32m[41mERROR[0m Error while calling W&B API: An internal error occurred. Please contact support. (<Response [500]>)


0,1
eval/loss,█▄▄▄▁▃▆▇▆▇▇▇
eval/macro_f1,▁▂▃▄▅▆▇█▇█▇▇
eval/micro_f1,▃▁▂▃▅▅▇█▇▇▇▇
eval/runtime,▁▄█▆▁▅▅▃▃▂▅█
eval/samples_per_second,█▅▁▃█▄▄▆▆▇▄▁
eval/steps_per_second,█▆▁▄█▅▅▆▆▇▄▂
train/epoch,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
train/global_step,▁▁▁▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
train/grad_norm,▂▃▄▄▄▃▄▂▂▂▅▂▁▆▁▂█▂▃▄▂▂▂▂▂▃▁▅█▃▁▄▃▂▂▂▂▂▂▂
train/learning_rate,█████▇▇▇▆▆▆▆▆▆▆▅▅▅▅▄▄▄▃▃▃▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁

0,1
eval/loss,0.71658
eval/macro_f1,0.72299
eval/micro_f1,0.76978
eval/runtime,11.432
eval/samples_per_second,36.827
eval/steps_per_second,2.362
total_flos,5449849754812416.0
train/epoch,12
train/global_step,1296
train/grad_norm,7.95902


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: h3vkdak4 with config:
[34m[1mwandb[0m: 	eval_batch_size: 32
[34m[1mwandb[0m: 	learning_rate: 3e-05
[34m[1mwandb[0m: 	num_epochs: 5
[34m[1mwandb[0m: 	train_batch_size: 16
[34m[1mwandb[0m: 	weight_decay: 0


  super().__init__(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Micro F1,Macro F1
1,0.6097,0.722014,0.692432,0.60724
2,0.694,0.654612,0.66967,0.613925
3,0.5569,0.628516,0.692595,0.653849
4,0.4528,0.618954,0.722311,0.674795
5,0.331,0.604364,0.744646,0.697644


0,1
eval/loss,█▄▂▂▁
eval/macro_f1,▁▂▅▆█
eval/micro_f1,▃▁▃▆█
eval/runtime,▁▆█▁▅
eval/samples_per_second,█▃▁█▄
eval/steps_per_second,█▂▁█▄
train/epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇████
train/global_step,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇█████
train/grad_norm,▁▁▁▂▁▁▁▂▁▅▃▁▂▅▃▁▁▂▇▂█▃▁▇▄▄▁▂▃▄▃▂▂▃▄▅▂▁▂▂
train/learning_rate,████▇▇▇▇▇▇▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▁▁

0,1
eval/loss,0.60436
eval/macro_f1,0.69764
eval/micro_f1,0.74465
eval/runtime,11.6291
eval/samples_per_second,36.202
eval/steps_per_second,1.204
total_flos,2270770731171840.0
train/epoch,5
train/global_step,540
train/grad_norm,3.96147


In [None]:
api = wandb.Api()
sweep = api.sweep(f"jradi-ahmed-none/test_technique/{"lxcfaqiu"}")

best_run = sweep.best_run()
print(f"Best Run ID: {best_run.id}")
print(f"Best Metrics: {best_run.summary.get('eval_micro_f1')}")
print("Best Hyperparameters:")
import pprint
pprint.pprint(best_run.config)

best_config = best_run.config

[34m[1mwandb[0m: Sorting runs by -summary_metrics.eval_micro_f1


Best Run ID: doe5escr
Best Metrics: None
Best Hyperparameters:
{'_name_or_path': 'Mallard74/codebert-xcode-tags-classification',
 'accelerator_config': {'dispatch_batches': None,
                        'even_batches': True,
                        'gradient_accumulation_kwargs': None,
                        'non_blocking': False,
                        'split_batches': False,
                        'use_seedable_sampler': True},
 'adafactor': False,
 'adam_beta1': 0.9,
 'adam_beta2': 0.999,
 'adam_epsilon': 1e-08,
 'add_cross_attention': False,
 'architectures': ['RobertaForSequenceClassification'],
 'attention_probs_dropout_prob': 0.1,
 'auto_find_batch_size': False,
 'average_tokens_across_devices': True,
 'bad_words_ids': None,
 'batch_eval_metrics': False,
 'begin_suppress_tokens': None,
 'bf16': False,
 'bf16_full_eval': False,
 'bos_token_id': 0,
 'chunk_size_feed_forward': 0,
 'classifier_dropout': None,
 'cross_attention_hidden_size': None,
 'data_seed': None,
 'dataloader_

In [30]:
config = {
    "learning_rate": 2e-5,
    "train_batch_size": 16,
    "eval_batch_size": 32,
    "num_epochs": 8,
    "weight_decay": 0.01,
}

print(f"Training with manual config: {config}")

# Set up TrainingArguments with the manual config
training_args = TrainingArguments(
    output_dir="./hp_config_model_checkpoints",
    per_device_train_batch_size=config['train_batch_size'],
    per_device_eval_batch_size=config['eval_batch_size'],
    learning_rate=config['learning_rate'],
    num_train_epochs=config['num_epochs'],
    weight_decay=config['weight_decay'],
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="eval_micro_f1",
    greater_is_better=True,
    report_to="none",
)

# Initialize a fresh model
manual_model = model_init()

# Initialize the custom trainer
trainer = MultilabelTrainer(
    model=manual_model,
    args=training_args,
    train_dataset=train_full_ds,
    eval_dataset=eval_ds,
    tokenizer=tokenizer,
    pos_weight=pos_weight,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=5)],
)

# Train
trainer.train()

# Save the final model
print("Saving model to ./hp_config_model ...")
trainer.save_model("./hp_config_model_best_run_v3")
tokenizer.save_pretrained("./hp_config_model_best_run_v3")
print("Done!")

Training with manual config: {'learning_rate': 2e-05, 'train_batch_size': 16, 'eval_batch_size': 32, 'num_epochs': 8, 'weight_decay': 0.01}


  super().__init__(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Micro F1,Macro F1
1,No log,0.615743,0.667143,0.623609
2,No log,0.484121,0.733436,0.674414
3,No log,0.406615,0.770465,0.729751
4,0.632900,0.336786,0.8,0.788913
5,0.632900,0.31182,0.832192,0.841742
6,0.632900,0.267547,0.84456,0.85467
7,0.632900,0.243928,0.862191,0.872296
8,0.332900,0.237402,0.863958,0.880757


Saving model to ./hp_config_model ...
Done!
