In [59]:
import json
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, hamming_loss, precision_recall_curve
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments,
)
import torch
from datasets import Dataset
import nlpaug.augmenter.word as naw
from nlpaug.util import Action
import optuna

In [60]:
# ==================================================
# 0. Choose Model: "modernbert", "roberta", or "legalbert"
# ==================================================
model_choice = "legalbert"  # change to "legalbert" or "modernbert" as desired

if model_choice == "roberta":
    model_name = "roberta-base"
elif model_choice == "legalbert":
    model_name = "nlpaueb/legal-bert-base-uncased"
elif model_choice == "modernbert":
    model_name = "answerdotai/ModernBERT-base"
else:
    raise ValueError("Invalid model_choice. Choose 'modernbert', 'roberta', or 'legalbert'.")


In [61]:
# -----------------------------
# 1. Load Data
# -----------------------------
def load_license_data(json_folder):
    license_data = []
    for filename in os.listdir(json_folder):
        if filename.endswith(".json"):
            license_name = filename[:-5]
            filepath = os.path.join(json_folder, filename)
            with open(filepath, "r", encoding="utf-8") as f:
                data = json.load(f)
                license_data.append({
                    "license_name": license_name,
                    "family": data["family"],
                    "labels": data["labels"],
                    "text": data["text"],
                })
    return license_data

json_folder = "../../data/processed/preprocessed_licenses_json_2"
license_data = load_license_data(json_folder)
df = pd.DataFrame(license_data)

# Drop rows with missing labels
df.dropna(subset=["labels"], inplace=True)
df = df[df["labels"].apply(lambda x: len(x) > 0)]

In [62]:
# -----------------------------
# 2. Encode Labels
# -----------------------------
mlb = MultiLabelBinarizer()
df["labels"] = list(mlb.fit_transform(df["labels"]))
num_labels = len(mlb.classes_)


In [63]:
# -----------------------------
# 3. Split Data
# -----------------------------
train_df, temp_df = train_test_split(df, test_size=0.3, random_state=42)
val_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=42)

In [64]:
# -----------------------------
# 4. BERT-Based Augmentation
# -----------------------------

def augment_text(text, aug_p=0.2, aug_max=3):
    """Context-aware augmentation using BERT"""
    aug = naw.ContextualWordEmbsAug(
        model_path='bert-base-uncased',
        action=Action.SUBSTITUTE,
        aug_p=aug_p,
        aug_max=aug_max,
        device='cuda' if torch.cuda.is_available() else 'cpu'
    )
    return aug.augment(text)[0]  # Return first augmented version

def augment_minority_classes(df, mlb, min_samples=50, aug_factor=3):
    augmented_texts = []
    augmented_labels = []
    
    # Calculate label counts (fixed)
    label_matrix = np.array(df["labels"].tolist())
    label_counts = label_matrix.sum(axis=0)
    minority_labels = np.where(label_counts < min_samples)[0]
    
    for _, row in df.iterrows():
        text = row["text"]
        labels = row["labels"]
        
        # Check if sample contains minority labels
        if any(label in minority_labels for label in np.where(labels == 1)[0]):
            for _ in range(aug_factor):
                new_text = augment_text(text)
                augmented_texts.append(new_text)
                augmented_labels.append(labels)
    
    # Create augmented DataFrame
    augmented_df = pd.DataFrame({
        "text": augmented_texts,
        "labels": list(augmented_labels)
    })
    
    return pd.concat([df, augmented_df], ignore_index=True)

# Apply augmentation
print("Original training size:", len(train_df))
train_df = augment_minority_classes(
    train_df,
    mlb=mlb,
    min_samples=80,
    aug_factor=5
)
print("Augmented training size:", len(train_df))

Original training size: 247
Augmented training size: 972


In [68]:
# -----------------------------
# 5. Tokenization
# -----------------------------
tokenizer = AutoTokenizer.from_pretrained(model_name)

def tokenize(batch):
    return tokenizer(
        batch["text"],
        padding="max_length",
        truncation=True,
        max_length=512,
        return_tensors="pt"
    )

# Prepare datasets
train_dataset = Dataset.from_pandas(train_df).map(tokenize, batched=True)
val_dataset = Dataset.from_pandas(val_df).map(tokenize, batched=True)
test_dataset = Dataset.from_pandas(test_df).map(tokenize, batched=True)

# Set format (ensure "labels" column)
train_dataset.set_format("torch", columns=["input_ids", "attention_mask", "labels"])
val_dataset.set_format("torch", columns=["input_ids", "attention_mask", "labels"])
test_dataset.set_format("torch", columns=["input_ids", "attention_mask", "labels"])

loading configuration file config.json from cache at C:\Users\NPARSHO\.cache\huggingface\hub\models--nlpaueb--legal-bert-base-uncased\snapshots\15b570cbf88259610b082a167dacc190124f60f6\config.json
Model config BertConfig {
  "_name_or_path": "nlpaueb/legal-bert-base-uncased",
  "architectures": [
    "BertForPreTraining"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_ids": 0,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_past": true,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.48.0.dev0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading configuration file config.json from cache at C:\Users\NPARSHO\.cache\huggingface\

Map:   0%|          | 0/972 [00:00<?, ? examples/s]

Map:   0%|          | 0/53 [00:00<?, ? examples/s]

Map:   0%|          | 0/54 [00:00<?, ? examples/s]

In [69]:
# -----------------------------
# 6. Focal Loss Implementation
# -----------------------------
class FocalLoss(torch.nn.Module):
    def __init__(self, alpha=0.25, gamma=2.0):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma

    def forward(self, logits, labels):
        bce_loss = torch.nn.functional.binary_cross_entropy_with_logits(
            logits, labels, reduction="none"
        )
        pt = torch.exp(-bce_loss)
        focal_loss = self.alpha * (1 - pt) ** self.gamma * bce_loss
        return focal_loss.mean()

In [70]:
# -----------------------------
# 7. Custom Trainer
# -----------------------------
class CustomTrainer(Trainer):
    def compute_loss(self, model, inputs, num_items_in_batch=None, return_outputs=False):
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.logits
        
        # Apply focal loss
        loss_fct = FocalLoss(alpha=0.25, gamma=2.0)
        loss = loss_fct(logits, labels.float())
        
        return (loss, outputs) if return_outputs else loss

In [73]:

# -----------------------------
# 8. Model Setup
# -----------------------------
def model_init():
    return AutoModelForSequenceClassification.from_pretrained(
        model_name,
        num_labels=num_labels,
        problem_type="multi_label_classification",
        label2id={label: str(i) for i, label in enumerate(mlb.classes_)},
        id2label={str(i): label for i, label in enumerate(mlb.classes_)}
    )

In [None]:
# -----------------------------
# 9. Training
# -----------------------------
def compute_metrics(p):
    preds = torch.sigmoid(torch.tensor(p.predictions)).cpu().numpy() > 0.5
    labels = p.label_ids
    
    return {
        "f1_macro": f1_score(labels, preds, average="macro", zero_division=0),
        "f1_micro": f1_score(labels, preds, average="micro", zero_division=0),
        "hamming_loss": hamming_loss(labels, preds)
    }

training_args = TrainingArguments(
    output_dir="../../model/Bert4.0",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=10,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model="f1_macro",
    save_total_limit=2,
    logging_dir="./logs",
    logging_strategy="steps",
    logging_steps=10,
    report_to="none",
    seed=42,
    log_level="info",       # Show detailed logs
    disable_tqdm=False
)

trainer = CustomTrainer(
    model=model_init,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics
)


PyTorch: setting up devices


In [29]:
# -----------------------------
# 10. Train the Model
# -----------------------------
trainer.train()

The following columns in the training set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: license_name, family, text. If license_name, family, text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 972
  Num Epochs = 10
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 610
  Number of trainable parameters = 124,667,933


  0%|          | 0/610 [00:00<?, ?it/s]

{'loss': 0.0405, 'grad_norm': 0.11439289897680283, 'learning_rate': 1.9672131147540985e-05, 'epoch': 0.16}
{'loss': 0.0271, 'grad_norm': 0.0758814811706543, 'learning_rate': 1.934426229508197e-05, 'epoch': 0.33}
{'loss': 0.0236, 'grad_norm': 0.07778170704841614, 'learning_rate': 1.9016393442622952e-05, 'epoch': 0.49}
{'loss': 0.0204, 'grad_norm': 0.06722109019756317, 'learning_rate': 1.8688524590163936e-05, 'epoch': 0.66}
{'loss': 0.0188, 'grad_norm': 0.0932110995054245, 'learning_rate': 1.836065573770492e-05, 'epoch': 0.82}
{'loss': 0.018, 'grad_norm': 0.07130974531173706, 'learning_rate': 1.8032786885245903e-05, 'epoch': 0.98}


The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/Bert4.0\checkpoint-61
Configuration saved in ../../model/Bert4.0\checkpoint-61\config.json


{'eval_loss': 0.01727002114057541, 'eval_f1_macro': 0.367498240549108, 'eval_f1_micro': 0.7978436657681941, 'eval_hamming_loss': 0.09759271307742355, 'eval_runtime': 53.9866, 'eval_samples_per_second': 0.982, 'eval_steps_per_second': 0.074, 'epoch': 1.0}


Deleting older checkpoint [..\..\model\Bert4.0\checkpoint-387] due to args.save_total_limit


{'loss': 0.0169, 'grad_norm': 0.07764017581939697, 'learning_rate': 1.7704918032786887e-05, 'epoch': 1.15}
{'loss': 0.0163, 'grad_norm': 0.12719246745109558, 'learning_rate': 1.737704918032787e-05, 'epoch': 1.31}
{'loss': 0.0168, 'grad_norm': 0.11566201597452164, 'learning_rate': 1.7049180327868854e-05, 'epoch': 1.48}
{'loss': 0.0144, 'grad_norm': 0.06543538719415665, 'learning_rate': 1.6721311475409837e-05, 'epoch': 1.64}
{'loss': 0.014, 'grad_norm': 0.10747621953487396, 'learning_rate': 1.639344262295082e-05, 'epoch': 1.8}
{'loss': 0.0128, 'grad_norm': 0.1329927146434784, 'learning_rate': 1.6065573770491805e-05, 'epoch': 1.97}


The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/Bert4.0\checkpoint-122
Configuration saved in ../../model/Bert4.0\checkpoint-122\config.json


{'eval_loss': 0.01521281898021698, 'eval_f1_macro': 0.44981964708721967, 'eval_f1_micro': 0.8199737187910644, 'eval_hamming_loss': 0.08913467794404685, 'eval_runtime': 53.0748, 'eval_samples_per_second': 0.999, 'eval_steps_per_second': 0.075, 'epoch': 2.0}


Deleting older checkpoint [..\..\model\Bert4.0\checkpoint-61] due to args.save_total_limit


{'loss': 0.0134, 'grad_norm': 0.11253954470157623, 'learning_rate': 1.5737704918032788e-05, 'epoch': 2.13}
{'loss': 0.0129, 'grad_norm': 0.2705577313899994, 'learning_rate': 1.5409836065573772e-05, 'epoch': 2.3}
{'loss': 0.0116, 'grad_norm': 0.13734114170074463, 'learning_rate': 1.5081967213114754e-05, 'epoch': 2.46}
{'loss': 0.0112, 'grad_norm': 0.08426066488027573, 'learning_rate': 1.4754098360655739e-05, 'epoch': 2.62}
{'loss': 0.0113, 'grad_norm': 0.10119855403900146, 'learning_rate': 1.4426229508196722e-05, 'epoch': 2.79}
{'loss': 0.01, 'grad_norm': 0.12322243303060532, 'learning_rate': 1.4098360655737706e-05, 'epoch': 2.95}


The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/Bert4.0\checkpoint-183
Configuration saved in ../../model/Bert4.0\checkpoint-183\config.json


{'eval_loss': 0.01475460547953844, 'eval_f1_macro': 0.5279964197296426, 'eval_f1_micro': 0.8382165605095542, 'eval_hamming_loss': 0.0826284970722186, 'eval_runtime': 50.3421, 'eval_samples_per_second': 1.053, 'eval_steps_per_second': 0.079, 'epoch': 3.0}


Deleting older checkpoint [..\..\model\Bert4.0\checkpoint-122] due to args.save_total_limit


{'loss': 0.0107, 'grad_norm': 0.06795009225606918, 'learning_rate': 1.377049180327869e-05, 'epoch': 3.11}
{'loss': 0.0091, 'grad_norm': 0.077070452272892, 'learning_rate': 1.3442622950819673e-05, 'epoch': 3.28}
{'loss': 0.0087, 'grad_norm': 0.09553803503513336, 'learning_rate': 1.3114754098360655e-05, 'epoch': 3.44}
{'loss': 0.0088, 'grad_norm': 0.08226407319307327, 'learning_rate': 1.2786885245901642e-05, 'epoch': 3.61}
{'loss': 0.0091, 'grad_norm': 0.10462537407875061, 'learning_rate': 1.2459016393442624e-05, 'epoch': 3.77}
{'loss': 0.0077, 'grad_norm': 0.10200094431638718, 'learning_rate': 1.2131147540983608e-05, 'epoch': 3.93}


The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/Bert4.0\checkpoint-244
Configuration saved in ../../model/Bert4.0\checkpoint-244\config.json


{'eval_loss': 0.014614422805607319, 'eval_f1_macro': 0.5685425134584888, 'eval_f1_micro': 0.8553137003841229, 'eval_hamming_loss': 0.07351984385165908, 'eval_runtime': 51.7762, 'eval_samples_per_second': 1.024, 'eval_steps_per_second': 0.077, 'epoch': 4.0}


Deleting older checkpoint [..\..\model\Bert4.0\checkpoint-183] due to args.save_total_limit


{'loss': 0.0073, 'grad_norm': 0.07025521993637085, 'learning_rate': 1.1803278688524591e-05, 'epoch': 4.1}
{'loss': 0.0074, 'grad_norm': 0.07960332930088043, 'learning_rate': 1.1475409836065575e-05, 'epoch': 4.26}
{'loss': 0.007, 'grad_norm': 0.05971836671233177, 'learning_rate': 1.1147540983606557e-05, 'epoch': 4.43}
{'loss': 0.0071, 'grad_norm': 0.07849713414907455, 'learning_rate': 1.0819672131147544e-05, 'epoch': 4.59}
{'loss': 0.0068, 'grad_norm': 0.12895247340202332, 'learning_rate': 1.0491803278688525e-05, 'epoch': 4.75}
{'loss': 0.0065, 'grad_norm': 0.08169500529766083, 'learning_rate': 1.0163934426229509e-05, 'epoch': 4.92}


The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/Bert4.0\checkpoint-305
Configuration saved in ../../model/Bert4.0\checkpoint-305\config.json


{'eval_loss': 0.013690901920199394, 'eval_f1_macro': 0.5615409085426857, 'eval_f1_micro': 0.8538461538461538, 'eval_hamming_loss': 0.0741704619388419, 'eval_runtime': 53.2578, 'eval_samples_per_second': 0.995, 'eval_steps_per_second': 0.075, 'epoch': 5.0}


Deleting older checkpoint [..\..\model\Bert4.0\checkpoint-305] due to args.save_total_limit


{'loss': 0.0062, 'grad_norm': 0.06895734369754791, 'learning_rate': 9.836065573770493e-06, 'epoch': 5.08}
{'loss': 0.0065, 'grad_norm': 0.06376904994249344, 'learning_rate': 9.508196721311476e-06, 'epoch': 5.25}
{'loss': 0.0062, 'grad_norm': 0.06455773115158081, 'learning_rate': 9.18032786885246e-06, 'epoch': 5.41}
{'loss': 0.0054, 'grad_norm': 0.05414144694805145, 'learning_rate': 8.852459016393443e-06, 'epoch': 5.57}
{'loss': 0.0054, 'grad_norm': 0.04053435102105141, 'learning_rate': 8.524590163934427e-06, 'epoch': 5.74}
{'loss': 0.0057, 'grad_norm': 0.08845486491918564, 'learning_rate': 8.19672131147541e-06, 'epoch': 5.9}


The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/Bert4.0\checkpoint-366
Configuration saved in ../../model/Bert4.0\checkpoint-366\config.json


{'eval_loss': 0.014660362154245377, 'eval_f1_macro': 0.5879635830552704, 'eval_f1_micro': 0.8633461047254151, 'eval_hamming_loss': 0.06961613532856213, 'eval_runtime': 54.0349, 'eval_samples_per_second': 0.981, 'eval_steps_per_second': 0.074, 'epoch': 6.0}


Deleting older checkpoint [..\..\model\Bert4.0\checkpoint-244] due to args.save_total_limit


{'loss': 0.006, 'grad_norm': 0.07076963782310486, 'learning_rate': 7.868852459016394e-06, 'epoch': 6.07}
{'loss': 0.005, 'grad_norm': 0.05282030627131462, 'learning_rate': 7.540983606557377e-06, 'epoch': 6.23}
{'loss': 0.0054, 'grad_norm': 0.0750291720032692, 'learning_rate': 7.213114754098361e-06, 'epoch': 6.39}
{'loss': 0.0049, 'grad_norm': 0.08564931899309158, 'learning_rate': 6.885245901639345e-06, 'epoch': 6.56}
{'loss': 0.005, 'grad_norm': 0.05929868295788765, 'learning_rate': 6.5573770491803276e-06, 'epoch': 6.72}
{'loss': 0.0047, 'grad_norm': 0.06613773852586746, 'learning_rate': 6.229508196721312e-06, 'epoch': 6.89}


The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/Bert4.0\checkpoint-427
Configuration saved in ../../model/Bert4.0\checkpoint-427\config.json


{'eval_loss': 0.014256794936954975, 'eval_f1_macro': 0.591090456295439, 'eval_f1_micro': 0.8663239074550129, 'eval_hamming_loss': 0.06766428106701367, 'eval_runtime': 53.6835, 'eval_samples_per_second': 0.987, 'eval_steps_per_second': 0.075, 'epoch': 7.0}


Deleting older checkpoint [..\..\model\Bert4.0\checkpoint-366] due to args.save_total_limit


{'loss': 0.0049, 'grad_norm': 0.051996152848005295, 'learning_rate': 5.9016393442622956e-06, 'epoch': 7.05}
{'loss': 0.0048, 'grad_norm': 0.04995713010430336, 'learning_rate': 5.573770491803278e-06, 'epoch': 7.21}
{'loss': 0.0045, 'grad_norm': 0.05542890354990959, 'learning_rate': 5.245901639344263e-06, 'epoch': 7.38}
{'loss': 0.0046, 'grad_norm': 0.07824521511793137, 'learning_rate': 4.918032786885246e-06, 'epoch': 7.54}
{'loss': 0.0046, 'grad_norm': 0.04286443069577217, 'learning_rate': 4.59016393442623e-06, 'epoch': 7.7}
{'loss': 0.0047, 'grad_norm': 0.07349569350481033, 'learning_rate': 4.2622950819672135e-06, 'epoch': 7.87}


The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/Bert4.0\checkpoint-488
Configuration saved in ../../model/Bert4.0\checkpoint-488\config.json


{'eval_loss': 0.014187810942530632, 'eval_f1_macro': 0.5872885320277947, 'eval_f1_micro': 0.8630490956072352, 'eval_hamming_loss': 0.06896551724137931, 'eval_runtime': 53.3295, 'eval_samples_per_second': 0.994, 'eval_steps_per_second': 0.075, 'epoch': 8.0}


Deleting older checkpoint [..\..\model\Bert4.0\checkpoint-430] due to args.save_total_limit


{'loss': 0.0045, 'grad_norm': 0.03882031515240669, 'learning_rate': 3.934426229508197e-06, 'epoch': 8.03}
{'loss': 0.0045, 'grad_norm': 0.05249963700771332, 'learning_rate': 3.6065573770491806e-06, 'epoch': 8.2}
{'loss': 0.0043, 'grad_norm': 0.04327310621738434, 'learning_rate': 3.2786885245901638e-06, 'epoch': 8.36}
{'loss': 0.0043, 'grad_norm': 0.049907613545656204, 'learning_rate': 2.9508196721311478e-06, 'epoch': 8.52}
{'loss': 0.0041, 'grad_norm': 0.041949931532144547, 'learning_rate': 2.6229508196721314e-06, 'epoch': 8.69}
{'loss': 0.0045, 'grad_norm': 0.11130422353744507, 'learning_rate': 2.295081967213115e-06, 'epoch': 8.85}


The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/Bert4.0\checkpoint-549
Configuration saved in ../../model/Bert4.0\checkpoint-549\config.json


{'eval_loss': 0.014318455941975117, 'eval_f1_macro': 0.6004805539312504, 'eval_f1_micro': 0.8670076726342711, 'eval_hamming_loss': 0.06766428106701367, 'eval_runtime': 53.4912, 'eval_samples_per_second': 0.991, 'eval_steps_per_second': 0.075, 'epoch': 9.0}


Deleting older checkpoint [..\..\model\Bert4.0\checkpoint-427] due to args.save_total_limit


{'loss': 0.0044, 'grad_norm': 0.060591962188482285, 'learning_rate': 1.9672131147540985e-06, 'epoch': 9.02}
{'loss': 0.0044, 'grad_norm': 0.044623780995607376, 'learning_rate': 1.6393442622950819e-06, 'epoch': 9.18}
{'loss': 0.0038, 'grad_norm': 0.043660108000040054, 'learning_rate': 1.3114754098360657e-06, 'epoch': 9.34}
{'loss': 0.0041, 'grad_norm': 0.06246509402990341, 'learning_rate': 9.836065573770493e-07, 'epoch': 9.51}
{'loss': 0.0043, 'grad_norm': 0.06374526023864746, 'learning_rate': 6.557377049180328e-07, 'epoch': 9.67}
{'loss': 0.0042, 'grad_norm': 0.037270937114953995, 'learning_rate': 3.278688524590164e-07, 'epoch': 9.84}


Saving model checkpoint to ../../model/Bert4.0\checkpoint-610
Configuration saved in ../../model/Bert4.0\checkpoint-610\config.json


{'loss': 0.0041, 'grad_norm': 0.03981207311153412, 'learning_rate': 0.0, 'epoch': 10.0}


Deleting older checkpoint [..\..\model\Bert4.0\checkpoint-488] due to args.save_total_limit
The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/Bert4.0\checkpoint-610
Configuration saved in ../../model/Bert4.0\checkpoint-610\config.json


{'eval_loss': 0.014367332682013512, 'eval_f1_macro': 0.5897142482466144, 'eval_f1_micro': 0.8600770218228498, 'eval_hamming_loss': 0.07091737150292778, 'eval_runtime': 52.4383, 'eval_samples_per_second': 1.011, 'eval_steps_per_second': 0.076, 'epoch': 10.0}




Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ../../model/Bert4.0\checkpoint-549 (score: 0.6004805539312504).


{'train_runtime': 29038.2895, 'train_samples_per_second': 0.335, 'train_steps_per_second': 0.021, 'train_loss': 0.00911996128251318, 'epoch': 10.0}


TrainOutput(global_step=610, training_loss=0.00911996128251318, metrics={'train_runtime': 29038.2895, 'train_samples_per_second': 0.335, 'train_steps_per_second': 0.021, 'total_flos': 2558059437957120.0, 'train_loss': 0.00911996128251318, 'epoch': 10.0})

In [None]:
trainer.save_model("../../model/Roberta")  # Saves model and tokenizer
tokenizer.save_pretrained("../../model/Roberta")  # Save tokenizer

Saving model checkpoint to ../../model/Roberta
Configuration saved in ../../model/Roberta\config.json


('../../model/Roberta\\tokenizer_config.json',
 '../../model/Roberta\\special_tokens_map.json',
 '../../model/Roberta\\vocab.json',
 '../../model/Roberta\\merges.txt',
 '../../model/Roberta\\added_tokens.json',
 '../../model/Roberta\\tokenizer.json')

In [31]:
# -----------------------------
# 11. Threshold Optimization
# -----------------------------
def optimize_thresholds(y_true, y_pred_proba):
    thresholds = {}
    for i in range(y_true.shape[1]):
        precision, recall, th = precision_recall_curve(y_true[:, i], y_pred_proba[:, i])
        f1_scores = 2 * (precision * recall) / (precision + recall + 1e-10)
        optimal_th = th[np.argmax(f1_scores)]
        thresholds[i] = optimal_th
    return thresholds

# Get validation predictions
val_preds = trainer.predict(val_dataset)
val_pred_proba = torch.sigmoid(torch.tensor(val_preds.predictions)).numpy()
val_labels = val_preds.label_ids

# Find optimal thresholds
optimal_thresholds = optimize_thresholds(val_labels, val_pred_proba)

# -----------------------------
# 12. Final Evaluation
# -----------------------------
test_preds = trainer.predict(test_dataset)
test_pred_proba = torch.sigmoid(torch.tensor(test_preds.predictions)).numpy()
test_labels = test_preds.label_ids

# Apply optimized thresholds
final_test_preds = (test_pred_proba > np.array(list(optimal_thresholds.values()))).astype(int)

# Print final metrics
print("\n--- Final Test Metrics ---")
print(f"F1 Macro: {f1_score(test_labels, final_test_preds, average='macro'):.4f}")
print(f"F1 Micro: {f1_score(test_labels, final_test_preds, average='micro'):.4f}")
print(f"Hamming Loss: {hamming_loss(test_labels, final_test_preds):.4f}")

The following columns in the test set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Prediction *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

The following columns in the test set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Prediction *****
  Num examples = 54
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]


--- Final Test Metrics ---
F1 Macro: 0.6237
F1 Micro: 0.6896
Hamming Loss: 0.2075


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [78]:
model_directory = "../../model/LegalBert"
model = AutoModelForSequenceClassification.from_pretrained(model_directory)
tokenizer = AutoTokenizer.from_pretrained(model_directory)

loading configuration file ../../model/LegalBert\config.json
Model config BertConfig {
  "_name_or_path": "../../model/LegalBert",
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_ids": 0,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "2": "Copyleft (network protective) (obligation/ALARM)",
    "6": "Deprecated License (other/INFORMATION)",
    "9": "Display copyright notice (obligation/INFORMATION)",
    "10": "Display license in binary (obligation/INFORMATION)",
    "11": "Display license in the source (obligation/INFORMATION)",
    "12": "Doing Business with US (other/ALARM)",
    "13": "Endorsement prohibited (prohibition/INFORMATION)",
    "16": "License upgrade allowed (right/INFORMATION)",
    "18": "No further restrictions permitted (prohibition/INFORMATION)",
    "19": "Patent grant (other/INFORMATION)",
    "20":

In [79]:
trainer = CustomTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics
)

In [80]:
# Get validation predictions
val_preds = trainer.predict(val_dataset)
val_pred_proba = torch.sigmoid(torch.tensor(val_preds.predictions)).numpy()
val_labels = val_preds.label_ids

# Find optimal thresholds
optimal_thresholds = optimize_thresholds(val_labels, val_pred_proba)

The following columns in the test set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Prediction *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]



In [81]:


# -----------------------------
# 12. Final Evaluation
# -----------------------------
test_preds = trainer.predict(test_dataset)
test_pred_proba = torch.sigmoid(torch.tensor(test_preds.predictions)).numpy()
test_labels = test_preds.label_ids

# Apply optimized thresholds
final_test_preds = (test_pred_proba > np.array(list(optimal_thresholds.values()))).astype(int)

# Print final metrics
print("\n--- Final Test Metrics ---")
print(f"F1 Macro: {f1_score(test_labels, final_test_preds, average='macro'):.4f}")
print(f"F1 Micro: {f1_score(test_labels, final_test_preds, average='micro'):.4f}")
print(f"Hamming Loss: {hamming_loss(test_labels, final_test_preds):.4f}")

The following columns in the test set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Prediction *****
  Num examples = 54
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]


--- Final Test Metrics ---
F1 Macro: 0.6397
F1 Micro: 0.6956
Hamming Loss: 0.2018


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [75]:
# Define your hyperparameter search space
def hp_space(trial):
    return {
        "learning_rate": trial.suggest_float("learning_rate", 1e-5, 5e-5, log=True),
        "per_device_train_batch_size": trial.suggest_categorical("per_device_train_batch_size", [8, 16]),
        "num_train_epochs": trial.suggest_int("num_train_epochs", 5, 15),
        "weight_decay": trial.suggest_float("weight_decay", 0.01, 0.1, log=True),
        # Focal loss parameters: 
        "alpha": trial.suggest_float("alpha", 0.25, 0.5),
        "gamma": trial.suggest_float("gamma", 1.5, 3.0)
    }

training_args = TrainingArguments(
    output_dir="../../model/hp_search",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_dir="./logs",
    report_to="none",
    seed=42,
    # These values can be overridden by hyperparameter search
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=10,
    learning_rate=2e-5,
    weight_decay=0.01,
)

# Initialize the Trainer with your custom Trainer (using FocalLoss) and model_init
trainer = CustomTrainer(
    model_init=model_init,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
)

# Run hyperparameter search with Optuna
best_trial = trainer.hyperparameter_search(
    direction="maximize",  # Assuming you're maximizing F1 score
    hp_space=hp_space,
    n_trials=20  # You can adjust the number of trials
)

print("Best hyperparameters found:", best_trial.hyperparameters)


PyTorch: setting up devices
loading configuration file config.json from cache at C:\Users\NPARSHO\.cache\huggingface\hub\models--nlpaueb--legal-bert-base-uncased\snapshots\15b570cbf88259610b082a167dacc190124f60f6\config.json
Model config BertConfig {
  "_name_or_path": "nlpaueb/legal-bert-base-uncased",
  "architectures": [
    "BertForPreTraining"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_ids": 0,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "10": "Display license in binary (obligation/INFORMATION)",
    "11": "Display license in the source (obligation/INFORMATION)",
    "12": "Doing Business with US (other/ALARM)",
    "13": "Endorsement prohibited (prohibition/INFORMATION)",
    "16": "License upgrade allowed (right/INFORMATION)",
    "18": "No further restrictions permitted (prohibition/INFORMATION)",
    "19": "Patent grant (other/INFORMATION)",
    "2": "Copyleft (

  0%|          | 0/732 [00:00<?, ?it/s]

Attempting to create safetensors variant
Safetensors PR exists
Attempting to create safetensors variant
Safetensors PR exists
The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/hp_search\run-0\checkpoint-61
Configuration saved in ../../model/hp_search\run-0\checkpoint-61\config.json


{'eval_loss': 0.017661551013588905, 'eval_f1_macro': 0.32713001329340347, 'eval_f1_micro': 0.7896879240162822, 'eval_hamming_loss': 0.10084580351333768, 'eval_runtime': 65.7015, 'eval_samples_per_second': 0.807, 'eval_steps_per_second': 0.061, 'epoch': 1.0}


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/hp_search\run-0\checkpoint-122
Configuration saved in ../../model/hp_search\run-0\checkpoint-122\config.json


{'eval_loss': 0.015107940882444382, 'eval_f1_macro': 0.45192000123258314, 'eval_f1_micro': 0.8258575197889182, 'eval_hamming_loss': 0.08588158750813273, 'eval_runtime': 65.5585, 'eval_samples_per_second': 0.808, 'eval_steps_per_second': 0.061, 'epoch': 2.0}


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/hp_search\run-0\checkpoint-183
Configuration saved in ../../model/hp_search\run-0\checkpoint-183\config.json


{'eval_loss': 0.01468051690608263, 'eval_f1_macro': 0.5134380507339251, 'eval_f1_micro': 0.8435897435897436, 'eval_hamming_loss': 0.07937540663630449, 'eval_runtime': 65.9055, 'eval_samples_per_second': 0.804, 'eval_steps_per_second': 0.061, 'epoch': 3.0}


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/hp_search\run-0\checkpoint-244
Configuration saved in ../../model/hp_search\run-0\checkpoint-244\config.json


{'eval_loss': 0.014503732323646545, 'eval_f1_macro': 0.5882168617029118, 'eval_f1_micro': 0.8582375478927203, 'eval_hamming_loss': 0.07221860767729343, 'eval_runtime': 65.9639, 'eval_samples_per_second': 0.803, 'eval_steps_per_second': 0.061, 'epoch': 4.0}


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/hp_search\run-0\checkpoint-305
Configuration saved in ../../model/hp_search\run-0\checkpoint-305\config.json


{'eval_loss': 0.014879824593663216, 'eval_f1_macro': 0.5797974374466691, 'eval_f1_micro': 0.8556701030927835, 'eval_hamming_loss': 0.07286922576447626, 'eval_runtime': 65.9471, 'eval_samples_per_second': 0.804, 'eval_steps_per_second': 0.061, 'epoch': 5.0}


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/hp_search\run-0\checkpoint-366
Configuration saved in ../../model/hp_search\run-0\checkpoint-366\config.json


{'eval_loss': 0.015594271942973137, 'eval_f1_macro': 0.6083628785549509, 'eval_f1_micro': 0.8560509554140128, 'eval_hamming_loss': 0.07351984385165908, 'eval_runtime': 66.3287, 'eval_samples_per_second': 0.799, 'eval_steps_per_second': 0.06, 'epoch': 6.0}


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/hp_search\run-0\checkpoint-427
Configuration saved in ../../model/hp_search\run-0\checkpoint-427\config.json


{'eval_loss': 0.014932326972484589, 'eval_f1_macro': 0.6141900765603633, 'eval_f1_micro': 0.8663239074550129, 'eval_hamming_loss': 0.06766428106701367, 'eval_runtime': 66.0796, 'eval_samples_per_second': 0.802, 'eval_steps_per_second': 0.061, 'epoch': 7.0}


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/hp_search\run-0\checkpoint-488
Configuration saved in ../../model/hp_search\run-0\checkpoint-488\config.json


{'eval_loss': 0.015610205940902233, 'eval_f1_macro': 0.6115016235944682, 'eval_f1_micro': 0.8611825192802056, 'eval_hamming_loss': 0.07026675341574495, 'eval_runtime': 65.9635, 'eval_samples_per_second': 0.803, 'eval_steps_per_second': 0.061, 'epoch': 8.0}
{'loss': 0.009, 'grad_norm': 0.02335641346871853, 'learning_rate': 1.1844778992585751e-05, 'epoch': 8.2}


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/hp_search\run-0\checkpoint-549
Configuration saved in ../../model/hp_search\run-0\checkpoint-549\config.json


{'eval_loss': 0.0153913339599967, 'eval_f1_macro': 0.6047692285556783, 'eval_f1_micro': 0.8600770218228498, 'eval_hamming_loss': 0.07091737150292778, 'eval_runtime': 66.1347, 'eval_samples_per_second': 0.801, 'eval_steps_per_second': 0.06, 'epoch': 9.0}


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/hp_search\run-0\checkpoint-610
Configuration saved in ../../model/hp_search\run-0\checkpoint-610\config.json


{'eval_loss': 0.015886398032307625, 'eval_f1_macro': 0.6060601940675816, 'eval_f1_micro': 0.8578745198463509, 'eval_hamming_loss': 0.07221860767729343, 'eval_runtime': 65.725, 'eval_samples_per_second': 0.806, 'eval_steps_per_second': 0.061, 'epoch': 10.0}


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/hp_search\run-0\checkpoint-671
Configuration saved in ../../model/hp_search\run-0\checkpoint-671\config.json


{'eval_loss': 0.016224214807152748, 'eval_f1_macro': 0.6076854291125322, 'eval_f1_micro': 0.8604353393085787, 'eval_hamming_loss': 0.07091737150292778, 'eval_runtime': 65.9028, 'eval_samples_per_second': 0.804, 'eval_steps_per_second': 0.061, 'epoch': 11.0}


Saving model checkpoint to ../../model/hp_search\run-0\checkpoint-732
Configuration saved in ../../model/hp_search\run-0\checkpoint-732\config.json
The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/hp_search\run-0\checkpoint-732
Configuration saved in ../../model/hp_search\run-0\checkpoint-732\config.json


{'eval_loss': 0.016121268272399902, 'eval_f1_macro': 0.6076854291125322, 'eval_f1_micro': 0.8604353393085787, 'eval_hamming_loss': 0.07091737150292778, 'eval_runtime': 66.48, 'eval_samples_per_second': 0.797, 'eval_steps_per_second': 0.06, 'epoch': 12.0}




Training completed. Do not forget to share your model on huggingface.co/models =)


[I 2025-03-13 10:12:00,352] Trial 0 finished with value: 1.5390381399240387 and parameters: {'learning_rate': 3.737231992488263e-05, 'per_device_train_batch_size': 16, 'num_train_epochs': 12, 'weight_decay': 0.030357542715208397, 'alpha': 0.4233822628381545, 'gamma': 2.077604194362665}. Best is trial 0 with value: 1.5390381399240387.
Trying to set alpha in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set gamma in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trial: {'learning_rate': 4.150579859926717e-05, 'per_device_train_batch_size': 8, 'num_train_epochs': 14, 'weight_decay': 0.014580526204328113, 'alpha': 0.4989228897662774, 'gamma': 2.0623051788188915}


{'train_runtime': 35672.2796, 'train_samples_per_second': 0.327, 'train_steps_per_second': 0.021, 'train_loss': 0.006994806107927541, 'epoch': 12.0}


loading configuration file config.json from cache at C:\Users\NPARSHO\.cache\huggingface\hub\models--nlpaueb--legal-bert-base-uncased\snapshots\15b570cbf88259610b082a167dacc190124f60f6\config.json
Model config BertConfig {
  "_name_or_path": "nlpaueb/legal-bert-base-uncased",
  "architectures": [
    "BertForPreTraining"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_ids": 0,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "10": "Display license in binary (obligation/INFORMATION)",
    "11": "Display license in the source (obligation/INFORMATION)",
    "12": "Doing Business with US (other/ALARM)",
    "13": "Endorsement prohibited (prohibition/INFORMATION)",
    "16": "License upgrade allowed (right/INFORMATION)",
    "18": "No further restrictions permitted (prohibition/INFORMATION)",
    "19": "Patent grant (other/INFORMATION)",
    "2": "Copyleft (network protective) (obligat

  0%|          | 0/1708 [00:00<?, ?it/s]

Attempting to create safetensors variant
Safetensors PR exists
The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/hp_search\run-1\checkpoint-122
Configuration saved in ../../model/hp_search\run-1\checkpoint-122\config.json


{'eval_loss': 0.01722702756524086, 'eval_f1_macro': 0.3269749707154933, 'eval_f1_micro': 0.7757909215955984, 'eval_hamming_loss': 0.10605074821080027, 'eval_runtime': 55.7149, 'eval_samples_per_second': 0.951, 'eval_steps_per_second': 0.072, 'epoch': 1.0}


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/hp_search\run-1\checkpoint-244
Configuration saved in ../../model/hp_search\run-1\checkpoint-244\config.json


{'eval_loss': 0.01484353095293045, 'eval_f1_macro': 0.557358453453057, 'eval_f1_micro': 0.854614412136536, 'eval_hamming_loss': 0.07482108002602472, 'eval_runtime': 54.237, 'eval_samples_per_second': 0.977, 'eval_steps_per_second': 0.074, 'epoch': 2.0}


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/hp_search\run-1\checkpoint-366
Configuration saved in ../../model/hp_search\run-1\checkpoint-366\config.json


{'eval_loss': 0.014910904690623283, 'eval_f1_macro': 0.611887076510675, 'eval_f1_micro': 0.8629441624365483, 'eval_hamming_loss': 0.07026675341574495, 'eval_runtime': 54.2167, 'eval_samples_per_second': 0.978, 'eval_steps_per_second': 0.074, 'epoch': 3.0}


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/hp_search\run-1\checkpoint-488
Configuration saved in ../../model/hp_search\run-1\checkpoint-488\config.json


{'eval_loss': 0.015760479494929314, 'eval_f1_macro': 0.616392282487043, 'eval_f1_micro': 0.8673469387755102, 'eval_hamming_loss': 0.06766428106701367, 'eval_runtime': 54.2425, 'eval_samples_per_second': 0.977, 'eval_steps_per_second': 0.074, 'epoch': 4.0}
{'loss': 0.0103, 'grad_norm': 0.03576251491904259, 'learning_rate': 2.935538917325219e-05, 'epoch': 4.1}


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/hp_search\run-1\checkpoint-610
Configuration saved in ../../model/hp_search\run-1\checkpoint-610\config.json


{'eval_loss': 0.017529094591736794, 'eval_f1_macro': 0.6167742608997735, 'eval_f1_micro': 0.8575063613231552, 'eval_hamming_loss': 0.07286922576447626, 'eval_runtime': 54.1554, 'eval_samples_per_second': 0.979, 'eval_steps_per_second': 0.074, 'epoch': 5.0}


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/hp_search\run-1\checkpoint-732
Configuration saved in ../../model/hp_search\run-1\checkpoint-732\config.json


{'eval_loss': 0.017601365223526955, 'eval_f1_macro': 0.630371392397419, 'eval_f1_micro': 0.8710089399744572, 'eval_hamming_loss': 0.06571242680546518, 'eval_runtime': 53.7403, 'eval_samples_per_second': 0.986, 'eval_steps_per_second': 0.074, 'epoch': 6.0}


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/hp_search\run-1\checkpoint-854
Configuration saved in ../../model/hp_search\run-1\checkpoint-854\config.json


{'eval_loss': 0.01862945780158043, 'eval_f1_macro': 0.638877947430367, 'eval_f1_micro': 0.8724489795918368, 'eval_hamming_loss': 0.06506180871828236, 'eval_runtime': 55.4152, 'eval_samples_per_second': 0.956, 'eval_steps_per_second': 0.072, 'epoch': 7.0}


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/hp_search\run-1\checkpoint-976
Configuration saved in ../../model/hp_search\run-1\checkpoint-976\config.json


{'eval_loss': 0.018686309456825256, 'eval_f1_macro': 0.6307129613610022, 'eval_f1_micro': 0.8629961587708067, 'eval_hamming_loss': 0.06961613532856213, 'eval_runtime': 54.1944, 'eval_samples_per_second': 0.978, 'eval_steps_per_second': 0.074, 'epoch': 8.0}
{'loss': 0.0023, 'grad_norm': 0.033410124480724335, 'learning_rate': 1.720497974723721e-05, 'epoch': 8.2}


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/hp_search\run-1\checkpoint-1098
Configuration saved in ../../model/hp_search\run-1\checkpoint-1098\config.json


{'eval_loss': 0.019458139315247536, 'eval_f1_macro': 0.6252391506029013, 'eval_f1_micro': 0.8695652173913043, 'eval_hamming_loss': 0.06636304489264802, 'eval_runtime': 55.0358, 'eval_samples_per_second': 0.963, 'eval_steps_per_second': 0.073, 'epoch': 9.0}


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/hp_search\run-1\checkpoint-1220
Configuration saved in ../../model/hp_search\run-1\checkpoint-1220\config.json


{'eval_loss': 0.01992134191095829, 'eval_f1_macro': 0.6273256929798248, 'eval_f1_micro': 0.8655569782330346, 'eval_hamming_loss': 0.06831489915419649, 'eval_runtime': 54.3962, 'eval_samples_per_second': 0.974, 'eval_steps_per_second': 0.074, 'epoch': 10.0}


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/hp_search\run-1\checkpoint-1342
Configuration saved in ../../model/hp_search\run-1\checkpoint-1342\config.json


{'eval_loss': 0.020407995209097862, 'eval_f1_macro': 0.6230612921636999, 'eval_f1_micro': 0.8666666666666667, 'eval_hamming_loss': 0.06766428106701367, 'eval_runtime': 53.4858, 'eval_samples_per_second': 0.991, 'eval_steps_per_second': 0.075, 'epoch': 11.0}


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/hp_search\run-1\checkpoint-1464
Configuration saved in ../../model/hp_search\run-1\checkpoint-1464\config.json


{'eval_loss': 0.02054348774254322, 'eval_f1_macro': 0.6230612921636999, 'eval_f1_micro': 0.8666666666666667, 'eval_hamming_loss': 0.06766428106701367, 'eval_runtime': 55.2696, 'eval_samples_per_second': 0.959, 'eval_steps_per_second': 0.072, 'epoch': 12.0}
{'loss': 0.0014, 'grad_norm': 0.013558023609220982, 'learning_rate': 5.054570321222231e-06, 'epoch': 12.3}


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/hp_search\run-1\checkpoint-1586
Configuration saved in ../../model/hp_search\run-1\checkpoint-1586\config.json


{'eval_loss': 0.02073541469871998, 'eval_f1_macro': 0.6230612921636999, 'eval_f1_micro': 0.8666666666666667, 'eval_hamming_loss': 0.06766428106701367, 'eval_runtime': 54.5817, 'eval_samples_per_second': 0.971, 'eval_steps_per_second': 0.073, 'epoch': 13.0}


Saving model checkpoint to ../../model/hp_search\run-1\checkpoint-1708
Configuration saved in ../../model/hp_search\run-1\checkpoint-1708\config.json
The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/hp_search\run-1\checkpoint-1708
Configuration saved in ../../model/hp_search\run-1\checkpoint-1708\config.json


{'eval_loss': 0.02086891047656536, 'eval_f1_macro': 0.6230612921636999, 'eval_f1_micro': 0.8666666666666667, 'eval_hamming_loss': 0.06766428106701367, 'eval_runtime': 53.3848, 'eval_samples_per_second': 0.993, 'eval_steps_per_second': 0.075, 'epoch': 14.0}




Training completed. Do not forget to share your model on huggingface.co/models =)


[I 2025-03-13 15:54:48,575] Trial 1 finished with value: 1.5573922398973803 and parameters: {'learning_rate': 4.150579859926717e-05, 'per_device_train_batch_size': 8, 'num_train_epochs': 14, 'weight_decay': 0.014580526204328113, 'alpha': 0.4989228897662774, 'gamma': 2.0623051788188915}. Best is trial 1 with value: 1.5573922398973803.
Trying to set alpha in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set gamma in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trial: {'learning_rate': 2.154275755551988e-05, 'per_device_train_batch_size': 8, 'num_train_epochs': 6, 'weight_decay': 0.05291984206452107, 'alpha': 0.42034951319231434, 'gamma': 1.819836017561793}


{'train_runtime': 20565.5124, 'train_samples_per_second': 0.662, 'train_steps_per_second': 0.083, 'train_loss': 0.004229242684411221, 'epoch': 14.0}


loading configuration file config.json from cache at C:\Users\NPARSHO\.cache\huggingface\hub\models--nlpaueb--legal-bert-base-uncased\snapshots\15b570cbf88259610b082a167dacc190124f60f6\config.json
Model config BertConfig {
  "_name_or_path": "nlpaueb/legal-bert-base-uncased",
  "architectures": [
    "BertForPreTraining"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_ids": 0,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "10": "Display license in binary (obligation/INFORMATION)",
    "11": "Display license in the source (obligation/INFORMATION)",
    "12": "Doing Business with US (other/ALARM)",
    "13": "Endorsement prohibited (prohibition/INFORMATION)",
    "16": "License upgrade allowed (right/INFORMATION)",
    "18": "No further restrictions permitted (prohibition/INFORMATION)",
    "19": "Patent grant (other/INFORMATION)",
    "2": "Copyleft (network protective) (obligat

  0%|          | 0/732 [00:00<?, ?it/s]

Attempting to create safetensors variant
Safetensors PR exists
The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/hp_search\run-2\checkpoint-122
Configuration saved in ../../model/hp_search\run-2\checkpoint-122\config.json


{'eval_loss': 0.017682256177067757, 'eval_f1_macro': 0.3308952928773526, 'eval_f1_micro': 0.7874659400544959, 'eval_hamming_loss': 0.1014964216005205, 'eval_runtime': 37.7823, 'eval_samples_per_second': 1.403, 'eval_steps_per_second': 0.106, 'epoch': 1.0}


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/hp_search\run-2\checkpoint-244
Configuration saved in ../../model/hp_search\run-2\checkpoint-244\config.json


{'eval_loss': 0.015143858268857002, 'eval_f1_macro': 0.48538409738233323, 'eval_f1_micro': 0.8273195876288659, 'eval_hamming_loss': 0.08718282368249837, 'eval_runtime': 38.6327, 'eval_samples_per_second': 1.372, 'eval_steps_per_second': 0.104, 'epoch': 2.0}


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/hp_search\run-2\checkpoint-366
Configuration saved in ../../model/hp_search\run-2\checkpoint-366\config.json


{'eval_loss': 0.014605019241571426, 'eval_f1_macro': 0.4975994869415896, 'eval_f1_micro': 0.8431876606683805, 'eval_hamming_loss': 0.07937540663630449, 'eval_runtime': 38.7796, 'eval_samples_per_second': 1.367, 'eval_steps_per_second': 0.103, 'epoch': 3.0}


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/hp_search\run-2\checkpoint-488
Configuration saved in ../../model/hp_search\run-2\checkpoint-488\config.json


{'eval_loss': 0.014512765221297741, 'eval_f1_macro': 0.575321673417913, 'eval_f1_micro': 0.8498727735368957, 'eval_hamming_loss': 0.07677293428757319, 'eval_runtime': 38.3289, 'eval_samples_per_second': 1.383, 'eval_steps_per_second': 0.104, 'epoch': 4.0}
{'loss': 0.013, 'grad_norm': 0.0493619367480278, 'learning_rate': 6.827759225246738e-06, 'epoch': 4.1}


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/hp_search\run-2\checkpoint-610
Configuration saved in ../../model/hp_search\run-2\checkpoint-610\config.json


{'eval_loss': 0.014502720907330513, 'eval_f1_macro': 0.5841231937103983, 'eval_f1_micro': 0.8527918781725888, 'eval_hamming_loss': 0.07547169811320754, 'eval_runtime': 38.7081, 'eval_samples_per_second': 1.369, 'eval_steps_per_second': 0.103, 'epoch': 5.0}


Saving model checkpoint to ../../model/hp_search\run-2\checkpoint-732
Configuration saved in ../../model/hp_search\run-2\checkpoint-732\config.json
The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/hp_search\run-2\checkpoint-732
Configuration saved in ../../model/hp_search\run-2\checkpoint-732\config.json


{'eval_loss': 0.014404107816517353, 'eval_f1_macro': 0.5857723691226923, 'eval_f1_micro': 0.8567774936061381, 'eval_hamming_loss': 0.07286922576447626, 'eval_runtime': 38.2621, 'eval_samples_per_second': 1.385, 'eval_steps_per_second': 0.105, 'epoch': 6.0}




Training completed. Do not forget to share your model on huggingface.co/models =)


[I 2025-03-13 17:39:24,013] Trial 2 finished with value: 1.5154190884933065 and parameters: {'learning_rate': 2.154275755551988e-05, 'per_device_train_batch_size': 8, 'num_train_epochs': 6, 'weight_decay': 0.05291984206452107, 'alpha': 0.42034951319231434, 'gamma': 1.819836017561793}. Best is trial 1 with value: 1.5573922398973803.
Trying to set alpha in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set gamma in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trial: {'learning_rate': 3.523999264148842e-05, 'per_device_train_batch_size': 16, 'num_train_epochs': 12, 'weight_decay': 0.010081010685090833, 'alpha': 0.3670719384605021, 'gamma': 2.1323741129438742}
loading configuration file config.json from cache at C:\Users\NPARSHO\.cache\huggingface\hub\models--nlpaueb--legal-bert-base-uncased\snapshots\15b570cbf882596

{'train_runtime': 6273.5338, 'train_samples_per_second': 0.93, 'train_steps_per_second': 0.117, 'train_loss': 0.010764774728993901, 'epoch': 6.0}


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at nlpaueb/legal-bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
The following columns in the training set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: license_name, family, text. If license_name, family, text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 972
  Num Epochs = 12
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 732
  Number of trainable parameters = 109,504,541


  0%|          | 0/732 [00:00<?, ?it/s]

Attempting to create safetensors variant
Safetensors PR exists
The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/hp_search\run-3\checkpoint-61
Configuration saved in ../../model/hp_search\run-3\checkpoint-61\config.json


{'eval_loss': 0.01708749122917652, 'eval_f1_macro': 0.40267470779560843, 'eval_f1_micro': 0.8010680907877169, 'eval_hamming_loss': 0.09694209499024073, 'eval_runtime': 27.8089, 'eval_samples_per_second': 1.906, 'eval_steps_per_second': 0.144, 'epoch': 1.0}


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/hp_search\run-3\checkpoint-122
Configuration saved in ../../model/hp_search\run-3\checkpoint-122\config.json


{'eval_loss': 0.01517126802355051, 'eval_f1_macro': 0.5001606421428645, 'eval_f1_micro': 0.8305304010349288, 'eval_hamming_loss': 0.0852309694209499, 'eval_runtime': 27.9213, 'eval_samples_per_second': 1.898, 'eval_steps_per_second': 0.143, 'epoch': 2.0}


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/hp_search\run-3\checkpoint-183
Configuration saved in ../../model/hp_search\run-3\checkpoint-183\config.json


{'eval_loss': 0.014474988915026188, 'eval_f1_macro': 0.5495273566859381, 'eval_f1_micro': 0.8422391857506362, 'eval_hamming_loss': 0.08067664281067013, 'eval_runtime': 27.8477, 'eval_samples_per_second': 1.903, 'eval_steps_per_second': 0.144, 'epoch': 3.0}


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/hp_search\run-3\checkpoint-244
Configuration saved in ../../model/hp_search\run-3\checkpoint-244\config.json


{'eval_loss': 0.01454697735607624, 'eval_f1_macro': 0.6132603199587112, 'eval_f1_micro': 0.8604060913705583, 'eval_hamming_loss': 0.07156798959011061, 'eval_runtime': 27.9647, 'eval_samples_per_second': 1.895, 'eval_steps_per_second': 0.143, 'epoch': 4.0}


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/hp_search\run-3\checkpoint-305
Configuration saved in ../../model/hp_search\run-3\checkpoint-305\config.json


{'eval_loss': 0.015239044092595577, 'eval_f1_macro': 0.6004720382778966, 'eval_f1_micro': 0.8582375478927203, 'eval_hamming_loss': 0.07221860767729343, 'eval_runtime': 27.7763, 'eval_samples_per_second': 1.908, 'eval_steps_per_second': 0.144, 'epoch': 5.0}


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/hp_search\run-3\checkpoint-366
Configuration saved in ../../model/hp_search\run-3\checkpoint-366\config.json


{'eval_loss': 0.015852220356464386, 'eval_f1_macro': 0.6167395495171172, 'eval_f1_micro': 0.8636942675159236, 'eval_hamming_loss': 0.06961613532856213, 'eval_runtime': 27.9214, 'eval_samples_per_second': 1.898, 'eval_steps_per_second': 0.143, 'epoch': 6.0}


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/hp_search\run-3\checkpoint-427
Configuration saved in ../../model/hp_search\run-3\checkpoint-427\config.json


{'eval_loss': 0.01544348057359457, 'eval_f1_macro': 0.6146665874270738, 'eval_f1_micro': 0.8633461047254151, 'eval_hamming_loss': 0.06961613532856213, 'eval_runtime': 27.8965, 'eval_samples_per_second': 1.9, 'eval_steps_per_second': 0.143, 'epoch': 7.0}


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/hp_search\run-3\checkpoint-488
Configuration saved in ../../model/hp_search\run-3\checkpoint-488\config.json


{'eval_loss': 0.016330905258655548, 'eval_f1_macro': 0.6148682409862591, 'eval_f1_micro': 0.8644501278772379, 'eval_hamming_loss': 0.06896551724137931, 'eval_runtime': 27.8393, 'eval_samples_per_second': 1.904, 'eval_steps_per_second': 0.144, 'epoch': 8.0}
{'loss': 0.0083, 'grad_norm': 0.029556605964899063, 'learning_rate': 1.1168959416428025e-05, 'epoch': 8.2}


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/hp_search\run-3\checkpoint-549
Configuration saved in ../../model/hp_search\run-3\checkpoint-549\config.json


{'eval_loss': 0.016298621892929077, 'eval_f1_macro': 0.612796143135654, 'eval_f1_micro': 0.8611825192802056, 'eval_hamming_loss': 0.07026675341574495, 'eval_runtime': 27.6874, 'eval_samples_per_second': 1.914, 'eval_steps_per_second': 0.144, 'epoch': 9.0}


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/hp_search\run-3\checkpoint-610
Configuration saved in ../../model/hp_search\run-3\checkpoint-610\config.json


{'eval_loss': 0.01674608327448368, 'eval_f1_macro': 0.6150143322866924, 'eval_f1_micro': 0.8626444159178434, 'eval_hamming_loss': 0.06961613532856213, 'eval_runtime': 27.8481, 'eval_samples_per_second': 1.903, 'eval_steps_per_second': 0.144, 'epoch': 10.0}


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/hp_search\run-3\checkpoint-671
Configuration saved in ../../model/hp_search\run-3\checkpoint-671\config.json


{'eval_loss': 0.01670306921005249, 'eval_f1_macro': 0.6153622451378055, 'eval_f1_micro': 0.8641025641025641, 'eval_hamming_loss': 0.06896551724137931, 'eval_runtime': 28.0374, 'eval_samples_per_second': 1.89, 'eval_steps_per_second': 0.143, 'epoch': 11.0}


Saving model checkpoint to ../../model/hp_search\run-3\checkpoint-732
Configuration saved in ../../model/hp_search\run-3\checkpoint-732\config.json
The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: family, license_name, __index_level_0__, text. If family, license_name, __index_level_0__, text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 53
  Batch size = 16


  0%|          | 0/4 [00:00<?, ?it/s]

Saving model checkpoint to ../../model/hp_search\run-3\checkpoint-732
Configuration saved in ../../model/hp_search\run-3\checkpoint-732\config.json


{'eval_loss': 0.01667293719947338, 'eval_f1_macro': 0.6150143322866924, 'eval_f1_micro': 0.8626444159178434, 'eval_hamming_loss': 0.06961613532856213, 'eval_runtime': 26.3921, 'eval_samples_per_second': 2.008, 'eval_steps_per_second': 0.152, 'epoch': 12.0}




Training completed. Do not forget to share your model on huggingface.co/models =)


[I 2025-03-14 00:52:07,151] Trial 3 finished with value: 1.5472748835330978 and parameters: {'learning_rate': 3.523999264148842e-05, 'per_device_train_batch_size': 16, 'num_train_epochs': 12, 'weight_decay': 0.010081010685090833, 'alpha': 0.3670719384605021, 'gamma': 2.1323741129438742}. Best is trial 1 with value: 1.5573922398973803.
Trying to set alpha in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trying to set gamma in the hyperparameter search but there is no corresponding field in `TrainingArguments`.
Trial: {'learning_rate': 1.8983211874813515e-05, 'per_device_train_batch_size': 8, 'num_train_epochs': 13, 'weight_decay': 0.016436174343137766, 'alpha': 0.4312776160305887, 'gamma': 1.6192230403063839}


{'train_runtime': 25961.6358, 'train_samples_per_second': 0.449, 'train_steps_per_second': 0.028, 'train_loss': 0.006545368439512826, 'epoch': 12.0}


loading configuration file config.json from cache at C:\Users\NPARSHO\.cache\huggingface\hub\models--nlpaueb--legal-bert-base-uncased\snapshots\15b570cbf88259610b082a167dacc190124f60f6\config.json
Model config BertConfig {
  "_name_or_path": "nlpaueb/legal-bert-base-uncased",
  "architectures": [
    "BertForPreTraining"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_ids": 0,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "10": "Display license in binary (obligation/INFORMATION)",
    "11": "Display license in the source (obligation/INFORMATION)",
    "12": "Doing Business with US (other/ALARM)",
    "13": "Endorsement prohibited (prohibition/INFORMATION)",
    "16": "License upgrade allowed (right/INFORMATION)",
    "18": "No further restrictions permitted (prohibition/INFORMATION)",
    "19": "Patent grant (other/INFORMATION)",
    "2": "Copyleft (network protective) (obligat

  0%|          | 0/1586 [00:00<?, ?it/s]

Attempting to create safetensors variant
Safetensors PR exists
[W 2025-03-14 00:53:52,046] Trial 4 failed with parameters: {'learning_rate': 1.8983211874813515e-05, 'per_device_train_batch_size': 8, 'num_train_epochs': 13, 'weight_decay': 0.016436174343137766, 'alpha': 0.4312776160305887, 'gamma': 1.6192230403063839} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "c:\Users\NPARSHO\AppData\Local\anaconda3\envs\cuda2\Lib\site-packages\optuna\study\_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "c:\Users\NPARSHO\AppData\Local\anaconda3\envs\cuda2\Lib\site-packages\transformers\integrations\integration_utils.py", line 249, in _objective
    trainer.train(resume_from_checkpoint=checkpoint, trial=trial)
  File "c:\Users\NPARSHO\AppData\Local\anaconda3\envs\cuda2\Lib\site-packages\transformers\trainer.py", line 2163, in train
    return inner_training_loop(
           ^^^^^^^^^^^

KeyboardInterrupt: 

In [77]:
tokenizer.save_pretrained("../../model/LegalBert")

('../../model/LegalBert\\tokenizer_config.json',
 '../../model/LegalBert\\special_tokens_map.json',
 '../../model/LegalBert\\vocab.txt',
 '../../model/LegalBert\\added_tokens.json',
 '../../model/LegalBert\\tokenizer.json')