In [27]:
!pip install -q transformers datasets wandb

In [28]:
!huggingface-cli login --token 

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [30]:
import torch
from datasets import load_dataset
from transformers import AutoModelForSequenceClassification, AutoTokenizer, AutoConfig, TrainingArguments, Trainer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import wandb

# Initialize wandb
wandb.init(
    project="bert-crossencoder-classification"
)

# Load dataset
dataset = load_dataset("minoosh/EPITOME_pairs")

# Initialize the tokenizer and model for cross-encoder setup
model_name = "google-bert/bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Preprocess data for the cross-encoder model by concatenating text1 and text2 with [SEP]
def preprocess_function(examples):
    # Concatenate both texts with a [SEP] token in between
    encodings = tokenizer(examples['text1'], examples['text2'], truncation=True, padding=True, max_length=512)
    encodings['labels'] = examples['label']  # Add labels
    return encodings

# Apply tokenization
tokenized_train = dataset['train'].map(preprocess_function, batched=True)
tokenized_test = dataset['test'].map(preprocess_function, batched=True)
tokenized_val = dataset['validation'].map(preprocess_function, batched=True)

# Set format for PyTorch
tokenized_train.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
tokenized_test.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
tokenized_val.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])

# Define compute_metrics function for classification evaluation
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    preds = predictions.argmax(axis=1)
    accuracy = accuracy_score(labels, preds)
    precision = precision_score(labels, preds, average="weighted")
    recall = recall_score(labels, preds, average="weighted")
    f1 = f1_score(labels, preds, average="weighted")
    return {
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1": f1
    }

# Custom Cross-Encoder model class for classification
class CrossEncoderModel(torch.nn.Module):
    def __init__(self, model_name, num_classes=4, loss_fn="cross_entropy"):
        super(CrossEncoderModel, self).__init__()
        # Load model config
        self.config = AutoConfig.from_pretrained(model_name, num_labels=num_classes)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name, config=self.config)
        self.loss_fn = loss_fn

    def forward(self, input_ids, attention_mask, labels=None):
        outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs.logits  # Output logits for classification

        loss = None
        if labels is not None:
            if self.loss_fn == "cross_entropy":
                loss_fct = torch.nn.CrossEntropyLoss()  # Use CrossEntropyLoss for classification
                loss = loss_fct(logits, labels)
            elif self.loss_fn == "focal_loss":
                # Focal loss implementation for handling class imbalance
                alpha = 0.25
                gamma = 2.0
                ce_loss = torch.nn.CrossEntropyLoss(reduction="none")(logits, labels)
                pt = torch.exp(-ce_loss)  # Probability of the true class
                loss = (alpha * (1 - pt) ** gamma * ce_loss).mean()
            elif self.loss_fn == "kl_divergence":
                # KL Divergence for soft-label classification
                kl_div = torch.nn.KLDivLoss(reduction="batchmean")
                soft_labels = torch.nn.functional.one_hot(labels, num_classes=self.config.num_labels).float()
                log_probs = torch.nn.functional.log_softmax(logits, dim=-1)
                loss = kl_div(log_probs, soft_labels)
            else:
                raise ValueError(f"Unsupported loss function: {self.loss_fn}")

        return {"loss": loss, "logits": logits}


    def save_pretrained(self, save_directory):
        # Save model weights
        #self.model.save_pretrained(save_directory)
        # Save tokenizer if applicable
        #tokenizer.save_pretrained(save_directory)
        # Save config
        self.config.save_pretrained(save_directory)



# Function to initialize and train the cross-encoder model
def train_crossencoder(loss_fn):
    model = CrossEncoderModel(model_name=model_name, loss_fn=loss_fn)

    # Set up TrainingArguments
    training_args = TrainingArguments(
        output_dir=f"./output/TTTTempathy-crossencoder-{loss_fn}",
        evaluation_strategy="epoch",
        logging_dir='./logs',
        logging_steps=10,
        per_device_train_batch_size=wandb.config['batch_size'],
        per_device_eval_batch_size=wandb.config['batch_size'],
        num_train_epochs=wandb.config['epochs'],
        warmup_steps=100,
        learning_rate=wandb.config['learning_rate'],
        weight_decay=0.01,
        report_to="wandb",
        save_strategy="epoch",
        load_best_model_at_end=True,
        push_to_hub=True,
        save_total_limit=2
    )

    # Initialize Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_train,
        eval_dataset=tokenized_val,
        tokenizer=tokenizer,
        compute_metrics=compute_metrics
    )

    # Train the model
    trainer.train()

    # Evaluate the model on the test set
    trainer.evaluate(tokenized_test)

    trainer.model.save_pretrained(f"./output/TTTTempathy-crossencoder-{loss_fn}")

    # Save and push the model to the Hugging Face Hub
    trainer.save_model(f"./output/TTTTempathy-crossencoder-{loss_fn}")
    trainer.push_to_hub(f"minoosh/TTTTempathy-crossencoder-{loss_fn}")

    # End the wandb run
    wandb.finish()
    return trainer

In [31]:
# Specify list of loss functions to try
loss_functions = ["cross_entropy", "focal_loss", "kl_divergence"]

loss_fn = loss_functions[0]  # Change to desired loss function
wandb.init(project="bert-crossencoder-classification", name=f"bert-crossencoder-classification-{loss_fn}", config={"epochs": 3, "batch_size": 16, "learning_rate": 2e-5})
tr = train_crossencoder(loss_fn)
wandb.finish()

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.1931,1.067032,0.601942,0.612247,0.601942,0.581434
2,0.9073,0.962425,0.61165,0.623103,0.61165,0.606202
3,0.79,0.941332,0.61165,0.613697,0.61165,0.608384


No files have been modified since last commit. Skipping to prevent empty commit.


No files have been modified since last commit. Skipping to prevent empty commit.


0,1
eval/accuracy,▁██▇
eval/f1,▁▇█▇
eval/loss,█▂▁▅
eval/precision,▂█▃▁
eval/recall,▁██▇
eval/runtime,▅▅▁█
eval/samples_per_second,▅▄█▁
eval/steps_per_second,▄▄█▁
train/epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇█████
train/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇█████

0,1
eval/accuracy,0.61039
eval/f1,0.6036
eval/loss,1.01857
eval/precision,0.61049
eval/recall,0.61039
eval/runtime,9.8313
eval/samples_per_second,31.328
eval/steps_per_second,2.034
total_flos,0.0
train/epoch,3.0


In [48]:
tokenizer = tr.tokenizer
tokenizer

BertTokenizerFast(name_or_path='google-bert/bert-base-uncased', vocab_size=30522, model_max_length=512, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]', 'mask_token': '[MASK]'}, clean_up_tokenization_spaces=True),  added_tokens_decoder={
	0: AddedToken("[PAD]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	100: AddedToken("[UNK]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	101: AddedToken("[CLS]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	102: AddedToken("[SEP]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	103: AddedToken("[MASK]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
}

In [50]:
tokenized_test

Dataset({
    features: ['text1', 'text2', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
    num_rows: 309
})

In [49]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer
from datasets import load_dataset

'''# Load the model and tokenizer
model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained("/content/output/TTTTempathy-crossencoder-cross_entropy")
'''
# Load the dataset
dataset = load_dataset("minoosh/EPITOME_pairs")

# Tokenize the test split
def preprocess_test_function(examples):
    encodings = tokenizer(examples['text1'], examples['text2'], truncation=True, padding=True, max_length=512)
    return encodings

# Apply tokenization to the test dataset
tokenized_test = dataset['validation'].map(preprocess_test_function, batched=True)
tokenized_test.set_format(type='torch', columns=['input_ids', 'attention_mask'])

# Initialize the Trainer
trainer = tr

# Make predictions on the test dataset
predictions = trainer.predict(tokenized_test)

# Get the predicted class indices
predicted_classes = predictions.predictions.argmax(axis=1)

# If you want to compare with actual labels
actual_labels = tokenized_test['label']

# Print out predictions and actual labels for verification
for i in range(len(predicted_classes)):
    print(f"Predicted: {predicted_classes[i]}, Actual: {actual_labels[i]}")

TypeError: Unsupported types (<class 'NoneType'>) passed to `_pad_across_processes`. Only nested list/tuple/dicts of objects that are valid for `is_torch_tensor` should be passed.

In [None]:
res = compute_metrics2(actual_labels, predicted_classes)
res

In [51]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer
from datasets import load_dataset

# Load the model and tokenizer
model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained("/content/output/TTTTempathy-crossencoder-cross_entropy")

# Load the dataset
dataset = load_dataset("minoosh/EPITOME_pairs")

# Tokenize the test split
def preprocess_test_function(examples):
    encodings = tokenizer(examples['text1'], examples['text2'], truncation=True, padding=True, max_length=512)
    return encodings

# Apply tokenization to the test dataset
tokenized_test = dataset['validation'].map(preprocess_test_function, batched=True)
tokenized_test.set_format(type='torch', columns=['input_ids', 'attention_mask'])

# Initialize the Trainer
trainer = Trainer(model=model)

# Make predictions on the test dataset
predictions = trainer.predict(tokenized_test)

# Get the predicted class indices
predicted_classes = predictions.predictions.argmax(axis=1)

# If you want to compare with actual labels
actual_labels = tokenized_test['label']

# Print out predictions and actual labels for verification
for i in range(len(predicted_classes)):
    print(f"Predicted: {predicted_classes[i]}, Actual: {actual_labels[i]}")




Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /content/output/TTTTempathy-crossencoder-cross_entropy and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias',

Predicted: 0, Actual: 2
Predicted: 0, Actual: 2
Predicted: 0, Actual: 3
Predicted: 0, Actual: 2
Predicted: 0, Actual: 0
Predicted: 0, Actual: 2
Predicted: 0, Actual: 0
Predicted: 0, Actual: 3
Predicted: 0, Actual: 3
Predicted: 0, Actual: 2
Predicted: 0, Actual: 2
Predicted: 0, Actual: 2
Predicted: 0, Actual: 2
Predicted: 0, Actual: 0
Predicted: 0, Actual: 1
Predicted: 0, Actual: 3
Predicted: 0, Actual: 2
Predicted: 0, Actual: 1
Predicted: 0, Actual: 2
Predicted: 0, Actual: 3
Predicted: 0, Actual: 2
Predicted: 0, Actual: 2
Predicted: 0, Actual: 1
Predicted: 0, Actual: 0
Predicted: 0, Actual: 1
Predicted: 0, Actual: 2
Predicted: 0, Actual: 0
Predicted: 0, Actual: 1
Predicted: 0, Actual: 2
Predicted: 0, Actual: 3
Predicted: 0, Actual: 3
Predicted: 0, Actual: 3
Predicted: 0, Actual: 1
Predicted: 0, Actual: 0
Predicted: 0, Actual: 1
Predicted: 0, Actual: 2
Predicted: 0, Actual: 2
Predicted: 0, Actual: 3
Predicted: 0, Actual: 3
Predicted: 0, Actual: 2
Predicted: 0, Actual: 0
Predicted: 0, Ac

In [52]:
from sklearn.metrics import classification_report

a = classification_report(actual_labels, predicted_classes)
print(a)

              precision    recall  f1-score   support

           0       0.22      1.00      0.37        69
           1       0.00      0.00      0.00        56
           2       0.00      0.00      0.00       130
           3       0.00      0.00      0.00        54

    accuracy                           0.22       309
   macro avg       0.06      0.25      0.09       309
weighted avg       0.05      0.22      0.08       309



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [53]:
from sklearn.metrics import recall_score

recall_score(actual_labels, predicted_classes, average='weighted')

0.22330097087378642

In [54]:
def compute_metrics2(preds, labels):
    #predictions, labels = eval_pred
    #preds = predictions.argmax(axis=1)
    accuracy = accuracy_score(labels, preds)
    precision = precision_score(labels, preds, average="weighted")
    recall = recall_score(labels, preds, average="weighted")
    f1 = f1_score(labels, preds, average="weighted")
    return {
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1": f1
    }

In [55]:
res = compute_metrics2(actual_labels, predicted_classes)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [56]:
res

{'accuracy': 0.22330097087378642,
 'precision': 1.0,
 'recall': 0.22330097087378642,
 'f1': 0.36507936507936506}