In [8]:
import torch
import torch.nn as nn
from transformers import BertTokenizer, BertModel, Trainer, TrainingArguments
from datasets import DatasetDict, concatenate_datasets, load_dataset
from sklearn.metrics import accuracy_score, f1_score

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")


ds = load_dataset("PiC/phrase_similarity")

train_label0 = ds['train'].filter(lambda x: x['label'] == 0)
train_label1 = ds['train'].filter(lambda x: x['label'] == 1)

train_label0 = train_label0.shuffle(seed=42)
train_label1 = train_label1.shuffle(seed=42)

train_label0 = train_label0.select(range(500))
train_label1 = train_label1.select(range(500))

balanced_train = concatenate_datasets([train_label0, train_label1])

ds = DatasetDict({
    'train': balanced_train,
    'validation': ds['validation'],
    'test': ds['test']
})

model_name = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(model_name)

def tokenize_function(examples):
    tokenized_s1 = tokenizer(
        examples['phrase1'],
        examples['sentence1'],
        truncation=True,
        padding='max_length',
        max_length=128
    )
    tokenized_s2 = tokenizer(
        examples['phrase2'],
        examples['sentence2'],
        truncation=True,
        padding='max_length',
        max_length=128
    )
    examples['input_ids_s1'] = tokenized_s1['input_ids']
    examples['attention_mask_s1'] = tokenized_s1['attention_mask']
    examples['input_ids_s2'] = tokenized_s2['input_ids']
    examples['attention_mask_s2'] = tokenized_s2['attention_mask']
    return examples

tokenized_ds = ds.map(tokenize_function, batched=True)

tokenized_ds = tokenized_ds.remove_columns(['phrase1', 'phrase2', 'sentence1', 'sentence2', 'idx'])

# Rename 'label' to 'labels'
tokenized_ds = tokenized_ds.rename_column("label", "labels")

# Set format to PyTorch tensors
tokenized_ds.set_format(type='torch', columns=['input_ids_s1', 'attention_mask_s1',
                                               'input_ids_s2', 'attention_mask_s2', 'labels'])

# Define the model classes
class BaseBERTModel(nn.Module):
    def __init__(self, model_name='bert-base-uncased'):
        super(BaseBERTModel, self).__init__()
        self.bert = BertModel.from_pretrained(model_name)
        for param in self.bert.embeddings.parameters():
            param.requires_grad = True # this is the main part here when I say with_grad

    def forward(self, input_ids_s1, attention_mask_s1, input_ids_s2, attention_mask_s2):
        outputs_s1 = self.bert(input_ids=input_ids_s1, attention_mask=attention_mask_s1)
        outputs_s2 = self.bert(input_ids=input_ids_s2, attention_mask=attention_mask_s2)
        return outputs_s1.last_hidden_state, outputs_s2.last_hidden_state

class PoolingModel(nn.Module):
    def __init__(self, model_name='bert-base-uncased', hidden_size=768, num_classes=2):
        super(PoolingModel, self).__init__()
        self.base_model = BaseBERTModel(model_name)
        self.pool = nn.AdaptiveMaxPool1d(1)
        self.classifier = nn.Linear(hidden_size * 2, num_classes)
        self.loss_fn = nn.CrossEntropyLoss()

    def forward(self, input_ids_s1, attention_mask_s1, input_ids_s2, attention_mask_s2, labels=None):
        last_hidden_s1, last_hidden_s2 = self.base_model(input_ids_s1, attention_mask_s1, input_ids_s2, attention_mask_s2)
        pooled_s1 = self.pool(last_hidden_s1.permute(0, 2, 1)).squeeze(-1)
        pooled_s2 = self.pool(last_hidden_s2.permute(0, 2, 1)).squeeze(-1)
        combined = torch.cat((pooled_s1, pooled_s2), dim=1)
        logits = self.classifier(combined)
        outputs = {'logits': logits}
        if labels is not None:
            loss = self.loss_fn(logits, labels)
            outputs['loss'] = loss
        return outputs

# Define metrics
def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    acc = accuracy_score(labels, preds)
    f1 = f1_score(labels, preds, average='weighted')
    return {'accuracy': acc, 'f1': f1}

# Training and evaluation function
def train_evaluate_model(model_class, strategy_name):
    print(f"\n=== Training Model with {strategy_name} Strategy ===")
    model = model_class().to(device)  # Move model to GPU
    training_args = TrainingArguments(
        output_dir=f'./results_{strategy_name}',
        num_train_epochs=3,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=64,
        evaluation_strategy="epoch",
        save_strategy="epoch",
        logging_dir=f'./logs_{strategy_name}',
        logging_steps=100,
        load_best_model_at_end=True,
        metric_for_best_model="accuracy",
        greater_is_better=True
    )
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_ds['train'],
        eval_dataset=tokenized_ds['validation'],
        compute_metrics=compute_metrics
    )
    trainer.train()
    eval_results = trainer.evaluate(tokenized_ds['validation'])
    print(f"Validation Results for {strategy_name} Strategy: {eval_results}")
    return trainer, eval_results


Using device: cuda


In [9]:
# Train and evaluate the PoolingModel
trainer, eval_results = train_evaluate_model(PoolingModel, "PoolingModel")



=== Training Model with PoolingModel Strategy ===




Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,0.717133,0.489,0.398987
2,0.654100,0.730591,0.5,0.485832
3,0.654100,0.736013,0.534,0.529665


Validation Results for PoolingModel Strategy: {'eval_loss': 0.7360132932662964, 'eval_accuracy': 0.534, 'eval_f1': 0.5296653962922292, 'eval_runtime': 14.7351, 'eval_samples_per_second': 67.865, 'eval_steps_per_second': 1.086, 'epoch': 3.0}


In [10]:
# Evaluate the model on the test set
test_results = trainer.evaluate(tokenized_ds['test'])

# Print the test results
print("Test Results:", test_results)


Test Results: {'eval_loss': 0.7363443970680237, 'eval_accuracy': 0.52, 'eval_f1': 0.5155351721465022, 'eval_runtime': 30.2219, 'eval_samples_per_second': 66.177, 'eval_steps_per_second': 1.059, 'epoch': 3.0}


In [11]:
import torch
import torch.nn as nn

class AveragingModel(nn.Module):
    def __init__(self, model_name='bert-base-uncased', hidden_size=768, num_classes=2):
        super(AveragingModel, self).__init__()
        self.base_model = BaseBERTModel(model_name)
        self.classifier = nn.Linear(hidden_size * 2, num_classes)
        self.loss_fn = nn.CrossEntropyLoss()

    def forward(self, input_ids_s1, attention_mask_s1, input_ids_s2, attention_mask_s2, labels=None):
        # Move inputs to GPU if available
        input_ids_s1 = input_ids_s1.to(device)
        attention_mask_s1 = attention_mask_s1.to(device)
        input_ids_s2 = input_ids_s2.to(device)
        attention_mask_s2 = attention_mask_s2.to(device)
        if labels is not None:
            labels = labels.to(device)

        # Get last hidden states for both sequences
        last_hidden_s1, last_hidden_s2 = self.base_model(input_ids_s1, attention_mask_s1,
                                                         input_ids_s2, attention_mask_s2)

        # Compute mean pooling for s1
        mask_s1 = attention_mask_s1.unsqueeze(-1).expand(last_hidden_s1.size()).float()
        mean_s1 = torch.sum(last_hidden_s1 * mask_s1, dim=1) / torch.clamp(mask_s1.sum(dim=1), min=1e-9)

        # Compute mean pooling for s2
        mask_s2 = attention_mask_s2.unsqueeze(-1).expand(last_hidden_s2.size()).float()
        mean_s2 = torch.sum(last_hidden_s2 * mask_s2, dim=1) / torch.clamp(mask_s2.sum(dim=1), min=1e-9)

        # Concatenate mean embeddings
        combined = torch.cat((mean_s1, mean_s2), dim=1)  # (batch_size, hidden_dim * 2)

        # Classification
        logits = self.classifier(combined)  # (batch_size, num_classes)

        # Prepare outputs
        outputs = {'logits': logits}

        # Add loss if labels are provided
        if labels is not None:
            loss = self.loss_fn(logits, labels)
            outputs['loss'] = loss

        return outputs




In [12]:
# Train and evaluate the PoolingModel
trainer_avg, eval_results_avg = train_evaluate_model(AveragingModel, "AveragingModel")



=== Training Model with AveragingModel Strategy ===




Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,0.689924,0.536,0.453768
2,0.610300,0.757975,0.608,0.588062
3,0.610300,1.147776,0.615,0.607191


Validation Results for AveragingModel Strategy: {'eval_loss': 1.1477761268615723, 'eval_accuracy': 0.615, 'eval_f1': 0.6071905554325545, 'eval_runtime': 14.6851, 'eval_samples_per_second': 68.096, 'eval_steps_per_second': 1.09, 'epoch': 3.0}


In [13]:
# Evaluate the model on the test set
test_results_avg = trainer_avg.evaluate(tokenized_ds['test'])

# Print the test results
print("Test Results:", test_results_avg)


Test Results: {'eval_loss': 1.136587142944336, 'eval_accuracy': 0.624, 'eval_f1': 0.6121212121212122, 'eval_runtime': 30.0289, 'eval_samples_per_second': 66.603, 'eval_steps_per_second': 1.066, 'epoch': 3.0}
