In [1]:
pip install wandb

Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
import torch
from torch.utils.data import DataLoader, Dataset
from transformers import BertTokenizer, BertForSequenceClassification
from transformers import AdamW
from tqdm import tqdm
import numpy as np
import os
import wandb
import random

In [3]:
wandb.login()

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

  ········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [4]:
# start a new wandb run to track this script
wandb.init(project="spanish-full", config={
    "epochs": 10,
    "patience": 3,
    "optimizer": "AdamW",
    "learning_rate": 2e-4,
    "batch_size": 16
})

[34m[1mwandb[0m: Currently logged in as: [33mdivyessh01[0m ([33mdivyessh01-iiit-hyderabad[0m). Use [1m`wandb login --relogin`[0m to force relogin


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113760511110134, max=1.0…

In [5]:
# Access the config parameters
config = wandb.config

In [6]:
# Check for GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [7]:
# Load the dataset
dataset = "/kaggle/input/anlp-datasets/preprocessed data/Spanish/"
train_dataset_file = dataset + "spanish_train.csv"
val_dataset_file = dataset + "spanish_dev.csv"

In [8]:
train_data = pd.read_csv(train_dataset_file)
val_data = pd.read_csv(val_dataset_file)

In [9]:
class SarcasmDataset(Dataset):
    def __init__(self, dataframe, tokenizer, max_len):
        self.data = dataframe
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        text = self.data['preprocessed_text'].iloc[index]
        label = self.data['label'].iloc[index]
        
        encoding = self.tokenizer(
            text,
            max_length=self.max_len,
            padding="max_length",
            truncation=True,
            return_tensors="pt",
        )
        return {
            "input_ids": encoding["input_ids"].squeeze(0),
            "attention_mask": encoding["attention_mask"].squeeze(0),
            "label": torch.tensor(label, dtype=torch.long),
        }

In [10]:
# Tokenizer and datasets
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
train_dataset = SarcasmDataset(train_data, tokenizer, max_len=128)
val_dataset = SarcasmDataset(val_data, tokenizer, max_len=128)

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [11]:
train_loader = DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=config.batch_size)

In [12]:
# Model
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)
model = model.to(device)

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [13]:
optimizer = AdamW(model.parameters(), lr=config.learning_rate)



In [14]:
def train_model(model, train_loader, val_loader, device, config):
    best_loss = np.inf
    patience_counter = 0
    best_model_weights = None

    for epoch in range(config.epochs):
        print(f"\nEpoch {epoch + 1}/{config.epochs}")
        
        wandb.log({"epoch": epoch + 1})

        # train
        model.train()
        train_loss = 0
        loop = tqdm(train_loader, leave=True)
        for batch in loop:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["label"].to(device)

            optimizer.zero_grad()
            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            loop.set_description(f"Train loss: {loss.item():.4f}")

        avg_train_loss = train_loss / len(train_loader)

        # validate
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for batch in val_loader:
                input_ids = batch["input_ids"].to(device)
                attention_mask = batch["attention_mask"].to(device)
                labels = batch["label"].to(device)

                outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
                loss = outputs.loss
                val_loss += loss.item()

        avg_val_loss = val_loss / len(val_loader)
        print(f"Epoch {epoch + 1}: Train Loss = {avg_train_loss:.4f}, Val Loss = {avg_val_loss:.4f}")

        wandb.log({"train_loss": avg_train_loss, "val_loss": avg_val_loss})

        # early stopping
        if avg_val_loss < best_loss:
            best_loss = avg_val_loss
            patience_counter = 0
            best_model_weights = model.state_dict()
            torch.save(best_model_weights, "best_model_weights.pth")
        else:
            patience_counter += 1

        if patience_counter >= config.patience:
            print("Early stopping triggered.")
            break

    return best_model_weights


In [15]:
best_model_weights = train_model(model, train_loader, val_loader, device, config)
print("Training complete. Best model saved as 'best_model_weights.pth'.")


Epoch 1/10


Train loss: 0.4014: 100%|██████████| 48/48 [00:10<00:00,  4.71it/s]


Epoch 1: Train Loss = 0.3257, Val Loss = 0.3563

Epoch 2/10


Train loss: 0.4211: 100%|██████████| 48/48 [00:09<00:00,  4.97it/s]


Epoch 2: Train Loss = 0.3176, Val Loss = 0.3636

Epoch 3/10


Train loss: 0.2896: 100%|██████████| 48/48 [00:09<00:00,  4.99it/s]


Epoch 3: Train Loss = 0.3077, Val Loss = 0.3585

Epoch 4/10


Train loss: 0.2722: 100%|██████████| 48/48 [00:09<00:00,  5.00it/s]


Epoch 4: Train Loss = 0.3050, Val Loss = 0.3623
Early stopping triggered.
Training complete. Best model saved as 'best_model_weights.pth'.


In [16]:
test_dataset_file = dataset + "spanish_test.csv"
test_data = pd.read_csv(test_dataset_file)

In [17]:
test_dataset = SarcasmDataset(test_data, tokenizer, max_len=128)
test_loader = DataLoader(test_dataset, batch_size=config.batch_size, shuffle=False)

In [18]:
# Load the saved model weights
model.load_state_dict(torch.load("best_model_weights.pth"))
model = model.to(device)
model.eval()

  model.load_state_dict(torch.load("best_model_weights.pth"))


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [19]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# evaluate the model on the test set
def evaluate_model(model, test_loader, device, config):
    predictions = []
    true_labels = []
    
    model.eval()
    with torch.no_grad():
        for batch in tqdm(test_loader, desc="Evaluating"):
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["label"].to(device)

            outputs = model(input_ids, attention_mask=attention_mask)
            logits = outputs.logits

            preds = torch.argmax(logits, dim=1)
            predictions.extend(preds.cpu().numpy())
            true_labels.extend(labels.cpu().numpy())

    accuracy = accuracy_score(true_labels, predictions)
    precision = precision_score(true_labels, predictions)
    recall = recall_score(true_labels, predictions)
    f1 = f1_score(true_labels, predictions)

    print(f"Test Set Performance:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")

    wandb.log({
        "test_accuracy": accuracy,
        "test_precision": precision,
        "test_recall": recall,
        "test_f1_score": f1
    })

    results = {
        "Metric": ["Accuracy", "Precision", "Recall", "F1 Score"],
        "Value": [accuracy, precision, recall, f1],
    }
    results_df = pd.DataFrame(results)
    results_df.to_csv("evaluation_results.csv", index=False)
    print(f"Results saved to evaluation_results.csv")

    return predictions


In [20]:
predictions = evaluate_model(model, test_loader, device, config)

# Save test predictions to a new CSV file
output_file = "test_predictions.csv"
test_data["prediction"] = predictions
test_data[["preprocessed_text", "label", "prediction"]].to_csv(output_file, index=False)
print(f"Predictions saved to {output_file}")

Evaluating: 100%|██████████| 6/6 [00:00<00:00, 16.34it/s]

Test Set Performance:
Accuracy: 0.8958
Precision: 0.0000
Recall: 0.0000
F1 Score: 0.0000
Results saved to evaluation_results.csv
Predictions saved to test_predictions.csv



  _warn_prf(average, modifier, msg_start, len(result))


In [21]:
predictions_file = "test_predictions.csv"
predictions_df = pd.read_csv(predictions_file)

In [22]:
print("Random rows from the test predictions file:")
print(predictions_df.sample(10))

Random rows from the test predictions file:
                                    preprocessed_text  label  prediction
35  I Thought that if he is added to Whoever Asked...      0           0
56  Is It Safe Soccer for Your Children? A Worried...      0           0
55                                 ALREADY IF IT WERE      0           0
43        Wool, wool, do you envy that I am a vergon?      1           0
89                     You Said Subthing About A Plan      1           0
53                                          Well Okay      0           0
17  I shit on milk that I'm not going to make you ...      0           0
74  "The Only Phrase You know in urdu is" "No Fuck...      0           0
70  We already have jewelry.Now we have to look fo...      0           0
14                            You are now is bleeding      0           0


In [23]:
wandb.finish()

VBox(children=(Label(value='0.019 MB of 0.019 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▆█
test_accuracy,▁
test_f1_score,▁
test_precision,▁
test_recall,▁
train_loss,█▅▂▁
val_loss,▁█▃▇

0,1
epoch,4.0
test_accuracy,0.89583
test_f1_score,0.0
test_precision,0.0
test_recall,0.0
train_loss,0.30503
val_loss,0.36234
