In [3]:
# Training the BERT Model

In [None]:
from transformers import BertConfig, BertModel
import torch.nn as nn
class BertSingleEncoderEarlyExit(nn.Module):
    """BERT restricted to one encoder layer, followed by a linear early-exit classifier.

    The encoder is built with ``BertModel(config)``; no checkpoint is loaded in
    this class, so the encoder does not start from pre-trained weights.

    Args:
        config: BERT configuration. Its ``hidden_size`` sizes the classifier
            input. The object passed in is left unmodified.
        num_labels: Number of classes produced by the early-exit classifier.
    """

    def __init__(self, config, num_labels):
        import copy  # local import so the class works without touching the cell's import lines

        super().__init__()
        self.num_labels = num_labels

        # Work on a private copy: the caller's config keeps its original depth
        # and can be reused for other models afterwards.
        single_layer_config = copy.deepcopy(config)
        single_layer_config.num_hidden_layers = 1  # keep exactly one encoder layer
        self.bert_encoder = BertModel(single_layer_config)

        # Early Exit classifier
        self.early_exit_classifier = nn.Linear(single_layer_config.hidden_size, num_labels)

        # Initialize weights
        self.init_weights()

    def init_weights(self):
        """Xavier-normal initialise the classifier weight and zero its bias."""
        nn.init.xavier_normal_(self.early_exit_classifier.weight)
        nn.init.constant_(self.early_exit_classifier.bias, 0)

    def forward(self, input_ids, attention_mask=None, token_type_ids=None, position_ids=None, head_mask=None):
        """Run the encoder and classify from the hidden state at sequence position 0.

        All arguments are forwarded unchanged to the underlying encoder.

        Returns:
            The classifier output computed from ``last_hidden_state[:, 0, :]``.
        """
        outputs = self.bert_encoder(input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids,
                                    position_ids=position_ids, head_mask=head_mask)

        # Get the output from the single encoder layer
        encoder_output = outputs.last_hidden_state

        # Position 0 along the sequence axis is used as the sentence representation
        # (presumably the [CLS] token under standard BERT tokenization - confirm).
        logits_early_exit = self.early_exit_classifier(encoder_output[:, 0, :])

        return logits_early_exit


In [None]:
# Mount Google Drive at /content/drive (Colab-only import).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Define model configuration
config = BertConfig.from_pretrained('bert-base-uncased', num_labels=2)

# Instantiate the model
model = BertSingleEncoderEarlyExit(config,num_labels=2)  # two output classes

# Print the model
print(model)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

BertSingleEncoderEarlyExit(
  (bert_encoder): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, el

In [None]:
from torch.utils.data import DataLoader
from transformers import BertTokenizer, GlueDataTrainingArguments, GlueDataset

# Initialize the tokenizer
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# Define the task name and data directory
# DATA_DIR points below the Google Drive mount point (/content/drive).
TASK_NAME = "sst-2"
DATA_DIR = "/content/drive/MyDrive/GLUE-baselines/SST-2"

# Define data arguments
data_args = GlueDataTrainingArguments(
    task_name=TASK_NAME,
    data_dir=DATA_DIR,
)

# Load the training and evaluation datasets
# The first call passes no `mode`, so it uses GlueDataset's default split
# (presumably the training split, per the variable name - confirm); the second requests "dev".
# NOTE(review): GlueDataset is flagged as deprecated in recent transformers releases - confirm
# the installed version still ships it.
train_dataset = GlueDataset(data_args, tokenizer=tokenizer)
eval_dataset = GlueDataset(data_args, tokenizer=tokenizer, mode="dev")

# Define batch size
batch_size = 8


import torch

# Collate function handed to the DataLoaders
def custom_collate(batch):
    """Turn a list of feature objects into one dict of batched tensors.

    Each element of ``batch`` must expose ``input_ids``, ``attention_mask``
    and ``label`` attributes.

    Returns:
        dict with keys 'input_ids', 'attention_mask' and 'labels', each a
        tensor built from the corresponding attribute of every element.
    """
    def _gather(field):
        # One tensor per field, rows in the same order as the batch
        return torch.tensor([getattr(example, field) for example in batch])

    return {
        'input_ids': _gather('input_ids'),
        'attention_mask': _gather('attention_mask'),
        'labels': _gather('label'),
    }

# Create train and eval data loaders with the custom collate function
# Only the training loader shuffles; the evaluation loader is built with shuffle=False.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=custom_collate)
eval_dataloader = DataLoader(eval_dataset, batch_size=batch_size, shuffle=False, collate_fn=custom_collate)



tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]



In [None]:
import torch.optim as optim
import os
# Initialize the model
model = BertSingleEncoderEarlyExit(config, num_labels=2)  # Assuming binary classification

# Define optimizer and learning rate scheduler
optimizer = optim.AdamW(model.parameters(), lr=5e-5)  # You can adjust the learning rate as needed
# NOTE(review): scheduler.step() runs once per epoch below, so step_size=1 with gamma=0.1
# multiplies the learning rate by 0.1 after every epoch - confirm this decay is intended.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.1)  # You can adjust the scheduler parameters as needed

# Define the loss function (cross-entropy loss)
loss_fn = nn.CrossEntropyLoss()

# Define the device (GPU or CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Training loop
# Each epoch: one pass over train_dataloader, then an accuracy pass over eval_dataloader.
num_epochs = 3  # You can adjust the number of epochs as needed
for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0
    for batch in train_dataloader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass
        logits = model(input_ids, attention_mask=attention_mask)

        # Compute the loss
        loss = loss_fn(logits, labels)

        # Backward pass
        loss.backward()

        # Update parameters
        optimizer.step()

        # Accumulate the total loss
        total_loss += loss.item()

    # Calculate average loss (mean of the per-batch losses: divides by the number of batches)
    avg_loss = total_loss / len(train_dataloader)

    # Evaluate the model on the validation set
    model.eval()
    # Despite its name, this accumulates the COUNT of correct predictions; it is
    # turned into a ratio only after the loop.
    total_eval_accuracy = 0.0
    for batch in eval_dataloader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        with torch.no_grad():
            logits = model(input_ids, attention_mask=attention_mask)

        # Compute accuracy
        predictions = torch.argmax(logits, dim=1)
        correct_predictions = (predictions == labels).sum().item()
        total_eval_accuracy += correct_predictions

    # Calculate average accuracy (correct predictions divided by the number of evaluation examples)
    avg_accuracy = total_eval_accuracy / len(eval_dataset)

    print(f"Epoch {epoch+1}/{num_epochs}:")
    print(f"  Training Loss: {avg_loss:.4f}")
    print(f"  Validation Accuracy: {avg_accuracy:.2%}")

    # Update the learning rate scheduler
    scheduler.step()
# Directory to save the trained model
# NOTE(review): the checkpoint is written only here, after every epoch has finished;
# a run that is interrupted inside the loop saves nothing.
output_dir = "/content/drive/MyDrive/result"
os.makedirs(output_dir, exist_ok=True)

# Define the file path for saving the model
model_save_path = os.path.join(output_dir, "bert_single_encoder_early_exit.pth")

# Save the model (state_dict only, not the full module object)
torch.save(model.state_dict(), model_save_path)

print(f"Model saved to {model_save_path}")

KeyboardInterrupt: 

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from transformers import BertConfig, BertModel, BertTokenizer, GlueDataTrainingArguments, GlueDataset

# Define the model architecture
class BertSingleEncoderEarlyExit(nn.Module):
    """Frozen pre-trained BERT encoder topped with a trainable linear classifier.

    Args:
        config: Configuration object; only ``hidden_size`` is read, to size
            the classifier input.
        num_labels: Number of classes produced by the classifier.
    """

    def __init__(self, config, num_labels):
        super().__init__()
        self.num_labels = num_labels

        # Pre-trained 'bert-base-uncased' encoder with gradients disabled on
        # every parameter, so only the classifier below can be trained.
        self.bert_encoder = BertModel.from_pretrained('bert-base-uncased')
        self.bert_encoder.requires_grad_(False)

        # Classification head
        self.early_exit_classifier = nn.Linear(config.hidden_size, num_labels)
        self.init_weights()

    def init_weights(self):
        """Xavier-normal initialise the classifier weight and zero its bias."""
        head = self.early_exit_classifier
        nn.init.xavier_normal_(head.weight)
        nn.init.constant_(head.bias, 0)

    def forward(self, input_ids, attention_mask=None, token_type_ids=None, position_ids=None, head_mask=None):
        """Encode the inputs and classify from element 1 of the encoder output.

        All arguments are forwarded unchanged to the encoder.
        """
        encoder_outputs = self.bert_encoder(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
        )

        # Element 1 is the encoder's pooled output (not the per-token last hidden state)
        return self.early_exit_classifier(encoder_outputs[1])

# Initialize the tokenizer
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# Define the task name and data directory
# Absolute path below the Google Drive mount point, matching the absolute
# checkpoint directory used when saving the model; a cwd-relative "./drive/..."
# path only resolves while the working directory is /content.
TASK_NAME = "sst-2"
DATA_DIR = "/content/drive/MyDrive/GLUE-baselines/SST-2"

# Define data arguments
data_args = GlueDataTrainingArguments(
    task_name=TASK_NAME,
    data_dir=DATA_DIR,
)

# Load the training and evaluation datasets
# The first call passes no `mode` and therefore uses GlueDataset's default split;
# the second explicitly requests the "dev" split.
train_dataset = GlueDataset(data_args, tokenizer=tokenizer)
eval_dataset = GlueDataset(data_args, tokenizer=tokenizer, mode="dev")

# Define batch size
batch_size = 8

# Collate function handed to the DataLoaders
def custom_collate(batch):
    """Turn a list of feature objects into one dict of batched tensors.

    Each element of ``batch`` must expose ``input_ids``, ``attention_mask``
    and ``label`` attributes.

    Returns:
        dict with keys 'input_ids', 'attention_mask' and 'labels', each a
        tensor built from the corresponding attribute of every element.
    """
    def _gather(field):
        # One tensor per field, rows in the same order as the batch
        return torch.tensor([getattr(example, field) for example in batch])

    return {
        'input_ids': _gather('input_ids'),
        'attention_mask': _gather('attention_mask'),
        'labels': _gather('label'),
    }

# Create train and eval data loaders with the custom collate function
# Only the training loader shuffles; the evaluation loader is built with shuffle=False.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=custom_collate)
eval_dataloader = DataLoader(eval_dataset, batch_size=batch_size, shuffle=False, collate_fn=custom_collate)

# Define model configuration
config = BertConfig.from_pretrained('bert-base-uncased', num_labels=2)

# Initialize the model
model = BertSingleEncoderEarlyExit(config, num_labels=2)  # Assuming binary classification

# Define optimizer and learning rate scheduler
# model.parameters() also yields the encoder parameters, but the class above sets
# requires_grad=False on them, so only the classifier receives gradients.
optimizer = optim.AdamW(model.parameters(), lr=5e-5)  # You can adjust the learning rate as needed
# NOTE(review): with one scheduler.step() per epoch, step_size=1 and gamma=0.1 multiply the
# learning rate by 0.1 after every epoch - confirm this decay is intended.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.1)  # You can adjust the scheduler parameters as needed

# Define the loss function (cross-entropy loss)
loss_fn = nn.CrossEntropyLoss()

# Define the device (GPU or CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Training loop
# Each epoch: one pass over train_dataloader, then an accuracy pass over eval_dataloader.
num_epochs = 3  # You can adjust the number of epochs as needed
for epoch in range(num_epochs):
    # NOTE(review): train() switches the whole module, including the frozen encoder,
    # to training mode - confirm that is intended for the encoder as well.
    model.train()
    total_loss = 0.0
    for batch in train_dataloader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass
        logits = model(input_ids, attention_mask=attention_mask)

        # Compute the loss
        loss = loss_fn(logits, labels)

        # Backward pass
        loss.backward()

        # Update parameters
        optimizer.step()

        # Accumulate the total loss
        total_loss += loss.item()

    # Calculate average loss (mean of the per-batch losses: divides by the number of batches)
    avg_loss = total_loss / len(train_dataloader)

    # Evaluate the model on the validation set
    model.eval()
    # Despite its name, this accumulates the COUNT of correct predictions; it is
    # turned into a ratio only after the loop.
    total_eval_accuracy = 0.0
    for batch in eval_dataloader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        with torch.no_grad():
            logits = model(input_ids, attention_mask=attention_mask)

        # Compute accuracy
        predictions = torch.argmax(logits, dim=1)
        correct_predictions = (predictions == labels).sum().item()
        total_eval_accuracy += correct_predictions

    # Calculate average accuracy (correct predictions divided by the number of evaluation examples)
    avg_accuracy = total_eval_accuracy / len(eval_dataset)

    print(f"Epoch {epoch+1}/{num_epochs}:")
    print(f"  Training Loss: {avg_loss:.4f}")
    print(f"  Validation Accuracy: {avg_accuracy:.2%}")

    # Update the learning rate scheduler
    scheduler.step()

# `os` is not among this cell's own imports; importing it here keeps the save step
# from failing with a NameError after training when no earlier cell has imported it.
import os

# Directory to save the trained model
output_dir = "/content/drive/MyDrive/result"
os.makedirs(output_dir, exist_ok=True)

# Define the file path for saving the model
model_save_path = os.path.join(output_dir, "fine_tuned_bert_single_encoder_early_exit.pth")

# Save the model (state_dict only, not the full module object)
torch.save(model.state_dict(), model_save_path)

print(f"Model saved to {model_save_path}")




model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Epoch 1/3:
  Training Loss: 0.6318
  Validation Accuracy: 75.00%
Epoch 2/3:
  Training Loss: 0.5884
  Validation Accuracy: 74.20%
Epoch 3/3:
  Training Loss: 0.5847
  Validation Accuracy: 73.17%
Model saved to /content/drive/MyDrive/result/fine_tuned_bert_single_encoder_early_exit.pth


TextAttack Augmentation and Inference

In [None]:
! pip install textattack



In [None]:
import textattack

textattack: Updating TextAttack package dependencies.
textattack: Downloading NLTK required packages.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package omw to /root/nltk_data...
[nltk_data] Downloading package universal_tagset to /root/nltk_data...
[nltk_data]   Unzipping taggers/universal_tagset.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [None]:
from textattack.transformations import CompositeTransformation
from textattack.transformations import WordSwapQWERTY
from textattack.transformations import WordSwapNeighboringCharacterSwap
from textattack.transformations import WordSwapRandomCharacterInsertion
from textattack.transformations import WordSwapRandomCharacterDeletion
from textattack.transformations import WordSwapHomoglyphSwap
from textattack.augmentation import Augmenter

# Combine multiple transformations to simulate the TextBugger attack.
transformation = CompositeTransformation([
    WordSwapQWERTY(),  # Swaps characters with others that are close on a QWERTY keyboard.
    WordSwapNeighboringCharacterSwap(),  # Swaps neighboring characters in a word.
    WordSwapRandomCharacterInsertion(),  # Inserts random characters into words.
    WordSwapRandomCharacterDeletion(),  # Deletes characters from words.
    WordSwapHomoglyphSwap()  # Swaps characters with visually similar ones (homoglyphs).
])

# Create an augmenter with the defined transformation.
# transformations_per_example=1 presumably requests one augmented text per input - confirm.
augmenter = Augmenter(transformation=transformation, transformations_per_example=1)

# Sample text to augment
s = 'What I cannot create, I do not understand.'

# Generate augmented versions of the text
# No random seed is set in this cell, so the perturbed text may differ between runs.
augmented_texts = augmenter.augment(s)
print(augmented_texts)

['Wհat I cannot create, I do not understand.']


In [None]:
from textattack.transformations import CompositeTransformation
from textattack.transformations import WordSwapQWERTY, WordSwapNeighboringCharacterSwap
from textattack.transformations import WordSwapRandomCharacterInsertion, WordSwapRandomCharacterDeletion
from textattack.transformations import WordSwapHomoglyphSwap
from textattack.augmentation import Augmenter

# Define transformations
# One character-level perturbation per entry; each gets its own Augmenter below.
transformations = [
    WordSwapQWERTY(),
    WordSwapNeighboringCharacterSwap(),
    WordSwapRandomCharacterInsertion(),
    WordSwapRandomCharacterDeletion(),
    WordSwapHomoglyphSwap()
]

# Sample text to augment
s = 'What I cannot create, I do not understand.'

# Apply each transformation separately to generate multiple distinct outputs
# The results are collected in the same order as the transformations list.
# No random seed is set in this cell, so the perturbed texts may differ between runs.
augmented_texts = []
for transformation in transformations:
    augmenter = Augmenter(transformation=transformation, transformations_per_example=1)
    augmented_texts.extend(augmenter.augment(s))

print(augmented_texts)

['Whah I cannot create, I do not understand.', 'What I cannot create, I do not understadn.', 'What I cannot create, I dDo not understand.', 'What I cannot create, I o not understand.', 'What I cannot create, I do not undÐµrstand.']


Inference

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from transformers import BertConfig, BertModel, BertTokenizer, GlueDataTrainingArguments, GlueDataset

In [None]:
import torch
from transformers import BertConfig, BertTokenizer
import os

# Define the model architecture
class BertSingleEncoderEarlyExit(nn.Module):
    """Frozen pre-trained BERT encoder topped with a trainable linear classifier.

    Args:
        config: Configuration object; only ``hidden_size`` is read, to size
            the classifier input.
        num_labels: Number of classes produced by the classifier.
    """

    def __init__(self, config, num_labels):
        super().__init__()
        self.num_labels = num_labels

        # Pre-trained 'bert-base-uncased' encoder with gradients disabled on
        # every parameter, so only the classifier below can be trained.
        self.bert_encoder = BertModel.from_pretrained('bert-base-uncased')
        self.bert_encoder.requires_grad_(False)

        # Classification head
        self.early_exit_classifier = nn.Linear(config.hidden_size, num_labels)
        self.init_weights()

    def init_weights(self):
        """Xavier-normal initialise the classifier weight and zero its bias."""
        head = self.early_exit_classifier
        nn.init.xavier_normal_(head.weight)
        nn.init.constant_(head.bias, 0)

    def forward(self, input_ids, attention_mask=None, token_type_ids=None, position_ids=None, head_mask=None):
        """Encode the inputs and classify from element 1 of the encoder output.

        All arguments are forwarded unchanged to the encoder.
        """
        encoder_outputs = self.bert_encoder(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
        )

        # Element 1 is the encoder's pooled output (not the per-token last hidden state)
        return self.early_exit_classifier(encoder_outputs[1])

# Initialize the tokenizer
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# Define model configuration
config = BertConfig.from_pretrained('bert-base-uncased', num_labels=2)

# Initialize the model
model = BertSingleEncoderEarlyExit(config, num_labels=2)

# Define the device (GPU or CPU) first so the checkpoint can be mapped onto it
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the trained model weights.
# map_location remaps tensors that were saved from a CUDA session, so the
# checkpoint also loads on a CPU-only runtime instead of raising an error.
model_save_path = "/content/drive/MyDrive/result/fine_tuned_bert_single_encoder_early_exit.pth"
model.load_state_dict(torch.load(model_save_path, map_location=device))
model.to(device)

# Set the model to evaluation mode
model.eval()

# Define the function for inference
def predict(text):
    """Classify one piece of text with the module-level ``model``.

    Uses the module-level ``tokenizer``, ``model`` and ``device``.

    Args:
        text: The input string to classify.

    Returns:
        int: Index of the highest-scoring class.
    """
    # A single example needs no padding: padding it to 512 tokens only adds
    # masked-out positions that the encoder still has to process. Truncation
    # is kept so over-long inputs are cut at 512 tokens.
    inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True)

    # Move input tensors to the device
    input_ids = inputs["input_ids"].to(device)
    attention_mask = inputs["attention_mask"].to(device)

    # Make predictions without tracking gradients
    with torch.no_grad():
        logits = model(input_ids, attention_mask=attention_mask)

    # Get the predicted label (.item() requires exactly one example)
    predicted_label = torch.argmax(logits, dim=1).item()

    return predicted_label

# Example usage: classify one sentence and print the predicted class index
sample_text = "I do understand"
predicted_label = predict(sample_text)
print(f"Predicted label: {predicted_label}")


Predicted label: 1


In [None]:
import torch
from transformers import BertConfig, BertTokenizer
import os

# Define the model architecture
class BertSingleEncoderEarlyExit(nn.Module):
    """Frozen pre-trained BERT encoder topped with a trainable linear classifier.

    Args:
        config: Configuration object; only ``hidden_size`` is read, to size
            the classifier input.
        num_labels: Number of classes produced by the classifier.
    """

    def __init__(self, config, num_labels):
        super().__init__()
        self.num_labels = num_labels

        # Pre-trained 'bert-base-uncased' encoder with gradients disabled on
        # every parameter, so only the classifier below can be trained.
        self.bert_encoder = BertModel.from_pretrained('bert-base-uncased')
        self.bert_encoder.requires_grad_(False)

        # Classification head
        self.early_exit_classifier = nn.Linear(config.hidden_size, num_labels)
        self.init_weights()

    def init_weights(self):
        """Xavier-normal initialise the classifier weight and zero its bias."""
        head = self.early_exit_classifier
        nn.init.xavier_normal_(head.weight)
        nn.init.constant_(head.bias, 0)

    def forward(self, input_ids, attention_mask=None, token_type_ids=None, position_ids=None, head_mask=None):
        """Encode the inputs and classify from element 1 of the encoder output.

        All arguments are forwarded unchanged to the encoder.
        """
        encoder_outputs = self.bert_encoder(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
        )

        # Element 1 is the encoder's pooled output (not the per-token last hidden state)
        return self.early_exit_classifier(encoder_outputs[1])

# Initialize the tokenizer
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# Define model configuration
config = BertConfig.from_pretrained('bert-base-uncased', num_labels=2)

# Initialize the model
model = BertSingleEncoderEarlyExit(config, num_labels=2)

# Define the device (GPU or CPU) first so the checkpoint can be mapped onto it
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the trained model weights.
# map_location remaps tensors that were saved from a CUDA session, so the
# checkpoint also loads on a CPU-only runtime instead of raising an error.
model_save_path = "/content/drive/MyDrive/result/fine_tuned_bert_single_encoder_early_exit.pth"
model.load_state_dict(torch.load(model_save_path, map_location=device))
model.to(device)

# Set the model to evaluation mode
model.eval()

# Define the function for inference
def predict(text):
    """Classify one piece of text with the module-level ``model``.

    Uses the module-level ``tokenizer``, ``model`` and ``device``.

    Args:
        text: The input string to classify.

    Returns:
        int: Index of the highest-scoring class.
    """
    # A single example needs no padding: padding it to 512 tokens only adds
    # masked-out positions that the encoder still has to process. Truncation
    # is kept so over-long inputs are cut at 512 tokens.
    inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True)

    # Move input tensors to the device
    input_ids = inputs["input_ids"].to(device)
    attention_mask = inputs["attention_mask"].to(device)

    # Make predictions without tracking gradients
    with torch.no_grad():
        logits = model(input_ids, attention_mask=attention_mask)

    # Get the predicted label (.item() requires exactly one example)
    predicted_label = torch.argmax(logits, dim=1).item()

    return predicted_label

# Example usage: classify one sentence and print the predicted class index
sample_text = "I do not understand"
predicted_label = predict(sample_text)
print(f"Predicted label: {predicted_label}")


Predicted label: 0


In [None]:
import torch
from transformers import BertConfig, BertTokenizer, BertModel
import os
from textattack.transformations import CompositeTransformation
from textattack.transformations import WordSwapQWERTY, WordSwapNeighboringCharacterSwap
from textattack.transformations import WordSwapRandomCharacterInsertion, WordSwapRandomCharacterDeletion
from textattack.transformations import WordSwapHomoglyphSwap
from textattack.augmentation import Augmenter

# Define the model architecture
class BertSingleEncoderEarlyExit(nn.Module):
    """Frozen pre-trained BERT encoder topped with a trainable linear classifier.

    Args:
        config: Configuration object; only ``hidden_size`` is read, to size
            the classifier input.
        num_labels: Number of classes produced by the classifier.
    """

    def __init__(self, config, num_labels):
        super().__init__()
        self.num_labels = num_labels

        # Pre-trained 'bert-base-uncased' encoder with gradients disabled on
        # every parameter, so only the classifier below can be trained.
        self.bert_encoder = BertModel.from_pretrained('bert-base-uncased')
        self.bert_encoder.requires_grad_(False)

        # Classification head
        self.early_exit_classifier = nn.Linear(config.hidden_size, num_labels)
        self.init_weights()

    def init_weights(self):
        """Xavier-normal initialise the classifier weight and zero its bias."""
        head = self.early_exit_classifier
        nn.init.xavier_normal_(head.weight)
        nn.init.constant_(head.bias, 0)

    def forward(self, input_ids, attention_mask=None, token_type_ids=None, position_ids=None, head_mask=None):
        """Encode the inputs and classify from element 1 of the encoder output.

        All arguments are forwarded unchanged to the encoder.
        """
        encoder_outputs = self.bert_encoder(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
        )

        # Element 1 is the encoder's pooled output (not the per-token last hidden state)
        return self.early_exit_classifier(encoder_outputs[1])

# Initialize the tokenizer
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# Define model configuration
config = BertConfig.from_pretrained('bert-base-uncased', num_labels=2)

# Initialize the model
model = BertSingleEncoderEarlyExit(config, num_labels=2)

# Define the device (GPU or CPU) first so the checkpoint can be mapped onto it
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the trained model weights.
# map_location remaps tensors that were saved from a CUDA session, so the
# checkpoint also loads on a CPU-only runtime instead of raising an error.
model_save_path = "/content/drive/MyDrive/result/fine_tuned_bert_single_encoder_early_exit.pth"
model.load_state_dict(torch.load(model_save_path, map_location=device))
model.to(device)

# Set the model to evaluation mode
model.eval()

# Define the function for inference
def predict(text):
    """Classify one piece of text with the module-level ``model``.

    Uses the module-level ``tokenizer``, ``model`` and ``device``.

    Args:
        text: The input string to classify.

    Returns:
        int: Index of the highest-scoring class.
    """
    # A single example needs no padding: padding it to 512 tokens only adds
    # masked-out positions that the encoder still has to process. Truncation
    # is kept so over-long inputs are cut at 512 tokens.
    inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True)

    # Move input tensors to the device
    input_ids = inputs["input_ids"].to(device)
    attention_mask = inputs["attention_mask"].to(device)

    # Make predictions without tracking gradients
    with torch.no_grad():
        logits = model(input_ids, attention_mask=attention_mask)

    # Get the predicted label (.item() requires exactly one example)
    predicted_label = torch.argmax(logits, dim=1).item()

    return predicted_label

# Define transformations
# One character-level perturbation per entry; each gets its own Augmenter below.
transformations = [
    WordSwapQWERTY(),
    WordSwapNeighboringCharacterSwap(),
    WordSwapRandomCharacterInsertion(),
    WordSwapRandomCharacterDeletion(),
    WordSwapHomoglyphSwap()
]

# Sample text to augment
s = 'What I cannot create, I do not understand.'

# Apply each transformation separately to generate multiple distinct outputs
# No random seed is set in this cell, so the perturbed texts may differ between runs.
augmented_texts = []
for transformation in transformations:
    augmenter = Augmenter(transformation=transformation, transformations_per_example=1)
    augmented_texts.extend(augmenter.augment(s))

# Perform inference on all augmented texts
# Each perturbed sentence is classified independently; the numbering printed is 1-based.
for i, augmented_text in enumerate(augmented_texts):
    predicted_label = predict(augmented_text)
    print(f"Augmented text {i+1}: {augmented_text}")
    print(f"Predicted label: {predicted_label}")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Augmented text 1: What I cannog create, I do not understand.
Predicted label: 1
Augmented text 2: Waht I cannot create, I do not understand.
Predicted label: 1
Augmented text 3: What I cannot create, I dFo not understand.
Predicted label: 1
Augmented text 4: What I cannot create, I do no understand.
Predicted label: 1
Augmented text 5: What I cannot create, I do not uոderstand.
Predicted label: 1
