In [1]:
!pip install -U transformers>=4.48.0 datasets lomo-optim pytorch-optimizer
# !pip install flash-attn

In [2]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModel
import torch
from torch.utils.data import Dataset, DataLoader
import random

# --- Configuration ---
DATASET_NAME = "HuggingFaceTB/finemath"  # Or your chosen subset
DATASET_SUBSET = "finemath-4plus"
MODEL_NAME = "answerdotai/ModernBERT-base" # Or your chosen pre-trained model
MAX_SEQ_LEN = 512 # Or your chosen maximum sequence length
TOP_K_WORDS = 1000  # Number of frequent words for sampling
DATASET_SIZE = 2000
DIFFICULTY_LEVELS = {
    "hard": (1, 10),
    "medium": (10, 50),
    "easy": (50, 100),
}

# --- Load Dataset and Tokenizer ---
dataset = load_dataset(DATASET_NAME, DATASET_SUBSET, split="train", streaming=True)
dataset = dataset.take(DATASET_SIZE)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# --- Calculate Word Frequencies and Embeddings ---

# 1. Calculate word frequencies (can be done offline and cached)
word_counts = {}
for item in dataset:
    tokens = tokenizer.tokenize(item["text"])
    for token in tokens:
        word_counts[token] = word_counts.get(token, 0) + 1

# 2. Get top K frequent words
top_k_words = [
    word
    for word, count in sorted(word_counts.items(), key=lambda item: item[1], reverse=True)
    [:TOP_K_WORDS]
]

# 3. Calculate embeddings for top K words (can be done offline and cached)
# Assuming you have a way to get embeddings, e.g., from the pre-trained model
# For example (this part depends on how you want to get the embeddings):
model = AutoModel.from_pretrained(MODEL_NAME)
embeddings = {}
with torch.no_grad():
    for word in top_k_words:
        inputs = tokenizer(word, return_tensors="pt")
        outputs = model(**inputs)
        embeddings[word] = outputs.last_hidden_state.mean(dim=1).squeeze()

# --- Function to Create Impostor Words ---
def get_impostor_word(word, embeddings, difficulty="medium"):
    if word not in embeddings:
        return random.choice(top_k_words)  # Handle out-of-vocabulary words

    # 4. Calculate distances to other words
    distances = {}
    word_embedding = embeddings[word]
    with torch.no_grad():
        for other_word, other_embedding in embeddings.items():
            if word != other_word:
                # Use cosine similarity, or other distance metrics
                distance = 1 - torch.nn.functional.cosine_similarity(
                    word_embedding, other_embedding, dim=0
                )
                distances[other_word] = distance.item()

    # 5. Sample based on difficulty
    min_rank, max_rank = DIFFICULTY_LEVELS[difficulty]
    sorted_words = sorted(distances.keys(), key=lambda w: distances[w])
    impostor_word = random.choice(
        sorted_words[min_rank:max_rank]
    )  # Adjust range based on difficulty

    return impostor_word

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Resolving data files:   0%|          | 0/128 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/64 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (9031 > 8192). Running this sequence through the model will result in indexing errors


In [3]:
class ImpostorDataset(Dataset):
    def __init__(self, dataset, tokenizer, embeddings, max_seq_len):
        self.dataset = dataset
        self.tokenizer = tokenizer
        self.embeddings = embeddings
        self.max_seq_len = max_seq_len
        # store the data in a list since IterableDataset has no len
        self.data = list(self.dataset)

    def __len__(self):
        # return length of data list
        return len(self.data)

    def __getitem__(self, idx):
        item = self.data[idx]
        text = item["text"]

        tokens = self.tokenizer.tokenize(text)

        # --- Truncate or pad the sequence ---
        if len(tokens) > self.max_seq_len - 2:
            tokens = tokens[: self.max_seq_len - 2]

        # --- Add special tokens ---
        tokens = ["[CLS]"] + tokens + ["[SEP]"]

        # --- Variable Impostor Rate ---
        impostor_rate = random.uniform(0.0, 0.25)  # Example: 0% to 25%

        # --- Create impostor tokens ---
        labels = [0] * len(tokens)  # 0 indicates original token
        corrupted_tokens = []

        for i, token in enumerate(tokens):
            if random.random() < impostor_rate and token not in ["[CLS]", "[SEP]"]:
                # Randomly choose difficulty
                difficulty = random.choice(list(DIFFICULTY_LEVELS.keys()))
                # Get impostor word
                impostor_word = get_impostor_word(
                    token, self.embeddings, difficulty
                )

                corrupted_tokens.append(impostor_word)
                labels[i] = 1  # 1 indicates impostor token
            else:
                corrupted_tokens.append(token)

        # --- Convert to IDs ---
        input_ids = self.tokenizer.convert_tokens_to_ids(corrupted_tokens)

        # --- Pad sequences ---
        padding_length = self.max_seq_len - len(input_ids)
        input_ids = input_ids + ([self.tokenizer.pad_token_id] * padding_length)
        labels = labels + ([0] * padding_length)

        return {
            "input_ids": torch.tensor(input_ids),
            "attention_mask": torch.tensor([1] * len(corrupted_tokens) + ([0] * padding_length)),
            "labels": torch.tensor(labels),
        }

In [4]:
from transformers import AutoModelForSequenceClassification, AutoConfig
import torch.nn as nn

class ImpostorVerifier(nn.Module):
    def __init__(self, model_name, max_seq_len):
        super().__init__()
        self.model = AutoModel.from_pretrained(model_name)
        self.config = AutoConfig.from_pretrained(model_name)

        # Add a linear layer for probability output
        self.config.num_labels = 1
        self.impostor_layer = nn.Linear(self.config.hidden_size, 1)

        # Use a pooling layer
        self.pooler = nn.AdaptiveMaxPool1d(1)

    def forward(self, input_ids, attention_mask, labels=None):
        outputs = self.model(
            input_ids=input_ids,
            attention_mask=attention_mask,
        )

        # Get the token outputs
        token_outputs = outputs.last_hidden_state

        # Pool the sequence outputs
        pooled_outputs = self.pooler(token_outputs.permute(0, 2, 1)).squeeze(-1)

        # Impostor probability prediction
        impostor_logits = self.impostor_layer(token_outputs).squeeze(-1)
        impostor_probs = torch.sigmoid(impostor_logits)

        # --- Calculate sequence quality score (using log-likelihood) ---
        sequence_quality = -torch.sum(torch.log(1 - impostor_probs + 1e-12), dim=1)

        # Calculate loss if labels are provided
        loss = None
        if labels is not None:
            loss_fn = nn.BCEWithLogitsLoss()
            loss = loss_fn(impostor_logits, labels.float())

        return {
            "loss": loss,
            "token_probs": impostor_probs,
            "sequence_quality": sequence_quality,
        }

In [5]:
from pytorch_optimizer import create_optimizer, get_wsd_schedule
from tqdm.auto import tqdm
import torch.optim as optim # Import the base optimizer package

# --- Hyperparameters ---
LEARNING_RATE = 1e-5
NUM_EPOCHS = 2
BATCH_SIZE = 8 # Adjust based on your GPU memory
WEIGHT_DECAY = 1e-2

# --- Load pre-trained embeddings ---
# ... (load your pre-calculated embeddings here) ...

# --- Create Dataset and DataLoader ---
train_dataset = ImpostorDataset(
    dataset, tokenizer, embeddings, MAX_SEQ_LEN
)
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE)

# --- Model Initialization ---
model = ImpostorVerifier(MODEL_NAME, MAX_SEQ_LEN)
model.to("cuda")  # Move model to GPU

optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
# optimizer = create_optimizer(
#     model,
#     'adalomo',
#     lr=LEARNING_RATE,
#     weight_decay=WEIGHT_DECAY,
# )

num_training_steps = (DATASET_SIZE // BATCH_SIZE + 1) * NUM_EPOCHS

scheduler = get_wsd_schedule(
    optimizer, num_warmup_steps=10, num_stable_steps=num_training_steps, num_decay_steps=num_training_steps
)

# --- Training Loop ---
for epoch in range(NUM_EPOCHS):
    model.train()
    total_loss = 0
    # Wrap the dataloader with tqdm
    with tqdm(train_dataloader, desc=f"Epoch {epoch + 1}") as pbar:
        for batch in pbar:
            batch = {k: v.to("cuda") for k, v in batch.items()}
            outputs = model(**batch)
            loss = outputs["loss"]

            loss.backward()
            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()

            # Update tqdm's display
            pbar.set_postfix({"Batch Loss": loss.item()})
            total_loss += loss.item()

    avg_train_loss = total_loss / len(train_dataloader)
    print(f"Epoch {epoch} average loss: {avg_train_loss}")

Epoch 1:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch 0 average loss: 0.1734852579087019


Epoch 2:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch 1 average loss: 0.08334150925278663


In [46]:
from transformers import AutoTokenizer, AutoModel
import torch
import torch.nn as nn
import random

# --- Load Tokenizer and Model ---
tokenizer = AutoTokenizer.from_pretrained("answerdotai/ModernBERT-large")  # Or other tokenizer
model.to("cuda")
model.eval()

# --- Hardcoded Examples ---
examples = [
    {
        "text": "Let f(x) = x^2 + 2x + 1. Then f'(x) = 2x + 2.",
        "expected_quality": "high",  # Label for human evaluation comparison
    },
    {
        "text": "The derivative of sin(x) is cos(x), and the integral of cos(x) is potato(x).",
        "expected_quality": "low",  # Contains a clear mathematical error
    },
    {
        "text": "Let a, b, and c be the sides of a triangle. Then by the Pythagorean theorem, a + b = c",
        "expected_quality": "low", # Incorrect statement of the theorem
    },
    {
        "text": "If x > 0, then x^2 is always positive. Therefore, the square root of x is also positive.",
        "expected_quality": "medium", # Logically sound but potentially incomplete reasoning
    },
    {
        "text": "Let x be any real number. Then x^2 is not always a real number, because the square root of a negative number is imaginary.",
        "expected_quality": "low", # Contains a clear mathematical error
    },
    {
        "text": "Habibullah Akbar",
        "expected_quality": "low",
    },
    {
        "text": "Cat sat in the mat",
        "expected_quality": "low",
    },
    {
        "text": "To simplify the algebraic expression `(3x^2 - 4y^3) / (2x)`, we can follow a few steps: Step 1: Distribute the division symbol by multiplying the expression by the reciprocal of the denominator. The reciprocal of `2x` is `1/(2x)`, so the expression becomes `(3x^2 - 4y^3) * (1/(2x))`. Step 2: Simplify within the parentheses by dividing each term separately. - For the first term, `3x^2`, divide `3x^2` by `2x`. This gives us `(3x^2) / (2x) = (3/2) * (x^2 / x) = (3/2) * x`. - For the second term, `-4y^3`, divide `-4y^3` by `2x`. This gives us `(-4y^3) / (2x) = (-2) * (y^3 / x)`. Step 3: Combine the simplified terms from Step 2. The expression now becomes `(3/2) * x - 2 * (y^3 / x)`. So, the simplified form of the algebraic expression `(3x^2 - 4y^3) / (2x)` is `(3/2) * x - 2 * (y^3 / x)`.",
        "expected_quality": "higher",
    },
    {
        "text": "To simplify the algebraic expression `(3x^2 - 4y^3) / (2x)`, you can divide each term in the numerator by the denominator. First, let's divide `3x^2` by `2x`. Since both terms have a common factor of `x`, we can simplify this expression to `3x`. Next, we divide `-4y^3` by `2x`. We can simplify this expression by dividing each term separately. Dividing `-4` by `2` gives `-2`. Then, dividing `y^3` by `x` gives `y^3/x`. So, the simplified form of `(3x^2 - 4y^3) / (2x)` is `3x - 2y^3/x`.",
        "expected_quality": "lower",
    }
]

# --- Function to Perturb Input Sequence (for testing with perturbations) ---
def perturb_sequence(tokens, embeddings, impostor_prob, difficulty="medium"):
    corrupted_tokens = []
    labels = []

    for i, token in enumerate(tokens):
        if random.random() < impostor_prob and token not in ["[CLS]", "[SEP]"]:
            impostor_word = get_impostor_word(token, embeddings, difficulty)
            corrupted_tokens.append(impostor_word)
            labels.append(1) # 1 indicates impostor
        else:
            corrupted_tokens.append(token)
            labels.append(0) # 0 indicates original token

    return corrupted_tokens, labels

def min_max_scale(scores, min_score, max_score):
    return (scores - min_score) / (max_score - min_score)

# --- Evaluation Loop ---
for example in examples:
    text = example["text"]
    expected_quality = example["expected_quality"]

    # --- Tokenize ---
    tokens = tokenizer.tokenize(text)
    if len(tokens) > MAX_SEQ_LEN - 2:
        tokens = tokens[: MAX_SEQ_LEN - 2]
    tokens = ["[CLS]"] + tokens + ["[SEP]"]
    input_ids = tokenizer.convert_tokens_to_ids(tokens)

    # --- Create attention mask ---
    attention_mask = [1] * len(input_ids)

    # --- Pad ---
    padding_length = MAX_SEQ_LEN - len(input_ids)
    input_ids = input_ids + ([tokenizer.pad_token_id] * padding_length)
    attention_mask = attention_mask + ([0] * padding_length)

    # --- Convert to tensors ---
    input_ids = torch.tensor(input_ids).unsqueeze(0).to("cuda")
    attention_mask = torch.tensor(attention_mask).unsqueeze(0).to("cuda")

    # --- Inference ---
    with torch.no_grad():
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)

    # --- Analyze Outputs ---
    token_probs = outputs["token_probs"].squeeze(0).tolist()  # Probabilities for each token
    sequence_quality = outputs["sequence_quality"].squeeze(0).item()  # Overall quality

    # Get the min and max from your training data or a validation set
    min_score = -100 # Replace with the actual minimum score from your data
    max_score = 0  # Replace with the actual maximum score from your data

    sequence_quality_revised = min_max_scale(-sequence_quality, min_score, max_score)

    # --- Print Results ---
    print(f"Text: {text}")
    print(f"Expected Quality: {expected_quality}")
    print(f"Token Probabilities: {token_probs}")
    print(f"Sequence Quality (Negative log-likelihood): {sequence_quality}")
    print(f"Sequence Quality Revised: {sequence_quality_revised}")

    # --- (Optional) Perturb the input and re-evaluate ---
    # corrupted_tokens, labels = perturb_sequence(
    #     tokens, embeddings, impostor_prob=0.15, difficulty="medium"
    # )
    # corrupted_input_ids = tokenizer.convert_tokens_to_ids(corrupted_tokens)
    # corrupted_input_ids = torch.tensor(corrupted_input_ids).unsqueeze(0).to("cuda")
    # with torch.no_grad():
    #     corrupted_outputs = model(
    #         input_ids=corrupted_input_ids, attention_mask=attention_mask
    #     )
    # print(f"Corrupted Sequence Quality: {corrupted_outputs['sequence_quality'].squeeze(0).item()}")

    print("-" * 20)

Text: Let f(x) = x^2 + 2x + 1. Then f'(x) = 2x + 2.
Expected Quality: high
Token Probabilities: [0.032751817256212234, 0.052366338670253754, 0.019353540614247322, 0.006135655101388693, 0.019946454092860222, 0.2475609928369522, 0.007394819054752588, 0.015976421535015106, 0.05506192892789841, 0.013157646171748638, 0.01312980055809021, 0.012705331668257713, 0.010346041060984135, 0.011164694093167782, 0.010774179361760616, 0.020049868151545525, 0.04652153700590134, 0.011439856141805649, 0.013055016286671162, 0.010600578971207142, 0.004346226342022419, 0.010857806541025639, 0.011195854283869267, 0.010463383048772812, 0.012840298935770988, 0.016454264521598816, 0.00953679345548153, 0.31657496094703674, 0.05829336866736412, 0.033092569559812546, 0.017867665737867355, 0.055025987327098846, 0.08016271144151688, 0.0826326534152031, 0.058480143547058105, 0.029106086120009422, 0.017013557255268097, 0.01654122769832611, 0.04298066347837448, 0.05509374663233757, 0.03581133112311363, 0.02171842195093