In [1]:
import torch

from transformers import AutoTokenizer, AutoModelForCausalLM
# --- Config ---
# The tokenizer and the model weights are loaded from two different
# locations, so each one gets its own name: a Hub-style model id for the
# tokenizer and a local checkpoint directory for the weights.
tokenizer_dir = "meta-llama/Llama-3.2-1B"
model_dir = "/raid/home/rajivratn/hemant_rajivratn/last/src/trainllm/charllama-finetuned/checkpoint/"
gpu_index = 7

# Use the requested GPU when this machine actually has it; otherwise fall
# back to the CPU instead of failing when the model is moved.
if torch.cuda.is_available() and torch.cuda.device_count() > gpu_index:
    device = torch.device(f"cuda:{gpu_index}")
else:
    device = torch.device("cpu")
torch.backends.cudnn.benchmark = True

tokenizer = AutoTokenizer.from_pretrained(tokenizer_dir)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

# Load the checkpoint, switch to evaluation mode, and move the model to the
# selected device exactly once.
model = AutoModelForCausalLM.from_pretrained(model_dir)
model.eval()
model.to(device)


# Probe sentences for the scoring loop. Each entry is scored as-is and also
# after being passed through noisy_repeat() and shuffle_string().
input_texts = [
    "Hemant is my name",
    "Heeemmmannnnt iii",
    "DRNOT TL KTCRNT S",
]

import random
def noisy_repeat(text, max_repeats=4, prob=0.25):
    """Return ``text`` with some letters randomly repeated.

    The text is split on whitespace. Within each word, every alphabetic
    character is, with probability ``prob``, replaced by between 1 and
    ``max_repeats`` copies of itself (so a selected character may still end
    up unchanged). Non-alphabetic characters are copied through untouched.
    The words are re-joined with single spaces, which means leading,
    trailing, and repeated whitespace in ``text`` is collapsed.
    """
    pieces = []
    for token in text.split():
        stretched = []
        for ch in token:
            count = 1
            # The random draw only happens for letters, exactly one draw per
            # letter, followed by a second draw when the letter is selected.
            if ch.isalpha() and random.random() < prob:
                count = random.randint(1, max_repeats)
            stretched.append(ch * count)
        pieces.append("".join(stretched))
    return " ".join(pieces)

def shuffle_string(text):
    """Return a string holding the characters of ``text`` in random order.

    Every character, including whitespace, takes part in the shuffle, so the
    result has the same length and the same multiset of characters as the
    input.
    """
    shuffled = [*text]
    random.shuffle(shuffled)
    return "".join(shuffled)


# input_texts = [" ".join(list(text)) for text in input_texts]
# Unreduced (per-token) cross-entropy criterion. The scoring loop below does
# not use it, because it reports the loss value returned by the model; the
# name stays defined in case other cells rely on it.
criterion = torch.nn.CrossEntropyLoss(reduction='none')

# Score every probe sentence together with two corrupted variants of it.
# Nothing is trained here, so gradient tracking is switched off.
with torch.no_grad():
    for sentence in input_texts:
        print(sentence)
        # Original, character-repeated, and character-shuffled versions.
        variants = [sentence, noisy_repeat(sentence), shuffle_string(sentence)]
        for variant in variants:
            # Upper-case the text and put one space between all characters;
            # an original space therefore shows up as three spaces.
            text = " ".join(variant.upper())
            tokens = tokenizer(
                text,
                return_tensors='pt',
                add_special_tokens=False,  # tokenize the raw text only
            )
            input_ids = tokens['input_ids'].to(device)

            # Passing the inputs as labels makes the model compute the
            # language-modelling loss itself; that value is what is reported.
            outputs = model(input_ids=input_ids, labels=input_ids)
            loss = outputs.loss
            print(f'{text}, Cross-entropy loss: {loss.item():.4f}')
        print("--")

  from .autonotebook import tqdm as notebook_tqdm


Hemant is my name
H E M A N T   I S   M Y   N A M E, Cross-entropy loss: 2.1306
H E M A N T   I I I S   M Y   N A M E, Cross-entropy loss: 2.5021
A A H E E   M M   N T Y N M I S  , Cross-entropy loss: 28.6345
--
Heeemmmannnnt iii
H E E E M M M A N N N N T   I I I, Cross-entropy loss: 4.4156
H H E E E M M M M A N N N N T   I I I I I I, Cross-entropy loss: 7.6766
I M N N M N E E N H I M I E A T  , Cross-entropy loss: 34.4437
--
DRNOT TL KTCRNT S
D R N O T   T L   K T C R N T   S, Cross-entropy loss: 83.6934
D R N O T   T L   K T C R N T T T   S, Cross-entropy loss: 88.7711
  D T T R T S O T   N N R K   C L, Cross-entropy loss: 86.1878
--
