In [1]:
import torch

from transformers import AutoTokenizer, AutoModelForCausalLM
# --- Config ---
# Identifier handed to AutoTokenizer.from_pretrained (base model tokenizer).
tokenizer_dir = "meta-llama/Llama-3.2-1B"
# Directory of the fine-tuned checkpoint whose weights are evaluated below.
model_dir = "/raid/home/rajivratn/hemant_rajivratn/last/src/trainllm/charllama-finetuned/checkpoint/"
# Preferred GPU index; the original cell hard-coded "cuda:7".
gpu_index = 7

# Kept from the original cell so the global cuDNN setting is unchanged.
torch.backends.cudnn.benchmark = True

# Choose the device exactly once. Fall back to CPU instead of crashing when
# the preferred GPU does not exist on this machine.
if torch.cuda.is_available() and torch.cuda.device_count() > gpu_index:
    device = torch.device(f"cuda:{gpu_index}")
else:
    device = torch.device("cpu")
    print(f"cuda:{gpu_index} is not available; running on CPU instead")

# --- Tokenizer ---
# NOTE(review): the tokenizer comes from the base model id, not from the
# checkpoint directory — confirm the fine-tuned checkpoint shares this vocabulary.
tokenizer = AutoTokenizer.from_pretrained(tokenizer_dir)
# Reuse the EOS token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

# --- Model ---
# Load the checkpoint, move it to the selected device in a single step, and
# switch to evaluation mode before any forward pass.
model = AutoModelForCausalLM.from_pretrained(model_dir)
model.to(device)
model.eval()


raw_texts = [
    "Hemant is my name",
    "Heeemmmannnnt iii",
    "DRNOT TL KTCRNT S"
]

# Put one space between every character of each text (existing spaces are
# characters too, so "is my" becomes "i s   m y").
input_texts = [" ".join(text) for text in raw_texts]

# reduction='none' keeps one loss value per predicted token instead of averaging.
criterion = torch.nn.CrossEntropyLoss(reduction='none')


def score_text(text, model, tokenizer, device, criterion):
    """Compute next-token cross-entropy for a single text.

    Args:
        text: String to score; tokenized without special tokens.
        model: Causal LM called as ``model(input_ids=..., labels=...)`` and
            expected to return an object exposing ``.logits`` and ``.loss``.
        tokenizer: Callable returning a mapping with an ``'input_ids'`` tensor.
        device: Device the token ids are moved to before the forward pass.
        criterion: Loss callable applied to flattened (logits, targets); the
            caller passes ``CrossEntropyLoss(reduction='none')``.

    Returns:
        ``(per_token_loss, model_loss)`` where ``per_token_loss`` is the
        criterion output for every position after the first and ``model_loss``
        is the scalar ``outputs.loss`` reported by the model itself, or
        ``None`` when the text has fewer than two tokens (there is no
        next-token target to score in that case).
    """
    encoded = tokenizer(
        text,
        return_tensors='pt',
        add_special_tokens=False  # no BOS/EOS is added around the text
    )
    input_ids = encoded['input_ids'].to(device)

    # With fewer than two tokens the shifted tensors below would be empty.
    if input_ids.size(1) < 2:
        return None

    with torch.no_grad():
        # The ids double as labels; the model's own loss is read from
        # outputs.loss below.
        outputs = model(input_ids=input_ids, labels=input_ids)

        # Position t predicts token t+1: drop the last logit and the first label.
        shift_logits = outputs.logits[:, :-1, :].contiguous()
        shift_labels = input_ids[:, 1:].contiguous()

        # Flatten to (batch * (seq_len - 1), vocab) vs (batch * (seq_len - 1),).
        per_token_loss = criterion(
            shift_logits.view(-1, shift_logits.size(-1)),
            shift_labels.view(-1)
        )

    return per_token_loss, outputs.loss


for text in input_texts:
    scored = score_text(text, model, tokenizer, device, criterion)
    if scored is None:
        print(f'{text}, skipped: fewer than 2 tokens, no next-token loss to compute')
        continue

    per_token_loss, loss = scored
    print(per_token_loss)
    print(f'{text}, Cross-entropy loss: {loss.item():.4f}')

  from .autonotebook import tqdm as notebook_tqdm


tensor([ 7.7038, 16.1465,  7.2641,  9.9082,  9.7559,  9.9847,  5.8833,  8.3834,
         5.3928,  9.3260,  4.6238,  5.3816,  7.9783,  3.1041,  1.0387,  0.0580],
       device='cuda:7')
H e m a n t   i s   m y   n a m e, Cross-entropy loss: 6.9958
tensor([ 7.7038, 15.7044,  9.0453, 17.8625, 17.1960, 18.0365, 20.9620, 17.3959,
        14.8000, 14.1764, 14.0326, 17.6092, 12.9568, 12.5221, 12.1310, 11.6546],
       device='cuda:7')
H e e e m m m a n n n n t   i i i, Cross-entropy loss: 14.6118
tensor([ 6.3487, 19.8938, 23.1241, 17.7659, 12.7637, 14.2960, 16.9378, 14.7384,
        18.0471, 21.9016, 24.7260, 26.8820, 26.2623, 25.4903, 21.6811, 24.6643],
       device='cuda:7')
D R N O T   T L   K T C R N T   S, Cross-entropy loss: 19.7202


In [None]:
# H e m a n t   i s   m y   n a m e, Cross-entropy loss: 3.2496
# H e e e m m m a n n n n t   i i i, Cross-entropy loss: 3.1241
# D R N O T   T L   K T C R N T   S, Cross-entropy loss: 3.9608

In [None]:
# H e m a n t   i s   m y   n a m e, Cross-entropy loss: 3.1271
# H e e e m m m a n n n n t   i i i, Cross-entropy loss: 3.0889
# D R N O T   T L   K T C R N T   S, Cross-entropy loss: 3.6400