# What This Will Tell You
* Logits shape: If it's [1, 8, vocab_size] (batch size 1, sequence length 8), the forward pass is producing output of the expected shape.

* Parameter count: You'll see that only a small percentage of the model's parameters are trainable (just the LoRA matrices), confirming LoRA's parameter efficiency.

* No crash: Running this without errors is a strong sign that your LoRA code is wired in properly.

In [None]:
import torch
from model_torch import Qwen2ForCausalLM, Qwen2Config

# Smoke test for the LoRA-enabled Qwen2 model: build the model from a default
# config, run one forward pass on random token ids, and report how many
# parameters are trainable.

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Create dummy config (match shapes with actual model if needed)
config = Qwen2Config(use_lora=True)  # Set use_lora in the config
# Only fall back to 0 when the config does not already provide a pad token id,
# so a value defined by Qwen2Config is never clobbered.
if getattr(config, "pad_token_id", None) is None:
    config.pad_token_id = 0
model = Qwen2ForCausalLM(config).to(device)  # No need to pass use_lora again
# Inference mode for layers such as dropout; torch.no_grad() alone does not
# change layer behaviour, only gradient tracking.
model.eval()

# Dummy input (batch=1, seq_len=8), created directly on the target device to
# avoid an extra host-to-device copy.
batch_size, seq_len = 1, 8
input_ids = torch.randint(
    0, config.vocab_size, (batch_size, seq_len), device=device
)

# Forward pass
with torch.no_grad():
    outputs = model(input_ids)
    # NOTE(review): assumes the logits are the second element of the model's
    # return value -- confirm against Qwen2ForCausalLM.forward.
    logits = outputs[1]

print(f"Logits shape: {logits.shape} (should be [1, 8, vocab_size])")

# Count trainable parameters
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
total_params = sum(p.numel() for p in model.parameters())

print(f"Trainable params: {trainable_params:,}")
print(f"Total params: {total_params:,}")
print(f"Percentage of trainable params: {100 * trainable_params / total_params:.4f}%")


Generating tokens: 100%|██████████| 30/30 [00:09<00:00,  3.04it/s]


AttributeError: 'Qwen2Config' object has no attribute 'pad_token_id'