In [None]:
from transformers import AutoModelForCausalLM

# Make sure you are logged in if using gated models like Llama 2
# Run in terminal: huggingface-cli login

# Use the correct Hugging Face model ID
# model_id = "meta-llama/Llama-2-13b-chat-hf" 
model_id = "Qwen/Qwen1.5-14B-Chat" 
# model_id = "google/gemma-1.1-9b-it" 
# model_id = "mistralai/Mistral-7B-Instruct-v0.2" 

try:
    # Trust remote code often needed for models like Qwen
    model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True)
    # Get total number of parameters
    total_params = sum(p.numel() for p in model.parameters()) 
    # Or use built-in (might only count trainable by default sometimes, but usually gives total)
    # total_params = model.num_parameters() 
    print(f"Model: {model_id}")
    print(f"Total Parameters: {total_params:,}") 
except Exception as e:
    print(f"Could not load model {model_id}: {e}")
    print("Parameter count might be available on the Hugging Face Hub model card.")

# Note: This downloads the entire model, which can be large (tens of GB).
# Checking the Hub website model card is much faster for just the parameter count.

Downloading shards: 100%|██████████████████████████████████████████████████████████████████████████| 8/8 [05:42<00:00, 42.75s/it]
Loading checkpoint shards:  50%|█████████████████████████████████▌                                 | 4/8 [00:44<00:44, 11.18s/it]

In [None]:
from transformers import AutoTokenizer

# --- Choose ONE model ID ---
# model_id = "meta-llama/Llama-2-13b-chat-hf" 
model_id = "Qwen/Qwen1.5-14B-Chat" 
# model_id = "google/gemma-1.1-9b-it" 
# model_id = "mistralai/Mistral-7B-Instruct-v0.2" 
# ---

print(f"--- Analysing Model ID: {model_id} ---")

# Make sure you are logged in if using gated models like Llama 2
# Run in terminal: huggingface-cli login

try:
    # Use trust_remote_code=True for models like Qwen or others that require it
    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

    # OPTION 1: Check the tokenizer's chat_template attribute (Jinja format)
    print("\nTokenizer's Stored Chat Template (Jinja format):")
    print(tokenizer.chat_template)

    # OPTION 2: Apply the template to a sample conversation (often clearest)
    print("\nApplying template to a sample conversation:")
    sample_conversation = [
        {"role": "system", "content": "You are a concise pirate bot."},
        {"role": "user", "content": "Hello, how are you?"},
        {"role": "assistant", "content": "Arrr, I be fine, matey!"},
        {"role": "user", "content": "What's the weather like?"}
    ]

    # Apply template WITHOUT tokenizing to see the raw string
    formatted_prompt = tokenizer.apply_chat_template(
        sample_conversation, 
        tokenize=False, 
        add_generation_prompt=True # Adds the prompt for the next assistant response
    )
    print(formatted_prompt)

except Exception as e:
    print(f"\nCould not process tokenizer for {model_id}: {e}")
    print("Check the model name, your internet connection, and if you need to log in (huggingface-cli login).")

print("-" * (len(model_id) + 20))

In [1]:
def get_param_count(model_id):
    return model2size.get(model_id, "Unknown model or size not listed.")

print(get_param_count("mistralai/Mistral-7B-Instruct-v0.3"))
# Output: 7250000000

NameError: name 'model2size' is not defined