In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from pathlib import Path
from huggingface_hub import login
import torch

from dotenv import load_dotenv
import os
from pathlib import Path

None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.


ModuleNotFoundError: No module named 'torch'

In [None]:
# Read the Hugging Face token from a local .env file so it is never written into the notebook source.
env_path = Path('.') / '.env'
load_dotenv(dotenv_path=env_path)

secret_key = os.getenv('HF_SECRET_KEY')
# Show only whether the token was found. Ending the cell with the bare value
# would store the secret in the saved notebook's output.
secret_key is not None

In [None]:
# Authenticate with the Hugging Face Hub using the token held in `secret_key`.
login(token=secret_key)
# Base directory handed to `from_pretrained(cache_dir=...)` below; the relative
# path is resolved against the current working directory.
cache_dir = Path("../")

In [3]:
# Tokenizer and weights come from the same Hub repository; each is cached in its own subfolder of `cache_dir`.
model_id = "meta-llama/Llama-3.2-1B"
tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir=cache_dir / "tokenizers")
model = AutoModelForCausalLM.from_pretrained(model_id, cache_dir=cache_dir / "models")

In [4]:
# Display the module tree of the loaded model (layer types and their dimensions).
model

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 2048)
    (layers): ModuleList(
      (0-15): 16 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (k_proj): Linear(in_features=2048, out_features=512, bias=False)
          (v_proj): Linear(in_features=2048, out_features=512, bias=False)
          (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=2048, out_features=8192, bias=False)
          (up_proj): Linear(in_features=2048, out_features=8192, bias=False)
          (down_proj): Linear(in_features=8192, out_features=2048, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
      )
    )
    (norm): LlamaRMSNorm((2048,), eps=1e-05)
    (rotary_emb):

In [10]:
# Use the GPU when torch reports one as available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
model.to(device)

# Text the model will be asked to continue
prompt = "I need a water bottle"

# Encode the prompt as PyTorch tensors and move them to the same device as the model
inputs = tokenizer(prompt, return_tensors="pt").to(device)
inputs

{'input_ids': tensor([[128000,     40,   1205,    264,   3090,  16893]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1]])}

In [11]:
# Generate a continuation of the prompt using seeded sampling.
#
# temperature / top_p / top_k only apply to sample-based decoding. With
# `do_sample` left unset, whether they take effect depends on the checkpoint's
# default generation config, so the flag is set explicitly here.
# Seeding torch's global RNG makes a fresh "Restart & Run All" reproduce the same text.
SEED = 42
torch.manual_seed(SEED)

outputs = model.generate(
    **inputs,
    max_new_tokens=50,
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
    top_k=50,
    repetition_penalty=1.1,
    # Reuse the EOS token id as the padding id for this call.
    pad_token_id=tokenizer.eos_token_id,
)

outputs

tensor([[128000,     40,   1205,    264,   3090,  16893,    369,    856,   8945,
              0,   3639,    527,    279,   1888,  16097,    311,   1427,    704,
            369,   5380,   4599,    499,   1176,   1212,    704,     11,    499,
           1253,   2733,   1093,    701,   1716,    374,   2133,   1555,   1475,
          10474,   3284,     13,   1472,    690,   1518,   1124,   3139,   1139,
            872,   1866,    323,   1935,    389,    502,  12034,    304,   2204,
           5627,    520]])

In [12]:
# Shape of the generated id tensor. The output recorded below is (1, 56):
# the 6 prompt ids shown earlier plus max_new_tokens=50 generated ids.
outputs.shape

torch.Size([1, 56])

In [13]:

# Convert the first (and only) generated sequence back into text, omitting special tokens, then show it.
first_sequence = outputs[0]
generated_text = tokenizer.decode(first_sequence, skip_special_tokens=True)
print(generated_text)


I need a water bottle for my baby! What are the best brands to look out for?
When you first start out, you may feel like your child is going through every phase possible. You will see them grow into their own and take on new interests in different ways at


In [14]:
# Memory occupied by the model's parameters, in bytes:
# bytes-per-element times element count, summed over every parameter tensor.
model_size = sum(p.element_size() * p.numel() for p in model.parameters())
# 1024 ** 3 bytes per gigabyte. The variable was previously named `model_size_MB`
# even though the value and the printed unit are gigabytes.
model_size_GB = model_size / (1024 ** 3)
print(f"Model size: {model_size_GB:.2f} GB")


# Count the number of parameters (reported in billions)
total_params = sum(p.numel() for p in model.parameters())
print(f"Total number of parameters: {(total_params / 1e9):.2f} B")

Model size: 4.60 GB
Total number of parameters: 1.24 B
