In [2]:
import os
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import warnings

# Suppress every Python warning for the rest of the kernel session.
# NOTE(review): this blanket filter also hides deprecation warnings raised by
# the libraries imported above — consider narrowing it to specific categories.
warnings.filterwarnings("ignore")

In [None]:
# Authenticate against the Hugging Face Hub. The token is read from the
# HF_TOKEN environment variable so no credential is written into the notebook.
# `os.environ.get` returns None when the variable is unset; `login` is then
# called with `token=None`.
HUGGING_FACE_HUB_TOKEN = os.environ.get("HF_TOKEN")
login(token=HUGGING_FACE_HUB_TOKEN)

model_name = "meta-llama/Llama-3.1-8b"

# Load tokenizer and model.
# `token=True` is the current spelling of the deprecated `use_auth_token=True`
# and keeps the same meaning: use the credential stored by `login` above.
tokenizer = AutoTokenizer.from_pretrained(model_name, token=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,  # half precision weights
    device_map="auto",          # let the library choose device placement
    token=True,
)

In [None]:
# Report where the model ended up and in which precision.
# `torch.cuda.get_device_name()` raises when no CUDA device is present, and
# the model is loaded with `device_map="auto"`, so only query the GPU name
# when CUDA is actually available.
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name()}")
else:
    print("GPU: none detected (CUDA unavailable)")
print(f"Model loaded on device: {model.device}")
print(f"Model dtype: {model.dtype}")

GPU: NVIDIA L4
Model loaded on device: cuda:0
Model dtype: torch.float16


In [15]:
# Run inference: tokenize one prompt, generate a continuation, print the text.
# NOTE(review): the recorded output continues the prompt instead of answering
# it, which suggests the loaded checkpoint is a base (non-instruct) model —
# confirm whether an instruction-tuned variant was intended.
prompt = "Tell me about the key features of LLaMA 3.1 8B."
# Move the encoded prompt to the same device as the model before generating.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# Fix the RNG seed so a re-run reproduces the same text if the checkpoint's
# generation config enables sampling (presumably it does — TODO confirm).
torch.manual_seed(0)

# `early_stopping` was dropped: it only controls beam search, and no
# `num_beams` is set here, so the flag had no effect.
outputs = model.generate(
    **inputs,
    max_new_tokens=250,  # Only counts generated tokens (not input)
    pad_token_id=tokenizer.eos_token_id,  # reuse EOS as the padding id
)

# `outputs[0]` is decoded in full, so the printed text starts with the prompt.
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Tell me about the key features of LLaMA 3.1 8B.1, and how it differs from its predecessors.
LLaMA 3.1 8B.1 is a large language model that has been trained on a dataset of 8 billion tokens, making it one of the largest language models available. It is a successor to LLaMA 3.1 7B, which was trained on 7 billion tokens.
The key features of LLaMA 3.1 8B.1 include:
  1. Larger training dataset: LLaMA 3.1 8B.1 was trained on a larger dataset of 8 billion tokens, compared to its predecessor which was trained on 7 billion tokens. This means that it has a more comprehensive understanding of language and can generate more accurate and coherent responses.
  2. Improved performance: LLaMA 3.1 8B.1 has been shown to perform better than its predecessor in a variety of tasks, including language translation, question answering, and text summarization.
  3. Faster training: LLaMA 3.1 8B.1 was trained using a new training algorithm that allows it to train faster than its predecessor. This means that it 