# Login to Hugging Face with access token

In [None]:
from huggingface_hub import login
from secret import HF_TOKEN

# Authenticate this session against the Hugging Face Hub using the token
# imported from the local `secret` module.
login(
    token=HF_TOKEN,
)

# Simple pipeline

In [None]:
from transformers import pipeline
import torch

# Build a text-generation pipeline around the Llama 3 instruct checkpoint,
# requesting bfloat16 weights and delegating placement to `device_map="auto"`.
text_generator = pipeline(
    "text-generation",
    model="meta-llama/Meta-Llama-3-8B-Instruct",
    model_kwargs={"torch_dtype": torch.bfloat16},
    device_map="auto",
)

# Chat-style input: one system message followed by the user's question.
messages = [
    {"role": "system", "content": "You are a useless chatbot."},
    {"role": "user", "content": "Who are you?"},
]

# Generation stops on either the tokenizer's EOS id or the id of "<|eot_id|>".
pipeline_tokenizer = text_generator.tokenizer
terminators = [
    pipeline_tokenizer.eos_token_id,  # type: ignore
    pipeline_tokenizer.convert_tokens_to_ids("<|eot_id|>"),  # type: ignore
]

# Sampling configuration, gathered in one place and unpacked into the call.
generation_options = {
    "max_new_tokens": 256,
    "eos_token_id": terminators,
    "do_sample": True,
    "temperature": 0.6,
    "top_p": 0.9,
}
outputs = text_generator(messages, **generation_options)

first_result = outputs[0]  # type: ignore
print(first_result["generated_text"])

# Simple loading and using a pre-trained model

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Load the tokenizer and the bfloat16 model from the same checkpoint.
checkpoint = "meta-llama/Meta-Llama-3-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(
    checkpoint,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# Using chat template
messages = [
    {"role": "system", "content": "You are a useless chatbot."},
    {"role": "user", "content": "Who are you?"},
]

# Render the conversation through the chat template, then move the
# resulting ids to the device the model lives on.
templated_ids = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt",
)
input_ids = templated_ids.to(model.device)  # type: ignore

# # Using string
# input_text = "Who are you?"
# input_ids = tokenizer(input_text, return_tensors="pt").to(model.device)

# Generation stops on either the tokenizer's EOS id or the id of "<|eot_id|>".
eot_token_id = tokenizer.convert_tokens_to_ids("<|eot_id|>")
terminators = [tokenizer.eos_token_id, eot_token_id]

outputs = model.generate(
    input_ids,
    max_new_tokens=256,
    eos_token_id=terminators,
    do_sample=True,
    temperature=0.6,
    top_p=0.9,
)

# Using chat template
# Decode only the tokens that follow the prompt in the first returned sequence.
prompt_length = input_ids.shape[-1]
generated_ids = outputs[0][prompt_length:]
response = tokenizer.decode(generated_ids, skip_special_tokens=True)

# # Using string
# response = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(response)

# Inference with TextStreamer

In [None]:
import warnings

from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer
import torch

# Checkpoint to stream from; swap in the commented line to try another model.
# model_id = "google/gemma-2-2b-it"
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
# The streamer is handed to `generate` below via its `streamer` argument.
streamer = TextStreamer(tokenizer)  # type: ignore

# Plain-text prompt (no chat template), tokenized and moved to the model's device.
input_text = '"Hey, come back here!", he shouted, as '
encoded_prompt = tokenizer(input_text, return_tensors="pt")
input_tokens = encoded_prompt.to(model.device)

# Silence warnings for the duration of the generation call only.
with warnings.catch_warnings(action="ignore"):
    output_tokens = model.generate(
        **input_tokens,
        streamer=streamer,
        max_new_tokens=100,
        do_sample=True,
        top_p=0.8,
    )


# Evaluate a model on a dataset

In [None]:
import evaluate
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the tokenizer and model from the same checkpoint (default dtype/device).
checkpoint = "meta-llama/Meta-Llama-3-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(
    checkpoint,
    clean_up_tokenization_spaces=True,
)
model = AutoModelForCausalLM.from_pretrained(checkpoint)

# Sample a continuation of a one-word prompt, capped at 50 tokens in total.
input_text = "The"
input_tokens = tokenizer(input_text, return_tensors="pt")
output_tokens = model.generate(
    **input_tokens,
    max_length=50,
    do_sample=True,
)
first_sequence = output_tokens[0]
output_text = tokenizer.decode(first_sequence, skip_special_tokens=True)

# Score the sampled text with the `perplexity` metric under the same model id.
# The call is left as a bare trailing expression so the notebook displays it.
evaluator = evaluate.load("perplexity")
evaluator.compute(
    predictions=[output_text],
    model_id="meta-llama/Meta-Llama-3-8B-Instruct",
)

# Train a model with transformers' Trainer
Note: the cell below is an unvalidated sketch and has not been run end to end. For a validated reference, follow https://huggingface.co/docs/transformers/en/training#evaluate or https://huggingface.co/learn/nlp-course/chapter7/6?fw=pt#initializing-a-new-model

In [None]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments, DataCollatorForLanguageModeling
import evaluate

# Fetch the Yelp review dataset. The returned object has this layout:
#
# DatasetDict({
#     'train': Dataset({
#         features: ['label', 'text'],
#         ...
#     }),
#     'test': Dataset({
#         features: ['label', 'text'],
#         ...
#     }),
#     ...
# })
dataset = load_dataset("yelp_review_full")

tokenizer = AutoTokenizer.from_pretrained(
    "meta-llama/Meta-Llama-3-8B-Instruct",
    clean_up_tokenization_spaces=True,
)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

def tokenize_function(examples, max_length=512):
    """Tokenize a batch of examples to a fixed sequence length.

    Args:
        examples: A batch from the dataset; its ``"text"`` entry holds the
            raw strings to tokenize.
        max_length: Length every sequence is padded or truncated to. It is
            given explicitly because ``padding="max_length"`` and
            ``truncation=True`` otherwise fall back to the tokenizer's
            ``model_max_length``, which does not appear to be a usable value
            for this checkpoint (TODO confirm against its tokenizer config).

    Returns:
        The tokenizer output as plain Python lists. No ``return_tensors`` is
        requested: ``Dataset.map`` stores results as Arrow columns anyway,
        and tensor conversion fails if sequences in a batch differ in length.
    """
    return tokenizer(
        examples["text"],
        padding="max_length",
        truncation=True,
        max_length=max_length,
    )

# Tokenize every split. The raw columns are dropped so that only tokenizer
# outputs reach the model: `Trainer` keeps a column named `label`, so the
# integer Yelp rating would otherwise be passed to the causal LM's forward call.
tokenized_dataset = dataset.map(
    tokenize_function,
    batched=True,
    remove_columns=dataset["train"].column_names,
)  # a DatasetDict with the same splits as `dataset`

# Causal-LM collator: with mlm=False no masked-language-modeling is applied.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)



metric = evaluate.load("perplexity")

def compute_metrics(eval_pred):
    """Compute token-level perplexity from the Trainer's evaluation output.

    The ``evaluate`` perplexity metric expects raw strings and scores them
    with a separately loaded model, so it cannot consume the logits that
    ``Trainer`` passes here. Perplexity is instead computed directly as
    ``exp(mean negative log-likelihood)`` of each label given the preceding
    positions.

    Args:
        eval_pred: A ``(predictions, label_ids)`` pair. ``predictions`` is
            taken to be logits of shape ``(batch, seq, vocab)`` (or a tuple
            whose first item is), and ``label_ids`` to be ``(batch, seq)``
            with ``-100`` marking ignored positions.

    Returns:
        ``{"mean_perplexity": float}``; the value is NaN when every label
        position is ignored.

    Note:
        The logits are converted to one float64 array in memory. For large
        vocabularies or evaluation sets, consider reducing them first via
        ``Trainer``'s ``preprocess_logits_for_metrics``.
    """
    import numpy as np

    logits, labels = eval_pred
    if isinstance(logits, tuple):
        # Some models return extra outputs alongside the logits.
        logits = logits[0]
    logits = np.asarray(logits, dtype=np.float64)
    labels = np.asarray(labels)

    # Position t predicts token t + 1, so drop the last logit and first label.
    shifted_logits = logits[..., :-1, :]
    shifted_labels = labels[..., 1:]
    valid = shifted_labels != -100
    if not valid.any():
        return {"mean_perplexity": float("nan")}

    # Numerically stable log-softmax over the vocabulary axis.
    shifted_logits = shifted_logits - shifted_logits.max(axis=-1, keepdims=True)
    log_probs = shifted_logits - np.log(
        np.exp(shifted_logits).sum(axis=-1, keepdims=True)
    )

    # Replace ignored labels by a valid index; they are masked out below.
    gather_index = np.where(valid, shifted_labels, 0)[..., None]
    token_log_probs = np.take_along_axis(log_probs, gather_index, axis=-1)[..., 0]

    mean_nll = -token_log_probs[valid].mean()
    return {"mean_perplexity": float(np.exp(mean_nll))}



# Model to fine-tune, loaded with the library's default dtype and placement.
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Meta-Llama-3-8B-Instruct",
)

# Outputs go to `test_trainer`; evaluation strategy is set to "epoch".
training_args = TrainingArguments(
    output_dir="test_trainer",
    eval_strategy="epoch",
)

# Wire the model, data, collator and metric callback into a single Trainer.
trainer_options = {
    "model": model,
    "tokenizer": tokenizer,
    "args": training_args,
    "data_collator": data_collator,
    "train_dataset": tokenized_dataset["train"],
    "eval_dataset": tokenized_dataset["test"],
    "compute_metrics": compute_metrics,
}
trainer = Trainer(**trainer_options)

