## Load and Test Fined-Tuned Model

In [1]:
## properway to load fine-tuned models
import torch
from peft import PeftModel    
from transformers import AutoModelForCausalLM, AutoTokenizer, LlamaTokenizer, StoppingCriteria, StoppingCriteriaList, TextIteratorStreamer

model_name = "meta-llama/Llama-2-7b-chat-hf"
adapters_name = "mychen76/Llama-2-7b-hf-guanaco-sm"

print(f"Starting to load the model {model_name} into memory")
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    #load_in_4bit=True,
    torch_dtype=torch.bfloat16,
    device_map={"": 0}
)
model = PeftModel.from_pretrained(model, adapters_name)
model = model.merge_and_unload()
tokenizer = LlamaTokenizer.from_pretrained(model_name)
tokenizer.bos_token_id = 1
stop_token_ids = [0]
print(f"Successfully loaded the model {model_name} into memory")

Starting to load the model meta-llama/Llama-2-7b-chat-hf into memory


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Successfully loaded the model meta-llama/Llama-2-7b-chat-hf into memory


In [2]:
prompt = "Today was an amazing day because"
device_map={"": 0}
inputs = tokenizer(prompt, return_tensors="pt").to(device_map)
tokenizer.pad_token = tokenizer.eos_token
outputs = model.generate(input_ids=inputs["input_ids"].to("cuda"), do_sample=True, num_beams=1, max_new_tokens=100)
tokenizer.batch_decode(outputs, skip_special_tokens=True)

Attempting to cast a BatchEncoding to type {'': 0}. This is not supported.


['Today was an amazing day because I got to go to the beach with my family! We had so much fun playing in the waves and building sandcastles.\nI love spending time with my family because we always have so much fun together. We played games, ran around, and even had a sandcastle building contest. My little brother won, but I still had a great time.\nThe best part of the day was when we found a really cool shell that my dad used to make a sandcastle. We']

In [3]:
# To use it for zero-shot classification:
from transformers import pipeline
tokenizer.add_special_tokens({'pad_token': '[PAD]'})
classifier = pipeline("zero-shot-classification", model=model, tokenizer=tokenizer)
classifier("Today was an amazing day", candidate_labels=["negative", "positive"])

Failed to determine 'entailment' label id from the label2id mapping in the model config. Setting to -1. Define a descriptive label2id mapping in the model config to ensure correct outputs.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


{'sequence': 'Today was an amazing day',
 'labels': ['positive', 'negative'],
 'scores': [0.8899121284484863, 0.11008787900209427]}