In [None]:
import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, GenerationConfig

In [None]:
# This recipe requires a Hugging Face token to access the Mistral models on the Hub. You will need to create a Hugging Face account
# and generate a token at https://huggingface.co/settings/tokens. You will also need to visit the model card at
# https://huggingface.co/mistralai/Mistral-7B-v0.3 and accept the terms to use the model. Once you have generated the token,
# save it in an environment variable named HUGGINGFACE_TOKEN and read it in the recipe as shown in the snippet below.
from huggingface_hub import login

# Read the access token from the environment so it is never hardcoded in the notebook.
# hf_token is None when HUGGINGFACE_TOKEN is not set.
hf_token = os.getenv('HUGGINGFACE_TOKEN')

# Authenticate this session against the Hugging Face Hub.
login(token=hf_token)

In [None]:
# `torch` is already imported in the notebook's first cell.
def xpu_summary(torch_module=torch):
    """Collect basic Intel XPU information from a PyTorch module.

    Args:
        torch_module: Module to inspect; defaults to the imported ``torch``.

    Returns:
        dict with keys ``"available"`` (bool), ``"device_count"`` (int) and
        ``"device_name"`` (name of device 0, or ``None`` when no XPU is available).
    """
    # Some PyTorch builds do not expose a `torch.xpu` namespace at all; report
    # "no XPU" for them instead of failing with AttributeError.
    xpu = getattr(torch_module, "xpu", None)
    if xpu is None:
        return {"available": False, "device_count": 0, "device_name": None}

    available = xpu.is_available()
    return {
        "available": available,
        "device_count": xpu.device_count(),
        # Only query the device name when a device is actually present.
        "device_name": xpu.get_device_name(0) if available else None,
    }


xpu_info = xpu_summary()

print("Torch version:", torch.__version__)
print("XPU available:", xpu_info["available"])
print("Device count:", xpu_info["device_count"])

if xpu_info["available"]:
    print("Device name:", xpu_info["device_name"])


In [None]:
# Checkpoint shared by the model and the tokenizer. Keeping the id in one place
# guarantees both are always loaded from the same repository.
MODEL_ID = "mistralai/Mistral-7B-v0.3"

# Prefer an Intel XPU when this PyTorch build exposes one, otherwise fall back to CPU.
# getattr() guards against PyTorch builds that have no `torch.xpu` namespace.
_xpu = getattr(torch, "xpu", None)
device = torch.device("xpu" if _xpu is not None and _xpu.is_available() else "cpu")

# 4-bit loading: NF4 quantization type, double quantization enabled, bfloat16 compute dtype.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)

# NOTE(review): `device` is not passed here; placement is delegated to
# device_map="auto" -- confirm the two agree on the target machine.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",
    quantization_config=quantization_config,
)

# Tokenizer configured to pad on the left.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, padding_side="left")

In [None]:
# Decoding settings: four beams with early stopping and at most 900 newly generated
# tokens. The model's EOS token id is used both as the stop token and as the padding id.
_eos_token_id = model.config.eos_token_id
_generation_settings = {
    "num_beams": 4,
    "early_stopping": True,
    "max_new_tokens": 900,
    "eos_token_id": _eos_token_id,
    "pad_token_id": _eos_token_id,
}
generation_config = GenerationConfig(**_generation_settings)

In [None]:
# Prompt text that is tokenized and passed to the model for generation.
seed_sentence = (
    "Step by step way on how to make an apple pie:"
)

In [None]:
# Tokenize the prompt as a one-element batch of PyTorch tensors and move it to the
# device that actually holds the model. The model is loaded with device_map="auto",
# so its placement is not necessarily the `device` selected earlier; using
# model.device keeps the inputs and the model on the same device.
model_inputs = tokenizer([seed_sentence], return_tensors="pt").to(model.device)

# Generate a continuation using the decoding settings in `generation_config`.
generated_ids = model.generate(**model_inputs, generation_config=generation_config)

In [None]:
# Convert the generated ids back to text, dropping special tokens, and keep the
# first (index 0) decoded sequence of the batch.
_decoded_batch = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
generated_tokens = _decoded_batch[0]

print(generated_tokens)