### Install related packages
- Visit https://pytorch.org/ to install Pytorch libraries and CUDA 12.1 depending on your OS.
- Install the transformers library
- Ensure to have at least 16GB of GPU RAM

In [1]:
# !pip install transformers

### Select the model to generate samples

In [2]:
# model_name = "HuggingFaceH4/zephyr-7b-beta"
# model_name = "mistralai/Mistral-7B-v0.1"
# model_name = "microsoft/phi-2"
model_name = "mistralai/Mistral-7B-Instruct-v0.1"

In [3]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda"

model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16)
model.to(device)

tokenizer = AutoTokenizer.from_pretrained(model_name)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [4]:
no_words = 512 # no of words to generate
topics = ['politics', 'riots']
topics = ' or '.join(topics)
prompt = f'''
Generate some article about {topics} in around {no_words} words.
'''
model_inputs = tokenizer([prompt], return_tensors="pt").to(device)

In [5]:
# Model generation parameters, tweak around max_length and temperature for more creative outputs
# https://huggingface.co/docs/transformers/en/main_classes/text_generation#transformers.GenerationConfig
generation_parameters = {
    "max_length": 1024,
    "temperature": 0.9,
    "top_k": 10,
    "top_p": 0.95,
    "repetition_penalty": 1.2,
    "num_return_sequences": 1,
    "do_sample": True,
    # "eos_token_id": tokenizer.eos_token_id
}

### Generate a sample using the above prompt

prompt = "Generate some article about {topics} in around {no_words} words."

In [6]:
generated_ids = model.generate(**model_inputs, **generation_parameters)
generated_ids_without_prompt = generated_ids[0][len(model_inputs['input_ids'][0]):].unsqueeze(0)
tokenizer.batch_decode(generated_ids_without_prompt, skip_special_tokens=False)[0]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


