In [1]:
from transformers import AutoModelForCausalLM, set_seed

In [2]:
# Load the pretrained Mistral-7B base checkpoint as a causal language model
# (the files are downloaded on first use, as the progress bars below show).
# device_map="auto" leaves weight placement on the available devices to the library.
model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-v0.1", device_map="auto"  # optional 4-bit quantization (load_in_4bit=True) is left disabled
)

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

In [3]:
from transformers import AutoTokenizer

In [4]:
# Load the tokenizer that matches the model checkpoint.
# padding_side="left" puts padding in front of shorter prompts, so that in a batch
# every prompt ends at the last position and generation continues directly from it.
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1", padding_side="left")

tokenizer_config.json:   0%|          | 0.00/996 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

In [5]:
# Tokenize a single prompt into PyTorch tensors and move them to the device the
# model was placed on. model.device is used instead of a hard-coded "cuda" because
# the placement was delegated to device_map="auto" when the model was loaded.
model_inputs = tokenizer(["A list of colors: red, blue"], return_tensors="pt").to(model.device)

In [6]:
# Inspect the tokenizer output: token IDs plus the attention mask.
model_inputs

{'input_ids': tensor([[    1,   330,  1274,   302,  9304, 28747,  2760, 28725,  5045]],
       device='cuda:0'), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1]], device='cuda:0')}

In [8]:
# Shape is (number of prompts, number of tokens per prompt).
model_inputs.input_ids.shape

torch.Size([1, 9])

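The tokenizer splits the sentence into subword tokens and converts each one to a number (its ID in the vocabulary); it also prepends the beginning-of-sequence token `<s>` (ID 1). Inside the model, the embedding layer turns each ID into a dense vector by looking up the corresponding row of the embedding matrix, which is equivalent to one-hot encoding the ID and multiplying by that matrix. The vector size is model-specific: 4096 for Mistral-7B, not 512.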

In [9]:
# Continue the prompt. No length or sampling arguments are passed, so the
# library's default generation settings apply.
generated_ids = model.generate(**model_inputs)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [10]:
# Inspect the raw output: the prompt's token IDs followed by the newly generated ones.
generated_ids 

tensor([[    1,   330,  1274,   302,  9304, 28747,  2760, 28725,  5045, 28725,
          5344, 28725,  9684, 28725, 14545, 28725, 19435, 28725, 12937, 28725]],
       device='cuda:0')

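`generated_ids` starts with the same 9 prompt tokens as before and is extended by 11 newly generated tokens, for 20 tokens in total. Because no `max_new_tokens` was passed, generation stopped at the library's default length limit.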

In [11]:
# Convert the token IDs back into text, dropping special tokens such as the leading <s>.
tokenizer.batch_decode(generated_ids, skip_special_tokens=True)

['A list of colors: red, blue, green, yellow, orange, purple, pink,']

In [12]:
# Reuse the end-of-sequence token as the padding token so that prompts of different
# lengths can be padded into one batch (padding=True below needs a pad token).
# NOTE(review): presumably this tokenizer ships without a pad token -- confirm.
tokenizer.pad_token = tokenizer.eos_token

In [13]:
# Tokenize two prompts of different lengths as one padded batch and move the
# tensors to the model's device. model.device replaces a hard-coded "cuda"
# because the placement was delegated to device_map="auto" at load time.
model_inputs = tokenizer(
    ["A list of colors: red, blue", "Portugal is"], return_tensors="pt", padding=True
).to(model.device)

In [15]:
# Generate continuations for the whole padded batch in a single call; everything
# the tokenizer returned is forwarded through ** unpacking.
generated_ids = model.generate(**model_inputs)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [16]:
# Decode every sequence in the batch back to text, with special tokens removed.
tokenizer.batch_decode(
    generated_ids,
    skip_special_tokens=True,
)

['A list of colors: red, blue, green, yellow, orange, purple, pink,',
 'Portugal is a country that is known for its beautiful beaches,']

In [17]:
# Decode again but keep special tokens, which makes the left padding (</s>)
# and the start-of-sequence marker (<s>) visible.
tokenizer.batch_decode(
    generated_ids,
    skip_special_tokens=False,
)

['<s> A list of colors: red, blue, green, yellow, orange, purple, pink,',
 '</s></s></s></s></s></s><s> Portugal is a country that is known for its beautiful beaches,']

I'm trying a new sentence using the same model...

In [37]:
def generate_output(input_str,max_tokens=30,do_sample=False):
    """Generate continuations for the given prompts and print the decoded text.

    Uses the notebook-level ``model`` and ``tokenizer``. Prompts are padded to a
    common length, so the tokenizer must already have a pad token configured.

    Args:
        input_str: List of prompt strings (a single string is passed to the
            tokenizer unchanged as well).
        max_tokens: Maximum number of new tokens to generate per prompt.
        do_sample: Forwarded to ``model.generate``. When True the continuation
            is sampled and differs between runs unless a seed is set; when
            False the default deterministic decoding is used.

    Returns:
        None. The list of decoded strings is printed.
    """
    # Move the inputs to wherever the model actually lives instead of assuming 'cuda'.
    model_inputs = tokenizer(input_str, return_tensors="pt", padding=True).to(model.device)
    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=max_tokens,
        do_sample=do_sample,
        # Passing the pad id explicitly avoids the "Setting `pad_token_id` to
        # `eos_token_id`" warning that was otherwise emitted on every call.
        pad_token_id=tokenizer.pad_token_id,
    )
    print(tokenizer.batch_decode(generated_ids, skip_special_tokens=True))

In [38]:
# Deterministic run: do_sample keeps its default of False, max_tokens its default of 30.
generate_output(["Some people think of a for"])

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


['Some people think of a forklift as a simple machine that is used to lift and move heavy objects. However, forklifts are actually quite complex machines that require a lot']


In [39]:
# Sampling is stochastic: fix the random seed first so that re-running the
# notebook reproduces the same continuation.
set_seed(42)
generate_output(["Portugal and Spain went to"], do_sample=True)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


['Portugal and Spain went to war with their neighbours (Germany and France) during World War II. However, the Iberian Peninsula did not become occupied territory']
