In [3]:
import torch 
import hf_olmo
from transformers import DataCollatorForLanguageModeling, TrainingArguments, \
            Trainer, AutoTokenizer, AutoModelForCausalLM
import mlflow 
import json

In [4]:
# Base checkpoint whose tokenizer is reused, plus the chat delimiter tokens
# that get registered on it as additional special tokens.
model_ckpt = "allenai/OLMo-1B"
special_tokens = ["<|im_start|>", "<|im_end|>"]

tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
# Kept as the cell's last expression so the number of added tokens is shown.
tokenizer.add_special_tokens(dict(additional_special_tokens=special_tokens))

2

In [8]:
# Load the causal LM from a local checkpoint directory in bfloat16 and let
# `device_map="auto"` choose the device placement.
checkpoint_path = '../mlflow_results/olmo1b_chat/output/checkpoint-1000'
load_kwargs = dict(
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True,
)
model = AutoModelForCausalLM.from_pretrained(checkpoint_path, **load_kwargs)

The model weights are not tied. Please use the `tie_weights` method before using the `infer_auto_device` function.
Some weights of OLMoForCausalLM were not initialized from the model checkpoint at ../mlflow_results/olmo1b_chat/output/checkpoint-1000 and are newly initialized: ['model.transformer.ff_out.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [9]:
# Bare last expression: displays the repr (module tree) of the loaded model.
model

OLMoForCausalLM(
  (model): Olmo(
    (transformer): ModuleDict(
      (wte): Embedding(50304, 2048)
      (emb_drop): Dropout(p=0.0, inplace=False)
      (ln_f): LayerNorm()
      (blocks): ModuleList(
        (0-15): 16 x OlmoSequentialBlock(
          (dropout): Dropout(p=0.0, inplace=False)
          (act): SwiGLU()
          (attn_out): Linear(in_features=2048, out_features=2048, bias=False)
          (ff_out): Linear(in_features=8192, out_features=2048, bias=False)
          (rotary_emb): RotaryEmbedding()
          (attn_norm): LayerNorm()
          (ff_norm): LayerNorm()
          (att_proj): Linear(in_features=2048, out_features=6144, bias=False)
          (ff_proj): Linear(in_features=2048, out_features=16384, bias=False)
        )
      )
      (ff_out): Embedding(50304, 2048)
    )
  )
)

In [36]:
def generate(prompt, max_new_tokens=1000, chat=True):
    """Generate text for ``prompt`` using the notebook-level ``model`` and ``tokenizer``.

    Args:
        prompt: In chat mode, the text of the final user turn. With
            ``chat=False``, the raw text that is tokenized and continued as-is.
        max_new_tokens: Upper bound on the number of newly generated tokens.
        chat: When true, ``prompt`` is appended as the last user message after
            a fixed few-shot conversation about cats and the whole conversation
            is rendered with the tokenizer's chat template. When false, no
            template or few-shot context is applied.

    Returns:
        The decoded first sequence returned by ``model.generate`` with special
        tokens left in place.
    """
    if chat:
        messages = [
            {"content": "can you describe a cat?", "role": "user"},
            {"content": "Sure! A cat is a small, domesticated carnivorous mammal that belongs to the family Felidae and is commonly referred to as a domestic animal. Cats are known for their sleek, muscular bodies, long legs, and sharp claws, which they use to hunt and catch prey. They also have a variety of colors and patterns, including black, brown, gray, white, and orange. Cats are often playful and affectionate, and they enjoy playing with toys and chasing after objects. They are also known for their ability to adapt to different environments and habitats, making them popular pets around the world.", "role": "assistant"},
            {"content": "what do they eat?", "role": "user"},
            {"content": "Cats are omnivores, meaning they can consume both plants and animals. Some common foods that cats may eat include meat, fish, eggs, vegetables, fruits, and grains. However, it's important to note that some foods may be toxic or harmful to cats, so it's always best to consult with a veterinarian before feeding your cat any new food. Additionally, cats should not be given human food, such as chocolate, caffeine, or alcohol, as these substances can be toxic to cats.", "role": "assistant"},
            {"content": "Whats something they like to do?", "role": "user"},
            {"content": """Cats are known for their love of playtime and engaging in various activities, such as batting at objects, chasing after toys, and climbing on furniture. They also enjoy spending time outdoors, exploring their surroundings and interacting with other animals. Some common activities that cats may enjoy include:

1. Playing games: Cats love to play games, such as hide-and-seek, chase, and fetch.
2. Chewing: Cats enjoy chewing on things, such as bones, string, and cardboard boxes.
3. Tug-of-war: Cats love to play tug-of-war with their owners or other animals.
4. Scratching: Cats enjoy scratching posts and other surfaces to mark their territory.
5. Sitting on laps: Cats enjoy being close to their owners and can often be found sitting on laps or near their humans.
6. Napping: Cats enjoy napping in a warm, cozy spot, such as a bed or a cat tree.
7. Meowing: Cats meow to communicate with their owners and other animals.
8. Licking: Cats enjoy licking themselves and other animals.
9. Playing with toys: Cats love to play with toys, such as balls, mice, and feather toys.
10. Visiting parks or other outdoor areas: Cats enjoy visiting parks or other outdoor areas to explore and interact with other animals.""", "role": "assistant"},
            {"content": prompt, "role": "user"},
        ]

        # add_generation_prompt=True asks the template to end the rendered text
        # with the start of an assistant turn, so generation begins as the
        # reply instead of continuing from the end of the user message.
        formatted_prompt = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )
    else:
        # Previously this path left `formatted_prompt` unassigned and crashed
        # with UnboundLocalError; plain mode now continues the raw prompt.
        formatted_prompt = prompt

    input_ids = tokenizer(formatted_prompt, return_tensors="pt").input_ids.to(
        model.device
    )

    gen_tokens = model.generate(
        input_ids,
        max_new_tokens=max_new_tokens,
        eos_token_id=tokenizer.eos_token_id,
        repetition_penalty=1.1,
    )

    # Special tokens are kept so the chat delimiters remain visible in the text.
    return tokenizer.batch_decode(gen_tokens, skip_special_tokens=False)[0]

In [37]:
# Ask a follow-up question in chat mode and print the decoded output.
chat_output = generate("So do they like to nap?", chat=True)
print(chat_output)

<|im_start|>user
can you describe a cat?<|im_end|>
<|im_start|>assistant
Sure! A cat is a small, domesticated carnivorous mammal that belongs to the family Felidae and is commonly referred to as a domestic animal. Cats are known for their sleek, muscular bodies, long legs, and sharp claws, which they use to hunt and catch prey. They also have a variety of colors and patterns, including black, brown, gray, white, and orange. Cats are often playful and affectionate, and they enjoy playing with toys and chasing after objects. They are also known for their ability to adapt to different environments and habitats, making them popular pets around the world.<|im_end|>
<|im_start|>user
what do they eat?<|im_end|>
<|im_start|>assistant
Cats are omnivores, meaning they can consume both plants and animals. Some common foods that cats may eat include meat, fish, eggs, vegetables, fruits, and grains. However, it's important to note that some foods may be toxic or harmful to cats, so it's always best