<a href="https://colab.research.google.com/github/softmurata/colab_notebooks/blob/main/llm/longllama3B.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Installation

In [None]:
!pip install --upgrade pip
!pip install transformers==4.30  sentencepiece accelerate -q

Load model

In [None]:
import torch
from transformers import LlamaTokenizer, AutoModelForCausalLM

# Hub repository holding the LongLLaMA 3B weights; the tokenizer is loaded
# from the same repository.
MODEL_PATH = 'syzymon/long_llama_3b'
TOKENIZER_PATH = MODEL_PATH

# Reduced precision so that the model fits into the Colab GPU.
TORCH_DTYPE = torch.bfloat16

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer = LlamaTokenizer.from_pretrained(TOKENIZER_PATH)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    torch_dtype=TORCH_DTYPE,
    # Load the whole model onto the single device selected above.
    device_map=device,
    # The model class comes from code shipped in the Hub repository,
    # so remote code has to be explicitly allowed.
    trust_remote_code=True,
    # mem_attention_grouping trades speed for memory usage.
    # NOTE(review): the original comment refers to an "Additional configuration"
    # section that is not part of this notebook - presumably the upstream
    # LongLLaMA README; confirm there what the (1, 2048) tuple means.
    mem_attention_grouping=(1, 2048),
)
# Switch to evaluation mode; this notebook only runs inference.
model.eval()

Inference

In [None]:
from transformers import TextStreamer

# Short free-form prompt that the model is asked to continue.
prompt = "My name is Julien and I like to"

# Tokenize into PyTorch tensors and move the ids to the device the model
# was loaded on.
input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)

# Handed to generate() below so the text is printed while it is being produced.
streamer = TextStreamer(tokenizer)

# Seed the RNG because sampling is enabled (do_sample=True).
torch.manual_seed(60)
generation_output = model.generate(
    input_ids=input_ids,
    streamer=streamer,
    do_sample=True,
    temperature=1.0,
    num_beams=1,
    max_new_tokens=256,
    # NOTE(review): last_context_length is not a standard generate() argument;
    # presumably it is consumed by the LongLLaMA remote code - confirm its
    # exact meaning against the upstream documentation.
    last_context_length=1792,
)

In [None]:
# Few-shot prompt: three worked examples that map an editing instruction to a
# JSON-like record, followed by a fourth instruction whose "Output:" is left
# open for the model to complete.
prompt = '''You act as a prompt generator. Please do the classification task like the following examples.

Prompt: replace chair into sofa at the left side
Output: {"part": "left", "source object": ["chair"], "target object": ["sofa"]}

Prompt: change wooden table into white table in the right part
Output: {"part": "right", "source object": ["wooden table"], "target object": ["white table"]}

Prompt: replace blue chair and red sofa into yellow table and green chair at the bottom
Output: {"part": "bottom", "source object": ["blue chair", "red sofa"], "target object": ["yellow table", "green chair"]}

Prompt: replace sofa and shelf into chair and picture at the right side in the room
Output:'''
input_ids = tokenizer(prompt, return_tensors="pt").input_ids
input_ids = input_ids.to(device)

# Sampling is enabled below, so seed the RNG in this cell as well. Without it
# the result depends on how much randomness earlier cells consumed, and
# re-running this cell on its own would give a different completion each time.
torch.manual_seed(60)
generation_output = model.generate(
    input_ids=input_ids,
    max_new_tokens=256,
    num_beams=1,
    # NOTE(review): last_context_length is not a standard generate() argument;
    # presumably it is consumed by the LongLLaMA remote code - confirm its
    # exact meaning against the upstream documentation.
    last_context_length=128,
    do_sample=True,
    temperature=1.0,
    # No streamer here: the full sequence is decoded and printed once below.
)
# Decode the first (only) returned sequence, dropping special tokens.
print(tokenizer.decode(generation_output[0], skip_special_tokens=True))