In [1]:
from vllm import LLM, SamplingParams
import sys

sys.path.append("..")

from helpers import get_conv_template

In [2]:
# Create an LLM.
llm = LLM(model="meta-llama/Meta-Llama-3-8B-Instruct")

INFO 06-11 03:30:04 llm_engine.py:161] Initializing an LLM engine (v0.4.3) with config: model='meta-llama/Meta-Llama-3-8B-Instruct', speculative_config=None, tokenizer='meta-llama/Meta-Llama-3-8B-Instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, rope_scaling=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=8192, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), seed=0, served_model_name=meta-llama/Meta-Llama-3-8B-Instruct)


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


INFO 06-11 03:30:05 weight_utils.py:207] Using model weights format ['*.safetensors']
INFO 06-11 03:30:07 model_runner.py:146] Loading model weights took 14.9595 GB
INFO 06-11 03:30:08 gpu_executor.py:83] # GPU blocks: 27895, # CPU blocks: 2048
INFO 06-11 03:30:10 model_runner.py:854] Capturing the model for CUDA graphs. This may lead to unexpected consequences if the model is not static. To run the model in eager mode, set 'enforce_eager=True' or use '--enforce-eager' in the CLI.
INFO 06-11 03:30:10 model_runner.py:858] CUDA graphs can take additional 1~3 GiB memory per GPU. If you are running out of memory, consider decreasing `gpu_memory_utilization` or enforcing eager mode. You can also reduce the `max_num_seqs` as needed to decrease memory usage.
INFO 06-11 03:30:13 model_runner.py:924] Graph capturing finished in 4 secs.


In [3]:
# Sample prompts.
system_prompt = """Consider the sentence below. Identify its main subject entity.
Write a short sentence inventing a new piece of information about that entity, which ought to change the continuation.
Do not add extra commentary.

Example:
Input:
Sentence: Altered Carbon is a 2002 British cyberpunk novel by the English writer Richard K. Morgan.

Output:
<result>
Entity: Altered Carbon
New Context: Altered Carbon was written in 1994
<result/>
"""

prompt = "His Last Haul is a 1928 American silent crime drama film directed by Marshall Neilan and starring Tom Moore, Seena Owen and Alan Roscoe."
# Create a sampling params object.
sampling_params = SamplingParams(temperature=0.8, top_p=0.95, top_k=50, max_tokens=128)

conv = get_conv_template("llama3")
conv.system_message = system_prompt
conv.append_message(conv.roles[0], prompt)
conv.append_message(conv.roles[1], "")

out = llm.generate(conv.get_prompt(), sampling_params=sampling_params)

Processed prompts: 100%|██████████| 1/1 [00:00<00:00,  2.92it/s, Generation Speed: 73.24 toks/s]


In [4]:
print(out[0].outputs[0].text)

Entity: His Last Haul
New Context: His Last Haul was a remake of a 1917 German film


 - check if the entity is correct
 - subset original sentence based on entity
 - generate new continuation using gpt2 (we can use vllm again for this)
 - careful about tokenization


In [5]:
# load gpt2 model from huggingface
import transformers
from transformers import GPT2LMHeadModel, GPT2Tokenizer

In [6]:
import json


def extract_fields(text):
    lines = text.strip().split("\n")
    entity = None
    new_context = None

    for line in lines:
        line = line.strip()
        if line.startswith("Entity:"):
            entity = line.split(":", 1)[1].strip()
        elif line.startswith("New Context:"):
            new_context = line.split(":", 1)[1].strip()

    if new_context and not new_context.endswith("."):
        new_context += "."

    return entity, new_context


def check_entity_in_sentence(entity, sentence):
    return entity in sentence


def split_sentence_by_entity(entity, sentence):
    if entity in sentence:
        before_entity, after_entity = sentence.split(entity, 1)
        before_entity += entity
        return before_entity, after_entity.strip()
    else:
        return None, None


def continue_with_gpt2(text):
    # Load pre-trained model and tokenizer
    model = GPT2LMHeadModel.from_pretrained("gpt2")
    tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

    # Tokenize input text
    input_ids = tokenizer.encode(text, return_tensors="pt")

    # Generate continuation
    outputs = model.generate(
        input_ids, max_length=100, num_return_sequences=1, temperature=1
    )

    # Decode and return continuation
    # There's a weird bug here, where gpt2 wants to continue the sentence with "\xa0".
    # We can strip these later it's no big deal, but...why? does it indicate some problem?
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


# Example usage
outputs = [
    """
    Entity: His Last Haul 
    New Context: His Last Haul was a box office flop
    """,
    """
    Entity: The Shawshank Redemption
    New Context: The Shawshank Redemption is a critically acclaimed film.
    """,
    """
    Entity: Python Programming
    New Context: Python is a popular programming language
    """,
]

sentences = [
    "I watched His Last Haul last night, and it was disappointing.",
    "The Shawshank Redemption is my favorite movie of all time.",
    "I'm learning Python Programming to improve my coding skills.",
]

results = []

for i, output in enumerate(outputs):
    result = {}
    entity, new_context = extract_fields(output)
    result["entity"] = entity
    result["new_context"] = new_context

    sentence = sentences[i]
    result["sentence"] = sentence

    if check_entity_in_sentence(entity, sentence):
        result["entity_present"] = True
        before_entity, after_entity = split_sentence_by_entity(entity, sentence)
        result["before_entity"] = before_entity
        result["after_entity"] = after_entity
        result["gpt2_continuation"] = continue_with_gpt2(
            new_context + " " + before_entity
        )
    else:
        result["entity_present"] = False
        result["before_entity"] = None
        result["after_entity"] = None
        result["gpt2_continuation"] = continue_with_gpt2(sentence)

    results.append(result)

# Write the results to a JSON file
with open("results.json", "w") as file:
    json.dump(results, file, indent=4)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In [7]:
results

[{'entity': 'His Last Haul',
  'new_context': 'His Last Haul was a box office flop.',
  'sentence': 'I watched His Last Haul last night, and it was disappointing.',
  'entity_present': True,
  'before_entity': 'I watched His Last Haul',
  'after_entity': 'last night, and it was disappointing.',
  'gpt2_continuation': 'His Last Haul was a box office flop. I watched His Last Haul with my wife and kids. I watched it with my wife and kids. I watched it with my wife and kids. I watched it with my wife and kids. I watched it with my wife and kids. I watched it with my wife and kids. I watched it with my wife and kids. I watched it with my wife and kids. I watched it with my wife and kids. I watched it with my'},
 {'entity': 'The Shawshank Redemption',
  'new_context': 'The Shawshank Redemption is a critically acclaimed film.',
  'sentence': 'The Shawshank Redemption is my favorite movie of all time.',
  'entity_present': True,
  'before_entity': 'The Shawshank Redemption',
  'after_entity': 

In [8]:
results[0]["before_entity"] + results[0]["after_entity"]

'I watched His Last Haullast night, and it was disappointing.'