In [5]:
import json
import re
import jsonlines
import tiktoken
import textwrap
import torch
import random
import requests

from pathlib import Path
from collections import deque
from tqdm.notebook import tqdm

from datasets import load_dataset
from IPython.display import clear_output


In [6]:
import transformers
from peft import PeftModel
from transformers import LlamaForCausalLM, LlamaTokenizer, GenerationConfig


In [7]:
from utils.prompter import Prompter
from utils.callbacks import Iteratorize, Stream


In [8]:
# Inference settings shared by the cells below.
device = "cuda"  # device the prompt's input ids are moved to before generation
base_model = "huggyllama/llama-7b"  # identifier of the base LLaMA checkpoint (also used for the tokenizer)
lora_weights = "./lora-hi-7b-lora-16/checkpoint-590/adapter_model/"  # relative path to the LoRA adapter directory

In [9]:
# Load the base LLaMA checkpoint named by `base_model`.
model = LlamaForCausalLM.from_pretrained(
    base_model,
    load_in_8bit=True,  # 8-bit weight loading (presumably needs bitsandbytes and a CUDA GPU — confirm for this environment)
    torch_dtype=torch.float16,
    device_map="auto",  # leave module placement to the loader
)

The model weights are not tied. Please use the `tie_weights` method before using the `infer_auto_device` function.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [10]:
# Wrap the base model with the LoRA adapter stored at `lora_weights`.
# NOTE: `model` is rebound to the wrapped model, so re-running this cell on its
# own feeds the already-wrapped model back in; re-run the base-model load first.
model = PeftModel.from_pretrained(
    model,
    lora_weights,
)

In [11]:
# The tokenizer is loaded from the base checkpoint, not from the adapter directory.
tokenizer = LlamaTokenizer.from_pretrained(base_model)

In [12]:
# Put the model in evaluation mode, then wrap it with torch.compile.
# NOTE: `model` is rebound here, so re-running this cell passes the
# already-compiled wrapper through torch.compile again.
model.eval()
model = torch.compile(model)

In [13]:
# Prompt helper from the project's utils.prompter module; "hi_detailed" is the
# template name (presumably resolved to a template file there — confirm).
prompter = Prompter("hi_detailed")

In [14]:
# Read the examples from a local JSON Lines file (relative path). Later cells
# index the result with the "train" key.
dataset = load_dataset("json", data_files="./hi_short_test.jsonl")

Found cached dataset json (/home/omer/.cache/huggingface/datasets/json/default-11680bcd564fc7b9/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4)


  0%|          | 0/1 [00:00<?, ?it/s]

In [15]:
def generate_with_callback(callback=None, **kwargs):
    """Run ``model.generate`` with a ``Stream`` stopping criterion bound to ``callback``.

    Args:
        callback: Callable handed to ``Stream(callback_func=...)``; may be ``None``.
        **kwargs: Forwarded to ``model.generate``. An optional
            ``stopping_criteria`` entry is honoured: its items are kept and the
            ``Stream`` criterion is added after them.

    Returns:
        None. Whatever ``model.generate`` returns is discarded; results are
        delivered only through ``callback``.
    """
    # Build a fresh criteria list rather than appending to the caller's object:
    # appending in place would leave an extra Stream in the caller's list after
    # every call when the same generation parameters are reused. This also
    # tolerates an explicit ``stopping_criteria=None``.
    criteria = transformers.StoppingCriteriaList(kwargs.get("stopping_criteria") or [])
    criteria.append(Stream(callback_func=callback))
    kwargs["stopping_criteria"] = criteria

    # Inference only: no autograd bookkeeping needed.
    with torch.no_grad():
        model.generate(**kwargs)

def generate_with_streaming(**kwargs):
    """Wrap ``generate_with_callback`` in an ``Iteratorize`` object.

    All keyword arguments are handed over as one dict, so they reach
    ``generate_with_callback`` (and from there ``model.generate``) unchanged.
    The returned object is used by ``gen`` as a context manager that can be
    iterated over.
    """
    streamer = Iteratorize(generate_with_callback, kwargs, callback=None)
    return streamer

def gen(generate_params):
    """Yield decoded text for every item produced while generation is streaming.

    Args:
        generate_params: Mapping of keyword arguments for
            ``generate_with_streaming`` (ultimately ``model.generate``).

    Yields:
        str: ``tokenizer.decode`` of each streamed item. The item is presumably
        the token ids of the sequence generated so far, prompt included
        (whatever ``Stream`` hands to its callback — confirm in
        ``utils.callbacks``).
    """
    # Only the arguments and the tokenizer are needed here; the function no
    # longer reads the notebook-level ``input_ids``, so it does not depend on
    # which cell ran last.
    with generate_with_streaming(**generate_params) as generator:
        for output in generator:
            yield tokenizer.decode(output)

In [31]:
# Pick one example and render it into the prompt template.
datum = dataset["train"][1006]
prompt = prompter.generate_prompt(datum["input"])
print(prompt)

# Tokenise the prompt and move the ids to the inference device.
inputs = tokenizer(prompt, return_tensors="pt")
input_ids = inputs["input_ids"].to(device)

# Decoding settings.
# NOTE(review): do_sample is not set here; per the transformers generation
# docs, temperature/top_p/top_k only apply when sampling is enabled, so this
# presumably runs plain 4-beam search — confirm that is intended.
generation_config = GenerationConfig(
    temperature=0.3, top_p=0.75, top_k=50, num_beams=4, use_cache=True
)

# Keyword arguments later unpacked into the generate call by gen().
generate_params = dict(
    input_ids=input_ids,
    generation_config=generation_config,
    return_dict_in_generate=True,
    output_scores=True,
    repetition_penalty=4.0,
    length_penalty=0.3,
    max_new_tokens=2048,
    renormalize_logits=True,
)


Below is a summary of a conversation between grey and brady, paired with an exchange between them. Write a response that best completes what grey has to say.

Summary: Grey and Brady discuss the changes to YouTube's Partner Program and the impact it may have on creators. Grey believes that the changes are an attempt to mitigate the effects of the adpocalypse and to ensure that YouTube remains a viable platform for advertisers.

Tone:  The tone of the conversation is concerned.

###

Brady: No, I don't. I think it's really dangerous.
Grey: I'm kind of curious though, as someone who has worked in TV, can you articulate why you think there's a difference between YouTube and TV? Because I think someone can make the argument that it's like, well, it all comes through your iPhone now and they're just apps and what's the difference now?
Brady: I wish you hadn't asked that because I find it really hard to answer.
Grey: But it is, right? I think that's an argument that doesn't have an immediate

In [32]:
# Stream the generation: redraw the cell output every time gen() yields a new decode.
for output in gen(generate_params):
    # wait=True postpones the clear until the next print arrives, so the cell
    # is never left blank between redraws (no flicker while streaming).
    clear_output(wait=True)
    # textwrap.fill(text, 120) is the stdlib spelling of "\n".join(textwrap.wrap(text, 120)).
    print(textwrap.fill(prompter.get_response(output), 120))

Yeah, maybe that's part of it. Maybe that's part of it. And also, again, this is something where I feel like we need
some data here. Like, how much money does YouTube earn versus how much money does Netflix earn? That would be very
interesting to know.
