In [1]:
!pip install -U sentence_transformers transformers datasets peft==0.6.2 sentencepiece accelerate bitsandbytes cinemagoer

Collecting sentence_transformers
  Downloading sentence-transformers-2.2.2.tar.gz (85 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.0/86.0 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting datasets
  Downloading datasets-2.15.0-py3-none-any.whl (521 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m521.2/521.2 kB[0m [31m11.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting peft==0.6.2
  Downloading peft-0.6.2-py3-none-any.whl (174 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m174.7/174.7 kB[0m [31m15.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting sentencepiece
  Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m21.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting accelerate
  Downloading accelerate-0.25.0-py3-none-any.whl

In [1]:
import pandas as pd
import torch
from huggingface_hub import notebook_login
from peft import PeftModel
from transformers import (
    BitsAndBytesConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    TextStreamer
)
DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"
MODEL_NAME = "meta-llama/Llama-2-7b-chat-hf"


In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
test_data = pd.read_csv("/content/drive/Othercomputers/My Laptop/Desktop/Stevens/SEM1/Deep Learning - CS 583 A/CS 583 Project/datasets/movie_datasets/imdb/val_llm_ds_lg_v2.csv")
test_data = test_data.loc[2000:2300].reset_index(drop=True) #unseen data for the model as it was never trained on the lg dataset

In [5]:
quantization_config = BitsAndBytesConfig(
          load_in_4bit=True,
          bnb_4bit_quant_type="nf4",
          bnb_4bit_compute_dtype=torch.float16,
          bnb_4bit_use_double_quant=True,
          )

In [6]:
model = AutoModelForCausalLM.from_pretrained(
      MODEL_NAME,
      quantization_config=quantization_config,
      device_map="auto",
      token="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",
  )

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



In [7]:
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token="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",)
tokenizer.pad_token = tokenizer.unk_token
tokenizer.pad_token_id =  tokenizer.unk_token_id
tokenizer.padding_side = "left"

In [8]:
model = PeftModel.from_pretrained(
                model,
                "/content/drive/Othercomputers/My Laptop/Desktop/Stevens/SEM1/Deep Learning - CS 583 A/CS 583 Project/model_dump/llama-7b-chat-v1",
                torch_dtype=torch.float16)

In [9]:
prompt = test_data["prompt"].iloc[50]
prompt, response = prompt.split("###Response:")
prompt = prompt+"###Response:"
print(prompt)
print(response.strip())

Below is a question regarding movies and shows paired with an input that provides further context. Write a response that appropriately completes the request.
###Instruction: Given the movie Paranormal Activity  (2007), recommend 1 similar movies from the input movies
###Input: Paranormal Activity: The Ghost Dimension  (2015)
The Exorcism of Emily Rose  (2005)
eXXXorcismos  (2002)
The Ring  (2002)
Insidious  (2010)
The Haunting  (1999)
The Dooms Chapel Horror  (2016)
Insidious  (2010)
The Ring Two  (2005)
Stories of the Paranormal: It Came in the Night  (2012)
An American Haunting  (2005)
13 Eerie  (2013)
Paranormal Activity 2  (2010)
Paranormal Activity: The Marked Ones  (2014)
The Exorcist III  (1990)
The Quiet Ones  (2014)
Cabin Fear  (2015)
The Last Exorcism  (2010)
Paranormal Movie  (2013)
The Hellsworth Haunting  (2010)
###Response:
Insidious  (2010)


In [10]:
def generate_output(prompt):
  inputs = tokenizer(prompt, return_tensors="pt")
  input_ids = inputs["input_ids"].to(DEVICE)
  with torch.no_grad():
      generation_output = model.generate(
                      input_ids=input_ids,
                      return_dict_in_generate=True,
                      output_scores=True,
                      max_new_tokens=1024,
                      temperature=0.8,
                      do_sample=True,
                  )
      generation_output = generation_output.sequences[0].detach().cpu()
      s = generation_output[len(input_ids[0]):]
  return tokenizer.decode(s, skip_special_tokens=True)

In [11]:
output = generate_output(prompt)
print(output)

Paranormal Activity  (2010)


In [12]:
test_data["input_prompt"] = test_data["prompt"].map(lambda prompt: prompt.split("###Response:")[0]+"###Response:")
test_data["response"] = test_data["prompt"].map(lambda prompt: prompt.split("###Response:")[1].strip())

In [13]:
from tqdm import tqdm
tqdm.pandas()

In [14]:
test_data["llm_response"] = ""

In [16]:
start_idx = 0
for idx, row in tqdm(test_data.iterrows(), total=test_data.shape[0]):
  if idx<start_idx:
    continue
  llm_response = generate_output(row["input_prompt"])
  test_data.at[idx, "llm_response"] = generate_output(row["input_prompt"])

 33%|███▎      | 100/301 [20:01<40:15, 12.02s/it]


KeyboardInterrupt: ignored

In [20]:
with open("temp.txt", "w+") as fp:
  for _, sample in test_data.loc[:100].iterrows():
    fp.write(sample["input_prompt"]+"\n")
    fp.write(sample["response"]+"\n")
    fp.write("LLML"+sample["llm_response"]+"\n")