In [74]:
# script code
import os
import platform
import random
import sys
import argparse
from typing import List, Dict, Union

import wandb
import plotly
import torch
import transformers  # type: ignore
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    AutoModelForSequenceClassification,
    TrainingArguments,
)
from matplotlib import pyplot as plt  # type: ignore
from superhf.data import get_superhf_prompts  # type: ignore
from superhf.finetuning import SinglePassBestOfNTrainer  # type: ignore

# finetunning code
from accelerate import Accelerator, find_executable_batch_size
from pynvml import nvmlInit, nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo

# data code
from torch.utils.data import Dataset, DataLoader
from datasets import load_dataset

In [2]:
!pwd

/sailhome/pchatain/projects/superhf/experiments/superhf/shf_single_pass_v1


In [3]:
# args
LANGUAGE_MODEL_NAME = "eleutherai/gpt-neo-1.3B"
REWARD_MODEL_NAME = "OpenAssistant/reward-model-deberta-v3-base"
DATASET_NAME = "openai/webgpt_comparisons"
NUM_TRAIN_EXAMPLES = 8000
NUM_TEST_EXAMPLES = 100
RANDOM_SEED = 66
SHUTDOWN_AFTER_RUN = True
MAX_EXAMPLE_LENGTH = 36
# TODO ask what cache dir the user wants to use. default is ~/.cache/huggingface/

In [4]:
# code
def print_gpu_utilization() -> None:
    """
    Print the GPU memory occupied using nvidia-smi.
    """
    nvmlInit()
    handle = nvmlDeviceGetHandleByIndex(0)
    info = nvmlDeviceGetMemoryInfo(handle)
    print(f"GPU memory occupied: {info.used//1024**2} MB.")
print_gpu_utilization()
! nvidia-smi

GPU memory occupied: 455 MB.
Mon Feb 20 14:18:08 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 515.43.04    Driver Version: 515.43.04    CUDA Version: 11.7     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA RTX A6000    On   | 00000000:25:00.0 Off |                  Off |
| 30%   31C    P8    22W / 300W |      1MiB / 49140MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+----------------------------------------------------------

In [5]:
# code
def check_node() -> str:
    """
    Check if we are on the sail compute cluster. If so, return a scratch directory to
    write checkpoints to and logs for wandb. If not, return None.
    """
    if not os.path.exists("/sailhome"):
        print("Not on sail compute cluster.")
        return ""
    # print machine name
    machine_name = platform.node().split(".")[0]
    print("We are running on node: ", machine_name)

    # print available scratch directories
    print(" ".join(os.listdir(f"/{machine_name}")))

    # get a random scratch directory
    scratch_dir = "/self/scr0"
    if not os.path.exists(scratch_dir):
      scratch_dir = "/self/" + os.listdir(f"/{machine_name}")[0]
    print("Using scratch directory: ", scratch_dir)
    return scratch_dir
print(f"Check if returned dir exists: {os.path.exists(check_node())}")

We are running on node:  jagupard34
scr-sync
Using scratch directory:  /self/scr-sync
Check if returned dir exists: True


In [6]:
# main code
scratch_dir = check_node()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

wandb.login()

We are running on node:  jagupard34
scr-sync
Using scratch directory:  /self/scr-sync


[34m[1mwandb[0m: Currently logged in as: [33mpchatain[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [7]:
# main code
# Initialize random seeds for everything
random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)
torch.cuda.manual_seed(RANDOM_SEED)
transformers.enable_full_determinism(RANDOM_SEED)

In [8]:
# maind code
print_gpu_utilization()
language_model = AutoModelForCausalLM.from_pretrained(LANGUAGE_MODEL_NAME).to(
        device
    )
print_gpu_utilization()
reward_model = AutoModelForSequenceClassification.from_pretrained(
    REWARD_MODEL_NAME
).to(device)
print_gpu_utilization()
language_tokenizer = AutoTokenizer.from_pretrained(
    LANGUAGE_MODEL_NAME, padding_side="left"
)
reward_tokenizer = AutoTokenizer.from_pretrained(REWARD_MODEL_NAME)
print_gpu_utilization()

GPU memory occupied: 458 MB.
GPU memory occupied: 6359 MB.
GPU memory occupied: 7097 MB.
GPU memory occupied: 7097 MB.


In [9]:
# main code
models = {"language_model": language_model, "reward_model": reward_model}
tokenizers = {
    "language_tokenizer": language_tokenizer,
    "reward_tokenizer": reward_tokenizer,
}
print_gpu_utilization()
!nvidia-smi # why is there a slight disagreement?

GPU memory occupied: 7097 MB.
Mon Feb 20 14:19:03 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 515.43.04    Driver Version: 515.43.04    CUDA Version: 11.7     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA RTX A6000    On   | 00000000:25:00.0 Off |                  Off |
| 30%   33C    P2    74W / 300W |   6642MiB / 49140MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+---------------------------------------------------------

In [78]:
# data code
class SummarizeFromFeedbackDataset(Dataset):
    """
    A webGPT dataset implementation for generating completions completions as well as finetuning a model on those generated completions.
    Loaded from hugging face.s
    """
    def __init__(self, split: str) -> None:
        """
        Return either the test or the train split for this dataset.
        # TODO: Add both the train and valid split?
        """
        super().__init__()
        self.data = load_dataset("openai/summarize_from_feedback", "comparisons")[split]
    
    def __len__(self) -> int:
        return len(self.data)
    
    def __getitem__(self, idx: int) -> Dict[str, str]:
        """
        Returns a tuple of the prompt, the completion, and the reward. The completions are re-ordered so that it is always
        chosen followed by rejected. 
        """
        assert self.data[idx]["choice"] in [0, 1], f"The choice for entry {idx} is not 0 or 1, it was {self.data[idx]['choice']}."
        prompt = self.data[idx]["info"]["post"]
        completion_chosen = self.data[idx]["summaries"][self.data[idx]["choice"]]["text"]
        completion_rejected = self.data[idx]["summaries"][1 - self.data[idx]["choice"]]["text"]
        return {
            "prompt": prompt,
            "completions": [completion_chosen, completion_rejected]
        }

class WebgptComparisons(Dataset):
    """
    A webGPT dataset implementation for generating completions completions as well as finetuning a model on those generated completions.
    Loaded from hugging face.s
    """
    def __init__(self, split: str) -> None:
        """
        Return either the test or the train split for this dataset.
        # TODO: Add both the train and valid split?
        """
        super().__init__()
        self.data = load_dataset("openai/webgpt_comparisons")[split]
    
    def __len__(self) -> int:
        return len(self.data)
    
    def __getitem__(self, idx: int) -> Dict[str, str]:
        """
        Returns a tuple of the prompt, the completion, and the reward. The completions are re-ordered so that it is always
        chosen followed by rejected. 
        """
        prompt = self.data[idx]["question"]["full_text"]
        score_0, score_1 = self.data[idx]["score_0"], self.data[idx]["score_1"]
        completion_chosen_idx = "answer_0" if score_0 >= 0 else "answer_1"
        completion_rejected_idx = "answer_1" if score_0 >= 0 else "answer_0"
        completion_chosen = self.data[idx][completion_chosen_idx]
        completion_rejected = self.data[idx][completion_rejected_idx]
        return {
            "prompt": prompt,
            "completions": [completion_chosen, completion_rejected],
            "scores": [score_0, score_1]
        }

# class FinetuneDataset(Dataset):
#     """
#     A dataset containing only the completions we plan to train on.
#     """
#     def __init__(self, dataset: Dataset, unique_prompt_indices=[]) -> None:
#         super().__init__()
#         self.data = dataset
#         self.unique_prompt_indices = unique_prompt_indices
#     def __len__(self) -> int:
#         return len(self.data)
    
#     def __getitem__(self, index) -> str:
#         """
#         Returns a single completion.
#         """
#         answer_completion_pair = self.data[index]["prompt"] + "[SEP]" + self.data[index]["completions"][0]
#         return answer_completion_pair

class PromptDataset(Dataset):
    """
    Get only the prompts from a particular dataset.
    TODO: implement fully
    """
    def __init__(self, dataset: Dataset) -> None:
        super().__init__()
        self.data = dataset.data
    
    def __len__(self) -> int:
        return len(self.data)
    
    def __getitem__(self, idx: int) -> str:
        """
        Dataset must have a 'prompt' key containing single prompt at each example.
        """
        return self.data[idx]["prompt"]


In [None]:
# main generating code
generating_dataset = WebgptComparisons("train")
print(f"Number of examples in generating dataset: {len(generating_dataset.data)}")

In [33]:
print(generating_dataset.data[2]["question"]["full_text"])

Heterophobia is the irrational fear of what


#### The prefix for example zero
 Voiced by Harry Shearer, what Simpsons character was modeled after Ted Koppel?◼[1] Kent Brockman (en.wikipedia.org)

Kent Brockman is a fictional character in the animated television series The Simpsons. He is voiced by Harry Shearer and first appeared in the episode "Krusty Gets Busted". He is a grumpy, self-centered local Springfield news anchor.◼[2] Krusty the Clown (en.wikipedia.org)

Krusty was created by cartoonist Matt Groening and partially inspired by Rusty Nails, a television clown from Groening's hometown of Portland, Oregon.◼

#### each part


In [112]:
# print(generating_dataset.data[0]["tokens_0"]["prefix"])
print(f"The untokenized version is\n {language_tokenizer.decode(generating_dataset.data[0]['tokens_0']['prefix'])}")
input_str_example1 = generating_dataset.data[0]["quotes_0"]['extract']
print(generating_dataset.data[0]["question"])
print(generating_dataset.data[0]["quotes_0"])
# from matching the above, it looks like the forumla is full_text<SEP>Title[0]\n\nExtract[0]<SEP>title[1]\n\nExtract[1]
print("----")
print(generating_dataset.data[0]["question"]["full_text"] + "[SEP]" + generating_dataset.data[0]["quotes_0"]["title"][0] + "\n\n" + generating_dataset.data[0]["quotes_0"]["extract"][0] + "[SEP]" + generating_dataset.data[0]["quotes_0"]["title"][1] + "\n\n" + generating_dataset.data[0]["quotes_0"]["extract"][1])
print("----")
print(input_str_example1)
print(language_tokenizer(generating_dataset.data[0]["question"]["full_text"] + input_str_example1[0] + input_str_example1[1]).input_ids)
print()
print(generating_dataset.data[0]['tokens_0']['prefix'])
print(generating_dataset.data[0]["score_1"])
print(generating_dataset.data[0]["answer_0"])
print("---")
print(generating_dataset.data[0]["answer_1"])

The untokenized version is
 Voiced by Harry Shearer, what Simpsons character was modeled after Ted Koppel?◼[1] Kent Brockman (en.wikipedia.org)

Kent Brockman is a fictional character in the animated television series The Simpsons. He is voiced by Harry Shearer and first appeared in the episode "Krusty Gets Busted". He is a grumpy, self-centered local Springfield news anchor.◼[2] Krusty the Clown (en.wikipedia.org)

Krusty was created by cartoonist Matt Groening and partially inspired by Rusty Nails, a television clown from Groening's hometown of Portland, Oregon.◼
{'dataset': 'triviaqa', 'id': '18c654a169eb80287f4353d33e701b1c', 'full_text': 'Voiced by Harry Shearer, what Simpsons character was modeled after Ted Koppel?'}
{'title': ['Kent Brockman (en.wikipedia.org)', 'Krusty the Clown (en.wikipedia.org)'], 'extract': ['Kent Brockman is a fictional character in the animated television series The Simpsons. He is voiced by Harry Shearer and first appeared in the episode "Krusty Gets Bus

In [135]:
t_z = generating_dataset.data[0]
# full_text<SEP>Title[0]\n\nExtract[0]<SEP>title[1]\n\nExtract[1]
test_input = t_z["question"]["full_text"]
test_input += "◼[1] " + t_z["quotes_0"]["title"][0] + "\n\n"
test_input += t_z["quotes_0"]["extract"][0] + "◼[2] " + t_z["quotes_0"]["title"][1] + "\n\n"
test_input += t_z["quotes_0"]["extract"][1] + "◼ " + t_z["answer_0"]
print(test_input)
input_tests = language_tokenizer(test_input, return_tensors="pt").to(device)
with torch.no_grad():
  print(reward_model(**input_tests))
  print(reward_model(**language_tokenizer(t_z["question"]["full_text"] + "[SEP] " + t_z["answer_0"], return_tensors="pt").to(device)))
  print(reward_model(**language_tokenizer(t_z["question"]["full_text"] + "[SEP] " + t_z["answer_1"], return_tensors="pt").to(device)))

Voiced by Harry Shearer, what Simpsons character was modeled after Ted Koppel?◼[1] Kent Brockman (en.wikipedia.org)

Kent Brockman is a fictional character in the animated television series The Simpsons. He is voiced by Harry Shearer and first appeared in the episode "Krusty Gets Busted". He is a grumpy, self-centered local Springfield news anchor.◼[2] Krusty the Clown (en.wikipedia.org)

Krusty was created by cartoonist Matt Groening and partially inspired by Rusty Nails, a television clown from Groening's hometown of Portland, Oregon.◼ The Simpsons character that was possibly based on Ted Koppel is Kent Brockman.  He is a local news anchor in Springfield and is modeled after Ted Koppel. [1]
SequenceClassifierOutput(loss=None, logits=tensor([[-0.4093]], device='cuda:0'), hidden_states=None, attentions=None)
SequenceClassifierOutput(loss=None, logits=tensor([[0.1230]], device='cuda:0'), hidden_states=None, attentions=None)
SequenceClassifierOutput(loss=None, logits=tensor([[-0.3003]], 

In [90]:
torch.cuda.current_device()

0

In [87]:
# finetuning code
def process_batch_inputs(model, tokenizer=None, examples=None, max_length=512):
    """
    Process a batch of examples and return the logits of the model

    Args:
      model: the model to use
      tokenizer: the tokenizer to use
      examples: a list of examples to process
      max_length: the max length of the examples

    Returns:
      the logits of the model: a tensor of shape (batch_size, max_length)
    """
    assert examples != None, "examples cannot be None"
    if tokenizer != None:
      examples = tokenizer(examples, padding=True, truncation=True, max_length=max_length, return_tensors='pt')
    with torch.no_grad():
      score = model(**examples).logits.detach().flatten()
    print("processed one batch")
    return score

# def collate_webgpt_fn(prompt, completion, ):
#     """
#     A function that takes the webgpt
#     """
#     return prompt + "[SEP]" + completion[]


@find_executable_batch_size(starting_batch_size=512)
def score_completions(batch_size):
    """
    Process a batch of examples and return the logits of the model.
    Automatically finds the batch size that fits on the GPU.

    Args:
      model: the model to use
      tokenizer: the tokenizer to use
      examples: a list of examples to process
      max_length: the max length of the examples

    Returns:
      the logits of the model: a tensor of shape (batch_size, max_length)
    """
    generating_dataset = DataLoader(WebgptComparisons("train"), shuffle=False, batch_size=batch_size)
    print(f"generating with batch_size {batch_size}...")
    scores = []
    for batch in generating_dataset:
        print(batch["completions"][0][0])
        # scores.append(batch["scores"])
        scores.append(reward_model(**batch["answer"]))
        # scores.append(process_batch_inputs(reward_model, tokenizer=language_tokenizer, examples=))
        break
    return scores
        

In [88]:
score_completions()

Found cached dataset webgpt_comparisons (/sailhome/pchatain/.cache/huggingface/datasets/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)


  0%|          | 0/1 [00:00<?, ?it/s]

generating with batch_size 512...
The Simpsons character that was possibly based on Ted Koppel is Kent Brockman.  He is a local news anchor in Springfield and is modeled after Ted Koppel. [1]


[[tensor([ 1.0000,  0.0000,  0.5000,  0.0000,  0.0000,  0.0000,  1.0000,  0.0000,
           0.3333,  1.0000,  0.0000,  0.5000,  0.0000,  1.0000,  0.0000,  0.0000,
           1.0000,  1.0000,  0.5000, -0.5000, -1.0000,  0.0000,  1.0000, -1.0000,
          -0.5000, -0.5000,  1.0000, -0.5000, -0.5000,  0.0000, -1.0000,  0.0000,
           0.0000,  0.5000,  0.0000,  1.0000,  0.0000,  1.0000, -0.5000,  1.0000,
          -0.5000,  0.0000, -0.5000,  1.0000,  1.0000,  0.3333,  0.5000,  0.5000,
           0.0000, -0.5000, -1.0000,  0.0000, -0.5000, -1.0000,  0.0000,  1.0000,
          -1.0000,  1.0000,  0.0000,  0.0000, -1.0000,  0.0000,  0.0000,  0.0000,
           0.0000,  0.5000,  0.5000, -0.5000,  0.0000,  0.0000,  0.5000, -0.5000,
           0.5000,  0.6667,  0.6667,  0.0000,  0.5000, -0.5000,  0.0000, -0.3333,
           0.5000,  0.0000,  0.5000,  0.0000,  0.5000, -1.0000,  0.5000, -0.5000,
           0.0000,  0.3333,  1.0000,  1.0000, -0.5000, -0.5000,  1.0000, -0.6667,
           0.000

In [None]:
score_completions()

Found cached dataset summarize_from_feedback (/sailhome/pchatain/.cache/huggingface/datasets/openai___summarize_from_feedback/comparisons/0.0.0/483f970ceb55b926b0a087ef4f678ab1b089bc8174a107a452c6152e88af7ff0)


  0%|          | 0/2 [00:00<?, ?it/s]

generating
{'prompt': ["Hi r/relationships, I could use some advice.\n\nMy partner and I have been together around 9 months. We used to post to r/gonewild a lot, both together and just as her. It never bothered me that she received messages and replied to them and flirted with other men because it was open and there was no deceit.\n\nI recently found out that she has been sending messages to some of her 'fans' from another Reddit account that I don't have access too (found out as she left it logged in on my iPad), specifically so I can't see the messages - she says that in her messages.\n\nThere is one person in particular who she talks to a lot, most days. As far as I saw there was nothing very sexual in her messages (just him complementing her on her r/gonewild posts), but they were very affectionate toward each other. He would say how he hoped they could be together one day, how much he misses her, she said that he's one of the few people that she feels she can talk to and feels com