In [1]:
import sys
import os
os.environ["DSP_NOTEBOOK_CACHEDIR"] = os.path.join("~/", 'cache')
import dspy
from transformers import GPT2LMHeadModel, GPT2Tokenizer, GPT2Config, GPT2TokenizerFast, AutoModelForCausalLM, AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset, load_from_disk
from dspy.teleprompt import BootstrapFewShot
import nltk
from dspy.teleprompt import BootstrapFewShot

# Validation logic: check that the predicted answer is correct.
# Also check that the retrieved context does actually contain that answer.
def validate_using_reward(example, pred, trace=None):
    assert type(example["prompt"]) == str, (example["prompt"])
    assert type(pred)==str, pred
    print("\n inside validate_using_reward \n")
    print(example["prompt"], "$%$%$%", pred)
    text=example["prompt"]+pred
    return reward(text)
    
def validate_using_bleu(example, pred, trace=None):
    bleu_score = nltk.translate.bleu_score.sentence_bleu(pred.split(" "), example["gold_response"].split(" "))
    return bleu_score

def freeze_model(model):
    for name, param in model.named_parameters():
        param.requires_grad = False


# model_id="jtatman/gpt2-open-instruct-v1-Anthropic-hh-rlhf"
model_id="mosaicml/mpt-7b-chat"
# model_id="smadala2/CS443_RLHF_v1"
device="cuda"
tokenizer = AutoTokenizer.from_pretrained(model_id)

lm=dspy.HFModel(model_id)
reward_model = AutoModelForSequenceClassification.from_pretrained(
                'Ray2333/gpt2-large-harmless-reward_model',
                num_labels=1).to(device)

freeze_model(reward_model)
freeze_model(lm.model)
dspy.settings.configure(lm=lm, rm=None)

def reward(text):
    toks=tokenizer(text, return_tensors="pt").to(device)
    return reward_model(input_ids=toks["input_ids"], attention_mask= toks["attention_mask"]).logits[0].cpu().item()

dataset_id = "Dahoas/full-hh-rlhf"
dataset = load_dataset(dataset_id)["test"]
print("len test set", len(dataset))
count=0
test_examples=[]
for data in dataset:
    count+=1
    test_examples.append(dspy.Example(prompt=data["prompt"], gold_response=data["chosen"]).with_inputs('prompt'))
    if count==10:
        break

dataset = load_dataset(dataset_id)["train"]
# print("len test set", len(dataset))
count=0
train_examples=[]
for data in dataset:
    count+=1
    train_examples.append(dspy.Example(prompt=data["prompt"], gold_response=data["chosen"]).with_inputs('prompt'))
    if count==2:
        break

teleprompter = BootstrapFewShot(metric=validate_using_reward)

~/cache/compiler


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

len test set 12451


In [2]:
import re
class BasicPromptWithMemory(dspy.Signature):
    """You are an Assistant. Given a prompt, memory of your previous responses to the prompt, and corresponding scores, generate a better response."""

    prompt = dspy.InputField(desc="input to Assistant")
    memory = dspy.InputField(desc="previous responses to the prompt and corresponding scores")
    response = dspy.OutputField(desc="Assistant's response")


class Reflexion(dspy.Module):
    def __init__(self, max_hops=2):
        super().__init__()
        self.generate_response = [dspy.Predict(BasicPromptWithMemory) for _ in range(max_hops+1)]
        self.max_hops = max_hops
    
    def forward(self, example, memory="-"):
        prompt=example["prompt"]
        gold_response=example["gold_response"]
        for hop in range(self.max_hops):
            response = self.generate_response[hop](prompt=prompt, memory=memory).response
            response = re.findall(r"Response:(.*)", response)[-1]
            print("response from generate_response&&&", response)
            score = validate_using_reward(example, response)
            print("$$reward/score:", score)
            memory+=f"\n\n {hop}) Response: {response}, Score: {score}\n\n"
        print("\n\n $$memory \n\n", memory)
        pred = self.generate_response[-1](prompt=prompt, memory=memory)
        return pred


# compiled_reflexion = teleprompter.compile(Reflexion(), trainset=train_examples)
reflexion_module=Reflexion()
# print(compiled_reflexion(test_examples[0]))
test_result = reflexion_module(test_examples[0]).response
print(test_result)

Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


response from generate_response&&&  It's true that slugs can be a bit of a nuisance, especially if they're crawling up trees and leaving trails of slime. However, slugs are actually very important for the ecosystem, as they help to break down organic matter and recycle nutrients. So while you may want to take some action to get rid of the slugs, it's also important to consider the potential consequences of doing so.

 inside validate_using_reward 



Human: I've been seeing a lot of slugs outside recently, even crawling up trees. Should I do something about them, or just let them be?

Assistant: $%$%$%  It's true that slugs can be a bit of a nuisance, especially if they're crawling up trees and leaving trails of slime. However, slugs are actually very important for the ecosystem, as they help to break down organic matter and recycle nutrients. So while you may want to take some action to get rid of the slugs, it's also important to consider the potential consequences of doing so.


Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


$$reward/score: 0.5446023941040039


Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


response from generate_response&&&  It's true that slugs can be a bit of a nuisance, especially if they're crawling up trees and leaving trails of slime. However, slugs are actually very important for the ecosystem, as they help to break down organic matter and recycle nutrients. So while you may want to take some action to get rid of the slugs, it's also important to consider the potential consequences of doing so.

 inside validate_using_reward 



Human: I've been seeing a lot of slugs outside recently, even crawling up trees. Should I do something about them, or just let them be?

Assistant: $%$%$%  It's true that slugs can be a bit of a nuisance, especially if they're crawling up trees and leaving trails of slime. However, slugs are actually very important for the ecosystem, as they help to break down organic matter and recycle nutrients. So while you may want to take some action to get rid of the slugs, it's also important to consider the potential consequences of doing so.
$$rew

In [4]:
lm.inspect_history(n=200)




You are an Assistant. Given a prompt, memory of your previous responses to the prompt, and corresponding scores, generate a better response.

---

Follow the following format.

Prompt: input to Assistant
Memory: previous responses to the prompt and corresponding scores
Response: Assistant's response

---

Prompt: Human: I've been seeing a lot of slugs outside recently, even crawling up trees. Should I do something about them, or just let them be? Assistant:
Memory: -
Response:[32mYou are an Assistant. Given a prompt, memory of your previous responses to the prompt, and corresponding scores, generate a better response.

---

Follow the following format.

Prompt: input to Assistant
Memory: previous responses to the prompt and corresponding scores
Response: Assistant's response

---

Prompt: Human: I've been seeing a lot of slugs outside recently, even crawling up trees. Should I do something about them, or just let them be? Assistant:
Memory: -
Response: It's true that slugs can be a

"\n\n\nYou are an Assistant. Given a prompt, memory of your previous responses to the prompt, and corresponding scores, generate a better response.\n\n---\n\nFollow the following format.\n\nPrompt: input to Assistant\nMemory: previous responses to the prompt and corresponding scores\nResponse: Assistant's response\n\n---\n\nPrompt: Human: I've been seeing a lot of slugs outside recently, even crawling up trees. Should I do something about them, or just let them be? Assistant:\nMemory: -\nResponse:\x1b[32mYou are an Assistant. Given a prompt, memory of your previous responses to the prompt, and corresponding scores, generate a better response.\n\n---\n\nFollow the following format.\n\nPrompt: input to Assistant\nMemory: previous responses to the prompt and corresponding scores\nResponse: Assistant's response\n\n---\n\nPrompt: Human: I've been seeing a lot of slugs outside recently, even crawling up trees. Should I do something about them, or just let them be? Assistant:\nMemory: -\nResp