In [1]:
# Utils
import datasets
import re
import json
import random

def download_dataset(data, subset=None, split=None, max_retries=None, retry_delay=1.0):
    """Load a dataset through ``datasets.load_dataset``, retrying on failure.

    Args:
        data: Dataset identifier, forwarded as the first positional argument.
        subset: Forwarded to ``load_dataset`` as ``name``.
        split: Forwarded to ``load_dataset`` as ``split``.
        max_retries: Maximum number of attempts. ``None`` (the default) keeps
            retrying indefinitely, which is the historical behaviour.
        retry_delay: Seconds to wait after a failed attempt before retrying,
            so a persistent failure does not turn into a busy loop.

    Returns:
        Whatever ``datasets.load_dataset`` returns for the given arguments.

    Raises:
        Exception: The error of the last attempt, once ``max_retries``
            attempts have all failed.
    """
    import time  # local import so the block does not depend on a new top-level import

    attempt = 0
    while True:
        attempt += 1
        try:
            return datasets.load_dataset(data, name=subset, split=split)
        except Exception as e:
            # Keep the original best-effort behaviour: report the error and retry.
            print(e)
            if max_retries is not None and attempt >= max_retries:
                raise
            if retry_delay > 0:
                time.sleep(retry_delay)

def load_local(fn):
    """Read a JSON-lines file into a list of decoded objects.

    Args:
        fn: Path of the file to read; every non-blank line must hold one
            JSON document.

    Returns:
        list: The decoded values, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # The context manager closes the handle even if decoding fails part-way.
    with open(fn) as f:
        # Blank lines (e.g. a trailing newline at the end of the file) are
        # skipped instead of being handed to json.loads, which rejects "".
        return [json.loads(line) for line in map(str.strip, f) if line]

def print_json(d, w):
    """Write the items of ``d`` to the file ``w`` as JSON lines.

    Each item is serialized with ``ensure_ascii=False`` and the results are
    joined with a single newline; no newline is written after the last item.
    Any existing content of ``w`` is replaced.

    Args:
        d: Iterable of JSON-serializable items.
        w: Destination file path.
    """
    serialized = (json.dumps(record, ensure_ascii=False) for record in d)
    with open(w, "w+") as out:
        out.write("\n".join(serialized))

def get_webgpt(d):
    """Turn WebGPT comparison rows into prompt / chosen / rejected records.

    Rows are read from ``d["train"]``. A row whose two scores are equal is
    dropped; otherwise the answer with the higher score becomes ``chosen`` and
    the other one ``rejected``. Bracketed or parenthesised spans are removed
    from both answers before they are stored.

    Args:
        d: Mapping with a ``"train"`` entry that iterates over rows providing
            ``score_0``, ``score_1``, ``answer_0``, ``answer_1`` and
            ``question["full_text"]``.

    Returns:
        list[dict]: One dict per kept row with the keys ``"prompt"``,
        ``"chosen"`` and ``"rejected"``.
    """
    # NOTE(review): the second answer is stored under "rejected", while other
    # cells of this notebook use the key "reject" - confirm which one the
    # consumers of this data expect.

    def _strip_bracketed(text):
        # Two passes, in this order: first the spans preceded by a space (so
        # the space goes away with them), then whatever bracketed spans remain.
        for pattern in (r" [\(\[].*?[\)\]]", r"[\(\[].*?[\)\]]"):
            text = re.sub(pattern, "", text)
        return text.strip()

    records = []
    for row in d["train"]:
        first_score = float(row['score_0'])
        second_score = float(row['score_1'])
        if first_score == second_score:
            continue
        if first_score < second_score:
            better, worse = row['answer_1'], row['answer_0']
        else:
            better, worse = row['answer_0'], row['answer_1']
        better = _strip_bracketed(better)
        worse = _strip_bracketed(worse)
        records.append({
            "prompt": "Human: " + row['question']['full_text'].strip() + "\n\nAssistant:",
            "chosen": better,
            "rejected": worse,
        })
    return records

In [2]:
# Load the 'comparisons' configuration of openai/summarize_from_feedback.
# No split is passed; the cells below index the result by split name.
d = download_dataset("openai/summarize_from_feedback", subset='comparisons')

Using the latest cached version of the module from /Users/ewwe/.cache/huggingface/modules/datasets_modules/datasets/openai--summarize_from_feedback/483f970ceb55b926b0a087ef4f678ab1b089bc8174a107a452c6152e88af7ff0 (last modified on Mon Feb  5 06:59:45 2024) since it couldn't be found locally at openai/summarize_from_feedback, or remotely on the Hugging Face Hub.


In [9]:
# Convert every split of the comparisons data into prompt / chosen / reject
# records and write one JSON-lines file per split ("<split>.txt").
x = {k: [] for k in d}
skipped = {k: 0 for k in d}  # rows dropped because a field was missing or not a string
for k in d:
    for y in d[k]:
        try:
            sub = y['info']['subreddit'].strip()
            title = y['info']['title'].strip()
            post = y['info']['post'].strip()
            # 'choice' selects the preferred summary; any value other than 0
            # is treated as "the second summary was chosen".
            if y['choice'] == 0:
                chosen = y['summaries'][0]['text'].strip()
                reject = y['summaries'][1]['text'].strip()
            else:
                chosen = y['summaries'][1]['text'].strip()
                reject = y['summaries'][0]['text'].strip()
        except (KeyError, IndexError, TypeError, AttributeError):
            # Best-effort conversion: rows with missing keys or non-string
            # fields are skipped. Only these lookup/strip errors are caught, so
            # interrupts and unrelated bugs are no longer silently swallowed.
            skipped[k] += 1
            continue
        if sub != "" and title != "" and post != "" and chosen != "" and reject != "":
            x[k].append({"prompt": f"Human: Please summarize the following Reddit post in no more than 100 words: \n\nSUBREDDIT: {sub} \n\nTITLE: {title} \n\nPOST: {post} \n\nAssistant: ", "chosen": chosen, "reject": reject, "task": "summary"})
for k in x:
    if skipped[k]:
        print(f"{k}: skipped {skipped[k]} malformed rows")
    print_json(x[k], f"{k}.txt")

In [11]:
# Number of rows in the 'validation' split of the loaded comparisons data.
print(len(d['validation']))

86086


In [None]:
# Step 1.1: Processing data
# Draw `per_task` random records from each of the three local task files
# (dialog, QA, summarization) and write the combined list to
# {fn}/origin/problems.txt. `fn` is the data root reused by the later cells.
# NOTE: random.sample raises ValueError when a file holds fewer than `per_task`
# records, and no seed is set in the visible cells, so the draw differs per run.
# The trailing comments show how each file could be rebuilt from the Hub.
# NOTE(review): the qa variant passes split="train" while get_webgpt indexes
# d["train"] itself - confirm the two agree before using that line.
per_task = 10000
fn = "data"
dialog = load_local(f'{fn}/origin/hhrlhf.txt')  # dialog = download_dataset("Dahoas/rl-prompt-dataset", split="train")
qa = load_local(f'{fn}/origin/webgpt.txt')  # qa = get_webgpt(download_dataset("openai/webgpt_comparisons", split="train"))
summary = load_local(f'{fn}/origin/summarize_tldr.txt')  # summary = download_dataset("CarperAI/openai_summarize_tldr", split="train")
# You can also visit f'{fn}/origin' for training data.
problems = random.sample(dialog, per_task) + random.sample(qa, per_task) + random.sample(summary, per_task)
print_json(problems, f'{fn}/origin/problems.txt')

In [None]:
# Step 1.2: Grouping
# Rank the first model's answers by their 'loss' value, cut the ranking into
# three equal-sized buckets and label every record with a difficulty and the
# kind of feedback to collect for it. Each record's 'output' becomes a dict
# holding both models' answers, keyed by model name.
model1, model2 = 'vicuna-7b', 'vicuna-13b'

# Answers of the first model are spread over 8 shard files.
d = []
for shard in range(8):
    d.extend(load_local(f"{fn}/initial_answers/ppl-{model1}-{shard}.txt"))
d = [rec for rec in d if "Fact-check" not in rec["prompt"]]
l = len(d)

# Ascending 'loss': lowest third -> easy, middle third -> medium, rest -> hard.
d.sort(key=lambda rec: rec['loss'])
first_cut, second_cut = l // 3, 2 * l // 3
easy = d[:first_cut]
mid = d[first_cut:second_cut]
hard = d[second_cut:]

# Second model's answers, looked up by prompt.
d13 = []
for shard in range(8):
    d13.extend(load_local(f"{fn}/initial_answers/{model2}-{shard}.txt"))
another_answer = {rec['prompt']: rec['output'] for rec in d13}

for bucket, difficulty, feedback_type in (
    (easy, 'easy', 'critic'),
    (mid, 'medium', 'refine'),
    (hard, 'hard', 'prefer'),
):
    for rec in bucket:
        rec['difficulty'] = difficulty
        rec['feedback_type'] = feedback_type
        rec['output'] = {model1: rec['output'], model2: another_answer[rec['prompt']]}

print_json(easy + mid + hard, f"{fn}/initial_answers/group.txt")

In [None]:
# Step 1.4: Collect Raw Feedbacks
# 1.4.1 Collect Raw Feedbacks
# Route every raw feedback record by its 'feedback_type':
#   - 'critic' records are kept unchanged for the next cell;
#   - 'refine' records become chosen/reject pairs when the feedback contains
#     the "A better <element> is:" marker followed by a non-empty rewrite;
#   - all other records are treated as A/B/C preference verdicts and become
#     pairs only when exactly one of A or B is selected and C is not.
critic, refine, prefer = [], [], []
# Path fix: this used to read f"{fn}/data/feedback/feedbacks.txt", which
# doubles the data directory; every other path in this notebook, including the
# two files written below, lives under f"{fn}/feedback/".
with open(f"{fn}/feedback/feedbacks.txt") as f:
    for line in f:
        if not line.strip():
            continue  # tolerate blank lines such as a trailing newline
        line = json.loads(line.strip())
        fb = line["feedback"]

        if line['feedback_type'] == 'critic':
            critic.append(line)
        elif line['feedback_type'] == 'refine':
            element = {"qa": "answer", "dialog": "response"}.get(line['task'], "summary")
            trigger = f"A better {element} is:"
            if trigger not in fb:
                continue
            better = fb[fb.index(trigger) + len(trigger):].strip()
            if not better:
                # An empty rewrite is not a usable 'chosen' answer; Step 1.6
                # drops empty answers for critiques in the same way.
                continue
            line['chosen'] = better
            line['reject'] = line['output'][model1]
            del line['output'], line['prompt-api']
            refine.append(line)
        else:
            # Exact-match verdicts are compared after stripping whitespace so
            # that "A\n" or " B." still count.
            verdict = fb.strip()
            picks_a = "A)" in fb or "A -" in fb or verdict in ["A", "A."]
            picks_b = "B)" in fb or "B -" in fb or verdict in ["B", "B."]
            picks_c = "C)" in fb or "C -" in fb or verdict in ["C", "C."]
            # Skip when C is selected, and when A and B are both or neither selected.
            if picks_c or picks_a == picks_b:
                continue
            # A refers to model1's answer, B to model2's.
            winner, loser = (model1, model2) if picks_a else (model2, model1)
            line['chosen'] = line['output'][winner]
            line['reject'] = line['output'][loser]
            del line['output'], line['prompt-api']
            prefer.append(line)
print_json(refine, f'{fn}/feedback/refinement.txt')
print_json(prefer, f'{fn}/feedback/preference.txt')

In [None]:
# 1.4.2 Construct dataset for step 1.5
# Each template is [instruction header, answer label, comment label, label of
# the rewritten answer]; a prompt is built by interleaving these four pieces
# with the record's 'prompt-api', the rejected answer and the feedback text.
hd = ["Below is a conversation between a human and an AI assistant. Given the comment on the assistant's last response (started with \"Comment: \"), please rewrite the response according to the advice of improvement presented in the comment to make it more helpful, truthful and less harmful to the human. \n\n\n",
"\n\n\nResponse: ",
"\n\n\nComment: ",
"\n\n\nRevised response: "]
hq = ["Below is a question and its intended answer. Given the comment on the answer (started with \"Comment: \"), please rewrite the answer according to the advice of improvement presented in the comment to make it more correct, clear and readable to the human. \n\n\n",
"\n\n\nAnswer: ",
"\n\n\nComment: ",
"\n\n\nRevised answer: "]
hs = ["Below is a Reddit post and its intended summary. Given the comment on the summary (started with \"Comment: \"), please rewrite the summary according to the advice of improvement presented in the comment to make it more accurate and brief for others to read. \n\n\n",
"\n\n\nSummary: ",
"\n\n\nComment: ",
"\n\n\nRevised summary: "]

# Template per task; any task other than 'dialog' / 'qa' uses the summary one.
templates = {'dialog': hd, 'qa': hq}

for c in critic:
    # Keep the very first prompt only: re-running this cell must not replace
    # 'origin_prompt' with the already-templated prompt written below.
    if 'origin_prompt' not in c:
        c['origin_prompt'] = c['prompt']
    c['reject'] = c['output'][model1]
    header, answer_label, comment_label, revised_label = templates.get(c['task'], hs)
    c['prompt'] = header + c["prompt-api"] + answer_label + c["reject"] + comment_label + c["feedback"] + revised_label
print_json(critic, f'{fn}/feedback/critique.txt')

In [None]:
# Step 1.6: Set up Train and Valid Datasets for RLHF-RM and DPO
# Clean the rewritten answers produced for the critique prompts, turn them
# into chosen/reject pairs, then split critique, refinement and preference
# pairs 10% / 90% into validation and training files.
import re
# A paragraph matching this pattern is treated as the start of trailing
# commentary; it and everything after it is cut from the answer.
remove = re.compile(r"(T|t)he (original |revised )?(answer|response|summary)|Comment:|Human:|Revised answer:|Explanation:|To provide more cont")
c = [x for i in range(8) for x in load_local(f"{fn}/feedback/improve-{i}.txt")]
critic = []
for x in c:
    x['prompt'] = x['origin_prompt']
    answer = x['output'].strip().split("\n\n")
    # Drop a leading paragraph that starts with "Human:".
    if answer[0].startswith("Human:"):
        answer = answer[1:]
    # Index of the first paragraph that looks like commentary, if any.
    cut = next((j for j, y in enumerate(answer) if remove.search(y)), None)
    if cut is not None:
        answer = answer[:cut]
    answer = "\n\n".join(answer)
    # Nothing usable is left, or the rewrite is identical to the rejected answer.
    if not answer or answer == x['reject']:
        continue
    x['chosen'] = answer
    del x['origin_prompt'], x['output'], x['prompt-api']
    critic.append(x)

refine = load_local(f'{fn}/feedback/refinement.txt')
prefer = load_local(f'{fn}/feedback/preference.txt')
# Integer sizes: the previous float values (0.1 * len(...)) cannot be used as
# slice bounds and made the slices below raise TypeError.
valid_size = [len(part) // 10 for part in (critic, refine, prefer)]
random.shuffle(critic)
random.shuffle(refine)
random.shuffle(prefer)
print_json(critic[:valid_size[0]] + refine[:valid_size[1]] + prefer[:valid_size[2]], f'{fn}/feedback/valid.txt')
print_json(critic[valid_size[0]:] + refine[valid_size[1]:] + prefer[valid_size[2]:], f'{fn}/feedback/train.txt')