In [1]:
from datasets import load_dataset
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
from openai import OpenAI
import requests
import json
from tqdm import tqdm
import pickle

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
# # Load and shuffle the full test split
# train_ultrafeedback = load_dataset("HuggingFaceH4/ultrafeedback_binarized",
#                                    revision="292c16329d921287c4166934cac1a6ad1e13a6c5",
#                                    split = 'train_prefs')

# test_ultrafeedback = load_dataset("HuggingFaceH4/ultrafeedback_binarized", 
#                         revision="292c16329d921287c4166934cac1a6ad1e13a6c5", 
#                         split="test_prefs").shuffle(seed=42)

# test_sample = test_ultrafeedback.select(range(100))
# eval_prompts = test_sample['prompt']


In [4]:
chat_template = (
    "{% set image_count = namespace(value=0) %}"
    "{% set video_count = namespace(value=0) %}"
    "{% for message in messages %}"
    "{% if loop.first and message['role'] != 'system' %}"
    "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
    "{% endif %}"
    "<|im_start|>{{ message['role'] }}\n"
    "{% if message['content'] is string %}"
    "{% if message['role'] == 'assistant' %}"
    "{% generation %}"
    "{{ message['content'] }}"
    "{% endgeneration %}"
    "{% else %}"
    "{{ message['content'] }}"
    "{% endif %}"
    "<|im_end|>\n"
    "{% else %}"
    "{% for content in message['content'] %}"
    "{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}"
    "{% set image_count.value = image_count.value + 1 %}"
    "{% if add_vision_id %}"
    "Picture {{ image_count.value }}: "
    "{% endif %}"
    "<|vision_start|><|image_pad|><|vision_end|>"
    "{% elif content['type'] == 'video' or 'video' in content %}"
    "{% set video_count.value = video_count.value + 1 %}"
    "{% if add_vision_id %}"
    "Video {{ video_count.value }}: "
    "{% endif %}"
    "<|vision_start|><|video_pad|><|vision_end|>"
    "{% elif 'text' in content %}"
    "{% if message['role'] == 'assistant' %}"
    "{% generation %}"
    "{{ content['text'] }}"
    "{% endgeneration %}"
    "{% else %}"
    "{{ content['text'] }}"
    "{% endif %}"
    "{% endif %}"
    "{% endfor %}"
    "<|im_end|>\n"
    "{% endif %}"
    "{% endfor %}"
    "{% if add_generation_prompt %}"
    "<|im_start|>assistant\n"
    "{% endif %}")

In [5]:
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B")
tokenizer.padding_side='left'
tokenizer.add_special_tokens({'pad_token': '<|pad|>',
                              'bos_token': '<|im_start|>',
                              'eos_token': '<|im_end|>'})


# sft_model = AutoModelForCausalLM.from_pretrained(SFT_MODEL, torch_dtype=torch.float16,).to(device)

# sft_model.eval()

# reference model
# dpo_model = AutoModelForCausalLM.from_pretrained(DPO_MODEL, torch_dtype=torch.float16,).to(device)
# dpo_model = PeftModel.from_pretrained(AutoModelForCausalLM.from_pretrained(SFT_MODEL, torch_dtype=torch.float16), 
#                                       DPO_MODEL).to(device)
ct_model = AutoModelForCausalLM.from_pretrained("./checkpoints/latest_step", torch_dtype=torch.float16,).to(device)

dpo_model = AutoModelForCausalLM.from_pretrained("./dpo_model", torch_dtype=torch.float16,).to(device)


dpo_model.eval()

dpo_model.config.pad_token_id = ct_model.config.pad_token_id = tokenizer.pad_token_id
dpo_model.config.bos_token_id = ct_model.config.pad_token_id = tokenizer.bos_token_id
dpo_model.config.eos_token_id = ct_model.config.pad_token_id = tokenizer.eos_token_id

In [6]:
with open("test_prompts.pkl", "rb") as f:
    test_prompts = pickle.load(f)
with open("test_completions.pkl", "rb") as f:
    test_completions = pickle.load(f)

# ex = train_ultrafeedback[0]

# ex["chosen"]


# ex = [{'content': 'how does the fed impact markets. give me 200 words.',
#   'role': 'user'},
#  {'content': "The Federal Reserve, or the Fed, has a significant impact on financial markets through its monetary policy decisions. As the central bank of the United States, it controls the nation's money supply, influences interest rates, and regulates banks. Here's how the Fed affects markets in 200 words:\n\n1. Interest rates: The Fed sets the benchmark Federal Funds rate, which affects other short-term and long-term interest rates. Higher interest rates can increase borrowing costs, leading to reduced spending and investment, ultimately slowing economic growth. Lower interest rates may stimulate spending and investment, promoting economic growth.\n2. Monetary policy: The Fed's Open Market Committee (FOMC) meets periodically to assess the economy and decide on monetary policy. Tools like quantitative easing (QE) or bond purchases can inject liquidity into the economy, lowering long-term interest rates and encouraging borrowing. Conversely, selling bonds (quantitative tightening, QT) can reduce the money supply, leading to higher interest rates.\n3. Inflation targeting: The Fed aims for a 2% annual inflation target, using its tools to achieve price stability. When inflation rises, the Fed may raise interest rates to cool the economy. If deflation threatens, it may lower rates to stimulate growth.\n4. Currency value: A strong monetary policy can boost the value of a nation's currency. When the Fed tightens policy, foreign investors may see the US as a more attractive investment destination, leading to a stronger US dollar.\n5. Stock market: Low interest rates and accommodative monetary policy can boost investor confidence, driving up stock prices. Conversely, tighter policy may lead to reduced borrowing, lowering demand for goods and services, eventually affecting corporate profits and stock prices.\n6. Fixed-income markets: The Fed's actions directly impact bond yields. Higher interest rates lead to higher yields and vice versa. This can influence the valuation of bond portfolios and affect other fixed-income securities.\n7. Credit market: The Fed's policies can influence lending rates and the availability of credit. Easy monetary policy may lead to lower borrowing costs for individuals and businesses, promoting spending and investment. Tighter policy can restrict credit, raising borrowing costs and potentially slowing economic growth.\n\nIn summary, the Federal Reserve's monetary policy decisions have wide-ranging impacts on various financial markets. Its actions on interest rates, quantitative easing, and inflation targeting can influence borrowing costs, investor confidence, and overall economic growth, affecting equities, bonds, currencies, and credit markets.",
#   'role': 'assistant'}]

In [7]:
client = OpenAI(
    base_url="https://integrate.api.nvidia.com/v1", 
    api_key="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"  # MY KEY
)

def get_reward_score(prompt, response):
    messages = [
        {"role": "user", "content": prompt},
        {"role": "assistant", "content": response}
    ]
    result = client.chat.completions.create(
        model="nvidia/llama-3.1-nemotron-70b-reward",
        messages=messages
    )
    content = result.choices[0].message.content.strip()
    return content

In [8]:
def generate_batch(batch_size, prompts, model, tokenizer, type = 'sft'):
    outputs_list = []
    
    rep_penalty = 1.22 if type == 'dpo' else  1.22 # 1 + 1e-5 BEST 1.22
    # rep_penalty = 1e-5
    max_len = 590


    for i in tqdm(range(0, len(prompts), batch_size)):
        batch = prompts[i:i+batch_size]
        inputs = tokenizer(batch, return_tensors="pt", padding=True)
        
        output_sequences = model.generate(
            input_ids=inputs['input_ids'].to(model.device),
            attention_mask=inputs['attention_mask'].to(model.device),
            tokenizer = tokenizer,
            do_sample=False, # disable sampling to test if batching affects output
            pad_token_id=tokenizer.pad_token_id,
            bos_token_id=tokenizer.bos_token_id,
            forced_eos_token_id=tokenizer.eos_token_id,
            repetition_penalty=rep_penalty,
            stop_strings = '<|im_end|>',
            exponential_decay_length_penalty = (int(max_len * 0.7),1.1),
            max_new_tokens= max_len
        )
        completions_only = output_sequences[:, inputs['input_ids'].shape[1]:]
        outputs_decoded = tokenizer.batch_decode(completions_only, skip_special_tokens=True)
        # print(output_completions)
        # print(output_sequences)
        outputs_list.extend(outputs_decoded)
    return outputs_list

In [9]:
with open("test.json", "r") as f:
    test_raw = json.load(f)

In [10]:
dpo_prompts = [item['x'] for item in test_raw]

In [11]:
dpo_prompts = dpo_prompts[:100]
test_completions = test_completions[:100]

In [12]:
def create_to_revise(x, c, r_0):
    prompt = (
        f"Below is an instruction and my initial response. A criteria for evaluating the response is also provided.\n\n"
        f"Instruction:\n{x}\n\n"
        f"My Initial Response:\n{r_0}\n\n"
        f"Criteria: {c}\n\n"
        f"My initial response may be incorrect and may not follow the criteria. Please revise it using the ideal response as a guide and the criteria for improvement. "
        f"Return only the revised answer, without any additional comments or explanation."
    )
    return prompt

def get_revisions(r_0_list, raw_data):
    revised_prompts = []
    revisions = []
    for item, r_0 in zip(raw_data, r_0_list):
        x, y, c = item['x'], item['y'], item['c']
        revised_prompt = create_to_revise(x, c, r_0)
        revised_prompts.append(revised_prompt)
    return revised_prompts

In [13]:
BATCH_SIZE = 8

ct_wins = 0
total_evals = 0

# sft_completions = generate_batch(BATCH_SIZE, eval_prompts, sft_model, tokenizer, type = 'sft')
# with open('sft_completions_nr.pkl', 'rb') as f:
#     sft_completions = pickle.load(f)
# dpo_completions = generate_batch(BATCH_SIZE, dpo_prompts, dpo_model, tokenizer, type = 'dpo')
with open("dpo_completions.json", "r") as f:
    dpo_completions = json.load(f)
long_prompts = get_revisions(test_completions, test_raw)
ct_completions = generate_batch(BATCH_SIZE, long_prompts, dpo_model, tokenizer, type = 'dpo')

scores = {'dpo': [], 'ct': []}

for prompt, ct_response, dpo_response in zip(dpo_prompts, ct_completions, dpo_completions):
    ct_reward = get_reward_score(prompt, ct_response)
    ct_reward = float(ct_reward.split(':')[-1])
    dpo_reward = get_reward_score(prompt, dpo_response)
    dpo_reward = float(dpo_reward.split(':')[-1])

    scores['dpo'].append(dpo_reward)
    scores['ct'].append(ct_reward) 
    
    if ct_reward >= dpo_reward:
        ct_wins += 1
        
    total_evals += 1
    
winrate = ct_wins/total_evals
print(winrate)

100%|██████████| 13/13 [01:49<00:00,  8.40s/it]


0.46


In [14]:
with open("dpo_completions.json", "w") as f:
    json.dump(dpo_completions, f, indent=4)
    
with open("dpo_completions.json", "r") as f:
    dpo_completions = json.load(f)

In [25]:
rawz = ["Why don’t general physicians cover teeth?", "Which one is better for winter? Mesh boots or sandals?"]

initialz = generate_batch(2, rawz, ct_model, tokenizer, type = 'dpo')

100%|██████████| 1/1 [00:09<00:00,  9.58s/it]


In [29]:
print(initialz[0])

 Why not just have a dental office for the whole family?

The answer is that it’s too expensive. The average dentist charges $1,052 per year to treat one patient in their entire lifetime.

That doesn't include all of your other expenses like insurance premiums and maintenance fees – which can add up quickly if you're on multiple dentists or having regular check-ups with them every six months.

But what about when someone needs emergency care? That's where things get tricky because they need immediate attention from an expert who knows exactly how to do so without breaking the bank.

Enter: A Dental Emergency Center (DEC). This innovative solution allows patients to receive treatment at home while still being covered by their primary care physician.

A DEC offers several benefits:

* No out-of-pocket costs associated with transportation
* Accessible 24/7 coverage during emergencies
* Flexible scheduling options allowing doctors to schedule appointments as needed

By offering this conven

In [27]:
rawz = [
    {
        "x": "Why don’t general physicians cover teeth?",
        "y": initialz[0],
        "c": "The answer should be short, direct, and factual, addressing the core of the question and providing useful, precise information."
    },
    {
        "x":  "Which one is better for winter? Mesh boots or sandals?",
        "y": initialz[1],
        "c": "The answer should be short, direct, and factual, addressing the core of the question and providing useful, precise information."
    }
]

long_prompts = get_revisions(initialz, rawz)
generate_batch(2, long_prompts, ct_model, tokenizer, type = 'dpo')

100%|██████████| 1/1 [00:04<00:00,  4.42s/it]


[' ```python\ndef evaluate_response(response):\n    # Define key phrases based on user input\n    keywords = ["general", "physicians", "teeth"]\n    \n    # Check if response contains specific words/phrases related to the query\n    if \'why\' in response.lower() and len(keywords) > 0:\n        return f"Your response suggests focusing on {\', \'.join(keywords)}."\n        \n    else:\n        return None\n        \n# Test function with sample inputs\nresponse = """I\'m sorry, but I didn\'t understand your request.\nPlease try again."""\nprint(evaluate_response(response))\n``` \n\nNote: In real-world applications, handling sensitive data would require proper encryption techniques and ensuring compliance with relevant regulations. However, assuming hypothetical scenarios here, the above code snippet provides guidance on approaching similar questions effectively. If further clarification is required regarding the evaluation process itself, please feel free to ask!',
 ' ```python\ndef eval

In [23]:
winrate = ct_wins/total_evals
print(winrate)
print(total_evals)

0.46
100


In [24]:
import numpy as np

np.mean(np.array(scores['ct']))

-27.61625

In [20]:
print(dpo_prompts[1], '\n\n')

print(dpo_completions[1], '\n\n')

print(test_completions[1], '\n\n')

please briefly introduce the history of psychology. 


 The field has evolved over centuries, with significant contributions from various thinkers and researchers.

The earliest recorded attempts to study human behavior can be traced back to ancient civilizations such as Egypt, Greece, and Rome. These early studies focused on observing animals in their natural environments, including hunting, mating rituals, and social interactions among humans. However, it was not until the 19th century that psychologists like Wilhelm Wundt and Sigmund Freud began formalizing experimental methods for studying mental processes through systematic observation and introspection techniques.

In the mid-20th century, during World War II, American psychologist Carl Jung developed his concept of collective unconscious which he later expanded upon by Swiss psychiatrist Wolfgang Köhler who introduced psychoanalytic theory into Western culture. This period saw a surge in interest in psychological research due la

In [21]:
# # SPDX-License-Identifier: Apache-2.0

# from vllm import LLM, SamplingParams

# # Sample prompts.
# prompts = [
#     "Hello, my name is",
#     "The president of the United States is",
#     "The capital of France is",
#     "The future of AI is",
# ]
# # Create a sampling params object.
# sampling_params = SamplingParams(repetition_penalty=1.5, max_tokens=590)


# def main():
#     # Create an LLM.
#     llm = LLM(model="facebook/opt-125m")
    
#     llm = LLM(model="./finished_smoltalk")
#     # Generate texts from the prompts.
#     # The output is a list of RequestOutput objects
#     # that contain the prompt, generated text, and other information.
#     outputs = llm.generate(prompts, sampling_params)
#     # Print the outputs.
#     print("\nGenerated Outputs:\n" + "-" * 60)
#     for output in outputs:
#         prompt = output.prompt
#         generated_text = output.outputs[0].text
#         print(f"Prompt:    {prompt!r}")
#         print(f"Output:    {generated_text!r}")
#         print("-" * 60)


# if __name__ == "__main__":
#     main()

In [22]:
# import ray
# from ray.data.llm import vLLMEngineProcessorConfig, build_llm_processor
# import numpy as np

# config = vLLMEngineProcessorConfig(
#     model_source="unsloth/Llama-3.1-8B-Instruct",
#     engine_kwargs={
#         "enable_chunked_prefill": True,
#         "max_num_batched_tokens": 4096,
#         "max_model_len": 16384,
#     },
#     concurrency=1,
#     batch_size=32,
# )
# processor = build_llm_processor(
#     config,
#     preprocess=lambda row: dict(
#         messages=[
#             {"role": "user", "content": row["item"]}
#         ],
#         sampling_params=dict(
#             temperature=0.3,
#             max_tokens=250,
#         )
#     ),
#     postprocess=lambda row: dict(
#         answer=row["generated_text"],
#         **row  # This will return all the original columns in the dataset.
#     ),
# )

# ds = ray.data.from_items(["Start of the haiku is: Complete this for me..."])

# ds = processor(ds)
# ds.show(limit=1)