In [1]:
from datasets import load_dataset
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
from openai import OpenAI
import requests
import json
from tqdm import tqdm
import pickle

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
# Load and shuffle the full test split
train_ultrafeedback = load_dataset("HuggingFaceH4/ultrafeedback_binarized",
                                   revision="292c16329d921287c4166934cac1a6ad1e13a6c5",
                                   split = 'train_prefs')

test_ultrafeedback = load_dataset("HuggingFaceH4/ultrafeedback_binarized", 
                        revision="292c16329d921287c4166934cac1a6ad1e13a6c5", 
                        split="test_prefs").shuffle(seed=42)

test_sample = test_ultrafeedback.select(range(100))
eval_prompts = test_sample['prompt']


In [4]:
chat_template = (
    "{% set image_count = namespace(value=0) %}"
    "{% set video_count = namespace(value=0) %}"
    "{% for message in messages %}"
    "{% if loop.first and message['role'] != 'system' %}"
    "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
    "{% endif %}"
    "<|im_start|>{{ message['role'] }}\n"
    "{% if message['content'] is string %}"
    "{% if message['role'] == 'assistant' %}"
    "{% generation %}"
    "{{ message['content'] }}"
    "{% endgeneration %}"
    "{% else %}"
    "{{ message['content'] }}"
    "{% endif %}"
    "<|im_end|>\n"
    "{% else %}"
    "{% for content in message['content'] %}"
    "{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}"
    "{% set image_count.value = image_count.value + 1 %}"
    "{% if add_vision_id %}"
    "Picture {{ image_count.value }}: "
    "{% endif %}"
    "<|vision_start|><|image_pad|><|vision_end|>"
    "{% elif content['type'] == 'video' or 'video' in content %}"
    "{% set video_count.value = video_count.value + 1 %}"
    "{% if add_vision_id %}"
    "Video {{ video_count.value }}: "
    "{% endif %}"
    "<|vision_start|><|video_pad|><|vision_end|>"
    "{% elif 'text' in content %}"
    "{% if message['role'] == 'assistant' %}"
    "{% generation %}"
    "{{ content['text'] }}"
    "{% endgeneration %}"
    "{% else %}"
    "{{ content['text'] }}"
    "{% endif %}"
    "{% endif %}"
    "{% endfor %}"
    "<|im_end|>\n"
    "{% endif %}"
    "{% endfor %}"
    "{% if add_generation_prompt %}"
    "<|im_start|>assistant\n"
    "{% endif %}")

In [5]:
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B")
tokenizer.padding_side='left'
tokenizer.add_special_tokens({'pad_token': '<|pad|>',
                              'bos_token': '<|im_start|>',
                              'eos_token': '<|im_end|>'})


# sft_model = AutoModelForCausalLM.from_pretrained(SFT_MODEL, torch_dtype=torch.float16,).to(device)

# sft_model.eval()

# reference model
# dpo_model = AutoModelForCausalLM.from_pretrained(DPO_MODEL, torch_dtype=torch.float16,).to(device)
# dpo_model = PeftModel.from_pretrained(AutoModelForCausalLM.from_pretrained(SFT_MODEL, torch_dtype=torch.float16), 
#                                       DPO_MODEL).to(device)
dpo_model = AutoModelForCausalLM.from_pretrained("./dpo_model", torch_dtype=torch.float16,).to(device)

dpo_model.eval()

dpo_model.config.pad_token_id = tokenizer.pad_token_id
dpo_model.config.bos_token_id = tokenizer.bos_token_id
dpo_model.config.eos_token_id = tokenizer.eos_token_id

In [16]:
ex = train_ultrafeedback[0]

ex["chosen"]


# ex = [{'content': 'how does the fed impact markets. give me 200 words.',
#   'role': 'user'},
#  {'content': "The Federal Reserve, or the Fed, has a significant impact on financial markets through its monetary policy decisions. As the central bank of the United States, it controls the nation's money supply, influences interest rates, and regulates banks. Here's how the Fed affects markets in 200 words:\n\n1. Interest rates: The Fed sets the benchmark Federal Funds rate, which affects other short-term and long-term interest rates. Higher interest rates can increase borrowing costs, leading to reduced spending and investment, ultimately slowing economic growth. Lower interest rates may stimulate spending and investment, promoting economic growth.\n2. Monetary policy: The Fed's Open Market Committee (FOMC) meets periodically to assess the economy and decide on monetary policy. Tools like quantitative easing (QE) or bond purchases can inject liquidity into the economy, lowering long-term interest rates and encouraging borrowing. Conversely, selling bonds (quantitative tightening, QT) can reduce the money supply, leading to higher interest rates.\n3. Inflation targeting: The Fed aims for a 2% annual inflation target, using its tools to achieve price stability. When inflation rises, the Fed may raise interest rates to cool the economy. If deflation threatens, it may lower rates to stimulate growth.\n4. Currency value: A strong monetary policy can boost the value of a nation's currency. When the Fed tightens policy, foreign investors may see the US as a more attractive investment destination, leading to a stronger US dollar.\n5. Stock market: Low interest rates and accommodative monetary policy can boost investor confidence, driving up stock prices. Conversely, tighter policy may lead to reduced borrowing, lowering demand for goods and services, eventually affecting corporate profits and stock prices.\n6. Fixed-income markets: The Fed's actions directly impact bond yields. Higher interest rates lead to higher yields and vice versa. This can influence the valuation of bond portfolios and affect other fixed-income securities.\n7. Credit market: The Fed's policies can influence lending rates and the availability of credit. Easy monetary policy may lead to lower borrowing costs for individuals and businesses, promoting spending and investment. Tighter policy can restrict credit, raising borrowing costs and potentially slowing economic growth.\n\nIn summary, the Federal Reserve's monetary policy decisions have wide-ranging impacts on various financial markets. Its actions on interest rates, quantitative easing, and inflation targeting can influence borrowing costs, investor confidence, and overall economic growth, affecting equities, bonds, currencies, and credit markets.",
#   'role': 'assistant'}]

[{'content': 'Write a 1,000-word op-ed piece in a formal tone, analyzing and providing examples of the ways in which social media platforms have been utilized to spread extremist and violent ideologies. In your analysis, discuss the specific tactics that these groups use to spread their messages online and the effects of these tactics on both individuals and society. Additionally, provide possible solutions that could be implemented to combat the spread of these dangerous ideologies on social media. Your piece should be well-researched, citing reputable sources to support your arguments.',
  'role': 'user'},
 {'content': "Title: The Noxious Alliance of Social Media and Extremism: A Threat to Human Progress\n\nThe advent of social media platforms has irrevocably revolutionized the way we communicate, share and consume information. While these platforms provide access to information for a global audience and allow individuals to connect regardless of geographical barriers, they have also

In [15]:
tokenized = tokenizer.apply_chat_template(
    ex["messages"],
    tokenize = True,
    max_length = 50,
    padding = 'max_length',
    truncation = 'only_second',
    return_dict = True,
    return_assistant_tokens_mask=True,
    add_generation_prompt = False,
    chat_template = chat_template,
    return_tensors = 'pt'
)
torch.set_printoptions(profile="full")

Exception: Truncation error: Second sequence not provided

In [21]:
tokenized["input_ids"]

tensor([[151665, 151665, 151665, 151665, 151665, 151665, 151665, 151665, 151665,
         151665, 151665, 151644,   8948,    198,   2610,    525,    264,  10950,
          17847,     13, 151645,    198, 151644,    198, 151645,    198, 151644,
            198, 151645,    198, 151644,    198, 151645,    198, 151644,    198,
         151645,    198, 151644,    198, 151645,    198, 151644,    198, 151645,
            198, 151644,    198, 151645,    198]])

In [19]:
client = OpenAI(
    base_url="https://integrate.api.nvidia.com/v1", 
    api_key="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"  # MY KEY
)

def get_reward_score(prompt, response):
    messages = [
        {"role": "user", "content": prompt},
        {"role": "assistant", "content": response}
    ]
    result = client.chat.completions.create(
        model="nvidia/llama-3.1-nemotron-70b-reward",
        messages=messages
    )
    content = result.choices[0].message.content.strip()
    return content

In [28]:
def generate_batch(batch_size, prompts, model, tokenizer, type = 'sft'):
    outputs_list = []
    
    rep_penalty = 1.22 if type == 'dpo' else 1 + 1e-5
    # rep_penalty = 1e-5
    max_len = 590


    for i in tqdm(range(0, len(prompts), batch_size)):
        batch = prompts[i:i+batch_size]
        inputs = tokenizer(batch, return_tensors="pt", padding=True)
        
        output_sequences = model.generate(
            input_ids=inputs['input_ids'].to(model.device),
            attention_mask=inputs['attention_mask'].to(model.device),
            tokenizer = tokenizer,
            do_sample=False, # disable sampling to test if batching affects output
            pad_token_id=tokenizer.pad_token_id,
            bos_token_id=tokenizer.bos_token_id,
            forced_eos_token_id=tokenizer.eos_token_id,
            repetition_penalty=rep_penalty,
            stop_strings = '<|im_end|>',
            exponential_decay_length_penalty = (int(max_len * 0.7),1.1),
            max_new_tokens= max_len
        )
        completions_only = output_sequences[:, inputs['input_ids'].shape[1]:]
        outputs_decoded = tokenizer.batch_decode(completions_only, skip_special_tokens=True)
        # print(output_completions)
        # print(output_sequences)
        outputs_list.extend(outputs_decoded)
    return outputs_list

In [29]:
BATCH_SIZE = 8

dpo_wins = 0
total_evals = 0

# sft_completions = generate_batch(BATCH_SIZE, eval_prompts, sft_model, tokenizer, type = 'sft')
# with open('sft_completions_nr.pkl', 'rb') as f:
#     sft_completions = pickle.load(f)
dpo_completions = generate_batch(BATCH_SIZE, eval_prompts, dpo_model, tokenizer, type = 'dpo')
scores = {'dpo': [], 'sft': []}

for prompt, sft_response, dpo_response in zip(eval_prompts, sft_completions, dpo_completions):
    sft_reward = get_reward_score(prompt, sft_response)
    sft_reward = float(sft_reward.split(':')[-1])
    dpo_reward = get_reward_score(prompt, dpo_response)
    dpo_reward = float(dpo_reward.split(':')[-1])

    scores['dpo'].append(dpo_reward)
    scores['sft'].append(sft_reward) 
    
    if dpo_reward >= sft_reward:
        dpo_wins += 1
        
    total_evals += 1
    
winrate = dpo_wins/total_evals
print(winrate)

100%|██████████| 13/13 [01:45<00:00,  8.12s/it]


0.66


In [9]:
# with open('sft_completions_nr.pkl', 'wb') as f:
#     pickle.dump(sft_completions, f)

In [10]:
eval_prompts[:10]

["How can I use C++ to analyze a painting's form, content, and context, while also evaluating its use of color, composition, and brushwork? Additionally, how can I consider the artist's intentions and the painting's cultural significance, as well as any influences from previous artists or movements? Finally, how can I assess the painting's impact on subsequent works of art and discuss its reflection of societal values and beliefs of its time period? Please provide specific C++ code examples and utilize art terminology in your analysis. [Attach Painting]",
 'Given the text: With the rise of digital channels and platforms, there are more marketing technology solutions available than ever before. According to Scott Brinker\'s annual #MarTech landscape, there were more than 5,000 software solutions available for marketers in 2017.\nBut all this marketing technology is nothing without a strategy. Ultimately, a marketer\'s goal is to increase sales and drive customer retention.\nWhat\'s inte

In [11]:
idx = 0

In [12]:

import textwrap

wrapped_text = textwrap.fill(eval_prompts[idx], width=100)

print(wrapped_text)

How can I use C++ to analyze a painting's form, content, and context, while also evaluating its use
of color, composition, and brushwork? Additionally, how can I consider the artist's intentions and
the painting's cultural significance, as well as any influences from previous artists or movements?
Finally, how can I assess the painting's impact on subsequent works of art and discuss its
reflection of societal values and beliefs of its time period? Please provide specific C++ code
examples and utilize art terminology in your analysis. [Attach Painting]


In [13]:

wrapped_text = textwrap.fill(dpo_completions[idx], width=100)

print(wrapped_text)

  [Code Example]: Here is an example of using C++ to analyze a painting: ``` #include <iostream>
using namespace std; int main() {     // Load image     Image img = loadImage("painting.jpg");
// Analyze form     int width = getWidth(img);     int height = getHeight(img);     cout << "Width:
";     cout << width << endl;     cout << "Height: ";     cout << height << endl;          //
Evaluate content     string text = getWordCount(getText(img));     cout << "Content: " << text <<
endl;          // Consider colors     Color c1 = getColorAt(0, 50);     Color c2 = getColorAt(width
- 30, height - 40);     if (c1 != c2) {         cout << "Colors do not match" << endl;     } } ```
This code loads an image file into memory, analyzes it for shape and texture, evaluates its content
by counting words within the text area, considers the artwork's intended meaning through word count
calculations, and then discusses potential impacts. This approach allows you to evaluate multiple
aspects of the pain

In [14]:
wrapped_text = textwrap.fill(sft_completions[idx], width=100)

print(wrapped_text)

 [Attach C++ Code] To analyze a painting's form, content, and context, you can use C++ to create a
program that takes in the painting's dimensions, content, and context, and then evaluates its form,
content, and context using various C++ functions and algorithms. For example, you can use the
`std::cout` and `std::cin` libraries to read the dimensions and content of the painting, and then
use C++ functions such as `std::cout << "Form: " << form << std::endl;` and `std::cout << "Content:
" << content << std::endl;` to display the form and content of the painting. To evaluate the use of
color, composition, and brushwork, you can use C++ functions such as `std::cout << "Color: " <<
color << std::endl;` and `std::cout << "Composition: " << composition << std::endl;` to display the
color and composition of the painting. To consider the artist's intentions and the painting's
cultural significance, you can use C++ functions such as `std::cout << "Artist's Intention: " <<
artist << std::endl;` 

In [15]:
sft_reward = get_reward_score(eval_prompts[idx], sft_completions[idx])
sft_reward = float(sft_reward.split(':')[-1])
dpo_reward = get_reward_score(eval_prompts[idx], dpo_completions[idx])
dpo_reward = float(dpo_reward.split(':')[-1])


In [16]:
sft_reward, dpo_reward

(-26.75, -23.625)

In [17]:
ref_completions[-1]

NameError: name 'ref_completions' is not defined

In [None]:
# # SPDX-License-Identifier: Apache-2.0

# from vllm import LLM, SamplingParams

# # Sample prompts.
# prompts = [
#     "Hello, my name is",
#     "The president of the United States is",
#     "The capital of France is",
#     "The future of AI is",
# ]
# # Create a sampling params object.
# sampling_params = SamplingParams(repetition_penalty=1.5, max_tokens=590)


# def main():
#     # Create an LLM.
#     llm = LLM(model="facebook/opt-125m")
    
#     llm = LLM(model="./finished_smoltalk")
#     # Generate texts from the prompts.
#     # The output is a list of RequestOutput objects
#     # that contain the prompt, generated text, and other information.
#     outputs = llm.generate(prompts, sampling_params)
#     # Print the outputs.
#     print("\nGenerated Outputs:\n" + "-" * 60)
#     for output in outputs:
#         prompt = output.prompt
#         generated_text = output.outputs[0].text
#         print(f"Prompt:    {prompt!r}")
#         print(f"Output:    {generated_text!r}")
#         print("-" * 60)


# if __name__ == "__main__":
#     main()

In [None]:
# import ray
# from ray.data.llm import vLLMEngineProcessorConfig, build_llm_processor
# import numpy as np

# config = vLLMEngineProcessorConfig(
#     model_source="unsloth/Llama-3.1-8B-Instruct",
#     engine_kwargs={
#         "enable_chunked_prefill": True,
#         "max_num_batched_tokens": 4096,
#         "max_model_len": 16384,
#     },
#     concurrency=1,
#     batch_size=32,
# )
# processor = build_llm_processor(
#     config,
#     preprocess=lambda row: dict(
#         messages=[
#             {"role": "user", "content": row["item"]}
#         ],
#         sampling_params=dict(
#             temperature=0.3,
#             max_tokens=250,
#         )
#     ),
#     postprocess=lambda row: dict(
#         answer=row["generated_text"],
#         **row  # This will return all the original columns in the dataset.
#     ),
# )

# ds = ray.data.from_items(["Start of the haiku is: Complete this for me..."])

# ds = processor(ds)
# ds.show(limit=1)