In [2]:
import copy
import getpass
import json
import os

from openai import OpenAI
from tqdm.auto import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [8]:
from dotenv import load_dotenv
# Load environment variables (presumably from a local .env file) so the API key
# can be read with os.getenv further down.
load_dotenv()

True

# CREATE MOCKUP Q AND A

In [3]:
# Prompt asking for a comma-separated list of subtopics.
# Placeholders: {n_subtopics}, {topic}.
TOPIC_GENERATION_PROMPT_TEMPLATE = (
    "Given the following topic, generate a list of {n_subtopics} subtopics that are related to the topic.\n"
    "The topic is: {topic}\n"
    "The list must be without numbers, and without any description of the subtopics. "
    "The subtopics should be separated by a comma. "
    "There must be no other text than the list.\n"
)

In [4]:
# Prompt asking for newline-separated questions about one subtopic.
# Placeholders: {n_questions}, {sub_topic}.
QUESTION_PROMPT_TEMPLATE = (
    "Given the following topic, generate {n_questions} questions that could be asked about that topic. "
    "Your response should be in a list format.\n"
    "The topic is: {sub_topic}\n"
    "The list must be without numbers. "
    "The questions should be separated by a newline character. "
    "There must be no other text than the list.\n"
)

In [5]:
# Prompt asking for two candidate answers labelled "RESPONSE A:" / "RESPONSE B:".
# Placeholder: {question}.
RESPONSE_PROMPT_TEMPLATE = (
    "Given a question, generate 2 responses that could be given to that question. "
    "Your response should be in a list format.\n"
    "The question is: {question}\n"
    "\n"
    "The list must be in the format:\n"
    "\n"
    "RESPONSE A: Response A text here\n"
    "RESPONSE B: Response B text here\n"
)

In [9]:
nvidia_api_key = os.getenv('NVIDIA_API_KEY')
# Never echo the secret itself: cell outputs are saved with the notebook and
# would leak the key to anyone who can read the file. Report presence only.
print("NVIDIA_API_KEY loaded" if nvidia_api_key else "NVIDIA_API_KEY is not set")

nvapi-[REDACTED]


In [10]:
# Client for the OpenAI-compatible endpoint at integrate.api.nvidia.com,
# authenticated with the key read from the environment.
client = OpenAI(
    api_key=nvidia_api_key,
    base_url="https://integrate.api.nvidia.com/v1",
)

In [11]:
# Run settings: how many questions to request per subtopic, how many subtopics
# to request, and the seed topic they are derived from.
n_questions = 2
n_subtopics = 3
topic = "Computer Science"

In [12]:
def generate_subtopics(client, topic, n_subtopics,
                       model="nvidia/nemotron-4-340b-instruct",
                       temperature=0.2, top_p=0.7, max_tokens=1024):
    """Ask the chat model for subtopics of ``topic``.

    Args:
        client: Object exposing ``chat.completions.create`` (an OpenAI-style client).
        topic: Topic inserted into ``TOPIC_GENERATION_PROMPT_TEMPLATE``.
        n_subtopics: Number of subtopics requested in the prompt.
        model: Model identifier sent with the request.
        temperature: Sampling temperature sent with the request.
        top_p: Nucleus-sampling value sent with the request.
        max_tokens: Completion length limit sent with the request.

    Returns:
        The raw object returned by ``client.chat.completions.create``; unlike the
        sibling generators, the text is NOT extracted here.
    """
    prompt = TOPIC_GENERATION_PROMPT_TEMPLATE.format(topic=topic, n_subtopics=n_subtopics)
    return client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=temperature,
        top_p=top_p,
        max_tokens=max_tokens,
    )

In [13]:
# Request the subtopic list for the configured topic (raw completion object).
responses = generate_subtopics(client, topic, n_subtopics)

In [14]:
# Show the text of the first choice, i.e. the model's reply to the subtopic prompt.
print(responses.choices[0].message.content)

Data Structures, Algorithms, Artificial Intelligence



In [15]:
# Split the comma-separated reply into clean subtopic names. Pieces that are
# empty after stripping (e.g. from a trailing or doubled comma) are dropped so
# that no blank subtopic is sent to the question prompt later.
subtopic_list = [
    name.strip()
    for name in responses.choices[0].message.content.split(",")
    if name.strip()
]
subtopic_list

['Data Structures', 'Algorithms', 'Artificial Intelligence']

In [16]:
def generate_questions(client, sub_topic, n_questions,
                       model="nvidia/nemotron-4-340b-instruct",
                       temperature=0.2, top_p=0.7, max_tokens=1024):
    """Ask the chat model for questions about ``sub_topic``.

    Args:
        client: Object exposing ``chat.completions.create`` (an OpenAI-style client).
        sub_topic: Subtopic inserted into ``QUESTION_PROMPT_TEMPLATE``.
        n_questions: Number of questions requested in the prompt.
        model: Model identifier sent with the request.
        temperature: Sampling temperature sent with the request.
        top_p: Nucleus-sampling value sent with the request.
        max_tokens: Completion length limit sent with the request.

    Returns:
        The message content of the first choice, unparsed.
    """
    prompt = QUESTION_PROMPT_TEMPLATE.format(sub_topic=sub_topic, n_questions=n_questions)
    completion = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=temperature,
        top_p=top_p,
        max_tokens=max_tokens,
    )
    return completion.choices[0].message.content

In [17]:
def question_generator(client, subtopic_list, n_question):
    """Call ``generate_questions`` once per subtopic behind a progress bar.

    Returns a list holding one ``generate_questions`` result per entry of
    ``subtopic_list``, in the same order.
    """
    collected = []
    for current_subtopic in tqdm(subtopic_list):
        collected.append(generate_questions(client, current_subtopic, n_question))
    return collected

In [18]:
# One raw completion per subtopic; each still holds several questions.
question_list = question_generator(
    client,
    subtopic_list=subtopic_list,
    n_question=n_questions,
)

100%|██████████| 3/3 [00:09<00:00,  3.15s/it]


In [19]:
# Inspect the raw per-subtopic completion strings.
question_list

['What are the key differences between array and linked list data structures?\n\nCan you explain the concept of a hash table and its advantages in data storage and retrieval?\n',
 'What are some of the most common types of algorithms used in computer science?\n\nCan you explain how algorithms are used in everyday applications, such as social media or search engines?\n',
 'What are the potential benefits and risks of artificial intelligence for society?\n\nHow can we ensure that the development and use of artificial intelligence is ethical and unbiased?\n']

In [20]:
# Flatten the per-subtopic completions into one list of individual questions.
# Every line is stripped, and lines that are empty AFTER stripping are dropped,
# so whitespace-only lines and stray "\r" characters never become questions.
question_list_formatted = [
    line.strip()
    for question_set in question_list
    for line in question_set.split("\n")
    if line.strip()
]

In [21]:
# Inspect the flattened list of individual questions.
question_list_formatted

['What are the key differences between array and linked list data structures?',
 'Can you explain the concept of a hash table and its advantages in data storage and retrieval?',
 'What are some of the most common types of algorithms used in computer science?',
 'Can you explain how algorithms are used in everyday applications, such as social media or search engines?',
 'What are the potential benefits and risks of artificial intelligence for society?',
 'How can we ensure that the development and use of artificial intelligence is ethical and unbiased?']

In [22]:
# Number of questions after flattening (n_subtopics * n_questions if the model
# followed the prompt exactly).
len(question_list_formatted)

6

In [23]:
def generate_responses(client, question,
                       model="nvidia/nemotron-4-340b-instruct",
                       temperature=0.2, top_p=0.7, max_tokens=1024):
    """Ask the chat model for two candidate answers to ``question``.

    Args:
        client: Object exposing ``chat.completions.create`` (an OpenAI-style client).
        question: Question inserted into ``RESPONSE_PROMPT_TEMPLATE``.
        model: Model identifier sent with the request.
        temperature: Sampling temperature sent with the request.
        top_p: Nucleus-sampling value sent with the request.
        max_tokens: Completion length limit sent with the request.

    Returns:
        The message content of the first choice, unparsed. The prompt asks for
        the "RESPONSE A:" / "RESPONSE B:" layout, but nothing here enforces it.
    """
    prompt = RESPONSE_PROMPT_TEMPLATE.format(question=question)
    completion = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=temperature,
        top_p=top_p,
        max_tokens=max_tokens,
    )
    return completion.choices[0].message.content

In [24]:
def response_generator(client, question_list):
    """Call ``generate_responses`` once per question behind a progress bar.

    Returns a list holding one ``generate_responses`` result per entry of
    ``question_list``, in the same order.
    """
    answers = []
    for current_question in tqdm(question_list):
        answers.append(generate_responses(client, current_question))
    return answers

In [25]:
# One raw two-answer completion per question, in question order.
question_response_list = response_generator(
    client,
    question_list=question_list_formatted,
)

100%|██████████| 6/6 [01:07<00:00, 11.19s/it]


In [26]:
# Parse each raw completion into a {"question", "responses"} record.
question_response_pair_list = []
for question, response_set in zip(question_list_formatted, question_response_list):
    # Split once on the marker that separates the two answers.
    text_a, marker, text_b = response_set.partition("RESPONSE B:")
    if not marker:
        # Without the marker the two answers cannot be told apart; skip the
        # question rather than storing the same text under both keys.
        print(f"Skipping question with unparseable responses: {question!r}")
        continue
    question_response_pair_list.append(
        {
            "question": question,
            "responses": {
                "response_a": {"response": text_a.replace("RESPONSE A:", "").strip()},
                # Keep only the first paragraph of B. Strip first so that a
                # blank line right after the marker does not yield an empty answer.
                "response_b": {"response": text_b.strip().split("\n\n")[0].strip()},
            },
        }
    )

In [27]:
# Inspect the parsed question/response records.
question_response_pair_list

[{'question': 'What are the key differences between array and linked list data structures?',
  'responses': {'response_a': {'response': 'One key difference between array and linked list data structures is that arrays have a fixed size, while linked lists can grow or shrink in size dynamically. Additionally, arrays allow for constant time access to any element by index, whereas linked lists require sequential access from the head or tail, resulting in linear time complexity for accessing an element by index.'},
   'response_b': {'response': 'Another difference between array and linked list data structures is that arrays are stored contiguously in memory, which can lead to issues with memory allocation and fragmentation. Linked lists, on the other hand, are stored non-contiguously, with each node containing a pointer to the next node. This allows for easy insertion and deletion of elements, but can result in additional memory overhead due to the need to store pointers.'}}},
 {'question':

### Using Nemotron-4 340B Reward to Generate a Preference Dataset
With a dataset of questions that each have a pair of responses, we can straightforwardly generate a preference dataset that is compatible with DPO training, SteerLM reward model training, and RLHF reward model training, thanks to [Nemotron-4 340B Reward](https://build.nvidia.com/nvidia/nemotron-4-340b-reward), available through [build.nvidia.com](https://build.nvidia.com/explore/discover)!

First, an example of how to use the endpoint.

1. You must provide both a user message and an assistant message!
2. It will return a chat-style message with the scores, as well as the scores in the `logprobs` parameter.

The response package will include scores related to five attributes:

1. Helpfulness: Overall helpfulness of the response to the prompt.
2. Correctness: Inclusion of all pertinent facts without errors.
3. Coherence: Consistency and clarity of expression.
4. Complexity: Intellectual depth required to write response (i.e. whether the response can be written by anyone with basic language competency or requires deep domain expertise).
5. Verbosity: Amount of detail included in the response, relative to what is asked for in the prompt.

In [28]:
# Minimal example exchange: a user turn followed by the assistant reply.
messages = [
    {"role": speaker, "content": text}
    for speaker, text in (
        ("user", "Hello!"),
        ("assistant", "Hello! How can I help you today?"),
    )
]

In [29]:
# Send the example exchange to the reward model.
response = client.chat.completions.create(
    messages=messages,
    model="nvidia/nemotron-4-340b-reward",
)

In [30]:
# Inspect the full completion object returned by the reward model.
response

ChatCompletion(id='559708ee-9b60-42c8-a87f-2eaaf158b443', choices=[Choice(finish_reason='length', index=0, logprobs=ChoiceLogprobs(content=[ChatCompletionTokenLogprob(token='helpfulness', bytes=None, logprob=4.09375, top_logprobs=[]), ChatCompletionTokenLogprob(token='correctness', bytes=None, logprob=4.03125, top_logprobs=[]), ChatCompletionTokenLogprob(token='coherence', bytes=None, logprob=4.25, top_logprobs=[]), ChatCompletionTokenLogprob(token='complexity', bytes=None, logprob=0.5703125, top_logprobs=[]), ChatCompletionTokenLogprob(token='verbosity', bytes=None, logprob=1.109375, top_logprobs=[])], refusal=None), message=[ChatCompletionMessage(content='helpfulness:4.09375,correctness:4.03125,coherence:4.25,complexity:0.5703125,verbosity:1.109375', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None)])], created=None, model=None, object=None, service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=1, prompt_t

In [31]:
# The logprobs entries of the first choice: each has a `token` (attribute name)
# and a `logprob` (its value), which the helper below turns into a dict.
response.choices[0].logprobs.content

[ChatCompletionTokenLogprob(token='helpfulness', bytes=None, logprob=4.09375, top_logprobs=[]),
 ChatCompletionTokenLogprob(token='correctness', bytes=None, logprob=4.03125, top_logprobs=[]),
 ChatCompletionTokenLogprob(token='coherence', bytes=None, logprob=4.25, top_logprobs=[]),
 ChatCompletionTokenLogprob(token='complexity', bytes=None, logprob=0.5703125, top_logprobs=[]),
 ChatCompletionTokenLogprob(token='verbosity', bytes=None, logprob=1.109375, top_logprobs=[])]

In [32]:
def get_scores_from_response(openai_response_template):
    """Build a ``{token: logprob}`` mapping from a completion's first choice.

    Reads ``choices[0].logprobs.content`` and, for each entry, uses its
    ``token`` attribute as the key and its ``logprob`` attribute as the value.
    A token that appears more than once keeps its last value.
    """
    entries = openai_response_template.choices[0].logprobs.content
    return {entry.token: entry.logprob for entry in entries}

In [33]:
# Try the helper on the example completion from above.
get_scores_from_response(response)

{'helpfulness': 4.09375,
 'correctness': 4.03125,
 'coherence': 4.25,
 'complexity': 0.5703125,
 'verbosity': 1.109375}

In [34]:
def get_response_and_scores(client, model, question, response_content):
    """Score one question/answer exchange with ``model``.

    ``question`` is sent as the user turn and ``response_content`` as the
    assistant turn. The resulting completion is handed to
    ``get_scores_from_response`` and that function's result is returned.
    """
    user_turn = {"role": "user", "content": question}
    assistant_turn = {"role": "assistant", "content": response_content}

    completion = client.chat.completions.create(
        model=model,
        messages=[user_turn, assistant_turn],
    )
    return get_scores_from_response(completion)

In [35]:
def process_question_response_pairs(client, model, question_response_score_list):
    """Score both responses of every entry and merge the scores into the entry.

    For each entry, ``responses["response_a"]`` and ``responses["response_b"]``
    are scored via ``get_response_and_scores`` and then updated IN PLACE with
    the returned mapping. Nothing is returned.

    Scores are collected first and applied in a second pass, so no entry is
    modified until the scoring loop has finished.
    """
    pending = []

    for entry in tqdm(question_response_score_list):
        prompt_text = entry["question"]
        for key in ("response_a", "response_b"):
            candidate = entry["responses"][key]["response"]
            scores = get_response_and_scores(client, model, prompt_text, candidate)
            pending.append((entry, key, scores))

    for entry, key, scores in pending:
        entry["responses"][key].update(scores)

In [36]:
# Deep copy: scoring updates the nested response dicts in place, and a shallow
# list.copy() would share those dicts with question_response_pair_list, so the
# unscored list would be modified as well.
question_response_score_list = copy.deepcopy(question_response_pair_list)
process_question_response_pairs(client, "nvidia/nemotron-4-340b-reward", question_response_score_list)

100%|██████████| 6/6 [00:09<00:00,  1.58s/it]


In [37]:
# Inspect the first scored entry.
question_response_score_list[0]

{'question': 'What are the key differences between array and linked list data structures?',
 'responses': {'response_a': {'response': 'One key difference between array and linked list data structures is that arrays have a fixed size, while linked lists can grow or shrink in size dynamically. Additionally, arrays allow for constant time access to any element by index, whereas linked lists require sequential access from the head or tail, resulting in linear time complexity for accessing an element by index.',
   'helpfulness': 3.421875,
   'correctness': 3.3125,
   'coherence': 3.859375,
   'complexity': 1.8671875,
   'verbosity': 1.0859375},
  'response_b': {'response': 'Another difference between array and linked list data structures is that arrays are stored contiguously in memory, which can lead to issues with memory allocation and fragmentation. Linked lists, on the other hand, are stored non-contiguously, with each node containing a pointer to the next node. This allows for easy in

In [38]:
# Helpfulness cut-off for the export: a question is skipped only when BOTH of
# its responses score below this value. Also embedded in the output file name.
threshold = 3.0

In [None]:
# Export the kept responses as JSON Lines: one scored response per line, each
# carrying its question.
with open(f'synthetic_data_with_scores_filtered-{threshold}.jsonl', 'w', encoding='utf-8') as f:
    for item in question_response_score_list:
        question = item["question"]
        response_a = item["responses"]["response_a"]
        response_b = item["responses"]["response_b"]
        # Drop the question only when BOTH responses fall below the threshold.
        if response_a["helpfulness"] < threshold and response_b["helpfulness"] < threshold:
            continue
        for scored in (response_a, response_b):
            # Serialize a fresh dict with the question appended, so the export
            # does not mutate the entries of question_response_score_list.
            f.write(json.dumps({**scored, "question": question}))
            f.write('\n')