In [3]:
from utils.models import get_model, get_tokenizer, ground_truth_reward_model
from utils.data_loader import get_data
from utils.reward_scoring import generate_output, get_ground_truth_rewards, truncate_batch
from utils.data_preprocessing import response_quality_control
from peft import PeftModel
import pandas as pd
import numpy as np

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cuda:0


In [4]:

# CONFIG
batch_size = 100
for_sft = False
for_dpo = True
prompt_length = 20
max_length = 196
# Get Model
wdir = '.'
model_name = 'google/gemma-3-270m'

base_model = get_model(model_name).to('cuda')
base_model = PeftModel.from_pretrained(base_model,
                                      f'{wdir}/models/sft/best_model')
tok = get_tokenizer(model_name)

# Get data
base_data = get_data('train', 0, 12000)

Loading rows 0 to 12000 from 'stanfordnlp/imdb'...


In [5]:
first_responses = []
second_responses = []
negative_responses = []
first_response_scores = []
second_response_scores = []
negative_response_scores = []
# Loop from 0 to the end of the list, jumping by batch_size each time
for i in range(0, len(base_data), batch_size):
    # Slice the list to get the current batch
    batch = base_data[i : i + batch_size]

    print(f"Processing batch starting at index {i}: {batch[0][:20]}")
    responses = generate_output(base_model, tok, batch, prompt_length, max_length,2)
    scores = get_ground_truth_rewards(ground_truth_reward_model, responses)

    batch = truncate_batch(tok, batch, max_length)
    batch_score = get_ground_truth_rewards(ground_truth_reward_model, batch)

    first_responses.extend(responses[0::2])
    second_responses.extend(responses[1::2])
    negative_responses.extend(batch)
    first_response_scores.extend(scores[0::2])
    second_response_scores.extend(scores[1::2])
    negative_response_scores.extend(batch_score)


# Create a DataFrame from the lists
df = pd.DataFrame({
    'first_response': first_responses,
    'second_response': second_responses,
    'negative_response': negative_responses,
    'first_response_score': first_response_scores,
    'second_response_score': second_response_scores,
    'negative_response_score': negative_response_scores
})

# Example usage:
df_medium = response_quality_control(df, 6000)
df_low = response_quality_control(df, 12000)
df_high = df[['first_response', 'second_response', 'first_response_score', 'second_response_score']]


# Generate one permutation of indices from the original df
shuffled_index = np.random.permutation(df.index)

# Display the first few rows of the DataFrame
df_medium.reindex(shuffled_index).reset_index(drop=True).to_csv(f'{wdir}/data/dpo_data_medium.csv', index=False)
df_low.reindex(shuffled_index).reset_index(drop=True).to_csv(f'{wdir}/data/dpo_data_low.csv', index=False)
df_high.reindex(shuffled_index).reset_index(drop=True).to_csv(f'{wdir}/data/dpo_data_high.csv', index=False)

Processing batch starting at index 0: I rented I AM CURIOU
Processing batch starting at index 100: Terrible movie. Nuff
Processing batch starting at index 200: This is an action We
Processing batch starting at index 300: Unlike "The Adventur
Processing batch starting at index 400: This was an incredib
Processing batch starting at index 500: When I ordered this 


You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


Processing batch starting at index 600: I've been going thro
Processing batch starting at index 700: "Feast of All Saints
Processing batch starting at index 800: I've come to realise
Processing batch starting at index 900: Honestly before I wa
Processing batch starting at index 1000: Although I have to a
Processing batch starting at index 1100: The only thing remar
Processing batch starting at index 1200: Della Myers (Kim Bas
Processing batch starting at index 1300: It's hard to believe
Processing batch starting at index 1400: I wish more movies w
Processing batch starting at index 1500: Start of with the go
Processing batch starting at index 1600: NATIONAL LAMPOON'S C
Processing batch starting at index 1700: This film is not dev
Processing batch starting at index 1800: Formula flick of guy
Processing batch starting at index 1900: Need a lesson in pur
Processing batch starting at index 2000: Darcy and her young 
Processing batch starting at index 2100: I go to UCSB and tak
Processing b

OSError: Cannot save file into a non-existent directory: 'data'

In [8]:
# Display the first few rows of the DataFrame
df_medium.reindex(shuffled_index).reset_index(drop=True).to_csv(f'{wdir}/data/dpo_data_medium.csv', index=False)
df_low.reindex(shuffled_index).reset_index(drop=True).to_csv(f'{wdir}/data/dpo_data_low.csv', index=False)
df_high.reindex(shuffled_index).reset_index(drop=True).to_csv(f'{wdir}/data/dpo_data_high.csv', index=False)