In [3]:
!pip install  -q bert-score accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.9/116.9 kB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.2/244.2 kB[0m [31m16.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.9/72.9 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.5/92.5 MB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m108.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.4/77.4 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m96.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [4]:
import os
import torch
import torch.nn as nn
from torch.optim import AdamW
from torch.utils.data import DataLoader
import bert_score

from datasets import Dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    logging,
)
from peft import LoraConfig, get_peft_model
from trl import SFTTrainer, PPOTrainer, PPOConfig
import pandas as pd
import re

# Optional: For monitoring training progress
%load_ext tensorboard


In [5]:
import torch
from datasets import Dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments
from peft import LoraConfig, PeftModel
from trl import SFTTrainer

################################################################################
# Step 1: Define and process the raw dataset
################################################################################

# Specify the path to your text file (You can use a file from Google Drive or upload it manually)
file_path = '/content/sample_data/training_dataset_2000.txt'  # Adjust the path as necessary

# Read the data from the text file
with open(file_path, 'r', encoding='utf-8') as file:
    lines = file.readlines()

# Split the data into Bio, Interests, and Conversation Starter
def process_raw_dataset(lines):
    data = []
    for i in range(0, len(lines), 3):  # Assuming every 3 lines are Bio, Interests, and Conversation Starter
        # Check if there are enough lines left for a complete set
        if i + 2 < len(lines):
            bio = lines[i].strip().replace('Bio:', '').strip()
            interests = lines[i + 1].strip().replace('Interests:', '').strip()
            conversation_starter = lines[i + 2].strip().replace('Conversation Starter:', '').strip()

            # Only add non-empty bio, interests, and conversation starter
            if bio and interests and conversation_starter:
                data.append({
                    "Bio": bio,
                    "Interests": interests,
                    "Conversation Starter": conversation_starter
                })
        else:
            print(f"Skipping incomplete entry at lines {i} to {i+2}")

    return data

# Process the raw data
data = process_raw_dataset(lines)

# Define a function to transform the data into the LLaMA 2 format
def transform_to_llama_format(example):
    bio = example['Bio']
    interests = example['Interests']
    conversation_starter = example['Conversation Starter']

    # The bio and interests are part of the human input, and the conversation starter is the assistant's response
    prompt = f"<s>[INST] Bio: {bio} Interests: {interests} [/INST] {conversation_starter} </s>"
    return {"text": prompt}

# Apply the transformation to all data
formatted_data = [transform_to_llama_format(item) for item in data]

# Debug: print the first few transformed entries
print("First 5 transformed entries:")
for entry in formatted_data[:5]:
    print(entry)

# Convert the list to a Huggingface Dataset
dataset = Dataset.from_dict({"text": [item['text'] for item in formatted_data]})

################################################################################
# Step 2: Model, dataset, and fine-tuning configuration
################################################################################

# The model you want to train from the Hugging Face hub
model_name = "EleutherAI/gpt-neo-1.3B"

# Name for the fine-tuned model
new_model = "Llama-2-7b-chat-finetune"

# LoRA attention dimension
lora_r = 64

# Alpha parameter for LoRA scaling
lora_alpha = 16

# Dropout probability for LoRA layers
lora_dropout = 0.1

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

# TrainingArguments parameters
output_dir = "./results"
num_train_epochs = 1
fp16 = False
bf16 = False
per_device_train_batch_size = 4
per_device_eval_batch_size = 4
gradient_accumulation_steps = 1
gradient_checkpointing = True
max_grad_norm = 0.3
learning_rate = 2e-4
weight_decay = 0.001
optim = "paged_adamw_32bit"
lr_scheduler_type = "cosine"
max_steps = -1
warmup_ratio = 0.03
group_by_length = True
save_steps = 0
logging_steps = 25

# SFT parameters
max_seq_length = None
packing = False
device_map = {"": 0}

################################################################################
# Step 3: Define the compute dtype for 4-bit model
################################################################################

compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# Check GPU compatibility with bfloat16
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

################################################################################
# Step 4: Load the base model and tokenizer
################################################################################

# Load base model
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map=device_map
)
model.config.use_cache = False
model.config.pretraining_tp = 1

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

################################################################################
# Step 5: Load LoRA configuration
################################################################################

peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_r,
    bias="none",
    task_type="CAUSAL_LM",
)

################################################################################
# Step 6: Set training parameters
################################################################################

training_arguments = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    fp16=fp16,
    bf16=bf16,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=group_by_length,
    lr_scheduler_type=lr_scheduler_type,
    report_to="tensorboard"
)

################################################################################
# Step 7: Train the model with SFTTrainer
################################################################################

trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    args=training_arguments,
    packing=packing,
)

# Train the model
trainer.train()

# # Save the fine-tuned model
# model.save_pretrained(new_model)
# tokenizer.save_pretrained(new_model)


First 5 transformed entries:
{'text': '<s>[INST] Bio: A bookworm who loves coffee and quiet mornings. I’m always looking for my next read. Interests: Adventure, Camping, Road trips [/INST] What’s your go-to book when you’re out on a camping trip </s>'}
{'text': '<s>[INST] Bio: Proud dog mom! I love taking long walks with my pup and exploring new parks. Interests: Fitness, Pets, Food [/INST] What’s your dogs name? I would love to meet it and perhaps a cute picnic in one of your favorite parks? </s>'}
{'text': '<s>[INST] Bio: I’m a night owl who loves stargazing and late-night conversations. Looking for someone to share deep talks with. Interests: Adventure, Camping, Road trips [/INST] I love camping too! What’s your favorite spot for stargazing? </s>'}
{'text': '<s>[INST] Bio: Entrepreneur by day, DJ by night. I love music, dancing, and creating new things. Interests: Music, Technology, dance [/INST] I would love to come to one of your next DJ night, that sounds so cool! </s>'}
{'text':

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.35k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/5.31G [00:00<?, ?B/s]

Some weights of GPTNeoForCausalLM were not initialized from the model checkpoint at EleutherAI/gpt-neo-1.3B and are newly initialized: ['lm_head.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tokenizer_config.json:   0%|          | 0.00/200 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/90.0 [00:00<?, ?B/s]



Map:   0%|          | 0/11 [00:00<?, ? examples/s]

You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


Step,Training Loss


TrainOutput(global_step=3, training_loss=3.802546819051107, metrics={'train_runtime': 2.7519, 'train_samples_per_second': 3.997, 'train_steps_per_second': 1.09, 'total_flos': 3109973852160.0, 'train_loss': 3.802546819051107, 'epoch': 1.0})

In [6]:
# Step 1: Define and process the test dataset

# Specify the path to your test text file
test_file_path = '/content/sample_data/test.txt'  # Adjust the path as necessary

# Read the data from the test text file
with open(test_file_path, 'r', encoding='utf-8') as file:
    test_lines = file.readlines()

# Function to process raw test data
def process_test_data(lines):
    data = []
    for i in range(0, len(lines), 3):
        if i + 2 < len(lines):
            bio = lines[i].strip().replace('Bio:', '').strip()
            interests = lines[i + 1].strip().replace('Interests:', '').strip()
            conversation_starter = lines[i + 2].strip().replace('Conversation Starter:', '').strip()
            if bio and interests and conversation_starter:
                data.append({
                    "Bio": bio,
                    "Interests": interests,
                    "Conversation Starter": conversation_starter
                })
        else:
            print(f"Skipping incomplete entry at lines {i} to {i+2}")
    return data

# Process the test data
test_data = process_test_data(test_lines)

# Debug: Print the number of processed test entries
print(f"Total test entries processed: {len(test_data)}")


Total test entries processed: 10


Generate Conversation Starters Using the Fine-Tuned Model

In [8]:
import re
import re

# Function to generate conversation starters
def generate_conversation_starter(bio, interests, max_length=100):
    prompt = f"Bio: {bio}\nInterests: {interests}\n flirty one sentence conversation starter:"

    # Tokenize the prompt and generate response
    inputs = tokenizer(prompt, return_tensors="pt", padding=True).to(model.device)
    output = model.generate(
        **inputs,
        max_length=max_length,
        num_return_sequences=1,
        pad_token_id=tokenizer.eos_token_id,
        do_sample=True,  # Enable sampling for more variety
        top_k=50,        # Use top-k sampling for more focused generation
        temperature=0.7  # Lower temperature for better coherence
    )

    # Decode the generated output
    conversation_starter = tokenizer.decode(output[0], skip_special_tokens=True)

    # Remove the prompt from the generated text if necessary
    if conversation_starter.startswith(prompt):
        conversation_starter = conversation_starter[len(prompt):].strip()

    # Extract the first line (or two) as the final conversation starter
    conversation_starter_sentences = re.split(r'[.?!]', conversation_starter)

    if len(conversation_starter_sentences) > 1:
        # Return the first two sentences (if available)
        return f"{conversation_starter_sentences[0].strip()}. {conversation_starter_sentences[1].strip()}."
    elif conversation_starter_sentences:
        # Return the first sentence (if there's only one)
        return conversation_starter_sentences[0].strip() + "."
    else:
        # Fallback if no valid sentence is found
        return "Could you tell me more about yourself?"

# Generate conversation starters for the test data
test_responses = [generate_conversation_starter(item['Bio'], item['Interests']) for item in test_data]

# Debug: Print some generated responses
print("Generated Conversation Starters:")
for idx, response in enumerate(test_responses[:10]):  # Show first 10 responses
    print(f"Generated Response {idx + 1}: {response}")



Generated Conversation Starters:
Generated Response 1: Do you have a friend who’s got a sense of humor. I’m a very outgoing person.
Generated Response 2: "I met a guy and we just started talking and I went to his house and had sex with him and he didn't know I was a writer, that's how we met. ".
Generated Response 3: I love when the weather comes up cold; but, I hate when it’s hot. Current Affiliation: I am a single mom, who is the most important person in my life.
Generated Response 4: “I found out about a new coffee shop in town that has a patio with a nice view. You must visit their coffee shop.
Generated Response 5: “I’m really interested in the way that you can use your body to express yourself in a way that is really personal and not just in a superficial way. So that’s why I’m interested in how you can use your body.
Generated Response 6: “What is the name of this movie. ”

Bored: I like a good movie with a good plot.
Generated Response 7: I have a friend who is a fitness fanati

Verify and Prepare Data for Manual Rating

In [9]:
# Function to load true conversation starters
def load_true_conversation_starters(lines):
    starters = []
    for i in range(0, len(lines), 3):
        if i + 2 < len(lines):
            conversation_starter = lines[i + 2].strip().replace('Conversation Starter:', '').strip()
            if conversation_starter:
                starters.append(conversation_starter)
    return starters

# Load the ground truth conversation starters
true_conversation_starters = load_true_conversation_starters(test_lines)

# Debug: Print a sample of true conversation starters
for idx, starter in enumerate(true_conversation_starters[:5]):  # Show first 5 true conversation starters
    print(f"True Conversation Starter {idx + 1}: {starter}")


True Conversation Starter 1: "What’s the toughest hike you’ve ever conquered? I’m up for the next one—are you?"
True Conversation Starter 2: "What’s your favorite line from a poem you’ve written? I’d love to hear it!"
True Conversation Starter 3: "Which country totally surprised you when you visited? Maybe we could plan the next surprise together!"
True Conversation Starter 4: "Where’s the best spot to watch the sunset with a cup of coffee? I’m all ears for recommendations."
True Conversation Starter 5: "What’s the one dish you absolutely have to try at least once in your life?"


Create a DataFrame for Evaluation

In [10]:
import pandas as pd

# Prepare data for the DataFrame
data_eval = []
for idx in range(len(test_data)):
    bio = test_data[idx]['Bio']
    interests = test_data[idx]['Interests']
    generated_starter = test_responses[idx] if idx < len(test_responses) else "No Generated Starter"
    true_starter = true_conversation_starters[idx] if idx < len(true_conversation_starters) else "No True Starter"
    data_eval.append([bio, interests, generated_starter, true_starter])

# Create the DataFrame
df = pd.DataFrame(data_eval, columns=['Bio', 'Interests', 'Generated Conversation Starter', 'True Conversation Starter'])

# Add an empty column for manual rating
df['Manual Rating'] = ""

# Display the first 5 rows to verify
print(df.head())

# Save the DataFrame to a CSV for manual rating
df.to_csv('/content/sample_data/conversation_starters_evaluation.csv', index=False)
print("Rated data saved to 'conversation_starters_evaluation.csv'")


                                                 Bio  \
0  A weekend warrior who loves hiking and explori...   
1  Creative writer by day, poetry enthusiast by n...   
2  A traveler who’s seen 30 countries and countin...   
3  Coffee addict and sunset chaser. Let’s talk ab...   
4  Full-time student, part-time foodie. I’m alway...   

                         Interests  \
0          Hiking, Fitness, Nature   
1      Writing, Poetry, Literature   
2  Traveling, Photography, Culture   
3          Coffee, Sunsets, Travel   
4          Food, Cooking, Learning   

                      Generated Conversation Starter  \
0  Do you have a friend who’s got a sense of humo...   
1  "I met a guy and we just started talking and I...   
2  I love when the weather comes up cold; but, I ...   
3  “I found out about a new coffee shop in town t...   
4  “I’m really interested in the way that you can...   

                           True Conversation Starter Manual Rating  
0  "What’s the toughest hike

TEST on comparision test set

In [11]:
# Specify the path to your new comparison test set
comparison_test_file_path = '/content/sample_data/test_for_comparision.txt'

# Read the data from the new test set
with open(comparison_test_file_path, 'r', encoding='utf-8') as file:
    comparison_test_lines = file.readlines()

# Process the comparison test set similar to the original test set
comparison_test_data = process_test_data(comparison_test_lines)

# Debug: Print the number of processed comparison test entries
print(f"Total comparison test entries processed: {len(comparison_test_data)}")


Total comparison test entries processed: 10


In [12]:
# Generate conversation starters for the comparison test data
comparison_test_responses = [generate_conversation_starter(item['Bio'], item['Interests']) for item in comparison_test_data]

# Debug: Print some generated responses from the new test set
print("Generated Conversation Starters for Comparison Test:")
for idx, response in enumerate(comparison_test_responses[:10]):  # Show first 10 responses
    print(f"Generated Response {idx + 1}: {response}")


  return fn(*args, **kwargs)


Generated Conversation Starters for Comparison Test:
Generated Response 1: “I’m a big fan of the internet but I also have a strong desire to write a few lines to people who may have seen my blog,”

Mentoring: I try to mentor students. I have a few groups that I mentor and I’m always.
Generated Response 2: “My main thing is to keep that energy up. I usually don’t have time for a social life.
Generated Response 3: “You are so beautiful, I can’t believe I’m meeting you today. ”

I can’t believe I’m meeting you today, babe.
Generated Response 4: "Have you ever found a great solution to a problem. ”

My name is Shaya and I'm a home improvement junkie.
Generated Response 5: “My favorite movie is…”

Why you should be a writer: I’m just passionate about writing and have a lot to say. I love books, movies, and TV.
Generated Response 6: I am a passionate woman who loves to create. I am a woman who is constantly trying new things and always trying out new clothes, colors, and styles.
Generated Re

In [13]:
# Load the ground truth conversation starters for the comparison test set
# Load the ground truth conversation starters for the comparison test set
comparison_true_conversation_starters = load_true_conversation_starters(comparison_test_lines)

# Debug: Print a sample of true conversation starters for comparison
print("True Conversation Starters for Comparison Test:")
for idx, starter in enumerate(comparison_true_conversation_starters[:5]):  # Show first 5 true conversation starters
    print(f"True Conversation Starter {idx + 1}: {starter}")



True Conversation Starters for Comparison Test:
True Conversation Starter 1: "What’s the most underrated artist you’re listening to right now? I need some new tracks."
True Conversation Starter 2: "What’s your game day ritual? Maybe I can cheer you on at the next match!"
True Conversation Starter 3: "What’s your favorite beach to unwind at? Let’s see if we have the same spot."
True Conversation Starter 4: "What’s your latest project? Maybe we can build something awesome together!"
True Conversation Starter 5: "What’s your ultimate feel-good movie for a lazy Sunday? I need some recs!"


BERT-SCORE

In [14]:
from bert_score import score

# Calculate BERTScore for the generated and true conversation starters
P, R, F1 = score(comparison_test_responses, comparison_true_conversation_starters, lang='en', verbose=True)

# Print the BERTScore for Precision (P), Recall (R), and F1-score
print(f"BERTScore Precision: {P.mean():.4f}")
print(f"BERTScore Recall: {R.mean():.4f}")
print(f"BERTScore F1-score: {F1.mean():.4f}")





tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/1 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/1 [00:00<?, ?it/s]

done in 0.37 seconds, 27.36 sentences/sec
BERTScore Precision: 0.8568
BERTScore Recall: 0.8564
BERTScore F1-score: 0.8565


Define the Reward Model Class

In [15]:
class RewardModel(nn.Module):
    def __init__(self, base_model):
        super(RewardModel, self).__init__()
        self.base_model = base_model
        self.reward_head = nn.Linear(base_model.config.hidden_size, 1)  # Predicts a single reward score

    def forward(self, input_ids, attention_mask=None):
        # Forward pass through the base model to obtain hidden states
        outputs = self.base_model(input_ids=input_ids, attention_mask=attention_mask, output_hidden_states=True)

        # Extract the last hidden state of the last token
        hidden_states = outputs.hidden_states[-1]  # Shape: [batch_size, sequence_length, hidden_size]
        last_hidden_state = hidden_states[:, -1, :]  # Shape: [batch_size, hidden_size]

        # Compute the reward score
        reward = self.reward_head(last_hidden_state).squeeze()  # Shape: [batch_size]
        return reward


 Load and Prepare the Reward Dataset

In [16]:
# Define the path to your rated dataset
csv_file = "/content/sample_data/conversation_starters_evaluation.csv"  # Adjust this path accordingly

# Load the reward dataset
reward_data = pd.read_csv(csv_file)

# Convert the pandas DataFrame to a Hugging Face Dataset
reward_dataset = Dataset.from_pandas(reward_data)


Define the Collate Function for DataLoader

In [17]:
def collate_fn(batch):
    # Combine Generated and True Conversation Starters
    input_texts = [f"Generated: {b['Generated Conversation Starter']} True: {b['True Conversation Starter']}" for b in batch]

    # Extract manual ratings
    ratings = torch.tensor([b['Manual Rating'] for b in batch], dtype=torch.float32)

    # Tokenize the input texts
    encoding = tokenizer(input_texts, return_tensors='pt', padding=True, truncation=True)

    return encoding, ratings


Create DataLoader for the Reward Dataset

In [18]:
# Define batch size
batch_size = 8  # Adjust based on your GPU memory

# Create DataLoader
reward_dataloader = DataLoader(
    reward_dataset,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=collate_fn,
    num_workers=2,  # Adjust based on your CPU
    pin_memory=True
)


Initialize the Reward Model

In [19]:
# Initialize the reward model using the fine-tuned model's base
reward_model = RewardModel(model).to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))


Define Optimizer and Loss Function

In [20]:
# Define optimizer
optimizer = AdamW(reward_model.parameters(), lr=5e-5)

# Define loss function
loss_fn = nn.MSELoss()  # Mean Squared Error Loss for continuous reward scores


Training Loop for the Reward Model

In [21]:
import torch
from torch.utils.data import DataLoader
from transformers import AutoTokenizer
from datasets import Dataset
import pandas as pd
import re

# Assuming RewardModel class is already defined
class RewardModel(nn.Module):
    def __init__(self, base_model):
        super(RewardModel, self).__init__()
        self.base_model = base_model
        self.reward_head = nn.Linear(base_model.config.hidden_size, 1)  # Use hidden_size of the model

    def forward(self, input_ids, attention_mask=None):
        # Get the model outputs (hidden states)
        outputs = self.base_model(input_ids=input_ids, attention_mask=attention_mask, output_hidden_states=True)

        # Get the hidden states from the last hidden layer
        hidden_states = outputs.hidden_states[-1]  # shape: [batch_size, sequence_length, hidden_size]

        # Take the hidden state of the last token in the sequence for each sample
        last_hidden_state = hidden_states[:, -1, :]  # shape: [batch_size, hidden_size]

        # Apply the reward head to get the reward score
        reward = self.reward_head(last_hidden_state).squeeze()  # Predict the reward score
        return reward

# Define the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize and move the reward model to the device
reward_model = RewardModel(model).to(device)

# Define the optimizer and loss function
optimizer = AdamW(reward_model.parameters(), lr=5e-5)
loss_fn = nn.MSELoss()  # Use Mean Squared Error Loss for the continuous reward scores

# Training loop for the Reward Model
epochs = 10
reward_model.train()
for epoch in range(epochs):
    total_loss = 0.0
    for batch in reward_dataloader:
        encoding, ratings = batch
        input_ids = encoding['input_ids'].to(device)
        attention_mask = encoding['attention_mask'].to(device)
        ratings = ratings.to(device)

        # Forward pass through the model
        optimizer.zero_grad()
        predicted_rewards = reward_model(input_ids=input_ids, attention_mask=attention_mask).squeeze()

        # Compute the loss
        loss = loss_fn(predicted_rewards, ratings)
        loss.backward()

        # Update parameters
        optimizer.step()

        total_loss += loss.item()

    avg_loss = total_loss / len(reward_dataloader)
    print(f"Epoch {epoch + 1}/{epochs} - Average Loss: {avg_loss:.4f}")


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


Epoch 1/10 - Average Loss: 5.2600
Epoch 2/10 - Average Loss: 5.4504
Epoch 3/10 - Average Loss: 6.3825
Epoch 4/10 - Average Loss: 2.8421
Epoch 5/10 - Average Loss: 4.7173
Epoch 6/10 - Average Loss: 2.6537
Epoch 7/10 - Average Loss: 3.6472
Epoch 8/10 - Average Loss: 1.9862
Epoch 9/10 - Average Loss: 1.4932
Epoch 10/10 - Average Loss: 1.4913


In [None]:
# # Define the path to save the reward model
# reward_model_path = "./reward_model.pth"

# # Save the model state_dict
# torch.save(reward_model.state_dict(), reward_model_path)
# print(f"Reward model saved at: {reward_model_path}")


Reward model saved at: ./reward_model.pth


Fine-Tuning with PPO Using the Reward Model

Create a PPO Dataset Class

In [22]:
from torch.utils.data import Dataset

class PPODataset(Dataset):
    def __init__(self, dataframe):
        """
        Args:
            dataframe (pd.DataFrame): DataFrame containing 'Bio', 'Interests',
                                      'Generated Conversation Starter', and 'Manual Rating'.
        """
        self.prompts = dataframe.apply(
            lambda row: f"Bio: {row['Bio']}\nInterests: {row['Interests']}\nflirty one sentence conversation starter:",
            axis=1
        ).tolist()
        self.responses = dataframe['Generated Conversation Starter'].tolist()
        self.rewards = dataframe['Manual Rating'].tolist()

    def __len__(self):
        return len(self.prompts)

    def __getitem__(self, idx):
        return {
            'prompt': self.prompts[idx],
            'response': self.responses[idx],
            'reward': self.rewards[idx]
        }


Initialize the PPO Dataset and DataLoader

In [23]:
import pandas as pd
from torch.utils.data import DataLoader

# Load the rated dataset
csv_file = "/content/sample_data/conversation_starters_evaluation.csv"  # Adjust this path as necessary
reward_data = pd.read_csv(csv_file)

# Initialize the PPO Dataset
ppo_dataset = PPODataset(reward_data)

# Create a DataLoader for PPO training
ppo_dataloader = DataLoader(
    ppo_dataset,
    batch_size=1,       # Set to 1 for simplicity; adjust as needed
    shuffle=True,       # Shuffle for better training
    num_workers=2,      # Adjust based on your CPU
    pin_memory=True     # Improves data transfer speed to GPU
)



Initializing the PPO Trainer

In [24]:
import torch
from trl import PPOTrainer, PPOConfig
from trl.models import AutoModelForCausalLMWithValueHead
# Define the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize and move the PPO model to the device
model_with_value_head = AutoModelForCausalLMWithValueHead.from_pretrained(model)
model_with_value_head.gradient_checkpointing_enable()
model_with_value_head.to(device)




AutoModelForCausalLMWithValueHead(
  (pretrained_model): GPTNeoForCausalLM(
    (transformer): GPTNeoModel(
      (wte): Embedding(50257, 2048)
      (wpe): Embedding(2048, 2048)
      (drop): Dropout(p=0.0, inplace=False)
      (h): ModuleList(
        (0-23): 24 x GPTNeoBlock(
          (ln_1): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
          (attn): GPTNeoAttention(
            (attention): GPTNeoSelfAttention(
              (attn_dropout): Dropout(p=0.0, inplace=False)
              (resid_dropout): Dropout(p=0.0, inplace=False)
              (k_proj): Linear4bit(in_features=2048, out_features=2048, bias=False)
              (v_proj): Linear4bit(
                in_features=2048, out_features=2048, bias=False
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=2048, out_features=64, bias=False)
                )
 

Define PPO Configuration

In [25]:
# Define PPO Configuration with Memory Optimizations
ppo_config = PPOConfig(
    learning_rate=5e-6,      # Adjust as necessary
    batch_size=1,            # Reduced batch size for memory constraints
    ppo_epochs=4,            # Number of PPO iterations over the data
    max_grad_norm=0.3,       # Gradient clipping to prevent exploding gradients
    log_with=None,           # Disable logging to external services
)


. Initialize PPOTrainer

In [26]:
# Initialize PPOTrainer
ppo_trainer = PPOTrainer(
    config=ppo_config,
    model=model_with_value_head,
    ref_model=model_with_value_head,   # Reference model can be the same as the base model
    tokenizer=tokenizer,
)




Defining the PPO Training Loop

In [27]:
# Number of PPO training epochs
ppo_epochs = 3

# Set the reward model to evaluation mode
reward_model.eval()

# Fine-tune with PPO
for epoch in range(ppo_epochs):
    print(f"Starting PPO Epoch {epoch + 1}/{ppo_epochs}")
    for batch in ppo_dataloader:
        prompt = batch['prompt']
        response = batch['response']
        reward = batch['reward']

        # Since the DataLoader has batch_size=1, extract the first element
        prompt = prompt[0]
        response = response[0]
        reward = reward[0]

        # Tokenize the prompt without squeezing to retain batch dimension
        tokenized_prompt = tokenizer(prompt, return_tensors='pt').to(device)
        input_ids = tokenized_prompt['input_ids']  # Shape: [1, query_length]
        attention_mask = tokenized_prompt['attention_mask']  # Shape: [1, query_length]

        # Tokenize the response without squeezing to retain batch dimension
        tokenized_response = tokenizer(response, return_tensors='pt').to(device)
        response_ids = tokenized_response['input_ids']  # Shape: [1, response_length]
        response_attention_mask = tokenized_response['attention_mask']  # Shape: [1, response_length]

        # Compute rewards using the reward model
        with torch.no_grad():
            reward_score = reward_model(input_ids=response_ids, attention_mask=response_attention_mask)
            reward_score = reward_score.squeeze()  # Shape: [] or [1] depending on implementation

        # Prepare the lists of tensors
        queries = [input_ids.squeeze(0)]                # List of torch.LongTensor with shape [query_length]
        responses = [response_ids.squeeze(0)]           # List of torch.LongTensor with shape [response_length]
        scores = [reward_score]                          # List of torch.FloatTensor

        # PPO training step with positional arguments
        ppo_trainer.step(
            queries,      # List of torch.LongTensor
            responses,    # List of torch.LongTensor
            scores        # List of torch.FloatTensor
        )

        # Free up memory
        del prompt, response, reward, tokenized_prompt, tokenized_response, reward_score, queries, responses, scores
        torch.cuda.empty_cache()

    print(f"PPO Epoch {epoch + 1} completed.")


Starting PPO Epoch 1/3


  std_scores = torch.stack(data["scores"]).std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()


PPO Epoch 1 completed.
Starting PPO Epoch 2/3
PPO Epoch 2 completed.
Starting PPO Epoch 3/3
PPO Epoch 3 completed.


In [None]:
from trl import PPOTrainer
help(PPOTrainer.step)


Help on function step in module trl.trainer.ppo_trainer:

step(self, queries: List[torch.LongTensor], responses: List[torch.LongTensor], scores: List[torch.FloatTensor])
    Run a PPO optimisation step given a list of queries, model responses, and rewards.
    
    Args:
        queries (List[`torch.LongTensor`]):
            List of tensors containing the encoded queries of shape (`query_length`)
        responses (List[`torch.LongTensor`]):
            List of tensors containing the encoded responses of shape (`response_length`)
        scores (List[`torch.FloatTensor`]):
            List of tensors containing the scores.
    
    Returns:
        `dict[str, Any]`: A summary of the training statistics



Testing on the PPO trained "model"

Testing for comparision

In [38]:
import re
import re

# Function to generate conversation starters
def generate_conversation_starter(bio, interests, max_length=100):
    prompt = f"Bio: {bio}\nInterests: {interests}\n flirty one sentence conversation starter:"

    # Tokenize the prompt and generate response
    inputs = tokenizer(prompt, return_tensors="pt", padding=True).to(model.device)
    output = model.generate(
        **inputs,
        max_length=max_length,
        num_return_sequences=1,
        pad_token_id=tokenizer.eos_token_id,
        do_sample=True,  # Enable sampling for more variety
        top_k=30,        # Use top-k sampling for more focused generation
        temperature=0.5  # Lower temperature for better coherence
    )

    # Decode the generated output
    conversation_starter = tokenizer.decode(output[0], skip_special_tokens=True)

    # Remove the prompt from the generated text if necessary
    if conversation_starter.startswith(prompt):
        conversation_starter = conversation_starter[len(prompt):].strip()

    # Extract the first line (or two) as the final conversation starter
    conversation_starter_sentences = re.split(r'[.?!]', conversation_starter)

    if len(conversation_starter_sentences) > 1:
        # Return the first two sentences (if available)
        return f"{conversation_starter_sentences[0].strip()}. {conversation_starter_sentences[1].strip()}."
    elif conversation_starter_sentences:
        # Return the first sentence (if there's only one)
        return conversation_starter_sentences[0].strip() + "."
    else:
        # Fallback if no valid sentence is found
        return "Could you tell me more about yourself?"

# Generate conversation starters for the test data
test_responses = [generate_conversation_starter(item['Bio'], item['Interests']) for item in test_data]

# Debug: Print some generated responses
print("Generated Conversation Starters:")
for idx, response in enumerate(test_responses[:10]):  # Show first 10 responses
    print(f"Generated Response {idx + 1}: {response}")


Generated Conversation Starters:
Generated Response 1: I like to hike, I like to hike, I like to hike, I like to hike, I like to hike, I like to hike, I like to hike, I like to hike, I like to hike, I like to hike, I like to hike, I like to hike, I like to.
Generated Response 2: "Why do you always say the same thing. "

Tuesday, July 22, 2015

Hello, my name is J.
Generated Response 3: I’m a fan of the music of the 1970s and 80s. I’m a fan of the music of the 1970s and 80s.
Generated Response 4: “I like to go on adventures with my friends. ”

My passion is to help people to find the best way to live life, no matter how big or small, with the least amount of stress and the most joy.
Generated Response 5: “I’m a passionate and dedicated foodie. ”

I’m a passionate and dedicated foodie.
Generated Response 6: I think I’m going to be a movie buff. If you have a passion for movies, you should check out the new movie buff movie buff list.
Generated Response 7: I'm a fitness fanatic, but I'm a

In [37]:
import bert_score
from bert_score import score

# Extract ground truth conversation starters
comparison_true_conversation_starters = [item['Conversation Starter'] for item in comparison_test_data]

# Compute BERTScore
P, R, F1 = score(comparison_test_responses, comparison_true_conversation_starters, lang="en", verbose=True)

# Compute the overall average scores for Precision, Recall, and F1
avg_precision = P.mean().item()
avg_recall = R.mean().item()
avg_f1 = F1.mean().item()

# Print the overall BERTScore results
print(f"Average BERTScore Precision: {avg_precision:.4f}")
print(f"Average BERTScore Recall: {avg_recall:.4f}")
print(f"Average BERTScore F1: {avg_f1:.4f}")



Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/1 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/1 [00:00<?, ?it/s]

done in 0.26 seconds, 38.48 sentences/sec
Average BERTScore Precision: 0.8568
Average BERTScore Recall: 0.8564
Average BERTScore F1: 0.8565
