In [1]:
import pandas as pd

In [2]:
df = pd.read_csv("train.csv")[:1000]


In [3]:
df.head()

Unnamed: 0,id,model_a,model_b,prompt,response_a,response_b,winner_model_a,winner_model_b,winner_tie
0,30192,gpt-4-1106-preview,gpt-4-0613,"[""Is it morally right to try to have a certain...","[""The question of whether it is morally right ...","[""As an AI, I don't have personal beliefs or o...",1,0,0
1,53567,koala-13b,gpt-4-0613,"[""What is the difference between marriage lice...","[""A marriage license is a legal document that ...","[""A marriage license and a marriage certificat...",0,1,0
2,65089,gpt-3.5-turbo-0613,mistral-medium,"[""explain function calling. how would you call...","[""Function calling is the process of invoking ...","[""Function calling is the process of invoking ...",0,0,1
3,96401,llama-2-13b-chat,mistral-7b-instruct,"[""How can I create a test set for a very rare ...","[""Creating a test set for a very rare category...","[""When building a classifier for a very rare c...",1,0,0
4,198779,koala-13b,gpt-3.5-turbo-0314,"[""What is the best way to travel from Tel-Aviv...","[""The best way to travel from Tel Aviv to Jeru...","[""The best way to travel from Tel-Aviv to Jeru...",0,1,0


In [4]:
import ast
import json

# Function to safely parse the string as a list
def parse_list_string(s):
    try:
        # First try to parse as JSON
        return json.loads(s)
    except:
        try:
            # Then try to parse as Python literal
            return ast.literal_eval(s)
        except:
            # If all fails, return as is
            return [s]

# Create a new dataframe to store the expanded data
expanded_rows = []

for _, row in df.iterrows():
    # Parse the prompts and responses
    prompts = parse_list_string(row['prompt'])
    responses_a = parse_list_string(row['response_a'])
    responses_b = parse_list_string(row['response_b'])
    
    # Make sure all lists have the same length
    max_len = max(len(prompts), len(responses_a), len(responses_b))
    prompts = prompts + [''] * (max_len - len(prompts))
    responses_a = responses_a + [''] * (max_len - len(responses_a))
    responses_b = responses_b + [''] * (max_len - len(responses_b))
    
    # Create a new row for each prompt-response pair
    for i in range(max_len):
        expanded_rows.append({
            'id': row['id'],
            'model_a': row['model_a'],
            'model_b': row['model_b'],
            'prompt': prompts[i],
            'response_a': responses_a[i],
            'response_b': responses_b[i],
            'winner_model_a': row['winner_model_a'],
            'winner_model_b': row['winner_model_b'],
            'winner_tie': row['winner_tie']
        })

# Create the expanded dataframe
expanded_df = pd.DataFrame(expanded_rows)

# Display the first few rows of the expanded dataframe
expanded_df.head()

Unnamed: 0,id,model_a,model_b,prompt,response_a,response_b,winner_model_a,winner_model_b,winner_tie
0,30192,gpt-4-1106-preview,gpt-4-0613,Is it morally right to try to have a certain p...,The question of whether it is morally right to...,"As an AI, I don't have personal beliefs or opi...",1,0,0
1,30192,gpt-4-1106-preview,gpt-4-0613,"OK, does pineapple belong on a pizza? Relax an...","Ah, the age-old culinary conundrum that has di...","As an AI, I don't eat, so I don't have persona...",1,0,0
2,53567,koala-13b,gpt-4-0613,What is the difference between marriage licens...,A marriage license is a legal document that al...,A marriage license and a marriage certificate ...,0,1,0
3,53567,koala-13b,gpt-4-0613,How can I get both of them as quick as possibl...,If you want to get both a marriage license and...,"In California, here are the general steps to o...",0,1,0
4,53567,koala-13b,gpt-4-0613,What is the minimal time to get them? 1 day or...,The minimal time to get a marriage license and...,The marriage license in California is issued i...,0,1,0


In [5]:
filtered_df = expanded_df[expanded_df['winner_tie'] == 0]
filtered_df = filtered_df.drop(columns=['winner_tie', 'id', 'model_a', 'model_b'])
print(f"Original shape: {expanded_df.shape}")
print(f"After removing ties: {filtered_df.shape}")
print(filtered_df[['winner_model_a', 'winner_model_b']].value_counts())

Original shape: (1245, 9)
After removing ties: (895, 5)
winner_model_a  winner_model_b
0               1                 456
1               0                 439
Name: count, dtype: int64


In [6]:
filtered_df.head()

Unnamed: 0,prompt,response_a,response_b,winner_model_a,winner_model_b
0,Is it morally right to try to have a certain p...,The question of whether it is morally right to...,"As an AI, I don't have personal beliefs or opi...",1,0
1,"OK, does pineapple belong on a pizza? Relax an...","Ah, the age-old culinary conundrum that has di...","As an AI, I don't eat, so I don't have persona...",1,0
2,What is the difference between marriage licens...,A marriage license is a legal document that al...,A marriage license and a marriage certificate ...,0,1
3,How can I get both of them as quick as possibl...,If you want to get both a marriage license and...,"In California, here are the general steps to o...",0,1
4,What is the minimal time to get them? 1 day or...,The minimal time to get a marriage license and...,The marriage license in California is issued i...,0,1


In [7]:
filtered_df.to_csv("filtered_results.csv", index=False)

In [8]:
test = pd.read_csv("filtered_results.csv")
test.head()


Unnamed: 0,prompt,response_a,response_b,winner_model_a,winner_model_b
0,Is it morally right to try to have a certain p...,The question of whether it is morally right to...,"As an AI, I don't have personal beliefs or opi...",1,0
1,"OK, does pineapple belong on a pizza? Relax an...","Ah, the age-old culinary conundrum that has di...","As an AI, I don't eat, so I don't have persona...",1,0
2,What is the difference between marriage licens...,A marriage license is a legal document that al...,A marriage license and a marriage certificate ...,0,1
3,How can I get both of them as quick as possibl...,If you want to get both a marriage license and...,"In California, here are the general steps to o...",0,1
4,What is the minimal time to get them? 1 day or...,The minimal time to get a marriage license and...,The marriage license in California is issued i...,0,1


In [9]:
# Create a new DataFrame with the desired format
new_df = pd.DataFrame({
    'text': '[PROMPT] ' + filtered_df['prompt'] + ' [SEP] [RESPONSE_A] ' + 
            filtered_df['response_a'] + ' [SEP] [RESPONSE_B] ' + filtered_df['response_b'],
    'label': (filtered_df['winner_model_b'] == 1).astype(int)
})

new_df.to_csv('formatted_data.csv', index=False)

In [10]:
test = pd.read_csv("formatted_data.csv")
print(test["text"][0])
print(test.shape)

[PROMPT] Is it morally right to try to have a certain percentage of females on managerial positions? [SEP] [RESPONSE_A] The question of whether it is morally right to aim for a certain percentage of females in managerial positions is a complex ethical issue that involves considerations of fairness, equality, diversity, and discrimination.

Here are some arguments in favor of and against such policies:

**Arguments in favor:**

1. **Correcting Historical Inequities:** Women have historically been underrepresented in leadership roles due to various cultural, institutional, and social barriers. Aiming for a specific percentage can be seen as a corrective measure to address past and ongoing discrimination.

2. **Promoting Diversity:** Diverse leadership teams can enhance decision-making and represent a broader range of perspectives. This can lead to better outcomes for organizations and society as a whole.

3. **Equality of Opportunity:** Setting targets for female representation in manage

In [11]:
# Cell 2: Import libraries
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from transformers import (
    DistilBertTokenizer, 
    DistilBertForSequenceClassification,
    Trainer, 
    TrainingArguments,
    DataCollatorWithPadding
)
from datasets import Dataset






In [12]:
# Cell 3: Load your data
# If you saved to CSV, you can load it with:
# test = pd.read_csv('formatted_data.csv')
# If not, use the new_df you created earlier
test = new_df
print(f"Dataset shape: {test.shape}")
print(f"Sample text: {test['text'][0]}")

Dataset shape: (895, 2)
Sample text: [PROMPT] Is it morally right to try to have a certain percentage of females on managerial positions? [SEP] [RESPONSE_A] The question of whether it is morally right to aim for a certain percentage of females in managerial positions is a complex ethical issue that involves considerations of fairness, equality, diversity, and discrimination.

Here are some arguments in favor of and against such policies:

**Arguments in favor:**

1. **Correcting Historical Inequities:** Women have historically been underrepresented in leadership roles due to various cultural, institutional, and social barriers. Aiming for a specific percentage can be seen as a corrective measure to address past and ongoing discrimination.

2. **Promoting Diversity:** Diverse leadership teams can enhance decision-making and represent a broader range of perspectives. This can lead to better outcomes for organizations and society as a whole.

3. **Equality of Opportunity:** Setting target

In [13]:
# Cell 4: Split data into train and validation sets
train_df, val_df = train_test_split(test, test_size=0.2, random_state=42)
print(f"Training set: {train_df.shape}")
print(f"Validation set: {val_df.shape}")

Training set: (716, 2)
Validation set: (179, 2)


In [14]:
# Cell 5: Convert pandas dataframes to Hugging Face datasets
train_dataset = Dataset.from_pandas(train_df)
val_dataset = Dataset.from_pandas(val_df)

In [15]:
# Cell 6: Set up tokenizer and ensure proper data format
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')

# First, check what the data looks like
print("Sample data point:")
print(train_dataset[0])

# Make sure 'text' field contains strings
# Convert the text column to string explicitly to avoid issues
train_dataset = train_dataset.map(lambda x: {"text": str(x["text"])})
val_dataset = val_dataset.map(lambda x: {"text": str(x["text"])})

def tokenize_function(examples):
    # Use a try-except to catch any errors
    try:
        return tokenizer(examples['text'], padding='max_length', truncation=True, max_length=512)
    except Exception as e:
        print(f"Error during tokenization: {e}")
        print(f"Problematic input: {examples['text'][:2]}")  # Print first few examples
        raise e

Sample data point:
{'text': '[PROMPT] A room contains 3 boxes, a yellow box, a red box, and a green box. Bob places a teacup in the green box, then leaves the room. Emily comes in the room and moves the teacup to the yellow box before leaving. Bob returns to the room, where does he expect to find the teacup? [SEP] [RESPONSE_A] As an AI language model, I do not have access to the specific details of the situation or the rules that govern it. However, based on the given information, it is likely that Bob would expect to find the teacup in the yellow box, as that is the only box that he has accessed and moved since placing the teacup in the green box. [SEP] [RESPONSE_B] * Bob originally placed the teacup in the green box\n* Emily moved the teacup to the yellow box after Bob left\n* When Bob returns, he does not know that Emily moved the teacup\n* Therefore, Bob expects to find the teacup in the green box, where he originally placed it.\n\nSo the answer is: Bob expects to find the teacup i

Map:   0%|          | 0/716 [00:00<?, ? examples/s]

Map:   0%|          | 0/179 [00:00<?, ? examples/s]

In [16]:
# Cell 7: Apply tokenization with proper error handling
# Process batches with smaller size for better error detection
tokenized_train = train_dataset.map(
    tokenize_function, 
    batched=True,
    batch_size=16,  # Smaller batch size
    desc="Tokenizing training data"
)

tokenized_val = val_dataset.map(
    tokenize_function, 
    batched=True,
    batch_size=16,
    desc="Tokenizing validation data"
)

# Verify tokenized data
print("Sample tokenized data:")
sample_keys = list(tokenized_train[0].keys())
print(f"Available keys: {sample_keys}")
print(f"Input IDs length: {len(tokenized_train[0]['input_ids'])}")

Tokenizing training data:   0%|          | 0/716 [00:00<?, ? examples/s]

Tokenizing validation data:   0%|          | 0/179 [00:00<?, ? examples/s]

Sample tokenized data:
Available keys: ['text', 'label', '__index_level_0__', 'input_ids', 'attention_mask']
Input IDs length: 512


In [17]:
# Cell 8: Load model
model = DistilBertForSequenceClassification.from_pretrained(
    'distilbert-base-uncased', 
    num_labels=2
)
print(model.config)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


DistilBertConfig {
  "_name_or_path": "distilbert-base-uncased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "initializer_range": 0.02,
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "transformers_version": "4.42.4",
  "vocab_size": 30522
}



In [18]:
# Cell 9: Set up training arguments
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=16,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
)



In [19]:
# Cell 10: Create data collator and initialize trainer
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_val,
    tokenizer=tokenizer,
    data_collator=data_collator
)

In [20]:
trainer.train()

  0%|          | 0/270 [00:00<?, ?it/s]

{'loss': 0.6833, 'grad_norm': 1.8834651708602905, 'learning_rate': 1.0000000000000002e-06, 'epoch': 0.11}
{'loss': 0.7009, 'grad_norm': 2.8462729454040527, 'learning_rate': 2.0000000000000003e-06, 'epoch': 0.22}


KeyboardInterrupt: 

In [22]:
# Cell 12: Save the model
model_path = "./distilbert-model-preference"
model.save_pretrained(model_path)
tokenizer.save_pretrained(model_path)
print(f"Model saved to {model_path}")

Model saved to ./distilbert-model-preference


In [25]:
# Cell 13: Load the model and test predictions
loaded_model = DistilBertForSequenceClassification.from_pretrained("./distilbert-model-preference")
loaded_tokenizer = DistilBertTokenizer.from_pretrained("./distilbert-model-preference")

def predict(text):
    inputs = loaded_tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    with torch.no_grad():
        outputs = loaded_model(**inputs)
    logits = outputs.logits
    predicted_class = torch.argmax(logits, dim=1).item()
    return predicted_class

for i in range(len(test)):
    sample_text = test['text'][i]
    prediction = predict(sample_text)
    print(f"Prediction for sample: {prediction}")
    print(f"Actual label: {test['label'][i]}")
    print("--------------------------------")



Prediction for sample: 1
Actual label: 0
--------------------------------
Prediction for sample: 1
Actual label: 0
--------------------------------
Prediction for sample: 1
Actual label: 1
--------------------------------
Prediction for sample: 0
Actual label: 1
--------------------------------
Prediction for sample: 1
Actual label: 1
--------------------------------


KeyError: 5