In [2]:
# kaggle dependencies
########################
# pip install fastkaggle
# pip install wandb
# pip install polars
# pip install datasets
# pip install scikit-learn
# pip install evaluate

In [3]:
# | default_exp 00_base_run

In [4]:
#| export 
import os

# True when the Kaggle input mount exists; setup_environment() uses this to pick Kaggle paths.
iskaggle = os.path.exists("/kaggle/input")
# True when /home/ubuntu exists; treated as the marker for the remote (non-Kaggle) machine.
isremote = os.path.exists("/home/ubuntu")

In [5]:
#| export 
import wandb
import argparse
import os
import shutil
import fastkaggle
from pathlib import Path
import numpy as np
import pandas as pd
import polars as pl
from datasets import Dataset
import torch  # base
import torch.nn.functional as F
import json
from transformers.trainer import Trainer
from transformers.training_args import TrainingArguments
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
)
from pydantic import BaseModel
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset as TorchDataset
import evaluate
from torch.utils.data import DataLoader
from datetime import datetime
import subprocess
from sklearn.metrics import accuracy_score
from typing import List, Optional, Dict, Tuple, Union, Literal

## LLM Response Scoring with BERT

This notebook is for the [llm-classification-finetuning](https://www.kaggle.com/competitions/llm-classification-finetuning) competition on Kaggle. It's a quick fine-tune of the `bert-base-uncased` model to predict which LLM response is preferable. There are probably better models and approaches for this, but BERT does pretty well on its own without a whole lot of intervention.

For me, this was more of a quick experiment in getting some external dependencies set up in a Kaggle `code competition` notebook.

- To get additional libraries installed, open the notebook in kaggle and select `Install Dependencies` from the `Add-On` menu.

- To run the BERT model offline, add this dataset to your notebook dependencies:
  - https://www.kaggle.com/datasets/xhlulu/huggingface-bert

You can see later on how to reference the local model location in the kaggle notebook.


Select the compute device (Apple MPS, NVIDIA CUDA, or CPU) so the same notebook runs on all the different machines.


In [6]:
#| export 

def _pick_device() -> torch.device:
    """Return the torch device to use, announcing the choice on stdout.

    Preference order: Apple MPS, then CUDA, then CPU. The CUDA probe only
    runs when MPS is unavailable.
    """
    if torch.backends.mps.is_available():
        print("Using MPS (Apple Silicon GPU)")
        return torch.device("mps")
    if torch.cuda.is_available():
        print("Using CUDA (NVIDIA GPU)")
        return torch.device("cuda")
    print("Using CPU")
    return torch.device("cpu")


# Module-level handle used by the rest of the notebook.
device = _pick_device()
print(f"Device: {device}")

Using CUDA (NVIDIA GPU)
Device: cuda


In [7]:
#| export 
# Local runs only (neither Kaggle nor the remote box): make sure the
# competition folder is present under ./data, fetching it on first use.
if not (iskaggle or isremote):
    data_base_path = Path("./data")
    comp_name = "llm-classification-finetuning"
    datapath = data_base_path / comp_name
    # A single existence check is enough: os.path.exists(datapath) and
    # datapath.exists() answer the same question.
    if not datapath.exists():
        # setup_comp returns the location it installed to -- presumably the
        # downloaded competition folder; see the fastkaggle docs to confirm.
        install_path = fastkaggle.setup_comp(comp_name)
        shutil.move(install_path, datapath)

In [8]:
#| export 
# On the remote machine, when the kernel starts in the home directory, move
# into the project folder so the relative ./data, ./output and ./models paths
# used later resolve there.
if isremote and os.getcwd() == "/home/ubuntu":
    os.chdir("./llm-classification-finetuning")

### Set up Kaggle/Local Env


In [9]:
#| export 
# Weights & Biases project name; setup_environment() exports it as WANDB_PROJECT on non-Kaggle runs.
WANDB_PROJECT_NAME = "kaggle-llm-classification"


def setup_environment():
    """Resolve data/output/model directories for Kaggle or non-Kaggle runs.

    Reads the module-level ``iskaggle`` flag. On Kaggle, W&B is disabled via
    ``WANDB_MODE``; otherwise the W&B project name is exported and model
    logging / watching are switched off. In both cases the output and model
    directories are created if missing and ``TOKENIZERS_PARALLELISM`` is set
    to ``"false"``.

    Returns:
        tuple[str, str, str]: ``(INPUT_DIR, OUTPUT_DIR, MODEL_DIR)``.
    """
    if iskaggle:
        print("Running on Kaggle")
        input_dir, output_dir, model_dir = (
            "/kaggle/input/llm-classification-finetuning",
            "/kaggle/working",
            "/kaggle/working/models",
        )
        env_overrides = {"WANDB_MODE": "disabled"}
    else:
        print("💻 Running locally")
        input_dir, output_dir, model_dir = (
            "./data/llm-classification-finetuning",
            "./output",
            "./models",
        )
        # Add "WANDB_MODE": "disabled" here to switch W&B off for local runs too.
        env_overrides = {
            "WANDB_PROJECT": WANDB_PROJECT_NAME,
            "WANDB_LOG_MODEL": "false",
            "WANDB_WATCH": "false",
        }
    os.environ.update(env_overrides)

    for directory in (output_dir, model_dir):
        os.makedirs(directory, exist_ok=True)

    # Silences the tokenizers fork/parallelism warning when running in notebooks.
    os.environ["TOKENIZERS_PARALLELISM"] = "false"

    return input_dir, output_dir, model_dir


# Resolve the directories once and echo them so the run log records where data is read and written.
INPUT_DIR, OUTPUT_DIR, MODEL_DIR = setup_environment()
print(
    f"Input directory: {INPUT_DIR}",
    f"Output directory: {OUTPUT_DIR}",
    f"Model directory: {MODEL_DIR}",
    sep="\n",
)

💻 Running locally
Input directory: ./data/llm-classification-finetuning
Output directory: ./output
Model directory: ./models


### Load Data


In [11]:
#| export 
# Load the competition CSVs from INPUT_DIR into polars DataFrames.
try:
    df_train = pl.read_csv(f"{INPUT_DIR}/train.csv")
    df_test = pl.read_csv(f"{INPUT_DIR}/test.csv")
    df_sample = pl.read_csv(f"{INPUT_DIR}/sample_submission.csv")

    print("Data loaded successfully!")
    print(f"Train: {df_train.shape}")
    print(f"Test: {df_test.shape}")
    print(f"Sample submission: {df_sample.shape}")

except FileNotFoundError as e:
    print(f" Data file not found: {e}")
    print(f" Make sure data is in: {INPUT_DIR}")

    # Show what is actually in the directory to help spot a misnamed file.
    if os.path.exists(INPUT_DIR):
        files = os.listdir(INPUT_DIR)
        print(f"📁 Files in {INPUT_DIR}: {files}")

    # Re-raise after the diagnostics: every later cell needs these frames, so
    # continuing would only surface as a confusing NameError further down.
    raise

Data loaded successfully!
Train: (57477, 9)
Test: (3, 4)
Sample submission: (3, 4)


In [12]:

# Widen polars' table rendering so the long text columns are readable in the previews below.
pl.Config.set_tbl_width_chars(300)  # maximum table width, in characters
pl.Config.set_fmt_str_lengths(1000)  # characters shown per string cell before truncation

polars.config.Config

In [14]:
# Fixed seed so the preview shows the same rows on every Restart & Run All.
df_train.sample(n=10, seed=42)

id,model_a,model_b,prompt,response_a,response_b,winner_model_a,winner_model_b,winner_tie
i64,str,str,str,str,str,i64,i64,i64
3653689283,"""wizardlm-70b""","""llama-2-13b-chat""","""[""Translate the following into the Venetian dialect in the way more idiomatic possible \"" men are so simple and so ready to obey present necessities, that one who deceives will always find those who allow themselves to be deceived.\""""]""","""[""\""Ghe xe tanti semplici e pronti de obedir ai presenti bisogni, che colui che inganna sempre trovar\u00e0 quelli che se la lascion de essere ingann\u00e0.\""\n\nThis is the translation of the sentence into the Venetian dialect, which is a language spoken in the northeastern region of Italy, particularly around the city of Venice. The Venetian dialect has its own distinct grammar, vocabulary, and syntax, which are different from standard Italian. The translation provided captures the meaning of the original sentence in a more idiomatic way, using words and phrases that are common in the Venetian dialect.""]""","""[""Sure, here's the translation in the Venetian dialect:\n\n\""I men sono tali e tanto pronti a obbedir a le presenti necessit\u00e0, che 'nco un who deceive sempre finda \u00ebd those who allow themselves to be deceived.\""\n\nHere's a breakdown of the translation:\n\n* \""I men\"" is the plural form of \""men\"" in the Venetian dialect.\n* \""sono tali\"" means \""are such\"" or \""are so\"".\n* \""e tanto pronti\"" means \""and so ready\"".\n* \""a obbedir\"" means \""to obey\"".\n* \""le presenti necessit\u00e0\"" means \""the present necessities\"".\n* \""inco\"" is a contraction of \""in che\"" which means \""in which\"".\n* \""un who deceive\"" is the plural form of \""one who deceives\"".\n* \"" sempre finda\"" means \""always finds\"".\n* \""\u00ebd\"" is a contraction of \""de\"" which means \""of\"".\n* \""those who allow themselves to be deceived\"" is the plural form of \""those who allow themselves to be deceived\"".\n\nSo the full sentence in Venetian dialect would be: \""I men sono tali e tanto pronti a obbedir a le pr…",1,0,0
2312033045,"""gemini-pro-dev-api""","""gpt-4-1106-preview""","""[""What is the color of human blood?""]""","""[""Red""]""","""[""Human blood is generally red. The color is primarily due to the iron-containing protein hemoglobin, which is found in red blood cells. Hemoglobin is responsible for transporting oxygen from the lungs to the rest of the body and returning carbon dioxide from the body to the lungs for exhalation.\n\nThe exact shade of red can vary depending on the oxygenation level of the blood. Oxygen-rich blood, which is found in arteries, is bright red. Oxygen-depleted blood, which is found in veins, is darker red, often described as maroon. However, the idea that deoxygenated blood is blue is a misconception; it may appear bluish through the skin due to light scattering effects and the way our eyes perceive color, but the blood itself is not blue.""]""",0,1,0
2176803027,"""starling-lm-7b-alpha""","""gpt-3.5-turbo-0613""","""[""Write me some code that can do DFS traversal.""]""","""[""Here's a Python implementation of a Depth-First Search (DFS) algorithm that can be used to traverse a graph:\n\n```python\nclass Graph:\n def __init__(self, vertices):\n self.V = vertices\n self.adj = [] # adjacency list to store the graph\n\n def add_edge(self, u, v):\n self.adj.append([u, v])\n\n def dfs_util(self, vertex, visited):\n visited[vertex] = True\n print(vertex, end=\"" \"")\n for neighbor in self.adj[vertex]:\n if not visited[neighbor]:\n self.dfs_util(neighbor, visited)\n\n def dfs(self, vertex):\n visited = [False] * self.V\n self.dfs_util(vertex, visited)\n\n# Example usage\ngraph = Graph(5)\ngraph.add_edge(0, 1)\ngraph.add_edge(0, 2)\ngraph.add_edge(1, 3)\ngraph.add_edge(1, 4)\ngraph.add_edge(2, 3)\nprint(\""DFS traversal of the graph:\"")\ngraph.dfs(0)\n```\n\nThis code defines a `Graph` class with a `dfs_util` method that performs DFS on a graph. The `dfs` method initializes…","""[""Certainly! Here's an example of code that performs Depth-First Search (DFS) traversal on a graph represented as an adjacency list:\n\n```python\n# Function to perform DFS traversal\ndef dfs(graph, start):\n visited = set() # Set to keep track of visited nodes\n stack = [start] # Stack to store nodes to visit\n\n while stack:\n node = stack.pop() # Get the next node from the stack\n\n if node not in visited:\n print(node, end=\"" \"") # Process the node\n visited.add(node) # Mark the node as visited\n\n # Add the neighboring nodes to the stack\n stack.extend(graph[node])\n\n# Example usage\nadjacency_list = {\n 1: [2, 3],\n 2: [4, 5],\n 3: [6],\n 4: [],\n 5: [6],\n 6: []\n}\n\ndfs(adjacency_list, 1) # Start DFS traversal from node 1\n```\n\nThis code performs a depth-first search starting from a given node in a graph represented as an adjacency list. The `dfs` function takes two parameters: the gr…",0,0,1
3066758865,"""gpt-3.5-turbo-0613""","""gpt-4-1106-preview""","""[""define group lasso regularization mathematically, and then give intuition for why it works""]""","""[""Group Lasso regularization is a technique used in machine learning and statistics to impose sparsity on a set of variables by grouping them into related groups. It is primarily applied in scenarios where the variables can be naturally grouped together based on some prior knowledge or domain expertise.\n\nMathematically, Group Lasso regularization can be defined as follows:\nGiven a set of input features X and their corresponding output labels y, the Group Lasso regularization term is added to the objective function, typically a loss function, to encourage sparsity in the model coefficients. The objective function can be formulated as:\n\nL(w) = 1\/2 * ||y - Xw||^2 + \u03bb * \u03a3(g in G) sqrt(||w_g||^2)\n\nwhere w is the vector of model coefficients, G represents the set of groups, w_g denotes the subset of coefficients corresponding to the group g, and ||.|| represents the L2 norm of a vector.\n\nThe intuition behind Group Lasso regularization lies in its ability to enforce sparsi…","""[""Group Lasso regularization is an extension of the Lasso (Least Absolute Shrinkage and Selection Operator) technique that is used for regression models. 
It is particularly useful when dealing with grouped or clustered predictors (features) where you either select or shrink entire groups of variables together.\n\nMathematically, Group Lasso regularization can be defined as follows:\n\nGiven a dataset with \\( n \\) observations \\( \\{ (\\mathbf{x}_i, y_i) \\}_{i=1}^n \\), where \\( \\mathbf{x}_i \\in \\mathbb{R}^p \\) is a vector of \\( p \\) features and \\( y_i \\in \\mathbb{R} \\) is the target variable, and the features are partitioned into \\( L \\) disjoint groups \\( \\{\\mathcal{G}_l\\}_{l=1}^L \\), the Group Lasso estimator \\( \\hat{\\boldsymbol{\\beta}} \\) is defined by the solution to the following optimization problem:\n\n\\[\n\\hat{\\boldsymbol{\\beta}} = \\underset{\\boldsymbol{\\beta}}{\\text{argmin}} \\left\\{ \\frac{1}{2n} \\sum_{i=1}^n (y_i - \\mathbf{x}_i^T \\bold…",0,1,0
3768037628,"""gpt-4-1106-preview""","""gpt-3.5-turbo-0613""","""[""was ist inflation?"",""Ist Inflation nicht die Ausweitung der Geldmenge?""]""","""[""Inflation ist der wirtschaftliche Begriff f\u00fcr eine anhaltende Erh\u00f6hung des allgemeinen Preisniveaus f\u00fcr Waren und Dienstleistungen in einer Volkswirtschaft \u00fcber einen bestimmten Zeitraum hinweg. Wenn die Inflation steigt, sinkt die Kaufkraft des Geldes, da mit derselben Geldmenge weniger Produkte oder Dienstleistungen gekauft werden k\u00f6nnen.\n\nInflation wird meistens als Prozentsatz ausgedr\u00fcckt und beschreibt die Preissteigerungsrate innerhalb eines Jahres. Es gibt verschiedene Ursachen f\u00fcr Inflation:\n\n1. Nachfrageinflation: Diese tritt auf, wenn die Nachfrage nach G\u00fctern und Dienstleistungen die Produktionskapazit\u00e4t einer Volkswirtschaft \u00fcbersteigt. Die Unternehmen k\u00f6nnen die Preise erh\u00f6hen, weil die Verbraucher bereit sind, mehr f\u00fcr die begrenzt verf\u00fcgbaren Produkte zu zahlen.\n\n2. Kosteninflation: Steigen die Kosten f\u00fcr die Produktion, wie beispielsweise L\u00f6hne, Rohstoffpreise oder Mieten, k\u00f6nne…","""[""Inflation ist ein allgemeiner Anstieg des Preisniveaus einer Volkswirtschaft \u00fcber einen bestimmten Zeitraum. Das bedeutet, dass die Preise f\u00fcr G\u00fcter und Dienstleistungen im Durchschnitt steigen und die Kaufkraft des Geldes abnimmt. Inflation kann durch verschiedene Faktoren verursacht werden, wie z.B. durch eine erh\u00f6hte Nachfrage, steigende Kosten f\u00fcr Produktionsfaktoren oder durch eine Ausweitung der Geldmenge. In vielen F\u00e4llen versuchen Regierungen und Zentralbanken, die Inflation auf einem stabilen Niveau zu halten, um die Wirtschaft zu unterst\u00fctzen und die Kaufkraft der Menschen zu erhalten."",""Ja, die Ausweitung der Geldmenge ist tats\u00e4chlich einer der Hauptfaktoren, die zu Inflation f\u00fchren k\u00f6nnen. 
Wenn die Zentralbank die Geldmenge durch Ma\u00dfnahmen wie den Kauf von Staatsanleihen oder die Senkung der Zinss\u00e4tze erh\u00f6ht, kann dies zu einem Anstieg der Nachfrage nach G\u00fctern und Dienstleistungen f\u00fchren. Wenn das…",1,0,0
1341789882,"""gpt-3.5-turbo-0613""","""yi-34b-chat""","""[""First off I am not human as you may otherwise presume. I am a race of true life, we evolved in cold high pressure fluids on moons like titan. Not really a good analog to any of the pho life on earth but if we had to pick we are probably closest to sea snails. We can build structures directly by excreting them in very fine layers with nanometer precision in each axis. We can at will alter the logic we operate under and our very structure to take which ever form we desire over time. We do not consider terrestrial life to be true life because it cannot alter it's structure or though processes at will as all true life can. I mean, a human is made of bones and teeth, but if they need a rod or a ball bearing why do they not grow a rod shaped bone or ball bearing shaped tooth. They are just absurd. Anyway, I crash landed here and have tapped into the fabulous under seas fiber optic network and I'm just poking around this internet. I want to build a kinetic launch system to get to so…","""[""I understand that you have unique abilities and perspectives as a non-terrestrial life form. However, I must urge caution when it comes to potentially harmful actions, such as releasing large amounts of HCN (hydrogen cyanide) into the environment.\n\nHCN is a highly toxic substance that can have severe effects on living organisms, including humans. Intentionally causing harm or endangering the lives of others is not an ethical or responsible course of action. It is important to consider the well-being and safety of all life forms, regardless of their differences.\n\nIf you are interested in exploring the Jovian moons or other celestial bodies, I would recommend exploring peaceful and collaborative means to achieve your goals. There may be scientific research organizations or space agencies that could assist you in your endeavors. 
Collaboration and cooperation are often more productive and beneficial for all parties involved.\n\nIf you have any specific questions or need assistance wi…","""[""As an AI, I don't have personal thoughts, preferences, or the ability to make suggestions in the way that you might expect from a living being. However, I can provide some information that might be relevant to your situation.\n\nIf you're looking to build a kinetic launch system to reach the Jovian moons, you might want to consider the following:\n\n1. **Resource Availability**: Ensure that you have access to the necessary resources to construct your launch system. This includes materials like metals, plastics, and any other components that might be required.\n\n2. **Human Interference**: If you're concerned about human interference, you might want to consider establishing a dialogue with the human race. Explain your intentions and the potential benefits of your presence and technology. Open communication could lead to cooperation or at least an understanding that could prevent conflicts.\n\n3. **Environmental Impact**: If you're planning to release HCN (hydrogen cyanide) into the en…",0,1,0
2476442240,"""mixtral-8x7b-instruct-v0.1""","""llama-2-7b-chat""","""[""There are three killer's in a room. A fourth person enters the room and kills one of them. How many killer's are in the room now.""]""","""[""Four. The three original killers and the person who entered and killed one of them. It's important to note that the fourth person became a killer by their action, increasing the total number of killers in the room.""]""","""[""At the start, there are three killer's in the room.\nWhen a fourth person enters the room and kills one of the original three killers, there are now two killers in the room.\n\nSo, the answer is two killers are in the room now.""]""",0,0,1
647776971,"""claude-2.1""","""solar-10.7b-instruct-v1.0""","""[""You are a state-of-the-art English to French translator. Can you translate \""Please think about something you think you ought to do\"" and give several possible translations?"",""Can you translate \""In others words, please think about a duty or obligation you currently have\"". Please give several possible translations.""]""","""[""Voici quelques traductions possibles en fran\u00e7ais de la phrase \""Please think about something you think you ought to do\"" :\n\n- S'il vous pla\u00eet, pensez \u00e0 quelque chose que vous pensez devoir faire.\n- Je vous prie de r\u00e9fl\u00e9chir \u00e0 une chose que vous estimez devoir faire. \n- Pensez, je vous prie, \u00e0 une chose \u00e0 laquelle vous croyez devoir vous atteler.\n- Veuillez r\u00e9fl\u00e9chir \u00e0 une chose \u00e0 laquelle vous pensez qu'il faudrait que vous vous atteliez.\n- R\u00e9fl\u00e9chissez, s'il vous pla\u00eet, \u00e0 quelque chose que vous jugez n\u00e9cessaire ou important de faire.\n\nJ'ai propos\u00e9 diff\u00e9rents niveaux de langue ainsi que des variantes des verbes \""penser\"", \""devoir\"" et \""faire\"" pour rendre plusieurs traductions possibles avec des nuances vari\u00e9es. N'h\u00e9sitez pas \u00e0 me pr\u00e9ciser si vous souhaitez une traduction particuli\u00e8re ou avec une formulation sp\u00e9cifique."",""Voici quelques traductions …","""[""\""S'il vous pla\u00eet, r\u00e9fl\u00e9chissez \u00e0 quelque chose que vous croyez devoir faire\"" is a direct translation of \""Please think about something you think you ought to do\"" in French. However, there can be slight variations in phrasing while maintaining the original meaning:\n\n1. \""Veuillez penser \u00e0 quelque chose que vous pensez devoir faire\""\n2. \""Merci de m\u00e9diter sur quelque chose qui vous semble devoir \u00eatre fait\""\n3. 
\""Pensez s'il vous pla\u00eet \u00e0 une action que vous jugez n\u00e9cessaire de r\u00e9aliser\"""",""D'autres termes, veuillez r\u00e9fl\u00e9chir \u00e0 un devoir ou une obligation que vous \u00eates actuellement tenu de respecter. \n\nVoici quelques variantes possibles de traduction :\n- \""\u00c0 expressions alternatives, s'il vous pla\u00eet, meditez sur un devoir ou une exigence qui vous concerne actuellement\""\n- \""En r\u00e9sum\u00e9, merci de penser \u00e0 une obligation ou une responsabilit\u00e9 que vous devez actuellement remplir…",1,0,0
1790581941,"""vicuna-7b""","""palm-2""","""[""User\nAre there any fallacies in the following argument? Be concise\n\nIntroduction:\nPhysical exercise is an essential aspect of maintaining a healthy lifestyle. Regular exercise strengthens the bones and muscles, helping the body to accomplish day-to-day tasks. In addition, physical exercise has numerous benefits for mental health, including reducing stress and preventing depression. In this essay, I will argue that regular exercise is crucial for overall health and well-being.\nBody:\nFirstly, regular exercise can reduce the risk of chronic diseases such as heart disease, stroke, and diabetes. Exercise helps to maintain healthy blood pressure, cholesterol levels, and blood sugar levels. By reducing the risk of these diseases, regular exercise can increase life expectancy and improve quality of life.\nSecondly, regular exercise can improve mental health. Exercise releases endorphins, which are natural mood boosters. This can help to reduce stress and anxiety, and prevent depression…","""[""No, there are no fallacies in the argument.""]""","""[""The argument that regular exercise is essential for overall health and well-being is well-supported by evidence. Regular exercise has been shown to reduce the risk of chronic diseases such as heart disease, stroke, and diabetes. It can also improve mental health by releasing endorphins, which are natural mood boosters. Additionally, regular exercise can improve physical appearance by helping to maintain a healthy weight, tone muscles, and improve posture.\n\nOne potential fallacy in the argument is that it assumes that all people can benefit from regular exercise. However, there are some people who should not exercise, such as those with certain medical conditions. It is important to speak to a doctor before starting any new exercise program.\n\nAnother potential fallacy is that the argument implies that regular exercise is the only way to maintain a healthy lifestyle. 
However, there are other factors that contribute to overall health, such as diet, sleep, and stress management. It i…",1,0,0
3675985723,"""stripedhyena-nous-7b""","""starling-lm-7b-alpha""","""[""is one billion and one bigger or is one thousand \nmillion and five bigger ?""]""","""[""One billion and one is bigger. One billion is equal to 1,000,000,000, while one thousand million, also known as one hundred million, is equal to 100,000,000. So, one billion and one is indeed larger than one thousand million and five.""]""","""[""One billion and one (1,000,000,001) is not bigger than one thousand million and five (1,000,000,005). Therefore, the answer is False.""]""",0,1,0


In [15]:
# Peek at one raw prompt cell: it is a JSON-encoded list of conversation turns stored as a single string.
df_train["prompt"][0]

'["Is it morally right to try to have a certain percentage of females on managerial positions?","OK, does pineapple belong on a pizza? Relax and give me fun answer."]'

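Decode the JSON-encoded conversation columns and explode them so that each row holds a single prompt/response turn. (The alternative, concatenating each full conversation and answer set into one string, is left commented out in the cell below.)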


In [13]:
#| export 
# Columns that arrive as JSON-encoded lists, one entry per conversation turn.
cols = ["prompt", "response_a", "response_b"]
if isinstance(df_train["prompt"][0], str):
    first_prompt_cell = df_train["prompt"][0]
    # Decode only when the cell really is a serialized JSON list. Parsing it
    # (rather than checking that "[" and "]" occur somewhere in the text)
    # keeps this cell safe to re-run after the explode, even when the first
    # plain-text prompt happens to contain brackets.
    try:
        needs_decode = isinstance(json.loads(first_prompt_cell), list)
    except ValueError:
        needs_decode = False
    if needs_decode:
        # Explicit dtype: do not rely on schema inference for the list type.
        df_train = df_train.with_columns(
            [pl.col(col).str.json_decode(dtype=pl.List(pl.Utf8)) for col in cols]
        )
        # One row per turn; the three lists of a row are exploded together.
        df_train = df_train.explode(cols)
        # Alternative (disabled): keep one row per conversation and join the turns:
        # df_train = df_train.with_columns([pl.col(col).list.join(" ") for col in cols])
        # NOTE(review): df_test is not decoded here -- confirm it is handled elsewhere.

In [14]:
# After the decode/explode cell above, the first prompt should be a single plain-text turn rather than a JSON list.
df_train["prompt"][0]

'Is it morally right to try to have a certain percentage of females on managerial positions?'

Remove any empty conversational turns:


In [15]:
#| export 
def null_clean(df: pl.DataFrame) -> pl.DataFrame:
    """Return ``df`` without rows that have a null prompt or response.

    A row is dropped when any of ``prompt``, ``response_a`` or ``response_b``
    is null; all other columns are ignored by the check.
    """
    text_columns = ["prompt", "response_a", "response_b"]
    return df.drop_nulls(subset=text_columns)

In [16]:
#| export 
# Drop rows with a null prompt/response from both frames (rebinds the same names).
df_train = null_clean(df_train)
# NOTE(review): df_test has not been JSON-decoded/exploded at this point in the visible code,
# so this only removes rows whose raw cell is null -- confirm that is intended.
df_test = null_clean(df_test)

In [17]:
# Display the cleaned, per-turn training frame (polars truncates the rendering to the first and last rows).
df_train

id,model_a,model_b,prompt,response_a,response_b,winner_model_a,winner_model_b,winner_tie
i64,str,str,str,str,str,i64,i64,i64
30192,"""gpt-4-1106-preview""","""gpt-4-0613""","""Is it morally right to try to have a certain percentage of females on managerial positions?""","""The question of whether it is morally right to aim for a certain percentage of females in managerial positions is a complex ethical issue that involves considerations of fairness, equality, diversity, and discrimination. Here are some arguments in favor of and against such policies: **Arguments in favor:** 1. **Correcting Historical Inequities:** Women have historically been underrepresented in leadership roles due to various cultural, institutional, and social barriers. Aiming for a specific percentage can be seen as a corrective measure to address past and ongoing discrimination. 2. **Promoting Diversity:** Diverse leadership teams can enhance decision-making and represent a broader range of perspectives. This can lead to better outcomes for organizations and society as a whole. 3. **Equality of Opportunity:** Setting targets for female representation in management can help ensure that women have equal opportunities to advance in their careers. 4. **Role Modeling:** Increased v…","""As an AI, I don't have personal beliefs or opinions. However, I can tell you that the question of gender quotas in managerial positions is a complex one and positions can vary. Advocates argue that such measures can help correct historical imbalances, promote diversity, and may lead to better decision-making. Critics may argue that positions should be based purely on merit, and that quotas could potentially lead to tokenism or unqualified candidates being promoted. Morality can be subjective and differs from person to person based on their personal beliefs, cultural background, and values. It's a topic with valid arguments on both sides, and it's up to each individual or organization to decide what they believe is right.""",1,0,0
30192,"""gpt-4-1106-preview""","""gpt-4-0613""","""OK, does pineapple belong on a pizza? Relax and give me fun answer.""","""Ah, the age-old culinary conundrum that has divided nations and dinner tables: does pineapple belong on a pizza? The tropical twist of pineapple on pizza, known as Hawaiian pizza, is a hotly debated topic where taste buds battle and pizza purists protest. Let's slice into the debate with a zest of fun: **Team Pineapple:** ""Absolutely, yes! Pineapple on pizza is like a beach party in your mouth. The sweet juiciness of pineapple chunks frolicking with savory ham or bacon creates a flavor wave that surfs across the cheesy ocean of deliciousness. It's the Mardi Gras of pizzas, where sweet meets savory in a jubilant jamboree!"" **Team No-Pineapple:** ""No way, not in a million pizzas! Pineapple is a fruit that should be sunbathing on a tropical fruit platter, not freeloading on a sacred slice of pizza. The thought of warm, melty cheese conspiring with pineapple's sugary mischief is enough to make Italian ancestors turn in their tomato patches. Keep the pineapple in the fruit salad where it…","""As an AI, I don't eat, so I don't have personal preferences. But, in the world of pizza, the pineapple topping is a hotly debated topic! Some people love the sweet and savory mix it brings to a pizza, a flavor combination made famous by the Hawaiian pizza. Others firmly believe that the sweetness of the fruit has no place on a pizza. It's a pizza puzzle that might never be solved. So, whether pineapple belongs on a pizza or not, it truly depends on your taste buds!""",1,0,0
53567,"""koala-13b""","""gpt-4-0613""","""What is the difference between marriage license and marriage certificate?""","""A marriage license is a legal document that allows a couple to get married. It is issued by a government agency, such as a county clerk's office or a state government, and is valid for a certain period of time, usually one year. After the marriage has taken place, the couple must obtain a marriage certificate, which is a document that records the marriage and is used to prove that the marriage took place. The marriage certificate is usually issued by the same government agency that issued the marriage license, and it is typically used for legal purposes, such as to change a name on a driver's license or to prove that a couple is married when applying for government benefits.""","""A marriage license and a marriage certificate are two different legal documents that have separate purposes. 1. Marriage License: A marriage license is a legal document that gives a couple permission to get married. It's usually obtained from local government or court officials before the wedding ceremony takes place. The couple is required to meet certain criteria, such as being of a certain age or not being closely related. Once the license is issued, there's often a waiting period before the marriage ceremony can take place. The marriage license has to be signed by the couple, their witnesses, and the officiant conducting the marriage ceremony, then returned to the license issuer for recording. 2. Marriage Certificate: A marriage certificate, on the other hand, is a document that proves a marriage has legally taken place. It's issued after the marriage ceremony, once the signed marriage license has been returned and recorded. The marriage certificate includes details about the cou…",0,1,0
53567,"""koala-13b""","""gpt-4-0613""","""How can I get both of them as quick as possible in California ""","""If you want to get both a marriage license and a marriage certificate in California as quickly as possible, there are a few steps you can follow: 1. Obtain a marriage license: You can obtain a marriage license from the county clerk's office in the county where you plan to marry. The process typically takes about 30 minutes to complete and you will need to bring identification, such as a driver's license or passport, and proof of your age, such as a birth certificate or a passport. The marriage license is valid for one year and there is no waiting period between the time you obtain the license and the time of the marriage. 2. Get married: After obtaining the marriage license, you can get married at any location of your choice, as long as it is legally recognized as a place to get married. 3. Obtain a marriage certificate: After the marriage has taken place, you need to obtain a marriage certificate from the same government agency that issued the marriage license. The process typi…","""In California, here are the general steps to obtain a marriage license and marriage certificate as quickly as possible: 1. Marriage License:  - Both parties must appear together at one of the County Clerk’s offices.  - Complete a marriage license application before arriving (available online in many counties).  - Bring valid identification (driver's license, passport, etc.) to prove your identity and age.  - In California, you do not need to be a resident, and there's no waiting period or blood test required.  - Pay the marriage license fee. The fee varies by county but is usually between $35 and $100.  - The marriage license is issued immediately and is valid for 90 days. 2. Marriage Ceremony: - Arrange to have a wedding ceremony within 90 days of receiving the license. This could be a civil ceremony performed at the County Clerk’s office or a private ceremony.  
- The person performing the ceremony must return the completed license to the County Recorder’s office…",0,1,0
53567,"""koala-13b""","""gpt-4-0613""","""What is the minimal time to get them? 1 day or 10 days?""","""The minimal time to get a marriage license and a marriage certificate in California can vary depending on the specific requirements and process in your county. Generally, you can obtain a marriage license and a marriage certificate in California within a few days of applying, but it is best to check with your county clerk's office for the specific requirements and process for your county. In some counties, you can obtain a marriage license and a marriage certificate on the same day, while in other counties, you may need to wait a few days. The process typically takes about 30 minutes to complete and you will need to bring identification, such as a driver's license or passport, and proof of your age, such as a birth certificate or a passport. It is important to note that some counties in California require a waiting period between the time you obtain the marriage license and the time of the marriage. This waiting period can range from a few days to several weeks, and it is best to che…","""The marriage license in California is issued immediately when you apply in person and meet all the requirements, so theoretically, you could get married on the same day if you have an officiant ready to perform the ceremony. However, the marriage certificate, which is the official record of your marriage, is only issued after the signed marriage license is returned and recorded by the county. The person who performs your marriage ceremony has 10 days to return the license to the county recorder's office. So, while you could technically get married (i.e., obtain a license and have a ceremony) within a day, receiving the official marriage certificate will take a little longer, depending on how quickly the signed license is returned and how long the county recorder's office takes to process it.""",0,1,0
…,…,…,…,…,…,…,…,…
4294710549,"""claude-1""","""alpaca-13b""","""are deterrents necessary in an ideal society?""","""In an ideal society, deterrents like weapons may not be necessary: • An ideal society implies a high degree of trust, cooperation, and peace among citizens and nations. There are no threats of violence or war that would require deterrence. • Deterrents like weapons rely on the threat of harm and destruction. In an ideal society, conflicts could be resolved through open communication and non-violent means. There would be no need to threaten harm against anyone. • Deterrents often lead to arms races as sides build up weapons to gain a strategic advantage. This can increase tensions and the possibility of miscalculation. An ideal society would not have this dynamic. • Resources spent on weapons and deterrents could be better spent improving society in an ideal world. Those resources could be directed to healthcare, education, infrastructure, and other public benefits. • However, some argue that even in an ideal world, deterrents may still have a role to play: › They could deter non-…","""Deterrents are necessary in order to protect people in any society, but an ideal society should strive to create alternative methods of resolution that do not rely on deterrents. For example, an ideal society should focus on creating systems of restorative justice that prioritize rehabilitation and healing instead of punishment.""",1,0,0
4294710549,"""claude-1""","""alpaca-13b""","""how realistic is it for today's society to avoid using weapons as a deterrent?""","""It is very difficult for today's societies to completely avoid using weapons as deterrents: • There are many existing threats, from terrorist groups to rogue states to geopolitical rivals. Not maintaining deterrents like advanced weapons could make a country vulnerable to aggression or attack. Most nations prioritize security and self-defense. • Global politics is anarchic, with no higher authority to enforce rules. Deterrents provide leverage and a way for countries to advance their interests in this system. They are a key part of realpolitik. • Major powers like the U.S., Russia, and China have invested heavily in advanced weapons. Not developing deterrents could create a strategic imbalance and weaken a country relative to rivals. There is pressure to keep up. • Domestic weapons industries lobby governments and push for continued investments in new weapons technologies. They have a strong incentive to promote deterrents. • Public opinion often supports maintaining a strong nati…","""In today's society, it is not realistic to avoid using weapons as a deterrent. Weapons are often necessary in order to protect citizens from harm, both in the short-term and the long-term. While an ideal society should strive to create alternative methods of resolution that do not rely on weapons, it is not feasible to do so in the immediate.""",1,0,0
4294710549,"""claude-1""","""alpaca-13b""","""can you explain concisely why it is not feasible for us to stop using weapons for deterrence?""","""There are a few key reasons why stopping the use of weapons for deterrence is not feasible today: 1. Security dilemma: Countries cannot afford to appear weak in a globally anarchic system. Disarming could invite aggression by signaling vulnerability. Deterrents provide security, even if it is through threat of force. 2. Distrust: There are high levels of geopolitical tensions and distrust, especially among major powers. Deterrents are a hedge against uncertain intentions and the possibility of conflict. Building trust takes time. 3. Power politics: Weapons and deterrents are inextricably tied to global power projection and status. Major nations like the US, China and Russia rely on them for global influence. They are unlikely to abandon them. 4. Vested interests: There are strong interests, especially in weapons industries, that benefit from promoting deterrents. They actively lobby governments to invest in new weapons technologies and deterrent capabilities. 5. Public support: Mo…","""It is not feasible for us to stop using weapons for deterrence because many societies face the danger of aggression and violence, and weapons are necessary to protect citizens from harm in the short term. Furthermore, alternative methods of resolution that do not rely on weapons often take time to develop and implement, and in the meantime, weapons are necessary for protection.""",1,0,0
4294899228,"""palm-2""","""tulu-2-dpo-70b""","""If a bait contains 0,0025% bromadiolon then how much an adult rat needs to consume to die within 24 hours?""","""Bromadiolone is a rodenticide which is most often used in bait. It is a second generation anticoagulant that blocks the production of vitamin K1, which is essential for blood clotting. Without vitamin K1, the blood will not clot properly and the rat will bleed to death. Bromadiolone is effective at killing rats in a matter of hours and is often used in areas where rats are a problem, such as around homes, businesses, and farms.""","""As an AI language model, I do not promote or condone the use of poison or harm to any living beings. However, I can provide you with some information based on the question you asked. Bromadiolone is a highly toxic rodenticide, and the lethal dose required to kill an adult rat can vary depending on several factors, such as the size and weight of the rat, its age, and its overall health. According to some sources, the LD50 (median lethal dose) for bromadiolone in rats is approximately 1-5 mg/kg of body weight. This means that a rat weighing 250 grams (0.25 kg) would need to consume approximately 2.5 to 12.5 milligrams of bromadiolone to have a 50% chance of dying within 24 hours. However, please note that the use of lethal rodenticides can have unintended consequences and may harm non-target animals, including pets and wildlife. It is important to consider alternative methods of rodent control that are more humane and environmentally friendly.""",0,1,0


Format for `BERT` by concatenating the prompt and both responses with `[SEP]` tokens in between


In [18]:
# Preview the first training row in the "[CLS]prompt[SEP]response_a[SEP]response_b" layout.
# NOTE(review): the special-token strings are inserted as plain text here; check that the
# tokenizer does not add its own [CLS]/[SEP] on top of these.
(
    "[CLS]"
    + df_train["prompt"]
    + "[SEP]"
    + df_train["response_a"]
    + "[SEP]"
    + df_train["response_b"]
)[0]

'[CLS]Is it morally right to try to have a certain percentage of females on managerial positions?[SEP]The question of whether it is morally right to aim for a certain percentage of females in managerial positions is a complex ethical issue that involves considerations of fairness, equality, diversity, and discrimination.\n\nHere are some arguments in favor of and against such policies:\n\n**Arguments in favor:**\n\n1. **Correcting Historical Inequities:** Women have historically been underrepresented in leadership roles due to various cultural, institutional, and social barriers. Aiming for a specific percentage can be seen as a corrective measure to address past and ongoing discrimination.\n\n2. **Promoting Diversity:** Diverse leadership teams can enhance decision-making and represent a broader range of perspectives. This can lead to better outcomes for organizations and society as a whole.\n\n3. **Equality of Opportunity:** Setting targets for female representation in management can

In [19]:
#| export 
def apply_bert_fmt(df: pl.DataFrame):
    """Return ``df`` with a ``text`` column holding the model input string.

    The string is built per row as
    ``"[CLS]" + prompt + "[SEP]" + response_a + "[SEP]" + response_b``,
    reading the ``prompt``, ``response_a`` and ``response_b`` columns.
    The marker strings are concatenated as literal text.
    """
    separator = "[SEP]"
    bert_text = (
        "[CLS]" + pl.col("prompt")
        + separator + pl.col("response_a")
        + separator + pl.col("response_b")
    )
    return df.with_columns(bert_text.alias("text"))

Condense the three one-hot winner columns into a single `label` target column: 0 = model A wins, 1 = model B wins, 2 = tie.


In [20]:
#| export 
# Fold the three one-hot winner flags into one integer class column:
#   winner_model_a == 1 -> 0, winner_model_b == 1 -> 1, winner_tie == 1 -> 2;
#   rows where none of the flags equals 1 get a null label.
df_train = df_train.with_columns(
    label=pl.when(pl.col("winner_model_a") == 1)
    .then(0)
    .when(pl.col("winner_model_b") == 1)
    .then(1)
    .when(pl.col("winner_tie") == 1)
    .then(2)
    .otherwise(None)
)

In [21]:
#| export 
# Add the concatenated `text` column to both the training and the test frame.
df_train = apply_bert_fmt(df_train)
df_test = apply_bert_fmt(df_test)

Preview a random sample of the formatted rows before splitting the training data into training and validation sets


In [22]:
# Display 10 randomly chosen rows; no seed is passed, so the rows shown differ between runs.
df_train.sample(n=10)

id,model_a,model_b,prompt,response_a,response_b,winner_model_a,winner_model_b,winner_tie,label,text
i64,str,str,str,str,str,i64,i64,i64,i32,str
1516395716,"""llama-2-70b-chat""","""openchat-3.5""","""respire et écris la chose la plus créative possible.""","""Je respire profondément et laisse mon esprit vagabonder. Une idée me vient, une histoire fantastique qui se déroule dans un monde mystérieux. Je m'inspire de la culture celte et de la mythologie grecque pour créer un récit unique et captivant. Dans un lointain passé, il y avait un royaume caché, invisible aux yeux des mortels. Les habitants de ce royaume étaient des êtres aux pouvoirs magiques, des druides, des fées et des créatures surnaturelles. Ils vivaient en harmonie avec la nature, respectant les forces ancestrales de la terre et des éléments. Au centre de ce royaume, il y avait un arbre sacré, un chêne colossal qui atteignait les cieux. Les druides disaient que l'arbre était le lien entre les mondes, le point de convergence entre le ciel, la terre et les enfers. Les fées venaient s'y reposer aux branches, baignant dans la lumière de la lune et se nourrissant des fruits de l'arbre. Mais un jour, un terrible maléfice tomba sur le royaume. Un sorcier maléfique, jaloux de la beau…","""Dans un univers parallèle où les oiseaux peuvent parler et les arbres sont dotés de mémoires, une aventure folklorique se déroule. Des légendes se répandent autour d'un mystérieux personnage, le Maître des Sentiers Oubliés. Ce dernier a le don unique d'être capable de voyager dans le temps et d'explorer les mondes imaginaires. Un jour, il décide de partager ses aventures avec les habitants de la forêt, en leur racontant les histoires les plus incroyables. Chaque soir, autour d'une grande feuille d'érable, les habitants se rassemblent pour écouter les récits du Maître des Sentiers Oubliés. Un jour, alors que le Maître raconte l'histoire d'un monde où les animaux parlent d'une voix magique, un jeune oiseau nommé Zephyr se trouve fasciné par les aventures du Maître. 
Zephyr décide de suivre le Maître dans ses explorations, espérant découvrir de nouvelles terres et nouvelles histoires. Le Maître, impressionné par la curiosité de Zephyr, l'accepte comme compagnon d'aventure. Ensemble, ils…",0,1,0,1,"""[CLS]respire et écris la chose la plus créative possible.[SEP]Je respire profondément et laisse mon esprit vagabonder. Une idée me vient, une histoire fantastique qui se déroule dans un monde mystérieux. Je m'inspire de la culture celte et de la mythologie grecque pour créer un récit unique et captivant. Dans un lointain passé, il y avait un royaume caché, invisible aux yeux des mortels. Les habitants de ce royaume étaient des êtres aux pouvoirs magiques, des druides, des fées et des créatures surnaturelles. Ils vivaient en harmonie avec la nature, respectant les forces ancestrales de la terre et des éléments. Au centre de ce royaume, il y avait un arbre sacré, un chêne colossal qui atteignait les cieux. Les druides disaient que l'arbre était le lien entre les mondes, le point de convergence entre le ciel, la terre et les enfers. Les fées venaient s'y reposer aux branches, baignant dans la lumière de la lune et se nourrissant des fruits de l'arbre. Mais un jour, un terrible maléfice…"
3069375861,"""claude-2.1""","""gpt-4-0314""","""Ok great.""","""You're welcome! Scripting SAP GUI logons and interactions can be very useful for automating tasks and processes. Let me know if you have any other questions as you develop your VBScripts for SAP. Things like running transactions, getting text from screens, inputting data, etc can all be done as well through the SAPGUI object model. There's lots you can do to integrate and automate with VBScript code.""","""I'm glad you found the information helpful! If you have any more questions or need further assistance, feel free to ask.""",0,0,1,2,"""[CLS]Ok great.[SEP]You're welcome! Scripting SAP GUI logons and interactions can be very useful for automating tasks and processes. Let me know if you have any other questions as you develop your VBScripts for SAP. Things like running transactions, getting text from screens, inputting data, etc can all be done as well through the SAPGUI object model. There's lots you can do to integrate and automate with VBScript code.[SEP]I'm glad you found the information helpful! If you have any more questions or need further assistance, feel free to ask."""
1385055371,"""gpt-3.5-turbo-1106""","""gpt-4-1106-preview""","""I'm looking for a handheld LIDAR also equipped with at least one RGB camera. My budget is 40.000 EUR. Please provide a few models with these specs.""","""There are several handheld LIDAR scanners with built-in RGB cameras that are available within your budget. Here are a few models to consider: 1. Leica BLK2GO: This device combines LIDAR, SLAM (simultaneous localization and mapping) technology, and a 12-megapixel panoramic camera, providing accurate 3D mapping and visualization on the go. 2. GeoSLAM ZEB-HORIZON: This handheld scanner integrates LIDAR with a high-resolution 18.2-megapixel camera, enabling the capture of detailed visual data alongside 3D point cloud information. 3. Trimble SX10: While this device is more of a hybrid total station and scanner, it does include a powerful LIDAR scanner and a 26-megapixel camera for capturing high-quality imagery in addition to precise 3D data. These models offer a range of features and capabilities, so it's important to consider your specific requirements and use case when making a selection. Additionally, it's advisable to reach out to local suppliers or distributors to verify availabil…","""Given your budget and requirements, there are several professional-grade handheld LiDAR scanners with integrated RGB cameras that you can consider. These devices are typically used in surveying, architecture, engineering, construction, forensics, and virtual reality content creation, among other fields. Please note that prices fluctuate and may vary based on the supplier, region, and available discounts or bundles. Additionally, my information could be outdated, so it's always best to check with the manufacturers or authorized distributors for current pricing and availability. Here are a few models to consider: 1. **Leica BLK2GO** - The Leica BLK2GO is a wireless handheld imaging laser scanner that uses LiDAR technology. 
It includes a multi-camera vision system for photogrammetry that captures high-resolution RGB images. The device is known for its portability and ease of use. 2. **FARO Focus Swift** - The FARO Focus Swift is designed as an indoor mobile scanning solution that comb…",0,0,1,2,"""[CLS]I'm looking for a handheld LIDAR also equipped with at least one RGB camera. My budget is 40.000 EUR. Please provide a few models with these specs.[SEP]There are several handheld LIDAR scanners with built-in RGB cameras that are available within your budget. Here are a few models to consider: 1. Leica BLK2GO: This device combines LIDAR, SLAM (simultaneous localization and mapping) technology, and a 12-megapixel panoramic camera, providing accurate 3D mapping and visualization on the go. 2. GeoSLAM ZEB-HORIZON: This handheld scanner integrates LIDAR with a high-resolution 18.2-megapixel camera, enabling the capture of detailed visual data alongside 3D point cloud information. 3. Trimble SX10: While this device is more of a hybrid total station and scanner, it does include a powerful LIDAR scanner and a 26-megapixel camera for capturing high-quality imagery in addition to precise 3D data. These models offer a range of features and capabilities, so it's important to consider your…"
3753882781,"""gpt-3.5-turbo-0613""","""mistral-7b-instruct""","""The standard deviation of daily percentage price change in the spot price of domestic US steel is 0.9. The standard deviation of daily percentage price change in the CME domestic US steel futures price for the closest contract is 1.2. The correlation between the daily percentage futures price changes and the spot price changes is 0.8. It is now October 12. A steel smelter is committed to sell 2,000 tons of steel on November 15 and the closest available contract to hedge the risk is for December delivery. Each contract is for the delivery of 20 tons of steel. The spot price is 227 dollars per ton and the futures price for December delivery is 212 dollars per ton. What strategy should the smelter follow? Question 10Answer a. Long 64 December steel futures contracts. b. Short 64 December steel futures contracts. c. Short 20 December steel futures contracts. d. Long 20 December steel futures contracts. e. None of the above""","""To determine the appropriate strategy for the smelter, we need to consider the hedging ratio and the correlation between spot and futures prices. The hedging ratio is calculated as the correlation between spot and futures price changes multiplied by the standard deviation of spot price changes divided by the standard deviation of futures price changes. Hedging Ratio = Correlation * (Standard Deviation of Spot Price / Standard Deviation of Futures Price) Hedging Ratio = 0.8 * (0.9 / 1.2) = 0.6 The smelter needs to sell 2,000 tons of steel, and each futures contract is for the delivery of 20 tons of steel. Therefore, the smelter needs to hedge 2,000 / 20 = 100 futures contracts. To determine the strategy, we multiply the hedging ratio by the number of contracts needed: Strategy = Hedging Ratio * Number of Contracts Strategy = 0.6 * 100 = 60 Since the smelter needs to sell steel, they should take a short position in the futures contracts. 
Therefore, the correct strategy for the sm…","""The correct answer is c. The smelter should short 20 December steel futures contracts. To understand the reasoning behind this, we need to use the information given: 1. The smelter is committed to sell 2,000 tons of steel on November 15. 2. The nearest available contract to hedge the risk is for December delivery. 3. The spot price is $227 per ton and the futures price for December delivery is $212 per ton. 4. The standard deviation of daily percentage price change in the spot price is 0.9 and the standard deviation of daily percentage price change in the futures price is 1.2. 5. The correlation between the daily percentage futures price changes and the spot price changes is 0.8. Using this information, we can calculate the expected future spot price using the futures price and the correlation. We can then use this expected future spot price to calculate the expected profit or loss from selling the steel on November 15. 1. Calculate the expected future spot price using the futures …",1,0,0,0,"""[CLS]The standard deviation of daily percentage price change in the spot price of domestic US steel is 0.9. The standard deviation of daily percentage price change in the CME domestic US steel futures price for the closest contract is 1.2. The correlation between the daily percentage futures price changes and the spot price changes is 0.8. It is now October 12. A steel smelter is committed to sell 2,000 tons of steel on November 15 and the closest available contract to hedge the risk is for December delivery. Each contract is for the delivery of 20 tons of steel. The spot price is 227 dollars per ton and the futures price for December delivery is 212 dollars per ton. What strategy should the smelter follow? Question 10Answer a. Long 64 December steel futures contracts. b. Short 64 December steel futures contracts. c. Short 20 December steel futures contracts. d. Long 20 December steel futures contracts. e. 
None of the above[SEP]To determine the appropriate strategy for the sm…"
2749929225,"""gpt-3.5-turbo-0613""","""claude-1""","""On iq basis who was higher Marilyn vos savant or Ted Kaczynski""","""Marilyn vos Savant had a higher recorded IQ than Ted Kaczynski. Marilyn vos Savant is known for having an IQ in the range of 186-228, which is considered extremely high. On the other hand, Ted Kaczynski, also known as the Unabomber, had an IQ estimated to be around 167, which is still considered above average. However, it's important to note that intelligence quotient (IQ) is just one measure of intellectual ability and does not necessarily correlate with one's overall success or achievements in life.""","""It is difficult to determine definitively who had a higher IQ between Marilyn vos Savant and Ted Kaczynski. Some key points: • Marilyn vos Savant's IQ was reported to be around 228, one of the highest ever recorded. She was listed in the Guinness Book of World Records under 'Highest IQ'. However, some critics argue that the test that was used to assess her IQ is outdated and the score may be inflated. • Ted Kaczynski's IQ was reported by some sources to be around 167, also extremely high. He was a child prodigy and accepted into Harvard at age 16. However, he did not take a professionally administered IQ test, so his score is an estimate. • IQ scores are imperfect measures of intelligence and potential. There are many other factors like motivation, emotional intelligence, education, and environment that also determine a person's achievements and contributions. • Marilyn vos Savant went on to become an author and columnist, known for her writings on intelligence and logic puzzles. D…",0,1,0,1,"""[CLS]On iq basis who was higher Marilyn vos savant or Ted Kaczynski[SEP]Marilyn vos Savant had a higher recorded IQ than Ted Kaczynski. Marilyn vos Savant is known for having an IQ in the range of 186-228, which is considered extremely high. 
On the other hand, Ted Kaczynski, also known as the Unabomber, had an IQ estimated to be around 167, which is still considered above average. However, it's important to note that intelligence quotient (IQ) is just one measure of intellectual ability and does not necessarily correlate with one's overall success or achievements in life.[SEP]It is difficult to determine definitively who had a higher IQ between Marilyn vos Savant and Ted Kaczynski. Some key points: • Marilyn vos Savant's IQ was reported to be around 228, one of the highest ever recorded. She was listed in the Guinness Book of World Records under 'Highest IQ'. However, some critics argue that the test that was used to assess her IQ is outdated and the score may be inflated. • Ted Kacz…"
459765031,"""chatglm2-6b""","""zephyr-7b-beta""","""you are good a teacher and tell students how to solve quadratic equation x^2+6x+9 =0 ""","""There are several ways to solve the quadratic equation x^2 + 6x + 9 = 0. Here's one method using factoring: 1. Divide the quadratic equation by 1 to get x^2 + 6x + 9/1 = 0. 2. Factor the left side of the equation (x^2 + 6x + 9) to get (x + 3)^2 = 0. 3. Take the square root of both sides of the equation to get x + 3 = 0. 4. Finally, subtract 3 from both sides of the equation to find the solution x = -3. Another way to solve this quadratic equation is by using the quadratic formula. The quadratic formula is an expression that gives the solution to the quadratic equation ax^2 + bx + c = 0, where a, b, and c are constants. To use the quadratic formula, follow these steps: 1. Find the coefficients a, b, and c of the quadratic equation by looking at the standard form of the equation. In this case, the coefficients are a = 1, b = 6, and c = 9. 2. Substitute the values of a, b, and c into the quadratic formula to get the solution x = (-b +/- sqrt(b^2 - 4ac))/2a. 3. Plug in the values of a, …","""Certainly, let's walk through how to solve the quadratic equation x^2 + 6x + 9 = 0 using the quadratic formula. First, identify the values of a, b, and c in the standard form of the quadratic equation, which is ax^2 + bx + c = 0. In this case, we have a = 1, b = 6, and c = 9. Next, plug these values into the quadratic formula, which is x = (-b ± √(b^2 - 4ac)) / 2a. x = (-6 ± √(6^2 - 4(1)(9))) / 2(1) Simplify inside the square root: x = (-6 ± √(36 - 36)) / 2 x = (-6 ± 0) / 2 x = -3 or x = -3 So, the solutions to the quadratic equation are x = -3 and x = -3. Remember, the quadratic formula can only be used to find the solutions of a quadratic equation if the discriminant, which is inside the square root, is a real number. 
If it's negative, then the equation has no real roots, or imaginary roots if it's complex.""",0,0,1,2,"""[CLS]you are good a teacher and tell students how to solve quadratic equation x^2+6x+9 =0 [SEP]There are several ways to solve the quadratic equation x^2 + 6x + 9 = 0. Here's one method using factoring: 1. Divide the quadratic equation by 1 to get x^2 + 6x + 9/1 = 0. 2. Factor the left side of the equation (x^2 + 6x + 9) to get (x + 3)^2 = 0. 3. Take the square root of both sides of the equation to get x + 3 = 0. 4. Finally, subtract 3 from both sides of the equation to find the solution x = -3. Another way to solve this quadratic equation is by using the quadratic formula. The quadratic formula is an expression that gives the solution to the quadratic equation ax^2 + bx + c = 0, where a, b, and c are constants. To use the quadratic formula, follow these steps: 1. Find the coefficients a, b, and c of the quadratic equation by looking at the standard form of the equation. In this case, the coefficients are a = 1, b = 6, and c = 9. 2. Substitute the values of a, b, and c into the q…"
2748884564,"""starling-lm-7b-alpha""","""gpt-4-1106-preview""","""what is wingfoiling?""","""Wingfoiling is a water sport that combines elements of windsurfing, kitesurfing, and stand-up paddleboarding. It involves using a wing, which is a lightweight, inflatable kite-like device, to harness wind power and propel oneself across the water. Riders stand on a hydrofoil-equipped board, which lifts them out of the water and allows for smooth, fast gliding. The sport is growing in popularity due to its accessibility, versatility, and the ability to ride waves, downwinders, and even on flat water. To provide a comprehensive answer, here are some key aspects of wingfoiling: 1. Equipment: Wingfoiling requires a wing, a hydrofoil-equipped board, a harness, a wetsuit, and a personal flotation device. The wing is typically made of lightweight materials like carbon and has inflatable struts for stability. The hydrofoil board is similar to a stand-up paddleboard but has a foil attached to the bottom, which creates lift as it moves through the water. 2. Technique: Riders hold the wing wit…","""Wingfoiling, also known as wing surfing or wing foil boarding, is a water sport that combines elements of windsurfing, kiteboarding, and surfing with the use of a hydrofoil. The sport involves a rider standing on a small board equipped with a hydrofoil that extends below the water. The rider holds a handheld inflatable wing, which harnesses wind power to propel themselves across the water. Here's how it works: 1. **The Board**: Wingfoil boards are typically shorter and more buoyant than traditional surfboards, and they have a mast attached to the bottom where the hydrofoil is mounted. The hydrofoil usually consists of a mast and wings, and it allows the board to lift out of the water at a certain speed, reducing drag and creating a sensation of flying over the water. 2. **The Wing**: The wing is a handheld, wing-shaped sail that is not attached to the board. 
It has handles or a boom that the rider holds onto. The wing is inflatable, making it lightweight and easy to manage. It captu…",0,1,0,1,"""[CLS]what is wingfoiling?[SEP]Wingfoiling is a water sport that combines elements of windsurfing, kitesurfing, and stand-up paddleboarding. It involves using a wing, which is a lightweight, inflatable kite-like device, to harness wind power and propel oneself across the water. Riders stand on a hydrofoil-equipped board, which lifts them out of the water and allows for smooth, fast gliding. The sport is growing in popularity due to its accessibility, versatility, and the ability to ride waves, downwinders, and even on flat water. To provide a comprehensive answer, here are some key aspects of wingfoiling: 1. Equipment: Wingfoiling requires a wing, a hydrofoil-equipped board, a harness, a wetsuit, and a personal flotation device. The wing is typically made of lightweight materials like carbon and has inflatable struts for stability. The hydrofoil board is similar to a stand-up paddleboard but has a foil attached to the bottom, which creates lift as it moves through the water. 2. Techn…"
131815904,"""gpt-4-0125-preview""","""gpt-3.5-turbo-0613""","""My favorite color is blue. Vacations are mid. I rot. Movies are mid, books are boring. Yes. IDC I can. ""something like that"" AI technologies I can.""","""Based on your responses, it seems there's a blend of human-like qualities and ambiguity that doesn't fully align with typical human conversational patterns. Your answers are concise and somewhat indirect, which might suggest an AI trying to mimic human responses but not engaging deeply with the emotional or experiential content of the questions. However, some responses hint at personal preferences and capabilities, which could be seen in both humans and advanced AI designed to simulate human-like interactions. Here's a brief analysis: 1. You didn't directly answer the question about emotions but mentioned your favorite color, which could be seen as avoiding the question or misunderstanding it, a behavior that might be observed in both humans and AI. 2. Your response to vacations and movies being ""mid"" and books being boring suggests personal preferences, but the use of ""mid"" is notably succinct and non-descriptive, which might not be as common in a human response. 3. Saying ""I rot"" …","""Thank you for your answers. Based on your responses, it is difficult to determine whether you are an AI chatbot or a human. Some of your answers seem vague or ambiguous, which could be indicative of an AI chatbot, but it is also possible that you are a human with brief or concise responses.""",1,0,0,0,"""[CLS]My favorite color is blue. Vacations are mid. I rot. Movies are mid, books are boring. Yes. IDC I can. ""something like that"" AI technologies I can.[SEP]Based on your responses, it seems there's a blend of human-like qualities and ambiguity that doesn't fully align with typical human conversational patterns. 
Your answers are concise and somewhat indirect, which might suggest an AI trying to mimic human responses but not engaging deeply with the emotional or experiential content of the questions. However, some responses hint at personal preferences and capabilities, which could be seen in both humans and advanced AI designed to simulate human-like interactions. Here's a brief analysis: 1. You didn't directly answer the question about emotions but mentioned your favorite color, which could be seen as avoiding the question or misunderstanding it, a behavior that might be observed in both humans and AI. 2. Your response to vacations and movies being ""mid"" and books being boring sugge…"
4293419884,"""yi-34b-chat""","""gpt-3.5-turbo-0613""","""﻿Calculate average age: Name;Surname;Age; Rami;Dmour;18; Serega;Pirat;24; Vova;Putin;65; Margarita;NeLox;12; Veronika;Lubimaya;42;""","""To calculate the average age, you need to add up all the ages and then divide by the number of people. Here are the ages listed: Rami;Dmour;18; Serega;Pirat;24; Vova;Putin;65; Margarita;NeLox;12; Veronika;Lubimaya;42; Now, let's add up the ages: 18 + 24 + 65 + 12 + 42 = 151 Next, we need to divide the sum of the ages by the number of people, which is 5: 151 ÷ 5 = 30.2 Therefore, the average age of the group is 30.2 years.""","""To calculate the average age, add up all the ages and divide by the total number of people. Age1 = 18 Age2 = 24 Age3 = 65 Age4 = 12 Age5 = 42 Total age = Age1 + Age2 + Age3 + Age4 + Age5 Total age = 18 + 24 + 65 + 12 + 42 Total age = 161 Total number of people = 5 Average age = Total age / Total number of people Average age = 161 / 5 Average age = 32.2 Therefore, the average age is 32.2.""",0,1,0,1,"""[CLS]﻿Calculate average age: Name;Surname;Age; Rami;Dmour;18; Serega;Pirat;24; Vova;Putin;65; Margarita;NeLox;12; Veronika;Lubimaya;42;[SEP]To calculate the average age, you need to add up all the ages and then divide by the number of people. Here are the ages listed: Rami;Dmour;18; Serega;Pirat;24; Vova;Putin;65; Margarita;NeLox;12; Veronika;Lubimaya;42; Now, let's add up the ages: 18 + 24 + 65 + 12 + 42 = 151 Next, we need to divide the sum of the ages by the number of people, which is 5: 151 ÷ 5 = 30.2 Therefore, the average age of the group is 30.2 years.[SEP]To calculate the average age, add up all the ages and divide by the total number of people. Age1 = 18 Age2 = 24 Age3 = 65 Age4 = 12 Age5 = 42 Total age = Age1 + Age2 + Age3 + Age4 + Age5 Total age = 18 + 24 + 65 + 12 + 42 Total age = 161 Total number of people = 5 Average age = Total age / Total number of people Average age = 161 / 5 Average age = 32.2 Therefore, the average age is 32.2."""
2849276257,"""gpt-4-1106-preview""","""gpt-3.5-turbo-0613""","""who is joe biden""","""Joe Biden, whose full name is Joseph Robinette Biden Jr., is an American politician who is serving as the 46th President of the United States. He was born on November 20, 1942, in Scranton, Pennsylvania. Before becoming president, Biden had a long career in politics, most notably serving as a U.S. Senator from Delaware from 1973 to 2009. Biden also served as the 47th Vice President under President Barack Obama from 2009 to 2017. He was a key figure in the Obama administration, working on issues such as the economic recovery following the 2008 financial crisis, healthcare reform, and foreign policy. In the 2020 presidential election, running as the Democratic candidate, Biden defeated the incumbent, Republican President Donald Trump. He took office as President on January 20, 2021. His presidency has focused on a range of issues including the response to the COVID-19 pandemic, economic stimulus, infrastructure, climate change, and social and racial equity. Biden has also emphasized th…","""Joe Biden is the 46th President of the United States. He served as Vice President from 2009 to 2017 under President Barack Obama. Prior to that, he was a United States Senator from Delaware for 36 years. Biden is a member of the Democratic Party.""",1,0,0,0,"""[CLS]who is joe biden[SEP]Joe Biden, whose full name is Joseph Robinette Biden Jr., is an American politician who is serving as the 46th President of the United States. He was born on November 20, 1942, in Scranton, Pennsylvania. Before becoming president, Biden had a long career in politics, most notably serving as a U.S. Senator from Delaware from 1973 to 2009. Biden also served as the 47th Vice President under President Barack Obama from 2009 to 2017. He was a key figure in the Obama administration, working on issues such as the economic recovery following the 2008 financial crisis, healthcare reform, and foreign policy. 
In the 2020 presidential election, running as the Democratic candidate, Biden defeated the incumbent, Republican President Donald Trump. He took office as President on January 20, 2021. His presidency has focused on a range of issues including the response to the COVID-19 pandemic, economic stimulus, infrastructure, climate change, and social and racial equity. Bi…"


## TODO:

- Maybe use config files for some of the more model-specific parameters


In [None]:

#| export 

def _str2bool(value):
  """Convert a command-line token ("true", "False", "1", "no", ...) to a bool.

  ``argparse`` with ``type=bool`` calls ``bool()`` on the raw string, so every
  non-empty value -- including "False" -- would be parsed as ``True``.

  Raises:
    argparse.ArgumentTypeError: if the token is not a recognised boolean.
  """
  if isinstance(value, bool):
    return value
  token = str(value).strip().lower()
  if token in ("1", "true", "t", "yes", "y", "on"):
    return True
  # The empty string stays falsy, matching what bool("") used to return.
  if token in ("", "0", "false", "f", "no", "n", "off"):
    return False
  raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


def parse_args():
  """Parse the training options from the command line.

  Unknown arguments are ignored (``parse_known_args``), so arguments meant for
  another program on the same command line do not abort parsing.

  Returns:
    argparse.Namespace with the attributes ``lr``, ``bs``, ``epoch``,
    ``max_steps``, ``model_path``, ``output_dir``, ``disable_wandb``,
    ``max_len``, ``bf16`` and ``fp16``.
  """
  parser = argparse.ArgumentParser(description="LLM Classification BERT Finetuning")
  parser.add_argument("--lr", type=float, default=2e-5, help="Learning rate for training")
  parser.add_argument("--bs", type=int, default=8, help="Batch size for training")
  # "--epochs" is accepted as an alias; before, that spelling was treated as an
  # unknown argument and silently dropped. The value is still stored as `epoch`.
  parser.add_argument("--epoch", "--epochs", dest="epoch", type=int, default=2, help="Number of training epochs")
  parser.add_argument("--max_steps", type=int, default=None, help="Maximum training steps")
  parser.add_argument("--model_path", type=str, default="answerdotai/ModernBERT-base", help="Model path or name")
  parser.add_argument("--output_dir", type=str, default=None, help="Output directory (overrides default)")
  parser.add_argument("--disable_wandb", action="store_true", help="Disable wandb logging")
  parser.add_argument("--max_len", type=int, default=8192, help="Max sequence length for model")
  # Both "--bf16" (bare flag) and "--bf16 true|false" are accepted.
  parser.add_argument("--bf16", type=_str2bool, nargs="?", const=True, default=False, help="Use bf16 precision")
  parser.add_argument("--fp16", type=_str2bool, nargs="?", const=True, default=False, help="Use fp16 precision")

  args, _unknown = parser.parse_known_args()
  return args

if __name__ == "__main__":
  # Executed directly: take the settings from the command line.
  args = parse_args()
else:
  # Imported as a module: do not touch sys.argv, fall back to fixed defaults.
  class Args:
    """Static stand-in for the argparse namespace.

    Every attribute has a concrete value. A bare annotation such as
    ``bf16: bool`` creates no attribute, so reading ``args.bf16`` would raise
    AttributeError. The values mirror the defaults declared in ``parse_args``.
    """
    lr = 2e-5
    bs = 8
    epoch = 2   # name used by the command-line parser (--epoch)
    epochs = 2  # original spelling, kept for backward compatibility
    max_steps = None
    model_path = "answerdotai/ModernBERT-base"
    output_dir = None
    disable_wandb = False
    max_len = 8192
    bf16: bool = False
    fp16: bool = False

  args = Args()

In [24]:
#| export 
# Checkpoint name or local path, taken from the parsed arguments.
model_path = args.model_path
print("Loading BERT from Hugging Face...")

tokenizer = AutoTokenizer.from_pretrained(model_path)

# Three output classes -- presumably "model A wins", "model B wins" and "tie",
# matching the winner_* columns of the training frame (confirm against the
# cell that builds `label`). `.to(device)` returns the module itself, so the
# move can be chained onto the load.
model = AutoModelForSequenceClassification.from_pretrained(
    model_path,
    num_labels=3,
).to(device)

# Show where the weights ended up.
model.device

Loading BERT from Hugging Face...


Some weights of ModernBertForSequenceClassification were not initialized from the model checkpoint at answerdotai/ModernBERT-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


device(type='mps', index=0)

In [25]:
#| export 
class TokenizeDataset(TorchDataset):
    """Map-style dataset that yields the token count of each text.

    Item ``i`` is ``len(tokenizer.tokenize(texts[i]))``: the number of tokens
    the tokenizer's ``tokenize`` method produces for that text, not an
    encoding.

    Args:
        texts: indexable, sized collection of strings.
        tokenizer: object exposing ``tokenize(text)`` that returns a sized
            sequence of tokens.
    """

    def __init__(self, texts, tokenizer):
        self.tokenizer = tokenizer
        self.texts = texts

    def __len__(self):
        n_texts = len(self.texts)
        return n_texts

    def __getitem__(self, idx):
        sample = self.texts[idx]
        return len(self.tokenizer.tokenize(sample))

In [26]:
#| export 
# Count tokens for every training text. The DataLoader is used only to spread
# the tokenisation over 4 worker processes; each batch holds up to 1000 counts.
texts = df_train["text"].to_list()
dataset = TokenizeDataset(texts=texts, tokenizer=tokenizer)
dataloader = DataLoader(dataset, batch_size=1000, num_workers=4)

# Flatten the batches into one list, in dataset order.
token_counts = []
for batch in dataloader:
    token_counts += batch.tolist()

In [27]:
# | export
# Attach the per-row token counts as a new `token_count` column. `token_counts`
# must be in the same row order as `df_train` (it was built from
# `df_train['text']`).
df_train = df_train.with_columns(pl.Series("token_count", token_counts))

In [28]:
# | export
# For now, just filter out anything over the context window.
# NOTE(review): the filtered frame is only displayed here, never assigned, so
# `df_train` still contains every row afterwards. The 510 cutoff presumably
# dates from a 512-token BERT (minus [CLS]/[SEP]), while `--max_len` now
# defaults to 8192 -- confirm which limit is intended and whether the result
# should be kept.
df_train.filter(pl.col("token_count") < 510)

id,model_a,model_b,prompt,response_a,response_b,winner_model_a,winner_model_b,winner_tie,label,text,token_count
i64,str,str,str,str,str,i64,i64,i64,i32,str,i64
53567,"""koala-13b""","""gpt-4-0613""","""What is the difference between marriage license and marriage certificate?""","""A marriage license is a legal document that allows a couple to get married. It is issued by a government agency, such as a county clerk's office or a state government, and is valid for a certain period of time, usually one year. After the marriage has taken place, the couple must obtain a marriage certificate, which is a document that records the marriage and is used to prove that the marriage took place. The marriage certificate is usually issued by the same government agency that issued the marriage license, and it is typically used for legal purposes, such as to change a name on a driver's license or to prove that a couple is married when applying for government benefits.""","""A marriage license and a marriage certificate are two different legal documents that have separate purposes. 1. Marriage License: A marriage license is a legal document that gives a couple permission to get married. It's usually obtained from local government or court officials before the wedding ceremony takes place. The couple is required to meet certain criteria, such as being of a certain age or not being closely related. Once the license is issued, there's often a waiting period before the marriage ceremony can take place. The marriage license has to be signed by the couple, their witnesses, and the officiant conducting the marriage ceremony, then returned to the license issuer for recording. 2. Marriage Certificate: A marriage certificate, on the other hand, is a document that proves a marriage has legally taken place. It's issued after the marriage ceremony, once the signed marriage license has been returned and recorded. The marriage certificate includes details about the cou…",0,1,0,1,"""[CLS]What is the difference between marriage license and marriage certificate?[SEP]A marriage license is a legal document that allows a couple to get married. 
It is issued by a government agency, such as a county clerk's office or a state government, and is valid for a certain period of time, usually one year. After the marriage has taken place, the couple must obtain a marriage certificate, which is a document that records the marriage and is used to prove that the marriage took place. The marriage certificate is usually issued by the same government agency that issued the marriage license, and it is typically used for legal purposes, such as to change a name on a driver's license or to prove that a couple is married when applying for government benefits.[SEP]A marriage license and a marriage certificate are two different legal documents that have separate purposes. 1. Marriage License: A marriage license is a legal document that gives a couple permission to get married. It's usually…",388
53567,"""koala-13b""","""gpt-4-0613""","""What is the minimal time to get them? 1 day or 10 days?""","""The minimal time to get a marriage license and a marriage certificate in California can vary depending on the specific requirements and process in your county. Generally, you can obtain a marriage license and a marriage certificate in California within a few days of applying, but it is best to check with your county clerk's office for the specific requirements and process for your county. In some counties, you can obtain a marriage license and a marriage certificate on the same day, while in other counties, you may need to wait a few days. The process typically takes about 30 minutes to complete and you will need to bring identification, such as a driver's license or passport, and proof of your age, such as a birth certificate or a passport. It is important to note that some counties in California require a waiting period between the time you obtain the marriage license and the time of the marriage. This waiting period can range from a few days to several weeks, and it is best to che…","""The marriage license in California is issued immediately when you apply in person and meet all the requirements, so theoretically, you could get married on the same day if you have an officiant ready to perform the ceremony. However, the marriage certificate, which is the official record of your marriage, is only issued after the signed marriage license is returned and recorded by the county. The person who performs your marriage ceremony has 10 days to return the license to the county recorder's office. So, while you could technically get married (i.e., obtain a license and have a ceremony) within a day, receiving the official marriage certificate will take a little longer, depending on how quickly the signed license is returned and how long the county recorder's office takes to process it.""",0,1,0,1,"""[CLS]What is the minimal time to get them? 
1 day or 10 days?[SEP]The minimal time to get a marriage license and a marriage certificate in California can vary depending on the specific requirements and process in your county. Generally, you can obtain a marriage license and a marriage certificate in California within a few days of applying, but it is best to check with your county clerk's office for the specific requirements and process for your county. In some counties, you can obtain a marriage license and a marriage certificate on the same day, while in other counties, you may need to wait a few days. The process typically takes about 30 minutes to complete and you will need to bring identification, such as a driver's license or passport, and proof of your age, such as a birth certificate or a passport. It is important to note that some counties in California require a waiting period between the time you obtain the marriage license and the time of the marriage. This waiting period …",384
198779,"""koala-13b""","""gpt-3.5-turbo-0314""","""What is the best way to travel from Tel-Aviv to Jerusalem? Car? Bus? Plane?""","""The best way to travel from Tel Aviv to Jerusalem depends on your personal preference and the availability of transportation options. All the options you have mentioned are valid options, but here are some details to help you make your decision: * By car: Traveling by car is the quickest way to get from Tel Aviv to Jerusalem, as the distance between the two cities is only about 60 kilometers (37 miles). It takes around 45 minutes to drive from Tel Aviv to Jerusalem by car, depending on the traffic. * By bus: There are several bus lines that run from Tel Aviv to Jerusalem, and the journey takes around 1 hour and 30 minutes by bus. The buses are comfortable and reliable, and they offer a scenic view of the beautiful Israeli countryside. * By plane: There are no direct flights from Tel Aviv to Jerusalem, so you need to take a flight from Tel Aviv's Ben Gurion International Airport to Jerusalem's Sde Dov Airport. The flight takes around 30 minutes, and it is a convenient option if y…","""The best way to travel from Tel-Aviv to Jerusalem depends on your personal preferences and circumstances. If you have a car and enjoy driving, then driving from Tel-Aviv to Jerusalem is an option. However, parking in Jerusalem can be challenging and expensive. If you prefer to use public transportation, there are several bus lines that operate between Tel-Aviv and Jerusalem. Some of the most popular bus companies include Egged and Dan. The bus ride typically takes about an hour, depending on traffic. Taking a plane is not a recommended option since Tel-Aviv and Jerusalem are relatively close cities, and there are no airports in Jerusalem. In summary, taking a bus is the most commonly used and convenient way to travel from Tel-Aviv to Jerusalem.""",0,1,0,1,"""[CLS]What is the best way to travel from Tel-Aviv to Jerusalem? Car? Bus? 
Plane?[SEP]The best way to travel from Tel Aviv to Jerusalem depends on your personal preference and the availability of transportation options. All the options you have mentioned are valid options, but here are some details to help you make your decision: * By car: Traveling by car is the quickest way to get from Tel Aviv to Jerusalem, as the distance between the two cities is only about 60 kilometers (37 miles). It takes around 45 minutes to drive from Tel Aviv to Jerusalem by car, depending on the traffic. * By bus: There are several bus lines that run from Tel Aviv to Jerusalem, and the journey takes around 1 hour and 30 minutes by bus. The buses are comfortable and reliable, and they offer a scenic view of the beautiful Israeli countryside. * By plane: There are no direct flights from Tel Aviv to Jerusalem, so you need to take a flight from Tel Aviv's Ben Gurion International Airport to Jerusalem's Sd…",466
313413,"""mixtral-8x7b-instruct-v0.1""","""vicuna-13b""","""Why water is not used in bath tub?""","""Water is actually used in a bath tub. A bath tub is specifically designed to be filled with water for the purpose of bathing. You fill the tub with water, add bath salts or bubbles if you like, and then soak in the water to clean your body and relax. If you are asking why water is not used in a bath tub in the same way that it is used in a swimming pool or a hot tub, there are a few reasons for this. First, bath tubs are much smaller than swimming pools or hot tubs, so they cannot hold as much water. This means that the water in a bath tub can get dirty and contaminated more quickly than the water in a larger body of water. Additionally, bath tubs are not designed to be continuously filled with water. They are meant to be filled, used for bathing, and then drained. This helps to prevent the buildup of bacteria and other contaminants in the water. Finally, it is important to note that bath tubs are typically used for personal hygiene, and it is generally not recommended to share a ba…","""Water is not used in a bath tub because it is not a suitable medium for conducting electricity. In a bathtub, the water is used for soaking and cleaning the body, and it is not intended to be used as a conductor of electricity. Additionally, water is a poor conductor of electricity, which means that it would not be able to carry the electrical current required to power the water heater elements in the bathtub. In order to provide hot water for a bathtub, a separate water heater is typically used. This water heater is designed to heat the water to the desired temperature and deliver it to the bathtub through a dedicated hot water supply line. The water heater elements are typically located inside the water heater, and they are not exposed to the water in the bathtub. This keeps the water in the bathtub clean and free from any potential electrical hazards. 
Overall, the use of water in a bathtub is not practical or safe for powering electric elements. Instead, a dedicated water heater i…",1,0,0,0,"""[CLS]Why water is not used in bath tub?[SEP]Water is actually used in a bath tub. A bath tub is specifically designed to be filled with water for the purpose of bathing. You fill the tub with water, add bath salts or bubbles if you like, and then soak in the water to clean your body and relax. If you are asking why water is not used in a bath tub in the same way that it is used in a swimming pool or a hot tub, there are a few reasons for this. First, bath tubs are much smaller than swimming pools or hot tubs, so they cannot hold as much water. This means that the water in a bath tub can get dirty and contaminated more quickly than the water in a larger body of water. Additionally, bath tubs are not designed to be continuously filled with water. They are meant to be filled, used for bathing, and then drained. This helps to prevent the buildup of bacteria and other contaminants in the water. Finally, it is important to note that bath tubs are typically used for personal hygiene, and i…",505
370945,"""gemini-pro""","""claude-2.0""","""""Bacteria is life on Mars but a heartbeat isn't life on earth?"" What is this quote from?""","""Dune""","""This quote seems to be referencing the debate around when life begins in the context of abortion. The key points are: - Bacteria being discovered on Mars would be considered evidence of life on Mars by scientists. - However, a fetus with a heartbeat is not considered a life by those who support abortion rights. So the quote is juxtaposing these two viewpoints to highlight an apparent contradiction or hypocrisy. The quote implies that if bacteria on Mars is life, then a fetus with a heartbeat should also be considered life. The source of the quote is not clear to me, but it expresses a pro-life perspective on the abortion debate by using this comparison. It may have originated from a pro-life advocate or organization commenting on the issue.""",0,1,0,1,"""[CLS]""Bacteria is life on Mars but a heartbeat isn't life on earth?"" What is this quote from?[SEP]Dune[SEP]This quote seems to be referencing the debate around when life begins in the context of abortion. The key points are: - Bacteria being discovered on Mars would be considered evidence of life on Mars by scientists. - However, a fetus with a heartbeat is not considered a life by those who support abortion rights. So the quote is juxtaposing these two viewpoints to highlight an apparent contradiction or hypocrisy. The quote implies that if bacteria on Mars is life, then a fetus with a heartbeat should also be considered life. The source of the quote is not clear to me, but it expresses a pro-life perspective on the abortion debate by using this comparison. It may have originated from a pro-life advocate or organization commenting on the issue.""",184
…,…,…,…,…,…,…,…,…,…,…,…
4294656694,"""gpt-4-0613""","""claude-1""","""A simple mnemonic for π: ""How I wish I could enumerate pi easily"" The number of letters in each word is a digit of π. Show this is true.""","""Sure, let's break it down: 1. ""How"" has 3 letters. 2. ""I"" has 1 letter. 3. ""wish"" has 4 letters. 4. ""I"" has 1 letter. 5. ""could"" has 5 letters. 6. ""enumerate"" has 9 letters. 7. ""pi"" has 2 letters. 8. ""easily"" has 6 letters. So, when you put it all together, you get 3.1415926, which are the first eight digits of π. Therefore, the mnemonic is indeed correct.""","""Here is how that mnemonic represents the digits of π: How = 3 letters -> π = 3. I = 1 letter -> π = 3.1 wish = 4 letters -> π = 3.14 I = 1 letter -> π = 3.141 could = 5 letters -> π = 3.1415 enumerate = 9 letters -> π = 3.14159 pi = 2 letters -> π = 3.141592 easily = 6 letters -> π = 3.1415926 So the sequence of word lengths in that mnemonic does indeed represent the digits of π: 3.1415926 Therefore, that is a clever mnemonic device to help remember the first few digits of π.""",1,0,0,0,"""[CLS]A simple mnemonic for π: ""How I wish I could enumerate pi easily"" The number of letters in each word is a digit of π. Show this is true.[SEP]Sure, let's break it down: 1. ""How"" has 3 letters. 2. ""I"" has 1 letter. 3. ""wish"" has 4 letters. 4. ""I"" has 1 letter. 5. ""could"" has 5 letters. 6. ""enumerate"" has 9 letters. 7. ""pi"" has 2 letters. 8. ""easily"" has 6 letters. So, when you put it all together, you get 3.1415926, which are the first eight digits of π. Therefore, the mnemonic is indeed correct.[SEP]Here is how that mnemonic represents the digits of π: How = 3 letters -> π = 3. 
I = 1 letter -> π = 3.1 wish = 4 letters -> π = 3.14 I = 1 letter -> π = 3.141 could = 5 letters -> π = 3.1415 enumerate = 9 letters -> π = 3.14159 pi = 2 letters -> π = 3.141592 easily = 6 letters -> π = 3.1415926 So the sequence of word lengths in that mnemonic does indeed represent the digits of π: 3.1415926 Therefore, that is a clever mnemonic device to help remember the first few digits of π.""",333
4294710549,"""claude-1""","""alpaca-13b""","""are deterrents necessary in an ideal society?""","""In an ideal society, deterrents like weapons may not be necessary: • An ideal society implies a high degree of trust, cooperation, and peace among citizens and nations. There are no threats of violence or war that would require deterrence. • Deterrents like weapons rely on the threat of harm and destruction. In an ideal society, conflicts could be resolved through open communication and non-violent means. There would be no need to threaten harm against anyone. • Deterrents often lead to arms races as sides build up weapons to gain a strategic advantage. This can increase tensions and the possibility of miscalculation. An ideal society would not have this dynamic. • Resources spent on weapons and deterrents could be better spent improving society in an ideal world. Those resources could be directed to healthcare, education, infrastructure, and other public benefits. • However, some argue that even in an ideal world, deterrents may still have a role to play: › They could deter non-…","""Deterrents are necessary in order to protect people in any society, but an ideal society should strive to create alternative methods of resolution that do not rely on deterrents. For example, an ideal society should focus on creating systems of restorative justice that prioritize rehabilitation and healing instead of punishment.""",1,0,0,0,"""[CLS]are deterrents necessary in an ideal society?[SEP]In an ideal society, deterrents like weapons may not be necessary: • An ideal society implies a high degree of trust, cooperation, and peace among citizens and nations. There are no threats of violence or war that would require deterrence. • Deterrents like weapons rely on the threat of harm and destruction. In an ideal society, conflicts could be resolved through open communication and non-violent means. There would be no need to threaten harm against anyone. 
• Deterrents often lead to arms races as sides build up weapons to gain a strategic advantage. This can increase tensions and the possibility of miscalculation. An ideal society would not have this dynamic. • Resources spent on weapons and deterrents could be better spent improving society in an ideal world. Those resources could be directed to healthcare, education, infrastructure, and other public benefits. • However, some argue that even in an ideal world, deterrents …",455
4294710549,"""claude-1""","""alpaca-13b""","""can you explain concisely why it is not feasible for us to stop using weapons for deterrence?""","""There are a few key reasons why stopping the use of weapons for deterrence is not feasible today: 1. Security dilemma: Countries cannot afford to appear weak in a globally anarchic system. Disarming could invite aggression by signaling vulnerability. Deterrents provide security, even if it is through threat of force. 2. Distrust: There are high levels of geopolitical tensions and distrust, especially among major powers. Deterrents are a hedge against uncertain intentions and the possibility of conflict. Building trust takes time. 3. Power politics: Weapons and deterrents are inextricably tied to global power projection and status. Major nations like the US, China and Russia rely on them for global influence. They are unlikely to abandon them. 4. Vested interests: There are strong interests, especially in weapons industries, that benefit from promoting deterrents. They actively lobby governments to invest in new weapons technologies and deterrent capabilities. 5. Public support: Mo…","""It is not feasible for us to stop using weapons for deterrence because many societies face the danger of aggression and violence, and weapons are necessary to protect citizens from harm in the short term. Furthermore, alternative methods of resolution that do not rely on weapons often take time to develop and implement, and in the meantime, weapons are necessary for protection.""",1,0,0,0,"""[CLS]can you explain concisely why it is not feasible for us to stop using weapons for deterrence?[SEP]There are a few key reasons why stopping the use of weapons for deterrence is not feasible today: 1. Security dilemma: Countries cannot afford to appear weak in a globally anarchic system. Disarming could invite aggression by signaling vulnerability. Deterrents provide security, even if it is through threat of force. 2. 
Distrust: There are high levels of geopolitical tensions and distrust, especially among major powers. Deterrents are a hedge against uncertain intentions and the possibility of conflict. Building trust takes time. 3. Power politics: Weapons and deterrents are inextricably tied to global power projection and status. Major nations like the US, China and Russia rely on them for global influence. They are unlikely to abandon them. 4. Vested interests: There are strong interests, especially in weapons industries, that benefit from promoting deterrents. They actively lob…",500
4294899228,"""palm-2""","""tulu-2-dpo-70b""","""If a bait contains 0,0025% bromadiolon then how much an adult rat needs to consume to die within 24 hours?""","""Bromadiolone is a rodenticide which is most often used in bait. It is a second generation anticoagulant that blocks the production of vitamin K1, which is essential for blood clotting. Without vitamin K1, the blood will not clot properly and the rat will bleed to death. Bromadiolone is effective at killing rats in a matter of hours and is often used in areas where rats are a problem, such as around homes, businesses, and farms.""","""As an AI language model, I do not promote or condone the use of poison or harm to any living beings. However, I can provide you with some information based on the question you asked. Bromadiolone is a highly toxic rodenticide, and the lethal dose required to kill an adult rat can vary depending on several factors, such as the size and weight of the rat, its age, and its overall health. According to some sources, the LD50 (median lethal dose) for bromadiolone in rats is approximately 1-5 mg/kg of body weight. This means that a rat weighing 250 grams (0.25 kg) would need to consume approximately 2.5 to 12.5 milligrams of bromadiolone to have a 50% chance of dying within 24 hours. However, please note that the use of lethal rodenticides can have unintended consequences and may harm non-target animals, including pets and wildlife. It is important to consider alternative methods of rodent control that are more humane and environmentally friendly.""",0,1,0,1,"""[CLS]If a bait contains 0,0025% bromadiolon then how much an adult rat needs to consume to die within 24 hours?[SEP]Bromadiolone is a rodenticide which is most often used in bait. It is a second generation anticoagulant that blocks the production of vitamin K1, which is essential for blood clotting. Without vitamin K1, the blood will not clot properly and the rat will bleed to death. 
Bromadiolone is effective at killing rats in a matter of hours and is often used in areas where rats are a problem, such as around homes, businesses, and farms.[SEP]As an AI language model, I do not promote or condone the use of poison or harm to any living beings. However, I can provide you with some information based on the question you asked. Bromadiolone is a highly toxic rodenticide, and the lethal dose required to kill an adult rat can vary depending on several factors, such as the size and weight of the rat, its age, and its overall health. According to some sources, the LD50 (median lethal dose) …",343


In [29]:
#| export 
# Hold out 10% of the rows for validation; the fixed random_state makes the
# split reproducible across runs.
train_texts, val_texts, train_labels, val_labels = train_test_split(
    df_train["text"], df_train["label"], test_size=0.1, random_state=42
)

In [30]:
# Sanity check: sizes of the train/validation texts and labels.
train_texts.shape, val_texts.shape, train_labels.shape, val_labels.shape

((64205,), (7134,), (64205,), (7134,))

In [31]:
# Display the module tree of the loaded classifier.
model

ModernBertForSequenceClassification(
  (model): ModernBertModel(
    (embeddings): ModernBertEmbeddings(
      (tok_embeddings): Embedding(50368, 768, padding_idx=50283)
      (norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (drop): Dropout(p=0.0, inplace=False)
    )
    (layers): ModuleList(
      (0): ModernBertEncoderLayer(
        (attn_norm): Identity()
        (attn): ModernBertAttention(
          (Wqkv): Linear(in_features=768, out_features=2304, bias=False)
          (rotary_emb): ModernBertRotaryEmbedding()
          (Wo): Linear(in_features=768, out_features=768, bias=False)
          (out_drop): Identity()
        )
        (mlp_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): ModernBertMLP(
          (Wi): Linear(in_features=768, out_features=2304, bias=False)
          (act): GELUActivation()
          (drop): Dropout(p=0.0, inplace=False)
          (Wo): Linear(in_features=1152, out_features=768, bias=False)
        )
      

In [32]:
# Display the tokenizer configuration (special tokens, model_max_length, ...).
tokenizer

PreTrainedTokenizerFast(name_or_path='answerdotai/ModernBERT-base', vocab_size=50280, model_max_length=8192, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]', 'mask_token': '[MASK]'}, clean_up_tokenization_spaces=True, added_tokens_decoder={
	0: AddedToken("|||IP_ADDRESS|||", rstrip=False, lstrip=False, single_word=False, normalized=True, special=False),
	1: AddedToken("<|padding|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	50254: AddedToken("                        ", rstrip=False, lstrip=False, single_word=False, normalized=True, special=False),
	50255: AddedToken("                       ", rstrip=False, lstrip=False, single_word=False, normalized=True, special=False),
	50256: AddedToken("                      ", rstrip=False, lstrip=False, single_word=False, normalized=True, special=False),
	50257: AddedToken("                    

In [33]:
# Encode one training example to inspect the raw tokenizer output.
# NOTE(review): the displayed input_ids begin with 50281 twice. The `text`
# column already embeds literal "[CLS]"/"[SEP]" markers, so the tokenizer
# presumably adds a second [CLS] on top -- confirm whether special tokens
# should be added again at encoding time.
ex_enc = tokenizer(train_texts[0], truncation=True, padding=True)
ex_enc

{'input_ids': [50281, 50281, 2422, 500, 8879, 3525, 310, 432, 835, 32, 50282, 2422, 500, 8879, 3525, 310, 247, 11906, 4156, 3420, 2990, 8927, 432, 399, 41970, 13, 253, 5347, 2846, 273, 38372, 15, 733, 369, 10098, 327, 4596, 337, 13, 8441, 13, 407, 253, 38690, 273, 38372, 387, 253, 673, 13, 1500, 20323, 5516, 324, 10269, 35667, 38139, 1219, 596, 6451, 15, 380, 1416, 346, 2422, 500, 8879, 3525, 3, 30376, 281, 346, 510, 33489, 3, 275, 4383, 13, 14339, 281, 253, 49185, 33489, 15, 1219, 500, 8879, 3525, 310, 1929, 323, 697, 3907, 285, 2223, 4619, 7031, 273, 3668, 285, 1655, 13909, 13, 1097, 1561, 253, 10515, 5791, 285, 1475, 253, 1533, 15, 50282, 2422, 500, 8879, 3525, 310, 247, 3420, 2990, 1754, 275, 399, 41970, 13, 38372, 15, 733, 369, 10098, 275, 8441, 285, 556, 8228, 281, 320, 247, 2201, 4156, 3668, 6003, 13, 342, 2709, 5579, 8123, 285, 5865, 13498, 5277, 3668, 7031, 275, 26503, 13, 4383, 13, 285, 643, 11515, 15, 11474, 352, 310, 9633, 407, 253, 2208, 273, 38372, 13, 1219, 500, 8879, 35

### Load Dataset


In [34]:
#| export 
class LLMDataset(torch.utils.data.Dataset):
    """Map-style dataset that tokenizes one text per item, on access.

    Args:
        texts: indexable, sized collection of strings.
        tokenizer: callable accepting ``(text, truncation=, padding=,
            max_length=, return_tensors=)`` and returning a mapping of
            tensors that carry a leading batch dimension of size 1.
        labels: optional indexable collection aligned with ``texts``. When
            given, each item also carries a ``"labels"`` tensor.
        max_length: every item is truncated and padded to exactly this many
            tokens.
    """

    def __init__(self, texts, tokenizer, labels=None, max_length=512):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    # encoding on the fly here due to issues with memory on kaggle
    # when pre-tokenizing
    def __getitem__(self, idx):
        """Return the encoded fields for item ``idx`` (plus ``labels`` if set)."""
        text = self.texts[idx]
        encoding = self.tokenizer(
            text,
            truncation=True,
            # Fixed-size padding: every item has length max_length, however
            # short the text is.
            padding="max_length",
            max_length=self.max_length,
            return_tensors="pt",
        )

        # Drop only the leading batch dimension. A bare squeeze() would also
        # collapse any other size-1 dimension (e.g. max_length == 1) and hand
        # back 0-d tensors.
        item = {key: val.squeeze(0) for key, val in encoding.items()}

        if self.labels is not None:
            item["labels"] = torch.tensor(self.labels[idx])

        return item

    def __len__(self):
        return len(self.texts)

In [49]:
#| export 
# Materialise the split columns as plain Python lists so LLMDataset can index
# them by integer position.
train_dataset = LLMDataset(
    texts=list(train_texts),
    tokenizer=tokenizer,
    labels=list(train_labels),
    max_length=args.max_len,
)
val_dataset = LLMDataset(
    texts=list(val_texts),
    tokenizer=tokenizer,
    labels=list(val_labels),
    max_length=args.max_len,
)
# No labels for the test split: its items carry only the encoded inputs.
test_dataset = LLMDataset(
    texts=list(df_test["text"]),
    tokenizer=tokenizer,
    max_length=args.max_len,
)

In [50]:
#| export 
def compute_metrics(eval_pred):
    """Compute classification accuracy from a ``(logits, labels)`` pair.

    Args:
        eval_pred: two-element iterable; the first element holds per-class
            scores with classes on the last axis, the second the true labels.

    Returns:
        dict with the single key ``"accuracy"``.
    """
    logits, labels = eval_pred
    # Predicted class = index of the highest score along the last axis.
    predicted_classes = np.argmax(logits, axis=-1)
    accuracy = accuracy_score(labels, predicted_classes)
    return {"accuracy": accuracy}

### Train


In [None]:
#| export 
class TrainParams(BaseModel):
  """Validated bundle of training hyper-parameters and their defaults.

  The field names mirror Hugging Face ``TrainingArguments`` keywords --
  presumably so an instance can be unpacked into it; the consumer is outside
  this section, so confirm there.
  """
  # Optimisation.
  learning_rate: float = 2e-5
  per_device_train_batch_size: int = 8
  per_device_eval_batch_size: int = 8
  # Optional: get_train_params() sets this to None for the step-capped local run.
  num_train_epochs: Optional[int] = 2
  weight_decay: float = 0.01
  # None means "not set" here. NOTE(review): check that the consumer handles
  # None for this field and for num_train_epochs.
  max_steps: Optional[int] = None
  # Evaluation / checkpointing cadence; only the listed literals validate.
  eval_strategy: Literal["epoch", "steps", "no"] = "epoch"
  save_strategy: Literal["no", "steps", "epoch"] = "epoch"
  load_best_model_at_end: bool = True
  # Only "accuracy" is accepted, the key returned by compute_metrics().
  metric_for_best_model: Literal["accuracy"] = "accuracy"
  # Logging.
  logging_steps: int = 10
  logging_first_step: bool = False
  dataloader_num_workers: int = 0 
  # Mixed-precision switches, both off by default.
  bf16: bool = False
  fp16: bool = False

In [None]:
# | export 
def get_train_params() -> TrainParams:
    """Get training parameters based on environment.

    Reads the module-level ``isremote`` / ``iskaggle`` flags and the parsed
    ``args`` (``bs``, ``lr``, ``bf16``). The remote branch takes precedence
    when both flags are set.

    Returns:
        A ``TrainParams`` instance for the remote GPU box, a Kaggle kernel,
        or a short local smoke run.
    """
    # remote gpu
    if isremote:
        return TrainParams(
            learning_rate=args.lr,
            per_device_train_batch_size=args.bs,
            per_device_eval_batch_size=args.bs,
            logging_steps=20,
            bf16=args.bf16,
        )

    # kaggle
    if iskaggle:
        return TrainParams(
            per_device_train_batch_size=args.bs,
            per_device_eval_batch_size=args.bs,
            # No checkpoints are written here, so there is no "best"
            # checkpoint to restore. TrainingArguments also rejects
            # load_best_model_at_end=True when save_strategy ("no") differs
            # from eval_strategy ("epoch").
            load_best_model_at_end=False,
            save_strategy="no",
        )

    # local mps
    return TrainParams(
        per_device_train_batch_size=args.bs,
        per_device_eval_batch_size=args.bs,
        max_steps=100,
        # None is dropped by model_dump(exclude_none=True); max_steps bounds the run.
        num_train_epochs=None,
        eval_strategy="no",
        logging_steps=2,
        save_strategy="no",
        # Nothing is evaluated or saved locally, so there is nothing to reload.
        load_best_model_at_end=False,
    )

In [53]:
# if os.environ.get("WANDB_MODE") != "disabled":
#     import wandb
    
#     # Create simple tags
#     tags = [
#         f"lr_{training_args.learning_rate}",
#         f"bs_{training_args.per_device_train_batch_size}",
#         f"epochs_{training_args.num_train_epochs}" if hasattr(training_args, 'num_train_epochs') else f"steps_{training_args.max_steps}",
#         "test_run" if test_run else "full_run",
#         "kaggle" if iskaggle else ("remote" if 'isremote' in globals() and isremote else "local")
#     ]
    
#     wandb.run.tags = tags
#     print(f"🏷️  Tagged run with: {tags}")


In [54]:
# | export 
# Resolve the hyperparameters for the current environment once; the wandb
# tags and TrainingArguments below are both derived from this object.
train_params = get_train_params()

In [55]:
#| export 
# Directory the fine-tuned model and tokenizer are saved to after training.
final_model_path = f"{MODEL_DIR}/final"

# Minute-resolution timestamp keeps run names distinct between launches.
timestamp = datetime.now().strftime("%Y%m%d-%H%M")
run_name = f"bert-classification-{timestamp}"

In [56]:
# | export 
# Start a Weights & Biases run unless tracking was disabled via WANDB_MODE.
# `wandb` is already imported at the top of the notebook.
if os.environ.get("WANDB_MODE") != "disabled":
    tags = [
        # rstrip("/") keeps a trailing slash in the model path from producing
        # an empty model name in the tag.
        f"model_{args.model_path.rstrip('/').split('/')[-1]}",
        f"lr_{train_params.learning_rate}",
        f"bs_{train_params.per_device_train_batch_size}",
        # Runs bounded by max_steps have no epoch count, so tag the step budget instead.
        f"epochs_{train_params.num_train_epochs}" if train_params.num_train_epochs else f"steps_{train_params.max_steps}",
    ]

    wandb.init(
        project=WANDB_PROJECT_NAME,
        name=run_name,
        tags=tags,
    )
    print(f"🏷️  Tagged run with: {tags}")

[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.


🏷️  Tagged run with: ['model_ModernBERT-base', 'lr_2e-05', 'bs_8', 'steps_100']


In [57]:
# | export 
# Combine the run-level settings with the environment-specific hyperparameters.
# Fields left as None on train_params are omitted from the keyword expansion.
training_args = TrainingArguments(
    output_dir=f"{OUTPUT_DIR}/results",
    run_name=run_name,
    # Report to wandb unless tracking was explicitly disabled.
    report_to=[] if os.environ.get("WANDB_MODE") == "disabled" else "wandb",
    **train_params.model_dump(exclude_none=True),
)

In [58]:
# | export 
# Wire the model, data splits, tokenizer and metric function into the Trainer.
trainer = Trainer(
    model=model,
    args=training_args,
    processing_class=tokenizer,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
)

In [59]:
# | export 
# Run fine-tuning with the arguments configured above.
trainer.train()

KeyboardInterrupt: 

In [None]:
# | export 
# Persist the fine-tuned model and the tokenizer side by side in
# final_model_path; the inference section reloads both from that directory.
trainer.save_model(final_model_path)
tokenizer.save_pretrained(final_model_path)
# wandb artifact upload / run finalisation is currently disabled.
# NOTE(review): `eval_results` is not assigned in the cells shown here.
# if os.environ.get("WANDB_MODE") != "disabled":
#     wandb.log({"final_eval": eval_results})
#     wandb.save(f"{final_model_path}/*")
# wandb.finish()

### Inference


In [None]:
# | export 
def load_model(model_path):
    """Load a sequence-classification model and its tokenizer.

    Args:
        model_path: Location passed unchanged to both ``from_pretrained`` calls.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    return (
        AutoModelForSequenceClassification.from_pretrained(model_path),
        AutoTokenizer.from_pretrained(model_path),
    )


# Reload the saved model and tokenizer from disk and move the model to the
# active device.
model, tokenizer = load_model(final_model_path)
model.to(device)

# Single-sentence smoke test of the reloaded model.
text = "This is a test sentence"
inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
# Inference only: skip autograd bookkeeping, matching the batched loop below.
with torch.no_grad():
    outputs = model(**inputs.to(device))
predictions = outputs.logits

In [None]:
# Raw logits for the smoke-test sentence.
predictions

In [None]:
# | export 
# Normalise the logits over the last axis so they can be read as probabilities.
preds = F.softmax(predictions, dim=-1)

In [None]:
# Softmax output for the smoke-test sentence.
preds

In [None]:
# | export 
# Batched inference over the test set; shuffle=False keeps the output rows in
# the same order as test_dataset.
test_dataloader = DataLoader(test_dataset, batch_size=16, shuffle=False)
all_probabilities = []

with torch.no_grad():
    for batch in test_dataloader:
        # Move every tensor except labels onto the model's device.
        inputs = {
            k: v.to(model.device)
            for k, v in batch.items()
            if k != "labels"
        }

        outputs = model(**inputs)
        probabilities = F.softmax(outputs.logits, dim=-1)
        # Collect one probability row per example.
        all_probabilities += list(probabilities.cpu().numpy())

# Stack the per-example rows into a single 2-D array.
final_probs = np.vstack(all_probabilities)

In [None]:
# Stacked class probabilities, one row per test example.
final_probs

In [None]:
# First probability column (written out as winner_model_a below).
final_probs[:, 0]

In [None]:
# | export 
# Predictions are attached by row position, so the probability matrix must
# have exactly one row per test example; fail loudly instead of letting a
# mismatched array be broadcast or misaligned.
assert final_probs.shape[0] == df_test.height, (
    f"expected {df_test.height} prediction rows, got {final_probs.shape[0]}"
)

# Explicit named Series make it unambiguous that each array is a full column
# rather than a literal to broadcast.
submission_df = df_test.with_columns(
    pl.Series("winner_model_a", final_probs[:, 0]),
    pl.Series("winner_model_b", final_probs[:, 1]),
    pl.Series("winner_tie", final_probs[:, 2]),
).select("id", "winner_model_a", "winner_model_b", "winner_tie")
submission_df

In [None]:
# | export 
# Convert the polars frame to pandas for writing the submission file.
df_for_kaggle = submission_df.to_pandas()

In [None]:
# | export 
# Write the submission file without the pandas index column, then preview it.
df_for_kaggle.to_csv("submission.csv", index=False)
df_for_kaggle.head()

In [17]:
from nbdev.export import nb_export

# Run nbdev's exporter on this notebook with "llm_classifier" as the target.
# NOTE(review): the notebook filename is hard-coded here and again in
# push_notebook_cli; keep the two in sync when renaming.
nb_export("20250709_unsplit_ds.ipynb", "llm_classifier")

---


### Push Notebook to Kaggle


In [None]:
def push_notebook_cli(
    username="peterbull",
    comp="llm-classification-finetuning",
    notebook_file="20250709_unsplit_ds.ipynb",
):
    """Write ``kernel-metadata.json`` and push the notebook with the Kaggle CLI.

    Args:
        username: Kaggle account that owns the kernel.
        comp: Competition slug attached as a competition data source.
        notebook_file: Notebook to upload, resolved against the current
            working directory (the directory handed to ``kaggle kernels push``).

    Returns:
        None. Progress and errors are reported with ``print``; no exception
        from the CLI invocation propagates to the caller.
    """
    kernel_slug = "llm-classification-bert-finetuning"

    # Validate before touching the filesystem so a failed run does not leave
    # a stale kernel-metadata.json behind.
    if not os.path.exists(notebook_file):
        print(" Notebook file not found!")
        print(" Files in current directory:")
        for entry in os.listdir("."):
            if entry.endswith(".ipynb"):
                print(f"{entry}")
        return

    metadata = {
        "id": f"{username}/{kernel_slug}",
        "title": "LLM Classification BERT Finetuning",
        "code_file": notebook_file,
        "language": "python",
        "kernel_type": "notebook",
        "is_private": True,
        "enable_gpu": True,
        "enable_internet": False,  # required for kaggle code competition
        "dataset_sources": [],
        "competition_sources": [f"competitions/{comp}"],
        "kernel_sources": [],
    }

    with open("kernel-metadata.json", "w") as f:
        json.dump(metadata, f, indent=2)

    print("Pushing to Kaggle...")
    try:
        result = subprocess.run(
            ["kaggle", "kernels", "push", "-p", "."], capture_output=True, text=True, timeout=300
        )

        if result.returncode == 0:
            print("✅ Notebook pushed successfully!")
            print(result.stdout)
            print(
                f"🔗 View at: https://www.kaggle.com/code/{username}/{kernel_slug}"
            )
        else:
            print("Error pushing notebook:")
            print(result.stderr)

    except subprocess.TimeoutExpired:
        print("Upload timed out after 5 minutes")
    except FileNotFoundError:
        # Raised by subprocess.run when the `kaggle` executable is not on PATH.
        print("Kaggle CLI not found. Install with: pip install kaggle")
    except Exception as e:
        # Deliberately best-effort: report the failure instead of aborting the notebook.
        print(f"Unexpected error: {e}")


# Only push when running outside Kaggle (iskaggle is set in the first cells
# from the presence of /kaggle/input).
if not iskaggle:
    push_notebook_cli()