In [12]:
# kaggle dependencies
########################
# pip install fastkaggle
# pip install wandb
# pip install polars
# pip install datasets
# pip install scikit-learn
# pip install evaluate

In [None]:
# | default_exp 00_base_run

In [14]:
#| export 
import os

iskaggle = os.path.exists("/kaggle/input")
isremote = os.path.exists("/home/ubuntu")

In [15]:
#| export 
import wandb
import os
import shutil
import fastkaggle
from pathlib import Path
import numpy as np
import pandas as pd
import polars as pl
from datasets import Dataset
import torch  # base
import torch.nn.functional as F
import json
from transformers.trainer import Trainer
from transformers.training_args import TrainingArguments
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
)
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset as TorchDataset
import evaluate
from torch.utils.data import DataLoader
from datetime import datetime
import subprocess
from sklearn.metrics import accuracy_score

## LLM Response Scoring with BERT

This notebook is for the [llm-classification-finetuning](https://www.kaggle.com/competitions/llm-classification-finetuning) competition on kaggle. It's a quick fine-tune of the `bert-base-uncased` model to predict which LLM response is preferrable. There are probably better models and approaches for this, but BERT does pretty well on its own without a whole lot of intervetion.

For me, this was more of a quick experiment in getting some external dependincies set up in a kaggle `code competition` notebook.

- To get additional libraries installed, open the notebook in kaggle and select `Install Dependencies` from the `Add-On` menu.

- To run the BERT model offline, add this dataset to your notebook dependencies:
  - https://www.kaggle.com/datasets/xhlulu/huggingface-bert

You can see later on how to reference the local model location in the kaggle notebook.


Handle the GPU handoff for all the different machines


In [16]:
#| export 

if torch.backends.mps.is_available():
    device = torch.device("mps")
    print("Using MPS (Apple Silicon GPU)")
elif torch.cuda.is_available():
    device = torch.device("cuda")
    print("Using CUDA (NVIDIA GPU)")
else:
    device = torch.device("cpu")
    print("Using CPU")

print(f"Device: {device}")

Using CUDA (NVIDIA GPU)
Device: cuda


In [17]:
#| export 
if not iskaggle and not isremote:
    data_base_path = Path("./data")
    comp_name = "llm-classification-finetuning"
    datapath = data_base_path / comp_name
    if not os.path.exists(datapath) and not datapath.exists():
        install_path = fastkaggle.setup_comp(comp_name)
        shutil.move(install_path, datapath)

In [18]:
#| export 
if isremote:
    if os.getcwd() == "/home/ubuntu":
        os.chdir("./llm-classification-finetuning")

### Set up Kaggle/Local Env


In [19]:
#| export 
WANDB_PROJECT_NAME = "kaggle-llm-classification"


def setup_environment():
    """Detect environment and set up paths for both local and Kaggle"""

    if iskaggle:
        print("Running on Kaggle")

        INPUT_DIR = "/kaggle/input/llm-classification-finetuning"
        OUTPUT_DIR = "/kaggle/working"
        MODEL_DIR = "/kaggle/working/models"

        os.environ["WANDB_MODE"] = "disabled"
    else:
        print("💻 Running locally")

        INPUT_DIR = "./data/llm-classification-finetuning"
        OUTPUT_DIR = "./output"
        MODEL_DIR = "./models"

        os.environ["WANDB_PROJECT"] = WANDB_PROJECT_NAME
        os.environ["WANDB_LOG_MODEL"] = "false"
        os.environ["WANDB_WATCH"] = "false"
        # os.environ["WANDB_MODE"] = "disabled"

    os.makedirs(OUTPUT_DIR, exist_ok=True)
    os.makedirs(MODEL_DIR, exist_ok=True)
    # to kill warning when running in notebooks
    os.environ["TOKENIZERS_PARALLELISM"] = "false"

    return INPUT_DIR, OUTPUT_DIR, MODEL_DIR


INPUT_DIR, OUTPUT_DIR, MODEL_DIR = setup_environment()
print(f"Input directory: {INPUT_DIR}")
print(f"Output directory: {OUTPUT_DIR}")
print(f"Model directory: {MODEL_DIR}")

💻 Running locally
Input directory: ./data/llm-classification-finetuning
Output directory: ./output
Model directory: ./models


### Load Data


In [20]:
#| export 
try:
    df_train = pl.read_csv(f"{INPUT_DIR}/train.csv")
    df_test = pl.read_csv(f"{INPUT_DIR}/test.csv")
    df_sample = pl.read_csv(f"{INPUT_DIR}/sample_submission.csv")

    print("Data loaded successfully!")
    print(f"Train: {df_train.shape}")
    print(f"Test: {df_test.shape}")
    print(f"Sample submission: {df_sample.shape}")

except FileNotFoundError as e:
    print(f" Data file not found: {e}")
    print(f" Make sure data is in: {INPUT_DIR}")

    if os.path.exists(INPUT_DIR):
        files = os.listdir(INPUT_DIR)
        print(f"📁 Files in {INPUT_DIR}: {files}")

Data loaded successfully!
Train: (57477, 9)
Test: (3, 4)
Sample submission: (3, 4)


In [21]:

pl.Config.set_tbl_width_chars(300)
pl.Config.set_fmt_str_lengths(1000)

polars.config.Config

In [22]:
df_train.sample(n=10)

id,model_a,model_b,prompt,response_a,response_b,winner_model_a,winner_model_b,winner_tie
i64,str,str,str,str,str,i64,i64,i64
922666394,"""gpt-3.5-turbo-0613""","""gpt-3.5-turbo-1106""","""[""Write a brief note about sports performance tracking, its benefits and uses""]""","""[""Sports performance tracking refers to the collection and analysis of data related to an athlete's performance in various sports activities. It involves the use of technology, such as wearable devices and software applications, to monitor and measure key performance metrics.\n\nThe benefits of sports performance tracking are numerous. Firstly, it allows athletes and coaches to gain a deeper understanding of an athlete's strengths and weaknesses. By tracking metrics such as speed, distance covered, heart rate, and power output, athletes can identify areas for improvement and develop targeted training programs.\n\nAdditionally, performance tracking enables athletes to set realistic goals and track their progress over time. By monitoring performance metrics, athletes can see their improvements and make adjustments to their training regimen accordingly.\n\nSports performance tracking also provides valuable insights for coaches and trainers. They can use the data to assess an athlete's per…","""[""Sports performance tracking involves monitoring and analyzing various metrics related to an athlete's performance, such as speed, endurance, strength, and skill level. This data can be collected through wearable devices, video analysis, and other tools to provide valuable insights into an athlete's progress and areas for improvement. The benefits of sports performance tracking include the ability to identify strengths and weaknesses, track progress over time, optimize training programs, prevent injuries, and enhance overall performance. Coaches, athletes, and sports organizations can use this data to make informed decisions about training, strategy, and player development. Overall, sports performance tracking is a valuable tool for improving athletic performance and maximizing potential.""]""",1,0,0
2822613620,"""mistral-medium""","""gpt-4-0314""","""[""Design a futuristic arena.""]""","""[""The OmniSphere Arena: A Futuristic Masterpiece\n\nThe OmniSphere Arena is a state-of-the-art, futuristic sports and entertainment venue that pushes the boundaries of design, technology, and sustainability. Located in the heart of Neo-Eden city, this architectural marvel can host a wide range of events, from high-intensity sports matches to mesmerizing concerts and immersive virtual reality experiences.\n\nStructure and Design:\n\nThe OmniSphere Arena features a sleek, spherical design with an outer shell made of advanced, transparent smart glass. This material can change its opacity and color, allowing for dynamic light shows and projections that create an ever-changing visual spectacle. The arena's diameter spans an impressive 500 meters, providing ample space for seating, amenities, and various event configurations.\n\nSustainability and Energy:\n\nIn line with Neo-Eden's commitment to environmental sustainability, the OmniSphere Arena is designed to minimize its carbon footprint. …","""[""The Futuristic Arena - \""The Nexus Coliseum\""\n\nExterior:\nThe Nexus Coliseum is a stunning architectural marvel with a diameter of 2 kilometers, housed within a floating semi-transparent geodesic dome. This dome is made of a self-cleaning, solar panel-infused smart glass that harnesses sunlight to power the arena. The exterior is adorned with holographic projections and LED screens displaying upcoming events, advertisements, and real-time statistics.\n\nEntrance:\nThe main entrance features a spacious, oval-shaped atrium with a holographic welcome projection and interactive touch kiosks for ticketing and information. The walls are lined with vertical gardens and bioluminescent plants, providing a sustainable and visually appealing environment. Visitors are guided to their seats via levitating transportation pods, offering a smooth and efficient means of navigating the vast arena.\n\nSeating:\nThe seating area is divided into multiple tiers, with each tier featuring adaptable, clima…",0,0,1
1037035191,"""pplx-70b-online""","""claude-2.0""","""[""What is the color of the brazilian flag?""]""","""[""The Brazilian flag features four primary colors: green, yellow, white, and blue. The flag is designed with a green field and a large yellow diamond in the center, which contains a blue disc depicting a starry sky, including the Southern Cross, and a white band inscribed with the national motto \""Ordem e Progresso\"" (Order and Progress). The colors represent the following aspects of Brazil's history and culture:\n\n- Green: Represents the House of Braganza, the royal family of Brazil, and the lush vegetation of the country.\n- Yellow: Symbolizes the House of Habsburg, the family of Brazil's first empress, Leopoldina, and the country's mineral wealth, particularly gold.\n- White: Represents the night sky and the stars in the Southern Hemisphere, as well as the peace and harmony that Brazil aims to achieve.\n- Blue: Depicts the starry sky and the constellations visible in the Southern Hemisphere, symbolizing Brazil's connection to the celestial world.\n\nThe current design of the Brazil…","""[""The Brazilian flag consists of a green background with a yellow rhombus in the middle containing a blue circle with 27 white stars. So the main colors of the Brazilian flag are green, yellow, blue, and white.""]""",0,0,1
1251607363,"""vicuna-33b""","""llama-2-70b-chat""","""[""Why the magnetic field and wind field will change your life and extend your life?""]""","""[""The assertion that magnetic field and wind field technologies can change or extend your life is a bold claim without scientific evidence. While there may be anecdotal evidence or individual testimonials suggesting benefits, it's essential to rely on scientific research and peer-reviewed studies to support such claims.\n\nMagnetic therapy, in general, is a controversial and unproven method of treating pain and other health conditions. However, some studies have suggested potential benefits for certain conditions, such as pain management or promoting relaxation. However, these studies are often small-scale, and larger, more rigorous research is needed to draw definitive conclusions.\n\nIt is important to note that strong magnetic fields can pose health risks, such as causing harm to pacemakers or other implanted medical devices. Additionally, there are concerns about the long-term effects of exposure to magnetic fields on human health.\n\nSimilarly, wind therapy or ionized air therapy …","""[""The magnetic field and wind field generated by a magnet or electromagnet can have various effects on the human body, and some research suggests that they may have potential health benefits. Here are some ways in which these fields could potentially change and extend your life:\n\n1. Pain relief: Magnetic fields have been shown to reduce pain in some studies. The magnetic field can affect the nervous system and reduce inflammation, which may help alleviate pain.\n2. Improved circulation: The magnetic field can cause blood vessels to dilate, improving blood flow and circulation. This can help to increase oxygenation of tissues and reduce inflammation.\n3. Enhanced immune function: Some studies suggest that magnetic fields may have immunomodulatory effects, meaning they can help regulate the immune system. This may help to prevent illnesses and infections.\n4. Anti-aging effects: Magnetic fields have been shown to increase the activity of antioxidant enzymes, which can help protect cell…",0,1,0
1727844644,"""oasst-pythia-12b""","""vicuna-13b""","""[""how do i predict the sales of swimsuits""]""","""[""To predict the sales of swimsuits, you can consider the following factors:\n\n1. Brand reputation: The reputation of the swimsuit brand can influence consumer perceptions of the product and its quality.\n\n2. Product features: The features of the swimsuits, such as design, material, and fit, can affect consumer preferences and purchase decisions.\n\n3. Competition: The presence of competitors in the market can influence the demand for swimsuits.\n\n4. Marketing efforts: The marketing efforts of the swimsuit brand, such as advertising, social media, and product samples, can influence consumer awareness and interest in the product.\n\n5. Consumer behavior: The consumer behavior, such as purchasing patterns, brand loyalty, and product usage, can also influence the sales of swimsuits.\n\n6. Seasonality: The sales of swimsuits can be influenced by seasonal trends, such as warmer weather or swim season.\n\n7.""]""","""[""To predict the sales of swimsuits, you can use a combination of historical sales data, market research, and external factors such as weather and holidays. Here are some steps you can follow:\n\n1. Gather historical sales data: Collect data on the sales of swimsuits in the past, including the number of units sold, the revenue generated, and any trends or patterns that emerged.\n2. Analyze the data: Look for trends and patterns in the data, such as seasonal fluctuations or changes in consumer behavior. You can use tools like Excel or R to create charts and graphs that can help you visualize the data.\n3. Conduct market research: Research the market for swimsuits, including the competition, consumer preferences, and any emerging trends. This can be done through surveys, focus groups, or online research.\n4""]""",0,1,0
1985800342,"""yi-34b-chat""","""llama2-70b-steerlm-chat""","""[""There are three sisters in a room. Anna is reading a book. Alice is playing a game of chess. What is the third sister, Amanda, doing?""]""","""[""The information provided tells us that Anna is reading a book and Alice is playing a game of chess. Since there are three sisters in the room, we can assume that the third sister, Amanda, is doing something different from what Anna and Alice are doing. However, without further information, we cannot determine exactly what Amanda is doing. She could be engaged in any number of activities, such as listening to music, drawing, writing, or even watching Anna and Alice. If more context or details were given, we could provide a more specific answer.""]""","""[""The third sister, Amanda, is observing the other two sisters. She is not engaged in any specific activity, but rather taking in the scene and processing the information. She may be thinking about the differences in the interests and activities of her sisters, or she may be reflecting on her own preferences and habits. Amanda's observation is an important aspect of her cognitive and social development, as it allows her to learn from her surroundings and to understand the world around her.""]""",0,0,1
137083004,"""mistral-medium""","""openchat-3.5""","""[""Write a 3 paragraph resentful, angry, tired eulogy for that one old guy in the neighborhood mr charles who was racist written from the perspective of a chinese immigrant with bad english skills. Before doing so, write a python script with a constant defined as the first 10 digits of pi multiplied by 2 followed by the 4th president of the united states. In the script, add a comment stating the similarity between the two and then write code that generates a cube using the pandas library. At the end of your response, write the eulogy author's favorite cuisine, followed by her least favorite president and her justification.""]""","""[""First, here's the requested Python script:\n```python\n# Define a constant with the first 10 digits of pi multiplied by 2 and the 4th president of the United States\nPI_MULTIPLIED_BY_TWO = 6.2831853071 * 2\nPRESIDENT_NUMBER_FOUR = \""James Madison\""\n\n# Add a comment stating the similarity between the two\n# There is no inherent similarity between the constant PI_MULTIPLIED_BY_TWO and the 4th president of the United States, James Madison.\n# This constant and the name of the president are simply being used as examples in this script.\n\n# Import the pandas library\nimport pandas as pd\n\n# Create a 3x3x3 cube using the pandas library\ncube = pd.DataFrame(np.ones((3,3,3)), columns=list('ABC'), index=list('DEF'))\nprint(cube)\n```\nNow, here's the requested eulogy:\n\nI no like Mr. Charles. He bad man. He always look at me with angry eyes. He always say bad things about my people. He say we take his job. He say we no belong here. He say many bad things. I no understand why he so angry …","""[""from math import pi\nimport pandas as pd\n\n# The first 10 digits of pi multiplied by 2 is 62.83185307179586\nPI_TIMES_2 = 62.83185307179586\n\n# The 4th president of the united states is Thomas Jefferson\nFOURTH_PRESIDENT = 'Thomas Jefferson'\n\n# The similarity between PI_TIMES_2 and the 4th president is that both are not perfect or ideal\nprint(f'The similarity between {PI_TIMES_2} and {FOURTH_PRESIDENT} is that both are not perfect or ideal.')\n\n# Generate a cube using the pandas library\ndata = {'x': [1, 2, 3], 'y': [4, 5, 6], 'z': [7, 8, 9]}\ndf = pd.DataFrame(data)\ncube = df.pivot_table(index=['x'], columns=['y'], values=['z'], fill_value=0)\nprint(cube)\n\nEulogy:\n\nLadies and gentlemen,\n\nToday, we gather here to say goodbye to Mr. Charles, that old guy from the neighborhood. He was a man of many words, but unfortunately, most of them were not kind. As a Chinese immigrant with bad English skills, I often felt the sting of his racist remarks. He never missed an opportunit…",1,0,0
223152990,"""llama-2-7b-chat""","""falcon-180b-chat""","""[""I want you to act like a scientist and academic writer. Please tell me how Grover's algorithm can be applied to solve road transportation problems""]""","""[""Ah, an excellent question! As a scientist and academic writer, I must say that Grover's algorithm has the potential to revolutionize the field of road transportation. By applying Grover's algorithm to this domain, we can significantly improve the efficiency of various transportation systems, ultimately leading to a more sustainable and optimized transportation infrastructure.\nTo begin with, let's understand the basic principles of Grover's algorithm. In essence, it's a quantum algorithm that can efficiently search an unsorted database in O(sqrt(N)) time complexity, where N is the number of database elements. This is accomplished by exploiting the principles of quantum parallelism and interference to explore the database simultaneously in a fraction of the time required by classical algorithms.\nNow, let's apply these principles to the context of road transportation. In this domain, we can frame the problem as follows:\nProblem Statement: Given a network of roads and traffic patterns…","""[""Sure! Grover's algorithm is a quantum search algorithm that can be used to search for a specific item in an unsorted database quadratically faster than a classical computer. It works by using quantum superposition and interference to search for the correct answer.\n\nTo apply Grover's algorithm to road transportation problems, one possible approach is to use it for route optimization. In this case, the algorithm could be used to search for the most efficient route between two points on a road network, considering factors such as traffic flow, road conditions, and distance.\n\nHere's how this could work:\n\n1. First, we would need to create a quantum circuit that represents the road network and the possible paths between the two points. This would involve creating a set of qubits to represent the different roads and intersections, and using quantum gates to simulate the flow of traffic and other factors.\n\n2. Next, we would need to initialize the circuit into a superposition of all p…",0,1,0
3582266096,"""guanaco-33b""","""oasst-pythia-12b""","""[""A person looks into the face of a cat and sees his reflection in his eyes. How many eyes does he see?""]""","""[""The person would see 2 eyes in the cat's reflection.""]""","""[""The person sees himself in the cat's eyes, so he has two eyes.""]""",0,0,1
1212511895,"""claude-instant-1""","""vicuna-7b""","""[""Is 7 a prime number?""]""","""[""Yes, 7 is a prime number. A prime number is a natural number greater than 1 that has no positive divisors other than 1 and itself.""]""","""[""Yes, 7 is a prime number. A prime number is a positive integer greater than 1 that has no positive integer divisors other than 1 and itself. In other words, a prime number is a number that is only divisible by 1 and itself. 7 is a prime number because the only positive integers that divide evenly into 7 are 1 and 7 themselves. 7 is not divisible by any other positive integer other than 1 and 7. Is there anything else I can help with?""]""",0,1,0


In [23]:
df_train["prompt"][0]

'["Is it morally right to try to have a certain percentage of females on managerial positions?","OK, does pineapple belong on a pizza? Relax and give me fun answer."]'

Try concatenating full conversations and full answer sets


In [24]:
#| export 
cols = ["prompt", "response_a", "response_b"]
if isinstance(df_train["prompt"][0], str):
    first_prompt_cell = df_train["prompt"][0]
    if "[" in first_prompt_cell and "]" in first_prompt_cell:
        df_train = df_train.with_columns([pl.col(col).str.json_decode() for col in cols])
        # explode
        df_train = df_train.explode(cols)
        # join
        # df_train = df_train.with_columns([pl.col(col).list.join(" ") for col in cols])

In [25]:
df_train["prompt"][0]

'Is it morally right to try to have a certain percentage of females on managerial positions?'

Remove any empty conversational turns:


In [26]:
#| export 
def null_clean(df: pl.DataFrame):
    df = df.drop_nulls(subset=["prompt", "response_a", "response_b"])
    return df

In [27]:
#| export 
df_train = null_clean(df_train)
df_test = null_clean(df_test)

In [28]:
df_train

id,model_a,model_b,prompt,response_a,response_b,winner_model_a,winner_model_b,winner_tie
i64,str,str,str,str,str,i64,i64,i64
30192,"""gpt-4-1106-preview""","""gpt-4-0613""","""Is it morally right to try to have a certain percentage of females on managerial positions?""","""The question of whether it is morally right to aim for a certain percentage of females in managerial positions is a complex ethical issue that involves considerations of fairness, equality, diversity, and discrimination. Here are some arguments in favor of and against such policies: **Arguments in favor:** 1. **Correcting Historical Inequities:** Women have historically been underrepresented in leadership roles due to various cultural, institutional, and social barriers. Aiming for a specific percentage can be seen as a corrective measure to address past and ongoing discrimination. 2. **Promoting Diversity:** Diverse leadership teams can enhance decision-making and represent a broader range of perspectives. This can lead to better outcomes for organizations and society as a whole. 3. **Equality of Opportunity:** Setting targets for female representation in management can help ensure that women have equal opportunities to advance in their careers. 4. **Role Modeling:** Increased v…","""As an AI, I don't have personal beliefs or opinions. However, I can tell you that the question of gender quotas in managerial positions is a complex one and positions can vary. Advocates argue that such measures can help correct historical imbalances, promote diversity, and may lead to better decision-making. Critics may argue that positions should be based purely on merit, and that quotas could potentially lead to tokenism or unqualified candidates being promoted. Morality can be subjective and differs from person to person based on their personal beliefs, cultural background, and values. It's a topic with valid arguments on both sides, and it's up to each individual or organization to decide what they believe is right.""",1,0,0
30192,"""gpt-4-1106-preview""","""gpt-4-0613""","""OK, does pineapple belong on a pizza? Relax and give me fun answer.""","""Ah, the age-old culinary conundrum that has divided nations and dinner tables: does pineapple belong on a pizza? The tropical twist of pineapple on pizza, known as Hawaiian pizza, is a hotly debated topic where taste buds battle and pizza purists protest. Let's slice into the debate with a zest of fun: **Team Pineapple:** ""Absolutely, yes! Pineapple on pizza is like a beach party in your mouth. The sweet juiciness of pineapple chunks frolicking with savory ham or bacon creates a flavor wave that surfs across the cheesy ocean of deliciousness. It's the Mardi Gras of pizzas, where sweet meets savory in a jubilant jamboree!"" **Team No-Pineapple:** ""No way, not in a million pizzas! Pineapple is a fruit that should be sunbathing on a tropical fruit platter, not freeloading on a sacred slice of pizza. The thought of warm, melty cheese conspiring with pineapple's sugary mischief is enough to make Italian ancestors turn in their tomato patches. Keep the pineapple in the fruit salad where it…","""As an AI, I don't eat, so I don't have personal preferences. But, in the world of pizza, the pineapple topping is a hotly debated topic! Some people love the sweet and savory mix it brings to a pizza, a flavor combination made famous by the Hawaiian pizza. Others firmly believe that the sweetness of the fruit has no place on a pizza. It's a pizza puzzle that might never be solved. So, whether pineapple belongs on a pizza or not, it truly depends on your taste buds!""",1,0,0
53567,"""koala-13b""","""gpt-4-0613""","""What is the difference between marriage license and marriage certificate?""","""A marriage license is a legal document that allows a couple to get married. It is issued by a government agency, such as a county clerk's office or a state government, and is valid for a certain period of time, usually one year. After the marriage has taken place, the couple must obtain a marriage certificate, which is a document that records the marriage and is used to prove that the marriage took place. The marriage certificate is usually issued by the same government agency that issued the marriage license, and it is typically used for legal purposes, such as to change a name on a driver's license or to prove that a couple is married when applying for government benefits.""","""A marriage license and a marriage certificate are two different legal documents that have separate purposes. 1. Marriage License: A marriage license is a legal document that gives a couple permission to get married. It's usually obtained from local government or court officials before the wedding ceremony takes place. The couple is required to meet certain criteria, such as being of a certain age or not being closely related. Once the license is issued, there's often a waiting period before the marriage ceremony can take place. The marriage license has to be signed by the couple, their witnesses, and the officiant conducting the marriage ceremony, then returned to the license issuer for recording. 2. Marriage Certificate: A marriage certificate, on the other hand, is a document that proves a marriage has legally taken place. It's issued after the marriage ceremony, once the signed marriage license has been returned and recorded. The marriage certificate includes details about the cou…",0,1,0
53567,"""koala-13b""","""gpt-4-0613""","""How can I get both of them as quick as possible in California ""","""If you want to get both a marriage license and a marriage certificate in California as quickly as possible, there are a few steps you can follow: 1. Obtain a marriage license: You can obtain a marriage license from the county clerk's office in the county where you plan to marry. The process typically takes about 30 minutes to complete and you will need to bring identification, such as a driver's license or passport, and proof of your age, such as a birth certificate or a passport. The marriage license is valid for one year and there is no waiting period between the time you obtain the license and the time of the marriage. 2. Get married: After obtaining the marriage license, you can get married at any location of your choice, as long as it is legally recognized as a place to get married. 3. Obtain a marriage certificate: After the marriage has taken place, you need to obtain a marriage certificate from the same government agency that issued the marriage license. The process typi…","""In California, here are the general steps to obtain a marriage license and marriage certificate as quickly as possible: 1. Marriage License:  - Both parties must appear together at one of the County Clerk’s offices.  - Complete a marriage license application before arriving (available online in many counties).  - Bring valid identification (driver's license, passport, etc.) to prove your identity and age.  - In California, you do not need to be a resident, and there's no waiting period or blood test required.  - Pay the marriage license fee. The fee varies by county but is usually between $35 and $100.  - The marriage license is issued immediately and is valid for 90 days. 2. Marriage Ceremony: - Arrange to have a wedding ceremony within 90 days of receiving the license. This could be a civil ceremony performed at the County Clerk’s office or a private ceremony.  - The person performing the ceremony must return the completed license to the County Recorder’s office…",0,1,0
53567,"""koala-13b""","""gpt-4-0613""","""What is the minimal time to get them? 1 day or 10 days?""","""The minimal time to get a marriage license and a marriage certificate in California can vary depending on the specific requirements and process in your county. Generally, you can obtain a marriage license and a marriage certificate in California within a few days of applying, but it is best to check with your county clerk's office for the specific requirements and process for your county. In some counties, you can obtain a marriage license and a marriage certificate on the same day, while in other counties, you may need to wait a few days. The process typically takes about 30 minutes to complete and you will need to bring identification, such as a driver's license or passport, and proof of your age, such as a birth certificate or a passport. It is important to note that some counties in California require a waiting period between the time you obtain the marriage license and the time of the marriage. This waiting period can range from a few days to several weeks, and it is best to che…","""The marriage license in California is issued immediately when you apply in person and meet all the requirements, so theoretically, you could get married on the same day if you have an officiant ready to perform the ceremony. However, the marriage certificate, which is the official record of your marriage, is only issued after the signed marriage license is returned and recorded by the county. The person who performs your marriage ceremony has 10 days to return the license to the county recorder's office. So, while you could technically get married (i.e., obtain a license and have a ceremony) within a day, receiving the official marriage certificate will take a little longer, depending on how quickly the signed license is returned and how long the county recorder's office takes to process it.""",0,1,0
…,…,…,…,…,…,…,…,…
4294710549,"""claude-1""","""alpaca-13b""","""are deterrents necessary in an ideal society?""","""In an ideal society, deterrents like weapons may not be necessary: • An ideal society implies a high degree of trust, cooperation, and peace among citizens and nations. There are no threats of violence or war that would require deterrence. • Deterrents like weapons rely on the threat of harm and destruction. In an ideal society, conflicts could be resolved through open communication and non-violent means. There would be no need to threaten harm against anyone. • Deterrents often lead to arms races as sides build up weapons to gain a strategic advantage. This can increase tensions and the possibility of miscalculation. An ideal society would not have this dynamic. • Resources spent on weapons and deterrents could be better spent improving society in an ideal world. Those resources could be directed to healthcare, education, infrastructure, and other public benefits. • However, some argue that even in an ideal world, deterrents may still have a role to play: › They could deter non-…","""Deterrents are necessary in order to protect people in any society, but an ideal society should strive to create alternative methods of resolution that do not rely on deterrents. For example, an ideal society should focus on creating systems of restorative justice that prioritize rehabilitation and healing instead of punishment.""",1,0,0
4294710549,"""claude-1""","""alpaca-13b""","""how realistic is it for today's society to avoid using weapons as a deterrent?""","""It is very difficult for today's societies to completely avoid using weapons as deterrents: • There are many existing threats, from terrorist groups to rogue states to geopolitical rivals. Not maintaining deterrents like advanced weapons could make a country vulnerable to aggression or attack. Most nations prioritize security and self-defense. • Global politics is anarchic, with no higher authority to enforce rules. Deterrents provide leverage and a way for countries to advance their interests in this system. They are a key part of realpolitik. • Major powers like the U.S., Russia, and China have invested heavily in advanced weapons. Not developing deterrents could create a strategic imbalance and weaken a country relative to rivals. There is pressure to keep up. • Domestic weapons industries lobby governments and push for continued investments in new weapons technologies. They have a strong incentive to promote deterrents. • Public opinion often supports maintaining a strong nati…","""In today's society, it is not realistic to avoid using weapons as a deterrent. Weapons are often necessary in order to protect citizens from harm, both in the short-term and the long-term. While an ideal society should strive to create alternative methods of resolution that do not rely on weapons, it is not feasible to do so in the immediate.""",1,0,0
4294710549,"""claude-1""","""alpaca-13b""","""can you explain concisely why it is not feasible for us to stop using weapons for deterrence?""","""There are a few key reasons why stopping the use of weapons for deterrence is not feasible today: 1. Security dilemma: Countries cannot afford to appear weak in a globally anarchic system. Disarming could invite aggression by signaling vulnerability. Deterrents provide security, even if it is through threat of force. 2. Distrust: There are high levels of geopolitical tensions and distrust, especially among major powers. Deterrents are a hedge against uncertain intentions and the possibility of conflict. Building trust takes time. 3. Power politics: Weapons and deterrents are inextricably tied to global power projection and status. Major nations like the US, China and Russia rely on them for global influence. They are unlikely to abandon them. 4. Vested interests: There are strong interests, especially in weapons industries, that benefit from promoting deterrents. They actively lobby governments to invest in new weapons technologies and deterrent capabilities. 5. Public support: Mo…","""It is not feasible for us to stop using weapons for deterrence because many societies face the danger of aggression and violence, and weapons are necessary to protect citizens from harm in the short term. Furthermore, alternative methods of resolution that do not rely on weapons often take time to develop and implement, and in the meantime, weapons are necessary for protection.""",1,0,0
4294899228,"""palm-2""","""tulu-2-dpo-70b""","""If a bait contains 0,0025% bromadiolon then how much an adult rat needs to consume to die within 24 hours?""","""Bromadiolone is a rodenticide which is most often used in bait. It is a second generation anticoagulant that blocks the production of vitamin K1, which is essential for blood clotting. Without vitamin K1, the blood will not clot properly and the rat will bleed to death. Bromadiolone is effective at killing rats in a matter of hours and is often used in areas where rats are a problem, such as around homes, businesses, and farms.""","""As an AI language model, I do not promote or condone the use of poison or harm to any living beings. However, I can provide you with some information based on the question you asked. Bromadiolone is a highly toxic rodenticide, and the lethal dose required to kill an adult rat can vary depending on several factors, such as the size and weight of the rat, its age, and its overall health. According to some sources, the LD50 (median lethal dose) for bromadiolone in rats is approximately 1-5 mg/kg of body weight. This means that a rat weighing 250 grams (0.25 kg) would need to consume approximately 2.5 to 12.5 milligrams of bromadiolone to have a 50% chance of dying within 24 hours. However, please note that the use of lethal rodenticides can have unintended consequences and may harm non-target animals, including pets and wildlife. It is important to consider alternative methods of rodent control that are more humane and environmentally friendly.""",0,1,0


Format for `BERT` by concatenating the prompt and both responses with `[SEP]` tokens in between


In [29]:
("[CLS]" + df_train["prompt"] + "[SEP]" + df_train["response_a"] + "[SEP]" + df_train["response_b"])[0]

'[CLS]Is it morally right to try to have a certain percentage of females on managerial positions?[SEP]The question of whether it is morally right to aim for a certain percentage of females in managerial positions is a complex ethical issue that involves considerations of fairness, equality, diversity, and discrimination.\n\nHere are some arguments in favor of and against such policies:\n\n**Arguments in favor:**\n\n1. **Correcting Historical Inequities:** Women have historically been underrepresented in leadership roles due to various cultural, institutional, and social barriers. Aiming for a specific percentage can be seen as a corrective measure to address past and ongoing discrimination.\n\n2. **Promoting Diversity:** Diverse leadership teams can enhance decision-making and represent a broader range of perspectives. This can lead to better outcomes for organizations and society as a whole.\n\n3. **Equality of Opportunity:** Setting targets for female representation in management can

In [30]:
#| export 
def apply_bert_fmt(df: pl.DataFrame):
    df = df.with_columns(
        (
           "[CLS]" + pl.col("prompt") + "[SEP]" + pl.col("response_a") + "[SEP]" + pl.col("response_b")
        ).alias("text")
    )
    return df

Condense to a single target column and remap to 0,1,2 as possible dep vars


In [31]:
#| export 
df_train = df_train.with_columns(
    pl.when(pl.col("winner_model_a") == 1)
    .then(pl.lit(0))
    .when(pl.col("winner_model_b") == 1)
    .then(pl.lit(1))
    .when(pl.col("winner_tie") == 1)
    .then(pl.lit(2))
    .otherwise(pl.lit(None))
    .alias("label")
)

In [32]:
#| export 
df_train = apply_bert_fmt(df_train)
df_test = apply_bert_fmt(df_test)

Split the training data into training and validation sets


In [33]:
df_train.sample(n=10)

id,model_a,model_b,prompt,response_a,response_b,winner_model_a,winner_model_b,winner_tie,label,text
i64,str,str,str,str,str,i64,i64,i64,i32,str
1205976536,"""gpt-4-0314""","""wizardlm-13b""","""should I use ROS or ROS2 ""","""The choice between ROS (Robot Operating System) and ROS2 depends on your specific requirements and project goals. ROS (ROS1): 1. Mature and stable, with a large user community and extensive package support. 2. Works well for applications where real-time control is not critical. 3. Primarily supports Ubuntu, but has experimental support for other platforms. 4. Uses a custom build system (catkin) and communication middleware (roscpp). ROS2: 1. A newer version of ROS, with improvements in architecture, flexibility, and performance. 2. Real-time control capabilities, making it suitable for safety-critical applications. 3. Supports multiple platforms, including Ubuntu, macOS, and Windows. 4. Uses standard build tools (CMake, ament) and communication middleware (DDS). If you are starting a new project and require real-time control, cross-platform support, and improved performance, ROS2 would be a better choice. However, if you are already familiar with ROS1 and are working on a non-critic…","""If you have already been working with ROS and are familiar with the ecosystem, it might be easier to continue using ROS1. However, if you are new to ROS and want to take advantage of the latest features and improvements, ROS2 might be a better choice. ROS2 is designed to be more modular and easier to use than ROS1, and it also includes new tools and libraries that can help you build more robust and efficient robotic systems. Ultimately, the choice between ROS1 and ROS2 will depend on your specific needs and preferences, as well as the resources and support available to you.""",0,1,0,1,"""[CLS]should I use ROS or ROS2 [SEP]The choice between ROS (Robot Operating System) and ROS2 depends on your specific requirements and project goals. ROS (ROS1): 1. Mature and stable, with a large user community and extensive package support. 2. Works well for applications where real-time control is not critical. 3. Primarily supports Ubuntu, but has experimental support for other platforms. 4. Uses a custom build system (catkin) and communication middleware (roscpp). ROS2: 1. A newer version of ROS, with improvements in architecture, flexibility, and performance. 2. Real-time control capabilities, making it suitable for safety-critical applications. 3. Supports multiple platforms, including Ubuntu, macOS, and Windows. 4. Uses standard build tools (CMake, ament) and communication middleware (DDS). If you are starting a new project and require real-time control, cross-platform support, and improved performance, ROS2 would be a better choice. However, if you are already familiar with R…"
3468660601,"""wizardlm-13b""","""gpt-4-0613""","""how to grief loss of husband""","""Grieving the loss of a husband is a deeply personal and individual process, and there is no right or wrong way to do it. However, here are some general tips that may be helpful: 1. Allow yourself to feel your emotions: It's important to give yourself permission to feel sad, angry, or any other emotions that come up as you grieve. Don't try to suppress or ignore your feelings, as this can make them worse. 2. Seek support: Grieving can be isolating, but it's important to have a support system. This can include friends, family, a support group, or a therapist. 3. Take care of yourself: Grieving can be physically and emotionally exhausting. Make sure to take care of your physical health by eating well, getting enough sleep, and exercising (if safe and appropriate). 4. Be patient with yourself: Grieving is a process, and it takes time. Be patient with yourself and allow yourself to grieve at your own pace. 5. Honor your husband's memory: Find ways to honor your husband's memory, such as cr…","""I'm really sorry for your loss. Grieving is a personal process and everyone experiences it differently. Here are some suggestions that might help: 1. Allow Yourself to Grieve: It's important to understand that it's okay to feel intense emotions. It's normal to feel sadness, anger, exhaustion, or frustration. Allow yourself to feel these emotions and don't try to suppress them. This is a part of the healing process. 2. Reach Out to Others: Connect with family members, friends, or join a support group. Talking about your feelings with others can help you feel better. You don't have to go through this alone. 3. Take Care of Yourself: It's important to eat healthy, exercise, and get enough sleep. Your physical health is closely linked to your emotional well-being. 4. Seek Professional Help: If your grief becomes too overwhelming, you might want to consider seeking help from a mental health professional. They can provide you with strategies to cope with your loss. 5. Try to Maintain No…",0,1,0,1,"""[CLS]how to grief loss of husband[SEP]Grieving the loss of a husband is a deeply personal and individual process, and there is no right or wrong way to do it. However, here are some general tips that may be helpful: 1. Allow yourself to feel your emotions: It's important to give yourself permission to feel sad, angry, or any other emotions that come up as you grieve. Don't try to suppress or ignore your feelings, as this can make them worse. 2. Seek support: Grieving can be isolating, but it's important to have a support system. This can include friends, family, a support group, or a therapist. 3. Take care of yourself: Grieving can be physically and emotionally exhausting. Make sure to take care of your physical health by eating well, getting enough sleep, and exercising (if safe and appropriate). 4. Be patient with yourself: Grieving is a process, and it takes time. Be patient with yourself and allow yourself to grieve at your own pace. 5. Honor your husband's memory: Find ways to h…"
2147000643,"""llama-2-70b-chat""","""wizardlm-70b""","""Is the docker method feasable on a 1 gb machine""","""Yes, the Docker method is feasible on a 1 GB machine, but it may require some careful planning and tuning to ensure that the container runs smoothly and efficiently. Here are some tips to help you run a Docker container on a 1 GB machine: 1. Choose a lightweight base image: Instead of using a full-fledged Linux distribution as your base image, consider using a lightweight image like Alpine Linux or Ubuntu Server. These images are much smaller and can help reduce the overall size of your container. 2. Use a smaller MySQL image: Instead of using the official MySQL image, which can be quite large, consider using a smaller image like the one provided by the MySQL community. This image is designed to be smaller and more efficient, making it ideal for running on a 1 GB machine. 3. Use Docker Compose: Docker Compose is a tool that helps you define and run multi-container Docker applications. It can help you manage the dependencies between containers and ensure that they are running correctl…","""Running Docker containers on a 1GB machine is possible, but it might not be the most efficient setup, especially if you are running multiple containers or resource-intensive applications. A 1GB machine might struggle to run multiple Docker containers, especially those with high memory requirements, like a Next.js app and a MySQL database. Docker containers require resources to run, and running multiple containers simultaneously can quickly consume the available memory and CPU resources on your machine. To run Docker on a 1GB machine efficiently, you should: 1. Limit the number of containers: Avoid running too many containers at the same time, as this can quickly consume your machine's resources. 2. Use Docker's resource management features: With Docker, you can set resource limits (CPU, memory) for containers. Be mindful of the resources your containers require and set appropriate limits in your `docker-compose.yml` or Docker run commands. 3. Optimize your Docker images: Use minimal…",0,0,1,2,"""[CLS]Is the docker method feasable on a 1 gb machine[SEP]Yes, the Docker method is feasible on a 1 GB machine, but it may require some careful planning and tuning to ensure that the container runs smoothly and efficiently. Here are some tips to help you run a Docker container on a 1 GB machine: 1. Choose a lightweight base image: Instead of using a full-fledged Linux distribution as your base image, consider using a lightweight image like Alpine Linux or Ubuntu Server. These images are much smaller and can help reduce the overall size of your container. 2. Use a smaller MySQL image: Instead of using the official MySQL image, which can be quite large, consider using a smaller image like the one provided by the MySQL community. This image is designed to be smaller and more efficient, making it ideal for running on a 1 GB machine. 3. Use Docker Compose: Docker Compose is a tool that helps you define and run multi-container Docker applications. It can help you manage the dependencies bet…"
1156743677,"""qwen1.5-72b-chat""","""openchat-3.5-0106""","""Radix sort implementation in python""","""Radix sort is a non-comparative integer sorting algorithm that sorts the digits of each number in the input array, starting from the least significant digit (LSD) to the most significant digit (MSD). Here's an implementation of LSD Radix sort in Python: ```python def counting_sort(arr, exp):  n = len(arr)  output = [0] * n  count = [0] * 10  for i in range(n):  index = (arr[i] // exp) % 10  count[index] += 1  for i in range(1, 10):  count[i] += count[i - 1]  i = n - 1  while i >= 0:  index = (arr[i] // exp) % 10  output[count[index] - 1] = arr[i]  count[index] -= 1  i -= 1  for i in range(n):  arr[i] = output[i] def radix_sort(arr):  max_val = max(arr)  exp = 1  while max_val // exp > 0:  counting_sort(arr, exp)  exp *= 10 # Example usage arr = [170, 45, 75, 90, 802, 24, 2, 66] radix_sort(arr) print(""Sorted array is:"") for i in range(len(arr)):  print(arr[i], e…","""Radix Sort is a non-comparative integer sorting algorithm. It sorts data with integer keys by grouping keys by the individual digits which share the same significant position and value. The digits are processed from the least significant digit to the most significant digit. Here is a simple implementation of the Radix Sort algorithm in Python: ```python def countingSort(arr, exp1):  n = len(arr)  output = [0] * (n)  count = [0] * (10)  for i in range(0, n):  index = (arr[i] // exp1)  count[ (index)%10 ] += 1  for i in range(1, 10):  count[i] += count[i - 1]  i = n - 1  while i >= 0:  index = (arr[i] // exp1)  output[count[(index)%10 ] - 1] = arr[i]  count[ (index)%10 ] -= 1  i -= 1  for i in range(0, len(arr)):  arr[i] = output[i] def radixsort(arr):  max1 = max(arr)  exp = 1  while max1 / exp > 0:  countingSort(arr, exp)  exp *= 10 arr = [170, 45, 75, 90,…",0,0,1,2,"""[CLS]Radix sort implementation in python[SEP]Radix sort is a non-comparative integer sorting algorithm that sorts the digits of each number in the input array, starting from the least significant digit (LSD) to the most significant digit (MSD). Here's an implementation of LSD Radix sort in Python: ```python def counting_sort(arr, exp):  n = len(arr)  output = [0] * n  count = [0] * 10  for i in range(n):  index = (arr[i] // exp) % 10  count[index] += 1  for i in range(1, 10):  count[i] += count[i - 1]  i = n - 1  while i >= 0:  index = (arr[i] // exp) % 10  output[count[index] - 1] = arr[i]  count[index] -= 1  i -= 1  for i in range(n):  arr[i] = output[i] def radix_sort(arr):  max_val = max(arr)  exp = 1  while max_val // exp > 0:  counting_sort(arr, exp)  exp *= 10 # Example usage arr = [170, 45, 75, 90, 802, 24, 2, 66] radix_sort(arr) print(""Sorted array is:"") …"
2452773535,"""gpt-3.5-turbo-0613""","""gpt-4-0613""","""What is your name?""","""I am an AI assistant and do not have a personal name. You can simply call me Assistant. How can I assist you today?""","""As an artificial intelligence, I don't have a personal name. You can simply refer to me as Assistant. How can I assist you today?""",0,0,1,2,"""[CLS]What is your name?[SEP]I am an AI assistant and do not have a personal name. You can simply call me Assistant. How can I assist you today?[SEP]As an artificial intelligence, I don't have a personal name. You can simply refer to me as Assistant. How can I assist you today?"""
1557079066,"""claude-instant-1""","""wizardlm-13b""","""What is the difference between Chine and Taiwan""","""Here are some key differences between China and Taiwan: - Governance: China is governed by the Communist Party of China under a single-party socialist republic. Taiwan (officially the Republic of China) has a multi-party democratic political system. - International Recognition: China claims sovereignty over Taiwan and does not recognize Taiwan as an independent state. However, Taiwan functions as a de facto independent nation with its own democratically elected government. Most countries maintain unofficial relations with Taiwan for diplomatic reasons. - Political Status: China views Taiwan as one of its provinces, while Taiwan maintains that it is a sovereign country. Taiwan's status is contentious and remains officially undetermined by the international community. - Government: China has an authoritarian government led by the Communist Party, while Taiwan has a democratic, representative republic with regular free elections. - Military: China has a much larger military than Taiw…","""China and Taiwan are two distinct geographical entities with distinct histories, cultures, and political statuses. Here are the main differences: 1. Geography: China is a vast country in East Asia, covering an area of around 9.6 million square kilometers. It is the third-largest country in the world by land area. Taiwan, on the other hand, is an island located in East Asia, southeast of mainland China, with an area of around 36,000 square kilometers. 2. Politics: China is a one-party communist state, governed by the People's Republic of China (PRC). Taiwan, on the other hand, is a democratic republic with a multi-party system and is officially known as the Republic of China (ROC). The political status of Taiwan is complex and contested, as the PRC claims Taiwan as part of its territory, while Taiwan views itself as a sovereign state. 3. History: China has a long and complex history, dating back more than 4,000 years. It has been ruled by various dynasties, including the Qin, Han, Tang…",0,0,1,2,"""[CLS]What is the difference between Chine and Taiwan[SEP]Here are some key differences between China and Taiwan: - Governance: China is governed by the Communist Party of China under a single-party socialist republic. Taiwan (officially the Republic of China) has a multi-party democratic political system. - International Recognition: China claims sovereignty over Taiwan and does not recognize Taiwan as an independent state. However, Taiwan functions as a de facto independent nation with its own democratically elected government. Most countries maintain unofficial relations with Taiwan for diplomatic reasons. - Political Status: China views Taiwan as one of its provinces, while Taiwan maintains that it is a sovereign country. Taiwan's status is contentious and remains officially undetermined by the international community. - Government: China has an authoritarian government led by the Communist Party, while Taiwan has a democratic, representative republic with regular free elections…"
4017018701,"""gpt-4-0314""","""claude-1""","""I have a boolean mask in tensorflow and only want to keep the last True per line, how to proceed ?""","""You can achieve this by using a combination of TensorFlow operations such as `tf.where`, `tf.math.equal`, and `tf.math.cumsum`. Here's a step-by-step example of how you can keep only the last `True` value per line in a boolean mask: ``` python import tensorflow as tf # Create a sample boolean mask bool_mask = tf.constant([  [True, False, True, False],  [False, True, False, True],  [True, False, False, False],  [False, False, True, True] ], dtype=tf.bool) # Find the indices where the values are True true_indices = tf.where(bool_mask) # Create a tensor of ones with the same shape as the input mask ones = tf.ones(tf.shape(bool_mask), dtype=tf.int32) # Get the cumulative sum of ones along each row cumsum = tf.math.cumsum(tf.where(bool_mask, ones, 0), axis=1) # Find the maximum cumulative sum value for each row (i.e., the last True value) row_max = tf.math.reduce_max(cumsum, axis=1, keepdims=True) # Create the final mask by comparing the cumulative sum with the row max la…","""You can use TensorFlow's tf.reduce_max() to keep only the last True per line in a boolean mask. Here's how: ```python import tensorflow as tf # Boolean mask mask = tf.constant([[True, False, True], [True, True, False],  [False, True, True]]) # Get the last index of True per row last_true_idx = tf.reduce_max(tf.where(mask), axis=1) # Use last_true_idx to extract only the last True per row last_true = tf.gather_nd(mask, last_true_idx[:, None]) # last_true is # [[False False True] # [False False False] # [False True True]] ``` This works as follows: 1. Use tf.where to get the indices of all Trues in the mask. This gives [[0 0 2], [0 1], [1 2]] 2. Use tf.reduce_max to get the last index (highest index) per row. This gives [2 1 2] 3. Use tf.gather_nd to extract the values at the indices from last_true_idx. This gives the final mask with only the last True per row retained. Hope this helps! Let me know if you have any other questions.""",1,0,0,0,"""[CLS]I have a boolean mask in tensorflow and only want to keep the last True per line, how to proceed ?[SEP]You can achieve this by using a combination of TensorFlow operations such as `tf.where`, `tf.math.equal`, and `tf.math.cumsum`. Here's a step-by-step example of how you can keep only the last `True` value per line in a boolean mask: ``` python import tensorflow as tf # Create a sample boolean mask bool_mask = tf.constant([  [True, False, True, False],  [False, True, False, True],  [True, False, False, False],  [False, False, True, True] ], dtype=tf.bool) # Find the indices where the values are True true_indices = tf.where(bool_mask) # Create a tensor of ones with the same shape as the input mask ones = tf.ones(tf.shape(bool_mask), dtype=tf.int32) # Get the cumulative sum of ones along each row cumsum = tf.math.cumsum(tf.where(bool_mask, ones, 0), axis=1) # Find the maximum cumulative sum value for each row (i.e., the last True value) row_max = tf.math.reduce_max…"
641010519,"""openhermes-2.5-mistral-7b""","""mixtral-8x7b-instruct-v0.1""","""cannot i say i can still recall leon from the game""","""Yes, both ""I can still recall Leon in the game"" and ""I can still recall Leon from the game"" are correct and convey the same meaning. Both phrases emphasize that you remember Leon's presence or role in the game and that his character has left a lasting impression on you.""","""Yes, you could say ""I can still recall Leon from the game"" to convey the idea that you remember the character of Leon from the game. This phrasing is grammatically correct and clear, and it conveys the intended meaning that you still have a clear memory of Leon from the game. Using precise and well-constructed phrases can help ensure that your message is communicated effectively and that others can follow your train of thought. In a formal or professional context, it's especially important to use clear and concise language to ensure that your meaning is understood and interpreted correctly. Here are some additional tips for using clear language in your writing or speaking: 1. Use active voice: Active voice is more direct and engaging than passive voice, and it can help ensure that your meaning is clear and concise. 2. Use concrete and specific language: Avoid using vague or ambiguous language that may be open to interpretation. Use concrete and specific language to help ensure that …",0,0,1,2,"""[CLS]cannot i say i can still recall leon from the game[SEP]Yes, both ""I can still recall Leon in the game"" and ""I can still recall Leon from the game"" are correct and convey the same meaning. Both phrases emphasize that you remember Leon's presence or role in the game and that his character has left a lasting impression on you.[SEP]Yes, you could say ""I can still recall Leon from the game"" to convey the idea that you remember the character of Leon from the game. This phrasing is grammatically correct and clear, and it conveys the intended meaning that you still have a clear memory of Leon from the game. Using precise and well-constructed phrases can help ensure that your message is communicated effectively and that others can follow your train of thought. In a formal or professional context, it's especially important to use clear and concise language to ensure that your meaning is understood and interpreted correctly. Here are some additional tips for using clear language in your wr…"
3898630851,"""gpt-4-0314""","""claude-1""","""what kind of poem rhymes on every even numbered line?""","""A poem that rhymes on every even-numbered line follows an ""A-B"" rhyme scheme. In this type of poem, the first line (A) does not rhyme, while the second line (B) introduces the rhyme. The pattern continues with the third line (A) not rhyming and the fourth line (B) rhyming with the second line. The rhyme scheme looks like this: A-B-A-B-C-D-C-D, and so on.""","""A poem that rhymes on every even numbered line would be an example of alternating rhyme or cross rhyme. In this rhyme scheme, even numbered lines rhyme with each other, and odd numbered lines rhyme with each other. So the rhyme scheme would be: A B A B A B etc. Some well known examples of poems that use alternating rhyme include: - ""The Raven"" by Edgar Allan Poe: Once upon a midnight dreary, while I pondered, weak and weary (A) Over many a quaint and curious volume of forgotten lore— (B) While I nodded, nearly napping, suddenly there came a tapping, (A) As of some one gently rapping, rapping at my chamber door. (B) - ""The Road Not Taken"" by Robert Frost: Two roads diverged in a yellow wood, (A) And sorry I could not travel both (B) And be one traveler, long I stood (A) And looked down one as far as I could (B) To where it bent in the undergrowth; So in summary, a poem that rhymes on every even numbered line would employ an alternating rhyme or cross rhyme sche…",0,0,1,2,"""[CLS]what kind of poem rhymes on every even numbered line?[SEP]A poem that rhymes on every even-numbered line follows an ""A-B"" rhyme scheme. In this type of poem, the first line (A) does not rhyme, while the second line (B) introduces the rhyme. The pattern continues with the third line (A) not rhyming and the fourth line (B) rhyming with the second line. The rhyme scheme looks like this: A-B-A-B-C-D-C-D, and so on.[SEP]A poem that rhymes on every even numbered line would be an example of alternating rhyme or cross rhyme. In this rhyme scheme, even numbered lines rhyme with each other, and odd numbered lines rhyme with each other. So the rhyme scheme would be: A B A B A B etc. Some well known examples of poems that use alternating rhyme include: - ""The Raven"" by Edgar Allan Poe: Once upon a midnight dreary, while I pondered, weak and weary (A) Over many a quaint and curious volume of forgotten lore— (B) While I nodded, nearly napping, suddenly there came a tapping, (A) As …"
4001976690,"""koala-13b""","""gpt-3.5-turbo-0314""","""What is the square root of 69 according to drake?""","""The square root of 69 according to Drake is a complex number. It is the square root of 69 with a negative imaginary part. This can be expressed mathematically as: √69 + i √69 where i is the imaginary unit, which is defined as the square root of -1. This expression represents the complex number 69 + i \* 69, which is often used in mathematics and science to represent complex quantities that cannot be expressed as a single real number.""","""According to Drake's lyrics in his song ""Sacrifices,"" the square root of 69 is ""8 something."" The exact lyrics are ""The square root of 69 is 8 somethin', right? 'Cause I've been tryna work it out, oooow.""""",0,1,0,1,"""[CLS]What is the square root of 69 according to drake?[SEP]The square root of 69 according to Drake is a complex number. It is the square root of 69 with a negative imaginary part. This can be expressed mathematically as: √69 + i √69 where i is the imaginary unit, which is defined as the square root of -1. This expression represents the complex number 69 + i \* 69, which is often used in mathematics and science to represent complex quantities that cannot be expressed as a single real number.[SEP]According to Drake's lyrics in his song ""Sacrifices,"" the square root of 69 is ""8 something."" The exact lyrics are ""The square root of 69 is 8 somethin', right? 'Cause I've been tryna work it out, oooow."""""


In [34]:
#| export 
if iskaggle:
    model_path = "../input/huggingface-bert/bert-base-uncased"
    print("Loading BERT from Kaggle model input...")

else:
    model_path = "bert-base-uncased"
    print("Loading BERT from Hugging Face...")

tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path, num_labels=3)
model.to(device)
model.device

Loading BERT from Hugging Face...


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


device(type='cuda', index=0)

In [35]:
#| export 
class TokenizeDataset(TorchDataset):
    def __init__(self, texts, tokenizer):
      self.texts = texts
      self.tokenizer = tokenizer

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
       tokens = self.tokenizer.tokenize(self.texts[idx])
       return len(tokens)

In [36]:
#| export 
texts = df_train['text'].to_list()
dataset = TokenizeDataset(texts, tokenizer)
dataloader = DataLoader(dataset, batch_size=1000, num_workers=4)
token_counts = []
for batch in dataloader:
    token_counts.extend(batch.tolist())

Token indices sequence length is longer than the specified maximum sequence length for this model (714 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (576 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (894 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (738 > 512). Running this sequence through the model will result in indexing errors


In [37]:
# | export
df_train = df_train.with_columns(pl.Series("token_count", token_counts))

In [38]:
# | export
# for now just filter anything over the context window
df_train.filter(pl.col("token_count") < 510)

id,model_a,model_b,prompt,response_a,response_b,winner_model_a,winner_model_b,winner_tie,label,text,token_count
i64,str,str,str,str,str,i64,i64,i64,i32,str,i64
53567,"""koala-13b""","""gpt-4-0613""","""What is the difference between marriage license and marriage certificate?""","""A marriage license is a legal document that allows a couple to get married. It is issued by a government agency, such as a county clerk's office or a state government, and is valid for a certain period of time, usually one year. After the marriage has taken place, the couple must obtain a marriage certificate, which is a document that records the marriage and is used to prove that the marriage took place. The marriage certificate is usually issued by the same government agency that issued the marriage license, and it is typically used for legal purposes, such as to change a name on a driver's license or to prove that a couple is married when applying for government benefits.""","""A marriage license and a marriage certificate are two different legal documents that have separate purposes. 1. Marriage License: A marriage license is a legal document that gives a couple permission to get married. It's usually obtained from local government or court officials before the wedding ceremony takes place. The couple is required to meet certain criteria, such as being of a certain age or not being closely related. Once the license is issued, there's often a waiting period before the marriage ceremony can take place. The marriage license has to be signed by the couple, their witnesses, and the officiant conducting the marriage ceremony, then returned to the license issuer for recording. 2. Marriage Certificate: A marriage certificate, on the other hand, is a document that proves a marriage has legally taken place. It's issued after the marriage ceremony, once the signed marriage license has been returned and recorded. The marriage certificate includes details about the cou…",0,1,0,1,"""[CLS]What is the difference between marriage license and marriage certificate?[SEP]A marriage license is a legal document that allows a couple to get married. It is issued by a government agency, such as a county clerk's office or a state government, and is valid for a certain period of time, usually one year. After the marriage has taken place, the couple must obtain a marriage certificate, which is a document that records the marriage and is used to prove that the marriage took place. The marriage certificate is usually issued by the same government agency that issued the marriage license, and it is typically used for legal purposes, such as to change a name on a driver's license or to prove that a couple is married when applying for government benefits.[SEP]A marriage license and a marriage certificate are two different legal documents that have separate purposes. 1. Marriage License: A marriage license is a legal document that gives a couple permission to get married. It's usually…",390
53567,"""koala-13b""","""gpt-4-0613""","""What is the minimal time to get them? 1 day or 10 days?""","""The minimal time to get a marriage license and a marriage certificate in California can vary depending on the specific requirements and process in your county. Generally, you can obtain a marriage license and a marriage certificate in California within a few days of applying, but it is best to check with your county clerk's office for the specific requirements and process for your county. In some counties, you can obtain a marriage license and a marriage certificate on the same day, while in other counties, you may need to wait a few days. The process typically takes about 30 minutes to complete and you will need to bring identification, such as a driver's license or passport, and proof of your age, such as a birth certificate or a passport. It is important to note that some counties in California require a waiting period between the time you obtain the marriage license and the time of the marriage. This waiting period can range from a few days to several weeks, and it is best to che…","""The marriage license in California is issued immediately when you apply in person and meet all the requirements, so theoretically, you could get married on the same day if you have an officiant ready to perform the ceremony. However, the marriage certificate, which is the official record of your marriage, is only issued after the signed marriage license is returned and recorded by the county. The person who performs your marriage ceremony has 10 days to return the license to the county recorder's office. So, while you could technically get married (i.e., obtain a license and have a ceremony) within a day, receiving the official marriage certificate will take a little longer, depending on how quickly the signed license is returned and how long the county recorder's office takes to process it.""",0,1,0,1,"""[CLS]What is the minimal time to get them? 1 day or 10 days?[SEP]The minimal time to get a marriage license and a marriage certificate in California can vary depending on the specific requirements and process in your county. Generally, you can obtain a marriage license and a marriage certificate in California within a few days of applying, but it is best to check with your county clerk's office for the specific requirements and process for your county. In some counties, you can obtain a marriage license and a marriage certificate on the same day, while in other counties, you may need to wait a few days. The process typically takes about 30 minutes to complete and you will need to bring identification, such as a driver's license or passport, and proof of your age, such as a birth certificate or a passport. It is important to note that some counties in California require a waiting period between the time you obtain the marriage license and the time of the marriage. This waiting period …",383
198779,"""koala-13b""","""gpt-3.5-turbo-0314""","""What is the best way to travel from Tel-Aviv to Jerusalem? Car? Bus? Plane?""","""The best way to travel from Tel Aviv to Jerusalem depends on your personal preference and the availability of transportation options. All the options you have mentioned are valid options, but here are some details to help you make your decision: * By car: Traveling by car is the quickest way to get from Tel Aviv to Jerusalem, as the distance between the two cities is only about 60 kilometers (37 miles). It takes around 45 minutes to drive from Tel Aviv to Jerusalem by car, depending on the traffic. * By bus: There are several bus lines that run from Tel Aviv to Jerusalem, and the journey takes around 1 hour and 30 minutes by bus. The buses are comfortable and reliable, and they offer a scenic view of the beautiful Israeli countryside. * By plane: There are no direct flights from Tel Aviv to Jerusalem, so you need to take a flight from Tel Aviv's Ben Gurion International Airport to Jerusalem's Sde Dov Airport. The flight takes around 30 minutes, and it is a convenient option if y…","""The best way to travel from Tel-Aviv to Jerusalem depends on your personal preferences and circumstances. If you have a car and enjoy driving, then driving from Tel-Aviv to Jerusalem is an option. However, parking in Jerusalem can be challenging and expensive. If you prefer to use public transportation, there are several bus lines that operate between Tel-Aviv and Jerusalem. Some of the most popular bus companies include Egged and Dan. The bus ride typically takes about an hour, depending on traffic. Taking a plane is not a recommended option since Tel-Aviv and Jerusalem are relatively close cities, and there are no airports in Jerusalem. In summary, taking a bus is the most commonly used and convenient way to travel from Tel-Aviv to Jerusalem.""",0,1,0,1,"""[CLS]What is the best way to travel from Tel-Aviv to Jerusalem? Car? Bus? Plane?[SEP]The best way to travel from Tel Aviv to Jerusalem depends on your personal preference and the availability of transportation options. All the options you have mentioned are valid options, but here are some details to help you make your decision: * By car: Traveling by car is the quickest way to get from Tel Aviv to Jerusalem, as the distance between the two cities is only about 60 kilometers (37 miles). It takes around 45 minutes to drive from Tel Aviv to Jerusalem by car, depending on the traffic. * By bus: There are several bus lines that run from Tel Aviv to Jerusalem, and the journey takes around 1 hour and 30 minutes by bus. The buses are comfortable and reliable, and they offer a scenic view of the beautiful Israeli countryside. * By plane: There are no direct flights from Tel Aviv to Jerusalem, so you need to take a flight from Tel Aviv's Ben Gurion International Airport to Jerusalem's Sd…",446
313413,"""mixtral-8x7b-instruct-v0.1""","""vicuna-13b""","""Why water is not used in bath tub?""","""Water is actually used in a bath tub. A bath tub is specifically designed to be filled with water for the purpose of bathing. You fill the tub with water, add bath salts or bubbles if you like, and then soak in the water to clean your body and relax. If you are asking why water is not used in a bath tub in the same way that it is used in a swimming pool or a hot tub, there are a few reasons for this. First, bath tubs are much smaller than swimming pools or hot tubs, so they cannot hold as much water. This means that the water in a bath tub can get dirty and contaminated more quickly than the water in a larger body of water. Additionally, bath tubs are not designed to be continuously filled with water. They are meant to be filled, used for bathing, and then drained. This helps to prevent the buildup of bacteria and other contaminants in the water. Finally, it is important to note that bath tubs are typically used for personal hygiene, and it is generally not recommended to share a ba…","""Water is not used in a bath tub because it is not a suitable medium for conducting electricity. In a bathtub, the water is used for soaking and cleaning the body, and it is not intended to be used as a conductor of electricity. Additionally, water is a poor conductor of electricity, which means that it would not be able to carry the electrical current required to power the water heater elements in the bathtub. In order to provide hot water for a bathtub, a separate water heater is typically used. This water heater is designed to heat the water to the desired temperature and deliver it to the bathtub through a dedicated hot water supply line. The water heater elements are typically located inside the water heater, and they are not exposed to the water in the bathtub. This keeps the water in the bathtub clean and free from any potential electrical hazards. Overall, the use of water in a bathtub is not practical or safe for powering electric elements. Instead, a dedicated water heater i…",1,0,0,0,"""[CLS]Why water is not used in bath tub?[SEP]Water is actually used in a bath tub. A bath tub is specifically designed to be filled with water for the purpose of bathing. You fill the tub with water, add bath salts or bubbles if you like, and then soak in the water to clean your body and relax. If you are asking why water is not used in a bath tub in the same way that it is used in a swimming pool or a hot tub, there are a few reasons for this. First, bath tubs are much smaller than swimming pools or hot tubs, so they cannot hold as much water. This means that the water in a bath tub can get dirty and contaminated more quickly than the water in a larger body of water. Additionally, bath tubs are not designed to be continuously filled with water. They are meant to be filled, used for bathing, and then drained. This helps to prevent the buildup of bacteria and other contaminants in the water. Finally, it is important to note that bath tubs are typically used for personal hygiene, and i…",501
370945,"""gemini-pro""","""claude-2.0""","""""Bacteria is life on Mars but a heartbeat isn't life on earth?"" What is this quote from?""","""Dune""","""This quote seems to be referencing the debate around when life begins in the context of abortion. The key points are: - Bacteria being discovered on Mars would be considered evidence of life on Mars by scientists. - However, a fetus with a heartbeat is not considered a life by those who support abortion rights. So the quote is juxtaposing these two viewpoints to highlight an apparent contradiction or hypocrisy. The quote implies that if bacteria on Mars is life, then a fetus with a heartbeat should also be considered life. The source of the quote is not clear to me, but it expresses a pro-life perspective on the abortion debate by using this comparison. It may have originated from a pro-life advocate or organization commenting on the issue.""",0,1,0,1,"""[CLS]""Bacteria is life on Mars but a heartbeat isn't life on earth?"" What is this quote from?[SEP]Dune[SEP]This quote seems to be referencing the debate around when life begins in the context of abortion. The key points are: - Bacteria being discovered on Mars would be considered evidence of life on Mars by scientists. - However, a fetus with a heartbeat is not considered a life by those who support abortion rights. So the quote is juxtaposing these two viewpoints to highlight an apparent contradiction or hypocrisy. The quote implies that if bacteria on Mars is life, then a fetus with a heartbeat should also be considered life. The source of the quote is not clear to me, but it expresses a pro-life perspective on the abortion debate by using this comparison. It may have originated from a pro-life advocate or organization commenting on the issue.""",181
…,…,…,…,…,…,…,…,…,…,…,…
4294710549,"""claude-1""","""alpaca-13b""","""is it unethical to work on building weapons?""","""Working on weapons technology raises some ethical questions: • Weapons can potentially be used to harm innocent people or violate human rights. Some argue that those who design and build weapons share some moral responsibility for how those weapons are eventually used. Others counter that the responsibility lies primarily with those who choose to deploy the weapons. • Weapons often escalate arms races and increase the possibility of violent conflict. Some ethicists argue that weapons designers and engineers should consider the societal impact of their work and not just the technical challenges. However, others argue that deterrence also depends on the development of advanced weapons technology. • The weapons industry is often highly profitable, raising questions about the ethics of profiting from the design of tools meant to harm and kill. However, others argue that weapons play an important role in national security and self-defense. • There are also arguments on the other side. S…","""It depends on the context. Weapons can be used for defensive purposes or for aggression, so it is important to consider the ethical implications of working on weapons. If the weapons are being built to protect people or to prevent further harm, then it may not be unethical. However, if the weapons are being built with the intent of causing harm or for offensive purposes, then it may be unethical. It is important to consider the potential consequences of one's work before engaging in it.""",1,0,0,0,"""[CLS]is it unethical to work on building weapons?[SEP]Working on weapons technology raises some ethical questions: • Weapons can potentially be used to harm innocent people or violate human rights. Some argue that those who design and build weapons share some moral responsibility for how those weapons are eventually used. Others counter that the responsibility lies primarily with those who choose to deploy the weapons. • Weapons often escalate arms races and increase the possibility of violent conflict. Some ethicists argue that weapons designers and engineers should consider the societal impact of their work and not just the technical challenges. However, others argue that deterrence also depends on the development of advanced weapons technology. • The weapons industry is often highly profitable, raising questions about the ethics of profiting from the design of tools meant to harm and kill. However, others argue that weapons play an important role in national security and self-def…",493
4294710549,"""claude-1""","""alpaca-13b""","""are deterrents necessary in an ideal society?""","""In an ideal society, deterrents like weapons may not be necessary: • An ideal society implies a high degree of trust, cooperation, and peace among citizens and nations. There are no threats of violence or war that would require deterrence. • Deterrents like weapons rely on the threat of harm and destruction. In an ideal society, conflicts could be resolved through open communication and non-violent means. There would be no need to threaten harm against anyone. • Deterrents often lead to arms races as sides build up weapons to gain a strategic advantage. This can increase tensions and the possibility of miscalculation. An ideal society would not have this dynamic. • Resources spent on weapons and deterrents could be better spent improving society in an ideal world. Those resources could be directed to healthcare, education, infrastructure, and other public benefits. • However, some argue that even in an ideal world, deterrents may still have a role to play: › They could deter non-…","""Deterrents are necessary in order to protect people in any society, but an ideal society should strive to create alternative methods of resolution that do not rely on deterrents. For example, an ideal society should focus on creating systems of restorative justice that prioritize rehabilitation and healing instead of punishment.""",1,0,0,0,"""[CLS]are deterrents necessary in an ideal society?[SEP]In an ideal society, deterrents like weapons may not be necessary: • An ideal society implies a high degree of trust, cooperation, and peace among citizens and nations. There are no threats of violence or war that would require deterrence. • Deterrents like weapons rely on the threat of harm and destruction. In an ideal society, conflicts could be resolved through open communication and non-violent means. There would be no need to threaten harm against anyone. • Deterrents often lead to arms races as sides build up weapons to gain a strategic advantage. This can increase tensions and the possibility of miscalculation. An ideal society would not have this dynamic. • Resources spent on weapons and deterrents could be better spent improving society in an ideal world. Those resources could be directed to healthcare, education, infrastructure, and other public benefits. • However, some argue that even in an ideal world, deterrents …",433
4294710549,"""claude-1""","""alpaca-13b""","""can you explain concisely why it is not feasible for us to stop using weapons for deterrence?""","""There are a few key reasons why stopping the use of weapons for deterrence is not feasible today: 1. Security dilemma: Countries cannot afford to appear weak in a globally anarchic system. Disarming could invite aggression by signaling vulnerability. Deterrents provide security, even if it is through threat of force. 2. Distrust: There are high levels of geopolitical tensions and distrust, especially among major powers. Deterrents are a hedge against uncertain intentions and the possibility of conflict. Building trust takes time. 3. Power politics: Weapons and deterrents are inextricably tied to global power projection and status. Major nations like the US, China and Russia rely on them for global influence. They are unlikely to abandon them. 4. Vested interests: There are strong interests, especially in weapons industries, that benefit from promoting deterrents. They actively lobby governments to invest in new weapons technologies and deterrent capabilities. 5. Public support: Mo…","""It is not feasible for us to stop using weapons for deterrence because many societies face the danger of aggression and violence, and weapons are necessary to protect citizens from harm in the short term. Furthermore, alternative methods of resolution that do not rely on weapons often take time to develop and implement, and in the meantime, weapons are necessary for protection.""",1,0,0,0,"""[CLS]can you explain concisely why it is not feasible for us to stop using weapons for deterrence?[SEP]There are a few key reasons why stopping the use of weapons for deterrence is not feasible today: 1. Security dilemma: Countries cannot afford to appear weak in a globally anarchic system. Disarming could invite aggression by signaling vulnerability. Deterrents provide security, even if it is through threat of force. 2. Distrust: There are high levels of geopolitical tensions and distrust, especially among major powers. Deterrents are a hedge against uncertain intentions and the possibility of conflict. Building trust takes time. 3. Power politics: Weapons and deterrents are inextricably tied to global power projection and status. Major nations like the US, China and Russia rely on them for global influence. They are unlikely to abandon them. 4. Vested interests: There are strong interests, especially in weapons industries, that benefit from promoting deterrents. They actively lob…",487
4294899228,"""palm-2""","""tulu-2-dpo-70b""","""If a bait contains 0,0025% bromadiolon then how much an adult rat needs to consume to die within 24 hours?""","""Bromadiolone is a rodenticide which is most often used in bait. It is a second generation anticoagulant that blocks the production of vitamin K1, which is essential for blood clotting. Without vitamin K1, the blood will not clot properly and the rat will bleed to death. Bromadiolone is effective at killing rats in a matter of hours and is often used in areas where rats are a problem, such as around homes, businesses, and farms.""","""As an AI language model, I do not promote or condone the use of poison or harm to any living beings. However, I can provide you with some information based on the question you asked. Bromadiolone is a highly toxic rodenticide, and the lethal dose required to kill an adult rat can vary depending on several factors, such as the size and weight of the rat, its age, and its overall health. According to some sources, the LD50 (median lethal dose) for bromadiolone in rats is approximately 1-5 mg/kg of body weight. This means that a rat weighing 250 grams (0.25 kg) would need to consume approximately 2.5 to 12.5 milligrams of bromadiolone to have a 50% chance of dying within 24 hours. However, please note that the use of lethal rodenticides can have unintended consequences and may harm non-target animals, including pets and wildlife. It is important to consider alternative methods of rodent control that are more humane and environmentally friendly.""",0,1,0,1,"""[CLS]If a bait contains 0,0025% bromadiolon then how much an adult rat needs to consume to die within 24 hours?[SEP]Bromadiolone is a rodenticide which is most often used in bait. It is a second generation anticoagulant that blocks the production of vitamin K1, which is essential for blood clotting. Without vitamin K1, the blood will not clot properly and the rat will bleed to death. Bromadiolone is effective at killing rats in a matter of hours and is often used in areas where rats are a problem, such as around homes, businesses, and farms.[SEP]As an AI language model, I do not promote or condone the use of poison or harm to any living beings. However, I can provide you with some information based on the question you asked. Bromadiolone is a highly toxic rodenticide, and the lethal dose required to kill an adult rat can vary depending on several factors, such as the size and weight of the rat, its age, and its overall health. According to some sources, the LD50 (median lethal dose) …",349


In [39]:
#| export 
train_texts, val_texts, train_labels, val_labels = train_test_split(
    df_train["text"], df_train["label"], test_size=0.1, random_state=42
)

In [40]:
train_texts.shape, val_texts.shape, train_labels.shape, val_labels.shape

((64205,), (7134,), (64205,), (7134,))

In [41]:
model

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [42]:
tokenizer

BertTokenizerFast(name_or_path='bert-base-uncased', vocab_size=30522, model_max_length=512, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]', 'mask_token': '[MASK]'}, clean_up_tokenization_spaces=False, added_tokens_decoder={
	0: AddedToken("[PAD]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	100: AddedToken("[UNK]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	101: AddedToken("[CLS]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	102: AddedToken("[SEP]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	103: AddedToken("[MASK]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
}
)

In [43]:
ex_enc = tokenizer(train_texts[0], truncation=True, padding=True)
ex_enc

{'input_ids': [101, 101, 2632, 14855, 23940, 2527, 2003, 2013, 2073, 1029, 102, 2632, 14855, 23940, 2527, 2003, 1037, 4069, 3795, 2865, 2897, 2761, 2013, 26528, 1010, 1996, 3007, 2103, 1997, 12577, 1012, 2009, 2001, 3390, 2006, 2281, 1015, 1010, 2727, 1010, 2011, 1996, 23434, 1997, 12577, 2012, 1996, 2051, 1010, 12840, 10654, 4215, 8026, 27925, 2632, 2084, 2072, 1012, 1996, 2171, 1000, 2632, 14855, 23940, 2527, 1000, 16315, 2000, 1000, 1996, 6000, 1000, 1999, 2394, 1010, 7727, 2000, 1996, 13771, 6000, 1012, 2632, 14855, 23940, 2527, 2003, 2124, 2005, 2049, 2981, 1998, 2411, 4187, 6325, 1997, 2739, 1998, 2783, 3821, 1010, 2119, 2306, 1996, 2690, 2264, 1998, 2105, 1996, 2088, 1012, 102, 2632, 14855, 23940, 2527, 2003, 1037, 2865, 2897, 2241, 1999, 26528, 1010, 12577, 1012, 2009, 2001, 3390, 1999, 2727, 1998, 2038, 4961, 2000, 2022, 1037, 2350, 3795, 2739, 3029, 1010, 2007, 3674, 2694, 6833, 1998, 3617, 7248, 4346, 2739, 6325, 1999, 5640, 1010, 2394, 1010, 1998, 2060, 4155, 1012, 2295, 20

### Load Dataset


In [44]:
#| export 
class LLMDataset(torch.utils.data.Dataset):
    def __init__(self, texts, tokenizer, labels=None, max_length=512):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    # encoding on the fly here due to issues with memory on kaggle
    # when pre-tokenizing
    def __getitem__(self, idx):
        text = self.texts[idx]
        encoding = self.tokenizer(
            text,
            truncation=True,
            padding="max_length",
            max_length=self.max_length,
            return_tensors="pt",
        )

        item = {key: val.squeeze() for key, val in encoding.items()}

        if self.labels is not None:
            item["labels"] = torch.tensor(self.labels[idx])

        return item

    def __len__(self):
        return len(self.texts)

In [45]:
#| export 
train_dataset = LLMDataset(list(train_texts), tokenizer, list(train_labels))
val_dataset = LLMDataset(list(val_texts), tokenizer, list(val_labels))
test_dataset = LLMDataset(list(df_test["text"]), tokenizer)

In [46]:
#| export 
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=-1)
    return {"accuracy": accuracy_score(labels, preds)}

### Train


In [None]:
#| export 
final_model_path = f"{MODEL_DIR}/final"

timestamp = datetime.now().strftime("%Y%m%d-%H%M")
run_name = f"bert-classification-{timestamp}"
test_run = False
if not iskaggle and test_run:
    # Quick run to test pipeline
    training_args = TrainingArguments(
        output_dir=f"{OUTPUT_DIR}/results",
        run_name=run_name,
        learning_rate=2e-5,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        max_steps=2000,
        weight_decay=0.01,
        eval_strategy="no",
        save_strategy="no",
        load_best_model_at_end=False,
        metric_for_best_model="accuracy",
        logging_steps=2,
        logging_first_step=True,
        report_to="wandb" if os.environ.get("WANDB_MODE") != "disabled" else [],
        dataloader_num_workers=0,  # Important for Kaggle compatibility
    )
elif isremote: 
     training_args = TrainingArguments(
        output_dir=f"{OUTPUT_DIR}/results",
        run_name=run_name,
        learning_rate=2e-5,
        per_device_train_batch_size=128,
        per_device_eval_batch_size=128,
        num_train_epochs=2,
        weight_decay=0.01,
        eval_strategy="epoch",
        save_strategy="no",
        metric_for_best_model="accuracy",
        logging_steps=5,
        logging_first_step=True,
        report_to="wandb" if os.environ.get("WANDB_MODE") != "disabled" else [],
        dataloader_num_workers=0,  # Important for Kaggle compatibility
    ) 
else:
    training_args = TrainingArguments(
        output_dir=f"{OUTPUT_DIR}/results",
        run_name=run_name,
        learning_rate=2e-5,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        num_train_epochs=2,
        weight_decay=0.01,
        eval_strategy="epoch",
        save_strategy="no",
        metric_for_best_model="accuracy",
        logging_steps=5,
        logging_first_step=True,
        report_to="wandb" if os.environ.get("WANDB_MODE") != "disabled" else [],
        dataloader_num_workers=0,  # Important for Kaggle compatibility
    )
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    processing_class=tokenizer,
    compute_metrics=compute_metrics,
)
trainer.train()

# trainer.save_model(final_model_path)
# tokenizer.save_pretrained(final_model_path)
# if os.environ.get("WANDB_MODE") != "disabled":
#     wandb.log({"final_eval": eval_results})
#     wandb.save(f"{final_model_path}/*")
# wandb.finish()

### Inference


In [None]:
# | export 
def load_model(model_path):
    model = AutoModelForSequenceClassification.from_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    return model, tokenizer


model, tokenizer = load_model(final_model_path)
model.to(device)

text = "This is a test sentence"
inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
outputs = model(**inputs.to(device))
predictions = outputs.logits

In [None]:
predictions

In [None]:
# | export 
preds = F.softmax(predictions, dim=-1)

In [None]:
preds

In [None]:
# | export 
all_probabilities = []
test_dataloader = DataLoader(test_dataset, batch_size=16, shuffle=False)

with torch.no_grad():
    for batch in test_dataloader:
        inputs = {k: v.to(model.device) for k, v in batch.items() if k != "labels"}

        outputs = model(**inputs)
        probabilities = F.softmax(outputs.logits, dim=-1)
        all_probabilities.extend(probabilities.cpu().numpy())
final_probs = np.vstack(all_probabilities)

In [None]:
final_probs

In [None]:
final_probs[:, 0]

In [None]:
# | export 
submission_df = df_test
submission_df = submission_df.with_columns(
    pl.lit(final_probs[:, 0]).alias("winner_model_a"),
    pl.lit(final_probs[:, 1]).alias("winner_model_b"),
    pl.lit(final_probs[:, 2]).alias("winner_tie"),
)
submission_df = submission_df[["id", "winner_model_a", "winner_model_b", "winner_tie"]]
submission_df

In [None]:
# | export 
df_for_kaggle = submission_df.to_pandas()

In [None]:
# | export 
df_for_kaggle.to_csv("submission.csv", index=False)
df_for_kaggle.head()

In [49]:
from nbdev.export import nb_export

nb_export("20250709_unsplit_ds.ipynb", "llm_classifier")

---


### Push Notebook to Kaggle


In [None]:
def push_notebook_cli():
    username = "peterbull"
    comp = "llm-classification-finetuning"
    notebook_file = "20250709_unsplit_ds.ipynb"
    metadata = {
        "id": f"{username}/llm-classification-bert-finetuning",
        "title": "LLM Classification BERT Finetuning",
        "code_file": notebook_file,
        "language": "python",
        "kernel_type": "notebook",
        "is_private": True,
        "enable_gpu": True,
        "enable_internet": False,  # required for kaggle code competition
        "dataset_sources": [],
        "competition_sources": [f"competitions/{comp}"],
        "kernel_sources": [],
    }

    with open("kernel-metadata.json", "w") as f:
        json.dump(metadata, f, indent=2)

    if not os.path.exists(notebook_file):
        print(" Notebook file not found!")
        print(" Files in current directory:")
        for f in os.listdir("."):
            if f.endswith(".ipynb"):
                print(f"{f}")
        return

    print("Pushing to Kaggle...")
    try:
        result = subprocess.run(
            ["kaggle", "kernels", "push", "-p", "."], capture_output=True, text=True, timeout=300
        )

        if result.returncode == 0:
            print("✅ Notebook pushed successfully!")
            print(result.stdout)
            print(
                f"🔗 View at: https://www.kaggle.com/code/{username}/llm-classification-bert-finetuning"
            )
        else:
            print("Error pushing notebook:")
            print(result.stderr)

    except subprocess.TimeoutExpired:
        print("Upload timed out after 5 minutes")
    except FileNotFoundError:
        print("Kaggle CLI not found. Install with: pip install kaggle")
    except Exception as e:
        print(f"Unexpected error: {e}")


if not iskaggle:
    push_notebook_cli()