In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/wsdm-cup-multilingual-chatbot-arena/sample_submission.csv
/kaggle/input/wsdm-cup-multilingual-chatbot-arena/train.parquet
/kaggle/input/wsdm-cup-multilingual-chatbot-arena/test.parquet
/kaggle/input/gemma-2-9b-it-bnb-4bit_hf/transformers/original/1/config.json
/kaggle/input/gemma-2-9b-it-bnb-4bit_hf/transformers/original/1/README.md
/kaggle/input/gemma-2-9b-it-bnb-4bit_hf/transformers/original/1/tokenizer.json
/kaggle/input/gemma-2-9b-it-bnb-4bit_hf/transformers/original/1/tokenizer_config.json
/kaggle/input/gemma-2-9b-it-bnb-4bit_hf/transformers/original/1/model.safetensors
/kaggle/input/gemma-2-9b-it-bnb-4bit_hf/transformers/original/1/special_tokens_map.json
/kaggle/input/gemma-2-9b-it-bnb-4bit_hf/transformers/original/1/.gitattributes
/kaggle/input/gemma-2-9b-it-bnb-4bit_hf/transformers/original/1/tokenizer.model
/kaggle/input/gemma-2-9b-it-bnb-4bit_hf/transformers/original/1/generation_config.json
/kaggle/input/checkpoint9200-gemma2/adapter_model.safetensors
/kaggle/

In [2]:
# !pip install -U bitsandbytes
# !pip install -U bitsandbytes accelerate
# !pip install transformers peft
# !pip install --upgrade huggingface_hub

In [3]:
!pip install transformers peft accelerate bitsandbytes \
    -U --no-index --find-links /kaggle/input/lmsys-wheel-files

Looking in links: /kaggle/input/lmsys-wheel-files
Processing /kaggle/input/lmsys-wheel-files/peft-0.11.1-py3-none-any.whl
Processing /kaggle/input/lmsys-wheel-files/bitsandbytes-0.43.1-py3-none-manylinux_2_24_x86_64.whl
Installing collected packages: bitsandbytes, peft
Successfully installed bitsandbytes-0.43.1 peft-0.11.1


In [4]:
import time
from dataclasses import dataclass
from concurrent.futures import ThreadPoolExecutor

import torch
import sklearn
import numpy as np
import pandas as pd
from transformers import Gemma2ForSequenceClassification, GemmaTokenizerFast, BitsAndBytesConfig
from transformers.data.data_collator import pad_without_fast_tokenizer_warning
from peft import PeftModel

In [5]:
assert torch.cuda.device_count() == 2

In [6]:
## Need to change paths here

@dataclass
class Config:
    gemma_dir = '/kaggle/input/gemma-2-9b-it-bnb-4bit_hf/transformers/original/1'
    lora_dir = '/kaggle/input/checkpoint9200-gemma2'
    max_length = 2048
    batch_size = 4
    device = torch.device("cuda")    
    tta = False  # test time augmentation. <prompt>-<model-b's response>-<model-a's response>
    spread_max_length = False  # whether to apply max_length//3 on each input or max_length on the concatenated input

cfg = Config()

### Loading data


In [7]:
test = pd.read_parquet('/kaggle/input/wsdm-cup-multilingual-chatbot-arena/test.parquet')

In [8]:
def process_text(text: str) -> str:
    # Safely replace "null" with an empty string and remove extra spaces
    return " ".join(str(text).replace("null", "").split())

# Apply the function to the DataFrame columns
test.loc[:, 'prompt'] = test['prompt'].apply(process_text)
test.loc[:, 'response_a'] = test['response_a'].apply(process_text)
test.loc[:, 'response_b'] = test['response_b'].apply(process_text)

# Display the first 5 rows
display(test.head(5))


Unnamed: 0,id,prompt,response_a,response_b,scored
0,327228,Caso Clínico: Un hombre de 70 años con anteced...,**Diagnóstico Diferencial de Anemia en Pacient...,"Basándonos en el caso clínico presentado, pode...",False
1,1139415,Peel Company received a cash dividend from a c...,The correct answer is **(a) No No**. Here's wh...,The correct answer is **(a) No No**. Here's wh...,False
2,1235630,Há um grave problema com o relógio da torre da...,Dois problemas interessantes! **Problema 1: O ...,Vamos resolver os dois problemas em sequência....,False


Tokenize

In [9]:
def tokenize(
    tokenizer, prompt, response_a, response_b, max_length=cfg.max_length, spread_max_length=cfg.spread_max_length
):
    prompt = ["<Task>:Which response is better? which response will be preffered by user for given prompt? if response_a is better output model_a , if response_b is better output model_b in only one word,<prompt>: " + p for p in prompt]
    response_a = ["\n\n<response_a>: " + r_a for r_a in response_a]
    response_b = ["\n\n<response_b>: " + r_b for r_b in response_b]
    if spread_max_length:
        prompt = tokenizer(prompt, max_length=max_length//3, truncation=True, padding=False).input_ids
        response_a = tokenizer(response_a, max_length=max_length//3, truncation=True, padding=False).input_ids
        response_b = tokenizer(response_b, max_length=max_length//3, truncation=True, padding=False).input_ids
        input_ids = [p + r_a + r_b for p, r_a, r_b in zip(prompt, response_a, response_b)]
        attention_mask = [[1]* len(i) for i in input_ids]
    else:
        text = [p + r_a + r_b for p, r_a, r_b in zip(prompt, response_a, response_b)]
        tokenized = tokenizer(text, max_length=max_length, truncation=True, padding=False)
        input_ids = tokenized.input_ids
        attention_mask = tokenized.attention_mask
    return input_ids, attention_mask

In [10]:
%%time

tokenizer = GemmaTokenizerFast.from_pretrained(cfg.gemma_dir)
tokenizer.add_eos_token = True
tokenizer.padding_side = "right"

data = pd.DataFrame()
data["id"] = test["id"]
data["input_ids"], data["attention_mask"] = tokenize(tokenizer, test["prompt"], test["response_a"], test["response_b"])
data["length"] = data["input_ids"].apply(len)

aug_data = pd.DataFrame()
aug_data["id"] = test["id"]
# swap response_a & response_b
aug_data['input_ids'], aug_data['attention_mask'] = tokenize(tokenizer, test["prompt"], test["response_b"], test["response_a"])
aug_data["length"] = aug_data["input_ids"].apply(len)

CPU times: user 651 ms, sys: 164 ms, total: 815 ms
Wall time: 881 ms


In [11]:
print(tokenizer.decode(data["input_ids"][0]))

<bos><Task>:Which response is better? which response will be preffered by user for given prompt? if response_a is better output model_a , if response_b is better output model_b in only one word,<prompt>: Caso Clínico: Un hombre de 70 años con antecedentes de cáncer testicular tratado en 1990 y cáncer gástrico tratado en 2020, es diagnosticado con leucemia mieloide aguda (LMA). El paciente inicia tratamiento con quimioterapia (12 sesiones planificadas) y se presenta en la consulta después de la primera sesión con síntomas de fatiga intensa, disnea leve, palpitaciones y mareos. Los estudios de laboratorio revelan una hemoglobina de 8 g/dL, leucocitos bajos, y plaquetas en el límite inferior de lo normal. El paciente refiere no haber tenido sangrados visibles, pero menciona sentirse más cansado de lo habitual desde el inicio del tratamiento. No tiene fiebre ni signos de infección activa. Pregunta: Con base en el cuadro clínico descrito, proponga un diagnóstico diferencial que incluya al m

In [12]:
print(tokenizer.decode(aug_data["input_ids"][0]))

<bos><Task>:Which response is better? which response will be preffered by user for given prompt? if response_a is better output model_a , if response_b is better output model_b in only one word,<prompt>: Caso Clínico: Un hombre de 70 años con antecedentes de cáncer testicular tratado en 1990 y cáncer gástrico tratado en 2020, es diagnosticado con leucemia mieloide aguda (LMA). El paciente inicia tratamiento con quimioterapia (12 sesiones planificadas) y se presenta en la consulta después de la primera sesión con síntomas de fatiga intensa, disnea leve, palpitaciones y mareos. Los estudios de laboratorio revelan una hemoglobina de 8 g/dL, leucocitos bajos, y plaquetas en el límite inferior de lo normal. El paciente refiere no haber tenido sangrados visibles, pero menciona sentirse más cansado de lo habitual desde el inicio del tratamiento. No tiene fiebre ni signos de infección activa. Pregunta: Con base en el cuadro clínico descrito, proponga un diagnóstico diferencial que incluya al m

In [13]:
# Load base model on GPU 0
device_0 = torch.device('cuda:0')
model_0 = Gemma2ForSequenceClassification.from_pretrained(
    cfg.gemma_dir,
    device_map=device_0,
    use_cache=False,
    ignore_mismatched_sizes=True
)

# Load base model on GPU 1
device_1 = torch.device('cuda:1')
model_1 = Gemma2ForSequenceClassification.from_pretrained(
    cfg.gemma_dir,
    device_map=device_1,
    use_cache=False,
    ignore_mismatched_sizes=True
)

Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
Some weights of Gemma2ForSequenceClassification were not initialized from the model checkpoint at /kaggle/input/gemma-2-9b-it-bnb-4bit_hf/transformers/original/1 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
Some weights of Gemma2ForSequenceClassification were not initialized from the model checkpoint at /kaggle/input/gemma-2-9b-it-bnb-4bit_hf/transformers/original/1 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [14]:
model_0 = PeftModel.from_pretrained(model_0, cfg.lora_dir)
model_1 = PeftModel.from_pretrained(model_1, cfg.lora_dir)

In [15]:
@torch.no_grad()
@torch.cuda.amp.autocast()
def inference(df, model, batch_size=cfg.batch_size, max_length=cfg.max_length):
    """
    Perform inference on a DataFrame with a given model.
    """
    a_win, b_win = [], []
    
    for start_idx in range(0, len(df), batch_size):
        end_idx = min(start_idx + batch_size, len(df))
        tmp = df.iloc[start_idx:end_idx]
        input_ids = tmp["input_ids"].to_list()
        attention_mask = tmp["attention_mask"].to_list()
        
        inputs = pad_without_fast_tokenizer_warning(
            tokenizer,
            {"input_ids": input_ids, "attention_mask": attention_mask},
            padding="longest",
            pad_to_multiple_of=None,
            return_tensors="pt",
        )
        inputs = {k: v.to(model.device) for k, v in inputs.items()}  # Move inputs to model's device
        outputs = model(**inputs)
        proba = outputs.logits.softmax(-1).cpu()  # Get probabilities
        
        a_win.extend(proba[:, 0].tolist())  # Probability for model_a
        b_win.extend(proba[:, 1].tolist())  # Probability for model_b
    
    df["winner_model_a"] = a_win
    df["winner_model_b"] = b_win
    return df

# Sort data by input length
data = data.sort_values("length", ascending=False)

# Split data into two subsets for parallel processing
sub_1 = data.iloc[0::2].copy()
sub_2 = data.iloc[1::2].copy()

# Perform parallel inference
st = time.time()
with ThreadPoolExecutor(max_workers=2) as executor:
    results = executor.map(inference, (sub_1, sub_2), (model_0, model_1))

# Combine the results
result_df = pd.concat(results, axis=0)
proba = result_df[["winner_model_a", "winner_model_b"]].values  # Get probabilities

print(f"Elapsed time: {time.time() - st}")

  @torch.cuda.amp.autocast()


Elapsed time: 4.200034856796265


In [16]:
# Assuming 'proba' contains only two columns for 'winner_model_a' and 'winner_model_b'
result_df.loc[:, "winner_model_a"] = proba[:, 0]  # Probability for model_a winning
result_df.loc[:, "winner_model_b"] = proba[:, 1]  # Probability for model_b winning

# Create the submission DataFrame with the necessary columns
submission_df = result_df[["id", 'winner_model_a', 'winner_model_b']]

submission_df["winner_model"] = submission_df.apply(
    lambda row: "model_a" if row["winner_model_a"] > row["winner_model_b"] else "model_b", axis=1
)

submission_df= submission_df.drop(['winner_model_a','winner_model_b'],axis=1)

# Save to CSV and display
submission_df.to_csv('submission.csv', index=False)
display(submission_df)
print("Successfully completed inference")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  submission_df["winner_model"] = submission_df.apply(


Unnamed: 0,id,winner_model
0,327228,model_b
1,1139415,model_a
2,1235630,model_b


Successfully completed inference
