In [1]:
import numpy as np
import pandas as pd
import time
import ast
import torch
import matplotlib.pyplot as plt
import seaborn as sns
from transformers import AutoTokenizer, LlamaModel, LlamaForSequenceClassification, BitsAndBytesConfig
import tokenizers
import gc

sns.set_style("darkgrid")
pd.options.display.max_rows = None
pd.options.display.max_columns = None
colors = sns.color_palette("pastel")
from torch.cuda.amp import autocast
from threading import Thread

torch.backends.cuda.enable_mem_efficient_sdp(False)
torch.backends.cuda.enable_flash_sdp(False)
from peft import get_peft_model, LoraConfig, TaskType
import os

for dirname, _, filenames in os.walk(''):
    for filename in filenames:
        print(os.path.join(dirname, filename))

if (not torch.cuda.is_available()): print("Sorry - GPU required!")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
MODEL_NAME = 'pre_trained_model/llama3/'
WEIGHTS_PATH = 'pre_trained_model/lmsys-model/model'
MAX_LENGTH = 1024
BATCH_SIZE = 8
LORA_RANK = 16
LORA_ALPHA = 8
LORA_MODULES = ['o_proj', 'v_proj']
DEVICE = torch.device("cuda")

In [3]:
test = pd.read_csv('dataset/test.csv')
# test = pd.read_csv('testing_data.csv')
submission = pd.read_csv('dataset/sample_submission.csv')

In [4]:
def process(input_str):
    stripped_str = input_str.strip('[]')
    sentences = [s.strip('"') for s in stripped_str.split('","')]
    return  ' '.join(sentences)

test['prompt'] = test['prompt'].apply(process)
test['response_a'] = test['response_a'].apply(process)
test['response_b'] = test['response_b'].apply(process)

display(test.head(2))
test['text'] = 'User prompt: ' + test['prompt'] +  '\n\nModel A :\n' + test['response_a'] +'\n\n--------\n\nModel B:\n'  + test['response_b']
# print(test['text'][0])

Unnamed: 0,id,prompt,response_a,response_b
0,136060,"I have three oranges today, I ate an orange ye...",You have two oranges today.,You still have three oranges. Eating an orange...
1,211333,You are a mediator in a heated political debat...,Thank you for sharing the details of the situa...,Mr Reddy and Ms Blue both have valid points in...


In [5]:
%%time

tokenizer = AutoTokenizer.from_pretrained('pre_trained_model/lmsys-model/tokenizer')

tokens = tokenizer(test['text'].tolist(), padding='max_length',
                   max_length=MAX_LENGTH, truncation=True, return_tensors='pt')

INPUT_IDS = tokens['input_ids'].to(DEVICE, dtype=torch.int32)
ATTENTION_MASKS = tokens['attention_mask'].to(DEVICE, dtype=torch.int32)

input_ids_cpu = [tensor.cpu().tolist() for tensor in INPUT_IDS]
attention_masks_cpu = [tensor.cpu().tolist() for tensor in ATTENTION_MASKS]

data = pd.DataFrame()
data['INPUT_IDS'] = input_ids_cpu
data['ATTENTION_MASKS'] = attention_masks_cpu
data[:1]

CPU times: user 414 ms, sys: 149 ms, total: 563 ms
Wall time: 617 ms


Unnamed: 0,INPUT_IDS,ATTENTION_MASKS
0,"[1502, 10137, 25, 358, 617, 2380, 85138, 3432,...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ..."


In [6]:
import gc
gc.collect()
torch.cuda.empty_cache()

In [7]:
bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,
    bnb_8bit_compute_dtype=torch.float16,
    bnb_8bit_use_double_quant=False
    #llm_int8_enable_fp32_cpu_offload=True  # Enable CPU offloading
)

# GPU 0
device0 = torch.device('cuda:1')
# device_map = "auto"
base_model_0 = LlamaForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=3,
    torch_dtype=torch.float16,
    quantization_config=bnb_config,
    device_map='cuda:1')
base_model_0.config.pad_token_id = tokenizer.pad_token_id

# GPU 1
device1 = torch.device('cuda:2')
base_model_1 = LlamaForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=3,
    torch_dtype=torch.float16,
    quantization_config=bnb_config,
    device_map='cuda:2')
base_model_1.config.pad_token_id = tokenizer.pad_token_id

Unused kwargs: ['bnb_8bit_compute_dtype', 'bnb_8bit_use_double_quant']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
Loading checkpoint shards: 100%|██████████| 4/4 [00:04<00:00,  1.23s/it]
Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at pre_trained_model/llama3/ and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Loading checkpoint shards: 100%|██████████| 4/4 [00:04<00:00,  1.21s/it]
Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at pre_trained_model/llama3/ and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [8]:
peft_config = LoraConfig(
    r=LORA_RANK,
    lora_alpha=LORA_ALPHA,
    lora_dropout=0.05,
    bias='none',
    inference_mode=True,
    task_type=TaskType.SEQ_CLS,
    target_modules=LORA_MODULES)
model_0 = get_peft_model(base_model_0, peft_config).to(device0)
model_0.load_state_dict(torch.load(WEIGHTS_PATH), strict=False)
model_0.eval()

model_1 = get_peft_model(base_model_1, peft_config).to(device1)
model_1.load_state_dict(torch.load(WEIGHTS_PATH), strict=False)
model_1.eval()

  model_0.load_state_dict(torch.load(WEIGHTS_PATH), strict=False)
  model_1.load_state_dict(torch.load(WEIGHTS_PATH), strict=False)


PeftModelForSequenceClassification(
  (base_model): LoraModel(
    (model): LlamaForSequenceClassification(
      (model): LlamaModel(
        (embed_tokens): Embedding(128256, 4096)
        (layers): ModuleList(
          (0-31): 32 x LlamaDecoderLayer(
            (self_attn): LlamaSdpaAttention(
              (q_proj): Linear8bitLt(in_features=4096, out_features=4096, bias=False)
              (k_proj): Linear8bitLt(in_features=4096, out_features=1024, bias=False)
              (v_proj): lora.Linear8bitLt(
                (base_layer): Linear8bitLt(in_features=4096, out_features=1024, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=1024, bias=False)
           

In [9]:
def inference(df, model, device, batch_size=BATCH_SIZE):
    input_ids = torch.tensor(df['INPUT_IDS'].values.tolist(), dtype=torch.long)
    attention_mask = torch.tensor(df['ATTENTION_MASKS'].values.tolist(), dtype=torch.long)
    
    generated_class_a = []
    generated_class_b = []
    generated_class_c = []
    labels = []

    model.eval()
    
    for start_idx in range(0, len(df), batch_size):
        end_idx = min(start_idx + batch_size, len(df))
        batch_input_ids = input_ids[start_idx:end_idx].to(device)
        batch_attention_mask = attention_mask[start_idx:end_idx].to(device)
        
        with torch.no_grad():
            with autocast():
                outputs = model(
                    input_ids=batch_input_ids,
                    attention_mask=batch_attention_mask
                )
        
        probabilities = torch.softmax(outputs.logits, dim=-1).cpu().numpy()
        
        generated_class_a.extend(probabilities[:, 0])
        generated_class_b.extend(probabilities[:, 1])
        generated_class_c.extend(probabilities[:, 2])
        batch_labels = np.argmax(probabilities, axis=1)
        labels.extend(batch_labels)
    
    df['winner_model_a'] = generated_class_a
    df['winner_model_b'] = generated_class_b
    df['winner_tie'] = generated_class_c
    df['label'] = labels

    torch.cuda.empty_cache()  

    return df

In [10]:
start_timer = time.time()

N_SAMPLES = len(data)

half = round(N_SAMPLES / 2)
sub0 = data.iloc[0:half].copy()
sub1 = data.iloc[half:N_SAMPLES].copy()

def run_inference(df, model, device, results, index):
    results[index] = inference(df, model, device)

results = {}

t0 = Thread(target=run_inference, args=(sub0, model_0, device0, results, 0))
t1 = Thread(target=run_inference, args=(sub1, model_1, device1, results, 1))

t0.start()
t1.start()

t0.join()
t1.join()


  with autocast():


In [11]:
print(results)
data = pd.concat([results[0], results[1]], axis=0)

{1:                                            INPUT_IDS  \
2  [1502, 10137, 25, 2650, 311, 9656, 279, 24790,...   

                                     ATTENTION_MASKS  winner_model_a  \
2  [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...        0.026169   

   winner_model_b  winner_tie  label  
2        0.518555    0.455322      1  , 0:                                            INPUT_IDS  \
0  [1502, 10137, 25, 358, 617, 2380, 85138, 3432,...   
1  [1502, 10137, 25, 1472, 527, 264, 69030, 304, ...   

                                     ATTENTION_MASKS  winner_model_a  \
0  [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...        0.242432   
1  [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...        0.005032   

   winner_model_b  winner_tie  label  
0        0.687988    0.069397      1  
1        0.932617    0.062469      1  }


In [12]:
print(data.head(1))
print(data.columns)
print(len(data))
print(data['INPUT_IDS'])

                                           INPUT_IDS  \
0  [1502, 10137, 25, 358, 617, 2380, 85138, 3432,...   

                                     ATTENTION_MASKS  winner_model_a  \
0  [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...        0.242432   

   winner_model_b  winner_tie  label  
0        0.687988    0.069397      1  
Index(['INPUT_IDS', 'ATTENTION_MASKS', 'winner_model_a', 'winner_model_b',
       'winner_tie', 'label'],
      dtype='object')
3
0    [1502, 10137, 25, 358, 617, 2380, 85138, 3432,...
1    [1502, 10137, 25, 1472, 527, 264, 69030, 304, ...
2    [1502, 10137, 25, 2650, 311, 9656, 279, 24790,...
Name: INPUT_IDS, dtype: object


In [13]:
test

Unnamed: 0,id,prompt,response_a,response_b,text
0,136060,"I have three oranges today, I ate an orange ye...",You have two oranges today.,You still have three oranges. Eating an orange...,"User prompt: I have three oranges today, I ate..."
1,211333,You are a mediator in a heated political debat...,Thank you for sharing the details of the situa...,Mr Reddy and Ms Blue both have valid points in...,User prompt: You are a mediator in a heated po...
2,1233961,How to initialize the classification head when...,When you want to initialize the classification...,To initialize the classification head when per...,User prompt: How to initialize the classificat...


In [13]:
TARGETS = ['winner_model_a', 'winner_model_b', 'winner_tie', 'label']

submission[TARGETS] = data[TARGETS]
display(submission)

Unnamed: 0,id,winner_model_a,winner_model_b,winner_tie,label
0,136060,0.060822,0.822754,0.116455,1
1,211333,0.062439,0.65918,0.278564,1
2,1233961,0.456543,0.217285,0.326172,0


In [14]:
submission.to_csv('submission.csv', index=False)