In [1]:
!pip install transformers peft bitsandbytes \
    --no-index --find-links /kaggle/input/commonlit-pip/

Looking in links: /kaggle/input/commonlit-pip/
Processing /kaggle/input/commonlit-pip/peft-0.14.0-py3-none-any.whl
Processing /kaggle/input/commonlit-pip/bitsandbytes-0.45.0-py3-none-manylinux_2_24_x86_64.whl
Processing /kaggle/input/commonlit-pip/huggingface_hub-0.27.1-py3-none-any.whl (from transformers)
Installing collected packages: huggingface-hub, bitsandbytes, peft
  Attempting uninstall: huggingface-hub
    Found existing installation: huggingface-hub 0.24.7
    Uninstalling huggingface-hub-0.24.7:
      Successfully uninstalled huggingface-hub-0.24.7
Successfully installed bitsandbytes-0.45.0 huggingface-hub-0.27.1 peft-0.14.0


In [2]:
import os
import random
import math
import gc
import time
import copy

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from torch.utils.data.distributed import DistributedSampler


import transformers
from transformers import(
    BitsAndBytesConfig,
    Gemma2ForSequenceClassification,
    GemmaTokenizerFast,
    Gemma2Config,
    EvalPrediction,
    Trainer, 
    TrainingArguments,
    DataCollatorWithPadding
)
import bitsandbytes as bnb
from peft import LoraConfig, TaskType, prepare_model_for_kbit_training, get_peft_model, PeftModel

from tqdm.auto import tqdm
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold

import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [3]:
class CFG :
    debug_one_epoch=True
    debug_one_fold=False
    run_cv=True
    only_infer=False
    num_workers=2
    num_epochs=3
    warmup_prop=0.1
    lr=1e-4
    max_len=512
    batch_size=8
    early_stopping_rounds=5
    random_seed=42
    model_path="/kaggle/input/gemma2-regression-model-save/gemma2-4bit-regression" 
    pretrained_path="/kaggle/input/commonlit-gemma2-9b4bit-lora"
    optimizer="adamw_8bit"
    criterion=torch.nn.MSELoss()
    device=device
    fold=5
    use_amp = True
    per_device_train_batch_size = 2
    gradient_accumulation_steps = 2
    per_device_eval_batch_size = 8

    
    num_warmup_steps = 10
    num_training_steps = -1
    
    # lora config
    freeze_layer = 16
    tasktype = TaskType.SEQ_CLS
    lora_r = 16
    lora_alpha = lora_r * 2
    lora_dropout = 0.05
    lora_bias = "none"
    
    
def seed_torch(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    transformers.set_seed(seed)

    
seed_torch(CFG.random_seed)

In [4]:
lora_config = LoraConfig(
    task_type = CFG.tasktype,
    inference_mode = CFG.only_infer,
    r = CFG.lora_r,
    lora_alpha = CFG.lora_alpha,
    target_modules = ["q_proj", "k_proj", "v_proj"], #, "o_proj","gate_proj"
    layers_to_transform =  [i for i in range(42) if i >= CFG.freeze_layer], 
    lora_dropout = CFG.lora_dropout,
    bias = CFG.lora_bias,
)

In [5]:
if CFG.debug_one_epoch:
    CFG.num_epochs = 1

In [6]:
train = pd.read_csv("../input/commonlitreadabilityprize/train.csv")
test = pd.read_csv("../input/commonlitreadabilityprize/test.csv")
submission = pd.read_csv("../input/commonlitreadabilityprize/sample_submission.csv")

In [7]:
train["target"].describe()

count    2834.000000
mean       -0.959319
std         1.033579
min        -3.676268
25%        -1.690320
50%        -0.912190
75%        -0.202540
max         1.711390
Name: target, dtype: float64

In [8]:
for i in range(len(test)):
    before_txt = test.loc[i,"excerpt"]
    test.loc[i,"excerpt"] = f"You are a highschool teacher. Please grade the following essay carefully. \n {before_txt}"

In [9]:
class CommonlitDataset(Dataset):
    def __init__(self, texts, tokenizer, max_len=512, test=False, targets=None):
        self.texts = texts
        self.tokenizer = tokenizer
        self.max_len = max_len
        self.test = test
        self.targets = targets
        
    def __len__(self):
        return len(self.texts)
        
    def __getitem__(self, idx):
        text = self.texts[idx]
        inputs = self.tokenizer(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            padding="max_length",
            truncation=True,
            return_tensors="pt"
        )
        
        ids = inputs["input_ids"].squeeze()
        mask = inputs["attention_mask"].squeeze()
        
        return {
            "input_ids": torch.tensor(ids, dtype=torch.long),
            "attention_mask": torch.tensor(mask, dtype=torch.long),
        }

In [10]:
test.loc[0,"excerpt"]

'You are a highschool teacher. Please grade the following essay carefully. \n My hope lay in Jack\'s promise that he would keep a bright light burning in the upper story to guide me on my course. On a clear night this light was visible from the village, but somehow or other I failed to take into account the state of the weather. The air was full of eddying flakes, which would render the headlight of a locomotive invisible a hundred yards distant. Strange that this important fact never occurred to me until I was fully a fourth of a mile from the village. Then, after looking in vain for the beacon light, the danger of my situation struck me, and I halted.\n"I am certain to go wrong," I said to myself.\n"It is out of my power to follow a direct course without something to serve as a compass. I will go back to the village and wait till morning."'

In [11]:
def compute_metrics(eval_pred):
    logits, labels = eval_pred  # 出力 (予測値, ラベル)
    predictions = np.squeeze(logits)  # 必要に応じて次元を調整
    mse = mean_squared_error(labels, predictions)
    return {"mse": mse}

LoRA の multi GPU 対応が面倒だったので苦渋の決断ですが Trainer を使うことにします（悲しい）

In [12]:
def inference(df, model, device, batch_size=64, max_len=512):
    test_dataset = CommonlitDataset(texts=df["excerpt"].values, tokenizer=tokenizer, max_len=max_len, test=True)  # test=True を追加
    test_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=4, shuffle=False, pin_memory=True)
    
    model.eval()
    preds = []
    
    with torch.no_grad():
        for batch in tqdm(test_loader):
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            outputs = model(input_ids, attention_mask)
            # BFloat16 から float32 に変換してからCPUに移動
            preds += outputs.logits.float().cpu().numpy().tolist()
            
    return np.array(preds).reshape(-1)

In [13]:
preds = []

In [14]:

for i in range(5):
    tokenizer = GemmaTokenizerFast.from_pretrained(CFG.model_path)
    tokenizer.add_eos_token = True
    tokenizer.padding_side = "right"
    device = torch.device("cuda:0")

    # ベースモデルのロード
    model = Gemma2ForSequenceClassification.from_pretrained(
        CFG.model_path,
        device_map=device,
        torch_dtype=torch.float16,
        num_labels=1,
        use_cache=False
    )
    
    # LoRAモデルのロード
    lora_dir = f"/kaggle/input/commonlit-gemma2-9b4bit-lora/results_{i}/checkpoint-142"
    model = PeftModel.from_pretrained(
        model, 
        lora_dir,
        torch_dtype=torch.float16,
        device_map="auto"
    )
    model.eval()
    
    pred = inference(test, model, device)
    preds.append(pred)
    del model
    torch.cuda.empty_cache()


preds = np.array(preds)
preds = preds.mean(axis=0)
submission["target"] = preds
submission.to_csv("submission.csv", index=False)


Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Exception in thread QueueFeederThread:
Traceback (most recent call last):
  File "/usr/lib/python3.10/multiprocessing/queues.py", line 239, in _feed
Exception in thread QueueFeederThread:
Traceback (most recent call last):
  File "/usr/lib/python3.10/multiprocessing/queues.py", line 239, in _feed
    reader_close()
  File "/usr/lib/python3.10/multiprocessing/connection.py", line 177, in close
    reader_close()
  File "/usr/lib/python3.10/multiprocessing/connection.py", line 177, in close
        self._close()
  File "/usr/lib/python3.10/multiprocessing/connection.py", line 361, in _close
self._close()
  File "/usr/lib/python3.10/multiprocessing/connection.py", line 361, in _close
    _close(self._handle)    _close(self._handle)
OSError: [Errno 9] Bad file descriptor

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner

OSError: [Errno 9] Bad file descriptor

Du

  0%|          | 0/1 [00:00<?, ?it/s]

Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
Exception in thread QueueFeederThread:
Traceback (most recent call last):
  File "/usr/lib/python3.10/multiprocessing/queues.py", line 239, in _feed
Exception ignored in: <function _ConnectionBase.__del__ at 0x79dcb58267a0>
Traceback (most recent call last):
  File "/usr/lib/python3.10/multiprocessing/connection.py", line 132, in __del__
        reader_close()
  File "/usr/lib/python3.10/multiprocessing/connection.py", line 177, in close
self._close()
  File "/usr/lib/python3.10/multiprocessing/connection.py", line 361, in _close
    self._close()
  File "/usr/lib/python3.10/multiprocessing/connection.py", line 361, in _close
    _close(self._handle)
OSError: [Errno 9] Bad file descriptor
    _close(self._handle)
OSError: [Errno 9] Bad file descriptor

During handling of the above exception, another exception occurred:

Tra

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
Exception in thread QueueFeederThread:
Traceback (most recent call last):
  File "/usr/lib/python3.10/multiprocessing/queues.py", line 239, in _feed
Exception in thread QueueFeederThread:
Traceback (most recent call last):
  File "/usr/lib/python3.10/multiprocessing/queues.py", line 239, in _feed
    reader_close()
  File "/usr/lib/python3.10/multiprocessing/connection.py", line 177, in close
Exception in thread QueueFeederThread:
Traceback (most recent call last):
  File "/usr/lib/python3.10/multiprocessing/queues.py", line 239, in _feed
    self._close()
  File "/usr/lib/python3.10/multiprocessing/connection.py", line 361, in _close
    reader_close()
  File "/usr/lib/python3.10/multiprocessing/connection.py", line 177, in close
    reader_close()
  File "/usr/lib/python3.10/multiprocessing/connection.py", line 177, in cl

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
Exception in thread QueueFeederThread:
Traceback (most recent call last):
  File "/usr/lib/python3.10/multiprocessing/queues.py", line 239, in _feed
Exception in thread QueueFeederThread:
Traceback (most recent call last):
  File "/usr/lib/python3.10/multiprocessing/queues.py", line 239, in _feed
    reader_close()
  File "/usr/lib/python3.10/multiprocessing/connection.py", line 177, in close
    reader_close()
  File "/usr/lib/python3.10/multiprocessing/connection.py", line 177, in close
    self._close()
  File "/usr/lib/python3.10/multiprocessing/connection.py", line 361, in _close
    self._close()    _close(self._handle)
OSError: [Errno 9] Bad file descriptor

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/lib/python3.10/threading.py", line 1016, in 

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

# 工夫のしどころ、キーワード
- BERT の fine-tuning （全員向け） : このコンペのネタバレを含みますが、[BERT の fine-tuning のみを取り扱った良記事](https://www.ai-shift.co.jp/techblog/2138) があります。まずは CFG の値を変えてみるところからでいいので、やってみましょう！
- アンサンブルをしよう（全員向け） : IOAI 2024 だとあまり試す機会が無かったものの、 kaggle に近い問題になればなるほど強力な手法です。身につけておきましょう。モデルを変える、埋込表現だけ使う、など色々な方法が考えられると思います！
- 使用モデルの検討（初中級者↑向け） : BERT にも様々な発展形があります。調べて使ってみましょう。また、本当に BERT がベストなのでしょうか......?
- アンサンブル方法の検討（中級者↑向け） : スタッキングを知っていますか？私は知っています。
- AMP 対応（中上級者↑向け） : 名前に large が付いていたりする大きめのモデルだと学習と推論に時間がかかります。実験効率も大事なので対応させましょう。思想が許せば pytorch lightning が楽です。
- full train 戦略や random seed ensemble などの細かいテク（上級者向け） : ここらへんはコンペの振り返り記事を読み漁ると時々出てきます。類似コンペを調べてみましょう。
- 実験効率化（上級者向け） : 実験パイプラインの管理と高速化を大事にしましょう。wandb は特に at-home task での実験管理でとても便利です。また、先述の AMP を含めた高速化も実験数で勝負する時に重要になります。代表を強く意識しているのならば出来て損はないはずです。
- スコアアタック（上級者向け） : 数年前のコンペなので、最新の model や手法を使えばかなり面白いのではないでしょうか！