In [2]:
import gc
import itertools
import os
import pickle
import random
import sys
import warnings
from glob import glob
from pathlib import Path

import config  # edit config.py as needed
import numpy as np
import pandas as pd
import polars as pl
import polars.selectors as cs
import safetensors
import seaborn as sns
import torch
from datasets import Dataset
from jinja2 import Template
from lifelines import CoxPHFitter, KaplanMeierFitter, NelsonAalenFitter
from metric import score  # edit metric.py as needed
from peft import LoraConfig, TaskType, get_peft_model, prepare_model_for_kbit_training
from scipy.stats import rankdata
from seed import seed_everything  # edit seed.py as needed
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import GroupKFold, KFold, StratifiedKFold, train_test_split
from tqdm.notebook import tqdm
from transformers import (
    AutoConfig,
    AutoModel,
    AutoModelForSequenceClassification,
    AutoTokenizer,
    DataCollatorWithPadding,
    PreTrainedModel,
    Trainer,
    TrainingArguments,
)
from transformers.utils import is_torch_bf16_gpu_available

warnings.filterwarnings("ignore")

# TODO: kaggle上でのpeft, bitsandbytesのインストール


In [3]:
# ====================================================
# Configurations
# ====================================================
class CFG:
    DRY_RUN = False
    EXP_NAME = config.EXP_NAME
    AUTHOR = "marumarukun"
    COMPETITION = config.KAGGLE_COMPETITION_NAME
    DATA_PATH = config.COMP_DATASET_DIR
    OUTPUT_DIR = config.OUTPUT_DIR
    MODEL_PATH = config.OUTPUT_DIR / "models"  # モデル作成・実験時はこちらを使用
    # MODEL_PATH = config.ARTIFACT_EXP_DIR(config.EXP_NAME) / "models"  # 提出時はこちらを使用
    METHOD_LIST = ["lightgbm", "xgboost", "catboost"]
    SEED = 42
    n_folds = 2 if DRY_RUN else 5
    target_col = "y"
    # cox_target_col_list = ["efs_time2"]
    # group_col = "race_group"  # Required for GroupKFold (edit as needed)
    stratified_col = "race_group_efs"  # Required for StratifiedKFold (edit as needed)

    # model
    # model_path = "unsloth/gemma-2-9b-it-bnb-4bit"
    model_path = "unsloth/gemma-2-2b-it-bnb-4bit"
    metric = "rmse"
    max_length = 1024
    bf16 = is_torch_bf16_gpu_available()
    fp16 = False if bf16 else True
    learning_rate = 0.0001
    epochs = 2
    per_device_train_batch_size = 4
    gradient_accumulation_steps = 16
    per_device_eval_batch_size = 8
    steps = 50
    lr_scheduler_type = "cosine"
    weight_decay = 0.01
    optim = "adamw_torch_fused"
    lora_r = 16
    lora_alpha = 32
    lora_dropout = 0.05
    lora_bias = "none"


In [4]:
# ====================================================
# Seed everything
# ====================================================
seed_everything(CFG.SEED)


In [5]:
# ====================================================
# Read data
# ====================================================
train = pl.read_csv(CFG.DATA_PATH / "train.csv", try_parse_dates=True)
test = pl.read_csv(CFG.DATA_PATH / "test.csv", try_parse_dates=True)

if CFG.DRY_RUN:
    train = train.sample(100)


In [6]:
# ====================================================
# fold column
# ====================================================
# race_group_efs列を作成
train = train.with_columns((pl.col("race_group").cast(str) + "_" + pl.col("efs").cast(str)).alias("race_group_efs"))

fold_array = np.zeros(train.height)
skf = StratifiedKFold(n_splits=CFG.n_folds, shuffle=True, random_state=CFG.SEED)
for fold, (_, val_idx) in enumerate(skf.split(train, train[CFG.stratified_col]), start=1):
    fold_array[val_idx] = fold
train = train.with_columns(pl.Series(fold_array, dtype=pl.Int8).alias("fold"))


In [7]:
# ====================================================
# target column
# ====================================================
def transform_survival_probability(df, time_col="efs_time", event_col="efs"):
    kmf = KaplanMeierFitter()
    kmf.fit(df[time_col], df[event_col])
    y = kmf.survival_function_at_times(df[time_col]).values
    return y


y = transform_survival_probability(train, time_col="efs_time", event_col="efs")
train = train.with_columns(pl.Series(y).alias("y"))


In [8]:
train.null_count()


ID,dri_score,psych_disturb,cyto_score,diabetes,hla_match_c_high,hla_high_res_8,tbi_status,arrhythmia,hla_low_res_6,graft_type,vent_hist,renal_issue,pulm_severe,prim_disease_hct,hla_high_res_6,cmv_status,hla_high_res_10,hla_match_dqb1_high,tce_imm_match,hla_nmdp_6,hla_match_c_low,rituximab,hla_match_drb1_low,hla_match_dqb1_low,prod_type,cyto_score_detail,conditioning_intensity,ethnicity,year_hct,obesity,mrd_hct,in_vivo_tcd,tce_match,hla_match_a_high,hepatic_severe,donor_age,prior_tumor,hla_match_b_low,peptic_ulcer,age_at_hct,hla_match_a_low,gvhd_proph,rheum_issue,sex_match,hla_match_b_high,race_group,comorbidity_score,karnofsky_score,hepatic_mild,tce_div_match,donor_related,melphalan_dose,hla_low_res_8,cardiac,hla_match_drb1_high,pulm_moderate,hla_low_res_10,efs,efs_time,race_group_efs,fold,y
u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32
0,154,2062,8068,2119,4620,5829,0,2202,3270,0,259,1915,2135,0,5284,634,7163,5199,11133,4197,2800,2148,2643,4194,0,11923,4789,587,0,1760,16597,225,18996,4301,1871,1808,1678,2565,2419,0,2390,225,2183,261,4088,0,477,870,1917,11396,158,1405,3653,2542,3352,2047,5064,0,0,0,0,0


In [9]:
# ====================================================
# preprocess(欠損値補完)
# ====================================================
def preprocess_dataframe(df: pl.DataFrame) -> pl.DataFrame:
    """欠損値を適切な値で埋める"""

    # 数値列は-1、文字列列は"Unknown"で埋める
    df = df.with_columns(cs.numeric().fill_null(-1), cs.string().fill_null("Unknown"))

    return df


display(train.null_count())

train = preprocess_dataframe(train)
test = preprocess_dataframe(test)

display(train.null_count())


ID,dri_score,psych_disturb,cyto_score,diabetes,hla_match_c_high,hla_high_res_8,tbi_status,arrhythmia,hla_low_res_6,graft_type,vent_hist,renal_issue,pulm_severe,prim_disease_hct,hla_high_res_6,cmv_status,hla_high_res_10,hla_match_dqb1_high,tce_imm_match,hla_nmdp_6,hla_match_c_low,rituximab,hla_match_drb1_low,hla_match_dqb1_low,prod_type,cyto_score_detail,conditioning_intensity,ethnicity,year_hct,obesity,mrd_hct,in_vivo_tcd,tce_match,hla_match_a_high,hepatic_severe,donor_age,prior_tumor,hla_match_b_low,peptic_ulcer,age_at_hct,hla_match_a_low,gvhd_proph,rheum_issue,sex_match,hla_match_b_high,race_group,comorbidity_score,karnofsky_score,hepatic_mild,tce_div_match,donor_related,melphalan_dose,hla_low_res_8,cardiac,hla_match_drb1_high,pulm_moderate,hla_low_res_10,efs,efs_time,race_group_efs,fold,y
u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32
0,154,2062,8068,2119,4620,5829,0,2202,3270,0,259,1915,2135,0,5284,634,7163,5199,11133,4197,2800,2148,2643,4194,0,11923,4789,587,0,1760,16597,225,18996,4301,1871,1808,1678,2565,2419,0,2390,225,2183,261,4088,0,477,870,1917,11396,158,1405,3653,2542,3352,2047,5064,0,0,0,0,0


ID,dri_score,psych_disturb,cyto_score,diabetes,hla_match_c_high,hla_high_res_8,tbi_status,arrhythmia,hla_low_res_6,graft_type,vent_hist,renal_issue,pulm_severe,prim_disease_hct,hla_high_res_6,cmv_status,hla_high_res_10,hla_match_dqb1_high,tce_imm_match,hla_nmdp_6,hla_match_c_low,rituximab,hla_match_drb1_low,hla_match_dqb1_low,prod_type,cyto_score_detail,conditioning_intensity,ethnicity,year_hct,obesity,mrd_hct,in_vivo_tcd,tce_match,hla_match_a_high,hepatic_severe,donor_age,prior_tumor,hla_match_b_low,peptic_ulcer,age_at_hct,hla_match_a_low,gvhd_proph,rheum_issue,sex_match,hla_match_b_high,race_group,comorbidity_score,karnofsky_score,hepatic_mild,tce_div_match,donor_related,melphalan_dose,hla_low_res_8,cardiac,hla_match_drb1_high,pulm_moderate,hla_low_res_10,efs,efs_time,race_group_efs,fold,y
u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [10]:
# ====================================================
# prompt
# ====================================================

PROMPT_TEMPLATE = Template("""As a medical expert specializing in hematopoietic cell transplantation (HCT), predict the survival probability for the following patient. Provide your prediction as a number between 0 and 1.

PRIMARY RISK FACTORS:
1. Disease Characteristics
- Primary Disease: {{ prim_disease_hct }}
- Disease Risk Index: {{ dri_score }}
- Cytogenetic Score: {{ cyto_score_detail }}
- MRD Status at HCT: {{ mrd_hct }}

2. Patient Status
- Age at HCT: {{ age_at_hct }} years
- Karnofsky Performance Score: {{ karnofsky_score }}
- HCT Comorbidity Index: {{ comorbidity_score }}

3. HLA & Immunological Factors
- 10/10 Match Score: {{ hla_high_res_10 }}/10
- T-cell Epitope Status: {{ tce_match }}
- TCE Immunogenicity: {{ tce_imm_match }}
- CMV Status (D/R): {{ cmv_status }}

TRANSPLANT CHARACTERISTICS:
4. Donor & Graft Information
- Donor Type: {{ donor_related }}
- Donor Age: {{ donor_age }}
- Graft Type: {{ graft_type }}
- Sex Match (D-R): {{ sex_match }}

5. Treatment Protocol
- Conditioning Intensity: {{ conditioning_intensity }}
- TBI Status: {{ tbi_status }}
- GVHD Prophylaxis: {{ gvhd_proph }}
- In-vivo T-cell Depletion: {{ in_vivo_tcd }}
- Rituximab in Conditioning: {{ rituximab }}
- Melphalan Dose: {{ melphalan_dose }}

COMORBIDITIES & MEDICAL HISTORY:
6. Major Organ Systems
- Cardiac Disease: {{ cardiac }}
- Arrhythmia: {{ arrhythmia }}
- Pulmonary Disease (Severe): {{ pulm_severe }}
- Pulmonary Disease (Moderate): {{ pulm_moderate }}
- Renal Disease: {{ renal_issue }}
- Hepatic Disease (Severe): {{ hepatic_severe }}
- Hepatic Disease (Mild): {{ hepatic_mild }}

7. Other Medical Conditions
- Diabetes: {{ diabetes }}
- Obesity: {{ obesity }}
- Psychiatric Condition: {{ psych_disturb }}
- Peptic Ulcer: {{ peptic_ulcer }}
- Rheumatologic Disease: {{ rheum_issue }}
- Prior Solid Tumor: {{ prior_tumor }}
- History of Mechanical Ventilation: {{ vent_hist }}

8. Demographics
- Race: {{ race_group }}
- Ethnicity: {{ ethnicity }}
- Transplant Year: {{ year_hct }}

Key Considerations for Survival Prediction:
1. Disease severity and risk status
2. HLA matching and immunological compatibility
3. Patient fitness and comorbidity burden
4. Donor and graft characteristics
5. Treatment intensity and protocol
6. Historical outcomes for similar profiles

Based on these comprehensive factors, particularly noting the {{ prim_disease_hct }} diagnosis, {{ conditioning_intensity }} conditioning, and comorbidity score of {{ comorbidity_score }}, provide your survival probability prediction.

Required format:
Survival probability: [number between 0-1]
""")


def make_prompt_column(df: pl.DataFrame) -> pl.DataFrame:
    prompts = []
    for row in df.iter_rows(named=True):
        prompt = PROMPT_TEMPLATE.render(**row)
        prompts.append(prompt)

    return df.with_columns(pl.Series(prompts).alias("prompt"))


train = make_prompt_column(train)
test = make_prompt_column(test)


In [11]:
print(train["prompt"][0])


As a medical expert specializing in hematopoietic cell transplantation (HCT), predict the survival probability for the following patient. Provide your prediction as a number between 0 and 1.

PRIMARY RISK FACTORS:
1. Disease Characteristics
- Primary Disease: IEA
- Disease Risk Index: N/A - non-malignant indication
- Cytogenetic Score: Unknown
- MRD Status at HCT: Unknown

2. Patient Status
- Age at HCT: 9.942 years
- Karnofsky Performance Score: 90.0
- HCT Comorbidity Index: 0.0

3. HLA & Immunological Factors
- 10/10 Match Score: -1.0/10
- T-cell Epitope Status: Unknown
- TCE Immunogenicity: Unknown
- CMV Status (D/R): +/+

TRANSPLANT CHARACTERISTICS:
4. Donor & Graft Information
- Donor Type: Unrelated
- Donor Age: -1.0
- Graft Type: Bone marrow
- Sex Match (D-R): M-F

5. Treatment Protocol
- Conditioning Intensity: Unknown
- TBI Status: No TBI
- GVHD Prophylaxis: FKalone
- In-vivo T-cell Depletion: Yes
- Rituximab in Conditioning: No
- Melphalan Dose: N/A, Mel not given

COMORBIDIT

In [12]:
# setup model and tokenizer
def setup_model_and_tokenizer():
    tokenizer = AutoTokenizer.from_pretrained(CFG.model_path)
    tokenizer.add_eos_token = True
    tokenizer.padding_side = "right"  # 文末に<eos>トークンを追加
    tokenizer.pad_token = tokenizer.eos_token  # <eos>をpad_tokenとして設定

    peft_config = LoraConfig(
        r=CFG.lora_r,
        lora_alpha=CFG.lora_alpha,
        lora_dropout=CFG.lora_dropout,
        bias=CFG.lora_bias,
        inference_mode=False,
        task_type=TaskType.SEQ_CLS,
        target_modules=[
            "q_proj",
            "k_proj",
            "v_proj",
            "o_proj",
            "gate_proj",
            "up_proj",
            "down_proj",
        ],
    )

    # NOTE: device_mapを設定しないことで4foldの学習が可能になった
    model = AutoModelForSequenceClassification.from_pretrained(
        CFG.model_path,
        num_labels=1,
    )
    model.config.use_cache = False  # キャッシュを使用しない
    model = prepare_model_for_kbit_training(model)  # 量子化したモデルをファインチューニング可能にする
    model = get_peft_model(model, peft_config)  # モデルにLoRAを適用
    # model.print_trainable_parameters()
    return model, tokenizer


In [12]:
# model, tokenizer = setup_model_and_tokenizer()


In [13]:
# model.print_trainable_parameters()


In [13]:
# tokenize function
def tokenize(sample):
    return tokenizer(sample["prompt"], padding="max_length", truncation=True)


In [None]:
# # metricをRMSEに変更
# def compute_metrics(eval_pred):
#     preds, labels = eval_pred
#     preds = preds.squeeze()  # (バッチサイズ, 1) -> (バッチサイズ,)
#     rmse = np.sqrt(np.mean((preds - labels) ** 2))
#     return {"rmse": rmse}


# # # 実験結果格納用のディレクトリを作成
# # cfg.run_name = time.strftime("%Y%m%d_%H%M%S")
# # Path(cfg.data.results_dir).mkdir(exist_ok=True, parents=True)

# y_train = train[CFG.target_col].to_numpy()
# # oof = np.zeros(len(y_train))
# oof_rmse = []

# for fold, (train_idx, val_idx) in enumerate(skf.split(train_df, y_train)):
#     # Setup model and tokenizer
#     model, tokenizer = setup_model_and_tokenizer()

#     # Setup dataset
#     ds_train = Dataset.from_pandas(train_df[train_idx][["prompt", "labels"]].clone().to_pandas())
#     ds_val = Dataset.from_pandas(train_df[val_idx][["prompt", "labels"]].clone().to_pandas())
#     ds_test = Dataset.from_pandas(test_df.select("prompt").clone().to_pandas())

#     ds_train = ds_train.map(tokenize).remove_columns("prompt")
#     ds_val = ds_val.map(tokenize).remove_columns("prompt")
#     ds_test = ds_test.map(tokenize).remove_columns("prompt")

#     # Setup trainer
#     output_dir = os.path.join(cfg.data.results_dir, f"fold{fold}")

#     train_args = TrainingArguments(
#         output_dir=output_dir,  # 学習結果の出力ディレクトリ
#         fp16=cfg.gemma.fp16,  # 16ビット浮動小数点演算を使用するかどうか
#         learning_rate=cfg.gemma.learning_rate,  # 学習率
#         num_train_epochs=cfg.gemma.epochs,  # 学習エポック数
#         per_device_train_batch_size=cfg.gemma.per_device_train_batch_size,  # デバイスあたりの訓練バッチサイズ
#         per_device_eval_batch_size=cfg.gemma.per_device_eval_batch_size,  # デバイスあたりの評価バッチサイズ
#         gradient_accumulation_steps=cfg.gemma.gradient_accumulation_steps,  # 勾配蓄積ステップ数
#         gradient_checkpointing=True,  # 勾配チェックポイントを使用するかどうか
#         report_to="none",  # レポート出力先（なし）
#         evaluation_strategy="steps",  # 評価戦略（ステップごと）
#         do_eval=True,  # 評価を行うかどうか
#         eval_steps=cfg.gemma.steps,  # 評価を行うステップ間隔
#         save_total_limit=1,  # 保存するモデルの最大数
#         save_strategy="steps",  # 保存戦略（ステップごと）
#         save_steps=cfg.gemma.steps,  # モデルを保存するステップ間隔
#         logging_steps=cfg.gemma.steps,  # ログを出力するステップ間隔
#         load_best_model_at_end=True,  # 学習終了時に最良のモデルをロードするかどうか
#         lr_scheduler_type=cfg.gemma.lr_scheduler_type,  # 学習率スケジューラーの種類
#         metric_for_best_model=cfg.gemma.metric,  # 最良モデルを判断するための評価指標
#         greater_is_better=True,  # 評価指標が大きいほど良いかどうか
#         warmup_ratio=0.1,  # ウォームアップの比率
#         weight_decay=cfg.gemma.weight_decay,  # 重み減衰
#         save_safetensors=True,  # SafeTensorsフォーマットで保存するかどうか
#         seed=cfg.seed,  # 乱数シード
#         data_seed=cfg.seed,  # データシャッフル用の乱数シード
#         optim=cfg.gemma.optim,  # 最適化アルゴリズム
#     )

#     trainer = Trainer(
#         model=model,
#         args=train_args,
#         train_dataset=ds_train,
#         eval_dataset=ds_val,
#         data_collator=DataCollatorWithPadding(tokenizer),
#         tokenizer=tokenizer,
#         compute_metrics=compute_metrics,
#     )

#     # Train the model
#     trainer.train()

#     # Perform inference on val and test datasets
#     pred_val = torch.softmax(torch.tensor(trainer.predict(ds_val).predictions), dim=1).numpy()[:, 1]
#     pred_test = torch.softmax(torch.tensor(trainer.predict(ds_test).predictions), dim=1).numpy()[:, 1]

#     # Save the model, predictions
#     final_output_dir = f"{cfg.data.results_dir}/fold{fold}/final"
#     trainer.save_model(final_output_dir)
#     np.save(f"{final_output_dir}/val.npy", pred_val)
#     np.save(f"{final_output_dir}/test.npy", pred_test)
#     # tokenizer.save_pretrained(final_output_dir)

#     # Calculate and log AUC score
#     roc_auc = roc_auc_score(y_train[val_idx], pred_val)
#     print(f"Fold {fold} AUC: {roc_auc}")
#     oof_auc.append(roc_auc)

#     # Clean up to free memory
#     del model
#     torch.cuda.empty_cache()
#     gc.collect()


# print(f"Mean AUC score across all folds: {np.mean(oof_auc)}")


In [14]:
# metricをRMSEに変更
def compute_metrics(eval_pred):
    preds, labels = eval_pred
    preds = preds.squeeze()  # (バッチサイズ, 1) -> (バッチサイズ,)
    rmse = np.sqrt(np.mean((preds - labels) ** 2))
    return {"rmse": rmse}


y_train = train[CFG.target_col].to_numpy()
oof = np.zeros(len(y_train))

for fold in range(1, CFG.n_folds + 1):
    # Setup model and tokenizer
    model, tokenizer = setup_model_and_tokenizer()

    # Setup dataset
    ds_train = Dataset.from_polars(
        train.filter(pl.col("fold") != fold)
        .select(["prompt", CFG.target_col])
        .rename({CFG.target_col: "labels"})
        .clone()
    )
    ds_val = Dataset.from_polars(
        train.filter(pl.col("fold") == fold)
        .select(["prompt", CFG.target_col])
        .rename({CFG.target_col: "labels"})
        .clone()
    )
    ds_test = Dataset.from_polars(test.select(["prompt"]).clone())

    ds_train = ds_train.map(tokenize).remove_columns("prompt")
    ds_val = ds_val.map(tokenize).remove_columns("prompt")
    ds_test = ds_test.map(tokenize).remove_columns("prompt")

    # Setup trainer
    output_dir = os.path.join(CFG.MODEL_PATH, f"fold{fold}")

    train_args = TrainingArguments(
        output_dir=output_dir,
        fp16=CFG.fp16,
        bf16=CFG.bf16,
        learning_rate=CFG.learning_rate,
        num_train_epochs=CFG.epochs,
        per_device_train_batch_size=CFG.per_device_train_batch_size,
        gradient_accumulation_steps=CFG.gradient_accumulation_steps,
        per_device_eval_batch_size=CFG.per_device_eval_batch_size,
        gradient_checkpointing=True,
        report_to="none",
        evaluation_strategy="steps",
        do_eval=True,
        eval_steps=CFG.steps,
        save_total_limit=1,
        save_strategy="steps",
        save_steps=CFG.steps,
        logging_steps=CFG.steps,
        load_best_model_at_end=True,
        lr_scheduler_type=CFG.lr_scheduler_type,
        metric_for_best_model=CFG.metric,
        greater_is_better=False,
        warmup_ratio=0.1,
        weight_decay=CFG.weight_decay,
        optim=CFG.optim,
        seed=CFG.SEED,
        data_seed=CFG.SEED,
        save_safetensors=True,
    )

    trainer = Trainer(
        model=model,
        args=train_args,
        train_dataset=ds_train,
        eval_dataset=ds_val,
        data_collator=DataCollatorWithPadding(tokenizer),
        tokenizer=tokenizer,
        compute_metrics=compute_metrics,
    )

    trainer.train()


tokenizer_config.json:   0%|          | 0.00/47.0k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.5M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/636 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.39k [00:00<?, ?B/s]

Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
`low_cpu_mem_usage` was None, now set to True since model is quantized.


model.safetensors:   0%|          | 0.00/2.22G [00:00<?, ?B/s]

Some weights of Gemma2ForSequenceClassification were not initialized from the model checkpoint at unsloth/gemma-2-2b-it-bnb-4bit and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/23040 [00:00<?, ? examples/s]

Asking to pad to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no padding.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Map:   0%|          | 0/5760 [00:00<?, ? examples/s]

Map:   0%|          | 0/3 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Rmse
50,11.181,0.064283,0.253542
100,0.1896,0.032488,0.180244
150,0.1569,0.04132,0.203272
200,0.0812,0.196644,0.443446
250,0.1738,0.128024,0.357805


KeyboardInterrupt: 