In [1]:
import os
import random
import math
import gc
import time
import copy

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset


import transformers
from transformers import AutoModel, AutoTokenizer, AutoModelForSequenceClassification
import lightgbm as lgb


from tqdm.auto import tqdm
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold

import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [2]:
class CFG :
    debug_one_epoch=False
    debug_one_fold=False
    run_cv=True
    only_infer=False
    num_workers=8
    num_epochs=10
    warmup_prop=0.1
    lr=5e-5
    max_len=512
    batch_size=8
    early_stopping_rounds=5
    random_seed=42
    model_path="/kaggle/input/huggingfacedebertav3variants/deberta-v3-base" 
    pretrained_path="/kaggle/input/commmonlit-deberta"
    optimizer=torch.optim.AdamW
    criterion=torch.nn.MSELoss()
    device=device
    fold=5
    use_amp = True
    
def seed_torch(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    
seed_torch(CFG.random_seed)

In [3]:
if CFG.debug_one_epoch:
    CFG.num_epochs = 1

In [4]:
train = pd.read_csv("../input/commonlitreadabilityprize/train.csv")
test = pd.read_csv("../input/commonlitreadabilityprize/test.csv")
submission = pd.read_csv("../input/commonlitreadabilityprize/sample_submission.csv")

In [5]:
train["target"].describe()

count    2834.000000
mean       -0.959319
std         1.033579
min        -3.676268
25%        -1.690320
50%        -0.912190
75%        -0.202540
max         1.711390
Name: target, dtype: float64

In [6]:
def get_features(texts, model_path, device, fold):
    # モデルとトークナイザーの読み込み
    model = AutoModelForSequenceClassification.from_pretrained(
        model_path,
        output_hidden_states=True,
        num_labels = 1
    ).to(device)
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    
    model.eval()
    features = []
    
    # numpy arrayをリストに変換
    texts = texts.tolist()
    
    with torch.no_grad():
        for i in range(0, len(texts), CFG.batch_size):
            batch_texts = texts[i:i + CFG.batch_size]
            
            # バッチ処理
            inputs = tokenizer(
                batch_texts,
                padding=True,
                truncation=True,
                max_length=CFG.max_len,
                return_tensors="pt"
            ).to(device)
            
            outputs = model(**inputs, output_hidden_states=True)
            last_hidden_states = outputs.hidden_states[-1]
            cls_features = last_hidden_states[:, 0].cpu().numpy()
            features.append(cls_features)
    
    return np.vstack(features)

In [7]:
def train_lightgbm(X_train, y_train, X_valid, y_valid):
    train_data = lgb.Dataset(X_train, label=y_train)
    valid_data = lgb.Dataset(X_valid, label=y_valid)
    
    params = {
        'objective': 'regression',
        'metric': 'rmse',
        'boosting_type': 'gbdt',
        'learning_rate': 0.01,
        'num_leaves': 16,
        'max_depth': 4,
        'verbose': -1
    }
    
    callbacks = [
        lgb.early_stopping(50),
        lgb.log_evaluation(100)
    ]
    
    model = lgb.train(
        params,
        train_data,
        valid_sets=[train_data, valid_data],
        num_boost_round=1000,
        callbacks=callbacks
    )
    
    return model

In [8]:

def main():
    # データの読み込み
    train = pd.read_csv("../input/commonlitreadabilityprize/train.csv")
    test = pd.read_csv("../input/commonlitreadabilityprize/test.csv")
    submission = pd.read_csv("../input/commonlitreadabilityprize/sample_submission.csv")

    # K-fold cross validation
    kf = KFold(n_splits=CFG.fold, shuffle=True, random_state=CFG.random_seed)
    models = []
    oof_preds = np.zeros(len(train))
    test_preds = np.zeros(len(test))
    
    for fold, (train_idx, valid_idx) in enumerate(kf.split(train)):
        print(f"======================================Fold {fold}======================================")
        train_embeddings = get_features(train["excerpt"].values, CFG.model_path, CFG.device, fold)
        test_embeddings = get_features(test["excerpt"].values, CFG.model_path, CFG.device, fold)
        X_train = train_embeddings[train_idx]
        y_train = train["target"].values[train_idx]
        X_valid = train_embeddings[valid_idx]
        y_valid = train["target"].values[valid_idx]
        
        model = train_lightgbm(X_train, y_train, X_valid, y_valid)
        models.append(model)
        
        oof_preds[valid_idx] = model.predict(X_valid)
        test_preds += model.predict(test_embeddings)
    
    test_preds /= CFG.fold
    # 結果の保存
    submission["target"] = test_preds
    submission.to_csv("submission.csv", index=False)

if __name__ == "__main__":
    main()



Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at /kaggle/input/huggingfacedebertav3variants/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at /kaggle/input/huggingfacedebertav3variants/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training until validation scores don't improve for 50 rounds
[100]	training's rmse: 0.744648	valid_1's rmse: 0.799359
[200]	training's rmse: 0.621437	valid_1's rmse: 0.727885
[300]	training's rmse: 0.548039	valid_1's rmse: 0.696973
[400]	training's rmse: 0.501952	valid_1's rmse: 0.680425
[500]	training's rmse: 0.468647	valid_1's rmse: 0.670366
[600]	training's rmse: 0.439426	valid_1's rmse: 0.663494
[700]	training's rmse: 0.413073	valid_1's rmse: 0.658315
[800]	training's rmse: 0.38995	valid_1's rmse: 0.655157
[900]	training's rmse: 0.367735	valid_1's rmse: 0.652829
[1000]	training's rmse: 0.347414	valid_1's rmse: 0.65056
Did not meet early stopping. Best iteration is:
[1000]	training's rmse: 0.347414	valid_1's rmse: 0.65056


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at /kaggle/input/huggingfacedebertav3variants/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at /kaggle/input/huggingfacedebertav3variants/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training until validation scores don't improve for 50 rounds
[100]	training's rmse: 0.743067	valid_1's rmse: 0.837642
[200]	training's rmse: 0.621119	valid_1's rmse: 0.761916
[300]	training's rmse: 0.550356	valid_1's rmse: 0.731211
[400]	training's rmse: 0.499999	valid_1's rmse: 0.714216
[500]	training's rmse: 0.464188	valid_1's rmse: 0.702626
[600]	training's rmse: 0.436263	valid_1's rmse: 0.696286
[700]	training's rmse: 0.41186	valid_1's rmse: 0.690834
[800]	training's rmse: 0.388716	valid_1's rmse: 0.687362
[900]	training's rmse: 0.368095	valid_1's rmse: 0.68543
[1000]	training's rmse: 0.349172	valid_1's rmse: 0.683855
Did not meet early stopping. Best iteration is:
[1000]	training's rmse: 0.349172	valid_1's rmse: 0.683855


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at /kaggle/input/huggingfacedebertav3variants/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at /kaggle/input/huggingfacedebertav3variants/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training until validation scores don't improve for 50 rounds
[100]	training's rmse: 0.750586	valid_1's rmse: 0.791142
[200]	training's rmse: 0.623728	valid_1's rmse: 0.714648
[300]	training's rmse: 0.552833	valid_1's rmse: 0.682542
[400]	training's rmse: 0.507792	valid_1's rmse: 0.670249
[500]	training's rmse: 0.470936	valid_1's rmse: 0.661809
[600]	training's rmse: 0.439667	valid_1's rmse: 0.656406
[700]	training's rmse: 0.41334	valid_1's rmse: 0.652785
[800]	training's rmse: 0.390077	valid_1's rmse: 0.650832
[900]	training's rmse: 0.368647	valid_1's rmse: 0.649609
[1000]	training's rmse: 0.349016	valid_1's rmse: 0.649252
Did not meet early stopping. Best iteration is:
[989]	training's rmse: 0.351176	valid_1's rmse: 0.649086


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at /kaggle/input/huggingfacedebertav3variants/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at /kaggle/input/huggingfacedebertav3variants/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training until validation scores don't improve for 50 rounds
[100]	training's rmse: 0.738001	valid_1's rmse: 0.83137
[200]	training's rmse: 0.612173	valid_1's rmse: 0.764059
[300]	training's rmse: 0.538832	valid_1's rmse: 0.737735
[400]	training's rmse: 0.493134	valid_1's rmse: 0.726988
[500]	training's rmse: 0.457972	valid_1's rmse: 0.720092
[600]	training's rmse: 0.428982	valid_1's rmse: 0.716127
[700]	training's rmse: 0.404011	valid_1's rmse: 0.710727
[800]	training's rmse: 0.381073	valid_1's rmse: 0.706416
[900]	training's rmse: 0.361295	valid_1's rmse: 0.703236
[1000]	training's rmse: 0.343513	valid_1's rmse: 0.700445
Did not meet early stopping. Best iteration is:
[999]	training's rmse: 0.343736	valid_1's rmse: 0.700444


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at /kaggle/input/huggingfacedebertav3variants/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at /kaggle/input/huggingfacedebertav3variants/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training until validation scores don't improve for 50 rounds
[100]	training's rmse: 0.745156	valid_1's rmse: 0.789661
[200]	training's rmse: 0.621148	valid_1's rmse: 0.717714
[300]	training's rmse: 0.547042	valid_1's rmse: 0.689485
[400]	training's rmse: 0.499274	valid_1's rmse: 0.67618
[500]	training's rmse: 0.463186	valid_1's rmse: 0.66977
[600]	training's rmse: 0.434977	valid_1's rmse: 0.666336
[700]	training's rmse: 0.411823	valid_1's rmse: 0.664089
[800]	training's rmse: 0.388819	valid_1's rmse: 0.661772
[900]	training's rmse: 0.366575	valid_1's rmse: 0.659781
[1000]	training's rmse: 0.346729	valid_1's rmse: 0.657702
Did not meet early stopping. Best iteration is:
[1000]	training's rmse: 0.346729	valid_1's rmse: 0.657702
