In [1]:
import os
import json
from pathlib import Path
import numpy as np
import pandas as pd
import torch
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import scml
from scml import pandasx as pdx
import lalaes2 as mylib

In [2]:
# --- Run configuration ---
# Fine-tuned checkpoint directory and the validation split to score.
model_dir = Path("models/aes2/deberta_v3_base/20240628_111207")
validation_data_file = Path("input/val_06.parquet")
# Dataframe column whose values are passed as `critiques` to the dataset.
critique_column = "ctq_3_Qwen2-1.5B-Instruct"
# "auto" loads the checkpoint through AutoModelForSequenceClassification.
# The other known value, "CustomDebertaV2ForTokenClassification", is rejected
# by the model-loading cell.
model_class = "auto"
model_max_length = 768
# 768-token inputs use a batch of 16; any other length uses 32.
batch_size = 16 if model_max_length == 768 else 32

In [3]:
# Start the timer whose elapsed value is reported by the final cell.
tim = scml.Timer()
tim.start()

# Environment switch read by the Hugging Face tokenizers library.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Percentile grid kept at module level; it is not used by any cell visible here.
percentiles = [.01, .05, .1, .2, .3, .4, .5, .6, .7, .8, .9, .95, .99]

# Raise the pandas display limits so frames and info() output are not truncated.
for option_name, option_value in (
    ("max_info_columns", 9999),
    ("display.max_columns", 9999),
    ("display.max_rows", 9999),
    ("max_colwidth", 9999),
):
    pd.set_option(option_name, option_value)

# Register tqdm's pandas integration (progress_apply and friends).
tqdm.pandas()

# Seed RNGs using scml's defaults (exact scope is defined by scml -- not shown here).
scml.seed_everything()

# Print the int16 value range.
info = np.iinfo(np.int16)
print(f"int16, min={info.min}, max={info.max}")

int16, min=-32768, max=32767


In [4]:
# Pick the compute device: Apple MPS first, then CUDA, otherwise CPU.
device = torch.device("cpu")
if torch.backends.mps.is_available():
    device = torch.device("mps")
    # On MPS the configured batch size is replaced by 128.
    batch_size = 128
    print("mps")
elif torch.cuda.is_available():
    gpu_count = torch.cuda.device_count()
    # Keep the original preference for the second GPU (index 1), but fall back
    # to index 0 on single-GPU hosts where "cuda:1" does not exist; otherwise
    # the failure would only surface later when tensors are moved to the device.
    cuda_index = 1 if gpu_count > 1 else 0
    device = torch.device(f"cuda:{cuda_index}")
    # Report every visible GPU together with its current memory usage.
    for i in range(gpu_count):
        print(f"device={i}, {torch.cuda.get_device_name(i)}")
        print('Mem Allocated:', round(torch.cuda.memory_allocated(i)/1024**3,1), 'GB')
        print('Mem Cached:   ', round(torch.cuda.memory_reserved(i)/1024**3,1), 'GB')
else:
    print("cpu")

device=0, NVIDIA GeForce RTX 4070 Ti SUPER
Mem Allocated: 0.0 GB
Mem Cached:    0.0 GB
device=1, NVIDIA GeForce RTX 4070 Ti SUPER
Mem Allocated: 0.0 GB
Mem Cached:    0.0 GB


In [5]:
# Load the validation split.
df = pd.read_parquet(validation_data_file)

# Fail early, with a readable message, if a column the evaluation cell reads
# (critique text, essay text, gold score) is absent from the file.
required_columns = [critique_column, "full_text", "score"]
missing_columns = [c for c in required_columns if c not in df.columns]
if missing_columns:
    raise ValueError(
        f"{validation_data_file} is missing required columns: {missing_columns}"
    )

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2094 entries, 0 to 2093
Data columns (total 6 columns):
 #   Column                     Non-Null Count  Dtype 
---  ------                     --------------  ----- 
 0   essay_id                   2094 non-null   object
 1   score                      2094 non-null   int8  
 2   ctq_3_Qwen2-1.5B-Instruct  2094 non-null   object
 3   topic                      2094 non-null   object
 4   full_text                  2094 non-null   object
 5   source                     2094 non-null   object
dtypes: int8(1), object(5)
memory usage: 84.0+ KB


In [6]:
# Load the tokenizer saved alongside the checkpoint, overriding its maximum
# sequence length with the configured value, then print its settings.
tokenizer = AutoTokenizer.from_pretrained(
    model_dir,
    model_max_length=model_max_length,
)
print(tokenizer)

DebertaV2TokenizerFast(name_or_path='models/aes2/deberta_v3_base/20240628_111207', vocab_size=128000, model_max_length=768, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '[CLS]', 'eos_token': '[SEP]', 'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]', 'mask_token': '[MASK]'}, clean_up_tokenization_spaces=True),  added_tokens_decoder={
	0: AddedToken("[PAD]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	1: AddedToken("[CLS]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	2: AddedToken("[SEP]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	3: AddedToken("[UNK]", rstrip=False, lstrip=False, single_word=False, normalized=True, special=True),
	128000: AddedToken("[MASK]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
}




In [7]:
%%time
# Load the fine-tuned checkpoint for the configured model class.
if model_class == "CustomDebertaV2ForTokenClassification":
    # The custom-class loader is disabled in this notebook (call kept for reference):
    #model = CustomDebertaV2ForTokenClassification.from_pretrained(model_dir)
    # Raise with an explanatory message instead of a bare ValueError so the
    # failure is self-describing.
    raise ValueError(
        f"model_class={model_class!r} is not supported by this notebook; "
        "set model_class to 'auto'"
    )
else:
    # Every other value (including "auto") goes through the Auto* factory.
    model = AutoModelForSequenceClassification.from_pretrained(model_dir)
print(model)

DebertaV2ForSequenceClassification(
  (deberta): DebertaV2Model(
    (embeddings): DebertaV2Embeddings(
      (word_embeddings): Embedding(128100, 768, padding_idx=0)
      (LayerNorm): LayerNorm((768,), eps=1e-07, elementwise_affine=True)
      (dropout): StableDropout()
    )
    (encoder): DebertaV2Encoder(
      (layer): ModuleList(
        (0-11): 12 x DebertaV2Layer(
          (attention): DebertaV2Attention(
            (self): DisentangledSelfAttention(
              (query_proj): Linear(in_features=768, out_features=768, bias=True)
              (key_proj): Linear(in_features=768, out_features=768, bias=True)
              (value_proj): Linear(in_features=768, out_features=768, bias=True)
              (pos_dropout): StableDropout()
              (dropout): StableDropout()
            )
            (output): DebertaV2SelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-07, elementwise_affine

In [12]:
%%time
# Build the evaluation dataset from the validation frame: critique text,
# essay text and gold scores are passed as plain Python lists.
eval_dataset = mylib.Aes2Dataset(
    tokenizer=tokenizer,
    critiques=df[critique_column].tolist(),
    texts=df["full_text"].tolist(),
    labels=df["score"].tolist(),
)

# Score the dataset with the loaded model; the returned mapping is read by the
# reporting cell ("thresholds" key, JSON dump).
res = mylib.evaluation_aes2(
    ds=eval_dataset,
    model=model,
    batch_size=batch_size,
    device=device,
    progress_bar=True,
)

predict hms score: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 131/131 [00:47<00:00,  2.75it/s]


CPU times: user 49.8 s, sys: 338 ms, total: 50.1 s
Wall time: 50.4 s


In [13]:
# Emit, one item per line: the evaluated checkpoint path, the thresholds
# entry of the result, and the full result mapping as indented JSON.
print(
    model_dir,
    res["thresholds"],
    json.dumps(res, indent=2),
    sep="\n",
)

models/aes2/deberta_v3_base/20240628_111207
[1.498935947960958, 2.2397895160227517, 2.9740034895162504, 3.5223502418743555, 7.200203307066931]
{
  "thresholds": [
    1.498935947960958,
    2.2397895160227517,
    2.9740034895162504,
    3.5223502418743555,
    7.200203307066931
  ],
  "qwk": 0.7574758007268526,
  "rmse": 0.7466819718679459
}


In [14]:
# Stop the timer started in the setup cell and report the elapsed time.
tim.stop()
elapsed = tim.elapsed
print(f"Total time taken {elapsed!s}")

Total time taken 0:01:47.697721
