In [1]:
from pathlib import Path
import MeCab
from sklearn.model_selection import StratifiedKFold
import torch
import torch.nn as nn
from transformers import AutoTokenizer, BertJapaneseTokenizer
from src.utils import load_dataset, display_TNM_score
from src.train import MulticlassBERTBaseTrainer, MulticlassDeBERTaBaseTrainer

In [2]:
# Shared settings used by every experiment cell in this notebook.
TRAIN_DATA_DIR = Path('../data/train')
SEED = 2023
FOLDS = 5
# Prefer the second GPU (cuda:1) as before, but fall back to cuda:0 on a
# single-GPU machine instead of selecting a device index that does not exist.
# Use the CPU when CUDA is unavailable.
if torch.cuda.is_available():
    DEVICE = torch.device('cuda:1' if torch.cuda.device_count() > 1 else 'cuda:0')
else:
    DEVICE = torch.device('cpu')
BATCH_SIZE = 16
LEARNING_RATE = 2e-5
# EPOCHS and EARLY_STOPPING_ROUNDS are forwarded to trainer.training() as
# `epochs` and `early_stopping_rounds` in the experiment cells.
EPOCHS = 100
MAX_LENGTH = 512
EARLY_STOPPING_ROUNDS = 10
CRITERION = nn.CrossEntropyLoss()

# Load the dataset.
df = load_dataset(TRAIN_DATA_DIR)
# Concatenate the T, N and M columns (as strings) into one combined label;
# the experiment cells pass it to fold.split() as the stratification target.
df['TNM'] = df['T'].astype(str) + df['N'].astype(str) + df['M'].astype(str)
fold = StratifiedKFold(n_splits=FOLDS, shuffle=True, random_state=SEED)

In [3]:
# Tohoku University BERT v2 (base)
MODEL_NAME = 'cl-tohoku/bert-base-japanese-v2'
TOKENIZER = AutoTokenizer.from_pretrained(MODEL_NAME)

# Build a fresh split iterator for this run, stratified on the combined TNM label.
cv = fold.split(df['text'], df['TNM'])
trainer = MulticlassBERTBaseTrainer(
    model_name=MODEL_NAME,
    tokenizer=TOKENIZER,
    criterion=CRITERION,
    device=DEVICE,
    seed=SEED,
)
# training() yields three results, unpacked here as the T, N and M outputs
# (presumably cross-validation predictions -- confirm in src.train).
cv_predsT, cv_predsN, cv_predsM = trainer.training(
    df,
    cv,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    learning_rate=LEARNING_RATE,
    max_length=MAX_LENGTH,
    early_stopping_rounds=EARLY_STOPPING_ROUNDS,
)
display_TNM_score(df['TNM'].values, cv_predsT, cv_predsN, cv_predsM)
# Scores recorded from an earlier run of this cell.
"""
BEST Epochs: [18, 15, 21, 15, 24]
macro F1: 0.6590 Accuracy: 0.7347
T | macro F1: 0.5570 Accuracy: 0.6122
N | macro F1: 0.5102 Accuracy: 0.6735
M | macro F1: 0.9099 Accuracy: 0.9184
TNM | macro F1: 0.1750 Accuracy: 0.4286
"""

Some weights of the model checkpoint at cl-tohoku/bert-base-japanese-v2 were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at cl-tohoku/bert-base-japanese-v2 were not used when initializing BertM

BEST Epochs: [18, 15, 21, 15, 24]
macro F1: 0.6590 Accuracy: 0.7347
T | macro F1: 0.5570 Accuracy: 0.6122
N | macro F1: 0.5102 Accuracy: 0.6735
M | macro F1: 0.9099 Accuracy: 0.9184
TNM | macro F1: 0.1750 Accuracy: 0.4286


'\nBEST Epochs: [18, 15, 21, 15, 24]\nmacro F1: 0.6590 Accuracy: 0.7347\nT | macro F1: 0.5570 Accuracy: 0.6122\nN | macro F1: 0.5102 Accuracy: 0.6735\nM | macro F1: 0.9099 Accuracy: 0.9184\n'

In [4]:
# NOTE: this experiment is intentionally disabled. The code below is wrapped
# in a string literal, so nothing in this cell trains a model. The note inside
# the string says (in Japanese): "Kyoto University DeBERTa-v2 (results are not
# reproducible even with the SEED fixed)".
# The second string literal holds scores recorded from an earlier run. Because
# it is the cell's last expression, the notebook echoes it as the cell output.
"""
# 京大DeBERTa-v2(SEED値固定しても再現性取れない)
MODEL_NAME = 'ku-nlp/deberta-v2-base-japanese'
TOKENIZER = AutoTokenizer.from_pretrained(MODEL_NAME)

cv = fold.split(df['text'], df['TNM'])
trainer = MulticlassDeBERTaBaseTrainer(model_name=MODEL_NAME, tokenizer=TOKENIZER, criterion=CRITERION, device=DEVICE, seed=SEED)
cv_predsT, cv_predsN, cv_predsM = trainer.training(df, cv, batch_size=BATCH_SIZE, epochs=EPOCHS, learning_rate=LEARNING_RATE, max_length=MAX_LENGTH, early_stopping_rounds=EARLY_STOPPING_ROUNDS)
display_TNM_score(df['TNM'].values, cv_predsT, cv_predsN, cv_predsM)
"""
"""
BEST Epochs: [25, 72, 61, 18, 36]
macro F1: 0.6955 Accuracy: 0.7619
T | macro F1: 0.5898 Accuracy: 0.6633
N | macro F1: 0.6198 Accuracy: 0.7347
M | macro F1: 0.8770 Accuracy: 0.8878
TNM | macro F1: 0.1890 Accuracy: 0.4388
"""

Some weights of the model checkpoint at ku-nlp/deberta-v2-base-japanese were not used when initializing DebertaV2Model: ['cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing DebertaV2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaV2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at ku-nlp/deberta-v2-base-japanese were not used when initializing DebertaV2Model: ['cls.predictions.decoder.bias', '

BEST Epochs: [25, 72, 61, 18, 36]
macro F1: 0.6955 Accuracy: 0.7619
T | macro F1: 0.5898 Accuracy: 0.6633
N | macro F1: 0.6198 Accuracy: 0.7347
M | macro F1: 0.8770 Accuracy: 0.8878
TNM | macro F1: 0.1890 Accuracy: 0.4388


'\nBEST Epochs: [25, 32, 38, 17, 55]\nmacro F1: 0.6528 Accuracy: 0.7551\nT | macro F1: 0.5810 Accuracy: 0.6735\nN | macro F1: 0.4899 Accuracy: 0.6939\nM | macro F1: 0.8874 Accuracy: 0.8980\n'

In [5]:
# UTH-BERT (loaded from a local checkpoint directory)
MODEL_NAME = '../data/UTH_BERT_BASE_512_MC_BPE_WWM_V25000_352K'
# The MeCab option string names a dictionary directory (-d) and a user
# dictionary file (-u), both under ../data.
TOKENIZER = BertJapaneseTokenizer.from_pretrained(
    MODEL_NAME,
    mecab_kwargs={
        'mecab_option': '-d ../data/mecab-unidic-neologd -u ../data/MANBYO_201907_Dic-utf8.dic'
    },
)

# Build a fresh split iterator for this run, stratified on the combined TNM label.
cv = fold.split(df['text'], df['TNM'])
trainer = MulticlassBERTBaseTrainer(
    model_name=MODEL_NAME,
    tokenizer=TOKENIZER,
    criterion=CRITERION,
    device=DEVICE,
    seed=SEED,
)
# training() yields three results, unpacked here as the T, N and M outputs
# (presumably cross-validation predictions -- confirm in src.train).
cv_predsT, cv_predsN, cv_predsM = trainer.training(
    df,
    cv,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    learning_rate=LEARNING_RATE,
    max_length=MAX_LENGTH,
    early_stopping_rounds=EARLY_STOPPING_ROUNDS,
)
display_TNM_score(df['TNM'].values, cv_predsT, cv_predsN, cv_predsM)
# Scores recorded from an earlier run of this cell.
"""
BEST Epochs: [26, 42, 33, 29, 21]
macro F1: 0.7122 Accuracy: 0.7551
T | macro F1: 0.6611 Accuracy: 0.7041
N | macro F1: 0.6089 Accuracy: 0.6837
M | macro F1: 0.8667 Accuracy: 0.8776
TNM | macro F1: 0.1933 Accuracy: 0.4490
"""

Some weights of the model checkpoint at ../data/UTH_BERT_BASE_512_MC_BPE_WWM_V25000_352K were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at ../data/UTH_BERT_BASE_512_MC_BPE_WWM_V25000_352K wer

BEST Epochs: [26, 42, 33, 29, 21]
macro F1: 0.7122 Accuracy: 0.7551
T | macro F1: 0.6611 Accuracy: 0.7041
N | macro F1: 0.6089 Accuracy: 0.6837
M | macro F1: 0.8667 Accuracy: 0.8776
TNM | macro F1: 0.1933 Accuracy: 0.4490


'\nBEST Epochs: [26, 42, 33, 29, 21]\nmacro F1: 0.7122 Accuracy: 0.7551\nT | macro F1: 0.6611 Accuracy: 0.7041\nN | macro F1: 0.6089 Accuracy: 0.6837\nM | macro F1: 0.8667 Accuracy: 0.8776\n'

In [3]:
# JMedRoBERTa (sentencepiece vocabulary)
MODEL_NAME = 'alabnii/jmedroberta-base-sentencepiece-vocab50000'
TOKENIZER = AutoTokenizer.from_pretrained(MODEL_NAME)

# Build a fresh split iterator for this run, stratified on the combined TNM label.
cv = fold.split(df['text'], df['TNM'])
trainer = MulticlassBERTBaseTrainer(
    model_name=MODEL_NAME,
    tokenizer=TOKENIZER,
    criterion=CRITERION,
    device=DEVICE,
    seed=SEED,
)
# training() yields three results, unpacked here as the T, N and M outputs
# (presumably cross-validation predictions -- confirm in src.train).
cv_predsT, cv_predsN, cv_predsM = trainer.training(
    df,
    cv,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    learning_rate=LEARNING_RATE,
    max_length=MAX_LENGTH,
    early_stopping_rounds=EARLY_STOPPING_ROUNDS,
)
display_TNM_score(df['TNM'].values, cv_predsT, cv_predsN, cv_predsM)
# Scores recorded from an earlier run of this cell.
"""
BEST Epochs: [20, 16, 18, 27, 18]
macro F1: 0.6950 Accuracy: 0.7483
T | macro F1: 0.6368 Accuracy: 0.6735
N | macro F1: 0.5799 Accuracy: 0.6939
M | macro F1: 0.8683 Accuracy: 0.8776
TNM | macro F1: 0.2386 Accuracy: 0.4592
"""

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Some weights of the model checkpoint at alabnii/jmedroberta-base-sentencepiece-vocab50000 were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were no

BEST Epochs: [20, 16, 18, 27, 18]
macro F1: 0.6950 Accuracy: 0.7483
T | macro F1: 0.6368 Accuracy: 0.6735
N | macro F1: 0.5799 Accuracy: 0.6939
M | macro F1: 0.8683 Accuracy: 0.8776
TNM | macro F1: 0.2386 Accuracy: 0.4592


'\nBEST Epochs: [20, 16, 18, 27, 18]\nmacro F1: 0.6950 Accuracy: 0.7483\nT | macro F1: 0.6368 Accuracy: 0.6735\nN | macro F1: 0.5799 Accuracy: 0.6939\nM | macro F1: 0.8683 Accuracy: 0.8776\nTNM | macro F1: 0.2386 Accuracy: 0.4592\n'

In [4]:
# JMedRoBERTa (manbyo-wordpiece vocabulary)
MODEL_NAME = 'alabnii/jmedroberta-base-manbyo-wordpiece-vocab50000'
# Pass the MeCab option directly as a keyword argument (equivalent to the
# previous **{...} dict-splat); -u names the MANBYO user dictionary file.
TOKENIZER = AutoTokenizer.from_pretrained(
    MODEL_NAME,
    mecab_kwargs={'mecab_option': '-u ../data/MANBYO_201907_Dic-utf8.dic'},
)

# Build a fresh split iterator for this run, stratified on the combined TNM label.
cv = fold.split(df['text'], df['TNM'])
trainer = MulticlassBERTBaseTrainer(
    model_name=MODEL_NAME,
    tokenizer=TOKENIZER,
    criterion=CRITERION,
    device=DEVICE,
    seed=SEED,
)
# training() yields three results, unpacked here as the T, N and M outputs
# (presumably cross-validation predictions -- confirm in src.train).
cv_predsT, cv_predsN, cv_predsM = trainer.training(
    df,
    cv,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    learning_rate=LEARNING_RATE,
    max_length=MAX_LENGTH,
    early_stopping_rounds=EARLY_STOPPING_ROUNDS,
)
display_TNM_score(df['TNM'].values, cv_predsT, cv_predsN, cv_predsM)
# Scores recorded from an earlier run of this cell. The "BEST Epochs" and
# overall "macro F1" lines were missing from the record and are restored here
# from that run's printed output, matching the format of the other cells.
"""
BEST Epochs: [18, 22, 24, 20, 22]
macro F1: 0.7193 Accuracy: 0.7823
T | macro F1: 0.6279 Accuracy: 0.6939
N | macro F1: 0.6189 Accuracy: 0.7347
M | macro F1: 0.9111 Accuracy: 0.9184
TNM | macro F1: 0.1613 Accuracy: 0.4490
"""



Downloading (…)lve/main/config.json:   0%|          | 0.00/589 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/498M [00:00<?, ?B/s]

Some weights of the model checkpoint at alabnii/jmedroberta-base-manbyo-wordpiece-vocab50000 were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint at alabnii/jmedroberta-base-manbyo-wordpiece-vocab50000 and are newly initi

BEST Epochs: [18, 22, 24, 20, 22]
macro F1: 0.7193 Accuracy: 0.7823
T | macro F1: 0.6279 Accuracy: 0.6939
N | macro F1: 0.6189 Accuracy: 0.7347
M | macro F1: 0.9111 Accuracy: 0.9184
TNM | macro F1: 0.1613 Accuracy: 0.4490


In [6]:
# Tohoku University BERT v3 (base)
MODEL_NAME = 'cl-tohoku/bert-base-japanese-v3'
TOKENIZER = AutoTokenizer.from_pretrained(MODEL_NAME)

# Build a fresh split iterator for this run, stratified on the combined TNM label.
cv = fold.split(df['text'], df['TNM'])
trainer = MulticlassBERTBaseTrainer(
    model_name=MODEL_NAME,
    tokenizer=TOKENIZER,
    criterion=CRITERION,
    device=DEVICE,
    seed=SEED,
)
# training() yields three results, unpacked here as the T, N and M outputs
# (presumably cross-validation predictions -- confirm in src.train).
cv_predsT, cv_predsN, cv_predsM = trainer.training(
    df,
    cv,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    learning_rate=LEARNING_RATE,
    max_length=MAX_LENGTH,
    early_stopping_rounds=EARLY_STOPPING_ROUNDS,
)
display_TNM_score(df['TNM'].values, cv_predsT, cv_predsN, cv_predsM)
# Scores recorded from an earlier run of this cell.
"""
BEST Epochs: [20, 19, 20, 17, 22]
macro F1: 0.7404 Accuracy: 0.7721
T | macro F1: 0.7089 Accuracy: 0.7143
N | macro F1: 0.6105 Accuracy: 0.6939
M | macro F1: 0.9018 Accuracy: 0.9082
TNM | macro F1: 0.2216 Accuracy: 0.4898
"""

Downloading (…)okenizer_config.json:   0%|          | 0.00/251 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/231k [00:00<?, ?B/s]



Downloading (…)lve/main/config.json:   0%|          | 0.00/472 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/447M [00:00<?, ?B/s]

Some weights of the model checkpoint at cl-tohoku/bert-base-japanese-v3 were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at cl-tohoku/bert-base-japanese-v3 were not used when initializing BertM

BEST Epochs: [20, 19, 20, 17, 22]
macro F1: 0.7404 Accuracy: 0.7721
T | macro F1: 0.7089 Accuracy: 0.7143
N | macro F1: 0.6105 Accuracy: 0.6939
M | macro F1: 0.9018 Accuracy: 0.9082
TNM | macro F1: 0.2216 Accuracy: 0.4898


In [7]:
# Tohoku University BERT v3, character-level (base)
MODEL_NAME = 'cl-tohoku/bert-base-japanese-char-v3'
TOKENIZER = AutoTokenizer.from_pretrained(MODEL_NAME)

# Build a fresh split iterator for this run, stratified on the combined TNM label.
cv = fold.split(df['text'], df['TNM'])
trainer = MulticlassBERTBaseTrainer(
    model_name=MODEL_NAME,
    tokenizer=TOKENIZER,
    criterion=CRITERION,
    device=DEVICE,
    seed=SEED,
)
# training() yields three results, unpacked here as the T, N and M outputs
# (presumably cross-validation predictions -- confirm in src.train).
cv_predsT, cv_predsN, cv_predsM = trainer.training(
    df,
    cv,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    learning_rate=LEARNING_RATE,
    max_length=MAX_LENGTH,
    early_stopping_rounds=EARLY_STOPPING_ROUNDS,
)
display_TNM_score(df['TNM'].values, cv_predsT, cv_predsN, cv_predsM)
# Scores recorded from an earlier run of this cell.
"""
BEST Epochs: [19, 27, 16, 16, 22]
macro F1: 0.7563 Accuracy: 0.7993
T | macro F1: 0.6646 Accuracy: 0.7143
N | macro F1: 0.6818 Accuracy: 0.7551
M | macro F1: 0.9227 Accuracy: 0.9286
TNM | macro F1: 0.2953 Accuracy: 0.5510
"""

Downloading (…)okenizer_config.json:   0%|          | 0.00/251 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/27.9k [00:00<?, ?B/s]



Downloading (…)lve/main/config.json:   0%|          | 0.00/471 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/368M [00:00<?, ?B/s]

Some weights of the model checkpoint at cl-tohoku/bert-base-japanese-char-v3 were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at cl-tohoku/bert-base-japanese-char-v3 were not used when initiali

BEST Epochs: [19, 27, 16, 16, 22]
macro F1: 0.7563 Accuracy: 0.7993
T | macro F1: 0.6646 Accuracy: 0.7143
N | macro F1: 0.6818 Accuracy: 0.7551
M | macro F1: 0.9227 Accuracy: 0.9286
TNM | macro F1: 0.2953 Accuracy: 0.5510


In [8]:
# Tohoku University BERT v2 (large)
MODEL_NAME = 'cl-tohoku/bert-large-japanese-v2'
TOKENIZER = AutoTokenizer.from_pretrained(MODEL_NAME)

# Build a fresh split iterator for this run, stratified on the combined TNM label.
cv = fold.split(df['text'], df['TNM'])
trainer = MulticlassBERTBaseTrainer(
    model_name=MODEL_NAME,
    tokenizer=TOKENIZER,
    criterion=CRITERION,
    device=DEVICE,
    seed=SEED,
)
# training() yields three results, unpacked here as the T, N and M outputs
# (presumably cross-validation predictions -- confirm in src.train).
cv_predsT, cv_predsN, cv_predsM = trainer.training(
    df,
    cv,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    learning_rate=LEARNING_RATE,
    max_length=MAX_LENGTH,
    early_stopping_rounds=EARLY_STOPPING_ROUNDS,
)
display_TNM_score(df['TNM'].values, cv_predsT, cv_predsN, cv_predsM)
# Scores recorded from an earlier run of this cell.
"""
BEST Epochs: [11, 14, 13, 13, 15]
macro F1: 0.7259 Accuracy: 0.7721
T | macro F1: 0.6109 Accuracy: 0.6429
N | macro F1: 0.6234 Accuracy: 0.7245
M | macro F1: 0.9433 Accuracy: 0.9490
TNM | macro F1: 0.2519 Accuracy: 0.4388
"""

Downloading (…)okenizer_config.json:   0%|          | 0.00/251 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/231k [00:00<?, ?B/s]



Downloading (…)lve/main/config.json:   0%|          | 0.00/473 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.35G [00:00<?, ?B/s]

Some weights of the model checkpoint at cl-tohoku/bert-large-japanese-v2 were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at cl-tohoku/bert-large-japanese-v2 were not used when initializing Ber

BEST Epochs: [11, 14, 13, 13, 15]
macro F1: 0.7259 Accuracy: 0.7721
T | macro F1: 0.6109 Accuracy: 0.6429
N | macro F1: 0.6234 Accuracy: 0.7245
M | macro F1: 0.9433 Accuracy: 0.9490
TNM | macro F1: 0.2519 Accuracy: 0.4388


In [3]:
# Tohoku University BERT v2, character-level (large)
MODEL_NAME = 'cl-tohoku/bert-large-japanese-char-v2'
TOKENIZER = AutoTokenizer.from_pretrained(MODEL_NAME)

# Build a fresh split iterator for this run, stratified on the combined TNM label.
cv = fold.split(df['text'], df['TNM'])
trainer = MulticlassBERTBaseTrainer(
    model_name=MODEL_NAME,
    tokenizer=TOKENIZER,
    criterion=CRITERION,
    device=DEVICE,
    seed=SEED,
)
# training() yields three results, unpacked here as the T, N and M outputs
# (presumably cross-validation predictions -- confirm in src.train).
cv_predsT, cv_predsN, cv_predsM = trainer.training(
    df,
    cv,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    learning_rate=LEARNING_RATE,
    max_length=MAX_LENGTH,
    early_stopping_rounds=EARLY_STOPPING_ROUNDS,
)
display_TNM_score(df['TNM'].values, cv_predsT, cv_predsN, cv_predsM)
# Scores recorded from an earlier run of this cell.
"""
BEST Epochs: [24, 11, 17, 13, 18]
macro F1: 0.7066 Accuracy: 0.7789
T | macro F1: 0.6673 Accuracy: 0.6939
N | macro F1: 0.5307 Accuracy: 0.7143
M | macro F1: 0.9217 Accuracy: 0.9286
TNM | macro F1: 0.2833 Accuracy: 0.5000
"""

Downloading (…)okenizer_config.json:   0%|          | 0.00/251 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/27.9k [00:00<?, ?B/s]



Downloading (…)lve/main/config.json:   0%|          | 0.00/472 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.25G [00:00<?, ?B/s]

Some weights of the model checkpoint at cl-tohoku/bert-large-japanese-char-v2 were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at cl-tohoku/bert-large-japanese-char-v2 were not used when initia

BEST Epochs: [24, 11, 17, 13, 18]
macro F1: 0.7066 Accuracy: 0.7789
T | macro F1: 0.6673 Accuracy: 0.6939
N | macro F1: 0.5307 Accuracy: 0.7143
M | macro F1: 0.9217 Accuracy: 0.9286
TNM | macro F1: 0.2833 Accuracy: 0.5000
