# Lemon のモジュールを利用して、BERT-miniモデルを作成する

BERT mini model は 以下の設定
* epoch 5 までのモデル
* batch size 32
* linearly decreasing learning rate from 3 × 10⁻⁵ (`3e-5`) with 50 warmup steps
* 16-bit precision optimization
* 1, 3, 5, 10, or 20 epochs depending on the dataset size
* The final model is the one from the epoch with the highest F1 score on the validation dataset.

In [1]:
# Index of the physical GPU this notebook is allowed to use.
gpu_id = 5

# Root directory handed to the dataset loaders, and root directory under which
# one sub-directory per dataset is created for the trained models.
dataset_out_root_dir = "../../data/lemon/datasets"
model_out_root_dir = "../../data/lemon/model/bert-mini"

# Names of the datasets to train on, grouped by family.
dataset_names = (
    # structured
    [
        "structured_amazon_google",
        "structured_beer",
        "structured_dblp_acm",
        "structured_dblp_google_scholar",
        "structured_fodors_zagat",
        "structured_walmart_amazon",
        "structured_itunes_amazon",
    ]
    # dirty
    + [
        "dirty_dblp_acm",
        "dirty_dblp_google_scholar",
        "dirty_walmart_amazon",
        "dirty_itunes_amazon",
    ]
    # textual
    + [
        "textual_abt_buy",
        "textual_company",
    ]
)


In [2]:
# Pin the GPU that may be used *before* the torch module is imported.
# Without this, every GPU in the system would be used.
import os

os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id)


In [3]:
import torch

# Report the CUDA environment visible to this process.
# `torch.cuda.current_device()` initialises CUDA and raises when no device is
# usable, so it is only queried when CUDA is available; otherwise None is
# printed and the cell still completes.
cuda_available = torch.cuda.is_available()
print("CUDA =", cuda_available)
print("CUDA DEVICES =", torch.cuda.device_count())
print(
    "CUDA CURRENT DEVICE_ID = ",
    torch.cuda.current_device() if cuda_available else None,
)


CUDA = True
CUDA DEVICES = 1
CUDA CURRENT DEVICE_ID =  0


In [4]:
# Set Random Seeds and Reproducibility
import random

import numpy as np


def set_seed(seed: int):
    """
    Seed every random number generator used in this notebook with ``seed``.

    The seed is applied, in order, to Python's ``random`` module, to NumPy's
    global generator, to torch's default generator and to torch's CUDA
    generators on all devices.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)


# Reproducibility: ask cuDNN for deterministic behaviour, turn off its
# benchmark mode, and fix every RNG seed to 0.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
set_seed(0)


In [5]:
import pickle
import pathlib

import lemon.utils.datasets.deepmatcher
from transformers import AutoModelForSequenceClassification
from transformers.trainer_callback import TrainerState


def get_best_model_checkpoint_dir(checkpoints_dir_path: pathlib.Path):
    """
    Return the directory of the best checkpoint recorded in the trainer state.

    The sub-directory of ``checkpoints_dir_path`` with the highest step number
    (directories named ``<prefix>-<step>``, e.g. ``checkpoint-215``) is located,
    its ``trainer_state.json`` is loaded, and the ``best_model_checkpoint``
    stored there is returned. Entries that do not follow the ``<prefix>-<step>``
    naming, and plain files, are ignored instead of aborting the lookup.

    Args:
        checkpoints_dir_path: Directory that contains the checkpoint
            sub-directories.

    Returns:
        ``pathlib.Path`` built from the ``best_model_checkpoint`` value of the
        loaded trainer state.

    Raises:
        FileNotFoundError: If ``checkpoints_dir_path`` does not exist or holds
            no ``<prefix>-<step>`` directory.
        ValueError: If the trainer state has no ``best_model_checkpoint``.
    """
    numbered_dirs = []
    for entry in checkpoints_dir_path.iterdir():
        _, dash, step = entry.name.rpartition("-")
        # Skip anything that is not a "<prefix>-<integer>" directory so stray
        # files or folders cannot break the integer conversion below.
        if dash and step.isdecimal() and entry.is_dir():
            numbered_dirs.append((int(step), entry))

    if not numbered_dirs:
        raise FileNotFoundError(
            f"no checkpoint directories found in {checkpoints_dir_path}"
        )

    _, last_checkpoints_dir = max(numbered_dirs, key=lambda pair: pair[0])
    state = TrainerState.load_from_json(
        str(last_checkpoints_dir / "trainer_state.json")
    )
    # NOTE(review): best_model_checkpoint is presumably None when the trainer
    # was not tracking a best model -- confirm against the TrainerState docs.
    if state.best_model_checkpoint is None:
        raise ValueError(
            f"{last_checkpoints_dir}/trainer_state.json does not record a "
            "best_model_checkpoint"
        )
    return pathlib.Path(state.best_model_checkpoint)


# For every dataset: fine-tune the BERT-mini matcher, evaluate it on the test
# split, store the metrics, then reload the best checkpoint from disk and check
# that it reproduces the same metrics.
for dataset_name in dataset_names:
    print("=============================")
    print(dataset_name)
    print("=============================")
    # The loader is looked up by dataset name on the deepmatcher module.
    load_dataset_func = getattr(lemon.utils.datasets.deepmatcher, dataset_name)
    dataset = load_dataset_func(dataset_out_root_dir)
    output_dir_path = pathlib.Path(model_out_root_dir) / dataset_name
    output_dir_path.mkdir(parents=True, exist_ok=True)
    checkpoints_dir_path = output_dir_path / "checkpoints"

    matcher = lemon.utils.matchers.TransformerMatcher(
        "google/bert_uncased_L-4_H-256_A-4",
        tokenizer_args={"model_max_length": 256},
        training_args={
            "output_dir": str(checkpoints_dir_path),
            "logging_dir": str(output_dir_path / "logs"),
            "per_device_train_batch_size": 32,
            "learning_rate": 3e-5,
            "warmup_steps": 50,
            "fp16": True,
            "num_train_epochs": 20,
        },
    )
    print("training...")
    ret = matcher.fit(
        dataset.train.records.a,
        dataset.train.records.b,
        dataset.train.record_id_pairs,
        dataset.train.labels,
        dataset.val.record_id_pairs,
        dataset.val.labels,
    )
    # `display` is not imported in this file; presumably supplied by the
    # notebook (IPython) environment.
    display(ret)
    print("training...done")
    eval_result = matcher.evaluate(
        dataset.test.records.a,
        dataset.test.records.b,
        dataset.test.record_id_pairs,
        dataset.test.labels,
    )
    print(eval_result)
    with (output_dir_path / "eval_result.pickle").open("wb") as f:
        pickle.dump(eval_result, f)

    print("reload test...")
    # Resolve the best checkpoint once and reuse it for both the log line and
    # the reload, instead of scanning the directory and parsing the state twice.
    best_checkpoint_dir = get_best_model_checkpoint_dir(checkpoints_dir_path)
    print(best_checkpoint_dir)
    bert_mini_model_reload = AutoModelForSequenceClassification.from_pretrained(
        best_checkpoint_dir
    )
    matcher_reload = lemon.utils.matchers.TransformerMatcher(bert_mini_model_reload)
    eval_result_reload = matcher_reload.evaluate(
        dataset.test.records.a,
        dataset.test.records.b,
        dataset.test.record_id_pairs,
        dataset.test.labels,
    )
    print(eval_result_reload)
    # Sanity check: the model reloaded from disk must give exactly the metrics
    # of the in-memory matcher.
    assert eval_result == eval_result_reload


structured_amazon_google


Some weights of the model checkpoint at google/bert_uncased_L-4_H-256_A-4 were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification w

training...


Using amp fp16 backend
***** Running training *****
  Num examples = 6874
  Num Epochs = 20
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 4300


Epoch,Training Loss,Validation Loss,F1,Precision,Recall
1,0.2179,0.205045,0.529563,0.664516,0.440171
2,0.1513,0.176664,0.630021,0.623431,0.636752
3,0.1593,0.165656,0.64557,0.6375,0.653846
4,0.2201,0.175556,0.685225,0.686695,0.683761
5,0.1566,0.185241,0.688797,0.669355,0.709402
6,0.1597,0.179734,0.71068,0.651246,0.782051
7,0.078,0.177274,0.696356,0.661538,0.735043
8,0.126,0.196142,0.695297,0.666667,0.726496
9,0.0885,0.205477,0.694184,0.618729,0.790598
10,0.1307,0.229534,0.695327,0.61794,0.794872


***** Running Evaluation *****
  Num examples = 2293
  Batch size = 64
Saving model checkpoint to ../../data/lemon/model/bert-mini/structured_amazon_google/checkpoints/checkpoint-215
Configuration saved in ../../data/lemon/model/bert-mini/structured_amazon_google/checkpoints/checkpoint-215/config.json
Model weights saved in ../../data/lemon/model/bert-mini/structured_amazon_google/checkpoints/checkpoint-215/pytorch_model.bin
tokenizer config file saved in ../../data/lemon/model/bert-mini/structured_amazon_google/checkpoints/checkpoint-215/tokenizer_config.json
Special tokens file saved in ../../data/lemon/model/bert-mini/structured_amazon_google/checkpoints/checkpoint-215/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 2293
  Batch size = 64
Saving model checkpoint to ../../data/lemon/model/bert-mini/structured_amazon_google/checkpoints/checkpoint-430
Configuration saved in ../../data/lemon/model/bert-mini/structured_amazon_google/checkpoints/checkpoint-430/conf

None

training...done


PyTorch: setting up devices
Using amp fp16 backend
***** Running Prediction *****
  Num examples = 2293
  Batch size = 64


loading configuration file ../../data/lemon/model/bert-mini/structured_amazon_google/checkpoints/checkpoint-1290/config.json
Model config BertConfig {
  "_name_or_path": "google/bert_uncased_L-4_H-256_A-4",
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 256,
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 4,
  "num_hidden_layers": 4,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4.9.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weights file ../../data/lemon/model/bert-mini/structured_amazon_google/checkpoints/checkpoint-1290/pytorch_model.bin


{'precision': 0.6642335766423357, 'recall': 0.7777777777777778, 'f1': 0.7165354330708661}
reload test...
../../data/lemon/model/bert-mini/structured_amazon_google/checkpoints/checkpoint-1290


All model checkpoint weights were used when initializing BertForSequenceClassification.

All the weights of BertForSequenceClassification were initialized from the model checkpoint at ../../data/lemon/model/bert-mini/structured_amazon_google/checkpoints/checkpoint-1290.
If your task is similar to the task the model of the checkpoint was trained on, you can already use BertForSequenceClassification for predictions without further training.
Didn't find file ../../data/lemon/model/bert-mini/structured_amazon_google/checkpoints/checkpoint-1290/added_tokens.json. We won't load it.
loading file ../../data/lemon/model/bert-mini/structured_amazon_google/checkpoints/checkpoint-1290/vocab.txt
loading file ../../data/lemon/model/bert-mini/structured_amazon_google/checkpoints/checkpoint-1290/tokenizer.json
loading file None
loading file ../../data/lemon/model/bert-mini/structured_amazon_google/checkpoints/checkpoint-1290/special_tokens_map.json
loading file ../../data/lemon/model/bert-mini/structu

{'precision': 0.6642335766423357, 'recall': 0.7777777777777778, 'f1': 0.7165354330708661}
structured_beer


Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file https://huggingface.co/google/bert_uncased_L-4_H-256_A-4/resolve/main/config.json from cache at /home/hironori/.cache/huggingface/transformers/d4bb65fac1f629b879ae238c27022568307f79a7aa4d55060745f828812b4dd0.2bbf3a8935447a5e3abcc0d62367c14e846bd3595ebb28a305e457ad1d24ff8f
Model config BertConfig {
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 256,
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 4,
  "num_hidden_layers": 4,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.9.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading file https://huggingface.co/google/bert_uncased_L-4_H-256_A

training...


Epoch,Training Loss,Validation Loss,F1,Precision,Recall
1,No log,0.697697,0.27957,0.164557,0.928571
2,0.720800,0.588845,0.0,0.0,0.0
3,0.645700,0.476619,0.0,0.0,0.0
4,0.488300,0.438265,0.0,0.0,0.0
5,0.457900,0.426094,0.0,0.0,0.0
6,0.409800,0.41327,0.0,0.0,0.0
7,0.435900,0.353854,0.0,0.0,0.0
8,0.309600,0.304806,0.0,0.0,0.0
9,0.272200,0.27203,0.0,0.0,0.0
10,0.222800,0.246353,0.615385,0.666667,0.571429


***** Running Evaluation *****
  Num examples = 91
  Batch size = 64
Saving model checkpoint to ../../data/lemon/model/bert-mini/structured_beer/checkpoints/checkpoint-9
Configuration saved in ../../data/lemon/model/bert-mini/structured_beer/checkpoints/checkpoint-9/config.json
Model weights saved in ../../data/lemon/model/bert-mini/structured_beer/checkpoints/checkpoint-9/pytorch_model.bin
tokenizer config file saved in ../../data/lemon/model/bert-mini/structured_beer/checkpoints/checkpoint-9/tokenizer_config.json
Special tokens file saved in ../../data/lemon/model/bert-mini/structured_beer/checkpoints/checkpoint-9/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 91
  Batch size = 64
Saving model checkpoint to ../../data/lemon/model/bert-mini/structured_beer/checkpoints/checkpoint-18
Configuration saved in ../../data/lemon/model/bert-mini/structured_beer/checkpoints/checkpoint-18/config.json
Model weights saved in ../../data/lemon/model/bert-mini/structured_beer

None

PyTorch: setting up devices
Using amp fp16 backend
***** Running Prediction *****
  Num examples = 91
  Batch size = 64


training...done


loading configuration file ../../data/lemon/model/bert-mini/structured_beer/checkpoints/checkpoint-153/config.json
Model config BertConfig {
  "_name_or_path": "google/bert_uncased_L-4_H-256_A-4",
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 256,
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 4,
  "num_hidden_layers": 4,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4.9.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weights file ../../data/lemon/model/bert-mini/structured_beer/checkpoints/checkpoint-153/pytorch_model.bin


{'precision': 0.5833333333333334, 'recall': 1.0, 'f1': 0.7368421052631579}
reload test...
../../data/lemon/model/bert-mini/structured_beer/checkpoints/checkpoint-153


All model checkpoint weights were used when initializing BertForSequenceClassification.

All the weights of BertForSequenceClassification were initialized from the model checkpoint at ../../data/lemon/model/bert-mini/structured_beer/checkpoints/checkpoint-153.
If your task is similar to the task the model of the checkpoint was trained on, you can already use BertForSequenceClassification for predictions without further training.
Didn't find file ../../data/lemon/model/bert-mini/structured_beer/checkpoints/checkpoint-153/added_tokens.json. We won't load it.
loading file ../../data/lemon/model/bert-mini/structured_beer/checkpoints/checkpoint-153/vocab.txt
loading file ../../data/lemon/model/bert-mini/structured_beer/checkpoints/checkpoint-153/tokenizer.json
loading file None
loading file ../../data/lemon/model/bert-mini/structured_beer/checkpoints/checkpoint-153/special_tokens_map.json
loading file ../../data/lemon/model/bert-mini/structured_beer/checkpoints/checkpoint-153/tokenizer_conf

{'precision': 0.5833333333333334, 'recall': 1.0, 'f1': 0.7368421052631579}
structured_dblp_acm


Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file https://huggingface.co/google/bert_uncased_L-4_H-256_A-4/resolve/main/config.json from cache at /home/hironori/.cache/huggingface/transformers/d4bb65fac1f629b879ae238c27022568307f79a7aa4d55060745f828812b4dd0.2bbf3a8935447a5e3abcc0d62367c14e846bd3595ebb28a305e457ad1d24ff8f
Model config BertConfig {
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 256,
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 4,
  "num_hidden_layers": 4,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.9.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading file https://huggingface.co/google/bert_uncased_L-4_H-256_A

training...


PyTorch: setting up devices
Using amp fp16 backend
***** Running training *****
  Num examples = 7417
  Num Epochs = 20
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 4640


Epoch,Training Loss,Validation Loss,F1,Precision,Recall
1,0.0919,0.065357,0.952596,0.954751,0.95045
2,0.0611,0.057279,0.960265,0.941558,0.97973
3,0.0448,0.042003,0.96875,0.960177,0.977477
4,0.0185,0.04831,0.971111,0.958333,0.984234
5,0.0164,0.047152,0.974359,0.96468,0.984234
6,0.02,0.032681,0.978771,0.971175,0.986486
7,0.0308,0.042779,0.975501,0.964758,0.986486
8,0.0011,0.044452,0.973154,0.966667,0.97973
9,0.0008,0.045661,0.975391,0.968889,0.981982
10,0.0008,0.043073,0.976484,0.971047,0.981982


***** Running Evaluation *****
  Num examples = 2473
  Batch size = 64
Saving model checkpoint to ../../data/lemon/model/bert-mini/structured_dblp_acm/checkpoints/checkpoint-232
Configuration saved in ../../data/lemon/model/bert-mini/structured_dblp_acm/checkpoints/checkpoint-232/config.json
Model weights saved in ../../data/lemon/model/bert-mini/structured_dblp_acm/checkpoints/checkpoint-232/pytorch_model.bin
tokenizer config file saved in ../../data/lemon/model/bert-mini/structured_dblp_acm/checkpoints/checkpoint-232/tokenizer_config.json
Special tokens file saved in ../../data/lemon/model/bert-mini/structured_dblp_acm/checkpoints/checkpoint-232/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 2473
  Batch size = 64
Saving model checkpoint to ../../data/lemon/model/bert-mini/structured_dblp_acm/checkpoints/checkpoint-464
Configuration saved in ../../data/lemon/model/bert-mini/structured_dblp_acm/checkpoints/checkpoint-464/config.json
Model weights saved in ../.

None

PyTorch: setting up devices
Using amp fp16 backend
***** Running Prediction *****
  Num examples = 2473
  Batch size = 64


training...done


loading configuration file ../../data/lemon/model/bert-mini/structured_dblp_acm/checkpoints/checkpoint-3944/config.json
Model config BertConfig {
  "_name_or_path": "google/bert_uncased_L-4_H-256_A-4",
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 256,
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 4,
  "num_hidden_layers": 4,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4.9.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weights file ../../data/lemon/model/bert-mini/structured_dblp_acm/checkpoints/checkpoint-3944/pytorch_model.bin
All model checkpoint weig

{'precision': 0.9842342342342343, 'recall': 0.9842342342342343, 'f1': 0.9842342342342343}
reload test...
../../data/lemon/model/bert-mini/structured_dblp_acm/checkpoints/checkpoint-3944


PyTorch: setting up devices
Using amp fp16 backend
***** Running Prediction *****
  Num examples = 2473
  Batch size = 64


{'precision': 0.9842342342342343, 'recall': 0.9842342342342343, 'f1': 0.9842342342342343}
structured_dblp_google_scholar


Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file https://huggingface.co/google/bert_uncased_L-4_H-256_A-4/resolve/main/config.json from cache at /home/hironori/.cache/huggingface/transformers/d4bb65fac1f629b879ae238c27022568307f79a7aa4d55060745f828812b4dd0.2bbf3a8935447a5e3abcc0d62367c14e846bd3595ebb28a305e457ad1d24ff8f
Model config BertConfig {
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 256,
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 4,
  "num_hidden_layers": 4,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.9.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading file https://huggingface.co/google/bert_uncased_L-4_H-256_A

training...


PyTorch: setting up devices
Using amp fp16 backend
***** Running training *****
  Num examples = 17223
  Num Epochs = 20
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 10780


Epoch,Training Loss,Validation Loss,F1,Precision,Recall
1,0.1197,0.097321,0.919596,0.948361,0.892523
2,0.034,0.072524,0.934121,0.90893,0.960748
3,0.0447,0.074255,0.939352,0.917186,0.962617
4,0.0275,0.069579,0.949248,0.954631,0.943925
5,0.035,0.075309,0.947658,0.931408,0.964486
6,0.0039,0.080249,0.948529,0.933092,0.964486
7,0.011,0.091971,0.947863,0.952786,0.942991
8,0.0188,0.093714,0.94722,0.955323,0.939252
9,0.0199,0.100588,0.946729,0.946729,0.946729
10,0.0243,0.113559,0.942453,0.951429,0.933645


***** Running Evaluation *****
  Num examples = 5742
  Batch size = 64
Saving model checkpoint to ../../data/lemon/model/bert-mini/structured_dblp_google_scholar/checkpoints/checkpoint-539
Configuration saved in ../../data/lemon/model/bert-mini/structured_dblp_google_scholar/checkpoints/checkpoint-539/config.json
Model weights saved in ../../data/lemon/model/bert-mini/structured_dblp_google_scholar/checkpoints/checkpoint-539/pytorch_model.bin
tokenizer config file saved in ../../data/lemon/model/bert-mini/structured_dblp_google_scholar/checkpoints/checkpoint-539/tokenizer_config.json
Special tokens file saved in ../../data/lemon/model/bert-mini/structured_dblp_google_scholar/checkpoints/checkpoint-539/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 5742
  Batch size = 64
Saving model checkpoint to ../../data/lemon/model/bert-mini/structured_dblp_google_scholar/checkpoints/checkpoint-1078
Configuration saved in ../../data/lemon/model/bert-mini/structured_dblp_goo

None

training...done


PyTorch: setting up devices
Using amp fp16 backend
***** Running Prediction *****
  Num examples = 5742
  Batch size = 64


loading configuration file ../../data/lemon/model/bert-mini/structured_dblp_google_scholar/checkpoints/checkpoint-9163/config.json
Model config BertConfig {
  "_name_or_path": "google/bert_uncased_L-4_H-256_A-4",
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 256,
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 4,
  "num_hidden_layers": 4,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4.9.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weights file ../../data/lemon/model/bert-mini/structured_dblp_google_scholar/checkpoints/checkpoint-9163/pytorch_model.bin
All

{'precision': 0.9343664539653601, 'recall': 0.9579439252336449, 'f1': 0.9460083064143978}
reload test...
../../data/lemon/model/bert-mini/structured_dblp_google_scholar/checkpoints/checkpoint-9163


PyTorch: setting up devices
Using amp fp16 backend
***** Running Prediction *****
  Num examples = 5742
  Batch size = 64


{'precision': 0.9343664539653601, 'recall': 0.9579439252336449, 'f1': 0.9460083064143978}
structured_fodors_zagat


Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file https://huggingface.co/google/bert_uncased_L-4_H-256_A-4/resolve/main/config.json from cache at /home/hironori/.cache/huggingface/transformers/d4bb65fac1f629b879ae238c27022568307f79a7aa4d55060745f828812b4dd0.2bbf3a8935447a5e3abcc0d62367c14e846bd3595ebb28a305e457ad1d24ff8f
Model config BertConfig {
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 256,
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 4,
  "num_hidden_layers": 4,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.9.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading file https://huggingface.co/google/bert_uncased_L-4_H-256_A

training...


Epoch,Training Loss,Validation Loss,F1,Precision,Recall
1,0.7522,0.593497,0.0,0.0,0.0
2,0.4914,0.383444,0.0,0.0,0.0
3,0.3745,0.334352,0.0,0.0,0.0
4,0.2693,0.229094,0.0,0.0,0.0
5,0.1953,0.145318,0.93617,0.88,1.0
6,0.1333,0.085233,0.956522,0.916667,1.0
7,0.0706,0.075395,0.897959,0.814815,1.0
8,0.0431,0.04024,0.977778,0.956522,1.0
9,0.0386,0.076857,0.897959,0.814815,1.0
10,0.0267,0.033907,0.977778,0.956522,1.0


***** Running Evaluation *****
  Num examples = 190
  Batch size = 64
Saving model checkpoint to ../../data/lemon/model/bert-mini/structured_fodors_zagat/checkpoints/checkpoint-18
Configuration saved in ../../data/lemon/model/bert-mini/structured_fodors_zagat/checkpoints/checkpoint-18/config.json
Model weights saved in ../../data/lemon/model/bert-mini/structured_fodors_zagat/checkpoints/checkpoint-18/pytorch_model.bin
tokenizer config file saved in ../../data/lemon/model/bert-mini/structured_fodors_zagat/checkpoints/checkpoint-18/tokenizer_config.json
Special tokens file saved in ../../data/lemon/model/bert-mini/structured_fodors_zagat/checkpoints/checkpoint-18/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 190
  Batch size = 64
Saving model checkpoint to ../../data/lemon/model/bert-mini/structured_fodors_zagat/checkpoints/checkpoint-36
Configuration saved in ../../data/lemon/model/bert-mini/structured_fodors_zagat/checkpoints/checkpoint-36/config.json
Model we

None

PyTorch: setting up devices
Using amp fp16 backend
***** Running Prediction *****
  Num examples = 189
  Batch size = 64


training...done


loading configuration file ../../data/lemon/model/bert-mini/structured_fodors_zagat/checkpoints/checkpoint-144/config.json
Model config BertConfig {
  "_name_or_path": "google/bert_uncased_L-4_H-256_A-4",
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 256,
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 4,
  "num_hidden_layers": 4,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4.9.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weights file ../../data/lemon/model/bert-mini/structured_fodors_zagat/checkpoints/checkpoint-144/pytorch_model.bin
All model checkpoin

{'precision': 1.0, 'recall': 0.9545454545454546, 'f1': 0.9767441860465117}
reload test...
../../data/lemon/model/bert-mini/structured_fodors_zagat/checkpoints/checkpoint-144


PyTorch: setting up devices
Using amp fp16 backend
***** Running Prediction *****
  Num examples = 189
  Batch size = 64


{'precision': 1.0, 'recall': 0.9545454545454546, 'f1': 0.9767441860465117}
structured_walmart_amazon


Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file https://huggingface.co/google/bert_uncased_L-4_H-256_A-4/resolve/main/config.json from cache at /home/hironori/.cache/huggingface/transformers/d4bb65fac1f629b879ae238c27022568307f79a7aa4d55060745f828812b4dd0.2bbf3a8935447a5e3abcc0d62367c14e846bd3595ebb28a305e457ad1d24ff8f
Model config BertConfig {
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 256,
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 4,
  "num_hidden_layers": 4,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.9.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading file https://huggingface.co/google/bert_uncased_L-4_H-256_A

training...


PyTorch: setting up devices
Using amp fp16 backend
***** Running training *****
  Num examples = 6144
  Num Epochs = 20
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 3840


Epoch,Training Loss,Validation Loss,F1,Precision,Recall
1,0.1936,0.188023,0.657825,0.673913,0.642487
2,0.135,0.132691,0.740047,0.675214,0.818653
3,0.0782,0.12313,0.778667,0.802198,0.756477
4,0.0784,0.134115,0.782609,0.822857,0.746114
5,0.0782,0.15708,0.75737,0.673387,0.865285
6,0.0513,0.157639,0.780952,0.722467,0.849741
7,0.0722,0.157432,0.790361,0.738739,0.849741
8,0.0705,0.183607,0.760181,0.674699,0.870466
9,0.0438,0.169929,0.79803,0.760563,0.839378
10,0.0436,0.201277,0.773893,0.70339,0.860104


***** Running Evaluation *****
  Num examples = 2049
  Batch size = 64
Saving model checkpoint to ../../data/lemon/model/bert-mini/structured_walmart_amazon/checkpoints/checkpoint-192
Configuration saved in ../../data/lemon/model/bert-mini/structured_walmart_amazon/checkpoints/checkpoint-192/config.json
Model weights saved in ../../data/lemon/model/bert-mini/structured_walmart_amazon/checkpoints/checkpoint-192/pytorch_model.bin
tokenizer config file saved in ../../data/lemon/model/bert-mini/structured_walmart_amazon/checkpoints/checkpoint-192/tokenizer_config.json
Special tokens file saved in ../../data/lemon/model/bert-mini/structured_walmart_amazon/checkpoints/checkpoint-192/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 2049
  Batch size = 64
Saving model checkpoint to ../../data/lemon/model/bert-mini/structured_walmart_amazon/checkpoints/checkpoint-384
Configuration saved in ../../data/lemon/model/bert-mini/structured_walmart_amazon/checkpoints/checkpoint-3

None

PyTorch: setting up devices
Using amp fp16 backend
***** Running Prediction *****
  Num examples = 2049
  Batch size = 64


training...done


loading configuration file ../../data/lemon/model/bert-mini/structured_walmart_amazon/checkpoints/checkpoint-2496/config.json
Model config BertConfig {
  "_name_or_path": "google/bert_uncased_L-4_H-256_A-4",
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 256,
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 4,
  "num_hidden_layers": 4,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4.9.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weights file ../../data/lemon/model/bert-mini/structured_walmart_amazon/checkpoints/checkpoint-2496/pytorch_model.bin


{'precision': 0.8241758241758241, 'recall': 0.7772020725388601, 'f1': 0.8}
reload test...
../../data/lemon/model/bert-mini/structured_walmart_amazon/checkpoints/checkpoint-2496


All model checkpoint weights were used when initializing BertForSequenceClassification.

All the weights of BertForSequenceClassification were initialized from the model checkpoint at ../../data/lemon/model/bert-mini/structured_walmart_amazon/checkpoints/checkpoint-2496.
If your task is similar to the task the model of the checkpoint was trained on, you can already use BertForSequenceClassification for predictions without further training.
Didn't find file ../../data/lemon/model/bert-mini/structured_walmart_amazon/checkpoints/checkpoint-2496/added_tokens.json. We won't load it.
loading file ../../data/lemon/model/bert-mini/structured_walmart_amazon/checkpoints/checkpoint-2496/vocab.txt
loading file ../../data/lemon/model/bert-mini/structured_walmart_amazon/checkpoints/checkpoint-2496/tokenizer.json
loading file None
loading file ../../data/lemon/model/bert-mini/structured_walmart_amazon/checkpoints/checkpoint-2496/special_tokens_map.json
loading file ../../data/lemon/model/bert-mini/st

{'precision': 0.8241758241758241, 'recall': 0.7772020725388601, 'f1': 0.8}
structured_itunes_amazon


Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file https://huggingface.co/google/bert_uncased_L-4_H-256_A-4/resolve/main/config.json from cache at /home/hironori/.cache/huggingface/transformers/d4bb65fac1f629b879ae238c27022568307f79a7aa4d55060745f828812b4dd0.2bbf3a8935447a5e3abcc0d62367c14e846bd3595ebb28a305e457ad1d24ff8f
Model config BertConfig {
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 256,
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 4,
  "num_hidden_layers": 4,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.9.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading file https://huggingface.co/google/bert_uncased_L-4_H-256_A

training...


Epoch,Training Loss,Validation Loss,F1,Precision,Recall
1,0.7068,0.677378,0.688525,0.617647,0.777778
2,0.6489,0.582895,0.0,0.0,0.0
3,0.5555,0.540179,0.0,0.0,0.0
4,0.5436,0.49134,0.0,0.0,0.0
5,0.4861,0.400731,0.0,0.0,0.0
6,0.4717,0.329969,0.486486,0.9,0.333333
7,0.3096,0.287391,0.852459,0.764706,0.962963
8,0.264,0.238446,0.881356,0.8125,0.962963
9,0.2149,0.219264,0.881356,0.8125,0.962963
10,0.1819,0.19986,0.881356,0.8125,0.962963


***** Running Evaluation *****
  Num examples = 109
  Batch size = 64
Saving model checkpoint to ../../data/lemon/model/bert-mini/structured_itunes_amazon/checkpoints/checkpoint-11
Configuration saved in ../../data/lemon/model/bert-mini/structured_itunes_amazon/checkpoints/checkpoint-11/config.json
Model weights saved in ../../data/lemon/model/bert-mini/structured_itunes_amazon/checkpoints/checkpoint-11/pytorch_model.bin
tokenizer config file saved in ../../data/lemon/model/bert-mini/structured_itunes_amazon/checkpoints/checkpoint-11/tokenizer_config.json
Special tokens file saved in ../../data/lemon/model/bert-mini/structured_itunes_amazon/checkpoints/checkpoint-11/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 109
  Batch size = 64
Saving model checkpoint to ../../data/lemon/model/bert-mini/structured_itunes_amazon/checkpoints/checkpoint-22
Configuration saved in ../../data/lemon/model/bert-mini/structured_itunes_amazon/checkpoints/checkpoint-22/config.json
M

None

PyTorch: setting up devices
Using amp fp16 backend
***** Running Prediction *****
  Num examples = 109
  Batch size = 64


training...done


loading configuration file ../../data/lemon/model/bert-mini/structured_itunes_amazon/checkpoints/checkpoint-88/config.json
Model config BertConfig {
  "_name_or_path": "google/bert_uncased_L-4_H-256_A-4",
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 256,
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 4,
  "num_hidden_layers": 4,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4.9.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weights file ../../data/lemon/model/bert-mini/structured_itunes_amazon/checkpoints/checkpoint-88/pytorch_model.bin


{'precision': 0.8387096774193549, 'recall': 0.9629629629629629, 'f1': 0.896551724137931}
reload test...
../../data/lemon/model/bert-mini/structured_itunes_amazon/checkpoints/checkpoint-88


All model checkpoint weights were used when initializing BertForSequenceClassification.

All the weights of BertForSequenceClassification were initialized from the model checkpoint at ../../data/lemon/model/bert-mini/structured_itunes_amazon/checkpoints/checkpoint-88.
If your task is similar to the task the model of the checkpoint was trained on, you can already use BertForSequenceClassification for predictions without further training.
Didn't find file ../../data/lemon/model/bert-mini/structured_itunes_amazon/checkpoints/checkpoint-88/added_tokens.json. We won't load it.
loading file ../../data/lemon/model/bert-mini/structured_itunes_amazon/checkpoints/checkpoint-88/vocab.txt
loading file ../../data/lemon/model/bert-mini/structured_itunes_amazon/checkpoints/checkpoint-88/tokenizer.json
loading file None
loading file ../../data/lemon/model/bert-mini/structured_itunes_amazon/checkpoints/checkpoint-88/special_tokens_map.json
loading file ../../data/lemon/model/bert-mini/structured_itunes

{'precision': 0.8387096774193549, 'recall': 0.9629629629629629, 'f1': 0.896551724137931}
dirty_dblp_acm


Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file https://huggingface.co/google/bert_uncased_L-4_H-256_A-4/resolve/main/config.json from cache at /home/hironori/.cache/huggingface/transformers/d4bb65fac1f629b879ae238c27022568307f79a7aa4d55060745f828812b4dd0.2bbf3a8935447a5e3abcc0d62367c14e846bd3595ebb28a305e457ad1d24ff8f
Model config BertConfig {
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 256,
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 4,
  "num_hidden_layers": 4,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.9.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading file https://huggingface.co/google/bert_uncased_L-4_H-256_A

training...


PyTorch: setting up devices
Using amp fp16 backend
***** Running training *****
  Num examples = 7417
  Num Epochs = 20
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 4640


Epoch,Training Loss,Validation Loss,F1,Precision,Recall
1,0.0833,0.062872,0.955257,0.948889,0.961712
2,0.0557,0.051785,0.964286,0.955752,0.972973
3,0.0584,0.054063,0.964758,0.943966,0.986486
4,0.0296,0.049818,0.968958,0.954148,0.984234
5,0.0036,0.058948,0.96567,0.949891,0.981982
6,0.0289,0.048474,0.970917,0.964444,0.977477
7,0.0199,0.045744,0.972129,0.962472,0.981982
8,0.0146,0.055521,0.967956,0.950108,0.986486
9,0.0021,0.056251,0.973274,0.962555,0.984234
10,0.0038,0.046247,0.977477,0.977477,0.977477


***** Running Evaluation *****
  Num examples = 2473
  Batch size = 64
Saving model checkpoint to ../../data/lemon/model/bert-mini/dirty_dblp_acm/checkpoints/checkpoint-232
Configuration saved in ../../data/lemon/model/bert-mini/dirty_dblp_acm/checkpoints/checkpoint-232/config.json
Model weights saved in ../../data/lemon/model/bert-mini/dirty_dblp_acm/checkpoints/checkpoint-232/pytorch_model.bin
tokenizer config file saved in ../../data/lemon/model/bert-mini/dirty_dblp_acm/checkpoints/checkpoint-232/tokenizer_config.json
Special tokens file saved in ../../data/lemon/model/bert-mini/dirty_dblp_acm/checkpoints/checkpoint-232/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 2473
  Batch size = 64
Saving model checkpoint to ../../data/lemon/model/bert-mini/dirty_dblp_acm/checkpoints/checkpoint-464
Configuration saved in ../../data/lemon/model/bert-mini/dirty_dblp_acm/checkpoints/checkpoint-464/config.json
Model weights saved in ../../data/lemon/model/bert-mini/dirty_

None

PyTorch: setting up devices
Using amp fp16 backend
***** Running Prediction *****
  Num examples = 2473
  Batch size = 64


training...done


loading configuration file ../../data/lemon/model/bert-mini/dirty_dblp_acm/checkpoints/checkpoint-2320/config.json
Model config BertConfig {
  "_name_or_path": "google/bert_uncased_L-4_H-256_A-4",
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 256,
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 4,
  "num_hidden_layers": 4,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4.9.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weights file ../../data/lemon/model/bert-mini/dirty_dblp_acm/checkpoints/checkpoint-2320/pytorch_model.bin
All model checkpoint weights were u

{'precision': 0.9710467706013363, 'recall': 0.9819819819819819, 'f1': 0.9764837625979844}
reload test...
../../data/lemon/model/bert-mini/dirty_dblp_acm/checkpoints/checkpoint-2320


Didn't find file ../../data/lemon/model/bert-mini/dirty_dblp_acm/checkpoints/checkpoint-2320/added_tokens.json. We won't load it.
loading file ../../data/lemon/model/bert-mini/dirty_dblp_acm/checkpoints/checkpoint-2320/vocab.txt
loading file ../../data/lemon/model/bert-mini/dirty_dblp_acm/checkpoints/checkpoint-2320/tokenizer.json
loading file None
loading file ../../data/lemon/model/bert-mini/dirty_dblp_acm/checkpoints/checkpoint-2320/special_tokens_map.json
loading file ../../data/lemon/model/bert-mini/dirty_dblp_acm/checkpoints/checkpoint-2320/tokenizer_config.json
PyTorch: setting up devices
Using amp fp16 backend
***** Running Prediction *****
  Num examples = 2473
  Batch size = 64


{'precision': 0.9710467706013363, 'recall': 0.9819819819819819, 'f1': 0.9764837625979844}
dirty_dblp_google_scholar


Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file https://huggingface.co/google/bert_uncased_L-4_H-256_A-4/resolve/main/config.json from cache at /home/hironori/.cache/huggingface/transformers/d4bb65fac1f629b879ae238c27022568307f79a7aa4d55060745f828812b4dd0.2bbf3a8935447a5e3abcc0d62367c14e846bd3595ebb28a305e457ad1d24ff8f
Model config BertConfig {
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 256,
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 4,
  "num_hidden_layers": 4,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.9.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading file https://huggingface.co/google/bert_uncased_L-4_H-256_A

training...


PyTorch: setting up devices
Using amp fp16 backend
***** Running training *****
  Num examples = 17223
  Num Epochs = 20
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 10780


Epoch,Training Loss,Validation Loss,F1,Precision,Recall
1,0.1039,0.103827,0.902812,0.956113,0.85514
2,0.0345,0.077261,0.935574,0.934701,0.936449
3,0.0451,0.082416,0.942029,0.913884,0.971963
4,0.0294,0.081727,0.941871,0.916078,0.969159
5,0.0283,0.076678,0.945455,0.943256,0.947664
6,0.0018,0.091246,0.946168,0.924242,0.969159
7,0.0172,0.088188,0.949883,0.952113,0.947664
8,0.016,0.090595,0.948196,0.938645,0.957944
9,0.0322,0.105887,0.943861,0.922391,0.966355
10,0.0115,0.103383,0.946977,0.942593,0.951402


***** Running Evaluation *****
  Num examples = 5742
  Batch size = 64
Saving model checkpoint to ../../data/lemon/model/bert-mini/dirty_dblp_google_scholar/checkpoints/checkpoint-539
Configuration saved in ../../data/lemon/model/bert-mini/dirty_dblp_google_scholar/checkpoints/checkpoint-539/config.json
Model weights saved in ../../data/lemon/model/bert-mini/dirty_dblp_google_scholar/checkpoints/checkpoint-539/pytorch_model.bin
tokenizer config file saved in ../../data/lemon/model/bert-mini/dirty_dblp_google_scholar/checkpoints/checkpoint-539/tokenizer_config.json
Special tokens file saved in ../../data/lemon/model/bert-mini/dirty_dblp_google_scholar/checkpoints/checkpoint-539/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 5742
  Batch size = 64
Saving model checkpoint to ../../data/lemon/model/bert-mini/dirty_dblp_google_scholar/checkpoints/checkpoint-1078
Configuration saved in ../../data/lemon/model/bert-mini/dirty_dblp_google_scholar/checkpoints/checkpoint-

None

training...done


PyTorch: setting up devices
Using amp fp16 backend
***** Running Prediction *****
  Num examples = 5742
  Batch size = 64


loading configuration file ../../data/lemon/model/bert-mini/dirty_dblp_google_scholar/checkpoints/checkpoint-7007/config.json
Model config BertConfig {
  "_name_or_path": "google/bert_uncased_L-4_H-256_A-4",
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 256,
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 4,
  "num_hidden_layers": 4,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4.9.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weights file ../../data/lemon/model/bert-mini/dirty_dblp_google_scholar/checkpoints/checkpoint-7007/pytorch_model.bin
All model che

{'precision': 0.9322493224932249, 'recall': 0.9644859813084112, 'f1': 0.9480937069361507}
reload test...
../../data/lemon/model/bert-mini/dirty_dblp_google_scholar/checkpoints/checkpoint-7007


PyTorch: setting up devices
Using amp fp16 backend
***** Running Prediction *****
  Num examples = 5742
  Batch size = 64


{'precision': 0.9322493224932249, 'recall': 0.9644859813084112, 'f1': 0.9480937069361507}
dirty_walmart_amazon


Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file https://huggingface.co/google/bert_uncased_L-4_H-256_A-4/resolve/main/config.json from cache at /home/hironori/.cache/huggingface/transformers/d4bb65fac1f629b879ae238c27022568307f79a7aa4d55060745f828812b4dd0.2bbf3a8935447a5e3abcc0d62367c14e846bd3595ebb28a305e457ad1d24ff8f
Model config BertConfig {
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 256,
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 4,
  "num_hidden_layers": 4,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.9.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading file https://huggingface.co/google/bert_uncased_L-4_H-256_A

training...


PyTorch: setting up devices
Using amp fp16 backend
***** Running training *****
  Num examples = 6144
  Num Epochs = 20
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 3840


Epoch,Training Loss,Validation Loss,F1,Precision,Recall
1,0.2041,0.189859,0.65445,0.661376,0.647668
2,0.1415,0.13316,0.759615,0.70852,0.818653
3,0.0891,0.132392,0.763441,0.793296,0.735751
4,0.0752,0.141147,0.77635,0.770408,0.782383
5,0.0934,0.135127,0.777778,0.758621,0.797927
6,0.0459,0.159461,0.772947,0.723982,0.829016
7,0.0666,0.178855,0.772182,0.71875,0.834197
8,0.0722,0.172283,0.768868,0.705628,0.84456
9,0.0406,0.187913,0.762353,0.698276,0.839378
10,0.0378,0.181708,0.778055,0.75,0.80829


***** Running Evaluation *****
  Num examples = 2049
  Batch size = 64
Saving model checkpoint to ../../data/lemon/model/bert-mini/dirty_walmart_amazon/checkpoints/checkpoint-192
Configuration saved in ../../data/lemon/model/bert-mini/dirty_walmart_amazon/checkpoints/checkpoint-192/config.json
Model weights saved in ../../data/lemon/model/bert-mini/dirty_walmart_amazon/checkpoints/checkpoint-192/pytorch_model.bin
tokenizer config file saved in ../../data/lemon/model/bert-mini/dirty_walmart_amazon/checkpoints/checkpoint-192/tokenizer_config.json
Special tokens file saved in ../../data/lemon/model/bert-mini/dirty_walmart_amazon/checkpoints/checkpoint-192/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 2049
  Batch size = 64
Saving model checkpoint to ../../data/lemon/model/bert-mini/dirty_walmart_amazon/checkpoints/checkpoint-384
Configuration saved in ../../data/lemon/model/bert-mini/dirty_walmart_amazon/checkpoints/checkpoint-384/config.json
Model weights saved 

None

training...done


PyTorch: setting up devices
Using amp fp16 backend
***** Running Prediction *****
  Num examples = 2049
  Batch size = 64


loading configuration file ../../data/lemon/model/bert-mini/dirty_walmart_amazon/checkpoints/checkpoint-2496/config.json
Model config BertConfig {
  "_name_or_path": "google/bert_uncased_L-4_H-256_A-4",
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 256,
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 4,
  "num_hidden_layers": 4,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4.9.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weights file ../../data/lemon/model/bert-mini/dirty_walmart_amazon/checkpoints/checkpoint-2496/pytorch_model.bin
All model checkpoint we

{'precision': 0.827027027027027, 'recall': 0.7927461139896373, 'f1': 0.8095238095238094}
reload test...
../../data/lemon/model/bert-mini/dirty_walmart_amazon/checkpoints/checkpoint-2496


PyTorch: setting up devices
Using amp fp16 backend
***** Running Prediction *****
  Num examples = 2049
  Batch size = 64


{'precision': 0.827027027027027, 'recall': 0.7927461139896373, 'f1': 0.8095238095238094}
dirty_itunes_amazon


Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file https://huggingface.co/google/bert_uncased_L-4_H-256_A-4/resolve/main/config.json from cache at /home/hironori/.cache/huggingface/transformers/d4bb65fac1f629b879ae238c27022568307f79a7aa4d55060745f828812b4dd0.2bbf3a8935447a5e3abcc0d62367c14e846bd3595ebb28a305e457ad1d24ff8f
Model config BertConfig {
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 256,
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 4,
  "num_hidden_layers": 4,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.9.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading file https://huggingface.co/google/bert_uncased_L-4_H-256_A

training...


Epoch,Training Loss,Validation Loss,F1,Precision,Recall
1,0.7068,0.67638,0.709677,0.628571,0.814815
2,0.6495,0.584827,0.0,0.0,0.0
3,0.558,0.542023,0.0,0.0,0.0
4,0.5441,0.491534,0.0,0.0,0.0
5,0.4814,0.403135,0.0,0.0,0.0
6,0.4662,0.330833,0.444444,0.888889,0.296296
7,0.3093,0.290032,0.852459,0.764706,0.962963
8,0.2668,0.238002,0.881356,0.8125,0.962963
9,0.2152,0.207472,0.881356,0.8125,0.962963
10,0.1755,0.199581,0.870968,0.771429,1.0


***** Running Evaluation *****
  Num examples = 109
  Batch size = 64
Saving model checkpoint to ../../data/lemon/model/bert-mini/dirty_itunes_amazon/checkpoints/checkpoint-11
Configuration saved in ../../data/lemon/model/bert-mini/dirty_itunes_amazon/checkpoints/checkpoint-11/config.json
Model weights saved in ../../data/lemon/model/bert-mini/dirty_itunes_amazon/checkpoints/checkpoint-11/pytorch_model.bin
tokenizer config file saved in ../../data/lemon/model/bert-mini/dirty_itunes_amazon/checkpoints/checkpoint-11/tokenizer_config.json
Special tokens file saved in ../../data/lemon/model/bert-mini/dirty_itunes_amazon/checkpoints/checkpoint-11/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 109
  Batch size = 64
Saving model checkpoint to ../../data/lemon/model/bert-mini/dirty_itunes_amazon/checkpoints/checkpoint-22
Configuration saved in ../../data/lemon/model/bert-mini/dirty_itunes_amazon/checkpoints/checkpoint-22/config.json
Model weights saved in ../../data/le

None

PyTorch: setting up devices
Using amp fp16 backend
***** Running Prediction *****
  Num examples = 109
  Batch size = 64


training...done


loading configuration file ../../data/lemon/model/bert-mini/dirty_itunes_amazon/checkpoints/checkpoint-154/config.json
Model config BertConfig {
  "_name_or_path": "google/bert_uncased_L-4_H-256_A-4",
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 256,
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 4,
  "num_hidden_layers": 4,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4.9.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weights file ../../data/lemon/model/bert-mini/dirty_itunes_amazon/checkpoints/checkpoint-154/pytorch_model.bin


{'precision': 0.8125, 'recall': 0.9629629629629629, 'f1': 0.8813559322033898}
reload test...
../../data/lemon/model/bert-mini/dirty_itunes_amazon/checkpoints/checkpoint-154


All model checkpoint weights were used when initializing BertForSequenceClassification.

All the weights of BertForSequenceClassification were initialized from the model checkpoint at ../../data/lemon/model/bert-mini/dirty_itunes_amazon/checkpoints/checkpoint-154.
If your task is similar to the task the model of the checkpoint was trained on, you can already use BertForSequenceClassification for predictions without further training.
Didn't find file ../../data/lemon/model/bert-mini/dirty_itunes_amazon/checkpoints/checkpoint-154/added_tokens.json. We won't load it.
loading file ../../data/lemon/model/bert-mini/dirty_itunes_amazon/checkpoints/checkpoint-154/vocab.txt
loading file ../../data/lemon/model/bert-mini/dirty_itunes_amazon/checkpoints/checkpoint-154/tokenizer.json
loading file None
loading file ../../data/lemon/model/bert-mini/dirty_itunes_amazon/checkpoints/checkpoint-154/special_tokens_map.json
loading file ../../data/lemon/model/bert-mini/dirty_itunes_amazon/checkpoints/check

{'precision': 0.8125, 'recall': 0.9629629629629629, 'f1': 0.8813559322033898}
textual_abt_buy


Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file https://huggingface.co/google/bert_uncased_L-4_H-256_A-4/resolve/main/config.json from cache at /home/hironori/.cache/huggingface/transformers/d4bb65fac1f629b879ae238c27022568307f79a7aa4d55060745f828812b4dd0.2bbf3a8935447a5e3abcc0d62367c14e846bd3595ebb28a305e457ad1d24ff8f
Model config BertConfig {
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 256,
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 4,
  "num_hidden_layers": 4,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.9.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading file https://huggingface.co/google/bert_uncased_L-4_H-256_A

training...


PyTorch: setting up devices
Using amp fp16 backend
***** Running training *****
  Num examples = 5743
  Num Epochs = 20
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 3600


Epoch,Training Loss,Validation Loss,F1,Precision,Recall
1,0.1459,0.167457,0.701176,0.680365,0.723301
2,0.1445,0.130802,0.799054,0.778802,0.820388
3,0.0861,0.131973,0.789744,0.836957,0.747573
4,0.048,0.114675,0.817955,0.841026,0.796117
5,0.0761,0.128118,0.815668,0.776316,0.859223
6,0.0878,0.154777,0.814815,0.895349,0.747573
7,0.0643,0.153357,0.816537,0.872928,0.76699
8,0.0092,0.129701,0.855037,0.865672,0.84466
9,0.0454,0.147034,0.823219,0.901734,0.757282
10,0.0486,0.141984,0.845361,0.901099,0.796117


***** Running Evaluation *****
  Num examples = 1916
  Batch size = 64
Saving model checkpoint to ../../data/lemon/model/bert-mini/textual_abt_buy/checkpoints/checkpoint-180
Configuration saved in ../../data/lemon/model/bert-mini/textual_abt_buy/checkpoints/checkpoint-180/config.json
Model weights saved in ../../data/lemon/model/bert-mini/textual_abt_buy/checkpoints/checkpoint-180/pytorch_model.bin
tokenizer config file saved in ../../data/lemon/model/bert-mini/textual_abt_buy/checkpoints/checkpoint-180/tokenizer_config.json
Special tokens file saved in ../../data/lemon/model/bert-mini/textual_abt_buy/checkpoints/checkpoint-180/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 1916
  Batch size = 64
Saving model checkpoint to ../../data/lemon/model/bert-mini/textual_abt_buy/checkpoints/checkpoint-360
Configuration saved in ../../data/lemon/model/bert-mini/textual_abt_buy/checkpoints/checkpoint-360/config.json
Model weights saved in ../../data/lemon/model/bert-mini

None

PyTorch: setting up devices
Using amp fp16 backend
***** Running Prediction *****
  Num examples = 1916
  Batch size = 64


training...done


loading configuration file ../../data/lemon/model/bert-mini/textual_abt_buy/checkpoints/checkpoint-1980/config.json
Model config BertConfig {
  "_name_or_path": "google/bert_uncased_L-4_H-256_A-4",
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 256,
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 4,
  "num_hidden_layers": 4,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4.9.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weights file ../../data/lemon/model/bert-mini/textual_abt_buy/checkpoints/checkpoint-1980/pytorch_model.bin
All model checkpoint weights were

{'precision': 0.8177570093457944, 'recall': 0.8495145631067961, 'f1': 0.8333333333333333}
reload test...
../../data/lemon/model/bert-mini/textual_abt_buy/checkpoints/checkpoint-1980


PyTorch: setting up devices
Using amp fp16 backend
***** Running Prediction *****
  Num examples = 1916
  Batch size = 64


{'precision': 0.8177570093457944, 'recall': 0.8495145631067961, 'f1': 0.8333333333333333}
textual_company


Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file https://huggingface.co/google/bert_uncased_L-4_H-256_A-4/resolve/main/config.json from cache at /home/hironori/.cache/huggingface/transformers/d4bb65fac1f629b879ae238c27022568307f79a7aa4d55060745f828812b4dd0.2bbf3a8935447a5e3abcc0d62367c14e846bd3595ebb28a305e457ad1d24ff8f
Model config BertConfig {
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 256,
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 4,
  "num_hidden_layers": 4,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.9.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading file https://huggingface.co/google/bert_uncased_L-4_H-256_A

training...


PyTorch: setting up devices
Using amp fp16 backend
***** Running training *****
  Num examples = 67596
  Num Epochs = 20
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 42260


Epoch,Training Loss,Validation Loss,F1,Precision,Recall
1,0.2291,0.204612,0.842067,0.871458,0.814594
2,0.1779,0.208816,0.84077,0.905138,0.78495
3,0.161,0.231617,0.840256,0.904878,0.784248
4,0.1202,0.248359,0.837884,0.904288,0.780565
5,0.1722,0.286417,0.842864,0.87785,0.81056
6,0.1,0.314467,0.836202,0.850381,0.822487
7,0.0911,0.320258,0.834587,0.863466,0.807578
8,0.0894,0.362015,0.824223,0.917043,0.748465
9,0.0395,0.388115,0.831748,0.88807,0.782143
10,0.052,0.393575,0.833623,0.870986,0.799333


***** Running Evaluation *****
  Num examples = 22533
  Batch size = 64
Saving model checkpoint to ../../data/lemon/model/bert-mini/textual_company/checkpoints/checkpoint-2113
Configuration saved in ../../data/lemon/model/bert-mini/textual_company/checkpoints/checkpoint-2113/config.json
Model weights saved in ../../data/lemon/model/bert-mini/textual_company/checkpoints/checkpoint-2113/pytorch_model.bin
tokenizer config file saved in ../../data/lemon/model/bert-mini/textual_company/checkpoints/checkpoint-2113/tokenizer_config.json
Special tokens file saved in ../../data/lemon/model/bert-mini/textual_company/checkpoints/checkpoint-2113/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 22533
  Batch size = 64
Saving model checkpoint to ../../data/lemon/model/bert-mini/textual_company/checkpoints/checkpoint-4226
Configuration saved in ../../data/lemon/model/bert-mini/textual_company/checkpoints/checkpoint-4226/config.json
Model weights saved in ../../data/lemon/model/

None

training...done


PyTorch: setting up devices
Using amp fp16 backend
***** Running Prediction *****
  Num examples = 22503
  Batch size = 64


loading configuration file ../../data/lemon/model/bert-mini/textual_company/checkpoints/checkpoint-10565/config.json
Model config BertConfig {
  "_name_or_path": "google/bert_uncased_L-4_H-256_A-4",
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 256,
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 4,
  "num_hidden_layers": 4,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4.9.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weights file ../../data/lemon/model/bert-mini/textual_company/checkpoints/checkpoint-10565/pytorch_model.bin
All model checkpoint weights we

{'precision': 0.8327693677649154, 'recall': 0.8290780141843972, 'f1': 0.8309195912927589}
reload test...
../../data/lemon/model/bert-mini/textual_company/checkpoints/checkpoint-10565


PyTorch: setting up devices
Using amp fp16 backend
***** Running Prediction *****
  Num examples = 22503
  Batch size = 64


{'precision': 0.8327693677649154, 'recall': 0.8290780141843972, 'f1': 0.8309195912927589}
