# ディープラーニングによる自然言語処理: 評判分析モデルの実装

このノートブックでは、ディープラーニングによる自然言語処理のためのツールである[AllenNLP](https://allennlp.org/)を使って、**畳み込みニューラルネットワーク**（CNN）による**評判分析**システムを作成します。

## 環境のセットアップ

In [1]:
# AllenNLPをインストール
!pip install allennlp==2.9.3 google-cloud-storage==2.1.0 cached-path==1.1.2
# fugashiをUniDicの依存ライブラリを含めてインストール
!pip install fugashi[unidic]
# UniDicの辞書ファイルをダウンロード
!python -m unidic download

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting allennlp==2.9.3
  Downloading allennlp-2.9.3-py3-none-any.whl (719 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m719.6/719.6 kB[0m [31m11.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting google-cloud-storage==2.1.0
  Downloading google_cloud_storage-2.1.0-py2.py3-none-any.whl (106 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m106.6/106.6 kB[0m [31m14.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting cached-path==1.1.2
  Downloading cached_path-1.1.2-py3-none-any.whl (26 kB)
Collecting huggingface-hub>=0.0.16
  Downloading huggingface_hub-0.14.1-py3-none-any.whl (224 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.5/224.5 kB[0m [31m19.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting dill
  Downloading dill-0.3.6-py3-none-any.whl (110 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m110.5/110.5 kB

## データセットのセットアップ

In [2]:
# データセットの出力ディレクトリを作成
!mkdir -p data/amazon_reviews
# データセットをダウンロード
!wget -q -O data/amazon_reviews/amazon_reviews_multilingual_JP_v1_00.tsv.gz "https://s3.amazonaws.com/amazon-reviews-pds/tsv/amazon_reviews_multilingual_JP_v1_00.tsv.gz"
# データセットを解凍し、data/amazon_reviewsに展開
!gunzip data/amazon_reviews/amazon_reviews_multilingual_JP_v1_00.tsv.gz

In [3]:
import csv
import json
import os
import random
import warnings
from bs4 import BeautifulSoup

# Raise the csv module's field size limit so that very long fields can be read
csv.field_size_limit(1000000)

# Load the dataset from the TSV file. The encoding is explicit so the Japanese
# text is decoded the same way regardless of the platform's default locale.
# NOTE(review): csv.reader runs with its default quoting, so a stray double
# quote in a review may merge several rows into one field -- presumably the
# reason the field size limit is raised above. Confirm before switching to
# csv.QUOTE_NONE, because that would change which reviews get sampled.
data = []
with warnings.catch_warnings():
    # Silence BeautifulSoup's UserWarning messages while parsing. The filter is
    # not restricted to module="bs4": the recorded output of this cell shows the
    # warnings reported against the calling line, which such a filter misses.
    warnings.simplefilter("ignore", category=UserWarning)
    with open(
        "data/amazon_reviews/amazon_reviews_multilingual_JP_v1_00.tsv",
        encoding="utf-8",
    ) as f:
        reader = csv.reader(f, delimiter="\t")
        # The first row is the header, so skip it
        next(reader)
        for r in reader:
            # Column 13 holds the review text
            review_body = r[13]
            # Strip HTML tags from the review text
            review_body = BeautifulSoup(review_body, "html.parser").get_text()
            # Column 7 holds the rating value
            ratings = int(r[7])
            # Ratings of 2 or lower are negative and 4 or higher are positive;
            # rows with any other rating are dropped
            if ratings <= 2:
                data.append(dict(text=review_body, label="negative"))
            elif ratings >= 4:
                data.append(dict(text=review_body, label="positive"))

# Randomly sample 50,000 examples (fixed seed keeps the sample reproducible)
random.seed(1)
random.shuffle(data)
data = data[:50000]

# Use 80% of the data for training, 10% for validation and 10% for testing
split_data = {}
eval_size = int(len(data) * 0.1)
split_data["test"] = data[:eval_size]
split_data["validation"] = data[eval_size:eval_size * 2]
split_data["train"] = data[eval_size * 2:]

# Write each split as line-delimited JSON. ensure_ascii=False emits the raw
# Japanese characters, so the files are written explicitly as UTF-8.
for fold in ("train", "validation", "test"):
    out_file = os.path.join("data/amazon_reviews", "amazon_reviews_{}.jsonl".format(fold))
    with open(out_file, mode="w", encoding="utf-8") as f:
        for item in split_data[fold]:
            json.dump(item, f, ensure_ascii=False)
            f.write("\n")

  review_body = BeautifulSoup(review_body, "html.parser").get_text()
  review_body = BeautifulSoup(review_body, "html.parser").get_text()


In [4]:
# List the JSONL files present in data/amazon_reviews
!ls data/amazon_reviews/*.jsonl

data/amazon_reviews/amazon_reviews_test.jsonl
data/amazon_reviews/amazon_reviews_train.jsonl
data/amazon_reviews/amazon_reviews_validation.jsonl


## モデルの実装

In [5]:
# AllenNLP training configuration: a text_classification_json reader using the
# "mecab" tokenizer, a basic_classifier with a 100-dimensional embedding and a
# CNN encoder (n-gram size 2, 64 filters, ReLU), trained with Adam for up to
# 10 epochs (patience 3) on CUDA device 0 with the TensorBoard callback.
model_config = """{
    "random_seed": 1,
    "pytorch_seed": 1,
    "train_data_path": "data/amazon_reviews/amazon_reviews_train.jsonl",
    "validation_data_path": "data/amazon_reviews/amazon_reviews_validation.jsonl",
    "dataset_reader": {
        "type": "text_classification_json",
        "tokenizer": {
            "type": "mecab"
        },
        "token_indexers": {
            "tokens": {
                "type": "single_id"
            }
        }
    },
    "data_loader": {
        "batch_size": 32,
        "shuffle": true
    },
    "validation_data_loader": {
        "batch_size": 32,
        "shuffle": false
    },
    "vocabulary": {},
    "datasets_for_vocab_creation": ["train"],
    "model": {
        "type": "basic_classifier",
        "text_field_embedder": {
            "token_embedders": {
                "tokens": {
                    "type": "embedding",
                    "embedding_dim": 100
                }
            }
        },
        "seq2vec_encoder": {
           "type": "cnn",
           "embedding_dim": 100,
           "ngram_filter_sizes": [2],
           "num_filters": 64,
           "conv_layer_activation": "relu"
        }
    },
    "trainer": {
        "cuda_device": 0,
        "optimizer": {
            "type": "adam"
        },
        "num_epochs": 10,
        "patience": 3,
        "callbacks": [
            {
                "type": "tensorboard"
            }
        ]
    }
}"""

# Save the configuration to disk under the name passed to `allennlp train`
with open("amazon_reviews.jsonnet", "w") as config_file:
    config_file.write(model_config)

## モデルの訓練

In [6]:
# Source of a small module that registers a fugashi (MeCab) based tokenizer
# with AllenNLP under the name "mecab"; each token carries its surface text
# and the pos1 part-of-speech feature.
mecab_tokenizer_code = '''from allennlp.data.tokenizers.token_class import Token
from allennlp.data.tokenizers.tokenizer import Tokenizer
from fugashi import Tagger

@Tokenizer.register("mecab")
class MecabTokenizer(Tokenizer):
    def __init__(self):
        # Taggerインスタンスを作成
        self._tagger = Tagger()

    def tokenize(self, text):
        """入力テキストをMeCabを用いて解析する"""
        tokens = []
        # 入力テキストを単語に分割
        for word in self._tagger(text):
            # 単語のテキスト（word.surface）と品詞（word.feature.pos1)からTokenインスタンスを作成
            token = Token(text=word.surface, pos_=word.feature.pos1)
            tokens.append(token)

        return tokens
'''

# Write the module as UTF-8: it contains Japanese text, and Python reads source
# files as UTF-8 by default, so the platform's locale encoding must not be used.
with open("mecab_tokenizer.py", "w", encoding="utf-8") as f:
    f.write(mecab_tokenizer_code)

In [7]:
# Train with the amazon_reviews.jsonnet configuration, using exp_amazon_reviews
# as the serialization directory; --include-package loads mecab_tokenizer so
# that the "mecab" tokenizer named in the configuration is registered
!allennlp train --serialization-dir exp_amazon_reviews --include-package mecab_tokenizer amazon_reviews.jsonnet

2023-05-06 13:29:28.638335: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-05-06 13:29:37,947 - INFO - allennlp.common.params - evaluation = None
2023-05-06 13:29:37,947 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 13:29:37,947 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 13:29:37,948 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 13:29:37,948 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 13:29:37,949 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 13:29:37,949 - INFO - allennlp.common.params - type = default
2023-05-06 13:29:37,950 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 13:29:37,950 - INFO - alle

## 性能の評価

In [8]:
# Evaluate the archived model (exp_amazon_reviews/model.tar.gz) on the test split
!allennlp evaluate --include-package mecab_tokenizer exp_amazon_reviews/model.tar.gz data/amazon_reviews/amazon_reviews_test.jsonl

2023-05-06 13:32:21.488361: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-05-06 13:32:26,870 - INFO - allennlp.models.archival - loading archive file exp_amazon_reviews/model.tar.gz
2023-05-06 13:32:26,870 - INFO - allennlp.models.archival - extracting archive file exp_amazon_reviews/model.tar.gz to temp dir /tmp/tmp9fgyq33p
2023-05-06 13:32:27,336 - INFO - allennlp.data.vocabulary - Loading token dictionary from /tmp/tmp9fgyq33p/vocabulary.
2023-05-06 13:32:27,655 - INFO - allennlp.models.archival - removing temporary unarchived model dir at /tmp/tmp9fgyq33p
2023-05-06 13:32:27,666 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 13:32:27,939 - INFO - allennlp.commands.evaluate - Reading evaluation data from amazon_revie

## 学習したモデルを使う

In [9]:
# Three example inputs, one JSON object per line, each with a "sentence" field
input_json = """{"sentence":"この本は、役に立つし、面白い。"}
{"sentence":"この本は、役に立たないし、面白くない。"}
{"sentence":"この本は、役に立たないけど、面白い。"}
"""

# Write the inputs as UTF-8 explicitly so the Japanese text does not depend on
# the platform's default locale encoding
with open("sentiment_analysis_input.json", "w", encoding="utf-8") as f:
    f.write(input_json)

In [10]:
# Run the archived model on the sentences stored in sentiment_analysis_input.json
!allennlp predict --include-package mecab_tokenizer exp_amazon_reviews/model.tar.gz sentiment_analysis_input.json

2023-05-06 13:32:47.401271: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-05-06 13:32:53,083 - INFO - allennlp.models.archival - loading archive file exp_amazon_reviews/model.tar.gz
2023-05-06 13:32:53,083 - INFO - allennlp.models.archival - extracting archive file exp_amazon_reviews/model.tar.gz to temp dir /tmp/tmprgqp0m3q
2023-05-06 13:32:53,437 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 13:32:53,438 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 13:32:53,438 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 13:32:53,438 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 1

### ハイパーパラメータ探索

In [11]:
!pip install optuna

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting optuna
  Downloading optuna-3.1.1-py3-none-any.whl (365 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m365.7/365.7 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting cmaes>=0.9.1
  Downloading cmaes-0.9.1-py3-none-any.whl (21 kB)
Collecting colorlog
  Downloading colorlog-6.7.0-py2.py3-none-any.whl (11 kB)
Collecting alembic>=1.5.0
  Downloading alembic-1.10.4-py3-none-any.whl (212 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m212.9/212.9 kB[0m [31m24.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting Mako
  Downloading Mako-1.2.4-py3-none-any.whl (78 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.7/78.7 kB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: Mako, colorlog, cmaes, alembic, optuna
Successfully installed Mako-1.2.4 alembic-1.10.4 cmaes-0.9.1 colorlog-6.7.0 optuna-3.

In [12]:
# Jsonnet template for the hyperparameter search. embedding_dim, num_filters
# and max_filter_size are read from external variables; the n-gram filter sizes
# are std.range(2, max_filter_size). Unlike the fixed configuration it declares
# no trainer callbacks.
optuna_config = """local embedding_dim = std.parseInt(std.extVar("embedding_dim"));
local num_filters = std.parseInt(std.extVar("num_filters"));
local max_filter_size = std.parseInt(std.extVar("max_filter_size"));
local ngram_filter_sizes = std.range(2, max_filter_size);
{
    "random_seed": 1,
    "pytorch_seed": 1,
    "train_data_path": "data/amazon_reviews/amazon_reviews_train.jsonl",
    "validation_data_path": "data/amazon_reviews/amazon_reviews_validation.jsonl",
    "dataset_reader": {
        "type": "text_classification_json",
        "tokenizer": {
            "type": "mecab"
        },
        "token_indexers": {
            "tokens": {
                "type": "single_id"
            }
        }
    },
    "data_loader": {
        "batch_size": 32,
        "shuffle": true
    },
    "validation_data_loader": {
        "batch_size": 32,
        "shuffle": false
    },
    "vocabulary": {},
    "datasets_for_vocab_creation": ["train"],
    "model": {
        "type": "basic_classifier",
        "text_field_embedder": {
            "token_embedders": {
                "tokens": {
                    "type": "embedding",
                    "embedding_dim": embedding_dim
                }
            }
        },
        "seq2vec_encoder": {
           "type": "cnn",
           "embedding_dim": embedding_dim,
           "ngram_filter_sizes": ngram_filter_sizes,
           "num_filters": num_filters,
           "conv_layer_activation": "relu"
        }
    },
    "trainer": {
        "cuda_device": 0,
        "optimizer": {
            "type": "adam"
        },
        "num_epochs": 10,
        "patience": 3
    }
}
"""

# Save the template to disk under the name used as the executor's config_file
with open("amazon_reviews_optuna.jsonnet", "w") as template_file:
    template_file.write(optuna_config)

In [13]:
import optuna
from optuna.samplers import TPESampler
from optuna.integration.allennlp import AllenNLPExecutor, dump_best_config

def objective(trial):
    """Objective function evaluated by Optuna for a single trial.

    Registers three integer hyperparameters on ``trial`` and then runs
    AllenNLP through ``AllenNLPExecutor`` with the
    ``amazon_reviews_optuna.jsonnet`` configuration.

    Args:
        trial: The Optuna trial; its ``number`` names the output directory.

    Returns:
        The value returned by ``AllenNLPExecutor.run()``, configured with the
        ``best_validation_accuracy`` metric.
    """
    # Hyperparameters to explore, as (name, lower bound, upper bound)
    search_space = (
        ("embedding_dim", 50, 200),
        ("max_filter_size", 2, 5),
        ("num_filters", 32, 256),
    )
    for name, low, high in search_space:
        trial.suggest_int(name, low, high)

    # Train and evaluate inside a serialization directory unique to this trial
    return AllenNLPExecutor(
        trial=trial,
        metrics="best_validation_accuracy",
        serialization_dir=f"exp_amazon_reviews_optuna/trials/{trial.number}",
        config_file="amazon_reviews_optuna.jsonnet",
        include_package="mecab_tokenizer",
    ).run()

In [14]:
# Create the top-level directory for the Optuna experiment outputs
!mkdir -p exp_amazon_reviews_optuna

In [15]:
# Start the hyperparameter search: maximize the objective over 100 trials,
# sampling with a TPE sampler whose seed is fixed to 1
study = optuna.create_study(
    sampler=TPESampler(seed=1),
    direction="maximize",
)
study.optimize(objective, n_trials=100)

# Write a configuration file that uses the best hyperparameters of the study
dump_best_config(
    "amazon_reviews_optuna.jsonnet",
    "amazon_reviews_best.json",
    study,
)

[32m[I 2023-05-06 13:33:11,658][0m A new study created in memory with name: no-name-763c30fc-732b-448c-ac7a-30cf3735bae2[0m


2023-05-06 13:33:11,724 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 13:33:11,729 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 13:33:11,730 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 13:33:11,735 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 13:33:11,737 - INFO - allennlp.common.params - type = default
2023-05-06 13:33:11,740 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 13:33:11,743 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 13:33:11,745 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 13:33:11,747 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 13:33:11,750 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.type = single_id
2023-05-06 13:33:11,751 - INFO - allennlp.common.params - dataset_reader.token_index

  executor = AllenNLPExecutor(
loading instances: 40000it [01:22, 482.05it/s]

2023-05-06 13:34:34,778 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 13:34:34,782 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32





2023-05-06 13:34:34,784 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 13:34:34,786 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 13:34:34,792 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 13:34:34,795 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 13:34:34,796 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 13:34:34,799 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 13:34:34,800 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 13:34:34,803 - INFO - allennlp.common.params - validation_data_loader.cuda_device = None
2023-05-06 13:34:34,806 - INFO - allennlp.common.params - validation_data_loader.quiet = False
2023-05-06 13:34:34,807 - INFO - allennlp.common.params - validation_data_loader.collate_fn

loading instances: 5000it [00:12, 411.67it/s]

2023-05-06 13:34:46,958 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 13:34:46,961 - INFO - allennlp.common.params - type = from_instances
2023-05-06 13:34:46,964 - INFO - allennlp.common.params - min_count = None
2023-05-06 13:34:46,966 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 13:34:46,968 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 13:34:46,969 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 13:34:46,971 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 13:34:46,972 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 13:34:46,973 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 13:34:46,974 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 13:34:46,975 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 13


building vocab: 40000it [00:05, 7687.93it/s] 


2023-05-06 13:34:52,336 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 13:34:52,338 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 13:34:52,342 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 13:34:52,344 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 13:34:52,346 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 112
2023-05-06 13:34:52,348 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 13:34:52,351 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 13:34:52,354 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 13:34:52,356 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 13:35:00,028 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 13:35:00,031 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 13:35:00,039 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9072, batch_loss: 0.0923, loss: 0.2526 ||: 100%|##########| 1250/1250 [00:13<00:00, 90.32it/s]

2023-05-06 13:35:13,770 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 13:35:13,782 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 13:35:13,783 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 13:35:13,788 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9270, batch_loss: 0.0526, loss: 0.1941 ||: 100%|##########| 157/157 [00:00<00:00, 276.02it/s]

2023-05-06 13:35:14,346 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 13:35:14,348 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.907  |     0.927
2023-05-06 13:35:14,351 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |    42.228  |       N/A
2023-05-06 13:35:14,352 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.253  |     0.194
2023-05-06 13:35:14,353 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  5666.230  |       N/A





2023-05-06 13:35:14,811 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:14.893665
2023-05-06 13:35:14,816 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:09
2023-05-06 13:35:14,819 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 13:35:14,822 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 5.6G
2023-05-06 13:35:14,824 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 683M
2023-05-06 13:35:14,826 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9436, batch_loss: 0.2636, loss: 0.1443 ||: 100%|##########| 1250/1250 [00:13<00:00, 90.86it/s]

2023-05-06 13:35:28,589 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9278, batch_loss: 0.0106, loss: 0.1848 ||: 100%|##########| 157/157 [00:00<00:00, 276.32it/s]

2023-05-06 13:35:29,165 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 13:35:29,166 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.944  |     0.928
2023-05-06 13:35:29,171 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   683.172  |       N/A
2023-05-06 13:35:29,172 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.144  |     0.185
2023-05-06 13:35:29,176 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  5721.398  |       N/A





2023-05-06 13:35:29,621 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:14.801857
2023-05-06 13:35:29,629 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:01:56
2023-05-06 13:35:29,635 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 13:35:29,641 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 5.6G
2023-05-06 13:35:29,645 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 683M
2023-05-06 13:35:29,650 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9747, batch_loss: 0.0021, loss: 0.0720 ||: 100%|##########| 1250/1250 [00:13<00:00, 90.68it/s]

2023-05-06 13:35:43,443 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9232, batch_loss: 0.0033, loss: 0.2294 ||: 100%|##########| 157/157 [00:00<00:00, 281.06it/s]

2023-05-06 13:35:44,008 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 13:35:44,010 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.975  |     0.923
2023-05-06 13:35:44,012 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   683.172  |       N/A
2023-05-06 13:35:44,014 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.072  |     0.229
2023-05-06 13:35:44,016 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  5722.652  |       N/A





2023-05-06 13:35:44,500 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:14.865050
2023-05-06 13:35:44,501 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:01:42
2023-05-06 13:35:44,503 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 13:35:44,506 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 5.6G
2023-05-06 13:35:44,512 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 683M
2023-05-06 13:35:44,514 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9929, batch_loss: 0.1215, loss: 0.0245 ||: 100%|##########| 1250/1250 [00:13<00:00, 92.15it/s]

2023-05-06 13:35:58,084 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9226, batch_loss: 0.0003, loss: 0.2730 ||: 100%|##########| 157/157 [00:00<00:00, 204.52it/s]

2023-05-06 13:35:58,859 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 13:35:58,862 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.993  |     0.923
2023-05-06 13:35:58,864 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   683.172  |       N/A
2023-05-06 13:35:58,866 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.025  |     0.273
2023-05-06 13:35:58,868 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  5722.887  |       N/A





2023-05-06 13:35:59,379 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:14.876215
2023-05-06 13:35:59,382 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:01:28
2023-05-06 13:35:59,384 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 13:35:59,386 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 5.6G
2023-05-06 13:35:59,388 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 683M
2023-05-06 13:35:59,390 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9988, batch_loss: 0.0047, loss: 0.0064 ||: 100%|##########| 1250/1250 [00:13<00:00, 93.32it/s]

2023-05-06 13:36:12,791 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9186, batch_loss: 0.0001, loss: 0.3258 ||: 100%|##########| 157/157 [00:00<00:00, 198.59it/s]

2023-05-06 13:36:13,591 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 13:36:13,594 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.999  |     0.919
2023-05-06 13:36:13,596 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   683.172  |       N/A
2023-05-06 13:36:13,598 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.006  |     0.326
2023-05-06 13:36:13,599 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  5722.887  |       N/A





2023-05-06 13:36:14,167 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:14.782745
2023-05-06 13:36:14,169 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 13:36:14,223 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 5722.88671875,
  "peak_gpu_0_memory_MB": 683.171875,
  "training_duration": "0:01:13.673324",
  "epoch": 4,
  "training_accuracy": 0.99875,
  "training_loss": 0.0064172021535690875,
  "training_worker_0_memory_MB": 5722.88671875,
  "training_gpu_0_memory_MB": 683.171875,
  "validation_accuracy": 0.9186,
  "validation_loss": 0.3258156739103487,
  "best_validation_accuracy": 0.9278,
  "best_validation_loss": 0.18483600880547313
}
2023-05-06 13:36:14,225 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/0/model.tar.gz


[32m[I 2023-05-06 13:36:17,083][0m Trial 0 finished with value: 0.9278 and parameters: {'embedding_dim': 112, 'max_filter_size': 4, 'num_filters': 32}. Best is trial 0 with value: 0.9278.[0m


2023-05-06 13:36:17,130 - INFO - allennlp.common.params - evaluation = None
2023-05-06 13:36:17,131 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 13:36:17,135 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 13:36:17,138 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 13:36:17,141 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 13:36:17,144 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 13:36:17,146 - INFO - allennlp.common.params - type = default
2023-05-06 13:36:17,150 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 13:36:17,156 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 13:36:17,157 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 13:36:17,160 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 13:36:17,161 - INFO - all

loading instances: 40000it [01:21, 492.27it/s]

2023-05-06 13:37:38,461 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 13:37:38,463 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 13:37:38,467 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 13:37:38,470 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 13:37:38,472 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 13:37:38,473 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 13:37:38,474 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 13:37:38,476 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 13:37:38,480 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 13:37:38,481 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:07, 656.22it/s]

2023-05-06 13:37:46,106 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 13:37:46,110 - INFO - allennlp.common.params - type = from_instances
2023-05-06 13:37:46,114 - INFO - allennlp.common.params - min_count = None
2023-05-06 13:37:46,115 - INFO - allennlp.common.params - max_vocab_size = None





2023-05-06 13:37:46,117 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 13:37:46,119 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 13:37:46,121 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 13:37:46,123 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 13:37:46,124 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 13:37:46,129 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 13:37:46,130 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 13:37:46,135 - INFO - allennlp.data.vocabulary - Fitting token dictionary from dataset.


building vocab: 40000it [00:03, 12383.65it/s]


2023-05-06 13:37:49,506 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 13:37:49,508 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 13:37:49,511 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 13:37:49,515 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 13:37:49,516 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 95
2023-05-06 13:37:49,518 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 13:37:49,520 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 13:37:49,522 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 13:37:49,527 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tok

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 13:37:55,924 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 13:37:55,926 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 13:37:55,934 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.8990, batch_loss: 0.1734, loss: 0.2758 ||: 100%|##########| 1250/1250 [00:09<00:00, 130.83it/s]

2023-05-06 13:38:05,345 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 13:38:05,356 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 13:38:05,360 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 13:38:05,365 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9136, batch_loss: 0.0924, loss: 0.2113 ||: 100%|##########| 157/157 [00:00<00:00, 340.53it/s]

2023-05-06 13:38:05,813 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 13:38:05,816 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.899  |     0.914
2023-05-06 13:38:05,819 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   683.172  |       N/A
2023-05-06 13:38:05,821 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.276  |     0.211
2023-05-06 13:38:05,822 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  6340.574  |       N/A





2023-05-06 13:38:06,152 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:10.372854
2023-05-06 13:38:06,162 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:01:30
2023-05-06 13:38:06,169 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 13:38:06,174 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 6.2G
2023-05-06 13:38:06,175 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 492M
2023-05-06 13:38:06,181 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9280, batch_loss: 0.2676, loss: 0.1817 ||: 100%|##########| 1250/1250 [00:09<00:00, 126.02it/s]

2023-05-06 13:38:16,104 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9250, batch_loss: 0.0335, loss: 0.1920 ||: 100%|##########| 157/157 [00:00<00:00, 343.10it/s]

2023-05-06 13:38:16,570 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 13:38:16,572 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.928  |     0.925
2023-05-06 13:38:16,576 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   491.745  |       N/A
2023-05-06 13:38:16,578 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.182  |     0.192
2023-05-06 13:38:16,580 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  6376.473  |       N/A





2023-05-06 13:38:16,945 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:10.776748
2023-05-06 13:38:16,954 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:01:23
2023-05-06 13:38:16,963 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 13:38:16,966 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 6.2G
2023-05-06 13:38:16,968 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 492M
2023-05-06 13:38:16,969 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9506, batch_loss: 0.0101, loss: 0.1277 ||: 100%|##########| 1250/1250 [00:09<00:00, 133.29it/s]

2023-05-06 13:38:26,353 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9214, batch_loss: 0.0374, loss: 0.2173 ||: 100%|##########| 157/157 [00:00<00:00, 267.12it/s]

2023-05-06 13:38:26,950 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 13:38:26,952 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.951  |     0.921
2023-05-06 13:38:26,954 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   491.869  |       N/A
2023-05-06 13:38:26,955 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.128  |     0.217
2023-05-06 13:38:26,957 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  6376.473  |       N/A





2023-05-06 13:38:27,484 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:10.520929
2023-05-06 13:38:27,486 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:01:12
2023-05-06 13:38:27,488 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 13:38:27,490 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 6.2G
2023-05-06 13:38:27,492 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 492M
2023-05-06 13:38:27,494 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9705, batch_loss: 0.1337, loss: 0.0802 ||: 100%|##########| 1250/1250 [00:09<00:00, 125.28it/s]

2023-05-06 13:38:37,477 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9230, batch_loss: 0.0175, loss: 0.2300 ||: 100%|##########| 157/157 [00:00<00:00, 340.46it/s]

2023-05-06 13:38:37,946 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 13:38:37,948 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.971  |     0.923
2023-05-06 13:38:37,951 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   491.685  |       N/A
2023-05-06 13:38:37,954 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.080  |     0.230
2023-05-06 13:38:37,955 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  6376.473  |       N/A





2023-05-06 13:38:38,322 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:10.833699
2023-05-06 13:38:38,324 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:01:03
2023-05-06 13:38:38,326 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 13:38:38,331 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 6.2G
2023-05-06 13:38:38,334 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 492M
2023-05-06 13:38:38,336 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9856, batch_loss: 0.1151, loss: 0.0438 ||: 100%|##########| 1250/1250 [00:09<00:00, 125.98it/s]

2023-05-06 13:38:48,264 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9154, batch_loss: 0.0045, loss: 0.2639 ||: 100%|##########| 157/157 [00:00<00:00, 250.27it/s]

2023-05-06 13:38:48,900 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 13:38:48,903 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.986  |     0.915
2023-05-06 13:38:48,906 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   491.854  |       N/A
2023-05-06 13:38:48,908 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.044  |     0.264
2023-05-06 13:38:48,909 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  6376.473  |       N/A





2023-05-06 13:38:49,415 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:11.088727
2023-05-06 13:38:49,418 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 13:38:49,467 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 6376.47265625,
  "peak_gpu_0_memory_MB": 683.171875,
  "training_duration": "0:00:53.120870",
  "epoch": 4,
  "training_accuracy": 0.98555,
  "training_loss": 0.04379949700366706,
  "training_worker_0_memory_MB": 6376.47265625,
  "training_gpu_0_memory_MB": 491.8544921875,
  "validation_accuracy": 0.9154,
  "validation_loss": 0.26394719112913606,
  "best_validation_accuracy": 0.925,
  "best_validation_loss": 0.19200433420527513
}
2023-05-06 13:38:49,470 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/1/model.tar.gz


[32m[I 2023-05-06 13:38:51,582][0m Trial 1 finished with value: 0.925 and parameters: {'embedding_dim': 95, 'max_filter_size': 2, 'num_filters': 52}. Best is trial 0 with value: 0.9278.[0m


2023-05-06 13:38:51,653 - INFO - allennlp.common.params - evaluation = None
2023-05-06 13:38:51,656 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 13:38:51,657 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 13:38:51,662 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 13:38:51,664 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 13:38:51,665 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 13:38:51,667 - INFO - allennlp.common.params - type = default
2023-05-06 13:38:51,669 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 13:38:51,670 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 13:38:51,671 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 13:38:51,672 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 13:38:51,676 - INFO - all

loading instances: 40000it [01:25, 465.59it/s]

2023-05-06 13:40:17,633 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 13:40:17,635 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 13:40:17,638 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 13:40:17,641 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 13:40:17,643 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 13:40:17,645 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 13:40:17,647 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 13:40:17,653 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 13:40:17,654 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 13:40:17,655 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:12, 397.70it/s]

2023-05-06 13:40:30,233 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 13:40:30,236 - INFO - allennlp.common.params - type = from_instances
2023-05-06 13:40:30,238 - INFO - allennlp.common.params - min_count = None
2023-05-06 13:40:30,240 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 13:40:30,242 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 13:40:30,243 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 13:40:30,245 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 13:40:30,246 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 13:40:30,247 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 13:40:30,248 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 13:40:30,249 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 13


building vocab: 40000it [00:04, 9548.42it/s] 


2023-05-06 13:40:34,593 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 13:40:34,600 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 13:40:34,602 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 13:40:34,604 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 13:40:34,606 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 78
2023-05-06 13:40:34,607 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 13:40:34,608 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 13:40:34,610 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 13:40:34,611 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tok

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 13:40:39,200 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 13:40:39,203 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 13:40:39,210 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9044, batch_loss: 0.0835, loss: 0.2567 ||: 100%|##########| 1250/1250 [00:13<00:00, 95.31it/s] 

2023-05-06 13:40:52,227 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 13:40:52,240 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 13:40:52,241 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 13:40:52,246 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9236, batch_loss: 0.0356, loss: 0.1944 ||: 100%|##########| 157/157 [00:00<00:00, 242.37it/s]

2023-05-06 13:40:52,883 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 13:40:52,886 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.904  |     0.924
2023-05-06 13:40:52,887 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   491.922  |       N/A
2023-05-06 13:40:52,888 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.257  |     0.194
2023-05-06 13:40:52,889 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  6977.504  |       N/A





2023-05-06 13:40:53,131 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:14.030405
2023-05-06 13:40:53,132 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:04
2023-05-06 13:40:53,138 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 13:40:53,141 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 6.8G
2023-05-06 13:40:53,150 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 760M
2023-05-06 13:40:53,151 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9406, batch_loss: 0.2321, loss: 0.1516 ||: 100%|##########| 1250/1250 [00:13<00:00, 95.36it/s]

2023-05-06 13:41:06,269 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9276, batch_loss: 0.0124, loss: 0.1846 ||: 100%|##########| 157/157 [00:00<00:00, 244.19it/s]

2023-05-06 13:41:06,920 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 13:41:06,922 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.941  |     0.928
2023-05-06 13:41:06,924 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   760.232  |       N/A
2023-05-06 13:41:06,926 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.152  |     0.185
2023-05-06 13:41:06,929 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  7007.406  |       N/A





2023-05-06 13:41:07,188 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:14.050424
2023-05-06 13:41:07,189 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:01:51
2023-05-06 13:41:07,191 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 13:41:07,195 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 6.8G
2023-05-06 13:41:07,199 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 760M
2023-05-06 13:41:07,201 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9718, batch_loss: 0.0073, loss: 0.0788 ||: 100%|##########| 1250/1250 [00:12<00:00, 96.83it/s]

2023-05-06 13:41:20,119 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9284, batch_loss: 0.0049, loss: 0.2110 ||: 100%|##########| 157/157 [00:00<00:00, 181.17it/s]

2023-05-06 13:41:20,995 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 13:41:20,997 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.972  |     0.928
2023-05-06 13:41:21,003 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   760.356  |       N/A
2023-05-06 13:41:21,005 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.079  |     0.211
2023-05-06 13:41:21,006 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  7007.406  |       N/A





2023-05-06 13:41:21,323 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:14.131525
2023-05-06 13:41:21,325 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:01:37
2023-05-06 13:41:21,333 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 13:41:21,337 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 6.8G
2023-05-06 13:41:21,339 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 760M
2023-05-06 13:41:21,341 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9924, batch_loss: 0.0522, loss: 0.0262 ||: 100%|##########| 1250/1250 [00:12<00:00, 98.66it/s]

2023-05-06 13:41:34,019 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9234, batch_loss: 0.0015, loss: 0.2534 ||: 100%|##########| 157/157 [00:00<00:00, 203.31it/s]

2023-05-06 13:41:34,801 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 13:41:34,805 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.992  |     0.923
2023-05-06 13:41:34,807 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   760.172  |       N/A
2023-05-06 13:41:34,809 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.026  |     0.253
2023-05-06 13:41:34,810 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  7007.406  |       N/A





2023-05-06 13:41:35,109 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:13.775789
2023-05-06 13:41:35,111 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:01:23
2023-05-06 13:41:35,113 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 13:41:35,117 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 6.8G
2023-05-06 13:41:35,118 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 760M
2023-05-06 13:41:35,120 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9990, batch_loss: 0.0022, loss: 0.0061 ||: 100%|##########| 1250/1250 [00:12<00:00, 96.43it/s]

2023-05-06 13:41:48,088 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9194, batch_loss: 0.0038, loss: 0.3049 ||: 100%|##########| 157/157 [00:00<00:00, 248.01it/s]

2023-05-06 13:41:48,733 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 13:41:48,734 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.999  |     0.919
2023-05-06 13:41:48,737 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   760.342  |       N/A
2023-05-06 13:41:48,739 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.006  |     0.305
2023-05-06 13:41:48,741 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  7007.406  |       N/A





2023-05-06 13:41:48,962 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:13.848706
2023-05-06 13:41:48,964 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 13:41:48,992 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 7007.40625,
  "peak_gpu_0_memory_MB": 760.3564453125,
  "training_duration": "0:01:09.632418",
  "epoch": 4,
  "training_accuracy": 0.999025,
  "training_loss": 0.006137777547576115,
  "training_worker_0_memory_MB": 7007.40625,
  "training_gpu_0_memory_MB": 760.341796875,
  "validation_accuracy": 0.9194,
  "validation_loss": 0.30486786338963373,
  "best_validation_accuracy": 0.9276,
  "best_validation_loss": 0.18462004832282758
}
2023-05-06 13:41:48,995 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/2/model.tar.gz


[32m[I 2023-05-06 13:41:50,825][0m Trial 2 finished with value: 0.9276 and parameters: {'embedding_dim': 78, 'max_filter_size': 3, 'num_filters': 121}. Best is trial 0 with value: 0.9278.[0m


2023-05-06 13:41:50,916 - INFO - allennlp.common.params - evaluation = None
2023-05-06 13:41:50,920 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 13:41:50,924 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 13:41:50,930 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 13:41:50,931 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 13:41:50,933 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 13:41:50,935 - INFO - allennlp.common.params - type = default
2023-05-06 13:41:50,936 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 13:41:50,939 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 13:41:50,945 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 13:41:50,946 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 13:41:50,947 - INFO - all

loading instances: 40000it [01:21, 493.26it/s]

2023-05-06 13:43:12,100 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 13:43:12,105 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 13:43:12,106 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 13:43:12,109 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 13:43:12,113 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 13:43:12,115 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 13:43:12,117 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 13:43:12,119 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 13:43:12,121 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 13:43:12,124 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:12, 387.06it/s]

2023-05-06 13:43:25,053 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 13:43:25,056 - INFO - allennlp.common.params - type = from_instances
2023-05-06 13:43:25,059 - INFO - allennlp.common.params - min_count = None
2023-05-06 13:43:25,061 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 13:43:25,062 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 13:43:25,063 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 13:43:25,064 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 13:43:25,065 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 13:43:25,066 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 13:43:25,067 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 13:43:25,068 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 13


building vocab: 40000it [00:03, 11788.70it/s]


2023-05-06 13:43:28,597 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 13:43:28,599 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 13:43:28,604 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 13:43:28,606 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 13:43:28,608 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 131
2023-05-06 13:43:28,610 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 13:43:28,612 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 13:43:28,614 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 13:43:28,616 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 13:43:33,300 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 13:43:33,302 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 13:43:33,313 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9073, batch_loss: 0.0744, loss: 0.2471 ||: 100%|##########| 1250/1250 [00:21<00:00, 58.71it/s]

2023-05-06 13:43:54,482 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 13:43:54,497 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 13:43:54,499 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 13:43:54,506 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9178, batch_loss: 0.0458, loss: 0.2090 ||: 100%|##########| 157/157 [00:01<00:00, 149.65it/s]

2023-05-06 13:43:55,538 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 13:43:55,540 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.907  |     0.918
2023-05-06 13:43:55,542 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   760.410  |       N/A
2023-05-06 13:43:55,544 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.247  |     0.209
2023-05-06 13:43:55,548 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  7694.828  |       N/A





2023-05-06 13:43:56,227 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:23.049475
2023-05-06 13:43:56,229 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:21
2023-05-06 13:43:56,233 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 13:43:56,235 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 7.6G
2023-05-06 13:43:56,238 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.2G
2023-05-06 13:43:56,241 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9448, batch_loss: 0.2576, loss: 0.1433 ||: 100%|##########| 1250/1250 [00:21<00:00, 57.90it/s]

2023-05-06 13:44:17,835 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9294, batch_loss: 0.0276, loss: 0.1893 ||: 100%|##########| 157/157 [00:00<00:00, 177.09it/s]

2023-05-06 13:44:18,728 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 13:44:18,730 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.945  |     0.929
2023-05-06 13:44:18,732 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1210.484  |       N/A
2023-05-06 13:44:18,735 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.143  |     0.189
2023-05-06 13:44:18,736 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  7744.316  |       N/A





2023-05-06 13:44:19,250 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:23.016466
2023-05-06 13:44:19,259 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:02
2023-05-06 13:44:19,262 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 13:44:19,264 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 7.6G
2023-05-06 13:44:19,270 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.2G
2023-05-06 13:44:19,271 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9774, batch_loss: 0.0012, loss: 0.0644 ||: 100%|##########| 1250/1250 [00:21<00:00, 56.93it/s]

2023-05-06 13:44:41,235 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9250, batch_loss: 0.0038, loss: 0.2631 ||: 100%|##########| 157/157 [00:01<00:00, 151.85it/s]

2023-05-06 13:44:42,276 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 13:44:42,278 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.977  |     0.925
2023-05-06 13:44:42,280 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1210.608  |       N/A
2023-05-06 13:44:42,282 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.064  |     0.263
2023-05-06 13:44:42,283 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  7744.316  |       N/A





2023-05-06 13:44:43,000 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:23.738124
2023-05-06 13:44:43,006 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:41
2023-05-06 13:44:43,009 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 13:44:43,011 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 7.6G
2023-05-06 13:44:43,013 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.2G
2023-05-06 13:44:43,017 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9954, batch_loss: 0.0271, loss: 0.0165 ||: 100%|##########| 1250/1250 [00:22<00:00, 55.40it/s]

2023-05-06 13:45:05,585 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9220, batch_loss: 0.0021, loss: 0.3026 ||: 100%|##########| 157/157 [00:00<00:00, 174.94it/s]

2023-05-06 13:45:06,492 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 13:45:06,494 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.995  |     0.922
2023-05-06 13:45:06,500 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1210.424  |       N/A
2023-05-06 13:45:06,501 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.016  |     0.303
2023-05-06 13:45:06,504 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  7744.414  |       N/A





2023-05-06 13:45:07,040 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:24.030280
2023-05-06 13:45:07,041 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:19
2023-05-06 13:45:07,045 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 13:45:07,046 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 7.6G
2023-05-06 13:45:07,053 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.2G
2023-05-06 13:45:07,058 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9997, batch_loss: 0.0080, loss: 0.0031 ||: 100%|##########| 1250/1250 [00:22<00:00, 56.79it/s]

2023-05-06 13:45:29,075 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9212, batch_loss: 0.0003, loss: 0.3661 ||: 100%|##########| 157/157 [00:00<00:00, 177.82it/s]

2023-05-06 13:45:29,965 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 13:45:29,969 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.921
2023-05-06 13:45:29,970 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1210.594  |       N/A
2023-05-06 13:45:29,972 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.003  |     0.366
2023-05-06 13:45:29,977 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  7744.414  |       N/A





2023-05-06 13:45:30,523 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:23.477932
2023-05-06 13:45:30,524 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 13:45:30,577 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 7744.4140625,
  "peak_gpu_0_memory_MB": 1210.6083984375,
  "training_duration": "0:01:56.787920",
  "epoch": 4,
  "training_accuracy": 0.99965,
  "training_loss": 0.003149416855815798,
  "training_worker_0_memory_MB": 7744.4140625,
  "training_gpu_0_memory_MB": 1210.59375,
  "validation_accuracy": 0.9212,
  "validation_loss": 0.3661251171576571,
  "best_validation_accuracy": 0.9294,
  "best_validation_loss": 0.18930707590737542
}
2023-05-06 13:45:30,578 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/3/model.tar.gz


[32m[I 2023-05-06 13:45:33,677][0m Trial 3 finished with value: 0.9294 and parameters: {'embedding_dim': 131, 'max_filter_size': 3, 'num_filters': 186}. Best is trial 3 with value: 0.9294.[0m


2023-05-06 13:45:33,789 - INFO - allennlp.common.params - evaluation = None
2023-05-06 13:45:33,794 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 13:45:33,798 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 13:45:33,801 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 13:45:33,803 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 13:45:33,808 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 13:45:33,809 - INFO - allennlp.common.params - type = default
2023-05-06 13:45:33,811 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 13:45:33,812 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 13:45:33,815 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 13:45:33,816 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 13:45:33,818 - INFO - all

loading instances: 40000it [01:22, 485.03it/s]

2023-05-06 13:46:56,338 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 13:46:56,343 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 13:46:56,347 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 13:46:56,349 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 13:46:56,352 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 13:46:56,355 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 13:46:56,358 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 13:46:56,359 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 13:46:56,362 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 13:46:56,363 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:09, 526.98it/s]

2023-05-06 13:47:05,857 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 13:47:05,859 - INFO - allennlp.common.params - type = from_instances
2023-05-06 13:47:05,862 - INFO - allennlp.common.params - min_count = None
2023-05-06 13:47:05,863 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 13:47:05,866 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 13:47:05,868 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 13:47:05,870 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 13:47:05,876 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 13:47:05,877 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 13:47:05,881 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 13:47:05,887 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 13


building vocab: 40000it [00:03, 11968.78it/s]


2023-05-06 13:47:09,379 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 13:47:09,381 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 13:47:09,384 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 13:47:09,386 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 13:47:09,387 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 80
2023-05-06 13:47:09,388 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 13:47:09,390 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 13:47:09,391 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 13:47:09,392 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tok

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 13:47:13,822 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 13:47:13,823 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 13:47:13,830 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9073, batch_loss: 0.0790, loss: 0.2519 ||: 100%|##########| 1250/1250 [00:15<00:00, 81.41it/s]

2023-05-06 13:47:29,040 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 13:47:29,059 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 13:47:29,061 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 13:47:29,066 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9218, batch_loss: 0.0563, loss: 0.1962 ||: 100%|##########| 157/157 [00:00<00:00, 230.18it/s]

2023-05-06 13:47:29,729 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 13:47:29,733 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.907  |     0.922
2023-05-06 13:47:29,735 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1210.662  |       N/A
2023-05-06 13:47:29,737 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.252  |     0.196
2023-05-06 13:47:29,739 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  8284.812  |       N/A





2023-05-06 13:47:29,993 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:16.315441
2023-05-06 13:47:29,995 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:24
2023-05-06 13:47:29,998 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 13:47:30,012 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 8.1G
2023-05-06 13:47:30,014 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 557M
2023-05-06 13:47:30,016 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9429, batch_loss: 0.2110, loss: 0.1471 ||: 100%|##########| 1250/1250 [00:15<00:00, 81.97it/s]

2023-05-06 13:47:45,275 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9278, batch_loss: 0.0156, loss: 0.1851 ||: 100%|##########| 157/157 [00:00<00:00, 234.13it/s]

2023-05-06 13:47:45,955 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 13:47:45,956 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.943  |     0.928
2023-05-06 13:47:45,960 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   556.952  |       N/A
2023-05-06 13:47:45,962 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.147  |     0.185
2023-05-06 13:47:45,963 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  8341.438  |       N/A





2023-05-06 13:47:46,215 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:16.216726
2023-05-06 13:47:46,225 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:09
2023-05-06 13:47:46,227 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 13:47:46,233 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 8.1G
2023-05-06 13:47:46,235 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 557M
2023-05-06 13:47:46,253 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9724, batch_loss: 0.0039, loss: 0.0747 ||: 100%|##########| 1250/1250 [00:15<00:00, 79.11it/s]

2023-05-06 13:48:02,074 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9256, batch_loss: 0.0044, loss: 0.2380 ||: 100%|##########| 157/157 [00:00<00:00, 224.62it/s]

2023-05-06 13:48:02,785 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 13:48:02,787 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.972  |     0.926
2023-05-06 13:48:02,789 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   556.952  |       N/A
2023-05-06 13:48:02,791 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.075  |     0.238
2023-05-06 13:48:02,792 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  8341.438  |       N/A





2023-05-06 13:48:03,065 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:16.838118
2023-05-06 13:48:03,080 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:01:54
2023-05-06 13:48:03,087 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 13:48:03,095 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 8.1G
2023-05-06 13:48:03,098 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 557M
2023-05-06 13:48:03,101 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9919, batch_loss: 0.1110, loss: 0.0274 ||: 100%|##########| 1250/1250 [00:15<00:00, 79.97it/s]

2023-05-06 13:48:18,741 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9234, batch_loss: 0.0089, loss: 0.2750 ||: 100%|##########| 157/157 [00:00<00:00, 230.50it/s]

2023-05-06 13:48:19,430 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 13:48:19,433 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.992  |     0.923
2023-05-06 13:48:19,434 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   556.952  |       N/A
2023-05-06 13:48:19,435 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.027  |     0.275
2023-05-06 13:48:19,436 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  8341.438  |       N/A





2023-05-06 13:48:19,698 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:16.611470
2023-05-06 13:48:19,704 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:01:38
2023-05-06 13:48:19,709 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 13:48:19,714 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 8.1G
2023-05-06 13:48:19,716 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 557M
2023-05-06 13:48:19,718 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9990, batch_loss: 0.0017, loss: 0.0065 ||: 100%|##########| 1250/1250 [00:16<00:00, 78.09it/s]

2023-05-06 13:48:35,735 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9182, batch_loss: 0.0215, loss: 0.3135 ||: 100%|##########| 157/157 [00:00<00:00, 221.78it/s]

2023-05-06 13:48:36,449 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 13:48:36,450 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.999  |     0.918
2023-05-06 13:48:36,457 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   556.952  |       N/A
2023-05-06 13:48:36,458 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.007  |     0.313
2023-05-06 13:48:36,460 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  8341.438  |       N/A





2023-05-06 13:48:36,750 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:17.041282
2023-05-06 13:48:36,752 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 13:48:36,777 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 8341.4375,
  "peak_gpu_0_memory_MB": 1210.66162109375,
  "training_duration": "0:01:22.771132",
  "epoch": 4,
  "training_accuracy": 0.99895,
  "training_loss": 0.006547120769810863,
  "training_worker_0_memory_MB": 8341.4375,
  "training_gpu_0_memory_MB": 556.95166015625,
  "validation_accuracy": 0.9182,
  "validation_loss": 0.31345538933209743,
  "best_validation_accuracy": 0.9278,
  "best_validation_loss": 0.18511579485882998
}
2023-05-06 13:48:36,779 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/4/model.tar.gz


[32m[I 2023-05-06 13:48:38,529][0m Trial 4 finished with value: 0.9278 and parameters: {'embedding_dim': 80, 'max_filter_size': 5, 'num_filters': 38}. Best is trial 3 with value: 0.9294.[0m


2023-05-06 13:48:38,600 - INFO - allennlp.common.params - evaluation = None
2023-05-06 13:48:38,602 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 13:48:38,605 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 13:48:38,610 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 13:48:38,612 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 13:48:38,613 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 13:48:38,617 - INFO - allennlp.common.params - type = default
2023-05-06 13:48:38,619 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 13:48:38,622 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 13:48:38,624 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 13:48:38,626 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 13:48:38,629 - INFO - all

loading instances: 40000it [01:22, 486.65it/s]

2023-05-06 13:50:00,874 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 13:50:00,877 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 13:50:00,880 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 13:50:00,882 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 13:50:00,883 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 13:50:00,884 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 13:50:00,885 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 13:50:00,886 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None





2023-05-06 13:50:00,887 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 13:50:00,888 - INFO - allennlp.common.params - validation_data_loader.cuda_device = None
2023-05-06 13:50:00,889 - INFO - allennlp.common.params - validation_data_loader.quiet = False
2023-05-06 13:50:00,890 - INFO - allennlp.common.params - validation_data_loader.collate_fn = <allennlp.data.data_loaders.data_collator.DefaultDataCollator object at 0x7f1035c88d60>


loading instances: 5000it [00:13, 383.98it/s]

2023-05-06 13:50:13,918 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 13:50:13,921 - INFO - allennlp.common.params - type = from_instances
2023-05-06 13:50:13,923 - INFO - allennlp.common.params - min_count = None
2023-05-06 13:50:13,928 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 13:50:13,930 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 13:50:13,931 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 13:50:13,932 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 13:50:13,933 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 13:50:13,935 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 13:50:13,936 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 13:50:13,940 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 13


building vocab: 40000it [00:03, 11582.91it/s]


2023-05-06 13:50:17,535 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 13:50:17,537 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 13:50:17,540 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 13:50:17,543 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 13:50:17,545 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 151
2023-05-06 13:50:17,550 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 13:50:17,552 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 13:50:17,554 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 13:50:17,556 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 13:50:22,204 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 13:50:22,206 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 13:50:22,213 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9081, batch_loss: 0.0689, loss: 0.2472 ||: 100%|##########| 1250/1250 [00:22<00:00, 55.56it/s]

2023-05-06 13:50:44,596 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 13:50:44,609 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 13:50:44,611 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 13:50:44,616 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9202, batch_loss: 0.0466, loss: 0.2027 ||: 100%|##########| 157/157 [00:01<00:00, 145.28it/s]

2023-05-06 13:50:45,685 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 13:50:45,687 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.908  |     0.920
2023-05-06 13:50:45,690 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   556.952  |       N/A
2023-05-06 13:50:45,692 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.247  |     0.203
2023-05-06 13:50:45,694 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  8970.008  |       N/A





2023-05-06 13:50:46,504 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:24.414649
2023-05-06 13:50:46,509 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:32
2023-05-06 13:50:46,513 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 13:50:46,516 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 8.8G
2023-05-06 13:50:46,518 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 13:50:46,520 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9468, batch_loss: 0.2478, loss: 0.1393 ||: 100%|##########| 1250/1250 [00:23<00:00, 54.05it/s]

2023-05-06 13:51:09,652 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9274, batch_loss: 0.0254, loss: 0.1918 ||: 100%|##########| 157/157 [00:00<00:00, 172.22it/s]

2023-05-06 13:51:10,577 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 13:51:10,580 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.947  |     0.927
2023-05-06 13:51:10,583 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1160.864  |       N/A
2023-05-06 13:51:10,587 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.139  |     0.192
2023-05-06 13:51:10,589 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  9026.703  |       N/A





2023-05-06 13:51:11,218 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:24.704612
2023-05-06 13:51:11,219 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:13
2023-05-06 13:51:11,225 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 13:51:11,228 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 8.8G
2023-05-06 13:51:11,230 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 13:51:11,231 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9794, batch_loss: 0.0015, loss: 0.0585 ||: 100%|##########| 1250/1250 [00:23<00:00, 52.88it/s]

2023-05-06 13:51:34,877 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9232, batch_loss: 0.0210, loss: 0.2764 ||: 100%|##########| 157/157 [00:01<00:00, 143.01it/s]

2023-05-06 13:51:35,989 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 13:51:35,991 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.979  |     0.923
2023-05-06 13:51:35,993 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1160.989  |       N/A
2023-05-06 13:51:35,995 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.059  |     0.276
2023-05-06 13:51:35,997 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  9026.711  |       N/A





2023-05-06 13:51:36,858 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:25.633056
2023-05-06 13:51:36,861 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:52
2023-05-06 13:51:36,863 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 13:51:36,865 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 8.8G
2023-05-06 13:51:36,868 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 13:51:36,870 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9960, batch_loss: 0.0185, loss: 0.0145 ||: 100%|##########| 1250/1250 [00:23<00:00, 54.15it/s]

2023-05-06 13:51:59,961 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9198, batch_loss: 0.0009, loss: 0.3056 ||: 100%|##########| 157/157 [00:00<00:00, 173.55it/s]

2023-05-06 13:52:00,876 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 13:52:00,881 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.996  |     0.920
2023-05-06 13:52:00,884 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1160.805  |       N/A
2023-05-06 13:52:00,885 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.015  |     0.306
2023-05-06 13:52:00,887 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  9026.938  |       N/A





2023-05-06 13:52:01,495 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:24.631206
2023-05-06 13:52:01,497 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:28
2023-05-06 13:52:01,500 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 13:52:01,502 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 8.8G
2023-05-06 13:52:01,505 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 13:52:01,507 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9998, batch_loss: 0.0007, loss: 0.0023 ||: 100%|##########| 1250/1250 [00:22<00:00, 54.45it/s]

2023-05-06 13:52:24,468 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9226, batch_loss: 0.0002, loss: 0.3886 ||: 100%|##########| 157/157 [00:00<00:00, 175.76it/s]

2023-05-06 13:52:25,373 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 13:52:25,374 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.923
2023-05-06 13:52:25,376 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1160.974  |       N/A
2023-05-06 13:52:25,379 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.002  |     0.389
2023-05-06 13:52:25,383 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  9026.938  |       N/A





2023-05-06 13:52:26,024 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:24.524551
2023-05-06 13:52:26,026 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 13:52:26,103 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 9026.9375,
  "peak_gpu_0_memory_MB": 1160.98876953125,
  "training_duration": "0:02:03.282918",
  "epoch": 4,
  "training_accuracy": 0.99975,
  "training_loss": 0.0022792009373544717,
  "training_worker_0_memory_MB": 9026.9375,
  "training_gpu_0_memory_MB": 1160.97412109375,
  "validation_accuracy": 0.9226,
  "validation_loss": 0.3885550320979503,
  "best_validation_accuracy": 0.9274,
  "best_validation_loss": 0.19178105316534164
}
2023-05-06 13:52:26,107 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/5/model.tar.gz


[32m[I 2023-05-06 13:52:30,425][0m Trial 5 finished with value: 0.9274 and parameters: {'embedding_dim': 151, 'max_filter_size': 3, 'num_filters': 157}. Best is trial 3 with value: 0.9294.[0m


2023-05-06 13:52:30,533 - INFO - allennlp.common.params - evaluation = None
2023-05-06 13:52:30,536 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 13:52:30,538 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 13:52:30,542 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 13:52:30,545 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 13:52:30,547 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 13:52:30,549 - INFO - allennlp.common.params - type = default
2023-05-06 13:52:30,552 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 13:52:30,554 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 13:52:30,555 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 13:52:30,557 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 13:52:30,560 - INFO - all

loading instances: 40000it [01:21, 490.93it/s]

2023-05-06 13:53:52,097 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 13:53:52,104 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 13:53:52,105 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 13:53:52,109 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 13:53:52,111 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 13:53:52,113 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 13:53:52,114 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 13:53:52,115 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 13:53:52,118 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 13:53:52,119 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:13, 372.90it/s]

2023-05-06 13:54:05,534 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 13:54:05,537 - INFO - allennlp.common.params - type = from_instances
2023-05-06 13:54:05,540 - INFO - allennlp.common.params - min_count = None
2023-05-06 13:54:05,542 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 13:54:05,544 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 13:54:05,546 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 13:54:05,548 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 13:54:05,550 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 13:54:05,554 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 13:54:05,554 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 13:54:05,559 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 13


building vocab: 40000it [00:03, 10391.99it/s]


2023-05-06 13:54:09,578 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 13:54:09,580 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 13:54:09,585 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 13:54:09,590 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 13:54:09,594 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 71
2023-05-06 13:54:09,596 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 13:54:09,598 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 13:54:09,599 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 13:54:09,600 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tok

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 13:54:17,748 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 13:54:17,750 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 13:54:17,756 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9016, batch_loss: 0.1112, loss: 0.2650 ||: 100%|##########| 1250/1250 [00:11<00:00, 111.00it/s]

2023-05-06 13:54:28,913 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 13:54:28,926 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 13:54:28,930 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 13:54:28,938 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9132, batch_loss: 0.0769, loss: 0.2104 ||: 100%|##########| 157/157 [00:00<00:00, 199.68it/s]

2023-05-06 13:54:29,709 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 13:54:29,711 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.902  |     0.913
2023-05-06 13:54:29,713 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1161.042  |       N/A
2023-05-06 13:54:29,715 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.265  |     0.210
2023-05-06 13:54:29,717 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  9575.234  |       N/A





2023-05-06 13:54:29,961 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:12.324731
2023-05-06 13:54:29,968 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:01:48
2023-05-06 13:54:29,972 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 13:54:29,976 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 9.4G
2023-05-06 13:54:29,984 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 888M
2023-05-06 13:54:29,987 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9336, batch_loss: 0.2041, loss: 0.1689 ||: 100%|##########| 1250/1250 [00:11<00:00, 105.73it/s]

2023-05-06 13:54:41,820 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9218, batch_loss: 0.0229, loss: 0.1964 ||: 100%|##########| 157/157 [00:00<00:00, 259.28it/s]

2023-05-06 13:54:42,433 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 13:54:42,439 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.934  |     0.922
2023-05-06 13:54:42,441 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   887.853  |       N/A





2023-05-06 13:54:42,445 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.169  |     0.196
2023-05-06 13:54:42,446 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  9602.117  |       N/A
2023-05-06 13:54:42,605 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:12.633003
2023-05-06 13:54:42,614 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:01:39
2023-05-06 13:54:42,620 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 13:54:42,622 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 9.4G
2023-05-06 13:54:42,630 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 888M
2023-05-06 13:54:42,634 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9605, batch_loss: 0.0093, loss: 0.1056 ||: 100%|##########| 1250/1250 [00:11<00:00, 104.80it/s]

2023-05-06 13:54:54,569 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9186, batch_loss: 0.0075, loss: 0.2370 ||: 100%|##########| 157/157 [00:00<00:00, 265.83it/s]

2023-05-06 13:54:55,166 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 13:54:55,168 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.961  |     0.919
2023-05-06 13:54:55,171 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   887.978  |       N/A
2023-05-06 13:54:55,172 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.106  |     0.237
2023-05-06 13:54:55,173 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  9602.375  |       N/A





2023-05-06 13:54:55,381 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:12.760832
2023-05-06 13:54:55,391 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:01:27
2023-05-06 13:54:55,393 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 13:54:55,397 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 9.4G
2023-05-06 13:54:55,403 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 888M
2023-05-06 13:54:55,404 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9803, batch_loss: 0.0367, loss: 0.0549 ||: 100%|##########| 1250/1250 [00:11<00:00, 111.22it/s]

2023-05-06 13:55:06,651 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9184, batch_loss: 0.0021, loss: 0.2585 ||: 100%|##########| 157/157 [00:00<00:00, 187.49it/s]

2023-05-06 13:55:07,496 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 13:55:07,498 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.980  |     0.918
2023-05-06 13:55:07,500 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   887.793  |       N/A
2023-05-06 13:55:07,501 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.055  |     0.258
2023-05-06 13:55:07,504 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  9602.375  |       N/A





2023-05-06 13:55:07,781 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:12.388373
2023-05-06 13:55:07,784 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:01:14
2023-05-06 13:55:07,787 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 13:55:07,790 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 9.4G
2023-05-06 13:55:07,792 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 888M
2023-05-06 13:55:07,795 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9941, batch_loss: 0.0102, loss: 0.0209 ||: 100%|##########| 1250/1250 [00:11<00:00, 110.54it/s]

2023-05-06 13:55:19,109 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9050, batch_loss: 0.0087, loss: 0.3293 ||: 100%|##########| 157/157 [00:00<00:00, 251.24it/s]

2023-05-06 13:55:19,742 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 13:55:19,749 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.994  |     0.905
2023-05-06 13:55:19,751 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   887.963  |       N/A
2023-05-06 13:55:19,756 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.021  |     0.329
2023-05-06 13:55:19,757 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  9602.375  |       N/A





2023-05-06 13:55:19,976 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:12.188829
2023-05-06 13:55:19,988 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 13:55:20,012 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 9602.375,
  "peak_gpu_0_memory_MB": 1161.0419921875,
  "training_duration": "0:01:02.105185",
  "epoch": 4,
  "training_accuracy": 0.994075,
  "training_loss": 0.02091707002609037,
  "training_worker_0_memory_MB": 9602.375,
  "training_gpu_0_memory_MB": 887.962890625,
  "validation_accuracy": 0.905,
  "validation_loss": 0.3292736224533551,
  "best_validation_accuracy": 0.9218,
  "best_validation_loss": 0.1963670999286281
}
2023-05-06 13:55:20,014 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/6/model.tar.gz


[32m[I 2023-05-06 13:55:21,707][0m Trial 6 finished with value: 0.9218 and parameters: {'embedding_dim': 71, 'max_filter_size': 2, 'num_filters': 212}. Best is trial 3 with value: 0.9294.[0m


2023-05-06 13:55:21,821 - INFO - allennlp.common.params - evaluation = None
2023-05-06 13:55:21,825 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 13:55:21,828 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 13:55:21,832 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 13:55:21,834 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 13:55:21,835 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 13:55:21,836 - INFO - allennlp.common.params - type = default
2023-05-06 13:55:21,838 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 13:55:21,839 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 13:55:21,840 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 13:55:21,841 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 13:55:21,842 - INFO - all

loading instances: 40000it [01:36, 413.98it/s]

2023-05-06 13:56:58,515 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 13:56:58,518 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 13:56:58,520 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 13:56:58,526 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 13:56:58,528 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None





2023-05-06 13:56:58,530 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 13:56:58,531 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 13:56:58,534 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 13:56:58,535 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 13:56:58,536 - INFO - allennlp.common.params - validation_data_loader.cuda_device = None
2023-05-06 13:56:58,537 - INFO - allennlp.common.params - validation_data_loader.quiet = False
2023-05-06 13:56:58,538 - INFO - allennlp.common.params - validation_data_loader.collate_fn = <allennlp.data.data_loaders.data_collator.DefaultDataCollator object at 0x7f1035c88d60>


loading instances: 5000it [00:07, 638.93it/s]

2023-05-06 13:57:06,369 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 13:57:06,376 - INFO - allennlp.common.params - type = from_instances
2023-05-06 13:57:06,377 - INFO - allennlp.common.params - min_count = None
2023-05-06 13:57:06,379 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 13:57:06,381 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 13:57:06,385 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 13:57:06,386 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 13:57:06,389 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 13:57:06,391 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 13:57:06,393 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 13:57:06,395 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 13


building vocab: 40000it [00:03, 10364.14it/s]


2023-05-06 13:57:10,509 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 13:57:10,514 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 13:57:10,516 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 13:57:10,522 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 13:57:10,524 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 196
2023-05-06 13:57:10,526 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 13:57:10,531 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 13:57:10,532 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 13:57:10,533 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 13:57:17,921 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 13:57:17,923 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 13:57:17,934 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9083, batch_loss: 0.0897, loss: 0.2434 ||: 100%|##########| 1250/1250 [00:28<00:00, 43.11it/s]

2023-05-06 13:57:46,785 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 13:57:46,800 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 13:57:46,802 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 13:57:46,808 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9178, batch_loss: 0.0152, loss: 0.2092 ||: 100%|##########| 157/157 [00:01<00:00, 129.55it/s]

2023-05-06 13:57:48,009 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 13:57:48,010 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.908  |     0.918
2023-05-06 13:57:48,011 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   888.031  |       N/A
2023-05-06 13:57:48,013 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.243  |     0.209
2023-05-06 13:57:48,014 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  10316.641  |       N/A





2023-05-06 13:57:48,993 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:31.210987
2023-05-06 13:57:48,995 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:32
2023-05-06 13:57:49,004 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 13:57:49,005 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 10G
2023-05-06 13:57:49,007 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.4G
2023-05-06 13:57:49,009 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9476, batch_loss: 0.2923, loss: 0.1359 ||: 100%|##########| 1250/1250 [00:28<00:00, 43.65it/s]

2023-05-06 13:58:17,651 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9312, batch_loss: 0.0062, loss: 0.1841 ||: 100%|##########| 157/157 [00:01<00:00, 138.85it/s]

2023-05-06 13:58:18,789 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 13:58:18,791 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.948  |     0.931
2023-05-06 13:58:18,793 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1448.580  |       N/A
2023-05-06 13:58:18,794 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.136  |     0.184
2023-05-06 13:58:18,796 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  10388.234  |       N/A





2023-05-06 13:58:19,849 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:30.845403
2023-05-06 13:58:19,862 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:04
2023-05-06 13:58:19,864 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 13:58:19,866 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 10G
2023-05-06 13:58:19,868 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.4G
2023-05-06 13:58:19,870 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9827, batch_loss: 0.0009, loss: 0.0510 ||: 100%|##########| 1250/1250 [00:28<00:00, 43.77it/s]

2023-05-06 13:58:48,436 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9258, batch_loss: 0.0014, loss: 0.2660 ||: 100%|##########| 157/157 [00:01<00:00, 152.02it/s]

2023-05-06 13:58:49,476 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 13:58:49,481 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.983  |     0.926
2023-05-06 13:58:49,483 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1448.705  |       N/A
2023-05-06 13:58:49,485 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.051  |     0.266
2023-05-06 13:58:49,487 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  10388.234  |       N/A





2023-05-06 13:58:50,306 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:30.442831
2023-05-06 13:58:50,309 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:33
2023-05-06 13:58:50,311 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 13:58:50,312 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 10G
2023-05-06 13:58:50,315 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.4G
2023-05-06 13:58:50,316 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9974, batch_loss: 0.0212, loss: 0.0101 ||: 100%|##########| 1250/1250 [00:28<00:00, 43.50it/s]

2023-05-06 13:59:19,057 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9260, batch_loss: 0.0005, loss: 0.2958 ||: 100%|##########| 157/157 [00:01<00:00, 150.92it/s]

2023-05-06 13:59:20,104 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 13:59:20,105 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.997  |     0.926
2023-05-06 13:59:20,106 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1448.664  |       N/A
2023-05-06 13:59:20,112 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.010  |     0.296
2023-05-06 13:59:20,114 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  10388.234  |       N/A





2023-05-06 13:59:20,918 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:30.607434
2023-05-06 13:59:20,920 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:03
2023-05-06 13:59:20,921 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 13:59:20,926 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 10G
2023-05-06 13:59:20,929 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.4G
2023-05-06 13:59:20,932 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9999, batch_loss: 0.0019, loss: 0.0014 ||: 100%|##########| 1250/1250 [00:28<00:00, 43.34it/s]

2023-05-06 13:59:49,782 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9266, batch_loss: 0.0002, loss: 0.3522 ||: 100%|##########| 157/157 [00:01<00:00, 151.73it/s]

2023-05-06 13:59:50,824 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 13:59:50,825 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.927
2023-05-06 13:59:50,832 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1448.833  |       N/A
2023-05-06 13:59:50,833 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.001  |     0.352
2023-05-06 13:59:50,835 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  10388.234  |       N/A





2023-05-06 13:59:51,622 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:30.701094
2023-05-06 13:59:51,638 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 13:59:51,713 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 10388.234375,
  "peak_gpu_0_memory_MB": 1448.83349609375,
  "training_duration": "0:02:33.041499",
  "epoch": 4,
  "training_accuracy": 0.999925,
  "training_loss": 0.0013955287909953767,
  "training_worker_0_memory_MB": 10388.234375,
  "training_gpu_0_memory_MB": 1448.83349609375,
  "validation_accuracy": 0.9266,
  "validation_loss": 0.352167169974448,
  "best_validation_accuracy": 0.9312,
  "best_validation_loss": 0.18414808204694158
}
2023-05-06 13:59:51,715 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/7/model.tar.gz


[32m[I 2023-05-06 13:59:55,617][0m Trial 7 finished with value: 0.9312 and parameters: {'embedding_dim': 196, 'max_filter_size': 3, 'num_filters': 187}. Best is trial 7 with value: 0.9312.[0m


2023-05-06 13:59:55,689 - INFO - allennlp.common.params - evaluation = None
2023-05-06 13:59:55,691 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 13:59:55,697 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 13:59:55,698 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 13:59:55,700 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 13:59:55,702 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 13:59:55,704 - INFO - allennlp.common.params - type = default
2023-05-06 13:59:55,706 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 13:59:55,709 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 13:59:55,711 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 13:59:55,713 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 13:59:55,715 - INFO - all

loading instances: 40000it [01:23, 478.18it/s]

2023-05-06 14:01:19,419 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 14:01:19,422 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 14:01:19,424 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 14:01:19,425 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 14:01:19,429 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 14:01:19,432 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 14:01:19,433 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 14:01:19,434 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 14:01:19,435 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 14:01:19,436 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:07, 700.04it/s]

2023-05-06 14:01:26,585 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 14:01:26,587 - INFO - allennlp.common.params - type = from_instances
2023-05-06 14:01:26,590 - INFO - allennlp.common.params - min_count = None
2023-05-06 14:01:26,592 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 14:01:26,593 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 14:01:26,594 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 14:01:26,595 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 14:01:26,596 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 14:01:26,598 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 14:01:26,598 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 14:01:26,599 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 14


building vocab: 40000it [00:05, 6940.33it/s]


2023-05-06 14:01:32,570 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 14:01:32,578 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 14:01:32,580 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 14:01:32,583 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 14:01:32,585 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 182
2023-05-06 14:01:32,587 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 14:01:32,588 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 14:01:32,590 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 14:01:32,591 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 14:01:37,348 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 14:01:37,351 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 14:01:37,359 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9101, batch_loss: 0.0743, loss: 0.2397 ||: 100%|##########| 1250/1250 [00:29<00:00, 42.56it/s]

2023-05-06 14:02:06,613 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 14:02:06,628 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 14:02:06,630 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 14:02:06,643 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9236, batch_loss: 0.0263, loss: 0.1934 ||: 100%|##########| 157/157 [00:01<00:00, 142.80it/s]

2023-05-06 14:02:07,720 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:02:07,721 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.910  |     0.924
2023-05-06 14:02:07,728 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1448.901  |       N/A
2023-05-06 14:02:07,730 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.240  |     0.193
2023-05-06 14:02:07,732 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  10947.559  |       N/A





2023-05-06 14:02:08,478 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:31.246790
2023-05-06 14:02:08,482 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:34
2023-05-06 14:02:08,484 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 14:02:08,486 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 11G
2023-05-06 14:02:08,488 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 14:02:08,491 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9496, batch_loss: 0.1522, loss: 0.1289 ||: 100%|##########| 1250/1250 [00:28<00:00, 43.15it/s]

2023-05-06 14:02:37,467 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9336, batch_loss: 0.0089, loss: 0.1831 ||: 100%|##########| 157/157 [00:01<00:00, 136.74it/s]

2023-05-06 14:02:38,630 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:02:38,631 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.950  |     0.934
2023-05-06 14:02:38,633 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1158.439  |       N/A
2023-05-06 14:02:38,635 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.129  |     0.183
2023-05-06 14:02:38,636 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  11016.496  |       N/A





2023-05-06 14:02:39,570 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:31.086380
2023-05-06 14:02:39,573 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:05
2023-05-06 14:02:39,576 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 14:02:39,578 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 11G
2023-05-06 14:02:39,581 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 14:02:39,582 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9845, batch_loss: 0.0048, loss: 0.0463 ||: 100%|##########| 1250/1250 [00:28<00:00, 43.35it/s]

2023-05-06 14:03:08,421 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9268, batch_loss: 0.0012, loss: 0.2683 ||: 100%|##########| 157/157 [00:01<00:00, 141.09it/s]

2023-05-06 14:03:09,541 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:03:09,542 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.985  |     0.927
2023-05-06 14:03:09,544 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1158.439  |       N/A
2023-05-06 14:03:09,546 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.046  |     0.268
2023-05-06 14:03:09,547 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  11016.496  |       N/A





2023-05-06 14:03:10,504 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:30.928005
2023-05-06 14:03:10,512 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:35
2023-05-06 14:03:10,514 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 14:03:10,515 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 11G
2023-05-06 14:03:10,517 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 14:03:10,519 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9977, batch_loss: 0.0100, loss: 0.0099 ||: 100%|##########| 1250/1250 [00:29<00:00, 42.98it/s]

2023-05-06 14:03:39,614 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9280, batch_loss: 0.0001, loss: 0.3033 ||: 100%|##########| 157/157 [00:00<00:00, 165.10it/s]

2023-05-06 14:03:40,576 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:03:40,579 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.998  |     0.928
2023-05-06 14:03:40,581 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1158.439  |       N/A
2023-05-06 14:03:40,582 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.010  |     0.303
2023-05-06 14:03:40,584 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  11016.496  |       N/A





2023-05-06 14:03:41,345 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:30.831473
2023-05-06 14:03:41,346 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:05
2023-05-06 14:03:41,349 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 14:03:41,353 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 11G
2023-05-06 14:03:41,356 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 14:03:41,361 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9999, batch_loss: 0.0008, loss: 0.0015 ||: 100%|##########| 1250/1250 [00:29<00:00, 42.94it/s]

2023-05-06 14:04:10,479 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9286, batch_loss: 0.0000, loss: 0.3576 ||: 100%|##########| 157/157 [00:00<00:00, 162.95it/s]

2023-05-06 14:04:11,458 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:04:11,459 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.929
2023-05-06 14:04:11,461 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1158.439  |       N/A
2023-05-06 14:04:11,464 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.002  |     0.358
2023-05-06 14:04:11,466 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  11016.496  |       N/A





2023-05-06 14:04:12,231 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:30.882292
2023-05-06 14:04:12,233 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 14:04:12,311 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 11016.49609375,
  "peak_gpu_0_memory_MB": 1448.9013671875,
  "training_duration": "0:02:34.225992",
  "epoch": 4,
  "training_accuracy": 0.999875,
  "training_loss": 0.0015002080831993225,
  "training_worker_0_memory_MB": 11016.49609375,
  "training_gpu_0_memory_MB": 1158.439453125,
  "validation_accuracy": 0.9286,
  "validation_loss": 0.3575826093686773,
  "best_validation_accuracy": 0.9336,
  "best_validation_loss": 0.18306946107868555
}
2023-05-06 14:04:12,312 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/8/model.tar.gz


[32m[I 2023-05-06 14:04:15,999][0m Trial 8 finished with value: 0.9336 and parameters: {'embedding_dim': 182, 'max_filter_size': 5, 'num_filters': 51}. Best is trial 8 with value: 0.9336.[0m


2023-05-06 14:04:16,067 - INFO - allennlp.common.params - evaluation = None
2023-05-06 14:04:16,068 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 14:04:16,075 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 14:04:16,078 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 14:04:16,082 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 14:04:16,085 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 14:04:16,089 - INFO - allennlp.common.params - type = default
2023-05-06 14:04:16,091 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 14:04:16,093 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 14:04:16,096 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 14:04:16,097 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 14:04:16,100 - INFO - all

loading instances: 40000it [01:22, 484.79it/s]

2023-05-06 14:05:38,661 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 14:05:38,663 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 14:05:38,666 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 14:05:38,669 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 14:05:38,670 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 14:05:38,671 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 14:05:38,671 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 14:05:38,672 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 14:05:38,673 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 14:05:38,675 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:08, 597.08it/s]


2023-05-06 14:05:47,059 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 14:05:47,062 - INFO - allennlp.common.params - type = from_instances
2023-05-06 14:05:47,064 - INFO - allennlp.common.params - min_count = None
2023-05-06 14:05:47,066 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 14:05:47,067 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 14:05:47,068 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 14:05:47,069 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 14:05:47,071 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 14:05:47,072 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 14:05:47,073 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 14:05:47,074 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 14

building vocab: 40000it [00:04, 8395.49it/s] 


2023-05-06 14:05:51,983 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 14:05:51,984 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 14:05:51,988 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 14:05:51,990 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 14:05:51,993 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 55
2023-05-06 14:05:51,995 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 14:05:51,999 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 14:05:52,002 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 14:05:52,005 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tok

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 14:05:56,505 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 14:05:56,507 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 14:05:56,513 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9013, batch_loss: 0.1135, loss: 0.2667 ||: 100%|##########| 1250/1250 [00:10<00:00, 118.38it/s]

2023-05-06 14:06:06,954 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 14:06:06,966 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 14:06:06,967 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 14:06:06,973 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9144, batch_loss: 0.0731, loss: 0.2126 ||: 100%|##########| 157/157 [00:00<00:00, 196.63it/s]

2023-05-06 14:06:07,761 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:06:07,767 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.901  |     0.914
2023-05-06 14:06:07,769 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1158.439  |       N/A
2023-05-06 14:06:07,771 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.267  |     0.213
2023-05-06 14:06:07,772 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  11508.934  |       N/A





2023-05-06 14:06:07,905 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:11.517895
2023-05-06 14:06:07,906 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:01:42
2023-05-06 14:06:07,909 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 14:06:07,913 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 11G
2023-05-06 14:06:07,915 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 882M
2023-05-06 14:06:07,917 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9313, batch_loss: 0.2643, loss: 0.1733 ||: 100%|##########| 1250/1250 [00:09<00:00, 132.02it/s]

2023-05-06 14:06:17,392 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9220, batch_loss: 0.0112, loss: 0.1965 ||: 100%|##########| 157/157 [00:00<00:00, 232.60it/s]

2023-05-06 14:06:18,080 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:06:18,082 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.931  |     0.922
2023-05-06 14:06:18,084 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   882.156  |       N/A
2023-05-06 14:06:18,088 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.173  |     0.196
2023-05-06 14:06:18,090 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  11527.250  |       N/A





2023-05-06 14:06:18,291 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:10.381981
2023-05-06 14:06:18,293 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:01:26
2023-05-06 14:06:18,294 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 14:06:18,296 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 11G
2023-05-06 14:06:18,298 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 882M
2023-05-06 14:06:18,300 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9577, batch_loss: 0.0188, loss: 0.1120 ||: 100%|##########| 1250/1250 [00:10<00:00, 119.72it/s]

2023-05-06 14:06:28,746 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9250, batch_loss: 0.0028, loss: 0.2236 ||: 100%|##########| 157/157 [00:00<00:00, 267.27it/s]

2023-05-06 14:06:29,341 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:06:29,344 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.958  |     0.925
2023-05-06 14:06:29,351 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   882.280  |       N/A
2023-05-06 14:06:29,352 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.112  |     0.224
2023-05-06 14:06:29,358 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  11527.250  |       N/A





2023-05-06 14:06:29,492 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:11.197991
2023-05-06 14:06:29,494 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:01:16
2023-05-06 14:06:29,496 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 14:06:29,499 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 11G
2023-05-06 14:06:29,501 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 882M
2023-05-06 14:06:29,504 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9785, batch_loss: 0.1037, loss: 0.0610 ||: 100%|##########| 1250/1250 [00:10<00:00, 124.56it/s]

2023-05-06 14:06:39,555 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9206, batch_loss: 0.0005, loss: 0.2515 ||: 100%|##########| 157/157 [00:00<00:00, 195.71it/s]

2023-05-06 14:06:40,370 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:06:40,371 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.979  |     0.921
2023-05-06 14:06:40,373 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   882.096  |       N/A
2023-05-06 14:06:40,378 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.061  |     0.251
2023-05-06 14:06:40,379 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  11527.508  |       N/A





2023-05-06 14:06:40,559 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:11.062148
2023-05-06 14:06:40,561 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:01:05
2023-05-06 14:06:40,565 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 14:06:40,567 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 11G
2023-05-06 14:06:40,569 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 882M
2023-05-06 14:06:40,571 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9928, batch_loss: 0.0236, loss: 0.0252 ||: 100%|##########| 1250/1250 [00:09<00:00, 128.32it/s]

2023-05-06 14:06:50,319 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9012, batch_loss: 0.0034, loss: 0.3252 ||: 100%|##########| 157/157 [00:00<00:00, 268.87it/s]

2023-05-06 14:06:50,909 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:06:50,912 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.993  |     0.901
2023-05-06 14:06:50,913 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   882.266  |       N/A
2023-05-06 14:06:50,914 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.025  |     0.325
2023-05-06 14:06:50,915 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  11527.508  |       N/A





2023-05-06 14:06:51,053 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:10.487968
2023-05-06 14:06:51,055 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 14:06:51,071 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 11527.5078125,
  "peak_gpu_0_memory_MB": 1158.439453125,
  "training_duration": "0:00:54.521220",
  "epoch": 4,
  "training_accuracy": 0.9928,
  "training_loss": 0.025168890933319928,
  "training_worker_0_memory_MB": 11527.5078125,
  "training_gpu_0_memory_MB": 882.265625,
  "validation_accuracy": 0.9012,
  "validation_loss": 0.3251868661531621,
  "best_validation_accuracy": 0.922,
  "best_validation_loss": 0.1964757717006905
}
2023-05-06 14:06:51,073 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/9/model.tar.gz


[32m[I 2023-05-06 14:06:52,316][0m Trial 9 finished with value: 0.922 and parameters: {'embedding_dim': 55, 'max_filter_size': 2, 'num_filters': 229}. Best is trial 8 with value: 0.9336.[0m


2023-05-06 14:06:52,405 - INFO - allennlp.common.params - evaluation = None
2023-05-06 14:06:52,407 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 14:06:52,413 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 14:06:52,416 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 14:06:52,420 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 14:06:52,423 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 14:06:52,424 - INFO - allennlp.common.params - type = default
2023-05-06 14:06:52,428 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 14:06:52,430 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 14:06:52,432 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 14:06:52,433 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 14:06:52,437 - INFO - all

loading instances: 40000it [01:24, 471.02it/s]

2023-05-06 14:08:17,424 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 14:08:17,427 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 14:08:17,429 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 14:08:17,431 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 14:08:17,433 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 14:08:17,434 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 14:08:17,436 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 14:08:17,437 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 14:08:17,438 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 14:08:17,439 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:09, 511.37it/s]

2023-05-06 14:08:27,223 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 14:08:27,225 - INFO - allennlp.common.params - type = from_instances
2023-05-06 14:08:27,232 - INFO - allennlp.common.params - min_count = None
2023-05-06 14:08:27,233 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 14:08:27,235 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 14:08:27,237 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 14:08:27,239 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 14:08:27,240 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 14:08:27,241 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 14:08:27,242 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 14:08:27,243 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 14


building vocab: 40000it [00:03, 11714.21it/s]


2023-05-06 14:08:30,804 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 14:08:30,806 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 14:08:30,808 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 14:08:30,815 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 14:08:30,818 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 199
2023-05-06 14:08:30,820 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 14:08:30,822 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 14:08:30,827 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 14:08:30,828 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 14:08:35,533 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 14:08:35,536 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 14:08:35,545 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9094, batch_loss: 0.0601, loss: 0.2398 ||: 100%|##########| 1250/1250 [00:38<00:00, 32.16it/s]

2023-05-06 14:09:14,268 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 14:09:14,289 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 14:09:14,298 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 14:09:14,303 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9198, batch_loss: 0.0426, loss: 0.2030 ||: 100%|##########| 157/157 [00:01<00:00, 98.28it/s] 

2023-05-06 14:09:15,878 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:09:15,882 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.909  |     0.920
2023-05-06 14:09:15,884 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   882.333  |       N/A
2023-05-06 14:09:15,888 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.240  |     0.203
2023-05-06 14:09:15,890 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  12241.355  |       N/A





2023-05-06 14:09:16,918 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:41.523662
2023-05-06 14:09:16,920 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:06:04
2023-05-06 14:09:16,922 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 14:09:16,924 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 12G
2023-05-06 14:09:16,926 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.4G
2023-05-06 14:09:16,928 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9519, batch_loss: 0.2117, loss: 0.1266 ||: 100%|##########| 1250/1250 [00:38<00:00, 32.67it/s]

2023-05-06 14:09:55,195 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9298, batch_loss: 0.0109, loss: 0.1993 ||: 100%|##########| 157/157 [00:01<00:00, 112.43it/s]

2023-05-06 14:09:56,601 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:09:56,603 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.952  |     0.930
2023-05-06 14:09:56,605 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1389.100  |       N/A
2023-05-06 14:09:56,606 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.127  |     0.199
2023-05-06 14:09:56,608 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  12316.844  |       N/A





2023-05-06 14:09:57,408 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:40.485961
2023-05-06 14:09:57,409 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:05:24
2023-05-06 14:09:57,412 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 14:09:57,444 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 12G
2023-05-06 14:09:57,446 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.4G
2023-05-06 14:09:57,448 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9870, batch_loss: 0.0011, loss: 0.0404 ||: 100%|##########| 1250/1250 [00:38<00:00, 32.46it/s]

2023-05-06 14:10:35,964 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9252, batch_loss: 0.0024, loss: 0.2615 ||: 100%|##########| 157/157 [00:01<00:00, 102.18it/s]

2023-05-06 14:10:37,513 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:10:37,515 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.987  |     0.925
2023-05-06 14:10:37,517 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1389.100  |       N/A
2023-05-06 14:10:37,518 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.040  |     0.261
2023-05-06 14:10:37,520 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  12316.844  |       N/A





2023-05-06 14:10:38,676 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:41.264061
2023-05-06 14:10:38,680 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:44
2023-05-06 14:10:38,688 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 14:10:38,690 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 12G
2023-05-06 14:10:38,692 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.4G
2023-05-06 14:10:38,694 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9981, batch_loss: 0.0519, loss: 0.0075 ||: 100%|##########| 1250/1250 [00:38<00:00, 32.52it/s]

2023-05-06 14:11:17,136 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9296, batch_loss: 0.0016, loss: 0.3280 ||: 100%|##########| 157/157 [00:01<00:00, 110.02it/s]

2023-05-06 14:11:18,574 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:11:18,578 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.998  |     0.930
2023-05-06 14:11:18,580 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1389.100  |       N/A
2023-05-06 14:11:18,583 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.008  |     0.328
2023-05-06 14:11:18,586 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  12316.844  |       N/A





2023-05-06 14:11:19,439 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:40.750891
2023-05-06 14:11:19,442 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:04
2023-05-06 14:11:19,445 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 14:11:19,452 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 12G
2023-05-06 14:11:19,454 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.4G
2023-05-06 14:11:19,455 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9996, batch_loss: 0.0007, loss: 0.0018 ||: 100%|##########| 1250/1250 [00:38<00:00, 32.34it/s]

2023-05-06 14:11:58,111 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9224, batch_loss: 0.0005, loss: 0.3593 ||: 100%|##########| 157/157 [00:01<00:00, 110.25it/s]

2023-05-06 14:11:59,541 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:11:59,544 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.922
2023-05-06 14:11:59,545 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1389.100  |       N/A
2023-05-06 14:11:59,547 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.002  |     0.359
2023-05-06 14:11:59,549 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  12316.844  |       N/A





2023-05-06 14:12:00,437 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:40.991957
2023-05-06 14:12:00,439 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 14:12:00,527 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 12316.84375,
  "peak_gpu_0_memory_MB": 1389.10009765625,
  "training_duration": "0:03:24.147128",
  "epoch": 4,
  "training_accuracy": 0.999625,
  "training_loss": 0.0017987771151874768,
  "training_worker_0_memory_MB": 12316.84375,
  "training_gpu_0_memory_MB": 1389.10009765625,
  "validation_accuracy": 0.9224,
  "validation_loss": 0.3592704881481666,
  "best_validation_accuracy": 0.9298,
  "best_validation_loss": 0.19933941958555182
}
2023-05-06 14:12:00,528 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/10/model.tar.gz


[32m[I 2023-05-06 14:12:05,927][0m Trial 10 finished with value: 0.9298 and parameters: {'embedding_dim': 199, 'max_filter_size': 5, 'num_filters': 96}. Best is trial 8 with value: 0.9336.[0m


2023-05-06 14:12:06,054 - INFO - allennlp.common.params - evaluation = None
2023-05-06 14:12:06,058 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 14:12:06,065 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 14:12:06,066 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 14:12:06,068 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 14:12:06,070 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 14:12:06,072 - INFO - allennlp.common.params - type = default
2023-05-06 14:12:06,076 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 14:12:06,078 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 14:12:06,079 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 14:12:06,080 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 14:12:06,081 - INFO - all

loading instances: 40000it [01:23, 477.65it/s]

2023-05-06 14:13:29,893 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 14:13:29,900 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 14:13:29,901 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 14:13:29,903 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 14:13:29,906 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 14:13:29,908 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 14:13:29,911 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 14:13:29,914 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 14:13:29,916 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 14:13:29,918 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:08, 594.37it/s]

2023-05-06 14:13:38,340 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 14:13:38,343 - INFO - allennlp.common.params - type = from_instances
2023-05-06 14:13:38,347 - INFO - allennlp.common.params - min_count = None
2023-05-06 14:13:38,348 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 14:13:38,354 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 14:13:38,355 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 14:13:38,361 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 14:13:38,362 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 14:13:38,363 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 14:13:38,365 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 14:13:38,366 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 14


building vocab: 40000it [00:03, 11397.33it/s]


2023-05-06 14:13:42,028 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 14:13:42,030 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 14:13:42,035 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 14:13:42,038 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 14:13:42,041 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 199
2023-05-06 14:13:42,042 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 14:13:42,044 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 14:13:42,046 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 14:13:42,046 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 14:13:48,603 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 14:13:48,604 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 14:13:48,615 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9109, batch_loss: 0.0612, loss: 0.2399 ||: 100%|##########| 1250/1250 [00:40<00:00, 31.03it/s]

2023-05-06 14:14:28,715 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 14:14:28,734 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 14:14:28,742 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 14:14:28,748 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9200, batch_loss: 0.0281, loss: 0.2031 ||: 100%|##########| 157/157 [00:01<00:00, 100.48it/s]

2023-05-06 14:14:30,284 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:14:30,294 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.911  |     0.920
2023-05-06 14:14:30,295 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1389.100  |       N/A
2023-05-06 14:14:30,299 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.240  |     0.203
2023-05-06 14:14:30,300 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  12870.387  |       N/A





2023-05-06 14:14:31,097 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:42.673517
2023-05-06 14:14:31,099 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:06:16
2023-05-06 14:14:31,102 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 14:14:31,104 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 13G
2023-05-06 14:14:31,106 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.5G
2023-05-06 14:14:31,107 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9502, batch_loss: 0.1613, loss: 0.1312 ||: 100%|##########| 1250/1250 [00:39<00:00, 31.77it/s]

2023-05-06 14:15:10,456 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9314, batch_loss: 0.0096, loss: 0.1940 ||: 100%|##########| 157/157 [00:01<00:00, 94.92it/s] 

2023-05-06 14:15:12,119 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:15:12,121 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.950  |     0.931
2023-05-06 14:15:12,126 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1570.622  |       N/A
2023-05-06 14:15:12,128 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.131  |     0.194





2023-05-06 14:15:12,135 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  12945.762  |       N/A
2023-05-06 14:15:13,239 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:42.136378
2023-05-06 14:15:13,241 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:05:34
2023-05-06 14:15:13,242 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 14:15:13,244 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 13G
2023-05-06 14:15:13,246 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.5G
2023-05-06 14:15:13,248 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9848, batch_loss: 0.0020, loss: 0.0454 ||: 100%|##########| 1250/1250 [00:39<00:00, 31.30it/s]

2023-05-06 14:15:53,187 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9236, batch_loss: 0.0013, loss: 0.2552 ||: 100%|##########| 157/157 [00:01<00:00, 101.54it/s]

2023-05-06 14:15:54,739 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:15:54,744 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.985  |     0.924
2023-05-06 14:15:54,745 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1570.747  |       N/A
2023-05-06 14:15:54,746 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.045  |     0.255
2023-05-06 14:15:54,748 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  12945.762  |       N/A





2023-05-06 14:15:55,575 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:42.333030
2023-05-06 14:15:55,578 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:54
2023-05-06 14:15:55,579 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 14:15:55,581 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 13G
2023-05-06 14:15:55,583 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.5G
2023-05-06 14:15:55,585 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9972, batch_loss: 0.1205, loss: 0.0101 ||: 100%|##########| 1250/1250 [00:39<00:00, 31.28it/s]

2023-05-06 14:16:35,554 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9232, batch_loss: 0.0004, loss: 0.3569 ||: 100%|##########| 157/157 [00:01<00:00, 92.21it/s]

2023-05-06 14:16:37,269 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:16:37,273 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.997  |     0.923
2023-05-06 14:16:37,275 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1570.562  |       N/A
2023-05-06 14:16:37,278 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.010  |     0.357
2023-05-06 14:16:37,279 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  12945.762  |       N/A





2023-05-06 14:16:38,386 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:42.806440
2023-05-06 14:16:38,389 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:13
2023-05-06 14:16:38,391 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 14:16:38,393 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 13G
2023-05-06 14:16:38,396 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.5G
2023-05-06 14:16:38,400 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9997, batch_loss: 0.0042, loss: 0.0019 ||: 100%|##########| 1250/1250 [00:39<00:00, 31.28it/s]

2023-05-06 14:17:18,368 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9176, batch_loss: 0.0001, loss: 0.3820 ||: 100%|##########| 157/157 [00:01<00:00, 100.54it/s]

2023-05-06 14:17:19,940 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:17:19,942 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.918
2023-05-06 14:17:19,944 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1570.732  |       N/A
2023-05-06 14:17:19,946 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.002  |     0.382
2023-05-06 14:17:19,950 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  12945.762  |       N/A





2023-05-06 14:17:20,822 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:42.431469
2023-05-06 14:17:20,824 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 14:17:20,909 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 12945.76171875,
  "peak_gpu_0_memory_MB": 1570.74658203125,
  "training_duration": "0:03:31.515462",
  "epoch": 4,
  "training_accuracy": 0.99965,
  "training_loss": 0.0018721456277209655,
  "training_worker_0_memory_MB": 12945.76171875,
  "training_gpu_0_memory_MB": 1570.73193359375,
  "validation_accuracy": 0.9176,
  "validation_loss": 0.3820085168066679,
  "best_validation_accuracy": 0.9314,
  "best_validation_loss": 0.1940183109087739
}
2023-05-06 14:17:20,911 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/11/model.tar.gz


[32m[I 2023-05-06 14:17:25,006][0m Trial 11 finished with value: 0.9314 and parameters: {'embedding_dim': 199, 'max_filter_size': 4, 'num_filters': 167}. Best is trial 8 with value: 0.9336.[0m


2023-05-06 14:17:25,101 - INFO - allennlp.common.params - evaluation = None
2023-05-06 14:17:25,103 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 14:17:25,106 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 14:17:25,111 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 14:17:25,113 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 14:17:25,117 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 14:17:25,118 - INFO - allennlp.common.params - type = default
2023-05-06 14:17:25,119 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 14:17:25,120 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 14:17:25,121 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 14:17:25,122 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 14:17:25,123 - INFO - all

loading instances: 40000it [01:25, 468.84it/s]

2023-05-06 14:18:50,477 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 14:18:50,480 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32





2023-05-06 14:18:50,483 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 14:18:50,485 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 14:18:50,489 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 14:18:50,492 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 14:18:50,494 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 14:18:50,501 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 14:18:50,502 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 14:18:50,503 - INFO - allennlp.common.params - validation_data_loader.cuda_device = None
2023-05-06 14:18:50,504 - INFO - allennlp.common.params - validation_data_loader.quiet = False
2023-05-06 14:18:50,505 - INFO - allennlp.common.params - validation_data_loader.collate_fn

loading instances: 5000it [00:08, 620.62it/s]

2023-05-06 14:18:58,567 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 14:18:58,570 - INFO - allennlp.common.params - type = from_instances
2023-05-06 14:18:58,574 - INFO - allennlp.common.params - min_count = None
2023-05-06 14:18:58,575 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 14:18:58,577 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 14:18:58,579 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 14:18:58,581 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 14:18:58,585 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 14:18:58,586 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 14:18:58,588 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 14:18:58,589 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 14


building vocab: 40000it [00:05, 6987.04it/s] 


2023-05-06 14:19:04,476 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 14:19:04,477 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 14:19:04,479 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 14:19:04,486 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 14:19:04,487 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 169
2023-05-06 14:19:04,490 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 14:19:04,492 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 14:19:04,497 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 14:19:04,499 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 14:19:09,308 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 14:19:09,310 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 14:19:09,318 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9101, batch_loss: 0.0740, loss: 0.2415 ||: 100%|##########| 1250/1250 [00:26<00:00, 46.33it/s]

2023-05-06 14:19:36,165 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 14:19:36,182 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 14:19:36,185 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 14:19:36,192 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9220, batch_loss: 0.0512, loss: 0.1996 ||: 100%|##########| 157/157 [00:01<00:00, 129.23it/s]

2023-05-06 14:19:37,390 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:19:37,392 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.910  |     0.922
2023-05-06 14:19:37,394 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1570.800  |       N/A
2023-05-06 14:19:37,396 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.241  |     0.200
2023-05-06 14:19:37,398 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  13483.445  |       N/A





2023-05-06 14:19:38,291 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:29.115408
2023-05-06 14:19:38,294 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:13
2023-05-06 14:19:38,295 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 14:19:38,298 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 13G
2023-05-06 14:19:38,301 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 14:19:38,306 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9478, batch_loss: 0.2081, loss: 0.1332 ||: 100%|##########| 1250/1250 [00:26<00:00, 47.05it/s]

2023-05-06 14:20:04,885 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9282, batch_loss: 0.0170, loss: 0.1888 ||: 100%|##########| 157/157 [00:01<00:00, 156.54it/s]

2023-05-06 14:20:05,898 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:20:05,899 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.948  |     0.928
2023-05-06 14:20:05,905 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1125.336  |       N/A
2023-05-06 14:20:05,908 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.133  |     0.189
2023-05-06 14:20:05,910 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  13547.520  |       N/A





2023-05-06 14:20:06,588 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:28.292896
2023-05-06 14:20:06,599 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:46
2023-05-06 14:20:06,601 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 14:20:06,602 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 13G
2023-05-06 14:20:06,604 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 14:20:06,606 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9831, batch_loss: 0.0021, loss: 0.0500 ||: 100%|##########| 1250/1250 [00:26<00:00, 47.26it/s]

2023-05-06 14:20:33,063 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9254, batch_loss: 0.0183, loss: 0.2476 ||: 100%|##########| 157/157 [00:00<00:00, 157.10it/s]

2023-05-06 14:20:34,072 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:20:34,074 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.983  |     0.925
2023-05-06 14:20:34,077 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1125.461  |       N/A
2023-05-06 14:20:34,078 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.050  |     0.248
2023-05-06 14:20:34,079 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  13547.520  |       N/A





2023-05-06 14:20:34,820 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:28.219669
2023-05-06 14:20:34,822 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:18
2023-05-06 14:20:34,825 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 14:20:34,827 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 13G
2023-05-06 14:20:34,829 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 14:20:34,831 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9971, batch_loss: 0.0386, loss: 0.0111 ||: 100%|##########| 1250/1250 [00:26<00:00, 47.42it/s]

2023-05-06 14:21:01,200 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9202, batch_loss: 0.0040, loss: 0.3428 ||: 100%|##########| 157/157 [00:01<00:00, 137.34it/s]

2023-05-06 14:21:02,350 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:21:02,351 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.997  |     0.920
2023-05-06 14:21:02,353 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1125.277  |       N/A
2023-05-06 14:21:02,355 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.011  |     0.343
2023-05-06 14:21:02,356 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  13547.520  |       N/A





2023-05-06 14:21:03,292 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:28.467022
2023-05-06 14:21:03,299 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:49
2023-05-06 14:21:03,301 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 14:21:03,303 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 13G
2023-05-06 14:21:03,308 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 14:21:03,310 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9999, batch_loss: 0.0005, loss: 0.0016 ||: 100%|##########| 1250/1250 [00:26<00:00, 46.58it/s]

2023-05-06 14:21:30,148 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9234, batch_loss: 0.0010, loss: 0.4122 ||: 100%|##########| 157/157 [00:01<00:00, 148.77it/s]

2023-05-06 14:21:31,216 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:21:31,220 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.923
2023-05-06 14:21:31,221 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1125.446  |       N/A
2023-05-06 14:21:31,225 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.002  |     0.412
2023-05-06 14:21:31,230 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  13547.520  |       N/A





2023-05-06 14:21:31,993 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:28.691854
2023-05-06 14:21:31,994 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 14:21:32,071 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 13547.51953125,
  "peak_gpu_0_memory_MB": 1570.7998046875,
  "training_duration": "0:02:22.039657",
  "epoch": 4,
  "training_accuracy": 0.999925,
  "training_loss": 0.0015693517023431924,
  "training_worker_0_memory_MB": 13547.51953125,
  "training_gpu_0_memory_MB": 1125.4462890625,
  "validation_accuracy": 0.9234,
  "validation_loss": 0.4122261857053348,
  "best_validation_accuracy": 0.9282,
  "best_validation_loss": 0.18882613135561063
}
2023-05-06 14:21:32,073 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/12/model.tar.gz


[32m[I 2023-05-06 14:21:35,809][0m Trial 12 finished with value: 0.9282 and parameters: {'embedding_dim': 169, 'max_filter_size': 4, 'num_filters': 90}. Best is trial 8 with value: 0.9336.[0m


2023-05-06 14:21:35,944 - INFO - allennlp.common.params - evaluation = None
2023-05-06 14:21:35,946 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 14:21:35,949 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 14:21:35,950 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 14:21:35,952 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 14:21:35,954 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 14:21:35,955 - INFO - allennlp.common.params - type = default
2023-05-06 14:21:35,958 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 14:21:35,959 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 14:21:35,960 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 14:21:35,961 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 14:21:35,962 - INFO - all

loading instances: 40000it [01:24, 472.03it/s]

2023-05-06 14:23:00,755 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 14:23:00,757 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 14:23:00,759 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 14:23:00,761 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 14:23:00,762 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 14:23:00,764 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 14:23:00,765 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 14:23:00,766 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 14:23:00,767 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 14:23:00,768 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:09, 525.56it/s]

2023-05-06 14:23:10,288 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 14:23:10,289 - INFO - allennlp.common.params - type = from_instances
2023-05-06 14:23:10,294 - INFO - allennlp.common.params - min_count = None
2023-05-06 14:23:10,294 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 14:23:10,297 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 14:23:10,298 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 14:23:10,299 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 14:23:10,301 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 14:23:10,303 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 14:23:10,305 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 14:23:10,306 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 14


building vocab: 40000it [00:03, 11878.54it/s]


2023-05-06 14:23:13,823 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 14:23:13,825 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 14:23:13,828 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 14:23:13,829 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 14:23:13,832 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 168
2023-05-06 14:23:13,834 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 14:23:13,836 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 14:23:13,837 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 14:23:13,838 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 14:23:18,518 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 14:23:18,519 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 14:23:18,528 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9105, batch_loss: 0.0635, loss: 0.2373 ||: 100%|##########| 1250/1250 [00:44<00:00, 28.19it/s]

2023-05-06 14:24:02,748 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 14:24:02,769 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 14:24:02,771 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 14:24:02,780 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9218, batch_loss: 0.0294, loss: 0.2042 ||: 100%|##########| 157/157 [00:01<00:00, 92.21it/s]

2023-05-06 14:24:04,459 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:24:04,462 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.910  |     0.922
2023-05-06 14:24:04,463 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1125.514  |       N/A
2023-05-06 14:24:04,465 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.237  |     0.204
2023-05-06 14:24:04,466 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  14126.582  |       N/A





2023-05-06 14:24:05,171 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:46.789207
2023-05-06 14:24:05,179 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:06:54
2023-05-06 14:24:05,181 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 14:24:05,185 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 14G
2023-05-06 14:24:05,188 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.5G
2023-05-06 14:24:05,190 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9513, batch_loss: 0.1558, loss: 0.1266 ||: 100%|##########| 1250/1250 [00:43<00:00, 28.85it/s]

2023-05-06 14:24:48,526 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9314, batch_loss: 0.0162, loss: 0.1931 ||: 100%|##########| 157/157 [00:01<00:00, 88.05it/s]

2023-05-06 14:24:50,316 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:24:50,318 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.951  |     0.931
2023-05-06 14:24:50,319 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1488.656  |       N/A
2023-05-06 14:24:50,321 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.127  |     0.193
2023-05-06 14:24:50,322 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  14190.691  |       N/A





2023-05-06 14:24:51,217 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:46.035931
2023-05-06 14:24:51,220 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:06:07
2023-05-06 14:24:51,222 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 14:24:51,224 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 14G
2023-05-06 14:24:51,225 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.5G
2023-05-06 14:24:51,228 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9868, batch_loss: 0.0004, loss: 0.0389 ||: 100%|##########| 1250/1250 [00:43<00:00, 28.45it/s]

2023-05-06 14:25:35,171 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9322, batch_loss: 0.0015, loss: 0.2724 ||: 100%|##########| 157/157 [00:01<00:00, 88.05it/s]

2023-05-06 14:25:36,965 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:25:36,967 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.987  |     0.932
2023-05-06 14:25:36,968 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1488.781  |       N/A
2023-05-06 14:25:36,970 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.039  |     0.272
2023-05-06 14:25:36,971 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  14190.691  |       N/A





2023-05-06 14:25:37,918 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:46.696131
2023-05-06 14:25:37,920 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:05:23
2023-05-06 14:25:37,922 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 14:25:37,923 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 14G
2023-05-06 14:25:37,927 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.5G
2023-05-06 14:25:37,928 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9983, batch_loss: 0.0341, loss: 0.0066 ||: 100%|##########| 1250/1250 [00:43<00:00, 28.75it/s]

2023-05-06 14:26:21,413 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9254, batch_loss: 0.0008, loss: 0.3365 ||: 100%|##########| 157/157 [00:01<00:00, 93.39it/s]

2023-05-06 14:26:23,101 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:26:23,106 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.998  |     0.925
2023-05-06 14:26:23,107 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1488.597  |       N/A
2023-05-06 14:26:23,109 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.007  |     0.337
2023-05-06 14:26:23,111 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  14190.691  |       N/A





2023-05-06 14:26:23,784 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:45.861875
2023-05-06 14:26:23,785 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:37
2023-05-06 14:26:23,793 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 14:26:23,797 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 14G
2023-05-06 14:26:23,801 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.5G
2023-05-06 14:26:23,803 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9998, batch_loss: 0.0016, loss: 0.0012 ||: 100%|##########| 1250/1250 [00:43<00:00, 28.54it/s]

2023-05-06 14:27:07,610 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9276, batch_loss: 0.0005, loss: 0.4416 ||: 100%|##########| 157/157 [00:01<00:00, 93.89it/s]

2023-05-06 14:27:09,290 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:27:09,292 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.928
2023-05-06 14:27:09,294 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1488.766  |       N/A
2023-05-06 14:27:09,296 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.001  |     0.442
2023-05-06 14:27:09,302 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  14190.691  |       N/A





2023-05-06 14:27:10,055 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:46.262611
2023-05-06 14:27:10,057 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 14:27:10,130 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 14190.69140625,
  "peak_gpu_0_memory_MB": 1488.78076171875,
  "training_duration": "0:03:50.907679",
  "epoch": 4,
  "training_accuracy": 0.9998,
  "training_loss": 0.0012061578352280777,
  "training_worker_0_memory_MB": 14190.69140625,
  "training_gpu_0_memory_MB": 1488.76611328125,
  "validation_accuracy": 0.9276,
  "validation_loss": 0.4416416408851888,
  "best_validation_accuracy": 0.9314,
  "best_validation_loss": 0.19308543965742467
}
2023-05-06 14:27:10,132 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/13/model.tar.gz


[32m[I 2023-05-06 14:27:13,532][0m Trial 13 finished with value: 0.9314 and parameters: {'embedding_dim': 168, 'max_filter_size': 5, 'num_filters': 145}. Best is trial 8 with value: 0.9336.[0m


2023-05-06 14:27:13,619 - INFO - allennlp.common.params - evaluation = None
2023-05-06 14:27:13,621 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 14:27:13,624 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 14:27:13,626 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 14:27:13,628 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 14:27:13,630 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 14:27:13,632 - INFO - allennlp.common.params - type = default
2023-05-06 14:27:13,634 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 14:27:13,635 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 14:27:13,637 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 14:27:13,638 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 14:27:13,639 - INFO - all

loading instances: 40000it [01:20, 494.47it/s]

2023-05-06 14:28:36,181 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 14:28:36,184 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 14:28:36,189 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 14:28:36,191 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 14:28:36,192 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 14:28:36,195 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 14:28:36,198 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 14:28:36,199 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 14:28:36,202 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 14:28:36,203 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:12, 392.70it/s]

2023-05-06 14:28:48,948 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 14:28:48,950 - INFO - allennlp.common.params - type = from_instances
2023-05-06 14:28:48,952 - INFO - allennlp.common.params - min_count = None
2023-05-06 14:28:48,954 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 14:28:48,956 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 14:28:48,957 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 14:28:48,959 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 14:28:48,961 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 14:28:48,962 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 14:28:48,963 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 14:28:48,964 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 14


building vocab: 40000it [00:04, 8950.07it/s] 


2023-05-06 14:28:53,577 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 14:28:53,579 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 14:28:53,582 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 14:28:53,584 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 14:28:53,586 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 174
2023-05-06 14:28:53,589 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 14:28:53,591 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 14:28:53,592 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 14:28:53,594 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 14:28:58,412 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 14:28:58,414 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 14:28:58,427 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9073, batch_loss: 0.0830, loss: 0.2424 ||: 100%|##########| 1250/1250 [00:27<00:00, 46.01it/s]

2023-05-06 14:29:25,475 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 14:29:25,493 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 14:29:25,496 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 14:29:25,503 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9244, batch_loss: 0.0397, loss: 0.1944 ||: 100%|##########| 157/157 [00:01<00:00, 144.64it/s]

2023-05-06 14:29:26,570 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:29:26,573 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.907  |     0.924
2023-05-06 14:29:26,574 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1488.834  |       N/A
2023-05-06 14:29:26,575 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.242  |     0.194
2023-05-06 14:29:26,577 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  14778.973  |       N/A





2023-05-06 14:29:27,310 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:29.014512
2023-05-06 14:29:27,320 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:14
2023-05-06 14:29:27,322 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 14:29:27,326 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 14G
2023-05-06 14:29:27,340 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 14:29:27,344 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9483, batch_loss: 0.1717, loss: 0.1330 ||: 100%|##########| 1250/1250 [00:26<00:00, 46.94it/s]

2023-05-06 14:29:53,987 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9282, batch_loss: 0.0130, loss: 0.1877 ||: 100%|##########| 157/157 [00:01<00:00, 138.45it/s]

2023-05-06 14:29:55,130 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:29:55,131 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.948  |     0.928
2023-05-06 14:29:55,136 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1127.440  |       N/A
2023-05-06 14:29:55,138 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.133  |     0.188
2023-05-06 14:29:55,139 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  14844.824  |       N/A





2023-05-06 14:29:56,070 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:28.748174
2023-05-06 14:29:56,072 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:47
2023-05-06 14:29:56,074 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 14:29:56,077 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 14G
2023-05-06 14:29:56,078 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 14:29:56,082 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9829, batch_loss: 0.0013, loss: 0.0495 ||: 100%|##########| 1250/1250 [00:26<00:00, 46.95it/s]

2023-05-06 14:30:22,715 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9264, batch_loss: 0.0035, loss: 0.2498 ||: 100%|##########| 157/157 [00:01<00:00, 155.12it/s]

2023-05-06 14:30:23,734 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:30:23,740 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.983  |     0.926
2023-05-06 14:30:23,743 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1127.440  |       N/A
2023-05-06 14:30:23,746 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.049  |     0.250
2023-05-06 14:30:23,747 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  14845.082  |       N/A





2023-05-06 14:30:24,562 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:28.487319
2023-05-06 14:30:24,563 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:19
2023-05-06 14:30:24,566 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 14:30:24,568 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 14G
2023-05-06 14:30:24,570 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 14:30:24,572 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9971, batch_loss: 0.0236, loss: 0.0108 ||: 100%|##########| 1250/1250 [00:26<00:00, 46.57it/s]

2023-05-06 14:30:51,416 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9224, batch_loss: 0.0007, loss: 0.3001 ||: 100%|##########| 157/157 [00:01<00:00, 133.61it/s]

2023-05-06 14:30:52,599 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:30:52,605 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.997  |     0.922
2023-05-06 14:30:52,607 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1127.440  |       N/A
2023-05-06 14:30:52,609 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.011  |     0.300
2023-05-06 14:30:52,611 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  14845.082  |       N/A





2023-05-06 14:30:53,394 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:28.828059
2023-05-06 14:30:53,395 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:51
2023-05-06 14:30:53,402 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 14:30:53,406 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 14G
2023-05-06 14:30:53,410 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 14:30:53,414 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9999, batch_loss: 0.0015, loss: 0.0018 ||: 100%|##########| 1250/1250 [00:26<00:00, 46.83it/s]

2023-05-06 14:31:20,111 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9218, batch_loss: 0.0006, loss: 0.3495 ||: 100%|##########| 157/157 [00:01<00:00, 138.43it/s]

2023-05-06 14:31:21,253 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:31:21,255 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.922
2023-05-06 14:31:21,257 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1127.440  |       N/A
2023-05-06 14:31:21,258 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.002  |     0.350
2023-05-06 14:31:21,260 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  14845.082  |       N/A





2023-05-06 14:31:22,234 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:28.831699
2023-05-06 14:31:22,237 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 14:31:22,328 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 14845.08203125,
  "peak_gpu_0_memory_MB": 1488.833984375,
  "training_duration": "0:02:22.957476",
  "epoch": 4,
  "training_accuracy": 0.99985,
  "training_loss": 0.001848954895888164,
  "training_worker_0_memory_MB": 14845.08203125,
  "training_gpu_0_memory_MB": 1127.43994140625,
  "validation_accuracy": 0.9218,
  "validation_loss": 0.349537079265878,
  "best_validation_accuracy": 0.9282,
  "best_validation_loss": 0.18767630565721707
}
2023-05-06 14:31:22,330 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/14/model.tar.gz


[32m[I 2023-05-06 14:31:27,083][0m Trial 14 finished with value: 0.9282 and parameters: {'embedding_dim': 174, 'max_filter_size': 4, 'num_filters': 83}. Best is trial 8 with value: 0.9336.[0m


2023-05-06 14:31:27,173 - INFO - allennlp.common.params - evaluation = None
2023-05-06 14:31:27,175 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 14:31:27,182 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 14:31:27,183 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 14:31:27,184 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 14:31:27,185 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 14:31:27,186 - INFO - allennlp.common.params - type = default
2023-05-06 14:31:27,189 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 14:31:27,192 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 14:31:27,194 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 14:31:27,194 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 14:31:27,196 - INFO - all

loading instances: 40000it [01:20, 496.85it/s]

2023-05-06 14:32:47,754 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 14:32:47,758 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 14:32:47,759 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 14:32:47,761 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 14:32:47,765 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 14:32:47,766 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 14:32:47,767 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 14:32:47,771 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 14:32:47,773 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 14:32:47,774 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:09, 523.12it/s]

2023-05-06 14:32:57,343 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 14:32:57,352 - INFO - allennlp.common.params - type = from_instances
2023-05-06 14:32:57,353 - INFO - allennlp.common.params - min_count = None
2023-05-06 14:32:57,354 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 14:32:57,356 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 14:32:57,358 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 14:32:57,359 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 14:32:57,360 - INFO - allennlp.common.params - tokens_to_add = None





2023-05-06 14:32:57,361 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 14:32:57,362 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 14:32:57,364 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 14:32:57,365 - INFO - allennlp.data.vocabulary - Fitting token dictionary from dataset.


building vocab: 40000it [00:03, 11528.00it/s]


2023-05-06 14:33:00,982 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 14:33:00,984 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 14:33:00,987 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 14:33:00,989 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 14:33:00,991 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 184
2023-05-06 14:33:00,992 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 14:33:00,994 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 14:33:00,996 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 14:33:01,002 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 14:33:11,458 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 14:33:11,460 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 14:33:11,468 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9113, batch_loss: 0.0565, loss: 0.2377 ||: 100%|##########| 1250/1250 [00:56<00:00, 22.10it/s]

2023-05-06 14:34:07,882 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 14:34:07,909 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 14:34:07,912 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 14:34:07,920 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9196, batch_loss: 0.0391, loss: 0.2078 ||: 100%|##########| 157/157 [00:02<00:00, 66.02it/s]

2023-05-06 14:34:10,267 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:34:10,271 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.911  |     0.920
2023-05-06 14:34:10,272 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1127.440  |       N/A
2023-05-06 14:34:10,274 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.238  |     0.208
2023-05-06 14:34:10,277 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  15411.969  |       N/A





2023-05-06 14:34:11,050 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:59.742993
2023-05-06 14:34:11,051 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:08:50
2023-05-06 14:34:11,054 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 14:34:11,056 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 15G
2023-05-06 14:34:11,059 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 2.1G
2023-05-06 14:34:11,068 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9520, batch_loss: 0.1739, loss: 0.1271 ||: 100%|##########| 1250/1250 [00:55<00:00, 22.35it/s]

2023-05-06 14:35:07,005 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9288, batch_loss: 0.0084, loss: 0.2154 ||: 100%|##########| 157/157 [00:02<00:00, 62.46it/s]

2023-05-06 14:35:09,526 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:35:09,527 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.952  |     0.929
2023-05-06 14:35:09,529 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  2161.695  |       N/A
2023-05-06 14:35:09,530 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.127  |     0.215
2023-05-06 14:35:09,531 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  15481.988  |       N/A





2023-05-06 14:35:10,364 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:59.310353
2023-05-06 14:35:10,366 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:07:52
2023-05-06 14:35:10,368 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 14:35:10,370 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 15G
2023-05-06 14:35:10,373 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 2.1G
2023-05-06 14:35:10,375 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9868, batch_loss: 0.0020, loss: 0.0388 ||: 100%|##########| 1250/1250 [00:56<00:00, 22.25it/s]

2023-05-06 14:36:06,571 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9274, batch_loss: 0.0018, loss: 0.2506 ||: 100%|##########| 157/157 [00:02<00:00, 66.40it/s]

2023-05-06 14:36:08,947 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:36:08,951 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.987  |     0.927
2023-05-06 14:36:08,955 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  2161.819  |       N/A
2023-05-06 14:36:08,957 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.039  |     0.251
2023-05-06 14:36:08,961 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  15481.992  |       N/A





2023-05-06 14:36:09,752 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:59.384066
2023-05-06 14:36:09,754 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:06:54
2023-05-06 14:36:09,756 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 14:36:09,758 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 15G
2023-05-06 14:36:09,761 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 2.1G
2023-05-06 14:36:09,763 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9977, batch_loss: 0.0848, loss: 0.0080 ||: 100%|##########| 1250/1250 [00:55<00:00, 22.39it/s]

2023-05-06 14:37:05,604 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9254, batch_loss: 0.0021, loss: 0.3295 ||: 100%|##########| 157/157 [00:02<00:00, 63.58it/s]

2023-05-06 14:37:08,082 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:37:08,084 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.998  |     0.925
2023-05-06 14:37:08,086 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  2161.635  |       N/A
2023-05-06 14:37:08,087 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.008  |     0.329
2023-05-06 14:37:08,093 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  15481.992  |       N/A





2023-05-06 14:37:08,876 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:59.119140
2023-05-06 14:37:08,878 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 14:37:08,962 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 0,
  "peak_worker_0_memory_MB": 15481.9921875,
  "peak_gpu_0_memory_MB": 2161.8193359375,
  "training_duration": "0:03:56.774666",
  "epoch": 3,
  "training_accuracy": 0.997725,
  "training_loss": 0.007975286129316373,
  "training_worker_0_memory_MB": 15481.9921875,
  "training_gpu_0_memory_MB": 2161.63525390625,
  "validation_accuracy": 0.9254,
  "validation_loss": 0.32947119506770023,
  "best_validation_accuracy": 0.9196,
  "best_validation_loss": 0.2078077605551785
}
2023-05-06 14:37:08,964 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/15/model.tar.gz


[32m[I 2023-05-06 14:37:12,681][0m Trial 15 finished with value: 0.9196 and parameters: {'embedding_dim': 184, 'max_filter_size': 5, 'num_filters': 251}. Best is trial 8 with value: 0.9336.[0m


2023-05-06 14:37:14,230 - INFO - allennlp.common.params - evaluation = None
2023-05-06 14:37:14,231 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 14:37:14,234 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 14:37:14,238 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 14:37:14,240 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 14:37:14,242 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 14:37:14,243 - INFO - allennlp.common.params - type = default
2023-05-06 14:37:14,245 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 14:37:14,246 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 14:37:14,247 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 14:37:14,248 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 14:37:14,249 - INFO - all

loading instances: 40000it [01:16, 520.44it/s]

2023-05-06 14:38:31,147 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 14:38:31,151 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32





2023-05-06 14:38:31,153 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 14:38:31,157 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 14:38:31,159 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 14:38:31,160 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 14:38:31,163 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 14:38:31,167 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 14:38:31,167 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 14:38:31,169 - INFO - allennlp.common.params - validation_data_loader.cuda_device = None
2023-05-06 14:38:31,170 - INFO - allennlp.common.params - validation_data_loader.quiet = False
2023-05-06 14:38:31,171 - INFO - allennlp.common.params - validation_data_loader.collate_fn

loading instances: 5000it [00:11, 427.69it/s]

2023-05-06 14:38:42,866 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 14:38:42,869 - INFO - allennlp.common.params - type = from_instances
2023-05-06 14:38:42,872 - INFO - allennlp.common.params - min_count = None
2023-05-06 14:38:42,874 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 14:38:42,876 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 14:38:42,880 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 14:38:42,882 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 14:38:42,882 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 14:38:42,888 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 14:38:42,891 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 14:38:42,893 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 14


building vocab: 40000it [00:04, 9576.57it/s]


2023-05-06 14:38:47,278 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 14:38:47,281 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 14:38:47,283 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 14:38:47,284 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 14:38:47,286 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 146
2023-05-06 14:38:47,288 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 14:38:47,290 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 14:38:47,291 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 14:38:47,292 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 14:38:53,773 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 14:38:53,775 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 14:38:53,781 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9100, batch_loss: 0.0479, loss: 0.2433 ||: 100%|##########| 1250/1250 [00:25<00:00, 48.26it/s]

2023-05-06 14:39:19,571 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 14:39:19,587 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 14:39:19,589 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 14:39:19,596 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9218, batch_loss: 0.0376, loss: 0.2024 ||: 100%|##########| 157/157 [00:01<00:00, 139.29it/s]

2023-05-06 14:39:20,708 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:39:20,710 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.910  |     0.922
2023-05-06 14:39:20,713 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  2161.805  |       N/A
2023-05-06 14:39:20,715 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.243  |     0.202
2023-05-06 14:39:20,720 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  16043.980  |       N/A





2023-05-06 14:39:21,411 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:27.750536
2023-05-06 14:39:21,414 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:03
2023-05-06 14:39:21,416 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 14:39:21,418 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 16G
2023-05-06 14:39:21,419 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 14:39:21,421 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9464, batch_loss: 0.1650, loss: 0.1378 ||: 100%|##########| 1250/1250 [00:25<00:00, 48.47it/s]

2023-05-06 14:39:47,218 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9310, batch_loss: 0.0154, loss: 0.1849 ||: 100%|##########| 157/157 [00:00<00:00, 158.14it/s]

2023-05-06 14:39:48,217 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:39:48,219 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.946  |     0.931
2023-05-06 14:39:48,221 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1138.248  |       N/A
2023-05-06 14:39:48,223 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.138  |     0.185
2023-05-06 14:39:48,225 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  16102.754  |       N/A





2023-05-06 14:39:48,835 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:27.418946
2023-05-06 14:39:48,839 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:38
2023-05-06 14:39:48,841 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 14:39:48,843 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 16G
2023-05-06 14:39:48,850 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 14:39:48,854 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9813, batch_loss: 0.0064, loss: 0.0536 ||: 100%|##########| 1250/1250 [00:25<00:00, 48.83it/s]

2023-05-06 14:40:14,465 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9270, batch_loss: 0.0066, loss: 0.2300 ||: 100%|##########| 157/157 [00:01<00:00, 135.04it/s]

2023-05-06 14:40:15,636 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:40:15,637 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.981  |     0.927
2023-05-06 14:40:15,639 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1138.373  |       N/A
2023-05-06 14:40:15,642 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.054  |     0.230
2023-05-06 14:40:15,643 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  16102.961  |       N/A





2023-05-06 14:40:16,454 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:27.613189
2023-05-06 14:40:16,457 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:11
2023-05-06 14:40:16,458 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 14:40:16,460 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 16G
2023-05-06 14:40:16,463 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 14:40:16,464 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9972, batch_loss: 0.0506, loss: 0.0113 ||: 100%|##########| 1250/1250 [00:25<00:00, 48.70it/s]

2023-05-06 14:40:42,137 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9264, batch_loss: 0.0009, loss: 0.3206 ||: 100%|##########| 157/157 [00:00<00:00, 157.58it/s]

2023-05-06 14:40:43,140 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:40:43,142 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.997  |     0.926
2023-05-06 14:40:43,146 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1138.188  |       N/A
2023-05-06 14:40:43,147 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.011  |     0.321
2023-05-06 14:40:43,154 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  16102.961  |       N/A





2023-05-06 14:40:43,733 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:27.274079
2023-05-06 14:40:43,734 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:44
2023-05-06 14:40:43,737 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 14:40:43,739 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 16G
2023-05-06 14:40:43,741 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 14:40:43,744 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9998, batch_loss: 0.0006, loss: 0.0018 ||: 100%|##########| 1250/1250 [00:26<00:00, 47.62it/s]

2023-05-06 14:41:10,001 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9234, batch_loss: 0.0006, loss: 0.3435 ||: 100%|##########| 157/157 [00:01<00:00, 156.03it/s]

2023-05-06 14:41:11,023 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:41:11,029 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.923
2023-05-06 14:41:11,032 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1138.358  |       N/A
2023-05-06 14:41:11,036 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.002  |     0.344
2023-05-06 14:41:11,036 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  16102.961  |       N/A





2023-05-06 14:41:11,651 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:27.914151
2023-05-06 14:41:11,652 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 14:41:11,721 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 16102.9609375,
  "peak_gpu_0_memory_MB": 2161.8046875,
  "training_duration": "0:02:17.362650",
  "epoch": 4,
  "training_accuracy": 0.999825,
  "training_loss": 0.0018162159795625484,
  "training_worker_0_memory_MB": 16102.9609375,
  "training_gpu_0_memory_MB": 1138.35791015625,
  "validation_accuracy": 0.9234,
  "validation_loss": 0.3435083372862476,
  "best_validation_accuracy": 0.931,
  "best_validation_loss": 0.1849452287765445
}
2023-05-06 14:41:11,725 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/16/model.tar.gz


[32m[I 2023-05-06 14:41:14,605][0m Trial 16 finished with value: 0.931 and parameters: {'embedding_dim': 146, 'max_filter_size': 4, 'num_filters': 119}. Best is trial 8 with value: 0.9336.[0m


2023-05-06 14:41:14,702 - INFO - allennlp.common.params - evaluation = None
2023-05-06 14:41:14,705 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 14:41:14,708 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 14:41:14,711 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 14:41:14,713 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 14:41:14,715 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 14:41:14,717 - INFO - allennlp.common.params - type = default
2023-05-06 14:41:14,718 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 14:41:14,720 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 14:41:14,721 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 14:41:14,722 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 14:41:14,723 - INFO - all

loading instances: 40000it [01:25, 465.89it/s]

2023-05-06 14:42:40,622 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 14:42:40,635 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 14:42:40,637 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 14:42:40,639 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 14:42:40,640 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 14:42:40,642 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 14:42:40,644 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 14:42:40,645 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 14:42:40,646 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 14:42:40,647 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:07, 683.78it/s]

2023-05-06 14:42:47,966 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 14:42:47,968 - INFO - allennlp.common.params - type = from_instances
2023-05-06 14:42:47,972 - INFO - allennlp.common.params - min_count = None
2023-05-06 14:42:47,974 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 14:42:47,977 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 14:42:47,979 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 14:42:47,980 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 14:42:47,981 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 14:42:47,984 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 14:42:47,985 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 14:42:47,986 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 14


building vocab: 40000it [00:05, 6785.12it/s]


2023-05-06 14:42:54,109 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 14:42:54,111 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 14:42:54,113 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 14:42:54,115 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 14:42:54,117 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 152
2023-05-06 14:42:54,119 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 14:42:54,122 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 14:42:54,124 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 14:42:54,125 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 14:42:58,797 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 14:42:58,799 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 14:42:58,806 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9110, batch_loss: 0.0679, loss: 0.2381 ||: 100%|##########| 1250/1250 [00:43<00:00, 28.48it/s]

2023-05-06 14:43:42,549 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 14:43:42,574 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 14:43:42,579 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 14:43:42,585 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9226, batch_loss: 0.0403, loss: 0.2025 ||: 100%|##########| 157/157 [00:01<00:00, 81.62it/s]

2023-05-06 14:43:44,484 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:43:44,486 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.911  |     0.923
2023-05-06 14:43:44,488 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1138.426  |       N/A
2023-05-06 14:43:44,491 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.238  |     0.202
2023-05-06 14:43:44,493 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  16694.562  |       N/A





2023-05-06 14:43:45,272 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:46.622698
2023-05-06 14:43:45,277 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:06:52
2023-05-06 14:43:45,281 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 14:43:45,285 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 16G
2023-05-06 14:43:45,287 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.5G
2023-05-06 14:43:45,289 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9525, batch_loss: 0.1813, loss: 0.1256 ||: 100%|##########| 1250/1250 [00:42<00:00, 29.29it/s]

2023-05-06 14:44:27,978 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9316, batch_loss: 0.0075, loss: 0.1937 ||: 100%|##########| 157/157 [00:01<00:00, 88.94it/s]

2023-05-06 14:44:29,755 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:44:29,757 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.952  |     0.932
2023-05-06 14:44:29,761 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1517.515  |       N/A
2023-05-06 14:44:29,762 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.126  |     0.194
2023-05-06 14:44:29,765 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  16752.348  |       N/A





2023-05-06 14:44:30,486 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:45.205269
2023-05-06 14:44:30,488 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:06:04
2023-05-06 14:44:30,490 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 14:44:30,494 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 16G
2023-05-06 14:44:30,496 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.5G
2023-05-06 14:44:30,498 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9862, batch_loss: 0.0004, loss: 0.0395 ||: 100%|##########| 1250/1250 [00:43<00:00, 28.67it/s]

2023-05-06 14:45:14,099 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9254, batch_loss: 0.0017, loss: 0.2790 ||: 100%|##########| 157/157 [00:01<00:00, 88.76it/s]

2023-05-06 14:45:15,880 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation





2023-05-06 14:45:15,888 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.986  |     0.925
2023-05-06 14:45:15,891 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1517.640  |       N/A
2023-05-06 14:45:15,894 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.039  |     0.279
2023-05-06 14:45:15,896 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  16752.348  |       N/A
2023-05-06 14:45:16,546 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:46.055261
2023-05-06 14:45:16,548 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:05:20
2023-05-06 14:45:16,550 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 14:45:16,554 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 16G
2023-05-06 14:45:16,557 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memo

accuracy: 0.9986, batch_loss: 0.0538, loss: 0.0068 ||: 100%|##########| 1250/1250 [00:43<00:00, 29.04it/s]

2023-05-06 14:45:59,608 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9236, batch_loss: 0.0006, loss: 0.3207 ||: 100%|##########| 157/157 [00:01<00:00, 81.39it/s]

2023-05-06 14:46:01,547 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:46:01,550 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.999  |     0.924
2023-05-06 14:46:01,552 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1517.456  |       N/A
2023-05-06 14:46:01,556 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.007  |     0.321
2023-05-06 14:46:01,558 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  16752.348  |       N/A





2023-05-06 14:46:02,377 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:45.826973
2023-05-06 14:46:02,380 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:34
2023-05-06 14:46:02,383 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 14:46:02,385 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 16G
2023-05-06 14:46:02,387 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.5G
2023-05-06 14:46:02,388 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 1.0000, batch_loss: 0.0045, loss: 0.0009 ||: 100%|##########| 1250/1250 [00:43<00:00, 28.93it/s]

2023-05-06 14:46:45,595 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9236, batch_loss: 0.0003, loss: 0.3586 ||: 100%|##########| 157/157 [00:01<00:00, 89.32it/s]

2023-05-06 14:46:47,361 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:46:47,363 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.924
2023-05-06 14:46:47,365 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1517.625  |       N/A
2023-05-06 14:46:47,367 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.001  |     0.359
2023-05-06 14:46:47,368 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  16752.348  |       N/A





2023-05-06 14:46:48,156 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:45.772399
2023-05-06 14:46:48,163 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 14:46:48,265 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 16752.34765625,
  "peak_gpu_0_memory_MB": 1517.6396484375,
  "training_duration": "0:03:48.712109",
  "epoch": 4,
  "training_accuracy": 0.99995,
  "training_loss": 0.0009397718255400832,
  "training_worker_0_memory_MB": 16752.34765625,
  "training_gpu_0_memory_MB": 1517.625,
  "validation_accuracy": 0.9236,
  "validation_loss": 0.35855625579793565,
  "best_validation_accuracy": 0.9316,
  "best_validation_loss": 0.1936592226373447
}
2023-05-06 14:46:48,267 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/17/model.tar.gz


[32m[I 2023-05-06 14:46:52,494][0m Trial 17 finished with value: 0.9316 and parameters: {'embedding_dim': 152, 'max_filter_size': 5, 'num_filters': 163}. Best is trial 8 with value: 0.9336.[0m


2023-05-06 14:46:52,625 - INFO - allennlp.common.params - evaluation = None
2023-05-06 14:46:52,628 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 14:46:52,632 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 14:46:52,634 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 14:46:52,635 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 14:46:52,638 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 14:46:52,640 - INFO - allennlp.common.params - type = default
2023-05-06 14:46:52,645 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 14:46:52,647 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 14:46:52,648 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 14:46:52,651 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 14:46:52,652 - INFO - all

loading instances: 40000it [01:23, 481.34it/s]

2023-05-06 14:48:15,805 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 14:48:15,807 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 14:48:15,808 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 14:48:15,810 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 14:48:15,814 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 14:48:15,815 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 14:48:15,818 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 14:48:15,819 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 14:48:15,820 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 14:48:15,821 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:09, 532.51it/s]

2023-05-06 14:48:25,219 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 14:48:25,225 - INFO - allennlp.common.params - type = from_instances
2023-05-06 14:48:25,228 - INFO - allennlp.common.params - min_count = None
2023-05-06 14:48:25,230 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 14:48:25,233 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 14:48:25,236 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 14:48:25,239 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 14:48:25,240 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 14:48:25,240 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 14:48:25,241 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 14:48:25,242 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 14


building vocab: 40000it [00:03, 11356.38it/s]


2023-05-06 14:48:28,914 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 14:48:28,917 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 14:48:28,922 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 14:48:28,924 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 14:48:28,926 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 152
2023-05-06 14:48:28,927 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 14:48:28,931 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 14:48:28,933 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 14:48:28,935 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 14:48:34,024 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 14:48:34,026 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 14:48:34,033 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9112, batch_loss: 0.0481, loss: 0.2383 ||: 100%|##########| 1250/1250 [00:34<00:00, 36.67it/s]


2023-05-06 14:49:07,965 - INFO - allennlp.training.gradient_descent_trainer - Validating


  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 14:49:07,985 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 14:49:07,987 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 14:49:07,992 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9224, batch_loss: 0.0401, loss: 0.2020 ||: 100%|##########| 157/157 [00:01<00:00, 108.53it/s]

2023-05-06 14:49:09,423 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:49:09,424 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.911  |     0.922
2023-05-06 14:49:09,426 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1517.693  |       N/A
2023-05-06 14:49:09,428 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.238  |     0.202
2023-05-06 14:49:09,430 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  17349.891  |       N/A





2023-05-06 14:49:10,230 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:36.362378
2023-05-06 14:49:10,237 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:05:19
2023-05-06 14:49:10,239 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 14:49:10,240 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 17G
2023-05-06 14:49:10,245 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.3G
2023-05-06 14:49:10,247 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9514, batch_loss: 0.1477, loss: 0.1264 ||: 100%|##########| 1250/1250 [00:33<00:00, 36.84it/s]

2023-05-06 14:49:44,183 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9314, batch_loss: 0.0090, loss: 0.1880 ||: 100%|##########| 157/157 [00:01<00:00, 106.82it/s]

2023-05-06 14:49:45,659 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:49:45,661 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.951  |     0.931
2023-05-06 14:49:45,664 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1305.505  |       N/A
2023-05-06 14:49:45,666 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.126  |     0.188
2023-05-06 14:49:45,667 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  17407.859  |       N/A





2023-05-06 14:49:46,540 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:36.301084
2023-05-06 14:49:46,542 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:47
2023-05-06 14:49:46,544 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 14:49:46,546 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 17G
2023-05-06 14:49:46,548 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.3G
2023-05-06 14:49:46,550 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9850, batch_loss: 0.0014, loss: 0.0430 ||: 100%|##########| 1250/1250 [00:34<00:00, 36.69it/s]

2023-05-06 14:50:20,619 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9282, batch_loss: 0.0026, loss: 0.2453 ||: 100%|##########| 157/157 [00:01<00:00, 106.75it/s]

2023-05-06 14:50:22,102 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:50:22,104 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.985  |     0.928
2023-05-06 14:50:22,106 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1305.629  |       N/A
2023-05-06 14:50:22,107 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.043  |     0.245
2023-05-06 14:50:22,109 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  17407.859  |       N/A





2023-05-06 14:50:22,917 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:36.372741
2023-05-06 14:50:22,919 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:12
2023-05-06 14:50:22,925 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 14:50:22,927 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 17G
2023-05-06 14:50:22,929 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.3G
2023-05-06 14:50:22,933 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9982, batch_loss: 0.0565, loss: 0.0076 ||: 100%|##########| 1250/1250 [00:33<00:00, 37.03it/s]

2023-05-06 14:50:56,695 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9232, batch_loss: 0.0005, loss: 0.3244 ||: 100%|##########| 157/157 [00:01<00:00, 107.30it/s]

2023-05-06 14:50:58,168 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:50:58,170 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.998  |     0.923
2023-05-06 14:50:58,172 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1305.445  |       N/A
2023-05-06 14:50:58,174 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.008  |     0.324
2023-05-06 14:50:58,175 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  17407.859  |       N/A





2023-05-06 14:50:58,973 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:36.048149
2023-05-06 14:50:58,976 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:36
2023-05-06 14:50:58,978 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 14:50:58,980 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 17G
2023-05-06 14:50:58,982 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.3G
2023-05-06 14:50:58,984 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 1.0000, batch_loss: 0.0003, loss: 0.0010 ||: 100%|##########| 1250/1250 [00:33<00:00, 36.81it/s]

2023-05-06 14:51:32,944 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9240, batch_loss: 0.0001, loss: 0.3883 ||: 100%|##########| 157/157 [00:01<00:00, 114.97it/s]

2023-05-06 14:51:34,317 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:51:34,322 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.924





2023-05-06 14:51:34,330 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1305.615  |       N/A
2023-05-06 14:51:34,331 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.001  |     0.388
2023-05-06 14:51:34,332 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  17407.859  |       N/A
2023-05-06 14:51:34,996 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:36.017883
2023-05-06 14:51:35,000 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 14:51:35,082 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 17407.859375,
  "peak_gpu_0_memory_MB": 1517.69287109375,
  "training_duration": "0:03:00.448523",
  "epoch": 4,
  "training_accuracy": 0.999975,
  "training_loss": 0.0009669119135156507,
  "training_worker_0_memory_MB": 17407.859375,
  "training_gpu_0_memory_MB": 1305.61474609375,
  "valid

[32m[I 2023-05-06 14:51:38,105][0m Trial 18 finished with value: 0.9314 and parameters: {'embedding_dim': 152, 'max_filter_size': 5, 'num_filters': 123}. Best is trial 8 with value: 0.9336.[0m


2023-05-06 14:51:38,191 - INFO - allennlp.common.params - evaluation = None
2023-05-06 14:51:38,193 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 14:51:38,198 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 14:51:38,200 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 14:51:38,202 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 14:51:38,204 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 14:51:38,205 - INFO - allennlp.common.params - type = default
2023-05-06 14:51:38,208 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 14:51:38,212 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 14:51:38,213 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 14:51:38,214 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 14:51:38,215 - INFO - all

loading instances: 40000it [01:27, 457.78it/s]

2023-05-06 14:53:05,643 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 14:53:05,646 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 14:53:05,654 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 14:53:05,657 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 14:53:05,659 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 14:53:05,661 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 14:53:05,664 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 14:53:05,665 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 14:53:05,666 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 14:53:05,672 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:08, 604.16it/s]


2023-05-06 14:53:13,956 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 14:53:13,959 - INFO - allennlp.common.params - type = from_instances
2023-05-06 14:53:13,960 - INFO - allennlp.common.params - min_count = None
2023-05-06 14:53:13,962 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 14:53:13,964 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 14:53:13,965 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 14:53:13,969 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 14:53:13,970 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 14:53:13,971 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 14:53:13,972 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 14:53:13,973 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 14

building vocab: 40000it [00:05, 7379.90it/s] 


2023-05-06 14:53:19,547 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 14:53:19,553 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 14:53:19,555 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 14:53:19,557 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 14:53:19,559 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 125
2023-05-06 14:53:19,566 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 14:53:19,567 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 14:53:19,569 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 14:53:19,570 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 14:53:24,239 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 14:53:24,241 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 14:53:24,247 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9097, batch_loss: 0.0682, loss: 0.2426 ||: 100%|##########| 1250/1250 [00:21<00:00, 58.52it/s]

2023-05-06 14:53:45,503 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 14:53:45,522 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 14:53:45,524 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 14:53:45,528 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9210, batch_loss: 0.0367, loss: 0.1978 ||: 100%|##########| 157/157 [00:01<00:00, 154.54it/s]

2023-05-06 14:53:46,534 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:53:46,536 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.910  |     0.921
2023-05-06 14:53:46,537 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1305.683  |       N/A
2023-05-06 14:53:46,541 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.243  |     0.198
2023-05-06 14:53:46,543 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  17971.680  |       N/A





2023-05-06 14:53:47,193 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:23.062809
2023-05-06 14:53:47,201 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:21
2023-05-06 14:53:47,203 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 14:53:47,205 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 18G
2023-05-06 14:53:47,207 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 878M
2023-05-06 14:53:47,210 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9501, batch_loss: 0.2757, loss: 0.1308 ||: 100%|##########| 1250/1250 [00:21<00:00, 56.89it/s]

2023-05-06 14:54:09,188 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9330, batch_loss: 0.0099, loss: 0.1842 ||: 100%|##########| 157/157 [00:01<00:00, 153.39it/s]

2023-05-06 14:54:10,221 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:54:10,223 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.950  |     0.933
2023-05-06 14:54:10,224 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   877.517  |       N/A
2023-05-06 14:54:10,226 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.131  |     0.184
2023-05-06 14:54:10,228 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  18019.797  |       N/A





2023-05-06 14:54:10,891 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:23.688696
2023-05-06 14:54:10,893 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:04
2023-05-06 14:54:10,895 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 14:54:10,896 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 18G
2023-05-06 14:54:10,898 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 878M
2023-05-06 14:54:10,900 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9827, batch_loss: 0.0015, loss: 0.0502 ||: 100%|##########| 1250/1250 [00:21<00:00, 56.85it/s]

2023-05-06 14:54:32,895 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9258, batch_loss: 0.0027, loss: 0.2555 ||: 100%|##########| 157/157 [00:00<00:00, 185.75it/s]

2023-05-06 14:54:33,753 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation





2023-05-06 14:54:33,754 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.983  |     0.926
2023-05-06 14:54:33,759 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   877.517  |       N/A
2023-05-06 14:54:33,760 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.050  |     0.256
2023-05-06 14:54:33,763 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  18019.797  |       N/A
2023-05-06 14:54:34,318 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:23.423138
2023-05-06 14:54:34,319 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:42
2023-05-06 14:54:34,324 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 14:54:34,325 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 18G
2023-05-06 14:54:34,327 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memo

accuracy: 0.9974, batch_loss: 0.0837, loss: 0.0111 ||: 100%|##########| 1250/1250 [00:21<00:00, 57.34it/s]

2023-05-06 14:54:56,132 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9236, batch_loss: 0.0012, loss: 0.2977 ||: 100%|##########| 157/157 [00:00<00:00, 162.59it/s]

2023-05-06 14:54:57,109 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:54:57,111 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.997  |     0.924
2023-05-06 14:54:57,112 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   877.517  |       N/A
2023-05-06 14:54:57,114 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.011  |     0.298
2023-05-06 14:54:57,116 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  18019.797  |       N/A





2023-05-06 14:54:57,768 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:23.444623
2023-05-06 14:54:57,772 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:19
2023-05-06 14:54:57,776 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 14:54:57,777 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 18G
2023-05-06 14:54:57,778 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 878M
2023-05-06 14:54:57,780 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9998, batch_loss: 0.0006, loss: 0.0019 ||: 100%|##########| 1250/1250 [00:22<00:00, 56.49it/s]

2023-05-06 14:55:19,914 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9260, batch_loss: 0.0008, loss: 0.3532 ||: 100%|##########| 157/157 [00:00<00:00, 162.87it/s]

2023-05-06 14:55:20,888 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:55:20,890 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.926
2023-05-06 14:55:20,891 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   877.517  |       N/A
2023-05-06 14:55:20,892 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.002  |     0.353
2023-05-06 14:55:20,895 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  18019.797  |       N/A





2023-05-06 14:55:21,411 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:23.635077
2023-05-06 14:55:21,412 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 14:55:21,473 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 18019.796875,
  "peak_gpu_0_memory_MB": 1305.6826171875,
  "training_duration": "0:01:56.756872",
  "epoch": 4,
  "training_accuracy": 0.999825,
  "training_loss": 0.001900963989754382,
  "training_worker_0_memory_MB": 18019.796875,
  "training_gpu_0_memory_MB": 877.5166015625,
  "validation_accuracy": 0.926,
  "validation_loss": 0.3531664236842441,
  "best_validation_accuracy": 0.933,
  "best_validation_loss": 0.18424697459977904
}
2023-05-06 14:55:21,475 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/19/model.tar.gz


[32m[I 2023-05-06 14:55:23,989][0m Trial 19 finished with value: 0.933 and parameters: {'embedding_dim': 125, 'max_filter_size': 5, 'num_filters': 62}. Best is trial 8 with value: 0.9336.[0m


2023-05-06 14:55:24,085 - INFO - allennlp.common.params - evaluation = None
2023-05-06 14:55:24,086 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 14:55:24,089 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 14:55:24,093 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 14:55:24,095 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 14:55:24,097 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 14:55:24,099 - INFO - allennlp.common.params - type = default
2023-05-06 14:55:24,100 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 14:55:24,101 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 14:55:24,102 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 14:55:24,103 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 14:55:24,105 - INFO - all

loading instances: 40000it [01:23, 479.92it/s]

2023-05-06 14:56:47,490 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 14:56:47,496 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 14:56:47,497 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 14:56:47,499 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 14:56:47,501 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 14:56:47,505 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 14:56:47,506 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 14:56:47,507 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 14:56:47,508 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 14:56:47,512 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:06, 716.59it/s]

2023-05-06 14:56:54,495 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 14:56:54,498 - INFO - allennlp.common.params - type = from_instances
2023-05-06 14:56:54,501 - INFO - allennlp.common.params - min_count = None
2023-05-06 14:56:54,507 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 14:56:54,508 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 14:56:54,509 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 14:56:54,510 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 14:56:54,511 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 14:56:54,512 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 14:56:54,513 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 14:56:54,514 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 14


building vocab: 40000it [00:04, 8615.63it/s]


2023-05-06 14:56:59,388 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 14:56:59,392 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 14:56:59,395 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 14:56:59,397 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 14:56:59,399 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 111
2023-05-06 14:56:59,400 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 14:56:59,402 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 14:56:59,403 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 14:56:59,405 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 14:57:05,708 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 14:57:05,710 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 14:57:05,719 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9103, batch_loss: 0.0785, loss: 0.2415 ||: 100%|##########| 1250/1250 [00:21<00:00, 56.86it/s]

2023-05-06 14:57:27,579 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 14:57:27,594 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 14:57:27,595 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 14:57:27,600 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9242, batch_loss: 0.0213, loss: 0.1921 ||: 100%|##########| 157/157 [00:00<00:00, 164.54it/s]

2023-05-06 14:57:28,541 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:57:28,548 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.910  |     0.924
2023-05-06 14:57:28,549 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   877.517  |       N/A
2023-05-06 14:57:28,551 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.241  |     0.192
2023-05-06 14:57:28,555 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  18587.773  |       N/A





2023-05-06 14:57:29,003 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:23.414767
2023-05-06 14:57:29,007 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:26
2023-05-06 14:57:29,009 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 14:57:29,011 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 18G
2023-05-06 14:57:29,017 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 834M
2023-05-06 14:57:29,018 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9499, batch_loss: 0.1636, loss: 0.1296 ||: 100%|##########| 1250/1250 [00:22<00:00, 55.48it/s]

2023-05-06 14:57:51,559 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9332, batch_loss: 0.0131, loss: 0.1855 ||: 100%|##########| 157/157 [00:01<00:00, 134.11it/s]

2023-05-06 14:57:52,737 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:57:52,739 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.950  |     0.933
2023-05-06 14:57:52,741 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   834.021  |       N/A
2023-05-06 14:57:52,743 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.130  |     0.186
2023-05-06 14:57:52,745 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  18630.188  |       N/A





2023-05-06 14:57:53,394 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:24.384751
2023-05-06 14:57:53,395 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:08
2023-05-06 14:57:53,403 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 14:57:53,405 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 18G
2023-05-06 14:57:53,407 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 834M
2023-05-06 14:57:53,409 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9832, batch_loss: 0.0008, loss: 0.0497 ||: 100%|##########| 1250/1250 [00:22<00:00, 54.94it/s]

2023-05-06 14:58:16,168 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9270, batch_loss: 0.0025, loss: 0.2732 ||: 100%|##########| 157/157 [00:00<00:00, 164.82it/s]

2023-05-06 14:58:17,127 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:58:17,128 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.983  |     0.927
2023-05-06 14:58:17,131 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   834.146  |       N/A
2023-05-06 14:58:17,133 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.050  |     0.273
2023-05-06 14:58:17,134 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  18630.188  |       N/A





2023-05-06 14:58:17,653 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:24.249411
2023-05-06 14:58:17,654 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:46
2023-05-06 14:58:17,660 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 14:58:17,664 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 18G
2023-05-06 14:58:17,667 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 834M
2023-05-06 14:58:17,673 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9972, batch_loss: 0.0475, loss: 0.0112 ||: 100%|##########| 1250/1250 [00:22<00:00, 55.50it/s]

2023-05-06 14:58:40,207 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9278, batch_loss: 0.0003, loss: 0.3225 ||: 100%|##########| 157/157 [00:01<00:00, 150.36it/s]

2023-05-06 14:58:41,258 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:58:41,262 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.997  |     0.928
2023-05-06 14:58:41,263 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   833.962  |       N/A
2023-05-06 14:58:41,265 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.011  |     0.323
2023-05-06 14:58:41,266 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  18630.188  |       N/A





2023-05-06 14:58:41,867 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:24.206999
2023-05-06 14:58:41,874 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:23
2023-05-06 14:58:41,876 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 14:58:41,877 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 18G
2023-05-06 14:58:41,879 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 834M
2023-05-06 14:58:41,880 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9997, batch_loss: 0.0006, loss: 0.0021 ||: 100%|##########| 1250/1250 [00:22<00:00, 54.52it/s]

2023-05-06 14:59:04,818 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9260, batch_loss: 0.0004, loss: 0.3604 ||: 100%|##########| 157/157 [00:01<00:00, 147.94it/s]

2023-05-06 14:59:05,886 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 14:59:05,888 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.926
2023-05-06 14:59:05,889 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   834.131  |       N/A
2023-05-06 14:59:05,891 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.002  |     0.360
2023-05-06 14:59:05,893 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  18630.188  |       N/A





2023-05-06 14:59:06,342 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:24.466840
2023-05-06 14:59:06,344 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 14:59:06,393 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 18630.1875,
  "peak_gpu_0_memory_MB": 877.5166015625,
  "training_duration": "0:02:00.297632",
  "epoch": 4,
  "training_accuracy": 0.9997,
  "training_loss": 0.0021318943956168367,
  "training_worker_0_memory_MB": 18630.1875,
  "training_gpu_0_memory_MB": 834.13134765625,
  "validation_accuracy": 0.926,
  "validation_loss": 0.36037566871486104,
  "best_validation_accuracy": 0.9332,
  "best_validation_loss": 0.18553844397401165
}
2023-05-06 14:59:06,395 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/20/model.tar.gz


[32m[I 2023-05-06 14:59:08,618][0m Trial 20 finished with value: 0.9332 and parameters: {'embedding_dim': 111, 'max_filter_size': 5, 'num_filters': 66}. Best is trial 8 with value: 0.9336.[0m


2023-05-06 14:59:08,710 - INFO - allennlp.common.params - evaluation = None
2023-05-06 14:59:08,712 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 14:59:08,718 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 14:59:08,720 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 14:59:08,727 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 14:59:08,728 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 14:59:08,729 - INFO - allennlp.common.params - type = default
2023-05-06 14:59:08,730 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 14:59:08,735 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 14:59:08,737 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 14:59:08,737 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 14:59:08,743 - INFO - all

loading instances: 40000it [01:22, 485.08it/s]

2023-05-06 15:00:31,247 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 15:00:31,249 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 15:00:31,251 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 15:00:31,253 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 15:00:31,255 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 15:00:31,256 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 15:00:31,257 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 15:00:31,258 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 15:00:31,259 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 15:00:31,260 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:11, 436.17it/s]

2023-05-06 15:00:42,730 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 15:00:42,733 - INFO - allennlp.common.params - type = from_instances
2023-05-06 15:00:42,739 - INFO - allennlp.common.params - min_count = None
2023-05-06 15:00:42,740 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 15:00:42,742 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 15:00:42,743 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 15:00:42,745 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 15:00:42,747 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 15:00:42,749 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 15:00:42,752 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 15:00:42,754 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 15


building vocab: 40000it [00:05, 7045.21it/s]


2023-05-06 15:00:48,642 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 15:00:48,649 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 15:00:48,651 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 15:00:48,656 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 15:00:48,657 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 113
2023-05-06 15:00:48,659 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 15:00:48,664 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 15:00:48,665 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 15:00:48,667 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 15:00:54,109 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 15:00:54,110 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 15:00:54,116 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9086, batch_loss: 0.0544, loss: 0.2436 ||: 100%|##########| 1250/1250 [00:22<00:00, 55.69it/s]

2023-05-06 15:01:16,428 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 15:01:16,447 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 15:01:16,448 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 15:01:16,453 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9220, batch_loss: 0.0486, loss: 0.1975 ||: 100%|##########| 157/157 [00:00<00:00, 160.59it/s]

2023-05-06 15:01:17,413 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:01:17,415 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.909  |     0.922
2023-05-06 15:01:17,416 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   834.199  |       N/A
2023-05-06 15:01:17,418 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.244  |     0.198
2023-05-06 15:01:17,420 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  19226.855  |       N/A





2023-05-06 15:01:17,899 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:23.924037
2023-05-06 15:01:17,900 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:30
2023-05-06 15:01:17,911 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 15:01:17,913 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 19G
2023-05-06 15:01:17,916 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 848M
2023-05-06 15:01:17,918 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9500, batch_loss: 0.1298, loss: 0.1306 ||: 100%|##########| 1250/1250 [00:22<00:00, 54.77it/s]

2023-05-06 15:01:40,747 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9302, batch_loss: 0.0099, loss: 0.1867 ||: 100%|##########| 157/157 [00:01<00:00, 139.88it/s]

2023-05-06 15:01:41,880 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:01:41,881 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.950  |     0.930
2023-05-06 15:01:41,882 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   847.798  |       N/A
2023-05-06 15:01:41,883 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.131  |     0.187
2023-05-06 15:01:41,886 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  19269.914  |       N/A





2023-05-06 15:01:42,479 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:24.568903
2023-05-06 15:01:42,482 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:11
2023-05-06 15:01:42,488 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 15:01:42,489 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 19G
2023-05-06 15:01:42,491 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 848M
2023-05-06 15:01:42,494 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9829, batch_loss: 0.0006, loss: 0.0497 ||: 100%|##########| 1250/1250 [00:22<00:00, 54.60it/s]

2023-05-06 15:02:05,393 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9266, batch_loss: 0.0062, loss: 0.2713 ||: 100%|##########| 157/157 [00:00<00:00, 162.62it/s]

2023-05-06 15:02:06,368 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:02:06,370 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.983  |     0.927
2023-05-06 15:02:06,371 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   847.923  |       N/A
2023-05-06 15:02:06,373 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.050  |     0.271
2023-05-06 15:02:06,375 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  19269.914  |       N/A





2023-05-06 15:02:06,893 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:24.404950
2023-05-06 15:02:06,894 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:48
2023-05-06 15:02:06,901 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 15:02:06,905 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 19G
2023-05-06 15:02:06,908 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 848M
2023-05-06 15:02:06,912 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9973, batch_loss: 0.0390, loss: 0.0115 ||: 100%|##########| 1250/1250 [00:22<00:00, 54.50it/s]

2023-05-06 15:02:29,854 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9256, batch_loss: 0.0013, loss: 0.3118 ||: 100%|##########| 157/157 [00:01<00:00, 141.98it/s]

2023-05-06 15:02:30,966 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:02:30,968 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.997  |     0.926
2023-05-06 15:02:30,970 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   847.739  |       N/A
2023-05-06 15:02:30,972 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.011  |     0.312
2023-05-06 15:02:30,973 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  19269.914  |       N/A





2023-05-06 15:02:31,586 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:24.684166
2023-05-06 15:02:31,588 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:25
2023-05-06 15:02:31,589 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 15:02:31,591 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 19G
2023-05-06 15:02:31,595 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 848M
2023-05-06 15:02:31,598 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9996, batch_loss: 0.0009, loss: 0.0025 ||: 100%|##########| 1250/1250 [00:23<00:00, 53.13it/s]

2023-05-06 15:02:55,133 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9230, batch_loss: 0.0005, loss: 0.3541 ||: 100%|##########| 157/157 [00:00<00:00, 161.94it/s]

2023-05-06 15:02:56,110 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:02:56,112 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.923
2023-05-06 15:02:56,114 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   847.908  |       N/A
2023-05-06 15:02:56,116 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.002  |     0.354
2023-05-06 15:02:56,118 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  19269.914  |       N/A





2023-05-06 15:02:56,608 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:25.018661
2023-05-06 15:02:56,610 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 15:02:56,670 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 19269.9140625,
  "peak_gpu_0_memory_MB": 847.9228515625,
  "training_duration": "0:02:02.134639",
  "epoch": 4,
  "training_accuracy": 0.999575,
  "training_loss": 0.0024642448470120143,
  "training_worker_0_memory_MB": 19269.9140625,
  "training_gpu_0_memory_MB": 847.908203125,
  "validation_accuracy": 0.923,
  "validation_loss": 0.35414588870628955,
  "best_validation_accuracy": 0.9302,
  "best_validation_loss": 0.18671730921194432
}
2023-05-06 15:02:56,671 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/21/model.tar.gz


[32m[I 2023-05-06 15:02:58,975][0m Trial 21 finished with value: 0.9302 and parameters: {'embedding_dim': 113, 'max_filter_size': 5, 'num_filters': 67}. Best is trial 8 with value: 0.9336.[0m


2023-05-06 15:02:59,070 - INFO - allennlp.common.params - evaluation = None
2023-05-06 15:02:59,079 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 15:02:59,081 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 15:02:59,082 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 15:02:59,084 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 15:02:59,087 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 15:02:59,088 - INFO - allennlp.common.params - type = default
2023-05-06 15:02:59,091 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 15:02:59,092 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 15:02:59,094 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 15:02:59,096 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 15:02:59,098 - INFO - all

loading instances: 40000it [01:23, 477.58it/s]

2023-05-06 15:04:22,897 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 15:04:22,906 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 15:04:22,907 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 15:04:22,909 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 15:04:22,912 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 15:04:22,914 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 15:04:22,916 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 15:04:22,922 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 15:04:22,924 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 15:04:22,927 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:07, 677.90it/s]


2023-05-06 15:04:30,311 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 15:04:30,313 - INFO - allennlp.common.params - type = from_instances
2023-05-06 15:04:30,317 - INFO - allennlp.common.params - min_count = None
2023-05-06 15:04:30,319 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 15:04:30,320 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 15:04:30,321 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 15:04:30,322 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 15:04:30,323 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 15:04:30,324 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 15:04:30,325 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 15:04:30,328 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 15

building vocab: 40000it [00:06, 6655.04it/s]


2023-05-06 15:04:36,566 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 15:04:36,569 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 15:04:36,571 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 15:04:36,574 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 15:04:36,576 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 129
2023-05-06 15:04:36,578 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 15:04:36,579 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 15:04:36,580 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 15:04:36,582 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 15:04:41,492 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 15:04:41,494 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 15:04:41,500 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9102, batch_loss: 0.0927, loss: 0.2428 ||: 100%|##########| 1250/1250 [00:22<00:00, 56.21it/s]

2023-05-06 15:05:03,620 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 15:05:03,637 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 15:05:03,640 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 15:05:03,644 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9236, batch_loss: 0.0515, loss: 0.1941 ||: 100%|##########| 157/157 [00:00<00:00, 184.52it/s]

2023-05-06 15:05:04,483 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:05:04,485 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.910  |     0.924
2023-05-06 15:05:04,488 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   847.976  |       N/A
2023-05-06 15:05:04,490 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.243  |     0.194
2023-05-06 15:05:04,493 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  19862.949  |       N/A





2023-05-06 15:05:05,007 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:23.635530
2023-05-06 15:05:05,009 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:27
2023-05-06 15:05:05,022 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 15:05:05,026 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 19G
2023-05-06 15:05:05,027 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 887M
2023-05-06 15:05:05,031 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9486, batch_loss: 0.1832, loss: 0.1323 ||: 100%|##########| 1250/1250 [00:22<00:00, 54.81it/s]

2023-05-06 15:05:27,845 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9320, batch_loss: 0.0232, loss: 0.1810 ||: 100%|##########| 157/157 [00:01<00:00, 153.11it/s]

2023-05-06 15:05:28,876 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:05:28,879 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.949  |     0.932
2023-05-06 15:05:28,881 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   887.463  |       N/A
2023-05-06 15:05:28,883 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.132  |     0.181
2023-05-06 15:05:28,885 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  19906.996  |       N/A





2023-05-06 15:05:29,539 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:24.517636
2023-05-06 15:05:29,542 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:10
2023-05-06 15:05:29,544 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 15:05:29,546 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 19G
2023-05-06 15:05:29,548 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 887M
2023-05-06 15:05:29,549 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9821, batch_loss: 0.0032, loss: 0.0527 ||: 100%|##########| 1250/1250 [00:22<00:00, 54.80it/s]

2023-05-06 15:05:52,366 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9304, batch_loss: 0.0080, loss: 0.2376 ||: 100%|##########| 157/157 [00:00<00:00, 188.31it/s]

2023-05-06 15:05:53,207 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:05:53,209 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.982  |     0.930
2023-05-06 15:05:53,211 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   887.463  |       N/A
2023-05-06 15:05:53,213 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.053  |     0.238
2023-05-06 15:05:53,214 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  19906.996  |       N/A





2023-05-06 15:05:53,790 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:24.246367
2023-05-06 15:05:53,792 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:47
2023-05-06 15:05:53,798 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 15:05:53,803 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 19G
2023-05-06 15:05:53,807 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 887M
2023-05-06 15:05:53,810 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9966, batch_loss: 0.0159, loss: 0.0126 ||: 100%|##########| 1250/1250 [00:22<00:00, 54.88it/s]

2023-05-06 15:06:16,596 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9264, batch_loss: 0.0014, loss: 0.2911 ||: 100%|##########| 157/157 [00:00<00:00, 161.89it/s]


2023-05-06 15:06:17,577 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:06:17,579 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.997  |     0.926
2023-05-06 15:06:17,581 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   887.463  |       N/A
2023-05-06 15:06:17,582 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.013  |     0.291
2023-05-06 15:06:17,584 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  19906.996  |       N/A
2023-05-06 15:06:18,272 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:24.473421
2023-05-06 15:06:18,274 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:24
2023-05-06 15:06:18,276 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 15:06:18,282 - INFO - allennlp.training.gradient_descent_t

accuracy: 0.9998, batch_loss: 0.0020, loss: 0.0020 ||: 100%|##########| 1250/1250 [00:23<00:00, 53.78it/s]

2023-05-06 15:06:41,537 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9254, batch_loss: 0.0010, loss: 0.3550 ||: 100%|##########| 157/157 [00:00<00:00, 186.76it/s]

2023-05-06 15:06:42,384 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:06:42,385 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.925





2023-05-06 15:06:42,388 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   887.463  |       N/A
2023-05-06 15:06:42,395 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.002  |     0.355
2023-05-06 15:06:42,397 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  19906.996  |       N/A
2023-05-06 15:06:42,921 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:24.645100
2023-05-06 15:06:42,923 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 15:06:42,987 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 19906.99609375,
  "peak_gpu_0_memory_MB": 887.46337890625,
  "training_duration": "0:02:01.011936",
  "epoch": 4,
  "training_accuracy": 0.9998,
  "training_loss": 0.0020427591097992265,
  "training_worker_0_memory_MB": 19906.99609375,
  "training_gpu_0_memory_MB": 887.46337890625,
  "valid

[32m[I 2023-05-06 15:06:45,622][0m Trial 22 finished with value: 0.932 and parameters: {'embedding_dim': 129, 'max_filter_size': 5, 'num_filters': 58}. Best is trial 8 with value: 0.9336.[0m


2023-05-06 15:06:45,716 - INFO - allennlp.common.params - evaluation = None
2023-05-06 15:06:45,719 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 15:06:45,722 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 15:06:45,724 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 15:06:45,726 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 15:06:45,727 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 15:06:45,729 - INFO - allennlp.common.params - type = default
2023-05-06 15:06:45,732 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 15:06:45,734 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 15:06:45,735 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 15:06:45,736 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 15:06:45,737 - INFO - all

loading instances: 40000it [01:24, 473.06it/s]

2023-05-06 15:08:10,348 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 15:08:10,352 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 15:08:10,356 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 15:08:10,357 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 15:08:10,360 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 15:08:10,361 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 15:08:10,364 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 15:08:10,366 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 15:08:10,367 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 15:08:10,370 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:12, 389.04it/s]

2023-05-06 15:08:23,232 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 15:08:23,234 - INFO - allennlp.common.params - type = from_instances
2023-05-06 15:08:23,236 - INFO - allennlp.common.params - min_count = None
2023-05-06 15:08:23,238 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 15:08:23,239 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 15:08:23,241 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 15:08:23,243 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 15:08:23,244 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 15:08:23,246 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 15:08:23,247 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 15:08:23,248 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 15


building vocab: 40000it [00:04, 9372.47it/s] 


2023-05-06 15:08:27,669 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 15:08:27,671 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 15:08:27,674 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 15:08:27,676 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 15:08:27,679 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 105
2023-05-06 15:08:27,680 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 15:08:27,682 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 15:08:27,684 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 15:08:27,685 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 15:08:32,283 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 15:08:32,285 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 15:08:32,292 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9090, batch_loss: 0.0538, loss: 0.2458 ||: 100%|##########| 1250/1250 [00:17<00:00, 71.84it/s]

2023-05-06 15:08:49,577 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 15:08:49,590 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 15:08:49,590 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 15:08:49,606 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9208, batch_loss: 0.0656, loss: 0.1965 ||: 100%|##########| 157/157 [00:00<00:00, 195.75it/s]

2023-05-06 15:08:50,386 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:08:50,388 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.909  |     0.921
2023-05-06 15:08:50,389 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   887.463  |       N/A
2023-05-06 15:08:50,391 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.246  |     0.196
2023-05-06 15:08:50,392 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  20433.512  |       N/A





2023-05-06 15:08:50,834 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:18.667064
2023-05-06 15:08:50,836 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:43
2023-05-06 15:08:50,840 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 15:08:50,842 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 20G
2023-05-06 15:08:50,843 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 777M
2023-05-06 15:08:50,846 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9477, batch_loss: 0.1972, loss: 0.1364 ||: 100%|##########| 1250/1250 [00:17<00:00, 70.96it/s]

2023-05-06 15:09:08,465 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9302, batch_loss: 0.0187, loss: 0.1806 ||: 100%|##########| 157/157 [00:00<00:00, 202.80it/s]

2023-05-06 15:09:09,249 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:09:09,251 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.948  |     0.930
2023-05-06 15:09:09,256 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   777.346  |       N/A
2023-05-06 15:09:09,258 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.136  |     0.181
2023-05-06 15:09:09,259 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  20473.305  |       N/A





2023-05-06 15:09:09,657 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:18.817025
2023-05-06 15:09:09,658 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:28
2023-05-06 15:09:09,660 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 15:09:09,667 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 20G
2023-05-06 15:09:09,671 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 777M
2023-05-06 15:09:09,674 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9798, batch_loss: 0.0134, loss: 0.0582 ||: 100%|##########| 1250/1250 [00:17<00:00, 70.62it/s]

2023-05-06 15:09:27,382 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9278, batch_loss: 0.0056, loss: 0.2287 ||: 100%|##########| 157/157 [00:00<00:00, 170.77it/s]

2023-05-06 15:09:28,308 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:09:28,310 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.980  |     0.928
2023-05-06 15:09:28,313 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   777.471  |       N/A
2023-05-06 15:09:28,314 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.058  |     0.229
2023-05-06 15:09:28,316 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  20473.500  |       N/A





2023-05-06 15:09:28,901 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:19.240906
2023-05-06 15:09:28,903 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:10
2023-05-06 15:09:28,905 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 15:09:28,906 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 20G
2023-05-06 15:09:28,908 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 777M
2023-05-06 15:09:28,910 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9963, batch_loss: 0.0663, loss: 0.0144 ||: 100%|##########| 1250/1250 [00:17<00:00, 70.59it/s]


2023-05-06 15:09:46,626 - INFO - allennlp.training.gradient_descent_trainer - Validating


accuracy: 0.9240, batch_loss: 0.0064, loss: 0.2664 ||: 100%|##########| 157/157 [00:00<00:00, 173.42it/s]

2023-05-06 15:09:47,539 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:09:47,543 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.996  |     0.924
2023-05-06 15:09:47,545 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   777.287  |       N/A
2023-05-06 15:09:47,546 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.014  |     0.266
2023-05-06 15:09:47,548 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  20473.500  |       N/A





2023-05-06 15:09:48,125 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:19.220150
2023-05-06 15:09:48,133 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:01:53
2023-05-06 15:09:48,134 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 15:09:48,137 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 20G
2023-05-06 15:09:48,139 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 777M
2023-05-06 15:09:48,142 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9998, batch_loss: 0.0060, loss: 0.0023 ||: 100%|##########| 1250/1250 [00:17<00:00, 69.77it/s]

2023-05-06 15:10:06,067 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9258, batch_loss: 0.0062, loss: 0.3169 ||: 100%|##########| 157/157 [00:00<00:00, 159.24it/s]

2023-05-06 15:10:07,060 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:10:07,062 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.926
2023-05-06 15:10:07,064 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   777.456  |       N/A
2023-05-06 15:10:07,066 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.002  |     0.317
2023-05-06 15:10:07,068 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  20473.500  |       N/A





2023-05-06 15:10:07,660 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:19.526007
2023-05-06 15:10:07,666 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 15:10:07,726 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 20473.5,
  "peak_gpu_0_memory_MB": 887.46337890625,
  "training_duration": "0:01:34.892936",
  "epoch": 4,
  "training_accuracy": 0.99975,
  "training_loss": 0.0023168970493054075,
  "training_worker_0_memory_MB": 20473.5,
  "training_gpu_0_memory_MB": 777.4560546875,
  "validation_accuracy": 0.9258,
  "validation_loss": 0.31685664589593937,
  "best_validation_accuracy": 0.9302,
  "best_validation_loss": 0.18060435252679383
}
2023-05-06 15:10:07,729 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/23/model.tar.gz


[32m[I 2023-05-06 15:10:10,614][0m Trial 23 finished with value: 0.9302 and parameters: {'embedding_dim': 105, 'max_filter_size': 4, 'num_filters': 75}. Best is trial 8 with value: 0.9336.[0m


2023-05-06 15:10:10,735 - INFO - allennlp.common.params - evaluation = None
2023-05-06 15:10:10,738 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 15:10:10,741 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 15:10:10,744 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 15:10:10,745 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 15:10:10,750 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 15:10:10,753 - INFO - allennlp.common.params - type = default
2023-05-06 15:10:10,760 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 15:10:10,762 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 15:10:10,765 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 15:10:10,766 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 15:10:10,768 - INFO - all

loading instances: 40000it [01:21, 492.07it/s]

2023-05-06 15:11:32,152 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess





2023-05-06 15:11:32,155 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 15:11:32,158 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 15:11:32,161 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 15:11:32,165 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 15:11:32,166 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 15:11:32,167 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 15:11:32,168 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 15:11:32,169 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 15:11:32,171 - INFO - allennlp.common.params - validation_data_loader.cuda_device = None
2023-05-06 15:11:32,172 - INFO - allennlp.common.params - validation_data_loader.quiet = 

loading instances: 5000it [00:09, 538.07it/s]

2023-05-06 15:11:41,473 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 15:11:41,476 - INFO - allennlp.common.params - type = from_instances
2023-05-06 15:11:41,483 - INFO - allennlp.common.params - min_count = None
2023-05-06 15:11:41,484 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 15:11:41,487 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 15:11:41,488 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 15:11:41,489 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 15:11:41,490 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 15:11:41,491 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 15:11:41,492 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 15:11:41,493 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 15


building vocab: 40000it [00:03, 11366.26it/s]


2023-05-06 15:11:45,173 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 15:11:45,175 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 15:11:45,180 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 15:11:45,182 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 15:11:45,184 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 139
2023-05-06 15:11:45,186 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 15:11:45,189 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 15:11:45,190 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 15:11:45,193 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 15:11:50,390 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 15:11:50,392 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 15:11:50,398 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9107, batch_loss: 0.0674, loss: 0.2398 ||: 100%|##########| 1250/1250 [00:29<00:00, 42.89it/s]

2023-05-06 15:12:19,421 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 15:12:19,435 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 15:12:19,436 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 15:12:19,441 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9238, batch_loss: 0.0467, loss: 0.1993 ||: 100%|##########| 157/157 [00:01<00:00, 133.88it/s]

2023-05-06 15:12:20,599 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:12:20,600 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.911  |     0.924
2023-05-06 15:12:20,607 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   777.524  |       N/A
2023-05-06 15:12:20,608 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.240  |     0.199





2023-05-06 15:12:20,612 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  21074.863  |       N/A
2023-05-06 15:12:21,209 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:30.941639
2023-05-06 15:12:21,210 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:32
2023-05-06 15:12:21,216 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 15:12:21,219 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 21G
2023-05-06 15:12:21,224 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 15:12:21,225 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9517, batch_loss: 0.2603, loss: 0.1273 ||: 100%|##########| 1250/1250 [00:29<00:00, 42.03it/s]

2023-05-06 15:12:50,970 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9314, batch_loss: 0.0207, loss: 0.1960 ||: 100%|##########| 157/157 [00:01<00:00, 133.76it/s]

2023-05-06 15:12:52,151 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:12:52,152 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.952  |     0.931





2023-05-06 15:12:52,160 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1131.051  |       N/A
2023-05-06 15:12:52,162 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.127  |     0.196
2023-05-06 15:12:52,166 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  21127.609  |       N/A
2023-05-06 15:12:52,755 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:31.539754
2023-05-06 15:12:52,759 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:07
2023-05-06 15:12:52,763 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 15:12:52,765 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 21G
2023-05-06 15:12:52,767 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 15:12:52,769 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9853, batch_loss: 0.0016, loss: 0.0431 ||: 100%|##########| 1250/1250 [00:30<00:00, 41.44it/s]

2023-05-06 15:13:22,946 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9288, batch_loss: 0.0016, loss: 0.2642 ||: 100%|##########| 157/157 [00:01<00:00, 125.02it/s]

2023-05-06 15:13:24,210 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:13:24,211 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.985  |     0.929





2023-05-06 15:13:24,219 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1131.176  |       N/A
2023-05-06 15:13:24,220 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.043  |     0.264
2023-05-06 15:13:24,221 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  21127.609  |       N/A
2023-05-06 15:13:24,866 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:32.103456
2023-05-06 15:13:24,868 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:39
2023-05-06 15:13:24,875 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 15:13:24,879 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 21G
2023-05-06 15:13:24,883 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 15:13:24,886 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9975, batch_loss: 0.1314, loss: 0.0091 ||: 100%|##########| 1250/1250 [00:30<00:00, 41.51it/s]

2023-05-06 15:13:55,011 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9246, batch_loss: 0.0003, loss: 0.3515 ||: 100%|##########| 157/157 [00:01<00:00, 115.76it/s]

2023-05-06 15:13:56,378 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:13:56,380 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.998  |     0.925
2023-05-06 15:13:56,381 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1130.992  |       N/A
2023-05-06 15:13:56,383 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.009  |     0.351
2023-05-06 15:13:56,385 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  21127.609  |       N/A





2023-05-06 15:13:57,209 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:32.334361
2023-05-06 15:13:57,212 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:09
2023-05-06 15:13:57,214 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 15:13:57,215 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 21G
2023-05-06 15:13:57,218 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 15:13:57,220 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9998, batch_loss: 0.0014, loss: 0.0015 ||: 100%|##########| 1250/1250 [00:30<00:00, 41.58it/s]

2023-05-06 15:14:27,290 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9248, batch_loss: 0.0000, loss: 0.3852 ||: 100%|##########| 157/157 [00:01<00:00, 119.68it/s]

2023-05-06 15:14:28,611 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:14:28,613 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.925
2023-05-06 15:14:28,615 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1131.161  |       N/A
2023-05-06 15:14:28,617 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.001  |     0.385
2023-05-06 15:14:28,620 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  21127.609  |       N/A





2023-05-06 15:14:29,403 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:32.189153
2023-05-06 15:14:29,406 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 15:14:29,481 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 21127.609375,
  "peak_gpu_0_memory_MB": 1131.17578125,
  "training_duration": "0:02:38.342891",
  "epoch": 4,
  "training_accuracy": 0.999825,
  "training_loss": 0.00146735792247091,
  "training_worker_0_memory_MB": 21127.609375,
  "training_gpu_0_memory_MB": 1131.1611328125,
  "validation_accuracy": 0.9248,
  "validation_loss": 0.38521592645768227,
  "best_validation_accuracy": 0.9314,
  "best_validation_loss": 0.19596654257980312
}
2023-05-06 15:14:29,483 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/24/model.tar.gz


[32m[I 2023-05-06 15:14:33,336][0m Trial 24 finished with value: 0.9314 and parameters: {'embedding_dim': 139, 'max_filter_size': 5, 'num_filters': 100}. Best is trial 8 with value: 0.9336.[0m


2023-05-06 15:14:33,428 - INFO - allennlp.common.params - evaluation = None
2023-05-06 15:14:33,430 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 15:14:33,432 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 15:14:33,435 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 15:14:33,437 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 15:14:33,441 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 15:14:33,443 - INFO - allennlp.common.params - type = default
2023-05-06 15:14:33,445 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 15:14:33,447 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 15:14:33,448 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 15:14:33,449 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 15:14:33,454 - INFO - all

loading instances: 40000it [01:25, 467.74it/s]

2023-05-06 15:15:59,010 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 15:15:59,012 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 15:15:59,015 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 15:15:59,017 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 15:15:59,019 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 15:15:59,021 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 15:15:59,023 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 15:15:59,025 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 15:15:59,028 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 15:15:59,030 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:07, 640.54it/s]

2023-05-06 15:16:06,846 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 15:16:06,848 - INFO - allennlp.common.params - type = from_instances
2023-05-06 15:16:06,850 - INFO - allennlp.common.params - min_count = None





2023-05-06 15:16:06,853 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 15:16:06,855 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 15:16:06,856 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 15:16:06,857 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 15:16:06,858 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 15:16:06,859 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 15:16:06,860 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 15:16:06,861 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 15:16:06,862 - INFO - allennlp.data.vocabulary - Fitting token dictionary from dataset.


building vocab: 40000it [00:03, 10125.60it/s]


2023-05-06 15:16:11,037 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 15:16:11,040 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 15:16:11,042 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 15:16:11,048 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 15:16:11,050 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 96
2023-05-06 15:16:11,051 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 15:16:11,053 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 15:16:11,054 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 15:16:11,058 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tok

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 15:16:18,089 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 15:16:18,092 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 15:16:18,099 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9055, batch_loss: 0.0649, loss: 0.2479 ||: 100%|##########| 1250/1250 [00:17<00:00, 72.44it/s]

2023-05-06 15:16:35,232 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 15:16:35,248 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 15:16:35,251 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 15:16:35,255 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9254, batch_loss: 0.0615, loss: 0.1887 ||: 100%|##########| 157/157 [00:00<00:00, 210.28it/s]

2023-05-06 15:16:35,990 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:16:35,991 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.905  |     0.925
2023-05-06 15:16:35,995 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1131.229  |       N/A
2023-05-06 15:16:35,998 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.248  |     0.189
2023-05-06 15:16:36,000 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  21681.887  |       N/A





2023-05-06 15:16:36,393 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:18.425831
2023-05-06 15:16:36,399 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:42
2023-05-06 15:16:36,401 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 15:16:36,403 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 21G
2023-05-06 15:16:36,404 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 660M
2023-05-06 15:16:36,406 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9487, batch_loss: 0.2120, loss: 0.1355 ||: 100%|##########| 1250/1250 [00:17<00:00, 72.37it/s]

2023-05-06 15:16:53,685 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9330, batch_loss: 0.0212, loss: 0.1786 ||: 100%|##########| 157/157 [00:00<00:00, 217.99it/s]

2023-05-06 15:16:54,414 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:16:54,421 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.949  |     0.933
2023-05-06 15:16:54,425 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   660.200  |       N/A
2023-05-06 15:16:54,428 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.135  |     0.179
2023-05-06 15:16:54,430 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  21718.695  |       N/A





2023-05-06 15:16:54,841 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:18.440606
2023-05-06 15:16:54,843 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:25
2023-05-06 15:16:54,847 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 15:16:54,852 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 21G
2023-05-06 15:16:54,854 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 660M
2023-05-06 15:16:54,858 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9812, batch_loss: 0.0065, loss: 0.0565 ||: 100%|##########| 1250/1250 [00:17<00:00, 71.20it/s]

2023-05-06 15:17:12,418 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9288, batch_loss: 0.0074, loss: 0.2256 ||: 100%|##########| 157/157 [00:00<00:00, 208.48it/s]

2023-05-06 15:17:13,179 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:17:13,181 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.981  |     0.929
2023-05-06 15:17:13,181 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   660.200  |       N/A
2023-05-06 15:17:13,186 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.056  |     0.226
2023-05-06 15:17:13,187 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  21718.695  |       N/A





2023-05-06 15:17:13,598 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:18.750818
2023-05-06 15:17:13,600 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:08
2023-05-06 15:17:13,602 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 15:17:13,604 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 21G
2023-05-06 15:17:13,607 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 660M
2023-05-06 15:17:13,609 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9964, batch_loss: 0.0366, loss: 0.0147 ||: 100%|##########| 1250/1250 [00:17<00:00, 71.97it/s]

2023-05-06 15:17:30,981 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9258, batch_loss: 0.0017, loss: 0.2637 ||: 100%|##########| 157/157 [00:00<00:00, 216.73it/s]

2023-05-06 15:17:31,717 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:17:31,719 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.996  |     0.926
2023-05-06 15:17:31,720 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   660.200  |       N/A
2023-05-06 15:17:31,722 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.015  |     0.264
2023-05-06 15:17:31,724 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  21718.695  |       N/A





2023-05-06 15:17:32,158 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:18.555742
2023-05-06 15:17:32,159 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:01:50
2023-05-06 15:17:32,168 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 15:17:32,170 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 21G
2023-05-06 15:17:32,173 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 660M
2023-05-06 15:17:32,174 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9996, batch_loss: 0.0046, loss: 0.0029 ||: 100%|##########| 1250/1250 [00:17<00:00, 71.52it/s]

2023-05-06 15:17:49,657 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9248, batch_loss: 0.0016, loss: 0.3051 ||: 100%|##########| 157/157 [00:00<00:00, 217.32it/s]

2023-05-06 15:17:50,390 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:17:50,392 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.925
2023-05-06 15:17:50,393 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   660.200  |       N/A
2023-05-06 15:17:50,395 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.003  |     0.305
2023-05-06 15:17:50,396 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  21718.695  |       N/A





2023-05-06 15:17:50,814 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:18.646782
2023-05-06 15:17:50,816 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 15:17:50,867 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 21718.6953125,
  "peak_gpu_0_memory_MB": 1131.22900390625,
  "training_duration": "0:01:32.422888",
  "epoch": 4,
  "training_accuracy": 0.99955,
  "training_loss": 0.0028527734886389226,
  "training_worker_0_memory_MB": 21718.6953125,
  "training_gpu_0_memory_MB": 660.19970703125,
  "validation_accuracy": 0.9248,
  "validation_loss": 0.30505607428346543,
  "best_validation_accuracy": 0.933,
  "best_validation_loss": 0.17856878083745006
}
2023-05-06 15:17:50,870 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/25/model.tar.gz


[32m[I 2023-05-06 15:17:52,818][0m Trial 25 finished with value: 0.933 and parameters: {'embedding_dim': 96, 'max_filter_size': 5, 'num_filters': 43}. Best is trial 8 with value: 0.9336.[0m


2023-05-06 15:17:52,912 - INFO - allennlp.common.params - evaluation = None
2023-05-06 15:17:52,914 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 15:17:52,917 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 15:17:52,919 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 15:17:52,921 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 15:17:52,922 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 15:17:52,924 - INFO - allennlp.common.params - type = default
2023-05-06 15:17:52,925 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 15:17:52,926 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 15:17:52,928 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 15:17:52,928 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 15:17:52,930 - INFO - all

loading instances: 40000it [01:23, 476.80it/s]

2023-05-06 15:19:18,498 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 15:19:18,508 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 15:19:18,509 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 15:19:18,511 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 15:19:18,513 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 15:19:18,515 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 15:19:18,516 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 15:19:18,517 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 15:19:18,518 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 15:19:18,519 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:07, 639.61it/s]

2023-05-06 15:19:26,343 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 15:19:26,345 - INFO - allennlp.common.params - type = from_instances
2023-05-06 15:19:26,350 - INFO - allennlp.common.params - min_count = None
2023-05-06 15:19:26,352 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 15:19:26,354 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 15:19:26,356 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 15:19:26,359 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 15:19:26,361 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 15:19:26,363 - INFO - allennlp.common.params - min_pretrained_embeddings = None





2023-05-06 15:19:26,366 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 15:19:26,368 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 15:19:26,370 - INFO - allennlp.data.vocabulary - Fitting token dictionary from dataset.


building vocab: 40000it [00:05, 6959.89it/s]


2023-05-06 15:19:32,262 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 15:19:32,264 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 15:19:32,267 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 15:19:32,269 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 15:19:32,271 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 119
2023-05-06 15:19:32,273 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 15:19:32,275 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 15:19:32,276 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 15:19:32,277 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 15:19:36,931 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 15:19:36,933 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 15:19:36,939 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9051, batch_loss: 0.0778, loss: 0.2504 ||: 100%|##########| 1250/1250 [00:18<00:00, 67.25it/s]

2023-05-06 15:19:55,418 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 15:19:55,431 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 15:19:55,433 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 15:19:55,438 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9210, batch_loss: 0.0339, loss: 0.1989 ||: 100%|##########| 157/157 [00:00<00:00, 192.52it/s]

2023-05-06 15:19:56,241 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:19:56,243 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.905  |     0.921
2023-05-06 15:19:56,245 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   660.200  |       N/A
2023-05-06 15:19:56,247 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.250  |     0.199
2023-05-06 15:19:56,249 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  22338.035  |       N/A





2023-05-06 15:19:56,725 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:19.903103
2023-05-06 15:19:56,727 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:54
2023-05-06 15:19:56,729 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 15:19:56,730 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 22G
2023-05-06 15:19:56,732 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 807M
2023-05-06 15:19:56,734 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9440, batch_loss: 0.2376, loss: 0.1417 ||: 100%|##########| 1250/1250 [00:18<00:00, 65.86it/s]

2023-05-06 15:20:15,719 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9258, batch_loss: 0.0268, loss: 0.1927 ||: 100%|##########| 157/157 [00:00<00:00, 195.75it/s]

2023-05-06 15:20:16,535 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:20:16,537 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.944  |     0.926
2023-05-06 15:20:16,539 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   807.344  |       N/A
2023-05-06 15:20:16,541 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.142  |     0.193
2023-05-06 15:20:16,543 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  22383.289  |       N/A





2023-05-06 15:20:17,034 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:20.305259
2023-05-06 15:20:17,035 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:38
2023-05-06 15:20:17,042 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 15:20:17,046 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 22G
2023-05-06 15:20:17,049 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 807M
2023-05-06 15:20:17,052 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9769, batch_loss: 0.0030, loss: 0.0645 ||: 100%|##########| 1250/1250 [00:19<00:00, 64.82it/s]

2023-05-06 15:20:36,342 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9246, batch_loss: 0.0041, loss: 0.2445 ||: 100%|##########| 157/157 [00:00<00:00, 162.65it/s]

2023-05-06 15:20:37,314 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:20:37,316 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.977  |     0.925
2023-05-06 15:20:37,320 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   807.468  |       N/A
2023-05-06 15:20:37,321 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.064  |     0.244
2023-05-06 15:20:37,323 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  22383.289  |       N/A





2023-05-06 15:20:37,968 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:20.925888
2023-05-06 15:20:37,976 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:21
2023-05-06 15:20:37,977 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 15:20:37,979 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 22G
2023-05-06 15:20:37,981 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 807M
2023-05-06 15:20:37,987 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9952, batch_loss: 0.0360, loss: 0.0180 ||: 100%|##########| 1250/1250 [00:19<00:00, 65.69it/s]

2023-05-06 15:20:57,022 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9238, batch_loss: 0.0102, loss: 0.3089 ||: 100%|##########| 157/157 [00:00<00:00, 160.45it/s]

2023-05-06 15:20:58,009 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:20:58,011 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.995  |     0.924
2023-05-06 15:20:58,013 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   807.284  |       N/A
2023-05-06 15:20:58,014 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.018  |     0.309
2023-05-06 15:20:58,017 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  22383.289  |       N/A





2023-05-06 15:20:58,619 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:20.641432
2023-05-06 15:20:58,621 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:01
2023-05-06 15:20:58,623 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 15:20:58,625 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 22G
2023-05-06 15:20:58,627 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 807M
2023-05-06 15:20:58,630 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9994, batch_loss: 0.0006, loss: 0.0035 ||: 100%|##########| 1250/1250 [00:19<00:00, 65.51it/s]

2023-05-06 15:21:17,717 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9220, batch_loss: 0.0236, loss: 0.3596 ||: 100%|##########| 157/157 [00:00<00:00, 169.46it/s]

2023-05-06 15:21:18,652 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:21:18,660 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.999  |     0.922
2023-05-06 15:21:18,661 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   807.454  |       N/A
2023-05-06 15:21:18,663 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.004  |     0.360
2023-05-06 15:21:18,666 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  22383.289  |       N/A





2023-05-06 15:21:19,150 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:20.526705
2023-05-06 15:21:19,151 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 15:21:19,210 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 22383.2890625,
  "peak_gpu_0_memory_MB": 807.46826171875,
  "training_duration": "0:01:41.829864",
  "epoch": 4,
  "training_accuracy": 0.9994,
  "training_loss": 0.003524077090926585,
  "training_worker_0_memory_MB": 22383.2890625,
  "training_gpu_0_memory_MB": 807.45361328125,
  "validation_accuracy": 0.922,
  "validation_loss": 0.3595891371502832,
  "best_validation_accuracy": 0.9258,
  "best_validation_loss": 0.1927464895519861
}
2023-05-06 15:21:19,211 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/26/model.tar.gz


[32m[I 2023-05-06 15:21:21,597][0m Trial 26 finished with value: 0.9258 and parameters: {'embedding_dim': 119, 'max_filter_size': 4, 'num_filters': 67}. Best is trial 8 with value: 0.9336.[0m


2023-05-06 15:21:21,698 - INFO - allennlp.common.params - evaluation = None
2023-05-06 15:21:21,700 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 15:21:21,704 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 15:21:21,707 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 15:21:21,708 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 15:21:21,710 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 15:21:21,711 - INFO - allennlp.common.params - type = default
2023-05-06 15:21:21,713 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 15:21:21,715 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 15:21:21,716 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 15:21:21,717 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 15:21:21,718 - INFO - all

loading instances: 40000it [01:26, 461.66it/s]

2023-05-06 15:22:50,014 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 15:22:50,018 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 15:22:50,019 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 15:22:50,020 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 15:22:50,021 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 15:22:50,022 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 15:22:50,024 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 15:22:50,025 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 15:22:50,027 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 15:22:50,028 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:07, 659.83it/s]


2023-05-06 15:22:57,612 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 15:22:57,618 - INFO - allennlp.common.params - type = from_instances
2023-05-06 15:22:57,620 - INFO - allennlp.common.params - min_count = None
2023-05-06 15:22:57,621 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 15:22:57,623 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 15:22:57,624 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 15:22:57,627 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 15:22:57,628 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 15:22:57,629 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 15:22:57,630 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 15:22:57,633 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 15

building vocab: 40000it [00:05, 6738.19it/s]


2023-05-06 15:23:03,815 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 15:23:03,820 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 15:23:03,823 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 15:23:03,826 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 15:23:03,828 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 98
2023-05-06 15:23:03,832 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 15:23:03,833 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 15:23:03,834 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 15:23:03,837 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tok

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 15:23:09,123 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 15:23:09,125 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 15:23:09,131 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9090, batch_loss: 0.0869, loss: 0.2437 ||: 100%|##########| 1250/1250 [00:23<00:00, 53.54it/s]

2023-05-06 15:23:32,352 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 15:23:32,366 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 15:23:32,367 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 15:23:32,374 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9230, batch_loss: 0.0406, loss: 0.1951 ||: 100%|##########| 157/157 [00:01<00:00, 134.96it/s]

2023-05-06 15:23:33,527 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:23:33,528 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.909  |     0.923
2023-05-06 15:23:33,532 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   807.521  |       N/A
2023-05-06 15:23:33,534 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.244  |     0.195
2023-05-06 15:23:33,535 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  22953.750  |       N/A





2023-05-06 15:23:34,106 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:25.114982
2023-05-06 15:23:34,116 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:40
2023-05-06 15:23:34,118 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 15:23:34,120 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 22G
2023-05-06 15:23:34,122 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 966M
2023-05-06 15:23:34,123 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9500, batch_loss: 0.1885, loss: 0.1320 ||: 100%|##########| 1250/1250 [00:24<00:00, 51.89it/s]

2023-05-06 15:23:58,220 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9314, batch_loss: 0.0180, loss: 0.1895 ||: 100%|##########| 157/157 [00:01<00:00, 144.27it/s]

2023-05-06 15:23:59,314 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:23:59,315 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.950  |     0.931
2023-05-06 15:23:59,320 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   965.569  |       N/A
2023-05-06 15:23:59,321 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.132  |     0.189
2023-05-06 15:23:59,324 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  22996.504  |       N/A





2023-05-06 15:23:59,736 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:25.618322
2023-05-06 15:23:59,743 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:21
2023-05-06 15:23:59,744 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 15:23:59,745 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 22G
2023-05-06 15:23:59,747 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 966M
2023-05-06 15:23:59,750 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9819, batch_loss: 0.0023, loss: 0.0507 ||: 100%|##########| 1250/1250 [00:23<00:00, 53.38it/s]

2023-05-06 15:24:23,170 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9252, batch_loss: 0.0040, loss: 0.2505 ||: 100%|##########| 157/157 [00:01<00:00, 152.26it/s]

2023-05-06 15:24:24,208 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:24:24,210 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.982  |     0.925
2023-05-06 15:24:24,212 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   965.693  |       N/A
2023-05-06 15:24:24,213 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.051  |     0.251
2023-05-06 15:24:24,215 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  22996.516  |       N/A





2023-05-06 15:24:24,656 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:24.911879
2023-05-06 15:24:24,658 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:55
2023-05-06 15:24:24,664 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 15:24:24,667 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 22G
2023-05-06 15:24:24,669 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 966M
2023-05-06 15:24:24,671 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9972, batch_loss: 0.0228, loss: 0.0114 ||: 100%|##########| 1250/1250 [00:23<00:00, 53.15it/s]


2023-05-06 15:24:48,199 - INFO - allennlp.training.gradient_descent_trainer - Validating


accuracy: 0.9248, batch_loss: 0.0033, loss: 0.3194 ||: 100%|##########| 157/157 [00:01<00:00, 123.47it/s]

2023-05-06 15:24:49,480 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:24:49,481 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.997  |     0.925
2023-05-06 15:24:49,483 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   965.509  |       N/A
2023-05-06 15:24:49,485 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.011  |     0.319
2023-05-06 15:24:49,486 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  22996.516  |       N/A





2023-05-06 15:24:49,991 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:25.326622
2023-05-06 15:24:49,993 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:30
2023-05-06 15:24:49,995 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 15:24:49,997 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 22G
2023-05-06 15:24:49,999 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 966M
2023-05-06 15:24:50,003 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9997, batch_loss: 0.0012, loss: 0.0021 ||: 100%|##########| 1250/1250 [00:23<00:00, 52.59it/s]

2023-05-06 15:25:13,780 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9240, batch_loss: 0.0002, loss: 0.3930 ||: 100%|##########| 157/157 [00:01<00:00, 149.39it/s]

2023-05-06 15:25:14,838 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:25:14,839 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.924





2023-05-06 15:25:14,847 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   965.679  |       N/A
2023-05-06 15:25:14,850 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.002  |     0.393
2023-05-06 15:25:14,851 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  22996.516  |       N/A
2023-05-06 15:25:15,318 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:25.322327
2023-05-06 15:25:15,319 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 15:25:15,378 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 22996.515625,
  "peak_gpu_0_memory_MB": 965.693359375,
  "training_duration": "0:02:05.846035",
  "epoch": 4,
  "training_accuracy": 0.999725,
  "training_loss": 0.0021321935927830055,
  "training_worker_0_memory_MB": 22996.515625,
  "training_gpu_0_memory_MB": 965.6787109375,
  "validation

[32m[I 2023-05-06 15:25:17,392][0m Trial 27 finished with value: 0.9314 and parameters: {'embedding_dim': 98, 'max_filter_size': 5, 'num_filters': 103}. Best is trial 8 with value: 0.9336.[0m


2023-05-06 15:25:17,499 - INFO - allennlp.common.params - evaluation = None
2023-05-06 15:25:17,502 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 15:25:17,504 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 15:25:17,506 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 15:25:17,508 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 15:25:17,510 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 15:25:17,512 - INFO - allennlp.common.params - type = default
2023-05-06 15:25:17,513 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 15:25:17,515 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 15:25:17,516 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 15:25:17,517 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 15:25:17,518 - INFO - all

loading instances: 40000it [01:27, 457.31it/s]

2023-05-06 15:26:45,026 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 15:26:45,029 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 15:26:45,030 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 15:26:45,032 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 15:26:45,034 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 15:26:45,036 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 15:26:45,038 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 15:26:45,041 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 15:26:45,045 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 15:26:45,046 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:08, 561.81it/s]

2023-05-06 15:26:53,958 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 15:26:53,962 - INFO - allennlp.common.params - type = from_instances
2023-05-06 15:26:53,964 - INFO - allennlp.common.params - min_count = None
2023-05-06 15:26:53,968 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 15:26:53,969 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 15:26:53,970 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 15:26:53,972 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 15:26:53,973 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 15:26:53,974 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 15:26:53,975 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 15:26:53,976 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 15


building vocab: 40000it [00:05, 7939.81it/s] 


2023-05-06 15:26:59,169 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 15:26:59,170 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 15:26:59,173 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 15:26:59,175 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 15:26:59,179 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 137
2023-05-06 15:26:59,180 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 15:26:59,182 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 15:26:59,183 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 15:26:59,185 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 15:27:03,807 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 15:27:03,810 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 15:27:03,816 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9083, batch_loss: 0.0779, loss: 0.2462 ||: 100%|##########| 1250/1250 [00:19<00:00, 63.68it/s]

2023-05-06 15:27:23,333 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 15:27:23,344 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 15:27:23,345 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 15:27:23,350 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9226, batch_loss: 0.0391, loss: 0.1928 ||: 100%|##########| 157/157 [00:00<00:00, 205.83it/s]

2023-05-06 15:27:24,103 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:27:24,105 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.908  |     0.923
2023-05-06 15:27:24,107 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   965.747  |       N/A
2023-05-06 15:27:24,109 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.246  |     0.193
2023-05-06 15:27:24,111 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  23617.316  |       N/A





2023-05-06 15:27:24,688 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:20.993463
2023-05-06 15:27:24,696 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:03
2023-05-06 15:27:24,697 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 15:27:24,698 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 23G
2023-05-06 15:27:24,703 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 867M
2023-05-06 15:27:24,705 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9465, batch_loss: 0.2453, loss: 0.1382 ||: 100%|##########| 1250/1250 [00:19<00:00, 62.52it/s]

2023-05-06 15:27:44,711 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9246, batch_loss: 0.0122, loss: 0.1887 ||: 100%|##########| 157/157 [00:00<00:00, 181.06it/s]

2023-05-06 15:27:45,586 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:27:45,588 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.946  |     0.925
2023-05-06 15:27:45,589 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   866.926  |       N/A
2023-05-06 15:27:45,591 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.138  |     0.189
2023-05-06 15:27:45,592 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  23669.137  |       N/A





2023-05-06 15:27:46,319 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:21.621522
2023-05-06 15:27:46,326 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:47
2023-05-06 15:27:46,328 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 15:27:46,329 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 23G
2023-05-06 15:27:46,331 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 867M
2023-05-06 15:27:46,333 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9787, batch_loss: 0.0112, loss: 0.0614 ||: 100%|##########| 1250/1250 [00:19<00:00, 63.43it/s]

2023-05-06 15:28:06,047 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9234, batch_loss: 0.0131, loss: 0.2291 ||: 100%|##########| 157/157 [00:00<00:00, 159.70it/s]

2023-05-06 15:28:07,040 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:28:07,043 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.979  |     0.923
2023-05-06 15:28:07,046 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   866.926  |       N/A
2023-05-06 15:28:07,047 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.061  |     0.229
2023-05-06 15:28:07,049 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  23669.137  |       N/A





2023-05-06 15:28:07,815 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:21.487360
2023-05-06 15:28:07,817 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:27
2023-05-06 15:28:07,821 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 15:28:07,822 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 23G
2023-05-06 15:28:07,826 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 867M
2023-05-06 15:28:07,829 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9955, batch_loss: 0.0426, loss: 0.0167 ||: 100%|##########| 1250/1250 [00:19<00:00, 63.42it/s]

2023-05-06 15:28:27,545 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9206, batch_loss: 0.0040, loss: 0.2816 ||: 100%|##########| 157/157 [00:00<00:00, 217.65it/s]

2023-05-06 15:28:28,273 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:28:28,274 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.996  |     0.921
2023-05-06 15:28:28,280 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   866.926  |       N/A
2023-05-06 15:28:28,282 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.017  |     0.282
2023-05-06 15:28:28,285 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  23669.137  |       N/A





2023-05-06 15:28:28,853 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:21.032906
2023-05-06 15:28:28,855 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:06
2023-05-06 15:28:28,857 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 15:28:28,859 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 23G
2023-05-06 15:28:28,861 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 867M
2023-05-06 15:28:28,863 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9994, batch_loss: 0.0046, loss: 0.0036 ||: 100%|##########| 1250/1250 [00:19<00:00, 63.50it/s]

2023-05-06 15:28:48,556 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9162, batch_loss: 0.0106, loss: 0.3270 ||: 100%|##########| 157/157 [00:00<00:00, 217.71it/s]

2023-05-06 15:28:49,288 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:28:49,291 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.999  |     0.916
2023-05-06 15:28:49,291 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   866.926  |       N/A
2023-05-06 15:28:49,292 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.004  |     0.327
2023-05-06 15:28:49,293 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  23669.137  |       N/A





2023-05-06 15:28:49,905 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:21.047503
2023-05-06 15:28:49,906 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 15:28:49,976 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 23669.13671875,
  "peak_gpu_0_memory_MB": 965.74658203125,
  "training_duration": "0:01:45.593198",
  "epoch": 4,
  "training_accuracy": 0.99945,
  "training_loss": 0.003638402843475342,
  "training_worker_0_memory_MB": 23669.13671875,
  "training_gpu_0_memory_MB": 866.92626953125,
  "validation_accuracy": 0.9162,
  "validation_loss": 0.3270320732526149,
  "best_validation_accuracy": 0.9246,
  "best_validation_loss": 0.18866893487751105
}
2023-05-06 15:28:49,978 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/28/model.tar.gz


[32m[I 2023-05-06 15:28:52,778][0m Trial 28 finished with value: 0.9246 and parameters: {'embedding_dim': 137, 'max_filter_size': 4, 'num_filters': 55}. Best is trial 8 with value: 0.9336.[0m


2023-05-06 15:28:52,877 - INFO - allennlp.common.params - evaluation = None
2023-05-06 15:28:52,879 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 15:28:52,887 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 15:28:52,892 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 15:28:52,893 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 15:28:52,895 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 15:28:52,900 - INFO - allennlp.common.params - type = default
2023-05-06 15:28:52,903 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 15:28:52,904 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 15:28:52,905 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 15:28:52,907 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 15:28:52,909 - INFO - all

loading instances: 40000it [01:26, 461.85it/s]

2023-05-06 15:30:21,197 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 15:30:21,200 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 15:30:21,202 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 15:30:21,204 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 15:30:21,206 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 15:30:21,207 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 15:30:21,208 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 15:30:21,209 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 15:30:21,210 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 15:30:21,211 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:09, 504.07it/s]

2023-05-06 15:30:31,138 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 15:30:31,140 - INFO - allennlp.common.params - type = from_instances
2023-05-06 15:30:31,142 - INFO - allennlp.common.params - min_count = None
2023-05-06 15:30:31,144 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 15:30:31,146 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 15:30:31,147 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 15:30:31,149 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 15:30:31,151 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 15:30:31,152 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 15:30:31,153 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 15:30:31,154 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 15


building vocab: 40000it [00:04, 8968.08it/s] 


2023-05-06 15:30:35,783 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 15:30:35,787 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 15:30:35,795 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 15:30:35,798 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 15:30:35,799 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 118
2023-05-06 15:30:35,800 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 15:30:35,802 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 15:30:35,804 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 15:30:35,805 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 15:30:40,828 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 15:30:40,830 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 15:30:40,837 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9084, batch_loss: 0.0603, loss: 0.2467 ||: 100%|##########| 1250/1250 [00:20<00:00, 61.66it/s]

2023-05-06 15:31:00,980 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 15:31:00,993 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 15:31:00,994 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 15:31:01,008 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9212, batch_loss: 0.0428, loss: 0.1956 ||: 100%|##########| 157/157 [00:00<00:00, 159.74it/s]

2023-05-06 15:31:01,970 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:31:01,974 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.908  |     0.921
2023-05-06 15:31:01,977 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   866.926  |       N/A
2023-05-06 15:31:01,979 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.247  |     0.196
2023-05-06 15:31:01,982 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  24244.887  |       N/A





2023-05-06 15:31:02,629 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:21.931160
2023-05-06 15:31:02,636 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:11
2023-05-06 15:31:02,638 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 15:31:02,639 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 24G
2023-05-06 15:31:02,641 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 754M
2023-05-06 15:31:02,647 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9466, batch_loss: 0.2255, loss: 0.1387 ||: 100%|##########| 1250/1250 [00:20<00:00, 60.91it/s]

2023-05-06 15:31:23,177 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9284, batch_loss: 0.0132, loss: 0.1841 ||: 100%|##########| 157/157 [00:01<00:00, 154.29it/s]

2023-05-06 15:31:24,201 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:31:24,208 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.947  |     0.928
2023-05-06 15:31:24,209 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   754.208  |       N/A
2023-05-06 15:31:24,211 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.139  |     0.184
2023-05-06 15:31:24,214 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  24293.910  |       N/A





2023-05-06 15:31:24,877 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:22.239040
2023-05-06 15:31:24,879 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:54
2023-05-06 15:31:24,881 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 15:31:24,883 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 24G
2023-05-06 15:31:24,886 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 754M
2023-05-06 15:31:24,888 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9780, batch_loss: 0.0020, loss: 0.0623 ||: 100%|##########| 1250/1250 [00:20<00:00, 61.39it/s]

2023-05-06 15:31:45,261 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9296, batch_loss: 0.0028, loss: 0.2222 ||: 100%|##########| 157/157 [00:00<00:00, 186.67it/s]

2023-05-06 15:31:46,115 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:31:46,118 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.978  |     0.930
2023-05-06 15:31:46,121 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   754.208  |       N/A
2023-05-06 15:31:46,124 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.062  |     0.222
2023-05-06 15:31:46,126 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  24293.910  |       N/A





2023-05-06 15:31:46,653 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:21.772239
2023-05-06 15:31:46,655 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:32
2023-05-06 15:31:46,658 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 15:31:46,660 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 24G
2023-05-06 15:31:46,662 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 754M
2023-05-06 15:31:46,664 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9951, batch_loss: 0.0656, loss: 0.0179 ||: 100%|##########| 1250/1250 [00:19<00:00, 62.85it/s]

2023-05-06 15:32:06,560 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9222, batch_loss: 0.0008, loss: 0.2683 ||: 100%|##########| 157/157 [00:00<00:00, 195.36it/s]

2023-05-06 15:32:07,372 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:32:07,380 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.995  |     0.922
2023-05-06 15:32:07,381 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   754.208  |       N/A
2023-05-06 15:32:07,384 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.018  |     0.268
2023-05-06 15:32:07,387 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  24293.910  |       N/A





2023-05-06 15:32:07,904 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:21.245832
2023-05-06 15:32:07,905 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:10
2023-05-06 15:32:07,911 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 15:32:07,919 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 24G
2023-05-06 15:32:07,922 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 754M
2023-05-06 15:32:07,923 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9996, batch_loss: 0.0008, loss: 0.0035 ||: 100%|##########| 1250/1250 [00:20<00:00, 62.30it/s]

2023-05-06 15:32:27,996 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9230, batch_loss: 0.0003, loss: 0.3307 ||: 100%|##########| 157/157 [00:00<00:00, 192.56it/s]

2023-05-06 15:32:28,822 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:32:28,828 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.923
2023-05-06 15:32:28,829 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   754.208  |       N/A
2023-05-06 15:32:28,831 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.004  |     0.331
2023-05-06 15:32:28,835 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  24293.910  |       N/A





2023-05-06 15:32:29,382 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:21.471395
2023-05-06 15:32:29,384 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 15:32:29,443 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 24293.91015625,
  "peak_gpu_0_memory_MB": 866.92626953125,
  "training_duration": "0:01:48.124100",
  "epoch": 4,
  "training_accuracy": 0.99955,
  "training_loss": 0.0035285144547116945,
  "training_worker_0_memory_MB": 24293.91015625,
  "training_gpu_0_memory_MB": 754.20751953125,
  "validation_accuracy": 0.923,
  "validation_loss": 0.33072657888617285,
  "best_validation_accuracy": 0.9284,
  "best_validation_loss": 0.18406028328404114
}
2023-05-06 15:32:29,445 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/29/model.tar.gz


[32m[I 2023-05-06 15:32:32,082][0m Trial 29 finished with value: 0.9284 and parameters: {'embedding_dim': 118, 'max_filter_size': 5, 'num_filters': 34}. Best is trial 8 with value: 0.9336.[0m


2023-05-06 15:32:32,218 - INFO - allennlp.common.params - evaluation = None
2023-05-06 15:32:32,222 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 15:32:32,225 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 15:32:32,228 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 15:32:32,230 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 15:32:32,234 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 15:32:32,236 - INFO - allennlp.common.params - type = default
2023-05-06 15:32:32,239 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 15:32:32,240 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 15:32:32,241 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 15:32:32,242 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 15:32:32,245 - INFO - all

loading instances: 40000it [01:30, 441.99it/s]

2023-05-06 15:34:02,793 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 15:34:02,795 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 15:34:02,799 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 15:34:02,803 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 15:34:02,804 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 15:34:02,805 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 15:34:02,807 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 15:34:02,809 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 15:34:02,810 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 15:34:02,812 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:10, 477.92it/s]

2023-05-06 15:34:13,281 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.





2023-05-06 15:34:13,290 - INFO - allennlp.common.params - type = from_instances
2023-05-06 15:34:13,293 - INFO - allennlp.common.params - min_count = None
2023-05-06 15:34:13,297 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 15:34:13,299 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 15:34:13,302 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 15:34:13,304 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 15:34:13,305 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 15:34:13,307 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 15:34:13,313 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 15:34:13,313 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 15:34:13,314 - INFO - allennlp.data.vocabulary - Fitting token dictionary from dataset.


building vocab: 40000it [00:03, 10621.84it/s]


2023-05-06 15:34:17,236 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 15:34:17,238 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 15:34:17,245 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 15:34:17,249 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 15:34:17,253 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 160
2023-05-06 15:34:17,256 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 15:34:17,257 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 15:34:17,259 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 15:34:17,260 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 15:34:22,295 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 15:34:22,296 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 15:34:22,307 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9081, batch_loss: 0.0544, loss: 0.2454 ||: 100%|##########| 1250/1250 [00:25<00:00, 49.88it/s]

2023-05-06 15:34:47,247 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 15:34:47,260 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 15:34:47,265 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 15:34:47,272 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9204, batch_loss: 0.0509, loss: 0.2013 ||: 100%|##########| 157/157 [00:01<00:00, 136.84it/s]

2023-05-06 15:34:48,401 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:34:48,402 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.908  |     0.920
2023-05-06 15:34:48,404 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   754.208  |       N/A
2023-05-06 15:34:48,405 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.245  |     0.201
2023-05-06 15:34:48,407 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  24826.520  |       N/A





2023-05-06 15:34:49,236 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:27.059975
2023-05-06 15:34:49,243 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:56
2023-05-06 15:34:49,245 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 15:34:49,251 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 24G
2023-05-06 15:34:49,255 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.0G
2023-05-06 15:34:49,256 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9488, batch_loss: 0.2042, loss: 0.1336 ||: 100%|##########| 1250/1250 [00:24<00:00, 50.02it/s]

2023-05-06 15:35:14,257 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9284, batch_loss: 0.0153, loss: 0.1858 ||: 100%|##########| 157/157 [00:00<00:00, 161.23it/s]

2023-05-06 15:35:15,242 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:35:15,245 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.949  |     0.928
2023-05-06 15:35:15,246 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1034.078  |       N/A
2023-05-06 15:35:15,248 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.134  |     0.186
2023-05-06 15:35:15,250 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  24886.824  |       N/A





2023-05-06 15:35:15,985 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:26.739333
2023-05-06 15:35:15,986 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:32
2023-05-06 15:35:15,999 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 15:35:16,001 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 24G
2023-05-06 15:35:16,004 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.0G
2023-05-06 15:35:16,006 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9828, batch_loss: 0.0027, loss: 0.0509 ||: 100%|##########| 1250/1250 [00:24<00:00, 50.23it/s]

2023-05-06 15:35:40,895 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9278, batch_loss: 0.0098, loss: 0.2416 ||: 100%|##########| 157/157 [00:01<00:00, 134.27it/s]

2023-05-06 15:35:42,074 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:35:42,075 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.983  |     0.928
2023-05-06 15:35:42,077 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1034.078  |       N/A
2023-05-06 15:35:42,079 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.051  |     0.242





2023-05-06 15:35:42,081 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  24886.824  |       N/A
2023-05-06 15:35:42,937 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:26.938706
2023-05-06 15:35:42,939 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:06
2023-05-06 15:35:42,945 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 15:35:42,949 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 24G
2023-05-06 15:35:42,953 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.0G
2023-05-06 15:35:42,956 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9972, batch_loss: 0.0452, loss: 0.0106 ||: 100%|##########| 1250/1250 [00:24<00:00, 50.54it/s]

2023-05-06 15:36:07,700 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9252, batch_loss: 0.0015, loss: 0.2955 ||: 100%|##########| 157/157 [00:00<00:00, 162.21it/s]

2023-05-06 15:36:08,678 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:36:08,682 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.997  |     0.925
2023-05-06 15:36:08,686 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1034.078  |       N/A
2023-05-06 15:36:08,688 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.011  |     0.295
2023-05-06 15:36:08,689 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  24886.824  |       N/A





2023-05-06 15:36:09,379 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:26.434632
2023-05-06 15:36:09,381 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:39
2023-05-06 15:36:09,390 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 15:36:09,392 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 24G
2023-05-06 15:36:09,394 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.0G
2023-05-06 15:36:09,395 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9999, batch_loss: 0.0010, loss: 0.0019 ||: 100%|##########| 1250/1250 [00:25<00:00, 49.87it/s]

2023-05-06 15:36:34,469 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9252, batch_loss: 0.0007, loss: 0.3703 ||: 100%|##########| 157/157 [00:01<00:00, 131.45it/s]

2023-05-06 15:36:35,675 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:36:35,680 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.925
2023-05-06 15:36:35,682 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1034.078  |       N/A
2023-05-06 15:36:35,686 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.002  |     0.370
2023-05-06 15:36:35,688 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  24886.824  |       N/A





2023-05-06 15:36:36,682 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:27.292153
2023-05-06 15:36:36,684 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 15:36:36,775 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 24886.82421875,
  "peak_gpu_0_memory_MB": 1034.07763671875,
  "training_duration": "0:02:13.498930",
  "epoch": 4,
  "training_accuracy": 0.999875,
  "training_loss": 0.0018793810138718983,
  "training_worker_0_memory_MB": 24886.82421875,
  "training_gpu_0_memory_MB": 1034.07763671875,
  "validation_accuracy": 0.9252,
  "validation_loss": 0.37032137423064887,
  "best_validation_accuracy": 0.9284,
  "best_validation_loss": 0.1858407511037721
}
2023-05-06 15:36:36,777 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/30/model.tar.gz


[32m[I 2023-05-06 15:36:40,981][0m Trial 30 finished with value: 0.9284 and parameters: {'embedding_dim': 160, 'max_filter_size': 4, 'num_filters': 75}. Best is trial 8 with value: 0.9336.[0m


2023-05-06 15:36:41,081 - INFO - allennlp.common.params - evaluation = None
2023-05-06 15:36:41,083 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 15:36:41,087 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 15:36:41,089 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 15:36:41,092 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 15:36:41,094 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 15:36:41,097 - INFO - allennlp.common.params - type = default
2023-05-06 15:36:41,102 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 15:36:41,104 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 15:36:41,106 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 15:36:41,108 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 15:36:41,110 - INFO - all

loading instances: 40000it [01:29, 446.36it/s]

2023-05-06 15:38:10,787 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 15:38:10,794 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 15:38:10,798 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 15:38:10,801 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 15:38:10,804 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 15:38:10,807 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None





2023-05-06 15:38:10,809 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 15:38:10,810 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 15:38:10,811 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 15:38:10,812 - INFO - allennlp.common.params - validation_data_loader.cuda_device = None
2023-05-06 15:38:10,813 - INFO - allennlp.common.params - validation_data_loader.quiet = False
2023-05-06 15:38:10,814 - INFO - allennlp.common.params - validation_data_loader.collate_fn = <allennlp.data.data_loaders.data_collator.DefaultDataCollator object at 0x7f1035c88d60>


loading instances: 5000it [00:07, 632.68it/s]

2023-05-06 15:38:18,724 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 15:38:18,726 - INFO - allennlp.common.params - type = from_instances
2023-05-06 15:38:18,728 - INFO - allennlp.common.params - min_count = None
2023-05-06 15:38:18,730 - INFO - allennlp.common.params - max_vocab_size = None





2023-05-06 15:38:18,732 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 15:38:18,733 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 15:38:18,737 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 15:38:18,738 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 15:38:18,739 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 15:38:18,742 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 15:38:18,743 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 15:38:18,744 - INFO - allennlp.data.vocabulary - Fitting token dictionary from dataset.


building vocab: 40000it [00:06, 6527.82it/s]


2023-05-06 15:38:25,104 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 15:38:25,107 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 15:38:25,109 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 15:38:25,112 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 15:38:25,114 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 90
2023-05-06 15:38:25,116 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 15:38:25,118 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 15:38:25,119 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 15:38:25,120 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tok

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 15:38:30,036 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 15:38:30,038 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 15:38:30,046 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9087, batch_loss: 0.0768, loss: 0.2459 ||: 100%|##########| 1250/1250 [00:17<00:00, 71.48it/s]

2023-05-06 15:38:47,406 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 15:38:47,422 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 15:38:47,425 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 15:38:47,428 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9238, batch_loss: 0.0359, loss: 0.1919 ||: 100%|##########| 157/157 [00:00<00:00, 204.05it/s]

2023-05-06 15:38:48,187 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:38:48,189 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.909  |     0.924
2023-05-06 15:38:48,195 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1034.078  |       N/A
2023-05-06 15:38:48,200 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.246  |     0.192
2023-05-06 15:38:48,201 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  25344.914  |       N/A





2023-05-06 15:38:48,547 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:18.637900
2023-05-06 15:38:48,549 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:44
2023-05-06 15:38:48,555 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 15:38:48,558 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 25G
2023-05-06 15:38:48,562 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 649M
2023-05-06 15:38:48,565 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9463, batch_loss: 0.2323, loss: 0.1372 ||: 100%|##########| 1250/1250 [00:17<00:00, 70.66it/s]

2023-05-06 15:39:06,265 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9320, batch_loss: 0.0153, loss: 0.1846 ||: 100%|##########| 157/157 [00:00<00:00, 212.85it/s]

2023-05-06 15:39:07,017 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:39:07,019 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.946  |     0.932
2023-05-06 15:39:07,021 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   648.642  |       N/A
2023-05-06 15:39:07,023 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.137  |     0.185
2023-05-06 15:39:07,024 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  25379.359  |       N/A





2023-05-06 15:39:07,459 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:18.904483
2023-05-06 15:39:07,461 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:28
2023-05-06 15:39:07,464 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 15:39:07,476 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 25G
2023-05-06 15:39:07,478 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 649M
2023-05-06 15:39:07,480 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9778, batch_loss: 0.0028, loss: 0.0622 ||: 100%|##########| 1250/1250 [00:17<00:00, 71.30it/s]

2023-05-06 15:39:25,017 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9280, batch_loss: 0.0063, loss: 0.2358 ||: 100%|##########| 157/157 [00:00<00:00, 207.68it/s]

2023-05-06 15:39:25,784 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:39:25,786 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.978  |     0.928
2023-05-06 15:39:25,789 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   648.766  |       N/A
2023-05-06 15:39:25,790 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.062  |     0.236
2023-05-06 15:39:25,791 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  25379.359  |       N/A





2023-05-06 15:39:26,208 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:18.744942
2023-05-06 15:39:26,210 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:10
2023-05-06 15:39:26,214 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 15:39:26,216 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 25G
2023-05-06 15:39:26,219 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 649M
2023-05-06 15:39:26,220 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9949, batch_loss: 0.0611, loss: 0.0185 ||: 100%|##########| 1250/1250 [00:17<00:00, 71.70it/s]

2023-05-06 15:39:43,659 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9240, batch_loss: 0.0030, loss: 0.2938 ||: 100%|##########| 157/157 [00:00<00:00, 209.63it/s]

2023-05-06 15:39:44,419 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:39:44,421 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.995  |     0.924
2023-05-06 15:39:44,423 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   648.582  |       N/A
2023-05-06 15:39:44,425 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.018  |     0.294
2023-05-06 15:39:44,427 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  25379.359  |       N/A





2023-05-06 15:39:44,825 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:18.611106
2023-05-06 15:39:44,828 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:01:51
2023-05-06 15:39:44,831 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 15:39:44,833 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 25G
2023-05-06 15:39:44,835 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 649M
2023-05-06 15:39:44,838 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9996, batch_loss: 0.0024, loss: 0.0034 ||: 100%|##########| 1250/1250 [00:17<00:00, 72.38it/s]

2023-05-06 15:40:02,111 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9252, batch_loss: 0.0141, loss: 0.3445 ||: 100%|##########| 157/157 [00:00<00:00, 214.56it/s]

2023-05-06 15:40:02,855 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:40:02,862 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.925
2023-05-06 15:40:02,863 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   648.751  |       N/A
2023-05-06 15:40:02,864 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.003  |     0.344
2023-05-06 15:40:02,865 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  25379.359  |       N/A





2023-05-06 15:40:03,241 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:18.409781
2023-05-06 15:40:03,242 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 15:40:03,287 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 25379.359375,
  "peak_gpu_0_memory_MB": 1034.07763671875,
  "training_duration": "0:01:32.945427",
  "epoch": 4,
  "training_accuracy": 0.999625,
  "training_loss": 0.003445892000308959,
  "training_worker_0_memory_MB": 25379.359375,
  "training_gpu_0_memory_MB": 648.75146484375,
  "validation_accuracy": 0.9252,
  "validation_loss": 0.3444970616311499,
  "best_validation_accuracy": 0.932,
  "best_validation_loss": 0.18455920316231478
}
2023-05-06 15:40:03,289 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/31/model.tar.gz


[32m[I 2023-05-06 15:40:05,197][0m Trial 31 finished with value: 0.932 and parameters: {'embedding_dim': 90, 'max_filter_size': 5, 'num_filters': 48}. Best is trial 8 with value: 0.9336.[0m


2023-05-06 15:40:05,299 - INFO - allennlp.common.params - evaluation = None
2023-05-06 15:40:05,301 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 15:40:05,304 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 15:40:05,305 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 15:40:05,307 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 15:40:05,310 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 15:40:05,312 - INFO - allennlp.common.params - type = default
2023-05-06 15:40:05,314 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 15:40:05,316 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 15:40:05,316 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 15:40:05,317 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 15:40:05,318 - INFO - all

loading instances: 40000it [01:27, 458.38it/s]

2023-05-06 15:41:32,620 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 15:41:32,622 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 15:41:32,624 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 15:41:32,625 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 15:41:32,627 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 15:41:32,629 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 15:41:32,631 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 15:41:32,631 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 15:41:32,632 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 15:41:32,633 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:09, 520.25it/s]

2023-05-06 15:41:42,253 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 15:41:42,254 - INFO - allennlp.common.params - type = from_instances
2023-05-06 15:41:42,256 - INFO - allennlp.common.params - min_count = None
2023-05-06 15:41:42,258 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 15:41:42,261 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 15:41:42,264 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 15:41:42,269 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 15:41:42,269 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 15:41:42,270 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 15:41:42,272 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 15:41:42,272 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 15


building vocab: 40000it [00:04, 8851.32it/s] 


2023-05-06 15:41:46,954 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 15:41:46,966 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 15:41:46,969 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 15:41:46,971 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 15:41:46,974 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 104
2023-05-06 15:41:46,978 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 15:41:46,979 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 15:41:46,981 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 15:41:46,982 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 15:41:55,966 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 15:41:55,968 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 15:41:55,973 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9031, batch_loss: 0.1103, loss: 0.2481 ||: 100%|##########| 1250/1250 [00:18<00:00, 67.53it/s]

2023-05-06 15:42:14,362 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 15:42:14,379 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 15:42:14,381 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 15:42:14,387 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9260, batch_loss: 0.0527, loss: 0.1912 ||: 100%|##########| 157/157 [00:00<00:00, 159.42it/s]

2023-05-06 15:42:15,356 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:42:15,358 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.903  |     0.926
2023-05-06 15:42:15,360 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   648.819  |       N/A
2023-05-06 15:42:15,361 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.248  |     0.191
2023-05-06 15:42:15,365 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  25989.043  |       N/A





2023-05-06 15:42:15,965 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:20.120716
2023-05-06 15:42:15,967 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:55
2023-05-06 15:42:15,969 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 15:42:15,971 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 25G
2023-05-06 15:42:15,973 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 676M
2023-05-06 15:42:15,975 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9468, batch_loss: 0.2896, loss: 0.1386 ||: 100%|##########| 1250/1250 [00:18<00:00, 67.68it/s]

2023-05-06 15:42:34,451 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9316, batch_loss: 0.0191, loss: 0.1824 ||: 100%|##########| 157/157 [00:00<00:00, 157.80it/s]

2023-05-06 15:42:35,455 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:42:35,457 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.947  |     0.932
2023-05-06 15:42:35,459 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   675.880  |       N/A
2023-05-06 15:42:35,461 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.139  |     0.182
2023-05-06 15:42:35,462 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  26028.750  |       N/A





2023-05-06 15:42:36,034 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:20.065196
2023-05-06 15:42:36,037 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:38
2023-05-06 15:42:36,039 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 15:42:36,043 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 25G
2023-05-06 15:42:36,045 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 676M
2023-05-06 15:42:36,047 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9780, batch_loss: 0.0046, loss: 0.0630 ||: 100%|##########| 1250/1250 [00:18<00:00, 68.12it/s]

2023-05-06 15:42:54,403 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9298, batch_loss: 0.0154, loss: 0.2263 ||: 100%|##########| 157/157 [00:00<00:00, 159.33it/s]

2023-05-06 15:42:55,396 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:42:55,398 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.978  |     0.930
2023-05-06 15:42:55,400 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   675.880  |       N/A
2023-05-06 15:42:55,402 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.063  |     0.226
2023-05-06 15:42:55,406 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  26028.750  |       N/A





2023-05-06 15:42:56,010 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:19.971646
2023-05-06 15:42:56,013 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:18
2023-05-06 15:42:56,015 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 15:42:56,017 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 25G
2023-05-06 15:42:56,020 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 676M
2023-05-06 15:42:56,021 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9949, batch_loss: 0.0566, loss: 0.0182 ||: 100%|##########| 1250/1250 [00:18<00:00, 68.76it/s]

2023-05-06 15:43:14,206 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9262, batch_loss: 0.0027, loss: 0.2831 ||: 100%|##########| 157/157 [00:00<00:00, 164.34it/s]

2023-05-06 15:43:15,170 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:43:15,171 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.995  |     0.926
2023-05-06 15:43:15,173 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   675.880  |       N/A
2023-05-06 15:43:15,178 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.018  |     0.283
2023-05-06 15:43:15,179 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  26028.750  |       N/A





2023-05-06 15:43:15,762 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:19.747394
2023-05-06 15:43:15,764 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:01:58
2023-05-06 15:43:15,766 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 15:43:15,768 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 25G
2023-05-06 15:43:15,773 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 676M
2023-05-06 15:43:15,775 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9993, batch_loss: 0.0051, loss: 0.0040 ||: 100%|##########| 1250/1250 [00:18<00:00, 68.62it/s]

2023-05-06 15:43:33,996 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9208, batch_loss: 0.0102, loss: 0.3123 ||: 100%|##########| 157/157 [00:00<00:00, 160.22it/s]

2023-05-06 15:43:34,987 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:43:34,989 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.999  |     0.921
2023-05-06 15:43:34,991 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   675.880  |       N/A
2023-05-06 15:43:34,992 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.004  |     0.312
2023-05-06 15:43:34,994 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  26028.750  |       N/A





2023-05-06 15:43:35,678 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:19.911316
2023-05-06 15:43:35,680 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 15:43:35,741 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 26028.75,
  "peak_gpu_0_memory_MB": 675.8798828125,
  "training_duration": "0:01:39.142803",
  "epoch": 4,
  "training_accuracy": 0.99935,
  "training_loss": 0.004010869752269355,
  "training_worker_0_memory_MB": 26028.75,
  "training_gpu_0_memory_MB": 675.8798828125,
  "validation_accuracy": 0.9208,
  "validation_loss": 0.31234160087599305,
  "best_validation_accuracy": 0.9316,
  "best_validation_loss": 0.1824325436405885
}
2023-05-06 15:43:35,745 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/32/model.tar.gz


[32m[I 2023-05-06 15:43:37,918][0m Trial 32 finished with value: 0.9316 and parameters: {'embedding_dim': 104, 'max_filter_size': 5, 'num_filters': 33}. Best is trial 8 with value: 0.9336.[0m


2023-05-06 15:43:38,024 - INFO - allennlp.common.params - evaluation = None
2023-05-06 15:43:38,026 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 15:43:38,032 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 15:43:38,034 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 15:43:38,039 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 15:43:38,041 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 15:43:38,042 - INFO - allennlp.common.params - type = default
2023-05-06 15:43:38,043 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 15:43:38,045 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 15:43:38,048 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 15:43:38,049 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 15:43:38,050 - INFO - all

loading instances: 40000it [01:22, 487.59it/s]

2023-05-06 15:45:00,128 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 15:45:00,130 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 15:45:00,133 - INFO - allennlp.common.params - validation_data_loader.drop_last = False





2023-05-06 15:45:00,135 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 15:45:00,137 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 15:45:00,138 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 15:45:00,139 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 15:45:00,140 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 15:45:00,141 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 15:45:00,142 - INFO - allennlp.common.params - validation_data_loader.cuda_device = None
2023-05-06 15:45:00,143 - INFO - allennlp.common.params - validation_data_loader.quiet = False
2023-05-06 15:45:00,144 - INFO - allennlp.common.params - validation_data_loader.collate_fn = <allennlp.data.data_loaders.data_collator.DefaultDataCollator object at 0x7f1035c88d60>


loading instances: 5000it [00:12, 398.81it/s]

2023-05-06 15:45:12,686 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 15:45:12,688 - INFO - allennlp.common.params - type = from_instances
2023-05-06 15:45:12,691 - INFO - allennlp.common.params - min_count = None
2023-05-06 15:45:12,693 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 15:45:12,695 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 15:45:12,698 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 15:45:12,699 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 15:45:12,700 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 15:45:12,701 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 15:45:12,702 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 15:45:12,703 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 15


building vocab: 40000it [00:03, 10050.30it/s]


2023-05-06 15:45:16,903 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 15:45:16,905 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 15:45:16,908 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 15:45:16,913 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 15:45:16,915 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 91
2023-05-06 15:45:16,917 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 15:45:16,918 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 15:45:16,919 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 15:45:16,922 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tok

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 15:45:24,351 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 15:45:24,353 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 15:45:24,359 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9066, batch_loss: 0.0720, loss: 0.2449 ||: 100%|##########| 1250/1250 [00:17<00:00, 70.00it/s]

2023-05-06 15:45:42,095 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 15:45:42,111 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 15:45:42,113 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 15:45:42,118 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9242, batch_loss: 0.0655, loss: 0.1910 ||: 100%|##########| 157/157 [00:00<00:00, 197.60it/s]

2023-05-06 15:45:42,898 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:45:42,904 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.907  |     0.924
2023-05-06 15:45:42,908 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   675.880  |       N/A
2023-05-06 15:45:42,911 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.245  |     0.191
2023-05-06 15:45:42,913 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  26562.832  |       N/A





2023-05-06 15:45:43,294 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:19.066449
2023-05-06 15:45:43,298 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:48
2023-05-06 15:45:43,303 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 15:45:43,304 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 26G
2023-05-06 15:45:43,306 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 653M
2023-05-06 15:45:43,308 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9486, batch_loss: 0.1477, loss: 0.1333 ||: 100%|##########| 1250/1250 [00:17<00:00, 70.32it/s]

2023-05-06 15:46:01,091 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9282, batch_loss: 0.0347, loss: 0.1810 ||: 100%|##########| 157/157 [00:00<00:00, 208.97it/s]

2023-05-06 15:46:01,858 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:46:01,860 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.949  |     0.928
2023-05-06 15:46:01,861 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   653.156  |       N/A
2023-05-06 15:46:01,864 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.133  |     0.181
2023-05-06 15:46:01,867 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  26597.473  |       N/A





2023-05-06 15:46:02,273 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:18.970210
2023-05-06 15:46:02,278 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:30
2023-05-06 15:46:02,281 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 15:46:02,283 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 26G
2023-05-06 15:46:02,285 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 653M
2023-05-06 15:46:02,286 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9813, batch_loss: 0.0052, loss: 0.0548 ||: 100%|##########| 1250/1250 [00:17<00:00, 70.79it/s]

2023-05-06 15:46:19,950 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9252, batch_loss: 0.0561, loss: 0.2361 ||: 100%|##########| 157/157 [00:00<00:00, 200.78it/s]

2023-05-06 15:46:20,747 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:46:20,749 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.981  |     0.925
2023-05-06 15:46:20,753 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   653.280  |       N/A
2023-05-06 15:46:20,755 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.055  |     0.236
2023-05-06 15:46:20,759 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  26597.473  |       N/A





2023-05-06 15:46:21,226 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:18.944438
2023-05-06 15:46:21,231 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:11
2023-05-06 15:46:21,239 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 15:46:21,241 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 26G
2023-05-06 15:46:21,245 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 653M
2023-05-06 15:46:21,247 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9960, batch_loss: 0.0718, loss: 0.0150 ||: 100%|##########| 1250/1250 [00:17<00:00, 71.37it/s]

2023-05-06 15:46:38,766 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9238, batch_loss: 0.0238, loss: 0.3026 ||: 100%|##########| 157/157 [00:00<00:00, 208.98it/s]

2023-05-06 15:46:39,524 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:46:39,525 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.996  |     0.924
2023-05-06 15:46:39,531 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   653.096  |       N/A
2023-05-06 15:46:39,532 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.015  |     0.303
2023-05-06 15:46:39,533 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  26597.473  |       N/A





2023-05-06 15:46:39,915 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:18.682817
2023-05-06 15:46:39,917 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:01:52
2023-05-06 15:46:39,920 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 15:46:39,922 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 26G
2023-05-06 15:46:39,924 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 653M
2023-05-06 15:46:39,926 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9995, batch_loss: 0.0017, loss: 0.0031 ||: 100%|##########| 1250/1250 [00:17<00:00, 70.71it/s]

2023-05-06 15:46:57,610 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9234, batch_loss: 0.0082, loss: 0.3521 ||: 100%|##########| 157/157 [00:00<00:00, 205.88it/s]

2023-05-06 15:46:58,379 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:46:58,380 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.923
2023-05-06 15:46:58,383 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   653.266  |       N/A
2023-05-06 15:46:58,386 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.003  |     0.352
2023-05-06 15:46:58,388 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  26597.473  |       N/A





2023-05-06 15:46:58,815 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:18.894563
2023-05-06 15:46:58,816 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 15:46:58,870 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 26597.47265625,
  "peak_gpu_0_memory_MB": 675.8798828125,
  "training_duration": "0:01:34.151120",
  "epoch": 4,
  "training_accuracy": 0.999525,
  "training_loss": 0.003133486740688386,
  "training_worker_0_memory_MB": 26597.47265625,
  "training_gpu_0_memory_MB": 653.265625,
  "validation_accuracy": 0.9234,
  "validation_loss": 0.35211509899985177,
  "best_validation_accuracy": 0.9282,
  "best_validation_loss": 0.18104918324833463
}
2023-05-06 15:46:58,873 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/33/model.tar.gz


[32m[I 2023-05-06 15:47:00,804][0m Trial 33 finished with value: 0.9282 and parameters: {'embedding_dim': 91, 'max_filter_size': 5, 'num_filters': 48}. Best is trial 8 with value: 0.9336.[0m


2023-05-06 15:47:00,903 - INFO - allennlp.common.params - evaluation = None
2023-05-06 15:47:00,905 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 15:47:00,907 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 15:47:00,914 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 15:47:00,915 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 15:47:00,917 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 15:47:00,920 - INFO - allennlp.common.params - type = default
2023-05-06 15:47:00,922 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 15:47:00,924 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 15:47:00,926 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 15:47:00,930 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 15:47:00,934 - INFO - all

loading instances: 40000it [01:28, 453.66it/s]

2023-05-06 15:48:29,206 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 15:48:29,207 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 15:48:29,209 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 15:48:29,215 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 15:48:29,216 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 15:48:29,219 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 15:48:29,222 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 15:48:29,225 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 15:48:29,230 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 15:48:29,230 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:07, 628.95it/s]

2023-05-06 15:48:37,187 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 15:48:37,189 - INFO - allennlp.common.params - type = from_instances
2023-05-06 15:48:37,195 - INFO - allennlp.common.params - min_count = None
2023-05-06 15:48:37,197 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 15:48:37,199 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 15:48:37,200 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 15:48:37,201 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 15:48:37,202 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 15:48:37,203 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 15:48:37,206 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 15:48:37,207 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 15


building vocab: 40000it [00:06, 6455.97it/s]


2023-05-06 15:48:43,643 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 15:48:43,645 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 15:48:43,648 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 15:48:43,650 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 15:48:43,655 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 125
2023-05-06 15:48:43,657 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 15:48:43,658 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 15:48:43,662 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 15:48:43,663 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 15:48:48,613 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 15:48:48,615 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 15:48:48,622 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9105, batch_loss: 0.0598, loss: 0.2422 ||: 100%|##########| 1250/1250 [00:22<00:00, 56.24it/s]

2023-05-06 15:49:10,728 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 15:49:10,741 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 15:49:10,743 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 15:49:10,748 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9238, batch_loss: 0.0376, loss: 0.1916 ||: 100%|##########| 157/157 [00:00<00:00, 174.84it/s]

2023-05-06 15:49:11,633 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:49:11,636 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.910  |     0.924
2023-05-06 15:49:11,637 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   653.333  |       N/A
2023-05-06 15:49:11,639 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.242  |     0.192
2023-05-06 15:49:11,640 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  27180.145  |       N/A





2023-05-06 15:49:12,208 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:23.712454
2023-05-06 15:49:12,209 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:28
2023-05-06 15:49:12,212 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 15:49:12,214 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 27G
2023-05-06 15:49:12,216 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 882M
2023-05-06 15:49:12,218 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9502, batch_loss: 0.1916, loss: 0.1298 ||: 100%|##########| 1250/1250 [00:22<00:00, 55.05it/s]

2023-05-06 15:49:34,934 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9318, batch_loss: 0.0105, loss: 0.1846 ||: 100%|##########| 157/157 [00:01<00:00, 147.63it/s]

2023-05-06 15:49:36,005 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:49:36,006 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.950  |     0.932
2023-05-06 15:49:36,008 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   881.896  |       N/A
2023-05-06 15:49:36,010 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.130  |     0.185
2023-05-06 15:49:36,012 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  27227.531  |       N/A





2023-05-06 15:49:36,693 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:24.480675
2023-05-06 15:49:36,696 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:10
2023-05-06 15:49:36,699 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 15:49:36,700 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 27G
2023-05-06 15:49:36,703 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 882M
2023-05-06 15:49:36,705 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9829, batch_loss: 0.0014, loss: 0.0498 ||: 100%|##########| 1250/1250 [00:22<00:00, 56.28it/s]

2023-05-06 15:49:58,921 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9254, batch_loss: 0.0013, loss: 0.2463 ||: 100%|##########| 157/157 [00:00<00:00, 183.65it/s]

2023-05-06 15:49:59,782 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:49:59,787 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.983  |     0.925
2023-05-06 15:49:59,793 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   882.021  |       N/A
2023-05-06 15:49:59,795 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.050  |     0.246
2023-05-06 15:49:59,795 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  27227.531  |       N/A





2023-05-06 15:50:00,350 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:23.650980
2023-05-06 15:50:00,351 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:46
2023-05-06 15:50:00,354 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 15:50:00,356 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 27G
2023-05-06 15:50:00,358 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 882M
2023-05-06 15:50:00,360 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9975, batch_loss: 0.0552, loss: 0.0111 ||: 100%|##########| 1250/1250 [00:21<00:00, 57.38it/s]

2023-05-06 15:50:22,150 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9226, batch_loss: 0.0008, loss: 0.2991 ||: 100%|##########| 157/157 [00:00<00:00, 182.07it/s]

2023-05-06 15:50:23,021 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:50:23,023 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.998  |     0.923
2023-05-06 15:50:23,025 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   881.836  |       N/A
2023-05-06 15:50:23,027 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.011  |     0.299
2023-05-06 15:50:23,028 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  27227.531  |       N/A





2023-05-06 15:50:23,562 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:23.208644
2023-05-06 15:50:23,564 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:21
2023-05-06 15:50:23,570 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 15:50:23,573 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 27G
2023-05-06 15:50:23,579 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 882M
2023-05-06 15:50:23,583 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9998, batch_loss: 0.0015, loss: 0.0020 ||: 100%|##########| 1250/1250 [00:22<00:00, 56.12it/s]

2023-05-06 15:50:45,859 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9212, batch_loss: 0.0003, loss: 0.3452 ||: 100%|##########| 157/157 [00:01<00:00, 148.02it/s]

2023-05-06 15:50:46,927 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:50:46,933 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.921
2023-05-06 15:50:46,935 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   882.006  |       N/A
2023-05-06 15:50:46,936 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.002  |     0.345
2023-05-06 15:50:46,937 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  27227.531  |       N/A





2023-05-06 15:50:47,701 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:24.131097
2023-05-06 15:50:47,706 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 15:50:47,788 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 27227.53125,
  "peak_gpu_0_memory_MB": 882.0205078125,
  "training_duration": "0:01:58.431398",
  "epoch": 4,
  "training_accuracy": 0.999775,
  "training_loss": 0.001979082696294063,
  "training_worker_0_memory_MB": 27227.53125,
  "training_gpu_0_memory_MB": 882.005859375,
  "validation_accuracy": 0.9212,
  "validation_loss": 0.34517873341112343,
  "best_validation_accuracy": 0.9318,
  "best_validation_loss": 0.1846124625224976
}
2023-05-06 15:50:47,791 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/34/model.tar.gz


[32m[I 2023-05-06 15:50:51,208][0m Trial 34 finished with value: 0.9318 and parameters: {'embedding_dim': 125, 'max_filter_size': 5, 'num_filters': 63}. Best is trial 8 with value: 0.9336.[0m


2023-05-06 15:50:51,315 - INFO - allennlp.common.params - evaluation = None
2023-05-06 15:50:51,317 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 15:50:51,323 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 15:50:51,325 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 15:50:51,326 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 15:50:51,328 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 15:50:51,333 - INFO - allennlp.common.params - type = default
2023-05-06 15:50:51,334 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 15:50:51,338 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 15:50:51,339 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 15:50:51,341 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 15:50:51,343 - INFO - all

loading instances: 40000it [01:27, 455.45it/s]

2023-05-06 15:52:19,234 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 15:52:19,239 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 15:52:19,240 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 15:52:19,242 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 15:52:19,243 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 15:52:19,247 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 15:52:19,249 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 15:52:19,253 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 15:52:19,254 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 15:52:19,256 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:07, 649.60it/s]

2023-05-06 15:52:26,965 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 15:52:26,970 - INFO - allennlp.common.params - type = from_instances
2023-05-06 15:52:26,971 - INFO - allennlp.common.params - min_count = None
2023-05-06 15:52:26,972 - INFO - allennlp.common.params - max_vocab_size = None





2023-05-06 15:52:26,977 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 15:52:26,978 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 15:52:26,979 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 15:52:26,979 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 15:52:26,980 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 15:52:26,981 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 15:52:26,985 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 15:52:26,986 - INFO - allennlp.data.vocabulary - Fitting token dictionary from dataset.


building vocab: 40000it [00:04, 9460.33it/s]


2023-05-06 15:52:31,449 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 15:52:31,451 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 15:52:31,453 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 15:52:31,455 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 15:52:31,457 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 81
2023-05-06 15:52:31,460 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 15:52:31,462 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 15:52:31,463 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 15:52:31,467 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tok

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 15:52:38,730 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 15:52:38,732 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 15:52:38,738 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9090, batch_loss: 0.0686, loss: 0.2441 ||: 100%|##########| 1250/1250 [00:22<00:00, 56.38it/s]

2023-05-06 15:53:00,782 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 15:53:00,797 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 15:53:00,799 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 15:53:00,804 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9212, batch_loss: 0.0401, loss: 0.1955 ||: 100%|##########| 157/157 [00:01<00:00, 152.32it/s]

2023-05-06 15:53:01,821 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:53:01,823 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.909  |     0.921
2023-05-06 15:53:01,824 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   882.074  |       N/A
2023-05-06 15:53:01,826 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.244  |     0.195
2023-05-06 15:53:01,828 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  27770.090  |       N/A





2023-05-06 15:53:02,152 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:23.560599
2023-05-06 15:53:02,154 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:29
2023-05-06 15:53:02,156 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 15:53:02,158 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 27G
2023-05-06 15:53:02,168 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 972M
2023-05-06 15:53:02,178 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9478, batch_loss: 0.1787, loss: 0.1357 ||: 100%|##########| 1250/1250 [00:22<00:00, 55.58it/s]

2023-05-06 15:53:24,675 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9336, batch_loss: 0.0148, loss: 0.1818 ||: 100%|##########| 157/157 [00:01<00:00, 137.31it/s]

2023-05-06 15:53:25,826 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:53:25,828 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.948  |     0.934
2023-05-06 15:53:25,829 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   971.790  |       N/A
2023-05-06 15:53:25,831 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.136  |     0.182
2023-05-06 15:53:25,836 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  27831.770  |       N/A





2023-05-06 15:53:26,259 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:24.102940
2023-05-06 15:53:26,263 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:08
2023-05-06 15:53:26,265 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 15:53:26,268 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 27G
2023-05-06 15:53:26,270 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 972M
2023-05-06 15:53:26,273 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9815, batch_loss: 0.0013, loss: 0.0541 ||: 100%|##########| 1250/1250 [00:22<00:00, 55.66it/s]

2023-05-06 15:53:48,737 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9302, batch_loss: 0.0026, loss: 0.2516 ||: 100%|##########| 157/157 [00:01<00:00, 124.42it/s]

2023-05-06 15:53:50,007 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:53:50,009 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.981  |     0.930
2023-05-06 15:53:50,010 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   971.915  |       N/A
2023-05-06 15:53:50,012 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.054  |     0.252
2023-05-06 15:53:50,014 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  27831.770  |       N/A





2023-05-06 15:53:50,444 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:24.179414
2023-05-06 15:53:50,446 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:46
2023-05-06 15:53:50,448 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 15:53:50,455 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 27G
2023-05-06 15:53:50,457 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 972M
2023-05-06 15:53:50,463 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9958, batch_loss: 0.0607, loss: 0.0136 ||: 100%|##########| 1250/1250 [00:21<00:00, 56.92it/s]

2023-05-06 15:54:12,429 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9270, batch_loss: 0.0014, loss: 0.3417 ||: 100%|##########| 157/157 [00:01<00:00, 153.30it/s]

2023-05-06 15:54:13,462 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:54:13,464 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.996  |     0.927
2023-05-06 15:54:13,470 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   971.730  |       N/A
2023-05-06 15:54:13,472 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.014  |     0.342
2023-05-06 15:54:13,474 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  27831.770  |       N/A





2023-05-06 15:54:13,821 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:23.372429
2023-05-06 15:54:13,839 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:22
2023-05-06 15:54:13,851 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 15:54:13,857 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 27G
2023-05-06 15:54:13,861 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 972M
2023-05-06 15:54:13,863 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9996, batch_loss: 0.0022, loss: 0.0024 ||: 100%|##########| 1250/1250 [00:22<00:00, 56.22it/s]

2023-05-06 15:54:36,100 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9266, batch_loss: 0.0013, loss: 0.4174 ||: 100%|##########| 157/157 [00:01<00:00, 152.46it/s]

2023-05-06 15:54:37,138 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:54:37,140 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.927
2023-05-06 15:54:37,141 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   971.900  |       N/A
2023-05-06 15:54:37,143 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.002  |     0.417
2023-05-06 15:54:37,145 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  27831.770  |       N/A





2023-05-06 15:54:37,508 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:23.656423
2023-05-06 15:54:37,515 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 15:54:37,580 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 27831.76953125,
  "peak_gpu_0_memory_MB": 971.91455078125,
  "training_duration": "0:01:58.546443",
  "epoch": 4,
  "training_accuracy": 0.9996,
  "training_loss": 0.002397591895524238,
  "training_worker_0_memory_MB": 27831.76953125,
  "training_gpu_0_memory_MB": 971.89990234375,
  "validation_accuracy": 0.9266,
  "validation_loss": 0.4173579868789359,
  "best_validation_accuracy": 0.9336,
  "best_validation_loss": 0.18176064102252007
}
2023-05-06 15:54:37,583 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/35/model.tar.gz


[32m[I 2023-05-06 15:54:39,915][0m Trial 35 finished with value: 0.9336 and parameters: {'embedding_dim': 81, 'max_filter_size': 5, 'num_filters': 114}. Best is trial 8 with value: 0.9336.[0m


2023-05-06 15:54:40,057 - INFO - allennlp.common.params - evaluation = None
2023-05-06 15:54:40,059 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 15:54:40,064 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 15:54:40,065 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 15:54:40,068 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 15:54:40,070 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 15:54:40,071 - INFO - allennlp.common.params - type = default
2023-05-06 15:54:40,075 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 15:54:40,078 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 15:54:40,081 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 15:54:40,082 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 15:54:40,084 - INFO - all

loading instances: 40000it [01:35, 421.04it/s]

2023-05-06 15:56:15,183 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 15:56:15,185 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 15:56:15,187 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 15:56:15,192 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 15:56:15,194 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 15:56:15,195 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 15:56:15,197 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 15:56:15,198 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 15:56:15,199 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 15:56:15,200 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:07, 651.84it/s]

2023-05-06 15:56:22,880 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 15:56:22,882 - INFO - allennlp.common.params - type = from_instances
2023-05-06 15:56:22,884 - INFO - allennlp.common.params - min_count = None
2023-05-06 15:56:22,885 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 15:56:22,886 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 15:56:22,888 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 15:56:22,890 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 15:56:22,891 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 15:56:22,894 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 15:56:22,895 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 15:56:22,896 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 15


building vocab: 40000it [00:04, 9303.23it/s]


2023-05-06 15:56:27,424 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 15:56:27,428 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 15:56:27,430 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 15:56:27,433 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 15:56:27,435 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 62
2023-05-06 15:56:27,439 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 15:56:27,440 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 15:56:27,442 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 15:56:27,444 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tok

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 15:56:34,606 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 15:56:34,607 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 15:56:34,612 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9044, batch_loss: 0.0796, loss: 0.2532 ||: 100%|##########| 1250/1250 [00:14<00:00, 84.64it/s]

2023-05-06 15:56:49,262 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 15:56:49,276 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 15:56:49,279 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 15:56:49,286 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9222, batch_loss: 0.0564, loss: 0.1943 ||: 100%|##########| 157/157 [00:01<00:00, 155.75it/s]

2023-05-06 15:56:50,277 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:56:50,285 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.904  |     0.922
2023-05-06 15:56:50,286 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   971.968  |       N/A
2023-05-06 15:56:50,288 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.253  |     0.194
2023-05-06 15:56:50,289 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  28396.879  |       N/A





2023-05-06 15:56:50,528 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:16.049725
2023-05-06 15:56:50,534 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:22
2023-05-06 15:56:50,536 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 15:56:50,537 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 28G
2023-05-06 15:56:50,539 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 785M
2023-05-06 15:56:50,544 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9412, batch_loss: 0.2349, loss: 0.1487 ||: 100%|##########| 1250/1250 [00:14<00:00, 85.29it/s]

2023-05-06 15:57:05,213 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9262, batch_loss: 0.0110, loss: 0.1805 ||: 100%|##########| 157/157 [00:00<00:00, 161.07it/s]

2023-05-06 15:57:06,195 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation





2023-05-06 15:57:06,197 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.941  |     0.926
2023-05-06 15:57:06,199 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   785.361  |       N/A
2023-05-06 15:57:06,201 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.149  |     0.180
2023-05-06 15:57:06,202 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  28420.855  |       N/A
2023-05-06 15:57:06,419 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:15.882890
2023-05-06 15:57:06,427 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:06
2023-05-06 15:57:06,431 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 15:57:06,432 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 28G
2023-05-06 15:57:06,439 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memo

accuracy: 0.9735, batch_loss: 0.0063, loss: 0.0731 ||: 100%|##########| 1250/1250 [00:14<00:00, 84.36it/s]

2023-05-06 15:57:21,278 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9234, batch_loss: 0.0077, loss: 0.2285 ||: 100%|##########| 157/157 [00:00<00:00, 164.68it/s]

2023-05-06 15:57:22,237 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:57:22,242 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.973  |     0.923
2023-05-06 15:57:22,243 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   785.486  |       N/A
2023-05-06 15:57:22,245 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.073  |     0.229
2023-05-06 15:57:22,247 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  28420.855  |       N/A





2023-05-06 15:57:22,447 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:16.016685
2023-05-06 15:57:22,456 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:01:51
2023-05-06 15:57:22,458 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 15:57:22,464 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 28G
2023-05-06 15:57:22,466 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 785M
2023-05-06 15:57:22,472 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9935, batch_loss: 0.1262, loss: 0.0231 ||: 100%|##########| 1250/1250 [00:14<00:00, 84.07it/s]

2023-05-06 15:57:37,347 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9216, batch_loss: 0.0005, loss: 0.2805 ||: 100%|##########| 157/157 [00:00<00:00, 200.16it/s]

2023-05-06 15:57:38,138 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:57:38,143 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.994  |     0.922
2023-05-06 15:57:38,145 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   785.302  |       N/A
2023-05-06 15:57:38,146 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.023  |     0.280
2023-05-06 15:57:38,147 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  28420.855  |       N/A





2023-05-06 15:57:38,378 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:15.919971
2023-05-06 15:57:38,384 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:01:35
2023-05-06 15:57:38,386 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 15:57:38,388 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 28G
2023-05-06 15:57:38,391 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 785M
2023-05-06 15:57:38,393 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9992, batch_loss: 0.0057, loss: 0.0051 ||: 100%|##########| 1250/1250 [00:15<00:00, 82.74it/s]

2023-05-06 15:57:53,507 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9176, batch_loss: 0.0013, loss: 0.3193 ||: 100%|##########| 157/157 [00:00<00:00, 198.97it/s]

2023-05-06 15:57:54,302 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 15:57:54,308 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.999  |     0.918
2023-05-06 15:57:54,309 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   785.471  |       N/A
2023-05-06 15:57:54,310 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.005  |     0.319
2023-05-06 15:57:54,311 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  28420.855  |       N/A





2023-05-06 15:57:54,529 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:16.142810
2023-05-06 15:57:54,534 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 15:57:54,566 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 28420.85546875,
  "peak_gpu_0_memory_MB": 971.9677734375,
  "training_duration": "0:01:19.823524",
  "epoch": 4,
  "training_accuracy": 0.9992,
  "training_loss": 0.005113874752624543,
  "training_worker_0_memory_MB": 28420.85546875,
  "training_gpu_0_memory_MB": 785.47119140625,
  "validation_accuracy": 0.9176,
  "validation_loss": 0.319255755252537,
  "best_validation_accuracy": 0.9262,
  "best_validation_loss": 0.1804517170200777
}
2023-05-06 15:57:54,568 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/36/model.tar.gz


[32m[I 2023-05-06 15:57:55,919][0m Trial 36 finished with value: 0.9262 and parameters: {'embedding_dim': 62, 'max_filter_size': 4, 'num_filters': 113}. Best is trial 8 with value: 0.9336.[0m


2023-05-06 15:57:56,018 - INFO - allennlp.common.params - evaluation = None
2023-05-06 15:57:56,020 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 15:57:56,022 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 15:57:56,025 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 15:57:56,028 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 15:57:56,029 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 15:57:56,034 - INFO - allennlp.common.params - type = default
2023-05-06 15:57:56,038 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 15:57:56,040 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 15:57:56,042 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 15:57:56,043 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 15:57:56,048 - INFO - all

loading instances: 40000it [01:32, 432.49it/s]

2023-05-06 15:59:28,618 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 15:59:28,621 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 15:59:28,625 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 15:59:28,626 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 15:59:28,628 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 15:59:28,632 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 15:59:28,633 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 15:59:28,634 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 15:59:28,635 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 15:59:28,636 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:09, 528.44it/s]

2023-05-06 15:59:38,107 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 15:59:38,111 - INFO - allennlp.common.params - type = from_instances
2023-05-06 15:59:38,114 - INFO - allennlp.common.params - min_count = None
2023-05-06 15:59:38,118 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 15:59:38,121 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 15:59:38,123 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 15:59:38,126 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 15:59:38,127 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 15:59:38,128 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 15:59:38,133 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 15:59:38,135 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 15


building vocab: 40000it [00:03, 10628.84it/s]


2023-05-06 15:59:42,075 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 15:59:42,077 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 15:59:42,082 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 15:59:42,084 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 15:59:42,086 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 84
2023-05-06 15:59:42,088 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 15:59:42,090 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 15:59:42,092 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 15:59:42,093 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tok

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 15:59:48,428 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 15:59:48,430 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 15:59:48,438 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9094, batch_loss: 0.0867, loss: 0.2432 ||: 100%|##########| 1250/1250 [00:21<00:00, 58.65it/s]

2023-05-06 16:00:09,592 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 16:00:09,607 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 16:00:09,609 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 16:00:09,617 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9224, batch_loss: 0.0655, loss: 0.1949 ||: 100%|##########| 157/157 [00:01<00:00, 129.01it/s]

2023-05-06 16:00:10,820 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:00:10,822 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.909  |     0.922
2023-05-06 16:00:10,825 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   785.539  |       N/A
2023-05-06 16:00:10,827 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.243  |     0.195
2023-05-06 16:00:10,828 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  29084.176  |       N/A





2023-05-06 16:00:11,221 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:22.950283
2023-05-06 16:00:11,224 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:22
2023-05-06 16:00:11,228 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 16:00:11,230 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 28G
2023-05-06 16:00:11,233 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 802M
2023-05-06 16:00:11,237 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9490, batch_loss: 0.1835, loss: 0.1330 ||: 100%|##########| 1250/1250 [00:20<00:00, 60.20it/s]

2023-05-06 16:00:32,010 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9314, batch_loss: 0.0482, loss: 0.1807 ||: 100%|##########| 157/157 [00:00<00:00, 167.36it/s]

2023-05-06 16:00:32,955 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation





2023-05-06 16:00:32,958 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.949  |     0.931
2023-05-06 16:00:32,959 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   802.076  |       N/A
2023-05-06 16:00:32,961 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.133  |     0.181
2023-05-06 16:00:32,963 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  29084.176  |       N/A
2023-05-06 16:00:33,261 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:22.032885
2023-05-06 16:00:33,272 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:58
2023-05-06 16:00:33,281 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 16:00:33,284 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 28G
2023-05-06 16:00:33,290 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memo

accuracy: 0.9813, batch_loss: 0.0057, loss: 0.0538 ||: 100%|##########| 1250/1250 [00:20<00:00, 61.12it/s]

2023-05-06 16:00:53,756 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9290, batch_loss: 0.0081, loss: 0.2395 ||: 100%|##########| 157/157 [00:00<00:00, 169.40it/s]

2023-05-06 16:00:54,689 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:00:54,695 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.981  |     0.929
2023-05-06 16:00:54,696 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   802.200  |       N/A
2023-05-06 16:00:54,698 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.054  |     0.240
2023-05-06 16:00:54,699 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  29084.176  |       N/A





2023-05-06 16:00:55,023 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:21.741752
2023-05-06 16:00:55,026 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:34
2023-05-06 16:00:55,028 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 16:00:55,030 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 28G
2023-05-06 16:00:55,032 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 802M
2023-05-06 16:00:55,034 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9965, batch_loss: 0.0456, loss: 0.0129 ||: 100%|##########| 1250/1250 [00:20<00:00, 61.10it/s]

2023-05-06 16:01:15,501 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9296, batch_loss: 0.0156, loss: 0.2938 ||: 100%|##########| 157/157 [00:00<00:00, 168.86it/s]

2023-05-06 16:01:16,441 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:01:16,442 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.997  |     0.930
2023-05-06 16:01:16,449 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   802.016  |       N/A





2023-05-06 16:01:16,451 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.013  |     0.294
2023-05-06 16:01:16,458 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  29084.176  |       N/A
2023-05-06 16:01:16,769 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:21.741498
2023-05-06 16:01:16,779 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:12
2023-05-06 16:01:16,781 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 16:01:16,790 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 28G
2023-05-06 16:01:16,794 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 802M
2023-05-06 16:01:16,799 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9996, batch_loss: 0.0016, loss: 0.0024 ||: 100%|##########| 1250/1250 [00:20<00:00, 59.66it/s]

2023-05-06 16:01:37,759 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9270, batch_loss: 0.0026, loss: 0.3848 ||: 100%|##########| 157/157 [00:01<00:00, 141.24it/s]

2023-05-06 16:01:38,879 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:01:38,880 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.927
2023-05-06 16:01:38,881 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   802.186  |       N/A
2023-05-06 16:01:38,884 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.002  |     0.385
2023-05-06 16:01:38,886 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  29084.176  |       N/A





2023-05-06 16:01:39,370 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:22.589039
2023-05-06 16:01:39,373 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 16:01:39,408 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 29084.17578125,
  "peak_gpu_0_memory_MB": 802.2001953125,
  "training_duration": "0:01:50.606554",
  "epoch": 4,
  "training_accuracy": 0.9996,
  "training_loss": 0.0024327474638819696,
  "training_worker_0_memory_MB": 29084.17578125,
  "training_gpu_0_memory_MB": 802.185546875,
  "validation_accuracy": 0.927,
  "validation_loss": 0.3847509747123417,
  "best_validation_accuracy": 0.9314,
  "best_validation_loss": 0.18067477356381478
}
2023-05-06 16:01:39,411 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/37/model.tar.gz


[32m[I 2023-05-06 16:01:41,824][0m Trial 37 finished with value: 0.9314 and parameters: {'embedding_dim': 84, 'max_filter_size': 5, 'num_filters': 84}. Best is trial 8 with value: 0.9336.[0m


2023-05-06 16:01:41,961 - INFO - allennlp.common.params - evaluation = None
2023-05-06 16:01:41,972 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 16:01:41,976 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 16:01:41,980 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 16:01:41,982 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 16:01:41,983 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 16:01:41,984 - INFO - allennlp.common.params - type = default
2023-05-06 16:01:41,986 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 16:01:41,992 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 16:01:41,993 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 16:01:41,994 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 16:01:41,995 - INFO - all

loading instances: 40000it [01:24, 471.06it/s]

2023-05-06 16:03:06,987 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 16:03:06,989 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 16:03:06,991 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 16:03:06,993 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 16:03:06,994 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 16:03:06,996 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 16:03:06,997 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0





2023-05-06 16:03:07,000 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 16:03:07,001 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 16:03:07,002 - INFO - allennlp.common.params - validation_data_loader.cuda_device = None
2023-05-06 16:03:07,003 - INFO - allennlp.common.params - validation_data_loader.quiet = False
2023-05-06 16:03:07,004 - INFO - allennlp.common.params - validation_data_loader.collate_fn = <allennlp.data.data_loaders.data_collator.DefaultDataCollator object at 0x7f1035c88d60>


loading instances: 5000it [00:13, 360.44it/s]

2023-05-06 16:03:20,886 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 16:03:20,893 - INFO - allennlp.common.params - type = from_instances
2023-05-06 16:03:20,894 - INFO - allennlp.common.params - min_count = None
2023-05-06 16:03:20,895 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 16:03:20,896 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 16:03:20,897 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 16:03:20,898 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 16:03:20,899 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 16:03:20,903 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 16:03:20,904 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 16:03:20,905 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 16


building vocab: 40000it [00:03, 10509.36it/s]


2023-05-06 16:03:24,946 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 16:03:24,952 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 16:03:24,954 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 16:03:24,957 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 16:03:24,960 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 74
2023-05-06 16:03:24,963 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 16:03:24,965 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 16:03:24,966 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 16:03:24,969 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tok

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 16:03:32,336 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 16:03:32,338 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 16:03:32,343 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9049, batch_loss: 0.1202, loss: 0.2557 ||: 100%|##########| 1250/1250 [00:14<00:00, 86.06it/s]

2023-05-06 16:03:46,742 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 16:03:46,753 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 16:03:46,755 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 16:03:46,760 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9220, batch_loss: 0.0507, loss: 0.1979 ||: 100%|##########| 157/157 [00:01<00:00, 146.06it/s]

2023-05-06 16:03:47,823 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:03:47,825 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.905  |     0.922
2023-05-06 16:03:47,828 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   802.253  |       N/A
2023-05-06 16:03:47,829 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.256  |     0.198
2023-05-06 16:03:47,831 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  29624.543  |       N/A





2023-05-06 16:03:48,157 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:15.952796
2023-05-06 16:03:48,161 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:20
2023-05-06 16:03:48,163 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 16:03:48,166 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 29G
2023-05-06 16:03:48,168 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 794M
2023-05-06 16:03:48,169 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9401, batch_loss: 0.2764, loss: 0.1521 ||: 100%|##########| 1250/1250 [00:14<00:00, 86.50it/s]

2023-05-06 16:04:02,628 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9296, batch_loss: 0.0128, loss: 0.1854 ||: 100%|##########| 157/157 [00:00<00:00, 177.94it/s]

2023-05-06 16:04:03,519 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:04:03,521 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.940  |     0.930
2023-05-06 16:04:03,523 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   794.220  |       N/A
2023-05-06 16:04:03,525 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.152  |     0.185
2023-05-06 16:04:03,527 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  29707.816  |       N/A





2023-05-06 16:04:03,841 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:15.678447
2023-05-06 16:04:03,844 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:05
2023-05-06 16:04:03,846 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 16:04:03,854 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 29G
2023-05-06 16:04:03,872 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 794M
2023-05-06 16:04:03,880 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9716, batch_loss: 0.0045, loss: 0.0778 ||: 100%|##########| 1250/1250 [00:14<00:00, 85.72it/s]

2023-05-06 16:04:18,467 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9266, batch_loss: 0.0018, loss: 0.2325 ||: 100%|##########| 157/157 [00:00<00:00, 214.29it/s]

2023-05-06 16:04:19,206 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:04:19,207 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.972  |     0.927
2023-05-06 16:04:19,215 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   794.344  |       N/A
2023-05-06 16:04:19,217 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.078  |     0.233
2023-05-06 16:04:19,220 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  29708.008  |       N/A





2023-05-06 16:04:19,523 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:15.677201
2023-05-06 16:04:19,526 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:01:49
2023-05-06 16:04:19,542 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 16:04:19,543 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 29G
2023-05-06 16:04:19,545 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 794M
2023-05-06 16:04:19,548 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9919, batch_loss: 0.0422, loss: 0.0263 ||: 100%|##########| 1250/1250 [00:14<00:00, 85.09it/s]

2023-05-06 16:04:34,245 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9240, batch_loss: 0.0003, loss: 0.2981 ||: 100%|##########| 157/157 [00:00<00:00, 219.55it/s]

2023-05-06 16:04:34,972 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:04:34,974 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.992  |     0.924
2023-05-06 16:04:34,977 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   794.160  |       N/A
2023-05-06 16:04:34,979 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.026  |     0.298
2023-05-06 16:04:34,980 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  29708.008  |       N/A





2023-05-06 16:04:35,259 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:15.717164
2023-05-06 16:04:35,262 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:01:34
2023-05-06 16:04:35,264 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 16:04:35,266 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 29G
2023-05-06 16:04:35,268 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 794M
2023-05-06 16:04:35,270 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9990, batch_loss: 0.0060, loss: 0.0056 ||: 100%|##########| 1250/1250 [00:14<00:00, 85.41it/s]

2023-05-06 16:04:49,910 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9190, batch_loss: 0.0016, loss: 0.3232 ||: 100%|##########| 157/157 [00:00<00:00, 217.71it/s]

2023-05-06 16:04:50,641 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:04:50,643 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.999  |     0.919
2023-05-06 16:04:50,645 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   794.330  |       N/A
2023-05-06 16:04:50,646 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.006  |     0.323
2023-05-06 16:04:50,648 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  29708.008  |       N/A





2023-05-06 16:04:50,955 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:15.691180
2023-05-06 16:04:50,964 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 16:04:51,010 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 29708.0078125,
  "peak_gpu_0_memory_MB": 802.25341796875,
  "training_duration": "0:01:18.435881",
  "epoch": 4,
  "training_accuracy": 0.99905,
  "training_loss": 0.005630256962624845,
  "training_worker_0_memory_MB": 29708.0078125,
  "training_gpu_0_memory_MB": 794.32958984375,
  "validation_accuracy": 0.919,
  "validation_loss": 0.32315027454251033,
  "best_validation_accuracy": 0.9296,
  "best_validation_loss": 0.18544829665285767
}
2023-05-06 16:04:51,014 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/38/model.tar.gz


[32m[I 2023-05-06 16:04:52,617][0m Trial 38 finished with value: 0.9296 and parameters: {'embedding_dim': 74, 'max_filter_size': 3, 'num_filters': 133}. Best is trial 8 with value: 0.9336.[0m


2023-05-06 16:04:52,724 - INFO - allennlp.common.params - evaluation = None
2023-05-06 16:04:52,726 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 16:04:52,733 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 16:04:52,737 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 16:04:52,741 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 16:04:52,742 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 16:04:52,744 - INFO - allennlp.common.params - type = default
2023-05-06 16:04:52,747 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 16:04:52,749 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 16:04:52,752 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 16:04:52,753 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 16:04:52,754 - INFO - all

loading instances: 40000it [01:22, 482.68it/s]

2023-05-06 16:06:17,402 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess





2023-05-06 16:06:17,405 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 16:06:17,408 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 16:06:17,410 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 16:06:17,412 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 16:06:17,413 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 16:06:17,414 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 16:06:17,415 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 16:06:17,416 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 16:06:17,417 - INFO - allennlp.common.params - validation_data_loader.cuda_device = None
2023-05-06 16:06:17,418 - INFO - allennlp.common.params - validation_data_loader.quiet = 

loading instances: 5000it [00:11, 416.69it/s]


2023-05-06 16:06:29,423 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 16:06:29,427 - INFO - allennlp.common.params - type = from_instances
2023-05-06 16:06:29,432 - INFO - allennlp.common.params - min_count = None
2023-05-06 16:06:29,434 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 16:06:29,435 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 16:06:29,436 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 16:06:29,437 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 16:06:29,438 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 16:06:29,439 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 16:06:29,440 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 16:06:29,441 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 16

building vocab: 40000it [00:06, 6641.58it/s]


2023-05-06 16:06:35,682 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 16:06:35,687 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 16:06:35,689 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 16:06:35,691 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 16:06:35,695 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 65
2023-05-06 16:06:35,697 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 16:06:35,699 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 16:06:35,700 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 16:06:35,701 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tok

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 16:06:40,514 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 16:06:40,517 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 16:06:40,522 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9074, batch_loss: 0.0731, loss: 0.2479 ||: 100%|##########| 1250/1250 [00:18<00:00, 66.88it/s]

2023-05-06 16:06:59,073 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 16:06:59,087 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 16:06:59,093 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 16:06:59,098 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9228, batch_loss: 0.0563, loss: 0.1949 ||: 100%|##########| 157/157 [00:00<00:00, 174.56it/s]

2023-05-06 16:06:59,979 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:06:59,982 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.907  |     0.923
2023-05-06 16:06:59,984 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   794.397  |       N/A
2023-05-06 16:06:59,985 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.248  |     0.195
2023-05-06 16:06:59,987 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  30338.520  |       N/A





2023-05-06 16:07:00,245 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:19.868264
2023-05-06 16:07:00,260 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:56
2023-05-06 16:07:00,262 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 16:07:00,268 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 30G
2023-05-06 16:07:00,271 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 693M
2023-05-06 16:07:00,275 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9454, batch_loss: 0.2781, loss: 0.1422 ||: 100%|##########| 1250/1250 [00:18<00:00, 67.88it/s]

2023-05-06 16:07:18,697 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9308, batch_loss: 0.0139, loss: 0.1818 ||: 100%|##########| 157/157 [00:00<00:00, 187.18it/s]


2023-05-06 16:07:19,549 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:07:19,551 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.945  |     0.931
2023-05-06 16:07:19,553 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   693.049  |       N/A
2023-05-06 16:07:19,554 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.142  |     0.182
2023-05-06 16:07:19,556 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  30339.293  |       N/A
2023-05-06 16:07:19,778 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:19.515079
2023-05-06 16:07:19,784 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:36
2023-05-06 16:07:19,788 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 16:07:19,790 - INFO - allennlp.training.gradient_descent_t

accuracy: 0.9772, batch_loss: 0.0011, loss: 0.0658 ||: 100%|##########| 1250/1250 [00:18<00:00, 68.33it/s]

2023-05-06 16:07:38,096 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9264, batch_loss: 0.0066, loss: 0.2431 ||: 100%|##########| 157/157 [00:00<00:00, 187.98it/s]

2023-05-06 16:07:38,937 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation





2023-05-06 16:07:38,938 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.977  |     0.926
2023-05-06 16:07:38,947 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   693.173  |       N/A
2023-05-06 16:07:38,951 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.066  |     0.243
2023-05-06 16:07:38,952 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  30339.293  |       N/A
2023-05-06 16:07:39,141 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:19.352732
2023-05-06 16:07:39,151 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:16
2023-05-06 16:07:39,155 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 16:07:39,162 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 30G
2023-05-06 16:07:39,165 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memo

accuracy: 0.9943, batch_loss: 0.0369, loss: 0.0194 ||: 100%|##########| 1250/1250 [00:18<00:00, 68.84it/s]

2023-05-06 16:07:57,334 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9238, batch_loss: 0.0051, loss: 0.2968 ||: 100%|##########| 157/157 [00:00<00:00, 189.62it/s]

2023-05-06 16:07:58,170 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:07:58,174 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.994  |     0.924
2023-05-06 16:07:58,176 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   692.989  |       N/A
2023-05-06 16:07:58,177 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.019  |     0.297
2023-05-06 16:07:58,179 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  30339.293  |       N/A





2023-05-06 16:07:58,441 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:19.286451
2023-05-06 16:07:58,445 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:01:56
2023-05-06 16:07:58,449 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 16:07:58,452 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 30G
2023-05-06 16:07:58,455 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 693M
2023-05-06 16:07:58,457 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9993, batch_loss: 0.0042, loss: 0.0041 ||: 100%|##########| 1250/1250 [00:18<00:00, 67.85it/s]

2023-05-06 16:08:16,887 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9238, batch_loss: 0.0013, loss: 0.3655 ||: 100%|##########| 157/157 [00:01<00:00, 150.76it/s]

2023-05-06 16:08:17,936 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:08:17,937 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.999  |     0.924
2023-05-06 16:08:17,939 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   693.159  |       N/A
2023-05-06 16:08:17,941 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.004  |     0.366
2023-05-06 16:08:17,942 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  30339.551  |       N/A





2023-05-06 16:08:18,180 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:19.730634
2023-05-06 16:08:18,187 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 16:08:18,213 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 30339.55078125,
  "peak_gpu_0_memory_MB": 794.3974609375,
  "training_duration": "0:01:37.558254",
  "epoch": 4,
  "training_accuracy": 0.999325,
  "training_loss": 0.0040829369927814696,
  "training_worker_0_memory_MB": 30339.55078125,
  "training_gpu_0_memory_MB": 693.15869140625,
  "validation_accuracy": 0.9238,
  "validation_loss": 0.3655270770463535,
  "best_validation_accuracy": 0.9308,
  "best_validation_loss": 0.18182825003483682
}
2023-05-06 16:08:18,215 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/39/model.tar.gz


[32m[I 2023-05-06 16:08:20,134][0m Trial 39 finished with value: 0.9308 and parameters: {'embedding_dim': 65, 'max_filter_size': 5, 'num_filters': 77}. Best is trial 8 with value: 0.9336.[0m


2023-05-06 16:08:20,284 - INFO - allennlp.common.params - evaluation = None
2023-05-06 16:08:20,286 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 16:08:20,289 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 16:08:20,290 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 16:08:20,292 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 16:08:20,294 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 16:08:20,295 - INFO - allennlp.common.params - type = default
2023-05-06 16:08:20,297 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 16:08:20,299 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 16:08:20,302 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 16:08:20,303 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 16:08:20,304 - INFO - all

loading instances: 40000it [01:30, 443.55it/s]

2023-05-06 16:09:50,542 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 16:09:50,544 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 16:09:50,547 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 16:09:50,549 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 16:09:50,555 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 16:09:50,556 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 16:09:50,557 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 16:09:50,559 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 16:09:50,564 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 16:09:50,565 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:09, 539.37it/s]

2023-05-06 16:09:59,844 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 16:09:59,846 - INFO - allennlp.common.params - type = from_instances
2023-05-06 16:09:59,848 - INFO - allennlp.common.params - min_count = None
2023-05-06 16:09:59,850 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 16:09:59,853 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 16:09:59,854 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 16:09:59,855 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 16:09:59,858 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 16:09:59,859 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 16:09:59,860 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 16:09:59,862 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 16


building vocab: 40000it [00:03, 10825.56it/s]


2023-05-06 16:10:03,731 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 16:10:03,733 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 16:10:03,735 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 16:10:03,737 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 16:10:03,740 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 107
2023-05-06 16:10:03,746 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 16:10:03,748 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 16:10:03,749 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 16:10:03,750 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 16:10:10,510 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 16:10:10,511 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 16:10:10,517 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9035, batch_loss: 0.1075, loss: 0.2638 ||: 100%|##########| 1250/1250 [00:11<00:00, 105.53it/s]


2023-05-06 16:10:22,223 - INFO - allennlp.training.gradient_descent_trainer - Validating


  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 16:10:22,234 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 16:10:22,236 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 16:10:22,242 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9164, batch_loss: 0.0437, loss: 0.2074 ||: 100%|##########| 157/157 [00:00<00:00, 262.55it/s]

2023-05-06 16:10:22,829 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:10:22,831 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.904  |     0.916
2023-05-06 16:10:22,837 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   693.227  |       N/A
2023-05-06 16:10:22,838 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.264  |     0.207





2023-05-06 16:10:22,840 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  31200.355  |       N/A
2023-05-06 16:10:23,262 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:12.902143
2023-05-06 16:10:23,263 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:01:52
2023-05-06 16:10:23,267 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 16:10:23,270 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 30G
2023-05-06 16:10:23,272 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 702M
2023-05-06 16:10:23,279 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9334, batch_loss: 0.1844, loss: 0.1704 ||: 100%|##########| 1250/1250 [00:12<00:00, 103.09it/s]


2023-05-06 16:10:35,415 - INFO - allennlp.training.gradient_descent_trainer - Validating


accuracy: 0.9202, batch_loss: 0.0245, loss: 0.1977 ||: 100%|##########| 157/157 [00:00<00:00, 276.70it/s]

2023-05-06 16:10:35,989 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:10:35,990 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.933  |     0.920
2023-05-06 16:10:35,994 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   701.541  |       N/A
2023-05-06 16:10:35,999 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.170  |     0.198
2023-05-06 16:10:36,003 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  31200.355  |       N/A





2023-05-06 16:10:36,464 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:13.196399
2023-05-06 16:10:36,465 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:01:42
2023-05-06 16:10:36,468 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 16:10:36,469 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 30G
2023-05-06 16:10:36,471 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 702M
2023-05-06 16:10:36,473 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9614, batch_loss: 0.0200, loss: 0.1040 ||: 100%|##########| 1250/1250 [00:11<00:00, 105.47it/s]

2023-05-06 16:10:48,331 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9200, batch_loss: 0.0097, loss: 0.2335 ||: 100%|##########| 157/157 [00:00<00:00, 210.49it/s]

2023-05-06 16:10:49,084 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:10:49,086 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.961  |     0.920
2023-05-06 16:10:49,087 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   701.666  |       N/A
2023-05-06 16:10:49,089 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.104  |     0.233
2023-05-06 16:10:49,091 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  31200.355  |       N/A





2023-05-06 16:10:49,674 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:13.206198
2023-05-06 16:10:49,677 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:01:30
2023-05-06 16:10:49,679 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 16:10:49,680 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 30G
2023-05-06 16:10:49,684 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 701M
2023-05-06 16:10:49,686 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9822, batch_loss: 0.0865, loss: 0.0535 ||: 100%|##########| 1250/1250 [00:11<00:00, 109.38it/s]

2023-05-06 16:11:01,120 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9194, batch_loss: 0.0025, loss: 0.2667 ||: 100%|##########| 157/157 [00:00<00:00, 243.44it/s]

2023-05-06 16:11:01,773 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:11:01,775 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.982  |     0.919
2023-05-06 16:11:01,777 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   701.481  |       N/A
2023-05-06 16:11:01,778 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.053  |     0.267
2023-05-06 16:11:01,780 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  31200.355  |       N/A





2023-05-06 16:11:02,327 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:12.647962
2023-05-06 16:11:02,329 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:01:17
2023-05-06 16:11:02,331 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 16:11:02,334 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 30G
2023-05-06 16:11:02,336 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 702M
2023-05-06 16:11:02,338 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9937, batch_loss: 0.0291, loss: 0.0213 ||: 100%|##########| 1250/1250 [00:12<00:00, 103.94it/s]

2023-05-06 16:11:14,372 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9068, batch_loss: 0.0018, loss: 0.3097 ||: 100%|##########| 157/157 [00:00<00:00, 281.58it/s]

2023-05-06 16:11:14,937 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:11:14,939 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.994  |     0.907
2023-05-06 16:11:14,941 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   701.651  |       N/A
2023-05-06 16:11:14,942 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.021  |     0.310
2023-05-06 16:11:14,942 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  31200.355  |       N/A





2023-05-06 16:11:15,421 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:13.089878
2023-05-06 16:11:15,423 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 16:11:15,478 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 31200.35546875,
  "peak_gpu_0_memory_MB": 701.66552734375,
  "training_duration": "0:01:04.576784",
  "epoch": 4,
  "training_accuracy": 0.9937,
  "training_loss": 0.021304220247035845,
  "training_worker_0_memory_MB": 31200.35546875,
  "training_gpu_0_memory_MB": 701.65087890625,
  "validation_accuracy": 0.9068,
  "validation_loss": 0.3097140628556798,
  "best_validation_accuracy": 0.9202,
  "best_validation_loss": 0.1976642285230433
}
2023-05-06 16:11:15,480 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/40/model.tar.gz


[32m[I 2023-05-06 16:11:17,686][0m Trial 40 finished with value: 0.9202 and parameters: {'embedding_dim': 107, 'max_filter_size': 2, 'num_filters': 108}. Best is trial 8 with value: 0.9336.[0m


2023-05-06 16:11:17,796 - INFO - allennlp.common.params - evaluation = None
2023-05-06 16:11:17,798 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 16:11:17,804 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 16:11:17,807 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 16:11:17,812 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 16:11:17,813 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 16:11:17,815 - INFO - allennlp.common.params - type = default
2023-05-06 16:11:17,816 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 16:11:17,822 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 16:11:17,822 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 16:11:17,823 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 16:11:17,824 - INFO - all

loading instances: 40000it [01:28, 453.33it/s]

2023-05-06 16:12:46,133 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 16:12:46,136 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 16:12:46,138 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 16:12:46,139 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 16:12:46,141 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 16:12:46,144 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 16:12:46,145 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 16:12:46,146 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None





2023-05-06 16:12:46,147 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 16:12:46,149 - INFO - allennlp.common.params - validation_data_loader.cuda_device = None
2023-05-06 16:12:46,149 - INFO - allennlp.common.params - validation_data_loader.quiet = False
2023-05-06 16:12:46,151 - INFO - allennlp.common.params - validation_data_loader.collate_fn = <allennlp.data.data_loaders.data_collator.DefaultDataCollator object at 0x7f1035c88d60>


loading instances: 5000it [00:15, 332.36it/s]

2023-05-06 16:13:01,199 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 16:13:01,202 - INFO - allennlp.common.params - type = from_instances
2023-05-06 16:13:01,204 - INFO - allennlp.common.params - min_count = None
2023-05-06 16:13:01,206 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 16:13:01,209 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 16:13:01,211 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 16:13:01,212 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 16:13:01,213 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 16:13:01,214 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 16:13:01,215 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 16:13:01,216 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 16


building vocab: 40000it [00:04, 9036.39it/s] 


2023-05-06 16:13:05,815 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 16:13:05,818 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 16:13:05,820 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 16:13:05,823 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 16:13:05,831 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 87
2023-05-06 16:13:05,833 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 16:13:05,835 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 16:13:05,836 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 16:13:05,838 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tok

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 16:13:10,919 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 16:13:10,921 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 16:13:10,926 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9052, batch_loss: 0.0717, loss: 0.2476 ||: 100%|##########| 1250/1250 [00:17<00:00, 69.74it/s]

2023-05-06 16:13:28,733 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 16:13:28,744 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 16:13:28,748 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 16:13:28,755 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9252, batch_loss: 0.0356, loss: 0.1872 ||: 100%|##########| 157/157 [00:00<00:00, 191.58it/s]

2023-05-06 16:13:29,559 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:13:29,567 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.905  |     0.925
2023-05-06 16:13:29,569 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   701.719  |       N/A
2023-05-06 16:13:29,571 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.248  |     0.187
2023-05-06 16:13:29,574 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  31582.484  |       N/A





2023-05-06 16:13:29,947 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:19.145894
2023-05-06 16:13:29,948 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:48
2023-05-06 16:13:29,951 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 16:13:29,954 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 31G
2023-05-06 16:13:29,956 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 660M
2023-05-06 16:13:29,958 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9469, batch_loss: 0.1826, loss: 0.1365 ||: 100%|##########| 1250/1250 [00:18<00:00, 68.65it/s]

2023-05-06 16:13:48,172 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9342, batch_loss: 0.0186, loss: 0.1794 ||: 100%|##########| 157/157 [00:00<00:00, 197.16it/s]

2023-05-06 16:13:48,985 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:13:48,986 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.947  |     0.934
2023-05-06 16:13:48,989 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   660.222  |       N/A
2023-05-06 16:13:48,996 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.136  |     0.179
2023-05-06 16:13:48,998 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  31648.961  |       N/A





2023-05-06 16:13:49,392 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:19.441138
2023-05-06 16:13:49,395 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:32
2023-05-06 16:13:49,397 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 16:13:49,399 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 31G
2023-05-06 16:13:49,400 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 660M
2023-05-06 16:13:49,402 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9786, batch_loss: 0.0066, loss: 0.0611 ||: 100%|##########| 1250/1250 [00:18<00:00, 69.10it/s]

2023-05-06 16:14:07,500 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9286, batch_loss: 0.0169, loss: 0.2261 ||: 100%|##########| 157/157 [00:00<00:00, 198.60it/s]

2023-05-06 16:14:08,303 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:14:08,305 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.979  |     0.929
2023-05-06 16:14:08,306 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   660.347  |       N/A
2023-05-06 16:14:08,308 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.061  |     0.226
2023-05-06 16:14:08,310 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  31648.961  |       N/A





2023-05-06 16:14:08,693 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:19.296016
2023-05-06 16:14:08,695 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:14
2023-05-06 16:14:08,701 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 16:14:08,704 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 31G
2023-05-06 16:14:08,708 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 660M
2023-05-06 16:14:08,711 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9954, batch_loss: 0.0981, loss: 0.0172 ||: 100%|##########| 1250/1250 [00:17<00:00, 70.63it/s]

2023-05-06 16:14:26,419 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9282, batch_loss: 0.0043, loss: 0.2805 ||: 100%|##########| 157/157 [00:00<00:00, 201.26it/s]

2023-05-06 16:14:27,211 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:14:27,213 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.995  |     0.928
2023-05-06 16:14:27,216 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   660.163  |       N/A
2023-05-06 16:14:27,217 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.017  |     0.280
2023-05-06 16:14:27,218 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  31648.961  |       N/A





2023-05-06 16:14:27,586 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:18.885308
2023-05-06 16:14:27,590 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:01:54
2023-05-06 16:14:27,592 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 16:14:27,594 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 31G
2023-05-06 16:14:27,596 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 660M
2023-05-06 16:14:27,599 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9996, batch_loss: 0.0042, loss: 0.0033 ||: 100%|##########| 1250/1250 [00:17<00:00, 70.18it/s]

2023-05-06 16:14:45,418 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9258, batch_loss: 0.0029, loss: 0.3426 ||: 100%|##########| 157/157 [00:00<00:00, 197.68it/s]

2023-05-06 16:14:46,219 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:14:46,221 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.926
2023-05-06 16:14:46,223 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   660.332  |       N/A
2023-05-06 16:14:46,224 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.003  |     0.343
2023-05-06 16:14:46,226 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  31648.961  |       N/A





2023-05-06 16:14:46,628 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:19.035830
2023-05-06 16:14:46,632 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 16:14:46,680 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 31648.9609375,
  "peak_gpu_0_memory_MB": 701.71875,
  "training_duration": "0:01:35.418067",
  "epoch": 4,
  "training_accuracy": 0.999625,
  "training_loss": 0.0033328230737301057,
  "training_worker_0_memory_MB": 31648.9609375,
  "training_gpu_0_memory_MB": 660.33203125,
  "validation_accuracy": 0.9258,
  "validation_loss": 0.3425561346641629,
  "best_validation_accuracy": 0.9342,
  "best_validation_loss": 0.1793539782238614
}
2023-05-06 16:14:46,687 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/41/model.tar.gz


[32m[I 2023-05-06 16:14:49,081][0m Trial 41 finished with value: 0.9342 and parameters: {'embedding_dim': 87, 'max_filter_size': 5, 'num_filters': 53}. Best is trial 41 with value: 0.9342.[0m


2023-05-06 16:14:49,215 - INFO - allennlp.common.params - evaluation = None
2023-05-06 16:14:49,217 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 16:14:49,220 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 16:14:49,222 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 16:14:49,224 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 16:14:49,226 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 16:14:49,227 - INFO - allennlp.common.params - type = default
2023-05-06 16:14:49,230 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 16:14:49,232 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 16:14:49,233 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 16:14:49,233 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 16:14:49,235 - INFO - all

loading instances: 40000it [01:26, 463.63it/s]

2023-05-06 16:16:15,545 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 16:16:15,552 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 16:16:15,553 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 16:16:15,554 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 16:16:15,555 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 16:16:15,556 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 16:16:15,561 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 16:16:15,562 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 16:16:15,562 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 16:16:15,563 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:13, 380.86it/s]

2023-05-06 16:16:28,702 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 16:16:28,704 - INFO - allennlp.common.params - type = from_instances





2023-05-06 16:16:28,708 - INFO - allennlp.common.params - min_count = None
2023-05-06 16:16:28,710 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 16:16:28,717 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 16:16:28,718 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 16:16:28,720 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 16:16:28,721 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 16:16:28,723 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 16:16:28,725 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 16:16:28,727 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 16:16:28,728 - INFO - allennlp.data.vocabulary - Fitting token dictionary from dataset.


building vocab: 40000it [00:03, 10339.41it/s]


2023-05-06 16:16:32,767 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 16:16:32,769 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 16:16:32,773 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 16:16:32,778 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 16:16:32,782 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 83
2023-05-06 16:16:32,783 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 16:16:32,787 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 16:16:32,788 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 16:16:32,790 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tok

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 16:16:40,207 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 16:16:40,210 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 16:16:40,218 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9078, batch_loss: 0.0634, loss: 0.2466 ||: 100%|##########| 1250/1250 [00:17<00:00, 70.40it/s]

2023-05-06 16:16:57,799 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 16:16:57,813 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 16:16:57,815 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 16:16:57,820 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9234, batch_loss: 0.0405, loss: 0.1928 ||: 100%|##########| 157/157 [00:01<00:00, 146.57it/s]

2023-05-06 16:16:58,877 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:16:58,879 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.908  |     0.923
2023-05-06 16:16:58,881 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   660.400  |       N/A
2023-05-06 16:16:58,883 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.247  |     0.193
2023-05-06 16:16:58,884 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  32211.355  |       N/A





2023-05-06 16:16:59,244 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:19.225282
2023-05-06 16:16:59,247 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:49
2023-05-06 16:16:59,249 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 16:16:59,250 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 32G
2023-05-06 16:16:59,262 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 672M
2023-05-06 16:16:59,266 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9460, batch_loss: 0.1481, loss: 0.1378 ||: 100%|##########| 1250/1250 [00:17<00:00, 70.64it/s]

2023-05-06 16:17:16,967 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9304, batch_loss: 0.0377, loss: 0.1845 ||: 100%|##########| 157/157 [00:01<00:00, 152.50it/s]

2023-05-06 16:17:18,004 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:17:18,006 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.946  |     0.930
2023-05-06 16:17:18,007 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   672.216  |       N/A
2023-05-06 16:17:18,009 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.138  |     0.185
2023-05-06 16:17:18,011 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  32267.559  |       N/A





2023-05-06 16:17:18,333 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:19.083708
2023-05-06 16:17:18,336 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:31
2023-05-06 16:17:18,344 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 16:17:18,348 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 32G
2023-05-06 16:17:18,356 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 672M
2023-05-06 16:17:18,367 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9783, batch_loss: 0.0026, loss: 0.0605 ||: 100%|##########| 1250/1250 [00:17<00:00, 70.26it/s]

2023-05-06 16:17:36,166 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9272, batch_loss: 0.0556, loss: 0.2337 ||: 100%|##########| 157/157 [00:01<00:00, 148.95it/s]

2023-05-06 16:17:37,227 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:17:37,230 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.978  |     0.927
2023-05-06 16:17:37,231 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   672.340  |       N/A
2023-05-06 16:17:37,233 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.061  |     0.234





2023-05-06 16:17:37,235 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  32267.559  |       N/A
2023-05-06 16:17:37,563 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:19.219802
2023-05-06 16:17:37,566 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:13
2023-05-06 16:17:37,568 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 16:17:37,569 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 32G
2023-05-06 16:17:37,572 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 672M
2023-05-06 16:17:37,576 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9951, batch_loss: 0.0300, loss: 0.0167 ||: 100%|##########| 1250/1250 [00:17<00:00, 71.47it/s]

2023-05-06 16:17:55,073 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9202, batch_loss: 0.1579, loss: 0.2826 ||: 100%|##########| 157/157 [00:01<00:00, 153.96it/s]

2023-05-06 16:17:56,099 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:17:56,100 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.995  |     0.920
2023-05-06 16:17:56,102 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   672.156  |       N/A
2023-05-06 16:17:56,104 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.017  |     0.283
2023-05-06 16:17:56,106 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  32267.559  |       N/A





2023-05-06 16:17:56,504 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:18.935966
2023-05-06 16:17:56,507 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:01:54
2023-05-06 16:17:56,511 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 16:17:56,515 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 32G
2023-05-06 16:17:56,522 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 672M
2023-05-06 16:17:56,523 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9996, batch_loss: 0.0026, loss: 0.0032 ||: 100%|##########| 1250/1250 [00:17<00:00, 71.33it/s]

2023-05-06 16:18:14,053 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9248, batch_loss: 0.0919, loss: 0.3635 ||: 100%|##########| 157/157 [00:01<00:00, 153.81it/s]

2023-05-06 16:18:15,081 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:18:15,082 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.925
2023-05-06 16:18:15,084 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   672.326  |       N/A
2023-05-06 16:18:15,086 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.003  |     0.364
2023-05-06 16:18:15,087 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  32267.559  |       N/A





2023-05-06 16:18:15,486 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:18.975015
2023-05-06 16:18:15,491 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 16:18:15,515 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 32267.55859375,
  "peak_gpu_0_memory_MB": 672.34033203125,
  "training_duration": "0:01:35.060843",
  "epoch": 4,
  "training_accuracy": 0.999625,
  "training_loss": 0.0032211916333413685,
  "training_worker_0_memory_MB": 32267.55859375,
  "training_gpu_0_memory_MB": 672.32568359375,
  "validation_accuracy": 0.9248,
  "validation_loss": 0.3635317509175296,
  "best_validation_accuracy": 0.9304,
  "best_validation_loss": 0.18454197968006325
}
2023-05-06 16:18:15,517 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/42/model.tar.gz


[32m[I 2023-05-06 16:18:17,627][0m Trial 42 finished with value: 0.9304 and parameters: {'embedding_dim': 83, 'max_filter_size': 5, 'num_filters': 59}. Best is trial 41 with value: 0.9342.[0m


2023-05-06 16:18:17,731 - INFO - allennlp.common.params - evaluation = None
2023-05-06 16:18:17,733 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 16:18:17,735 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 16:18:17,737 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 16:18:17,739 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 16:18:17,740 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 16:18:17,744 - INFO - allennlp.common.params - type = default
2023-05-06 16:18:17,746 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 16:18:17,747 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 16:18:17,748 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 16:18:17,749 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 16:18:17,750 - INFO - all

loading instances: 40000it [01:32, 433.88it/s]

2023-05-06 16:19:49,995 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess





2023-05-06 16:19:49,998 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 16:19:49,999 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 16:19:50,002 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 16:19:50,003 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 16:19:50,006 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 16:19:50,007 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 16:19:50,009 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 16:19:50,010 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 16:19:50,011 - INFO - allennlp.common.params - validation_data_loader.cuda_device = None
2023-05-06 16:19:50,012 - INFO - allennlp.common.params - validation_data_loader.quiet = 

loading instances: 5000it [00:13, 380.17it/s]

2023-05-06 16:20:03,170 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 16:20:03,172 - INFO - allennlp.common.params - type = from_instances
2023-05-06 16:20:03,174 - INFO - allennlp.common.params - min_count = None
2023-05-06 16:20:03,177 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 16:20:03,179 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')





2023-05-06 16:20:03,181 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 16:20:03,183 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 16:20:03,184 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 16:20:03,185 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 16:20:03,186 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 16:20:03,187 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 16:20:03,188 - INFO - allennlp.data.vocabulary - Fitting token dictionary from dataset.


building vocab: 40000it [00:05, 7693.79it/s] 


2023-05-06 16:20:08,548 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 16:20:08,550 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 16:20:08,553 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 16:20:08,555 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 16:20:08,557 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 117
2023-05-06 16:20:08,558 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 16:20:08,560 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 16:20:08,562 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 16:20:08,563 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 16:20:13,528 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 16:20:13,529 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 16:20:13,536 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9053, batch_loss: 0.0667, loss: 0.2470 ||: 100%|##########| 1250/1250 [00:20<00:00, 61.81it/s]

2023-05-06 16:20:33,644 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 16:20:33,662 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 16:20:33,665 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 16:20:33,670 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9238, batch_loss: 0.0710, loss: 0.1950 ||: 100%|##########| 157/157 [00:00<00:00, 190.29it/s]

2023-05-06 16:20:34,480 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:20:34,482 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.905  |     0.924
2023-05-06 16:20:34,484 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   672.394  |       N/A
2023-05-06 16:20:34,486 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.247  |     0.195
2023-05-06 16:20:34,488 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  33017.996  |       N/A





2023-05-06 16:20:34,977 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:21.573636
2023-05-06 16:20:34,978 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:09
2023-05-06 16:20:34,991 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 16:20:34,993 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 32G
2023-05-06 16:20:34,998 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 782M
2023-05-06 16:20:35,005 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9466, batch_loss: 0.2237, loss: 0.1378 ||: 100%|##########| 1250/1250 [00:20<00:00, 60.37it/s]

2023-05-06 16:20:55,720 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9334, batch_loss: 0.0184, loss: 0.1827 ||: 100%|##########| 157/157 [00:00<00:00, 161.03it/s]

2023-05-06 16:20:56,703 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:20:56,706 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.947  |     0.933
2023-05-06 16:20:56,710 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   782.049  |       N/A
2023-05-06 16:20:56,713 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.138  |     0.183
2023-05-06 16:20:56,716 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  33017.996  |       N/A





2023-05-06 16:20:57,354 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:22.362619
2023-05-06 16:20:57,356 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:53
2023-05-06 16:20:57,358 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 16:20:57,360 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 32G
2023-05-06 16:20:57,362 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 782M
2023-05-06 16:20:57,363 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9797, batch_loss: 0.0012, loss: 0.0580 ||: 100%|##########| 1250/1250 [00:20<00:00, 60.71it/s]

2023-05-06 16:21:17,957 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9284, batch_loss: 0.0044, loss: 0.2385 ||: 100%|##########| 157/157 [00:01<00:00, 145.39it/s]

2023-05-06 16:21:19,046 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:21:19,048 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.980  |     0.928
2023-05-06 16:21:19,051 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   782.049  |       N/A
2023-05-06 16:21:19,053 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.058  |     0.239
2023-05-06 16:21:19,056 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  33017.996  |       N/A





2023-05-06 16:21:19,719 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:22.361068
2023-05-06 16:21:19,722 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:33
2023-05-06 16:21:19,725 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 16:21:19,727 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 32G
2023-05-06 16:21:19,729 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 782M
2023-05-06 16:21:19,733 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9959, batch_loss: 0.1431, loss: 0.0154 ||: 100%|##########| 1250/1250 [00:20<00:00, 60.99it/s]

2023-05-06 16:21:40,236 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9218, batch_loss: 0.0022, loss: 0.2858 ||: 100%|##########| 157/157 [00:00<00:00, 192.92it/s]

2023-05-06 16:21:41,057 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:21:41,059 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.996  |     0.922
2023-05-06 16:21:41,060 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   782.049  |       N/A
2023-05-06 16:21:41,062 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.015  |     0.286
2023-05-06 16:21:41,064 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  33017.996  |       N/A





2023-05-06 16:21:41,517 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:21.792329
2023-05-06 16:21:41,518 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:11
2023-05-06 16:21:41,522 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 16:21:41,525 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 32G
2023-05-06 16:21:41,527 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 782M
2023-05-06 16:21:41,529 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9996, batch_loss: 0.0003, loss: 0.0028 ||: 100%|##########| 1250/1250 [00:20<00:00, 61.27it/s]

2023-05-06 16:22:01,936 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9244, batch_loss: 0.0010, loss: 0.3382 ||: 100%|##########| 157/157 [00:00<00:00, 195.98it/s]

2023-05-06 16:22:02,746 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:22:02,748 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.924
2023-05-06 16:22:02,755 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   782.049  |       N/A
2023-05-06 16:22:02,759 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.003  |     0.338
2023-05-06 16:22:02,761 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  33017.996  |       N/A





2023-05-06 16:22:03,237 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:21.715051
2023-05-06 16:22:03,238 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 16:22:03,273 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 33017.99609375,
  "peak_gpu_0_memory_MB": 782.04931640625,
  "training_duration": "0:01:49.342509",
  "epoch": 4,
  "training_accuracy": 0.99955,
  "training_loss": 0.0028344995134626516,
  "training_worker_0_memory_MB": 33017.99609375,
  "training_gpu_0_memory_MB": 782.04931640625,
  "validation_accuracy": 0.9244,
  "validation_loss": 0.3381836037388795,
  "best_validation_accuracy": 0.9334,
  "best_validation_loss": 0.1827316168209265
}
2023-05-06 16:22:03,277 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/43/model.tar.gz


[32m[I 2023-05-06 16:22:05,683][0m Trial 43 finished with value: 0.9334 and parameters: {'embedding_dim': 117, 'max_filter_size': 5, 'num_filters': 45}. Best is trial 41 with value: 0.9342.[0m


2023-05-06 16:22:05,796 - INFO - allennlp.common.params - evaluation = None
2023-05-06 16:22:05,798 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 16:22:05,801 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 16:22:05,808 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 16:22:05,809 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 16:22:05,811 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 16:22:05,814 - INFO - allennlp.common.params - type = default
2023-05-06 16:22:05,818 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 16:22:05,820 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 16:22:05,822 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 16:22:05,825 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 16:22:05,828 - INFO - all

loading instances: 40000it [01:33, 429.69it/s]

2023-05-06 16:23:38,992 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 16:23:38,995 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 16:23:38,999 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 16:23:39,002 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 16:23:39,003 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 16:23:39,005 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 16:23:39,006 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 16:23:39,007 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 16:23:39,008 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 16:23:39,009 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:10, 495.78it/s]

2023-05-06 16:23:49,100 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 16:23:49,103 - INFO - allennlp.common.params - type = from_instances
2023-05-06 16:23:49,109 - INFO - allennlp.common.params - min_count = None
2023-05-06 16:23:49,111 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 16:23:49,113 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 16:23:49,114 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 16:23:49,118 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 16:23:49,120 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 16:23:49,121 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 16:23:49,122 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 16:23:49,123 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 16


building vocab: 40000it [00:04, 9831.78it/s]


2023-05-06 16:23:53,359 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 16:23:53,360 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 16:23:53,363 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 16:23:53,365 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 16:23:53,368 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 113
2023-05-06 16:23:53,370 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 16:23:53,372 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 16:23:53,373 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 16:23:53,375 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 16:23:58,264 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 16:23:58,266 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 16:23:58,274 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9035, batch_loss: 0.0783, loss: 0.2479 ||: 100%|##########| 1250/1250 [00:19<00:00, 63.70it/s]

2023-05-06 16:24:17,753 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 16:24:17,764 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 16:24:17,766 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 16:24:17,771 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9246, batch_loss: 0.0354, loss: 0.1915 ||: 100%|##########| 157/157 [00:00<00:00, 192.29it/s]

2023-05-06 16:24:18,576 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:24:18,578 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.903  |     0.925
2023-05-06 16:24:18,580 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   782.049  |       N/A
2023-05-06 16:24:18,581 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.248  |     0.191
2023-05-06 16:24:18,583 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  33671.824  |       N/A





2023-05-06 16:24:19,084 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:20.961326
2023-05-06 16:24:19,088 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:04
2023-05-06 16:24:19,090 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 16:24:19,092 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 33G
2023-05-06 16:24:19,094 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 751M
2023-05-06 16:24:19,098 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9473, batch_loss: 0.2405, loss: 0.1357 ||: 100%|##########| 1250/1250 [00:19<00:00, 63.01it/s]

2023-05-06 16:24:38,944 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9300, batch_loss: 0.0150, loss: 0.1783 ||: 100%|##########| 157/157 [00:00<00:00, 177.00it/s]

2023-05-06 16:24:39,839 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:24:39,842 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.947  |     0.930
2023-05-06 16:24:39,845 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   750.859  |       N/A
2023-05-06 16:24:39,847 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.136  |     0.178
2023-05-06 16:24:39,848 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  33671.824  |       N/A





2023-05-06 16:24:40,391 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:21.300855
2023-05-06 16:24:40,394 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:46
2023-05-06 16:24:40,400 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 16:24:40,404 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 33G
2023-05-06 16:24:40,405 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 751M
2023-05-06 16:24:40,408 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9811, batch_loss: 0.0041, loss: 0.0554 ||: 100%|##########| 1250/1250 [00:19<00:00, 62.71it/s]

2023-05-06 16:25:00,347 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9264, batch_loss: 0.0041, loss: 0.2199 ||: 100%|##########| 157/157 [00:01<00:00, 152.73it/s]

2023-05-06 16:25:01,385 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:25:01,387 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.981  |     0.926
2023-05-06 16:25:01,389 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   750.859  |       N/A
2023-05-06 16:25:01,391 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.055  |     0.220
2023-05-06 16:25:01,392 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  33671.824  |       N/A





2023-05-06 16:25:01,963 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:21.562130
2023-05-06 16:25:01,970 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:27
2023-05-06 16:25:01,971 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 16:25:01,973 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 33G
2023-05-06 16:25:01,976 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 751M
2023-05-06 16:25:01,978 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9968, batch_loss: 0.0643, loss: 0.0138 ||: 100%|##########| 1250/1250 [00:19<00:00, 63.53it/s]

2023-05-06 16:25:21,664 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9238, batch_loss: 0.0006, loss: 0.2675 ||: 100%|##########| 157/157 [00:01<00:00, 155.28it/s]

2023-05-06 16:25:22,681 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:25:22,688 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.997  |     0.924
2023-05-06 16:25:22,691 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   750.859  |       N/A
2023-05-06 16:25:22,693 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.014  |     0.267
2023-05-06 16:25:22,694 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  33671.824  |       N/A





2023-05-06 16:25:23,282 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:21.310442
2023-05-06 16:25:23,284 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:06
2023-05-06 16:25:23,290 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 16:25:23,292 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 33G
2023-05-06 16:25:23,296 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 751M
2023-05-06 16:25:23,297 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9998, batch_loss: 0.0009, loss: 0.0023 ||: 100%|##########| 1250/1250 [00:19<00:00, 63.12it/s]

2023-05-06 16:25:43,106 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9278, batch_loss: 0.0002, loss: 0.3251 ||: 100%|##########| 157/157 [00:00<00:00, 200.40it/s]

2023-05-06 16:25:43,897 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:25:43,899 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.928
2023-05-06 16:25:43,901 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   750.859  |       N/A
2023-05-06 16:25:43,903 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.002  |     0.325
2023-05-06 16:25:43,904 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  33671.824  |       N/A





2023-05-06 16:25:44,373 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:21.083322
2023-05-06 16:25:44,374 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 16:25:44,405 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 33671.82421875,
  "peak_gpu_0_memory_MB": 782.04931640625,
  "training_duration": "0:01:45.774634",
  "epoch": 4,
  "training_accuracy": 0.9998,
  "training_loss": 0.0023217178061197045,
  "training_worker_0_memory_MB": 33671.82421875,
  "training_gpu_0_memory_MB": 750.859375,
  "validation_accuracy": 0.9278,
  "validation_loss": 0.3251263658752647,
  "best_validation_accuracy": 0.93,
  "best_validation_loss": 0.17826439109245304
}
2023-05-06 16:25:44,407 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/44/model.tar.gz


[32m[I 2023-05-06 16:25:46,803][0m Trial 44 finished with value: 0.93 and parameters: {'embedding_dim': 113, 'max_filter_size': 5, 'num_filters': 42}. Best is trial 41 with value: 0.9342.[0m


2023-05-06 16:25:46,906 - INFO - allennlp.common.params - evaluation = None
2023-05-06 16:25:46,909 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 16:25:46,911 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 16:25:46,913 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 16:25:46,915 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 16:25:46,917 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 16:25:46,919 - INFO - allennlp.common.params - type = default
2023-05-06 16:25:46,922 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 16:25:46,924 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 16:25:46,925 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 16:25:46,925 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 16:25:46,927 - INFO - all

loading instances: 40000it [01:30, 441.58it/s]

2023-05-06 16:27:17,561 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 16:27:17,567 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 16:27:17,571 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 16:27:17,573 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 16:27:17,577 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 16:27:17,578 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 16:27:17,580 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 16:27:17,583 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 16:27:17,584 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 16:27:17,585 - INFO - allennlp.common.params - validation_data_loader.cuda_de




2023-05-06 16:27:17,591 - INFO - allennlp.common.params - validation_data_loader.collate_fn = <allennlp.data.data_loaders.data_collator.DefaultDataCollator object at 0x7f1035c88d60>


loading instances: 5000it [00:07, 629.07it/s]

2023-05-06 16:27:25,546 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 16:27:25,549 - INFO - allennlp.common.params - type = from_instances
2023-05-06 16:27:25,551 - INFO - allennlp.common.params - min_count = None
2023-05-06 16:27:25,552 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 16:27:25,554 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 16:27:25,556 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 16:27:25,558 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 16:27:25,558 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 16:27:25,560 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 16:27:25,560 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 16:27:25,561 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 16


building vocab: 40000it [00:06, 6340.25it/s]


2023-05-06 16:27:32,089 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 16:27:32,091 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 16:27:32,093 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 16:27:32,095 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 16:27:32,097 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 55
2023-05-06 16:27:32,099 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 16:27:32,101 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 16:27:32,103 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 16:27:32,104 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tok

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 16:27:37,421 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 16:27:37,423 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 16:27:37,429 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9085, batch_loss: 0.0795, loss: 0.2479 ||: 100%|##########| 1250/1250 [00:17<00:00, 71.97it/s]

2023-05-06 16:27:54,668 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 16:27:54,680 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 16:27:54,682 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 16:27:54,687 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9226, batch_loss: 0.0582, loss: 0.1986 ||: 100%|##########| 157/157 [00:00<00:00, 175.98it/s]

2023-05-06 16:27:55,567 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:27:55,570 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.908  |     0.923
2023-05-06 16:27:55,572 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   750.859  |       N/A
2023-05-06 16:27:55,573 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.248  |     0.199
2023-05-06 16:27:55,580 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  34144.492  |       N/A





2023-05-06 16:27:55,761 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:18.470605
2023-05-06 16:27:55,763 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:44
2023-05-06 16:27:55,765 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 16:27:55,769 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 33G
2023-05-06 16:27:55,771 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 742M
2023-05-06 16:27:55,772 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9450, batch_loss: 0.1997, loss: 0.1403 ||: 100%|##########| 1250/1250 [00:17<00:00, 72.31it/s]

2023-05-06 16:28:13,065 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9280, batch_loss: 0.0129, loss: 0.1840 ||: 100%|##########| 157/157 [00:00<00:00, 182.29it/s]

2023-05-06 16:28:13,934 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:28:13,936 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.945  |     0.928
2023-05-06 16:28:13,939 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   741.953  |       N/A
2023-05-06 16:28:13,941 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.140  |     0.184
2023-05-06 16:28:13,943 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  34164.852  |       N/A





2023-05-06 16:28:14,095 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:18.330142
2023-05-06 16:28:14,097 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:26
2023-05-06 16:28:14,099 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 16:28:14,100 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 33G
2023-05-06 16:28:14,102 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 742M
2023-05-06 16:28:14,104 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9770, batch_loss: 0.0029, loss: 0.0638 ||: 100%|##########| 1250/1250 [00:17<00:00, 71.21it/s]

2023-05-06 16:28:31,662 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9238, batch_loss: 0.0082, loss: 0.2353 ||: 100%|##########| 157/157 [00:00<00:00, 182.13it/s]

2023-05-06 16:28:32,537 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:28:32,541 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.977  |     0.924
2023-05-06 16:28:32,542 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   742.077  |       N/A
2023-05-06 16:28:32,544 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.064  |     0.235
2023-05-06 16:28:32,545 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  34164.852  |       N/A





2023-05-06 16:28:32,717 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:18.618272
2023-05-06 16:28:32,720 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:08
2023-05-06 16:28:32,721 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 16:28:32,723 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 33G
2023-05-06 16:28:32,725 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 742M
2023-05-06 16:28:32,727 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9945, batch_loss: 0.0834, loss: 0.0195 ||: 100%|##########| 1250/1250 [00:17<00:00, 72.43it/s]

2023-05-06 16:28:49,990 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9212, batch_loss: 0.0018, loss: 0.2874 ||: 100%|##########| 157/157 [00:00<00:00, 183.11it/s]

2023-05-06 16:28:50,859 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:28:50,860 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.995  |     0.921
2023-05-06 16:28:50,862 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   741.893  |       N/A
2023-05-06 16:28:50,864 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.020  |     0.287
2023-05-06 16:28:50,866 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  34164.852  |       N/A





2023-05-06 16:28:51,025 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:18.303655
2023-05-06 16:28:51,027 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:01:50
2023-05-06 16:28:51,029 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 16:28:51,031 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 33G
2023-05-06 16:28:51,033 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 742M
2023-05-06 16:28:51,035 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9992, batch_loss: 0.0037, loss: 0.0042 ||: 100%|##########| 1250/1250 [00:17<00:00, 71.59it/s]

2023-05-06 16:29:08,501 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9220, batch_loss: 0.0005, loss: 0.3727 ||: 100%|##########| 157/157 [00:00<00:00, 181.19it/s]

2023-05-06 16:29:09,376 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:29:09,378 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.999  |     0.922
2023-05-06 16:29:09,380 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   742.062  |       N/A
2023-05-06 16:29:09,381 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.004  |     0.373
2023-05-06 16:29:09,382 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  34164.852  |       N/A





2023-05-06 16:29:09,555 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:18.526248
2023-05-06 16:29:09,557 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 16:29:09,575 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 34164.8515625,
  "peak_gpu_0_memory_MB": 750.859375,
  "training_duration": "0:01:32.085274",
  "epoch": 4,
  "training_accuracy": 0.999175,
  "training_loss": 0.004218071251473157,
  "training_worker_0_memory_MB": 34164.8515625,
  "training_gpu_0_memory_MB": 742.0625,
  "validation_accuracy": 0.922,
  "validation_loss": 0.37272666876944327,
  "best_validation_accuracy": 0.928,
  "best_validation_loss": 0.18398432217087526
}
2023-05-06 16:29:09,577 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/45/model.tar.gz


[32m[I 2023-05-06 16:29:10,851][0m Trial 45 finished with value: 0.928 and parameters: {'embedding_dim': 55, 'max_filter_size': 5, 'num_filters': 91}. Best is trial 41 with value: 0.9342.[0m


2023-05-06 16:29:10,951 - INFO - allennlp.common.params - evaluation = None
2023-05-06 16:29:10,953 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 16:29:10,955 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 16:29:10,961 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 16:29:10,962 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 16:29:10,963 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 16:29:10,964 - INFO - allennlp.common.params - type = default
2023-05-06 16:29:10,966 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 16:29:10,967 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 16:29:10,969 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 16:29:10,972 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 16:29:10,973 - INFO - all

loading instances: 40000it [01:28, 449.94it/s]

2023-05-06 16:30:39,918 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 16:30:39,921 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 16:30:39,927 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 16:30:39,929 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 16:30:39,930 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 16:30:39,932 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 16:30:39,934 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 16:30:39,935 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 16:30:39,937 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 16:30:39,939 - INFO - allennlp.common.params - validation_data_loader.cuda_de




2023-05-06 16:30:39,942 - INFO - allennlp.common.params - validation_data_loader.quiet = False
2023-05-06 16:30:39,945 - INFO - allennlp.common.params - validation_data_loader.collate_fn = <allennlp.data.data_loaders.data_collator.DefaultDataCollator object at 0x7f1035c88d60>


loading instances: 5000it [00:08, 590.09it/s]

2023-05-06 16:30:48,428 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 16:30:48,431 - INFO - allennlp.common.params - type = from_instances
2023-05-06 16:30:48,433 - INFO - allennlp.common.params - min_count = None
2023-05-06 16:30:48,437 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 16:30:48,439 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 16:30:48,440 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 16:30:48,442 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 16:30:48,443 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 16:30:48,445 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 16:30:48,446 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 16:30:48,447 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 16


building vocab: 40000it [00:06, 6131.04it/s]


2023-05-06 16:30:55,190 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 16:30:55,194 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 16:30:55,200 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 16:30:55,202 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 16:30:55,205 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 78
2023-05-06 16:30:55,207 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 16:30:55,210 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 16:30:55,212 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 16:30:55,213 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tok

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 16:31:00,450 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 16:31:00,452 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 16:31:00,457 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9025, batch_loss: 0.0907, loss: 0.2546 ||: 100%|##########| 1250/1250 [00:13<00:00, 90.21it/s]

2023-05-06 16:31:14,194 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 16:31:14,209 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 16:31:14,213 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 16:31:14,220 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9256, batch_loss: 0.0540, loss: 0.1925 ||: 100%|##########| 157/157 [00:01<00:00, 156.92it/s]

2023-05-06 16:31:15,204 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:31:15,206 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.902  |     0.926
2023-05-06 16:31:15,208 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   742.130  |       N/A
2023-05-06 16:31:15,210 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.255  |     0.192
2023-05-06 16:31:15,212 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  34791.059  |       N/A





2023-05-06 16:31:15,525 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:15.194016
2023-05-06 16:31:15,527 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:13
2023-05-06 16:31:15,530 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 16:31:15,531 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 34G
2023-05-06 16:31:15,533 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 550M
2023-05-06 16:31:15,534 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9416, batch_loss: 0.2628, loss: 0.1480 ||: 100%|##########| 1250/1250 [00:13<00:00, 93.87it/s]

2023-05-06 16:31:28,868 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9310, batch_loss: 0.0211, loss: 0.1782 ||: 100%|##########| 157/157 [00:00<00:00, 189.46it/s]

2023-05-06 16:31:29,704 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:31:29,705 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.942  |     0.931
2023-05-06 16:31:29,707 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   550.270  |       N/A
2023-05-06 16:31:29,710 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.148  |     0.178
2023-05-06 16:31:29,712 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  34799.824  |       N/A





2023-05-06 16:31:30,009 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:14.478887
2023-05-06 16:31:30,012 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:01:57
2023-05-06 16:31:30,014 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 16:31:30,016 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 34G
2023-05-06 16:31:30,018 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 550M
2023-05-06 16:31:30,020 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9733, batch_loss: 0.0143, loss: 0.0751 ||: 100%|##########| 1250/1250 [00:13<00:00, 90.10it/s]

2023-05-06 16:31:43,898 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9290, batch_loss: 0.0125, loss: 0.2164 ||: 100%|##########| 157/157 [00:00<00:00, 242.46it/s]

2023-05-06 16:31:44,552 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:31:44,554 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.973  |     0.929
2023-05-06 16:31:44,556 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   550.395  |       N/A
2023-05-06 16:31:44,558 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.075  |     0.216
2023-05-06 16:31:44,560 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  34799.957  |       N/A





2023-05-06 16:31:44,894 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:14.879972
2023-05-06 16:31:44,895 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:01:43
2023-05-06 16:31:44,897 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 16:31:44,902 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 34G
2023-05-06 16:31:44,905 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 550M
2023-05-06 16:31:44,906 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9921, batch_loss: 0.1358, loss: 0.0266 ||: 100%|##########| 1250/1250 [00:13<00:00, 90.77it/s] 

2023-05-06 16:31:58,687 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9234, batch_loss: 0.0028, loss: 0.2583 ||: 100%|##########| 157/157 [00:00<00:00, 238.53it/s]

2023-05-06 16:31:59,355 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:31:59,357 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.992  |     0.923
2023-05-06 16:31:59,360 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   550.210  |       N/A
2023-05-06 16:31:59,362 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.027  |     0.258
2023-05-06 16:31:59,363 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  34799.957  |       N/A





2023-05-06 16:31:59,671 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:14.774094
2023-05-06 16:31:59,673 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:01:28
2023-05-06 16:31:59,680 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 16:31:59,682 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 34G
2023-05-06 16:31:59,684 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 550M
2023-05-06 16:31:59,686 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9988, batch_loss: 0.0032, loss: 0.0068 ||: 100%|##########| 1250/1250 [00:13<00:00, 90.43it/s]

2023-05-06 16:32:13,520 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9196, batch_loss: 0.1086, loss: 0.2939 ||: 100%|##########| 157/157 [00:00<00:00, 246.86it/s]

2023-05-06 16:32:14,170 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:32:14,172 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.999  |     0.920
2023-05-06 16:32:14,173 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   550.380  |       N/A
2023-05-06 16:32:14,175 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.007  |     0.294
2023-05-06 16:32:14,176 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  34799.957  |       N/A





2023-05-06 16:32:14,465 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:14.784937
2023-05-06 16:32:14,468 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 16:32:14,492 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 34799.95703125,
  "peak_gpu_0_memory_MB": 742.13037109375,
  "training_duration": "0:01:13.838885",
  "epoch": 4,
  "training_accuracy": 0.9988,
  "training_loss": 0.006849856099576573,
  "training_worker_0_memory_MB": 34799.95703125,
  "training_gpu_0_memory_MB": 550.3798828125,
  "validation_accuracy": 0.9196,
  "validation_loss": 0.29390747843838183,
  "best_validation_accuracy": 0.931,
  "best_validation_loss": 0.17818958569721433
}
2023-05-06 16:32:14,494 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/46/model.tar.gz


[32m[I 2023-05-06 16:32:16,137][0m Trial 46 finished with value: 0.931 and parameters: {'embedding_dim': 78, 'max_filter_size': 4, 'num_filters': 49}. Best is trial 41 with value: 0.9342.[0m


2023-05-06 16:32:16,239 - INFO - allennlp.common.params - evaluation = None
2023-05-06 16:32:16,240 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 16:32:16,245 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 16:32:16,247 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 16:32:16,249 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 16:32:16,250 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 16:32:16,252 - INFO - allennlp.common.params - type = default
2023-05-06 16:32:16,253 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 16:32:16,255 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 16:32:16,256 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 16:32:16,257 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 16:32:16,258 - INFO - all

loading instances: 40000it [01:31, 435.07it/s]

2023-05-06 16:33:48,233 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 16:33:48,236 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 16:33:48,241 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 16:33:48,243 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 16:33:48,246 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 16:33:48,247 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 16:33:48,250 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 16:33:48,251 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 16:33:48,256 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 16:33:48,258 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:08, 604.55it/s]

2023-05-06 16:33:56,539 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 16:33:56,541 - INFO - allennlp.common.params - type = from_instances
2023-05-06 16:33:56,545 - INFO - allennlp.common.params - min_count = None
2023-05-06 16:33:56,547 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 16:33:56,548 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 16:33:56,550 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 16:33:56,552 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 16:33:56,553 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 16:33:56,554 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 16:33:56,554 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 16:33:56,555 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 16


building vocab: 40000it [00:06, 6079.41it/s]


2023-05-06 16:34:03,419 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 16:34:03,423 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 16:34:03,428 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 16:34:03,430 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 16:34:03,431 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 68
2023-05-06 16:34:03,433 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 16:34:03,435 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 16:34:03,437 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 16:34:03,440 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tok

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 16:34:09,708 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 16:34:09,710 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 16:34:09,715 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9081, batch_loss: 0.0536, loss: 0.2437 ||: 100%|##########| 1250/1250 [00:26<00:00, 46.33it/s]

2023-05-06 16:34:36,560 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 16:34:36,581 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 16:34:36,587 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 16:34:36,597 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9202, batch_loss: 0.0374, loss: 0.2004 ||: 100%|##########| 157/157 [00:01<00:00, 110.49it/s]

2023-05-06 16:34:37,991 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:34:37,992 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.908  |     0.920
2023-05-06 16:34:37,995 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   550.448  |       N/A
2023-05-06 16:34:37,999 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.244  |     0.200
2023-05-06 16:34:38,000 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  35425.500  |       N/A





2023-05-06 16:34:38,277 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:28.705996
2023-05-06 16:34:38,282 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:15
2023-05-06 16:34:38,283 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 16:34:38,285 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 35G
2023-05-06 16:34:38,288 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.3G
2023-05-06 16:34:38,290 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9483, batch_loss: 0.2083, loss: 0.1350 ||: 100%|##########| 1250/1250 [00:27<00:00, 45.45it/s]

2023-05-06 16:35:05,797 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9310, batch_loss: 0.0180, loss: 0.1761 ||: 100%|##########| 157/157 [00:01<00:00, 127.70it/s]

2023-05-06 16:35:07,033 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:35:07,034 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.948  |     0.931
2023-05-06 16:35:07,040 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1310.547  |       N/A
2023-05-06 16:35:07,042 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.135  |     0.176
2023-05-06 16:35:07,043 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  35448.445  |       N/A





2023-05-06 16:35:07,326 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:29.043577
2023-05-06 16:35:07,330 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:49
2023-05-06 16:35:07,331 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 16:35:07,336 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 35G
2023-05-06 16:35:07,338 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.3G
2023-05-06 16:35:07,342 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9806, batch_loss: 0.0015, loss: 0.0547 ||: 100%|##########| 1250/1250 [00:26<00:00, 46.36it/s]


2023-05-06 16:35:34,311 - INFO - allennlp.training.gradient_descent_trainer - Validating


accuracy: 0.9300, batch_loss: 0.0019, loss: 0.2416 ||: 100%|##########| 157/157 [00:01<00:00, 110.99it/s]

2023-05-06 16:35:35,736 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation





2023-05-06 16:35:35,738 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.981  |     0.930
2023-05-06 16:35:35,740 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1310.671  |       N/A
2023-05-06 16:35:35,741 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.055  |     0.242
2023-05-06 16:35:35,744 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  35448.445  |       N/A
2023-05-06 16:35:36,035 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:28.703428
2023-05-06 16:35:36,040 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:21
2023-05-06 16:35:36,041 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 16:35:36,043 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 35G
2023-05-06 16:35:36,045 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memo

accuracy: 0.9962, batch_loss: 0.0372, loss: 0.0134 ||: 100%|##########| 1250/1250 [00:26<00:00, 46.56it/s]

2023-05-06 16:36:02,909 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9254, batch_loss: 0.0024, loss: 0.3038 ||: 100%|##########| 157/157 [00:01<00:00, 127.01it/s]

2023-05-06 16:36:04,151 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:36:04,155 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.996  |     0.925
2023-05-06 16:36:04,156 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1310.487  |       N/A
2023-05-06 16:36:04,157 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.013  |     0.304
2023-05-06 16:36:04,159 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  35448.445  |       N/A





2023-05-06 16:36:04,412 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:28.370632
2023-05-06 16:36:04,418 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:51
2023-05-06 16:36:04,422 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 16:36:04,423 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 35G
2023-05-06 16:36:04,425 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.3G
2023-05-06 16:36:04,427 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9996, batch_loss: 0.0022, loss: 0.0026 ||: 100%|##########| 1250/1250 [00:27<00:00, 45.81it/s]

2023-05-06 16:36:31,721 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9254, batch_loss: 0.0005, loss: 0.3736 ||: 100%|##########| 157/157 [00:01<00:00, 102.43it/s]

2023-05-06 16:36:33,261 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:36:33,263 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.925
2023-05-06 16:36:33,265 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1310.657  |       N/A
2023-05-06 16:36:33,267 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.003  |     0.374
2023-05-06 16:36:33,268 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  35448.445  |       N/A





2023-05-06 16:36:33,627 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:29.205158
2023-05-06 16:36:33,631 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 16:36:33,659 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 35448.4453125,
  "peak_gpu_0_memory_MB": 1310.67138671875,
  "training_duration": "0:02:23.689209",
  "epoch": 4,
  "training_accuracy": 0.999575,
  "training_loss": 0.0025657311948001733,
  "training_worker_0_memory_MB": 35448.4453125,
  "training_gpu_0_memory_MB": 1310.65673828125,
  "validation_accuracy": 0.9254,
  "validation_loss": 0.3736191365536403,
  "best_validation_accuracy": 0.931,
  "best_validation_loss": 0.17606392458887996
}
2023-05-06 16:36:33,664 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/47/model.tar.gz


[32m[I 2023-05-06 16:36:35,369][0m Trial 47 finished with value: 0.931 and parameters: {'embedding_dim': 68, 'max_filter_size': 5, 'num_filters': 178}. Best is trial 41 with value: 0.9342.[0m


2023-05-06 16:36:35,473 - INFO - allennlp.common.params - evaluation = None
2023-05-06 16:36:35,474 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 16:36:35,482 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 16:36:35,485 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 16:36:35,486 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 16:36:35,488 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 16:36:35,489 - INFO - allennlp.common.params - type = default
2023-05-06 16:36:35,491 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 16:36:35,493 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 16:36:35,494 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 16:36:35,495 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 16:36:35,496 - INFO - all

loading instances: 40000it [01:29, 447.28it/s]

2023-05-06 16:38:04,967 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 16:38:04,969 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 16:38:04,971 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 16:38:04,973 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 16:38:04,975 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 16:38:04,978 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 16:38:04,979 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 16:38:04,981 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 16:38:04,982 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 16:38:04,983 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:07, 637.00it/s]

2023-05-06 16:38:12,838 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 16:38:12,841 - INFO - allennlp.common.params - type = from_instances
2023-05-06 16:38:12,843 - INFO - allennlp.common.params - min_count = None
2023-05-06 16:38:12,845 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 16:38:12,847 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 16:38:12,848 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 16:38:12,849 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 16:38:12,850 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 16:38:12,852 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 16:38:12,852 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 16:38:12,853 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 16


building vocab: 40000it [00:04, 9007.79it/s]


2023-05-06 16:38:17,513 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 16:38:17,516 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 16:38:17,518 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 16:38:17,520 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 16:38:17,522 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 101
2023-05-06 16:38:17,524 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 16:38:17,525 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 16:38:17,526 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 16:38:17,527 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 16:38:24,703 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 16:38:24,705 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 16:38:24,710 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9010, batch_loss: 0.0827, loss: 0.2546 ||: 100%|##########| 1250/1250 [00:13<00:00, 91.67it/s]

2023-05-06 16:38:38,230 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 16:38:38,239 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 16:38:38,241 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 16:38:38,246 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9242, batch_loss: 0.0296, loss: 0.1986 ||: 100%|##########| 157/157 [00:00<00:00, 179.76it/s]

2023-05-06 16:38:39,109 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:38:39,111 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.901  |     0.924
2023-05-06 16:38:39,114 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1310.725  |       N/A
2023-05-06 16:38:39,116 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.255  |     0.199
2023-05-06 16:38:39,117 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  36084.391  |       N/A





2023-05-06 16:38:39,590 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:15.004694
2023-05-06 16:38:39,592 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:10
2023-05-06 16:38:39,594 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 16:38:39,595 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 35G
2023-05-06 16:38:39,598 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 622M
2023-05-06 16:38:39,600 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9419, batch_loss: 0.2608, loss: 0.1486 ||: 100%|##########| 1250/1250 [00:13<00:00, 90.89it/s]

2023-05-06 16:38:53,359 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9316, batch_loss: 0.0163, loss: 0.1828 ||: 100%|##########| 157/157 [00:00<00:00, 200.05it/s]

2023-05-06 16:38:54,155 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:38:54,157 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.942  |     0.932
2023-05-06 16:38:54,158 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   622.392  |       N/A
2023-05-06 16:38:54,160 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.149  |     0.183
2023-05-06 16:38:54,162 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  36122.672  |       N/A





2023-05-06 16:38:54,661 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:15.066949
2023-05-06 16:38:54,665 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:01:58
2023-05-06 16:38:54,669 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 16:38:54,672 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 35G
2023-05-06 16:38:54,681 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 622M
2023-05-06 16:38:54,683 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9730, batch_loss: 0.0064, loss: 0.0761 ||: 100%|##########| 1250/1250 [00:14<00:00, 87.94it/s]

2023-05-06 16:39:08,902 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9292, batch_loss: 0.0246, loss: 0.2126 ||: 100%|##########| 157/157 [00:00<00:00, 253.34it/s]

2023-05-06 16:39:09,528 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:39:09,533 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.973  |     0.929
2023-05-06 16:39:09,534 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   622.392  |       N/A
2023-05-06 16:39:09,535 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.076  |     0.213
2023-05-06 16:39:09,536 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  36122.672  |       N/A





2023-05-06 16:39:09,954 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:15.285222
2023-05-06 16:39:09,955 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:01:44
2023-05-06 16:39:09,960 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 16:39:09,963 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 35G
2023-05-06 16:39:09,965 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 622M
2023-05-06 16:39:09,970 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9916, batch_loss: 0.1465, loss: 0.0279 ||: 100%|##########| 1250/1250 [00:14<00:00, 87.76it/s]

2023-05-06 16:39:24,219 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9272, batch_loss: 0.0154, loss: 0.2598 ||: 100%|##########| 157/157 [00:00<00:00, 247.23it/s]

2023-05-06 16:39:24,865 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:39:24,866 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.992  |     0.927
2023-05-06 16:39:24,868 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   622.392  |       N/A
2023-05-06 16:39:24,869 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.028  |     0.260
2023-05-06 16:39:24,873 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  36122.672  |       N/A





2023-05-06 16:39:25,321 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:15.361017
2023-05-06 16:39:25,322 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:01:30
2023-05-06 16:39:25,324 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 16:39:25,330 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 35G
2023-05-06 16:39:25,334 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 622M
2023-05-06 16:39:25,337 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9984, batch_loss: 0.0159, loss: 0.0077 ||: 100%|##########| 1250/1250 [00:13<00:00, 89.36it/s]

2023-05-06 16:39:39,332 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9198, batch_loss: 0.0221, loss: 0.2934 ||: 100%|##########| 157/157 [00:00<00:00, 185.41it/s]

2023-05-06 16:39:40,186 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:39:40,188 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.998  |     0.920
2023-05-06 16:39:40,189 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   622.392  |       N/A
2023-05-06 16:39:40,191 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.008  |     0.293
2023-05-06 16:39:40,193 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  36122.672  |       N/A





2023-05-06 16:39:40,667 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:15.342892
2023-05-06 16:39:40,675 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 16:39:40,736 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 36122.671875,
  "peak_gpu_0_memory_MB": 1310.724609375,
  "training_duration": "0:01:15.599695",
  "epoch": 4,
  "training_accuracy": 0.9984,
  "training_loss": 0.007676932953082724,
  "training_worker_0_memory_MB": 36122.671875,
  "training_gpu_0_memory_MB": 622.39208984375,
  "validation_accuracy": 0.9198,
  "validation_loss": 0.29342388593040075,
  "best_validation_accuracy": 0.9316,
  "best_validation_loss": 0.1828214806878263
}
2023-05-06 16:39:40,745 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/48/model.tar.gz


[32m[I 2023-05-06 16:39:42,912][0m Trial 48 finished with value: 0.9316 and parameters: {'embedding_dim': 101, 'max_filter_size': 4, 'num_filters': 32}. Best is trial 41 with value: 0.9342.[0m


2023-05-06 16:39:43,023 - INFO - allennlp.common.params - evaluation = None
2023-05-06 16:39:43,025 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 16:39:43,028 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 16:39:43,034 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 16:39:43,035 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 16:39:43,037 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 16:39:43,038 - INFO - allennlp.common.params - type = default
2023-05-06 16:39:43,041 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 16:39:43,043 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 16:39:43,047 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 16:39:43,048 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 16:39:43,050 - INFO - all

loading instances: 40000it [01:26, 461.93it/s]

2023-05-06 16:41:09,706 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 16:41:09,708 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 16:41:09,709 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 16:41:09,712 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 16:41:09,714 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 16:41:09,716 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 16:41:09,719 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 16:41:09,720 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 16:41:09,722 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 16:41:09,725 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:13, 381.90it/s]

2023-05-06 16:41:22,826 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 16:41:22,829 - INFO - allennlp.common.params - type = from_instances
2023-05-06 16:41:22,831 - INFO - allennlp.common.params - min_count = None
2023-05-06 16:41:22,833 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 16:41:22,835 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 16:41:22,837 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 16:41:22,838 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 16:41:22,840 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 16:41:22,841 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 16:41:22,842 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 16:41:22,843 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 16


building vocab: 40000it [00:05, 6973.44it/s]


2023-05-06 16:41:28,802 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 16:41:28,808 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 16:41:28,811 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 16:41:28,813 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 16:41:28,816 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 92
2023-05-06 16:41:28,818 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 16:41:28,820 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 16:41:28,821 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 16:41:28,822 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tok

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 16:41:35,190 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 16:41:35,192 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 16:41:35,199 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9099, batch_loss: 0.0597, loss: 0.2405 ||: 100%|##########| 1250/1250 [00:33<00:00, 37.64it/s]

2023-05-06 16:42:08,276 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 16:42:08,294 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 16:42:08,295 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 16:42:08,301 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9232, batch_loss: 0.0349, loss: 0.2012 ||: 100%|##########| 157/157 [00:01<00:00, 91.09it/s]

2023-05-06 16:42:10,006 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:42:10,008 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.910  |     0.923





2023-05-06 16:42:10,010 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   622.392  |       N/A
2023-05-06 16:42:10,012 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.241  |     0.201
2023-05-06 16:42:10,013 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  36748.770  |       N/A
2023-05-06 16:42:10,529 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:35.488565
2023-05-06 16:42:10,530 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:05:14
2023-05-06 16:42:10,533 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 16:42:10,535 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 36G
2023-05-06 16:42:10,537 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.5G
2023-05-06 16:42:10,539 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9510, batch_loss: 0.2343, loss: 0.1289 ||: 100%|##########| 1250/1250 [00:33<00:00, 37.63it/s]

2023-05-06 16:42:43,762 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9310, batch_loss: 0.0086, loss: 0.1931 ||: 100%|##########| 157/157 [00:01<00:00, 90.60it/s]

2023-05-06 16:42:45,502 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:42:45,504 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.951  |     0.931
2023-05-06 16:42:45,506 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1527.826  |       N/A
2023-05-06 16:42:45,508 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.129  |     0.193
2023-05-06 16:42:45,509 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  36783.980  |       N/A





2023-05-06 16:42:46,012 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:35.478749
2023-05-06 16:42:46,016 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:41
2023-05-06 16:42:46,019 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 16:42:46,020 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 36G
2023-05-06 16:42:46,023 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.5G
2023-05-06 16:42:46,025 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9839, batch_loss: 0.0025, loss: 0.0465 ||: 100%|##########| 1250/1250 [00:32<00:00, 38.02it/s]

2023-05-06 16:43:18,911 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9292, batch_loss: 0.0011, loss: 0.2501 ||: 100%|##########| 157/157 [00:01<00:00, 90.98it/s]

2023-05-06 16:43:20,646 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:43:20,648 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.984  |     0.929
2023-05-06 16:43:20,650 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1527.950  |       N/A
2023-05-06 16:43:20,655 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.046  |     0.250
2023-05-06 16:43:20,657 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  36784.168  |       N/A





2023-05-06 16:43:21,220 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:35.201383
2023-05-06 16:43:21,224 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:06
2023-05-06 16:43:21,227 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 16:43:21,229 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 36G
2023-05-06 16:43:21,234 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.5G
2023-05-06 16:43:21,235 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9972, batch_loss: 0.0475, loss: 0.0097 ||: 100%|##########| 1250/1250 [00:32<00:00, 37.88it/s]

2023-05-06 16:43:54,239 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9276, batch_loss: 0.0002, loss: 0.3615 ||: 100%|##########| 157/157 [00:01<00:00, 91.30it/s]

2023-05-06 16:43:55,966 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:43:55,970 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.997  |     0.928
2023-05-06 16:43:55,972 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1527.766  |       N/A
2023-05-06 16:43:55,974 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.010  |     0.361
2023-05-06 16:43:55,976 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  36784.168  |       N/A





2023-05-06 16:43:56,573 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:35.345413
2023-05-06 16:43:56,575 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:31
2023-05-06 16:43:56,577 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 16:43:56,579 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 36G
2023-05-06 16:43:56,581 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.5G
2023-05-06 16:43:56,583 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9997, batch_loss: 0.0004, loss: 0.0018 ||: 100%|##########| 1250/1250 [00:33<00:00, 37.42it/s]

2023-05-06 16:44:29,993 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9254, batch_loss: 0.0001, loss: 0.3863 ||: 100%|##########| 157/157 [00:01<00:00, 94.10it/s]

2023-05-06 16:44:31,679 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:44:31,680 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.925
2023-05-06 16:44:31,684 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1527.936  |       N/A
2023-05-06 16:44:31,686 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.002  |     0.386
2023-05-06 16:44:31,687 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  36784.168  |       N/A





2023-05-06 16:44:32,219 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:35.642161
2023-05-06 16:44:32,225 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 16:44:32,273 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 36784.16796875,
  "peak_gpu_0_memory_MB": 1527.9501953125,
  "training_duration": "0:02:56.638090",
  "epoch": 4,
  "training_accuracy": 0.999725,
  "training_loss": 0.0018006176986091304,
  "training_worker_0_memory_MB": 36784.16796875,
  "training_gpu_0_memory_MB": 1527.935546875,
  "validation_accuracy": 0.9254,
  "validation_loss": 0.3863234447088622,
  "best_validation_accuracy": 0.931,
  "best_validation_loss": 0.19306855062082126
}
2023-05-06 16:44:32,275 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/49/model.tar.gz


[32m[I 2023-05-06 16:44:34,942][0m Trial 49 finished with value: 0.931 and parameters: {'embedding_dim': 92, 'max_filter_size': 5, 'num_filters': 200}. Best is trial 41 with value: 0.9342.[0m


2023-05-06 16:44:35,108 - INFO - allennlp.common.params - evaluation = None
2023-05-06 16:44:35,110 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 16:44:35,113 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 16:44:35,116 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 16:44:35,120 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 16:44:35,122 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 16:44:35,125 - INFO - allennlp.common.params - type = default
2023-05-06 16:44:35,130 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 16:44:35,132 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 16:44:35,133 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 16:44:35,135 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 16:44:35,137 - INFO - all

loading instances: 40000it [01:33, 429.27it/s]

2023-05-06 16:46:08,397 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 16:46:08,400 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 16:46:08,402 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 16:46:08,406 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 16:46:08,409 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 16:46:08,410 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 16:46:08,412 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 16:46:08,415 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 16:46:08,417 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 16:46:08,419 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:13, 384.24it/s]

2023-05-06 16:46:21,444 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 16:46:21,447 - INFO - allennlp.common.params - type = from_instances
2023-05-06 16:46:21,450 - INFO - allennlp.common.params - min_count = None
2023-05-06 16:46:21,451 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 16:46:21,453 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 16:46:21,455 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 16:46:21,456 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 16:46:21,457 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 16:46:21,458 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 16:46:21,460 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 16:46:21,461 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 16


building vocab: 40000it [00:06, 6663.89it/s]


2023-05-06 16:46:27,682 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 16:46:27,686 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 16:46:27,690 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 16:46:27,694 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 16:46:27,696 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 187
2023-05-06 16:46:27,700 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 16:46:27,702 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 16:46:27,704 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 16:46:27,706 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 16:46:34,230 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 16:46:34,232 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 16:46:34,241 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9124, batch_loss: 0.0596, loss: 0.2361 ||: 100%|##########| 1250/1250 [00:47<00:00, 26.53it/s]

2023-05-06 16:47:21,199 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 16:47:21,225 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 16:47:21,228 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 16:47:21,237 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9228, batch_loss: 0.0237, loss: 0.1959 ||: 100%|##########| 157/157 [00:02<00:00, 77.91it/s]

2023-05-06 16:47:23,223 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:47:23,225 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.912  |     0.923
2023-05-06 16:47:23,227 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1528.003  |       N/A
2023-05-06 16:47:23,228 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.236  |     0.196
2023-05-06 16:47:23,231 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  37556.887  |       N/A





2023-05-06 16:47:24,282 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:50.200304
2023-05-06 16:47:24,285 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:07:22
2023-05-06 16:47:24,286 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 16:47:24,288 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 37G
2023-05-06 16:47:24,290 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.5G
2023-05-06 16:47:24,292 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9545, batch_loss: 0.2148, loss: 0.1213 ||: 100%|##########| 1250/1250 [00:46<00:00, 26.92it/s]

2023-05-06 16:48:10,729 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9338, batch_loss: 0.0113, loss: 0.2013 ||: 100%|##########| 157/157 [00:01<00:00, 87.86it/s]

2023-05-06 16:48:12,524 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:48:12,530 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.954  |     0.934
2023-05-06 16:48:12,532 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1549.590  |       N/A
2023-05-06 16:48:12,540 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.121  |     0.201
2023-05-06 16:48:12,541 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  37556.887  |       N/A





2023-05-06 16:48:13,502 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:49.216008
2023-05-06 16:48:13,506 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:06:33
2023-05-06 16:48:13,510 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 16:48:13,514 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 37G
2023-05-06 16:48:13,517 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.5G
2023-05-06 16:48:13,519 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9892, batch_loss: 0.0008, loss: 0.0344 ||: 100%|##########| 1250/1250 [00:46<00:00, 26.60it/s]

2023-05-06 16:49:00,518 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9278, batch_loss: 0.0145, loss: 0.2504 ||: 100%|##########| 157/157 [00:01<00:00, 87.69it/s]

2023-05-06 16:49:02,319 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:49:02,321 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.989  |     0.928
2023-05-06 16:49:02,322 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1549.715  |       N/A
2023-05-06 16:49:02,323 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.034  |     0.250
2023-05-06 16:49:02,325 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  37556.887  |       N/A





2023-05-06 16:49:03,275 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:49.764345
2023-05-06 16:49:03,276 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:05:45
2023-05-06 16:49:03,279 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 16:49:03,281 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 37G
2023-05-06 16:49:03,283 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.5G
2023-05-06 16:49:03,285 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9988, batch_loss: 0.0241, loss: 0.0054 ||: 100%|##########| 1250/1250 [00:46<00:00, 26.92it/s]

2023-05-06 16:49:49,722 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9232, batch_loss: 0.0132, loss: 0.3364 ||: 100%|##########| 157/157 [00:01<00:00, 80.02it/s]

2023-05-06 16:49:51,689 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:49:51,691 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.999  |     0.923
2023-05-06 16:49:51,693 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1549.531  |       N/A
2023-05-06 16:49:51,696 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.005  |     0.336
2023-05-06 16:49:51,698 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  37556.887  |       N/A





2023-05-06 16:49:52,693 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:49.413961
2023-05-06 16:49:52,695 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 16:49:52,792 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 0,
  "peak_worker_0_memory_MB": 37556.88671875,
  "peak_gpu_0_memory_MB": 1549.71484375,
  "training_duration": "0:03:17.606465",
  "epoch": 3,
  "training_accuracy": 0.998775,
  "training_loss": 0.00536914540121652,
  "training_worker_0_memory_MB": 37556.88671875,
  "training_gpu_0_memory_MB": 1549.53076171875,
  "validation_accuracy": 0.9232,
  "validation_loss": 0.33643172719889575,
  "best_validation_accuracy": 0.9228,
  "best_validation_loss": 0.19594951159065696
}
2023-05-06 16:49:52,794 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/50/model.tar.gz


[32m[I 2023-05-06 16:49:56,555][0m Trial 50 finished with value: 0.9228 and parameters: {'embedding_dim': 187, 'max_filter_size': 5, 'num_filters': 140}. Best is trial 41 with value: 0.9342.[0m


2023-05-06 16:49:56,677 - INFO - allennlp.common.params - evaluation = None
2023-05-06 16:49:56,679 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 16:49:56,685 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 16:49:56,686 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 16:49:56,688 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 16:49:56,694 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 16:49:56,697 - INFO - allennlp.common.params - type = default
2023-05-06 16:49:56,701 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 16:49:56,704 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 16:49:56,705 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 16:49:56,708 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 16:49:56,710 - INFO - all

loading instances: 40000it [01:26, 463.12it/s]

2023-05-06 16:51:24,960 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 16:51:24,965 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 16:51:24,967 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 16:51:24,969 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 16:51:24,970 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 16:51:24,971 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 16:51:24,972 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 16:51:24,973 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 16:51:24,974 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 16:51:24,975 - INFO - allennlp.common.params - validation_data_loader.cuda_de




2023-05-06 16:51:24,978 - INFO - allennlp.common.params - validation_data_loader.collate_fn = <allennlp.data.data_loaders.data_collator.DefaultDataCollator object at 0x7f1035c88d60>


loading instances: 5000it [00:07, 639.50it/s]

2023-05-06 16:51:32,807 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 16:51:32,810 - INFO - allennlp.common.params - type = from_instances
2023-05-06 16:51:32,812 - INFO - allennlp.common.params - min_count = None
2023-05-06 16:51:32,816 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 16:51:32,817 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 16:51:32,818 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 16:51:32,819 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 16:51:32,820 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 16:51:32,821 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 16:51:32,822 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 16:51:32,823 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 16


building vocab: 40000it [00:04, 8026.67it/s]


2023-05-06 16:51:38,047 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 16:51:38,050 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 16:51:38,054 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 16:51:38,056 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 16:51:38,058 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 121
2023-05-06 16:51:38,062 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 16:51:38,063 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 16:51:38,067 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 16:51:38,068 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 16:51:45,113 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 16:51:45,115 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 16:51:45,121 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9103, batch_loss: 0.0609, loss: 0.2415 ||: 100%|##########| 1250/1250 [00:24<00:00, 51.03it/s]

2023-05-06 16:52:09,493 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 16:52:09,510 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 16:52:09,513 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 16:52:09,520 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9230, batch_loss: 0.0319, loss: 0.1940 ||: 100%|##########| 157/157 [00:01<00:00, 145.94it/s]

2023-05-06 16:52:10,579 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:52:10,581 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.910  |     0.923
2023-05-06 16:52:10,583 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1549.700  |       N/A
2023-05-06 16:52:10,586 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.241  |     0.194
2023-05-06 16:52:10,587 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  38018.617  |       N/A





2023-05-06 16:52:11,117 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:26.128491
2023-05-06 16:52:11,129 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:50
2023-05-06 16:52:11,133 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 16:52:11,137 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 37G
2023-05-06 16:52:11,139 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 894M
2023-05-06 16:52:11,140 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9497, batch_loss: 0.1956, loss: 0.1314 ||: 100%|##########| 1250/1250 [00:25<00:00, 49.34it/s]

2023-05-06 16:52:36,482 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9318, batch_loss: 0.0118, loss: 0.1872 ||: 100%|##########| 157/157 [00:01<00:00, 118.18it/s]

2023-05-06 16:52:37,824 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:52:37,825 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.950  |     0.932
2023-05-06 16:52:37,826 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   893.968  |       N/A
2023-05-06 16:52:37,827 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.131  |     0.187
2023-05-06 16:52:37,829 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  38064.695  |       N/A





2023-05-06 16:52:38,558 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:27.424479
2023-05-06 16:52:38,560 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:31
2023-05-06 16:52:38,564 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 16:52:38,566 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 37G
2023-05-06 16:52:38,570 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 894M
2023-05-06 16:52:38,573 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9826, batch_loss: 0.0033, loss: 0.0512 ||: 100%|##########| 1250/1250 [00:24<00:00, 50.58it/s]

2023-05-06 16:53:03,293 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9262, batch_loss: 0.0072, loss: 0.2357 ||: 100%|##########| 157/157 [00:01<00:00, 151.58it/s]

2023-05-06 16:53:04,341 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:53:04,346 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.983  |     0.926
2023-05-06 16:53:04,350 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   894.093  |       N/A
2023-05-06 16:53:04,353 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.051  |     0.236





2023-05-06 16:53:04,356 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  38064.695  |       N/A
2023-05-06 16:53:04,970 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:26.406404
2023-05-06 16:53:04,973 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:05
2023-05-06 16:53:04,974 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 16:53:04,976 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 37G
2023-05-06 16:53:04,978 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 894M
2023-05-06 16:53:04,980 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9969, batch_loss: 0.0352, loss: 0.0120 ||: 100%|##########| 1250/1250 [00:24<00:00, 50.97it/s]

2023-05-06 16:53:29,507 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9246, batch_loss: 0.0277, loss: 0.3024 ||: 100%|##########| 157/157 [00:01<00:00, 129.88it/s]

2023-05-06 16:53:30,722 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:53:30,724 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.997  |     0.925
2023-05-06 16:53:30,726 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   893.909  |       N/A
2023-05-06 16:53:30,728 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.012  |     0.302
2023-05-06 16:53:30,731 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  38064.695  |       N/A





2023-05-06 16:53:31,434 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:26.459687
2023-05-06 16:53:31,438 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:38
2023-05-06 16:53:31,441 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 16:53:31,446 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 37G
2023-05-06 16:53:31,448 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 894M
2023-05-06 16:53:31,450 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9999, batch_loss: 0.0008, loss: 0.0019 ||: 100%|##########| 1250/1250 [00:25<00:00, 49.67it/s]

2023-05-06 16:53:56,620 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9250, batch_loss: 0.0090, loss: 0.3532 ||: 100%|##########| 157/157 [00:01<00:00, 150.93it/s]

2023-05-06 16:53:57,669 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:53:57,674 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.925





2023-05-06 16:53:57,682 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   894.078  |       N/A
2023-05-06 16:53:57,684 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.002  |     0.353
2023-05-06 16:53:57,686 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  38064.695  |       N/A
2023-05-06 16:53:58,237 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:26.795413
2023-05-06 16:53:58,238 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 16:53:58,300 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 38064.6953125,
  "peak_gpu_0_memory_MB": 1549.7001953125,
  "training_duration": "0:02:12.680304",
  "epoch": 4,
  "training_accuracy": 0.9999,
  "training_loss": 0.0018603010884900868,
  "training_worker_0_memory_MB": 38064.6953125,
  "training_gpu_0_memory_MB": 894.078125,
  "validation_a

[32m[I 2023-05-06 16:54:00,787][0m Trial 51 finished with value: 0.9318 and parameters: {'embedding_dim': 121, 'max_filter_size': 5, 'num_filters': 69}. Best is trial 41 with value: 0.9342.[0m


2023-05-06 16:54:00,898 - INFO - allennlp.common.params - evaluation = None
2023-05-06 16:54:00,900 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 16:54:00,902 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 16:54:00,904 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 16:54:00,906 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 16:54:00,907 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 16:54:00,909 - INFO - allennlp.common.params - type = default
2023-05-06 16:54:00,911 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 16:54:00,913 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 16:54:00,914 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 16:54:00,915 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 16:54:00,916 - INFO - all

loading instances: 40000it [01:28, 451.99it/s]

2023-05-06 16:55:31,259 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 16:55:31,263 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 16:55:31,265 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 16:55:31,267 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 16:55:31,269 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 16:55:31,271 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 16:55:31,272 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 16:55:31,273 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 16:55:31,275 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 16:55:31,276 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:13, 373.04it/s]

2023-05-06 16:55:44,686 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 16:55:44,689 - INFO - allennlp.common.params - type = from_instances
2023-05-06 16:55:44,691 - INFO - allennlp.common.params - min_count = None
2023-05-06 16:55:44,693 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 16:55:44,694 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 16:55:44,698 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 16:55:44,700 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 16:55:44,701 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 16:55:44,703 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 16:55:44,705 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 16:55:44,707 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 16


building vocab: 40000it [00:04, 9404.98it/s] 


2023-05-06 16:55:49,126 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 16:55:49,128 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 16:55:49,129 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 16:55:49,136 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 16:55:49,140 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 109
2023-05-06 16:55:49,141 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 16:55:49,148 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 16:55:49,149 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 16:55:49,150 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 16:55:54,168 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 16:55:54,170 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 16:55:54,176 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9057, batch_loss: 0.0816, loss: 0.2466 ||: 100%|##########| 1250/1250 [00:20<00:00, 61.20it/s]

2023-05-06 16:56:14,486 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 16:56:14,498 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 16:56:14,500 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 16:56:14,504 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9232, batch_loss: 0.0443, loss: 0.1939 ||: 100%|##########| 157/157 [00:00<00:00, 178.15it/s]

2023-05-06 16:56:15,375 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:56:15,376 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.906  |     0.923
2023-05-06 16:56:15,378 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   894.146  |       N/A
2023-05-06 16:56:15,382 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.247  |     0.194
2023-05-06 16:56:15,384 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  38644.008  |       N/A





2023-05-06 16:56:16,007 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:21.955461
2023-05-06 16:56:16,009 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:11
2023-05-06 16:56:16,010 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 16:56:16,012 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 38G
2023-05-06 16:56:16,014 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 770M
2023-05-06 16:56:16,018 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9477, batch_loss: 0.1911, loss: 0.1356 ||: 100%|##########| 1250/1250 [00:20<00:00, 61.25it/s]

2023-05-06 16:56:36,432 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9312, batch_loss: 0.0139, loss: 0.1831 ||: 100%|##########| 157/157 [00:01<00:00, 148.56it/s]

2023-05-06 16:56:37,495 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:56:37,497 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.948  |     0.931
2023-05-06 16:56:37,498 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   769.537  |       N/A
2023-05-06 16:56:37,500 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.136  |     0.183
2023-05-06 16:56:37,502 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  38690.848  |       N/A





2023-05-06 16:56:38,152 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:22.141893
2023-05-06 16:56:38,154 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:53
2023-05-06 16:56:38,156 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 16:56:38,158 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 38G
2023-05-06 16:56:38,160 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 770M
2023-05-06 16:56:38,162 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9810, batch_loss: 0.0019, loss: 0.0547 ||: 100%|##########| 1250/1250 [00:20<00:00, 61.26it/s]

2023-05-06 16:56:58,571 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9250, batch_loss: 0.0048, loss: 0.2530 ||: 100%|##########| 157/157 [00:01<00:00, 145.62it/s]

2023-05-06 16:56:59,657 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:56:59,659 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.981  |     0.925
2023-05-06 16:56:59,660 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   769.661  |       N/A
2023-05-06 16:56:59,663 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.055  |     0.253
2023-05-06 16:56:59,664 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  38690.848  |       N/A





2023-05-06 16:57:00,326 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:22.170119
2023-05-06 16:57:00,330 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:33
2023-05-06 16:57:00,333 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 16:57:00,336 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 38G
2023-05-06 16:57:00,340 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 769M
2023-05-06 16:57:00,342 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9967, batch_loss: 0.0178, loss: 0.0131 ||: 100%|##########| 1250/1250 [00:20<00:00, 61.91it/s]

2023-05-06 16:57:20,540 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9232, batch_loss: 0.0023, loss: 0.2922 ||: 100%|##########| 157/157 [00:01<00:00, 140.38it/s]

2023-05-06 16:57:21,668 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:57:21,670 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.997  |     0.923
2023-05-06 16:57:21,673 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   769.477  |       N/A
2023-05-06 16:57:21,675 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.013  |     0.292
2023-05-06 16:57:21,677 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  38690.848  |       N/A





2023-05-06 16:57:22,329 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:21.995721
2023-05-06 16:57:22,334 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:11
2023-05-06 16:57:22,335 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 16:57:22,337 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 38G
2023-05-06 16:57:22,339 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 770M
2023-05-06 16:57:22,342 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9998, batch_loss: 0.0006, loss: 0.0023 ||: 100%|##########| 1250/1250 [00:20<00:00, 61.13it/s]

2023-05-06 16:57:42,797 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9208, batch_loss: 0.0013, loss: 0.3446 ||: 100%|##########| 157/157 [00:00<00:00, 189.75it/s]

2023-05-06 16:57:43,632 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 16:57:43,639 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.921
2023-05-06 16:57:43,641 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   769.646  |       N/A
2023-05-06 16:57:43,643 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.002  |     0.345





2023-05-06 16:57:43,645 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  38690.848  |       N/A
2023-05-06 16:57:44,131 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:21.795545
2023-05-06 16:57:44,133 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 16:57:44,202 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 38690.84765625,
  "peak_gpu_0_memory_MB": 894.14599609375,
  "training_duration": "0:01:49.580830",
  "epoch": 4,
  "training_accuracy": 0.9998,
  "training_loss": 0.002321878662778181,
  "training_worker_0_memory_MB": 38690.84765625,
  "training_gpu_0_memory_MB": 769.646484375,
  "validation_accuracy": 0.9208,
  "validation_loss": 0.34461929382397466,
  "best_validation_accuracy": 0.9312,
  "best_validation_loss": 0.18311749940892313
}
2023-05-06 16:57:44,204 - INFO - allennlp.models.archival - archiving weights and vocabulary t

[32m[I 2023-05-06 16:57:46,460][0m Trial 52 finished with value: 0.9312 and parameters: {'embedding_dim': 109, 'max_filter_size': 5, 'num_filters': 55}. Best is trial 41 with value: 0.9342.[0m


2023-05-06 16:57:46,576 - INFO - allennlp.common.params - evaluation = None
2023-05-06 16:57:46,577 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 16:57:46,584 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 16:57:46,588 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 16:57:46,591 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 16:57:46,594 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 16:57:46,596 - INFO - allennlp.common.params - type = default
2023-05-06 16:57:46,603 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 16:57:46,605 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 16:57:46,607 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 16:57:46,610 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 16:57:46,613 - INFO - all

loading instances: 40000it [01:29, 445.24it/s]

2023-05-06 16:59:16,499 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 16:59:16,505 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 16:59:16,507 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 16:59:16,508 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 16:59:16,509 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 16:59:16,510 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 16:59:16,515 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 16:59:16,515 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 16:59:16,516 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 16:59:16,517 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:08, 620.61it/s]

2023-05-06 16:59:24,584 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 16:59:24,586 - INFO - allennlp.common.params - type = from_instances
2023-05-06 16:59:24,588 - INFO - allennlp.common.params - min_count = None
2023-05-06 16:59:24,590 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 16:59:24,592 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 16:59:24,594 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 16:59:24,596 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 16:59:24,597 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 16:59:24,598 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 16:59:24,600 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 16:59:24,601 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 16


building vocab: 40000it [00:06, 6020.17it/s]


2023-05-06 16:59:31,512 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 16:59:31,514 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 16:59:31,520 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 16:59:31,522 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 16:59:31,523 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 135
2023-05-06 16:59:31,525 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 16:59:31,527 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 16:59:31,529 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 16:59:31,531 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 16:59:41,209 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 16:59:41,211 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 16:59:41,219 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9106, batch_loss: 0.0565, loss: 0.2411 ||: 100%|##########| 1250/1250 [00:29<00:00, 42.41it/s]

2023-05-06 17:00:10,554 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 17:00:10,572 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 17:00:10,574 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 17:00:10,580 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9232, batch_loss: 0.0285, loss: 0.1929 ||: 100%|##########| 157/157 [00:01<00:00, 107.03it/s]

2023-05-06 17:00:12,029 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:00:12,031 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.911  |     0.923
2023-05-06 17:00:12,035 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   769.714  |       N/A
2023-05-06 17:00:12,037 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.241  |     0.193
2023-05-06 17:00:12,039 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  39281.668  |       N/A





2023-05-06 17:00:12,730 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:31.659944
2023-05-06 17:00:12,735 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:38
2023-05-06 17:00:12,739 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 17:00:12,742 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 38G
2023-05-06 17:00:12,747 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.0G
2023-05-06 17:00:12,750 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9505, batch_loss: 0.1973, loss: 0.1291 ||: 100%|##########| 1250/1250 [00:29<00:00, 42.91it/s]

2023-05-06 17:00:41,890 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9332, batch_loss: 0.0063, loss: 0.1788 ||: 100%|##########| 157/157 [00:01<00:00, 125.31it/s]

2023-05-06 17:00:43,153 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:00:43,155 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.951  |     0.933
2023-05-06 17:00:43,156 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1047.640  |       N/A
2023-05-06 17:00:43,158 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.129  |     0.179
2023-05-06 17:00:43,159 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  39332.453  |       N/A





2023-05-06 17:00:43,913 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:31.173746
2023-05-06 17:00:43,915 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:08
2023-05-06 17:00:43,921 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 17:00:43,922 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 38G
2023-05-06 17:00:43,924 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.0G
2023-05-06 17:00:43,929 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9839, batch_loss: 0.0026, loss: 0.0470 ||: 100%|##########| 1250/1250 [00:29<00:00, 42.58it/s]

2023-05-06 17:01:13,291 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9284, batch_loss: 0.0014, loss: 0.2335 ||: 100%|##########| 157/157 [00:01<00:00, 135.47it/s]

2023-05-06 17:01:14,457 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:01:14,459 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.984  |     0.928
2023-05-06 17:01:14,462 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1047.764  |       N/A
2023-05-06 17:01:14,464 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.047  |     0.233
2023-05-06 17:01:14,465 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  39332.453  |       N/A





2023-05-06 17:01:15,145 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:31.224651
2023-05-06 17:01:15,147 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:37
2023-05-06 17:01:15,149 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 17:01:15,154 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 38G
2023-05-06 17:01:15,159 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.0G
2023-05-06 17:01:15,161 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9975, batch_loss: 0.0204, loss: 0.0100 ||: 100%|##########| 1250/1250 [00:29<00:00, 42.99it/s]

2023-05-06 17:01:44,247 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9284, batch_loss: 0.0002, loss: 0.2977 ||: 100%|##########| 157/157 [00:01<00:00, 116.87it/s]

2023-05-06 17:01:45,601 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:01:45,602 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.998  |     0.928
2023-05-06 17:01:45,603 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1047.580  |       N/A
2023-05-06 17:01:45,605 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.010  |     0.298
2023-05-06 17:01:45,606 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  39332.453  |       N/A





2023-05-06 17:01:46,225 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:31.075417
2023-05-06 17:01:46,226 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:06
2023-05-06 17:01:46,230 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 17:01:46,231 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 38G
2023-05-06 17:01:46,233 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.0G
2023-05-06 17:01:46,235 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9998, batch_loss: 0.0004, loss: 0.0016 ||: 100%|##########| 1250/1250 [00:29<00:00, 42.94it/s]

2023-05-06 17:02:15,353 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9244, batch_loss: 0.0018, loss: 0.3343 ||: 100%|##########| 157/157 [00:01<00:00, 123.80it/s]

2023-05-06 17:02:16,629 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation





2023-05-06 17:02:16,635 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.924
2023-05-06 17:02:16,636 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1047.750  |       N/A
2023-05-06 17:02:16,638 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.002  |     0.334
2023-05-06 17:02:16,639 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  39332.453  |       N/A
2023-05-06 17:02:17,441 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:31.211679
2023-05-06 17:02:17,444 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 17:02:17,517 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 39332.453125,
  "peak_gpu_0_memory_MB": 1047.76416015625,
  "training_duration": "0:02:35.558982",
  "epoch": 4,
  "training_accuracy": 0.9998,
  "training_loss": 0.0015

[32m[I 2023-05-06 17:02:21,308][0m Trial 53 finished with value: 0.9332 and parameters: {'embedding_dim': 135, 'max_filter_size': 5, 'num_filters': 87}. Best is trial 41 with value: 0.9342.[0m


2023-05-06 17:02:21,476 - INFO - allennlp.common.params - evaluation = None
2023-05-06 17:02:21,480 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 17:02:21,483 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 17:02:21,484 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 17:02:21,486 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 17:02:21,488 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 17:02:21,490 - INFO - allennlp.common.params - type = default
2023-05-06 17:02:21,492 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 17:02:21,495 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 17:02:21,497 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 17:02:21,500 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 17:02:21,501 - INFO - all

loading instances: 40000it [01:27, 456.99it/s]

2023-05-06 17:03:49,091 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 17:03:49,092 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 17:03:49,094 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 17:03:49,098 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 17:03:49,099 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 17:03:49,103 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 17:03:49,104 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 17:03:49,105 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 17:03:49,106 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 17:03:49,107 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:13, 364.50it/s]

2023-05-06 17:04:02,832 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 17:04:02,835 - INFO - allennlp.common.params - type = from_instances
2023-05-06 17:04:02,837 - INFO - allennlp.common.params - min_count = None
2023-05-06 17:04:02,839 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 17:04:02,841 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 17:04:02,843 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 17:04:02,845 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 17:04:02,846 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 17:04:02,847 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 17:04:02,848 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 17:04:02,849 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 17


building vocab: 40000it [00:04, 8895.19it/s]


2023-05-06 17:04:07,565 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 17:04:07,568 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 17:04:07,569 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 17:04:07,571 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 17:04:07,573 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 139
2023-05-06 17:04:07,575 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 17:04:07,577 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 17:04:07,577 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 17:04:07,579 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 17:04:14,860 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 17:04:14,862 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 17:04:14,870 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9072, batch_loss: 0.0517, loss: 0.2413 ||: 100%|##########| 1250/1250 [00:29<00:00, 42.61it/s]

2023-05-06 17:04:44,081 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 17:04:44,097 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 17:04:44,099 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 17:04:44,104 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9202, batch_loss: 0.0492, loss: 0.2024 ||: 100%|##########| 157/157 [00:01<00:00, 122.53it/s]

2023-05-06 17:04:45,373 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation





2023-05-06 17:04:45,375 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.907  |     0.920
2023-05-06 17:04:45,377 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1047.817  |       N/A
2023-05-06 17:04:45,379 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.241  |     0.202
2023-05-06 17:04:45,380 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  39919.371  |       N/A
2023-05-06 17:04:46,184 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:31.453354
2023-05-06 17:04:46,187 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:35
2023-05-06 17:04:46,191 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 17:04:46,192 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 39G
2023-05-06 17:04:46,195 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memo

accuracy: 0.9511, batch_loss: 0.1654, loss: 0.1295 ||: 100%|##########| 1250/1250 [00:29<00:00, 42.21it/s]

2023-05-06 17:05:15,822 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9336, batch_loss: 0.0093, loss: 0.1819 ||: 100%|##########| 157/157 [00:01<00:00, 135.68it/s]

2023-05-06 17:05:16,988 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:05:16,990 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.951  |     0.934
2023-05-06 17:05:16,991 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1060.693  |       N/A
2023-05-06 17:05:16,995 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.129  |     0.182
2023-05-06 17:05:16,997 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  39971.574  |       N/A





2023-05-06 17:05:17,662 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:31.471573
2023-05-06 17:05:17,663 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:09
2023-05-06 17:05:17,666 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 17:05:17,672 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 39G
2023-05-06 17:05:17,679 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.0G
2023-05-06 17:05:17,681 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9840, batch_loss: 0.0011, loss: 0.0468 ||: 100%|##########| 1250/1250 [00:29<00:00, 42.66it/s]

2023-05-06 17:05:46,994 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9296, batch_loss: 0.0008, loss: 0.2481 ||: 100%|##########| 157/157 [00:01<00:00, 120.30it/s]

2023-05-06 17:05:48,312 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:05:48,315 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.984  |     0.930
2023-05-06 17:05:48,317 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1060.817  |       N/A
2023-05-06 17:05:48,319 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.047  |     0.248
2023-05-06 17:05:48,321 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  39971.574  |       N/A





2023-05-06 17:05:48,973 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:31.307841
2023-05-06 17:05:48,975 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:38
2023-05-06 17:05:48,977 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 17:05:48,981 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 39G
2023-05-06 17:05:48,984 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.0G
2023-05-06 17:05:48,987 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9976, batch_loss: 0.0719, loss: 0.0094 ||: 100%|##########| 1250/1250 [00:29<00:00, 42.96it/s]

2023-05-06 17:06:18,088 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9246, batch_loss: 0.0001, loss: 0.2945 ||: 100%|##########| 157/157 [00:01<00:00, 122.70it/s]

2023-05-06 17:06:19,376 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:06:19,379 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.998  |     0.925
2023-05-06 17:06:19,381 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1060.633  |       N/A
2023-05-06 17:06:19,383 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.009  |     0.294
2023-05-06 17:06:19,387 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  39971.574  |       N/A





2023-05-06 17:06:20,182 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:31.205039
2023-05-06 17:06:20,185 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:06
2023-05-06 17:06:20,187 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 17:06:20,189 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 39G
2023-05-06 17:06:20,191 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.0G
2023-05-06 17:06:20,193 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9998, batch_loss: 0.0003, loss: 0.0015 ||: 100%|##########| 1250/1250 [00:29<00:00, 42.31it/s]

2023-05-06 17:06:49,740 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9272, batch_loss: 0.0000, loss: 0.3385 ||: 100%|##########| 157/157 [00:01<00:00, 134.60it/s]

2023-05-06 17:06:50,914 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:06:50,915 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.927
2023-05-06 17:06:50,919 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1060.803  |       N/A





2023-05-06 17:06:50,921 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.001  |     0.339
2023-05-06 17:06:50,922 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  39971.574  |       N/A
2023-05-06 17:06:51,598 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:31.410890
2023-05-06 17:06:51,600 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 17:06:51,674 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 39971.57421875,
  "peak_gpu_0_memory_MB": 1060.8173828125,
  "training_duration": "0:02:36.183078",
  "epoch": 4,
  "training_accuracy": 0.999825,
  "training_loss": 0.001486416561523947,
  "training_worker_0_memory_MB": 39971.57421875,
  "training_gpu_0_memory_MB": 1060.802734375,
  "validation_accuracy": 0.9272,
  "validation_loss": 0.33853235110835816,
  "best_validation_accuracy": 0.9336,
  "best_validatio

[32m[I 2023-05-06 17:06:54,785][0m Trial 54 finished with value: 0.9336 and parameters: {'embedding_dim': 139, 'max_filter_size': 5, 'num_filters': 86}. Best is trial 41 with value: 0.9342.[0m


2023-05-06 17:06:54,943 - INFO - allennlp.common.params - evaluation = None
2023-05-06 17:06:54,945 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 17:06:54,948 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 17:06:54,949 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 17:06:54,951 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 17:06:54,952 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 17:06:54,953 - INFO - allennlp.common.params - type = default
2023-05-06 17:06:54,955 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 17:06:54,956 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 17:06:54,957 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 17:06:54,958 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 17:06:54,959 - INFO - all

loading instances: 40000it [01:28, 449.66it/s]

2023-05-06 17:08:23,958 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 17:08:23,960 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 17:08:23,962 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 17:08:23,964 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 17:08:23,966 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 17:08:23,968 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 17:08:23,969 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 17:08:23,970 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 17:08:23,971 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 17:08:23,972 - INFO - allennlp.common.params - validation_data_loader.cuda_de




2023-05-06 17:08:23,973 - INFO - allennlp.common.params - validation_data_loader.quiet = False
2023-05-06 17:08:23,975 - INFO - allennlp.common.params - validation_data_loader.collate_fn = <allennlp.data.data_loaders.data_collator.DefaultDataCollator object at 0x7f1035c88d60>


loading instances: 5000it [00:13, 358.71it/s]

2023-05-06 17:08:37,919 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 17:08:37,921 - INFO - allennlp.common.params - type = from_instances
2023-05-06 17:08:37,924 - INFO - allennlp.common.params - min_count = None
2023-05-06 17:08:37,926 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 17:08:37,928 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 17:08:37,932 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 17:08:37,938 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 17:08:37,940 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 17:08:37,941 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 17:08:37,943 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 17:08:37,944 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 17


building vocab: 40000it [00:04, 9669.56it/s]


2023-05-06 17:08:42,319 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 17:08:42,325 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 17:08:42,327 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 17:08:42,329 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 17:08:42,332 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 179
2023-05-06 17:08:42,336 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 17:08:42,337 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 17:08:42,340 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 17:08:42,341 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 17:08:50,988 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 17:08:50,991 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 17:08:51,002 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9099, batch_loss: 0.0734, loss: 0.2385 ||: 100%|##########| 1250/1250 [00:34<00:00, 35.77it/s]

2023-05-06 17:09:25,817 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 17:09:25,835 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 17:09:25,837 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 17:09:25,844 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9230, batch_loss: 0.0183, loss: 0.1955 ||: 100%|##########| 157/157 [00:01<00:00, 98.95it/s] 

2023-05-06 17:09:27,411 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:09:27,413 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.910  |     0.923
2023-05-06 17:09:27,415 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1060.871  |       N/A
2023-05-06 17:09:27,417 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.239  |     0.196
2023-05-06 17:09:27,418 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  40574.539  |       N/A





2023-05-06 17:09:28,442 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:37.583133
2023-05-06 17:09:28,445 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:05:28
2023-05-06 17:09:28,449 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 17:09:28,450 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 40G
2023-05-06 17:09:28,453 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.2G
2023-05-06 17:09:28,455 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9510, batch_loss: 0.1728, loss: 0.1263 ||: 100%|##########| 1250/1250 [00:34<00:00, 36.17it/s]

2023-05-06 17:10:03,022 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9328, batch_loss: 0.0114, loss: 0.1850 ||: 100%|##########| 157/157 [00:01<00:00, 104.37it/s]

2023-05-06 17:10:04,536 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:10:04,537 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.951  |     0.933
2023-05-06 17:10:04,538 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1220.908  |       N/A
2023-05-06 17:10:04,539 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.126  |     0.185
2023-05-06 17:10:04,542 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  40642.328  |       N/A





2023-05-06 17:10:05,510 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:37.060377
2023-05-06 17:10:05,512 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:54
2023-05-06 17:10:05,515 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 17:10:05,517 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 40G
2023-05-06 17:10:05,519 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.2G
2023-05-06 17:10:05,521 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9852, batch_loss: 0.0006, loss: 0.0437 ||: 100%|##########| 1250/1250 [00:34<00:00, 36.19it/s]

2023-05-06 17:10:40,068 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9308, batch_loss: 0.0015, loss: 0.2391 ||: 100%|##########| 157/157 [00:01<00:00, 102.04it/s]

2023-05-06 17:10:41,616 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:10:41,617 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.985  |     0.931
2023-05-06 17:10:41,620 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1220.908  |       N/A
2023-05-06 17:10:41,621 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.044  |     0.239
2023-05-06 17:10:41,623 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  40642.328  |       N/A





2023-05-06 17:10:42,700 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:37.184454
2023-05-06 17:10:42,708 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:18
2023-05-06 17:10:42,710 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 17:10:42,712 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 40G
2023-05-06 17:10:42,714 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.2G
2023-05-06 17:10:42,715 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9982, batch_loss: 0.0126, loss: 0.0081 ||: 100%|##########| 1250/1250 [00:34<00:00, 36.10it/s]

2023-05-06 17:11:17,355 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9258, batch_loss: 0.0009, loss: 0.3026 ||: 100%|##########| 157/157 [00:01<00:00, 101.17it/s]

2023-05-06 17:11:18,916 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:11:18,918 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.998  |     0.926
2023-05-06 17:11:18,919 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1220.908  |       N/A
2023-05-06 17:11:18,921 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.008  |     0.303
2023-05-06 17:11:18,922 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  40642.328  |       N/A





2023-05-06 17:11:19,964 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:37.253781
2023-05-06 17:11:19,966 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:42
2023-05-06 17:11:19,968 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 17:11:19,970 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 40G
2023-05-06 17:11:19,972 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.2G
2023-05-06 17:11:19,975 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 1.0000, batch_loss: 0.0007, loss: 0.0012 ||: 100%|##########| 1250/1250 [00:34<00:00, 36.03it/s]

2023-05-06 17:11:54,676 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9286, batch_loss: 0.0001, loss: 0.3293 ||: 100%|##########| 157/157 [00:01<00:00, 109.15it/s]

2023-05-06 17:11:56,123 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:11:56,127 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.929
2023-05-06 17:11:56,128 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1220.908  |       N/A
2023-05-06 17:11:56,130 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.001  |     0.329
2023-05-06 17:11:56,131 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  40642.328  |       N/A





2023-05-06 17:11:57,263 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:37.295158
2023-05-06 17:11:57,268 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 17:11:57,364 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 40642.328125,
  "peak_gpu_0_memory_MB": 1220.908203125,
  "training_duration": "0:03:05.263869",
  "epoch": 4,
  "training_accuracy": 0.999975,
  "training_loss": 0.001220729708291765,
  "training_worker_0_memory_MB": 40642.328125,
  "training_gpu_0_memory_MB": 1220.908203125,
  "validation_accuracy": 0.9286,
  "validation_loss": 0.32925040054767707,
  "best_validation_accuracy": 0.9328,
  "best_validation_loss": 0.1850093599397001
}
2023-05-06 17:11:57,367 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/55/model.tar.gz


[32m[I 2023-05-06 17:12:02,253][0m Trial 55 finished with value: 0.9328 and parameters: {'embedding_dim': 179, 'max_filter_size': 5, 'num_filters': 77}. Best is trial 41 with value: 0.9342.[0m


2023-05-06 17:12:02,362 - INFO - allennlp.common.params - evaluation = None
2023-05-06 17:12:02,364 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 17:12:02,371 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 17:12:02,374 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 17:12:02,378 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 17:12:02,381 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 17:12:02,383 - INFO - allennlp.common.params - type = default
2023-05-06 17:12:02,386 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 17:12:02,388 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 17:12:02,391 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 17:12:02,392 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 17:12:02,396 - INFO - all

loading instances: 40000it [01:38, 404.43it/s]

2023-05-06 17:13:41,352 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 17:13:41,354 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 17:13:41,356 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 17:13:41,361 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 17:13:41,362 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 17:13:41,364 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 17:13:41,365 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 17:13:41,366 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 17:13:41,367 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 17:13:41,368 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:09, 505.00it/s]

2023-05-06 17:13:51,276 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 17:13:51,278 - INFO - allennlp.common.params - type = from_instances
2023-05-06 17:13:51,280 - INFO - allennlp.common.params - min_count = None
2023-05-06 17:13:51,282 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 17:13:51,284 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 17:13:51,285 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 17:13:51,295 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 17:13:51,296 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 17:13:51,297 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 17:13:51,299 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 17:13:51,301 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 17


building vocab: 40000it [00:06, 5762.73it/s]


2023-05-06 17:13:58,419 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 17:13:58,421 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 17:13:58,431 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 17:13:58,433 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 17:13:58,435 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 192
2023-05-06 17:13:58,437 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 17:13:58,439 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 17:13:58,441 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 17:13:58,442 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 17:14:03,834 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 17:14:03,838 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 17:14:03,844 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9050, batch_loss: 0.1211, loss: 0.2569 ||: 100%|##########| 1250/1250 [00:17<00:00, 69.50it/s]

2023-05-06 17:14:21,713 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 17:14:21,725 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 17:14:21,726 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 17:14:21,732 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9188, batch_loss: 0.0298, loss: 0.2008 ||: 100%|##########| 157/157 [00:00<00:00, 222.51it/s]

2023-05-06 17:14:22,427 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:14:22,432 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.905  |     0.919





2023-05-06 17:14:22,438 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1220.908  |       N/A
2023-05-06 17:14:22,441 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.257  |     0.201
2023-05-06 17:14:22,442 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  41169.438  |       N/A
2023-05-06 17:14:23,287 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:19.575660
2023-05-06 17:14:23,289 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:48
2023-05-06 17:14:23,292 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 17:14:23,294 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 40G
2023-05-06 17:14:23,296 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.0G
2023-05-06 17:14:23,298 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9398, batch_loss: 0.3517, loss: 0.1568 ||: 100%|##########| 1250/1250 [00:18<00:00, 69.07it/s]

2023-05-06 17:14:41,412 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9266, batch_loss: 0.0319, loss: 0.1890 ||: 100%|##########| 157/157 [00:00<00:00, 234.48it/s]

2023-05-06 17:14:42,089 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:14:42,092 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.940  |     0.927
2023-05-06 17:14:42,099 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1059.568  |       N/A
2023-05-06 17:14:42,103 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.157  |     0.189
2023-05-06 17:14:42,104 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  41241.492  |       N/A





2023-05-06 17:14:43,042 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:19.750148
2023-05-06 17:14:43,044 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:33
2023-05-06 17:14:43,046 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 17:14:43,048 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 40G
2023-05-06 17:14:43,051 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.0G
2023-05-06 17:14:43,053 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9685, batch_loss: 0.0084, loss: 0.0855 ||: 100%|##########| 1250/1250 [00:18<00:00, 68.99it/s]

2023-05-06 17:15:01,182 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9256, batch_loss: 0.0207, loss: 0.2263 ||: 100%|##########| 157/157 [00:00<00:00, 234.89it/s]

2023-05-06 17:15:01,860 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:15:01,862 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.968  |     0.926
2023-05-06 17:15:01,863 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1059.692  |       N/A
2023-05-06 17:15:01,864 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.085  |     0.226
2023-05-06 17:15:01,865 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  41241.492  |       N/A





2023-05-06 17:15:02,781 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:19.735255
2023-05-06 17:15:02,783 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:15
2023-05-06 17:15:02,790 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 17:15:02,793 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 40G
2023-05-06 17:15:02,796 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.0G
2023-05-06 17:15:02,800 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9892, batch_loss: 0.1657, loss: 0.0343 ||: 100%|##########| 1250/1250 [00:17<00:00, 69.73it/s]

2023-05-06 17:15:20,732 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9224, batch_loss: 0.0046, loss: 0.2934 ||: 100%|##########| 157/157 [00:00<00:00, 231.90it/s]

2023-05-06 17:15:21,421 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:15:21,423 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.989  |     0.922
2023-05-06 17:15:21,427 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1059.508  |       N/A
2023-05-06 17:15:21,431 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.034  |     0.293
2023-05-06 17:15:21,433 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  41241.492  |       N/A





2023-05-06 17:15:22,332 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:19.541958
2023-05-06 17:15:22,334 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:01:56
2023-05-06 17:15:22,336 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 17:15:22,338 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 40G
2023-05-06 17:15:22,341 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.0G
2023-05-06 17:15:22,342 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9978, batch_loss: 0.0073, loss: 0.0099 ||: 100%|##########| 1250/1250 [00:18<00:00, 68.91it/s]

2023-05-06 17:15:40,488 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9198, batch_loss: 0.0143, loss: 0.3281 ||: 100%|##########| 157/157 [00:00<00:00, 233.29it/s]

2023-05-06 17:15:41,172 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:15:41,173 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.998  |     0.920
2023-05-06 17:15:41,178 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1059.678  |       N/A
2023-05-06 17:15:41,180 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.010  |     0.328
2023-05-06 17:15:41,181 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  41241.492  |       N/A





2023-05-06 17:15:42,098 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:19.761446
2023-05-06 17:15:42,100 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 17:15:42,192 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 41241.4921875,
  "peak_gpu_0_memory_MB": 1220.908203125,
  "training_duration": "0:01:37.460627",
  "epoch": 4,
  "training_accuracy": 0.997825,
  "training_loss": 0.009887121772230603,
  "training_worker_0_memory_MB": 41241.4921875,
  "training_gpu_0_memory_MB": 1059.677734375,
  "validation_accuracy": 0.9198,
  "validation_loss": 0.3280672255909414,
  "best_validation_accuracy": 0.9266,
  "best_validation_loss": 0.188978660187334
}
2023-05-06 17:15:42,194 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/56/model.tar.gz


[32m[I 2023-05-06 17:15:46,749][0m Trial 56 finished with value: 0.9266 and parameters: {'embedding_dim': 192, 'max_filter_size': 2, 'num_filters': 128}. Best is trial 41 with value: 0.9342.[0m


2023-05-06 17:15:46,921 - INFO - allennlp.common.params - evaluation = None
2023-05-06 17:15:46,924 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 17:15:46,928 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 17:15:46,930 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 17:15:46,933 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 17:15:46,937 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 17:15:46,940 - INFO - allennlp.common.params - type = default
2023-05-06 17:15:46,943 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 17:15:46,945 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 17:15:46,946 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 17:15:46,948 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 17:15:46,951 - INFO - all

loading instances: 40000it [01:39, 402.72it/s]

2023-05-06 17:17:26,341 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 17:17:26,344 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 17:17:26,346 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 17:17:26,348 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 17:17:26,349 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 17:17:26,350 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 17:17:26,352 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 17:17:26,353 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 17:17:26,354 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 17:17:26,355 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:12, 385.69it/s]

2023-05-06 17:17:39,326 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 17:17:39,332 - INFO - allennlp.common.params - type = from_instances
2023-05-06 17:17:39,334 - INFO - allennlp.common.params - min_count = None
2023-05-06 17:17:39,336 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 17:17:39,337 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 17:17:39,343 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 17:17:39,344 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 17:17:39,345 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 17:17:39,346 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 17:17:39,347 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 17:17:39,348 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 17


building vocab: 40000it [00:06, 6203.22it/s]


2023-05-06 17:17:46,053 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 17:17:46,056 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 17:17:46,062 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 17:17:46,064 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 17:17:46,066 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 145
2023-05-06 17:17:46,068 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 17:17:46,072 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 17:17:46,073 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 17:17:46,074 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 17:17:52,260 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 17:17:52,263 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 17:17:52,270 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9050, batch_loss: 0.0724, loss: 0.2516 ||: 100%|##########| 1250/1250 [00:19<00:00, 64.66it/s]

2023-05-06 17:18:11,479 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 17:18:11,490 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 17:18:11,491 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 17:18:11,496 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9188, batch_loss: 0.0575, loss: 0.2034 ||: 100%|##########| 157/157 [00:00<00:00, 194.57it/s]

2023-05-06 17:18:12,292 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:18:12,297 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.905  |     0.919
2023-05-06 17:18:12,301 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1059.746  |       N/A
2023-05-06 17:18:12,304 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.252  |     0.203
2023-05-06 17:18:12,306 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  41775.816  |       N/A





2023-05-06 17:18:12,918 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:20.779279
2023-05-06 17:18:12,919 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:01
2023-05-06 17:18:12,925 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 17:18:12,936 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 41G
2023-05-06 17:18:12,937 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 948M
2023-05-06 17:18:12,938 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9436, batch_loss: 0.2005, loss: 0.1477 ||: 100%|##########| 1250/1250 [00:19<00:00, 64.23it/s]

2023-05-06 17:18:32,406 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9260, batch_loss: 0.0182, loss: 0.1890 ||: 100%|##########| 157/157 [00:00<00:00, 201.98it/s]

2023-05-06 17:18:33,190 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:18:33,191 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.944  |     0.926
2023-05-06 17:18:33,198 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   948.332  |       N/A
2023-05-06 17:18:33,199 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.148  |     0.189
2023-05-06 17:18:33,201 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  41830.230  |       N/A





2023-05-06 17:18:33,858 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:20.933289
2023-05-06 17:18:33,865 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:44
2023-05-06 17:18:33,867 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 17:18:33,870 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 41G
2023-05-06 17:18:33,872 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 948M
2023-05-06 17:18:33,874 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9768, batch_loss: 0.0124, loss: 0.0681 ||: 100%|##########| 1250/1250 [00:19<00:00, 65.00it/s]

2023-05-06 17:18:53,108 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9248, batch_loss: 0.0029, loss: 0.2415 ||: 100%|##########| 157/157 [00:00<00:00, 198.75it/s]

2023-05-06 17:18:53,909 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:18:53,913 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.977  |     0.925
2023-05-06 17:18:53,914 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   948.456  |       N/A
2023-05-06 17:18:53,916 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.068  |     0.242
2023-05-06 17:18:53,918 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  41830.230  |       N/A





2023-05-06 17:18:54,606 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:20.738831
2023-05-06 17:18:54,608 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:24
2023-05-06 17:18:54,615 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 17:18:54,619 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 41G
2023-05-06 17:18:54,622 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 948M
2023-05-06 17:18:54,625 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9949, batch_loss: 0.0262, loss: 0.0196 ||: 100%|##########| 1250/1250 [00:19<00:00, 65.72it/s]

2023-05-06 17:19:13,652 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9208, batch_loss: 0.0003, loss: 0.2789 ||: 100%|##########| 157/157 [00:00<00:00, 203.32it/s]

2023-05-06 17:19:14,436 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:19:14,440 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.995  |     0.921
2023-05-06 17:19:14,441 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   948.272  |       N/A
2023-05-06 17:19:14,443 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.020  |     0.279
2023-05-06 17:19:14,444 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  41830.230  |       N/A





2023-05-06 17:19:15,112 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:20.496862
2023-05-06 17:19:15,114 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:03
2023-05-06 17:19:15,117 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 17:19:15,119 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 41G
2023-05-06 17:19:15,120 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 948M
2023-05-06 17:19:15,122 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9993, batch_loss: 0.0065, loss: 0.0039 ||: 100%|##########| 1250/1250 [00:18<00:00, 65.90it/s]

2023-05-06 17:19:34,096 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9212, batch_loss: 0.0003, loss: 0.3237 ||: 100%|##########| 157/157 [00:00<00:00, 205.50it/s]

2023-05-06 17:19:34,873 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:19:34,876 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.999  |     0.921
2023-05-06 17:19:34,878 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   948.441  |       N/A
2023-05-06 17:19:34,879 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.004  |     0.324
2023-05-06 17:19:34,880 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  41830.230  |       N/A





2023-05-06 17:19:35,579 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:20.462220
2023-05-06 17:19:35,588 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 17:19:35,673 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 41830.23046875,
  "peak_gpu_0_memory_MB": 1059.74560546875,
  "training_duration": "0:01:42.734015",
  "epoch": 4,
  "training_accuracy": 0.9993,
  "training_loss": 0.003928942344186362,
  "training_worker_0_memory_MB": 41830.23046875,
  "training_gpu_0_memory_MB": 948.44140625,
  "validation_accuracy": 0.9212,
  "validation_loss": 0.32365399297377967,
  "best_validation_accuracy": 0.926,
  "best_validation_loss": 0.1889734733137925
}
2023-05-06 17:19:35,678 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/57/model.tar.gz


[32m[I 2023-05-06 17:19:39,658][0m Trial 57 finished with value: 0.926 and parameters: {'embedding_dim': 145, 'max_filter_size': 3, 'num_filters': 97}. Best is trial 41 with value: 0.9342.[0m


2023-05-06 17:19:39,822 - INFO - allennlp.common.params - evaluation = None
2023-05-06 17:19:39,825 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 17:19:39,828 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 17:19:39,830 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 17:19:39,834 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 17:19:39,836 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 17:19:39,838 - INFO - allennlp.common.params - type = default
2023-05-06 17:19:39,840 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 17:19:39,843 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 17:19:39,844 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 17:19:39,846 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 17:19:39,848 - INFO - all

loading instances: 40000it [01:32, 434.18it/s]

2023-05-06 17:21:12,042 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 17:21:12,047 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 17:21:12,049 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 17:21:12,052 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 17:21:12,054 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 17:21:12,056 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 17:21:12,058 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 17:21:12,059 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 17:21:12,060 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 17:21:12,060 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:11, 453.92it/s]

2023-05-06 17:21:23,083 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 17:21:23,089 - INFO - allennlp.common.params - type = from_instances
2023-05-06 17:21:23,095 - INFO - allennlp.common.params - min_count = None
2023-05-06 17:21:23,096 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 17:21:23,098 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 17:21:23,100 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 17:21:23,103 - INFO - allennlp.common.params - only_include_pretrained_words = False





2023-05-06 17:21:23,105 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 17:21:23,107 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 17:21:23,118 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 17:21:23,119 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 17:21:23,122 - INFO - allennlp.data.vocabulary - Fitting token dictionary from dataset.


building vocab: 40000it [00:04, 9297.25it/s]


2023-05-06 17:21:27,617 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 17:21:27,620 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 17:21:27,623 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 17:21:27,625 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 17:21:27,628 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 157
2023-05-06 17:21:27,630 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 17:21:27,632 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 17:21:27,633 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 17:21:27,634 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 17:21:35,630 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 17:21:35,632 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 17:21:35,639 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9071, batch_loss: 0.0683, loss: 0.2419 ||: 100%|##########| 1250/1250 [00:26<00:00, 47.65it/s]

2023-05-06 17:22:01,714 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 17:22:01,728 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 17:22:01,729 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 17:22:01,734 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9218, batch_loss: 0.0298, loss: 0.1966 ||: 100%|##########| 157/157 [00:00<00:00, 163.67it/s]

2023-05-06 17:22:02,681 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:22:02,685 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.907  |     0.922
2023-05-06 17:22:02,686 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   948.509  |       N/A
2023-05-06 17:22:02,687 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.242  |     0.197
2023-05-06 17:22:02,688 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  42422.098  |       N/A





2023-05-06 17:22:03,458 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:27.989632
2023-05-06 17:22:03,462 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:04
2023-05-06 17:22:03,466 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 17:22:03,471 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 41G
2023-05-06 17:22:03,476 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 996M
2023-05-06 17:22:03,479 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9503, batch_loss: 0.1575, loss: 0.1297 ||: 100%|##########| 1250/1250 [00:26<00:00, 47.85it/s]

2023-05-06 17:22:29,607 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9312, batch_loss: 0.0147, loss: 0.1797 ||: 100%|##########| 157/157 [00:01<00:00, 139.60it/s]

2023-05-06 17:22:30,739 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:22:30,740 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.950  |     0.931
2023-05-06 17:22:30,742 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   996.343  |       N/A
2023-05-06 17:22:30,744 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.130  |     0.180
2023-05-06 17:22:30,747 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  42481.008  |       N/A





2023-05-06 17:22:31,621 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:28.154976
2023-05-06 17:22:31,624 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:41
2023-05-06 17:22:31,628 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 17:22:31,632 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 41G
2023-05-06 17:22:31,634 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 996M
2023-05-06 17:22:31,639 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9828, batch_loss: 0.0039, loss: 0.0501 ||: 100%|##########| 1250/1250 [00:25<00:00, 48.36it/s]


2023-05-06 17:22:57,492 - INFO - allennlp.training.gradient_descent_trainer - Validating


accuracy: 0.9246, batch_loss: 0.0028, loss: 0.2354 ||: 100%|##########| 157/157 [00:00<00:00, 168.35it/s]

2023-05-06 17:22:58,437 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:22:58,438 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.983  |     0.925
2023-05-06 17:22:58,444 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   996.343  |       N/A
2023-05-06 17:22:58,447 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.050  |     0.235
2023-05-06 17:22:58,449 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  42481.008  |       N/A





2023-05-06 17:22:59,194 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:27.565797
2023-05-06 17:22:59,195 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:13
2023-05-06 17:22:59,202 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 17:22:59,204 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 41G
2023-05-06 17:22:59,206 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 996M
2023-05-06 17:22:59,208 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9968, batch_loss: 0.0756, loss: 0.0119 ||: 100%|##########| 1250/1250 [00:25<00:00, 48.72it/s]

2023-05-06 17:23:24,871 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9198, batch_loss: 0.0020, loss: 0.2770 ||: 100%|##########| 157/157 [00:01<00:00, 146.74it/s]

2023-05-06 17:23:25,947 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:23:25,949 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.997  |     0.920
2023-05-06 17:23:25,950 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   996.343  |       N/A
2023-05-06 17:23:25,952 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.012  |     0.277
2023-05-06 17:23:25,953 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  42481.008  |       N/A





2023-05-06 17:23:26,896 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:27.694116
2023-05-06 17:23:26,898 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:45
2023-05-06 17:23:26,901 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 17:23:26,903 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 41G
2023-05-06 17:23:26,904 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 996M
2023-05-06 17:23:26,908 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9999, batch_loss: 0.0002, loss: 0.0019 ||: 100%|##########| 1250/1250 [00:26<00:00, 47.96it/s]

2023-05-06 17:23:52,980 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9250, batch_loss: 0.0003, loss: 0.3530 ||: 100%|##########| 157/157 [00:00<00:00, 171.28it/s]

2023-05-06 17:23:53,903 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:23:53,909 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.925
2023-05-06 17:23:53,910 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   996.343  |       N/A
2023-05-06 17:23:53,911 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.002  |     0.353
2023-05-06 17:23:53,914 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  42481.008  |       N/A





2023-05-06 17:23:54,631 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:27.730197
2023-05-06 17:23:54,633 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 17:23:54,712 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 42481.0078125,
  "peak_gpu_0_memory_MB": 996.34326171875,
  "training_duration": "0:02:18.433399",
  "epoch": 4,
  "training_accuracy": 0.999925,
  "training_loss": 0.0018663066743814852,
  "training_worker_0_memory_MB": 42481.0078125,
  "training_gpu_0_memory_MB": 996.34326171875,
  "validation_accuracy": 0.925,
  "validation_loss": 0.3529938568363328,
  "best_validation_accuracy": 0.9312,
  "best_validation_loss": 0.1797362902872027
}
2023-05-06 17:23:54,717 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/58/model.tar.gz


[32m[I 2023-05-06 17:23:57,886][0m Trial 58 finished with value: 0.9312 and parameters: {'embedding_dim': 157, 'max_filter_size': 5, 'num_filters': 43}. Best is trial 41 with value: 0.9342.[0m


2023-05-06 17:23:58,015 - INFO - allennlp.common.params - evaluation = None
2023-05-06 17:23:58,018 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 17:23:58,025 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 17:23:58,028 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 17:23:58,030 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 17:23:58,032 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 17:23:58,034 - INFO - allennlp.common.params - type = default
2023-05-06 17:23:58,035 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 17:23:58,037 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 17:23:58,038 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 17:23:58,040 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 17:23:58,041 - INFO - all

loading instances: 40000it [01:32, 431.80it/s]

2023-05-06 17:25:30,714 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 17:25:30,716 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 17:25:30,722 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 17:25:30,725 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 17:25:30,726 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 17:25:30,729 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 17:25:30,731 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 17:25:30,734 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 17:25:30,736 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 17:25:30,739 - INFO - allennlp.common.params - validation_data_loader.cuda_de




2023-05-06 17:25:30,743 - INFO - allennlp.common.params - validation_data_loader.quiet = False
2023-05-06 17:25:30,745 - INFO - allennlp.common.params - validation_data_loader.collate_fn = <allennlp.data.data_loaders.data_collator.DefaultDataCollator object at 0x7f1035c88d60>


loading instances: 5000it [00:14, 346.02it/s]

2023-05-06 17:25:45,202 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 17:25:45,205 - INFO - allennlp.common.params - type = from_instances
2023-05-06 17:25:45,207 - INFO - allennlp.common.params - min_count = None
2023-05-06 17:25:45,209 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 17:25:45,211 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 17:25:45,212 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 17:25:45,214 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 17:25:45,215 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 17:25:45,216 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 17:25:45,218 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 17:25:45,219 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 17


building vocab: 40000it [00:04, 9900.79it/s] 


2023-05-06 17:25:49,430 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 17:25:49,431 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 17:25:49,437 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 17:25:49,440 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 17:25:49,442 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 168
2023-05-06 17:25:49,444 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 17:25:49,445 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 17:25:49,448 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 17:25:49,449 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 17:25:54,736 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 17:25:54,738 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 17:25:54,745 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9107, batch_loss: 0.0557, loss: 0.2409 ||: 100%|##########| 1250/1250 [00:32<00:00, 37.99it/s]

2023-05-06 17:26:27,507 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 17:26:27,522 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 17:26:27,524 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 17:26:27,529 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9242, batch_loss: 0.0346, loss: 0.1937 ||: 100%|##########| 157/157 [00:01<00:00, 123.71it/s]

2023-05-06 17:26:28,783 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:26:28,785 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.911  |     0.924
2023-05-06 17:26:28,787 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   996.343  |       N/A
2023-05-06 17:26:28,789 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.241  |     0.194
2023-05-06 17:26:28,791 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  43056.551  |       N/A





2023-05-06 17:26:29,554 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:34.960686
2023-05-06 17:26:29,557 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:05:07
2023-05-06 17:26:29,559 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 17:26:29,560 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 42G
2023-05-06 17:26:29,562 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 17:26:29,564 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9503, batch_loss: 0.1944, loss: 0.1302 ||: 100%|##########| 1250/1250 [00:32<00:00, 38.29it/s]

2023-05-06 17:27:02,228 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9298, batch_loss: 0.0139, loss: 0.1922 ||: 100%|##########| 157/157 [00:01<00:00, 125.85it/s]

2023-05-06 17:27:03,482 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:27:03,484 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.950  |     0.930
2023-05-06 17:27:03,488 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1141.985  |       N/A
2023-05-06 17:27:03,492 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.130  |     0.192
2023-05-06 17:27:03,497 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  43120.141  |       N/A





2023-05-06 17:27:04,276 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:34.717575
2023-05-06 17:27:04,277 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:35
2023-05-06 17:27:04,280 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 17:27:04,284 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 42G
2023-05-06 17:27:04,294 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 17:27:04,295 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9835, batch_loss: 0.0011, loss: 0.0484 ||: 100%|##########| 1250/1250 [00:32<00:00, 38.68it/s]

2023-05-06 17:27:36,619 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9280, batch_loss: 0.0056, loss: 0.2517 ||: 100%|##########| 157/157 [00:01<00:00, 106.95it/s]

2023-05-06 17:27:38,097 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:27:38,098 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.984  |     0.928
2023-05-06 17:27:38,100 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1141.985  |       N/A
2023-05-06 17:27:38,101 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.048  |     0.252
2023-05-06 17:27:38,103 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  43120.141  |       N/A





2023-05-06 17:27:39,041 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:34.760867
2023-05-06 17:27:39,042 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:01
2023-05-06 17:27:39,048 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 17:27:39,052 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 42G
2023-05-06 17:27:39,055 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 17:27:39,059 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9971, batch_loss: 0.1468, loss: 0.0107 ||: 100%|##########| 1250/1250 [00:32<00:00, 38.59it/s]

2023-05-06 17:28:11,459 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9258, batch_loss: 0.0029, loss: 0.3637 ||: 100%|##########| 157/157 [00:01<00:00, 111.50it/s]

2023-05-06 17:28:12,878 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:28:12,881 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.997  |     0.926
2023-05-06 17:28:12,883 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1141.985  |       N/A
2023-05-06 17:28:12,884 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.011  |     0.364
2023-05-06 17:28:12,886 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  43120.141  |       N/A





2023-05-06 17:28:13,942 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:34.893663
2023-05-06 17:28:13,950 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:27
2023-05-06 17:28:13,952 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 17:28:13,953 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 42G
2023-05-06 17:28:13,955 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 17:28:13,957 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9998, batch_loss: 0.0059, loss: 0.0019 ||: 100%|##########| 1250/1250 [00:32<00:00, 38.66it/s]

2023-05-06 17:28:46,299 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9262, batch_loss: 0.0011, loss: 0.4057 ||: 100%|##########| 157/157 [00:01<00:00, 119.29it/s]

2023-05-06 17:28:47,623 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:28:47,625 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.926
2023-05-06 17:28:47,629 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1141.985  |       N/A
2023-05-06 17:28:47,631 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.002  |     0.406
2023-05-06 17:28:47,632 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  43120.141  |       N/A





2023-05-06 17:28:48,726 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:34.774189
2023-05-06 17:28:48,730 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 17:28:48,823 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 43120.140625,
  "peak_gpu_0_memory_MB": 1141.9853515625,
  "training_duration": "0:02:53.029466",
  "epoch": 4,
  "training_accuracy": 0.99975,
  "training_loss": 0.0018971895249429507,
  "training_worker_0_memory_MB": 43120.140625,
  "training_gpu_0_memory_MB": 1141.9853515625,
  "validation_accuracy": 0.9262,
  "validation_loss": 0.4056978148495731,
  "best_validation_accuracy": 0.9298,
  "best_validation_loss": 0.19222959428195172
}
2023-05-06 17:28:48,825 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/59/model.tar.gz


[32m[I 2023-05-06 17:28:53,539][0m Trial 59 finished with value: 0.9298 and parameters: {'embedding_dim': 168, 'max_filter_size': 5, 'num_filters': 71}. Best is trial 41 with value: 0.9342.[0m


2023-05-06 17:28:53,658 - INFO - allennlp.common.params - evaluation = None
2023-05-06 17:28:53,663 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 17:28:53,665 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 17:28:53,667 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 17:28:53,670 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 17:28:53,675 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 17:28:53,677 - INFO - allennlp.common.params - type = default
2023-05-06 17:28:53,679 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 17:28:53,680 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 17:28:53,681 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 17:28:53,683 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 17:28:53,684 - INFO - all

loading instances: 40000it [01:30, 440.50it/s]

2023-05-06 17:30:24,533 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 17:30:24,536 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 17:30:24,538 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 17:30:24,540 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 17:30:24,544 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 17:30:24,546 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 17:30:24,547 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 17:30:24,548 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 17:30:24,549 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 17:30:24,552 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:13, 367.88it/s]

2023-05-06 17:30:38,149 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 17:30:38,152 - INFO - allennlp.common.params - type = from_instances
2023-05-06 17:30:38,154 - INFO - allennlp.common.params - min_count = None
2023-05-06 17:30:38,156 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 17:30:38,158 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 17:30:38,159 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 17:30:38,161 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 17:30:38,162 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 17:30:38,163 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 17:30:38,164 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 17:30:38,169 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 17


building vocab: 40000it [00:05, 6724.33it/s]


2023-05-06 17:30:44,366 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 17:30:44,368 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 17:30:44,371 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 17:30:44,373 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 17:30:44,375 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 86
2023-05-06 17:30:44,377 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 17:30:44,378 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 17:30:44,380 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 17:30:44,381 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tok

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 17:30:51,100 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 17:30:51,102 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 17:30:51,108 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9085, batch_loss: 0.0785, loss: 0.2447 ||: 100%|##########| 1250/1250 [00:21<00:00, 59.46it/s]

2023-05-06 17:31:12,015 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 17:31:12,028 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 17:31:12,030 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 17:31:12,035 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9230, batch_loss: 0.0531, loss: 0.1932 ||: 100%|##########| 157/157 [00:00<00:00, 157.12it/s]

2023-05-06 17:31:13,025 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:31:13,026 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.909  |     0.923
2023-05-06 17:31:13,028 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1141.985  |       N/A
2023-05-06 17:31:13,030 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.245  |     0.193
2023-05-06 17:31:13,031 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  43650.855  |       N/A





2023-05-06 17:31:13,435 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:22.457199
2023-05-06 17:31:13,437 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:18
2023-05-06 17:31:13,441 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 17:31:13,443 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 43G
2023-05-06 17:31:13,445 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 801M
2023-05-06 17:31:13,447 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9471, batch_loss: 0.1979, loss: 0.1368 ||: 100%|##########| 1250/1250 [00:21<00:00, 58.52it/s]

2023-05-06 17:31:34,815 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9290, batch_loss: 0.0308, loss: 0.1860 ||: 100%|##########| 157/157 [00:00<00:00, 165.27it/s]

2023-05-06 17:31:35,770 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:31:35,772 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.947  |     0.929





2023-05-06 17:31:35,778 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   801.089  |       N/A
2023-05-06 17:31:35,780 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.137  |     0.186
2023-05-06 17:31:35,786 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  43683.504  |       N/A
2023-05-06 17:31:36,187 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:22.746319
2023-05-06 17:31:36,189 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:59
2023-05-06 17:31:36,191 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 17:31:36,197 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 43G
2023-05-06 17:31:36,199 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 801M
2023-05-06 17:31:36,203 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9796, batch_loss: 0.0028, loss: 0.0579 ||: 100%|##########| 1250/1250 [00:21<00:00, 59.40it/s]

2023-05-06 17:31:57,255 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9280, batch_loss: 0.0205, loss: 0.2254 ||: 100%|##########| 157/157 [00:01<00:00, 143.68it/s]

2023-05-06 17:31:58,356 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:31:58,358 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.980  |     0.928
2023-05-06 17:31:58,359 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   801.213  |       N/A
2023-05-06 17:31:58,364 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.058  |     0.225
2023-05-06 17:31:58,366 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  43683.707  |       N/A





2023-05-06 17:31:58,864 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:22.673099
2023-05-06 17:31:58,866 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:37
2023-05-06 17:31:58,870 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 17:31:58,871 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 43G
2023-05-06 17:31:58,873 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 801M
2023-05-06 17:31:58,875 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9958, batch_loss: 0.0949, loss: 0.0149 ||: 100%|##########| 1250/1250 [00:20<00:00, 59.54it/s]

2023-05-06 17:32:19,874 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9264, batch_loss: 0.0123, loss: 0.3316 ||: 100%|##########| 157/157 [00:01<00:00, 131.15it/s]

2023-05-06 17:32:21,077 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:32:21,079 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.996  |     0.926
2023-05-06 17:32:21,082 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   801.029  |       N/A
2023-05-06 17:32:21,084 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.015  |     0.332
2023-05-06 17:32:21,086 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  43683.707  |       N/A





2023-05-06 17:32:21,624 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:22.754025
2023-05-06 17:32:21,628 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:15
2023-05-06 17:32:21,629 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 17:32:21,631 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 43G
2023-05-06 17:32:21,633 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 801M
2023-05-06 17:32:21,635 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9996, batch_loss: 0.0007, loss: 0.0030 ||: 100%|##########| 1250/1250 [00:21<00:00, 58.65it/s]

2023-05-06 17:32:42,956 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9250, batch_loss: 0.0041, loss: 0.3462 ||: 100%|##########| 157/157 [00:01<00:00, 131.09it/s]

2023-05-06 17:32:44,164 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:32:44,165 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.925
2023-05-06 17:32:44,167 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   801.199  |       N/A
2023-05-06 17:32:44,170 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.003  |     0.346
2023-05-06 17:32:44,173 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  43683.707  |       N/A





2023-05-06 17:32:44,706 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:23.077236
2023-05-06 17:32:44,709 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 17:32:44,763 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 43683.70703125,
  "peak_gpu_0_memory_MB": 1141.9853515625,
  "training_duration": "0:01:53.185131",
  "epoch": 4,
  "training_accuracy": 0.99955,
  "training_loss": 0.002970308297117299,
  "training_worker_0_memory_MB": 43683.70703125,
  "training_gpu_0_memory_MB": 801.19873046875,
  "validation_accuracy": 0.925,
  "validation_loss": 0.34622685676055015,
  "best_validation_accuracy": 0.929,
  "best_validation_loss": 0.18598829647585466
}
2023-05-06 17:32:44,766 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/60/model.tar.gz


[32m[I 2023-05-06 17:32:47,018][0m Trial 60 finished with value: 0.929 and parameters: {'embedding_dim': 86, 'max_filter_size': 5, 'num_filters': 82}. Best is trial 41 with value: 0.9342.[0m


2023-05-06 17:32:47,131 - INFO - allennlp.common.params - evaluation = None
2023-05-06 17:32:47,133 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 17:32:47,135 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 17:32:47,137 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 17:32:47,139 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 17:32:47,141 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 17:32:47,142 - INFO - allennlp.common.params - type = default
2023-05-06 17:32:47,143 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 17:32:47,145 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 17:32:47,146 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 17:32:47,147 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 17:32:47,148 - INFO - all

loading instances: 40000it [01:31, 436.02it/s]

2023-05-06 17:34:18,929 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 17:34:18,931 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 17:34:18,932 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 17:34:18,934 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 17:34:18,936 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 17:34:18,938 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 17:34:18,940 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 17:34:18,942 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 17:34:18,943 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 17:34:18,946 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:08, 591.86it/s]

2023-05-06 17:34:27,404 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 17:34:27,406 - INFO - allennlp.common.params - type = from_instances
2023-05-06 17:34:27,415 - INFO - allennlp.common.params - min_count = None
2023-05-06 17:34:27,416 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 17:34:27,418 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 17:34:27,420 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 17:34:27,424 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 17:34:27,425 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 17:34:27,427 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 17:34:27,430 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 17:34:27,431 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 17


building vocab: 40000it [00:05, 7575.80it/s]


2023-05-06 17:34:32,986 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 17:34:32,989 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 17:34:32,992 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 17:34:32,994 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 17:34:32,997 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 134
2023-05-06 17:34:33,004 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 17:34:33,005 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 17:34:33,006 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 17:34:33,007 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 17:34:40,150 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 17:34:40,152 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 17:34:40,159 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9093, batch_loss: 0.0745, loss: 0.2397 ||: 100%|##########| 1250/1250 [00:29<00:00, 42.41it/s]

2023-05-06 17:35:09,510 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 17:35:09,528 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 17:35:09,530 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 17:35:09,534 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9208, batch_loss: 0.0347, loss: 0.2073 ||: 100%|##########| 157/157 [00:01<00:00, 119.92it/s]

2023-05-06 17:35:10,830 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:35:10,831 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.909  |     0.921
2023-05-06 17:35:10,834 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   801.267  |       N/A
2023-05-06 17:35:10,835 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.240  |     0.207
2023-05-06 17:35:10,836 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  44316.324  |       N/A





2023-05-06 17:35:11,609 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:31.583511
2023-05-06 17:35:11,613 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:37
2023-05-06 17:35:11,615 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 17:35:11,617 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 43G
2023-05-06 17:35:11,621 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.0G
2023-05-06 17:35:11,623 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9519, batch_loss: 0.1703, loss: 0.1279 ||: 100%|##########| 1250/1250 [00:29<00:00, 42.24it/s]

2023-05-06 17:35:41,224 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9312, batch_loss: 0.0146, loss: 0.1921 ||: 100%|##########| 157/157 [00:01<00:00, 134.72it/s]

2023-05-06 17:35:42,395 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:35:42,396 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.952  |     0.931
2023-05-06 17:35:42,403 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1068.239  |       N/A





2023-05-06 17:35:42,406 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.128  |     0.192
2023-05-06 17:35:42,411 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  44367.473  |       N/A
2023-05-06 17:35:43,035 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:31.419854
2023-05-06 17:35:43,044 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:09
2023-05-06 17:35:43,045 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 17:35:43,048 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 43G
2023-05-06 17:35:43,051 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.0G
2023-05-06 17:35:43,053 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9850, batch_loss: 0.0029, loss: 0.0441 ||: 100%|##########| 1250/1250 [00:29<00:00, 42.40it/s]

2023-05-06 17:36:12,541 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9272, batch_loss: 0.0073, loss: 0.2467 ||: 100%|##########| 157/157 [00:01<00:00, 111.72it/s]

2023-05-06 17:36:13,954 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:36:13,956 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.985  |     0.927
2023-05-06 17:36:13,958 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1068.364  |       N/A
2023-05-06 17:36:13,960 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.044  |     0.247
2023-05-06 17:36:13,961 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  44367.473  |       N/A





2023-05-06 17:36:14,794 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:31.748490
2023-05-06 17:36:14,795 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:39
2023-05-06 17:36:14,802 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 17:36:14,805 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 43G
2023-05-06 17:36:14,809 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.0G
2023-05-06 17:36:14,813 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9977, batch_loss: 0.0141, loss: 0.0092 ||: 100%|##########| 1250/1250 [00:29<00:00, 42.96it/s]

2023-05-06 17:36:43,920 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9240, batch_loss: 0.0002, loss: 0.3543 ||: 100%|##########| 157/157 [00:01<00:00, 124.58it/s]

2023-05-06 17:36:45,186 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:36:45,190 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.998  |     0.924
2023-05-06 17:36:45,192 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1068.180  |       N/A
2023-05-06 17:36:45,194 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.009  |     0.354
2023-05-06 17:36:45,195 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  44367.473  |       N/A





2023-05-06 17:36:46,056 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:31.254317
2023-05-06 17:36:46,058 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:07
2023-05-06 17:36:46,064 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 17:36:46,066 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 43G
2023-05-06 17:36:46,068 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.0G
2023-05-06 17:36:46,075 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9999, batch_loss: 0.0004, loss: 0.0014 ||: 100%|##########| 1250/1250 [00:29<00:00, 42.15it/s]

2023-05-06 17:37:15,738 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9252, batch_loss: 0.0008, loss: 0.4174 ||: 100%|##########| 157/157 [00:01<00:00, 135.40it/s]

2023-05-06 17:37:16,907 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:37:16,914 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.925
2023-05-06 17:37:16,915 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1068.349  |       N/A





2023-05-06 17:37:16,922 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.001  |     0.417
2023-05-06 17:37:16,928 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  44367.473  |       N/A
2023-05-06 17:37:17,582 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:31.517651
2023-05-06 17:37:17,584 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 17:37:17,663 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 44367.47265625,
  "peak_gpu_0_memory_MB": 1068.36376953125,
  "training_duration": "0:02:36.880310",
  "epoch": 4,
  "training_accuracy": 0.999925,
  "training_loss": 0.0014218335783643852,
  "training_worker_0_memory_MB": 44367.47265625,
  "training_gpu_0_memory_MB": 1068.34912109375,
  "validation_accuracy": 0.9252,
  "validation_loss": 0.41741181039518327,
  "best_validation_accuracy": 0.9312,
  "best_valid

[32m[I 2023-05-06 17:37:20,416][0m Trial 61 finished with value: 0.9312 and parameters: {'embedding_dim': 134, 'max_filter_size': 5, 'num_filters': 92}. Best is trial 41 with value: 0.9342.[0m


2023-05-06 17:37:20,527 - INFO - allennlp.common.params - evaluation = None
2023-05-06 17:37:20,529 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 17:37:20,531 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 17:37:20,535 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 17:37:20,537 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 17:37:20,538 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 17:37:20,539 - INFO - allennlp.common.params - type = default
2023-05-06 17:37:20,541 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 17:37:20,542 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 17:37:20,543 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 17:37:20,544 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 17:37:20,546 - INFO - all

loading instances: 40000it [01:32, 431.43it/s]

2023-05-06 17:38:53,300 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 17:38:53,304 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 17:38:53,305 - INFO - allennlp.common.params - validation_data_loader.drop_last = False





2023-05-06 17:38:53,308 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 17:38:53,309 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 17:38:53,315 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 17:38:53,316 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 17:38:53,319 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 17:38:53,320 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 17:38:53,322 - INFO - allennlp.common.params - validation_data_loader.cuda_device = None
2023-05-06 17:38:53,325 - INFO - allennlp.common.params - validation_data_loader.quiet = False
2023-05-06 17:38:53,327 - INFO - allennlp.common.params - validation_data_loader.collate_fn = <allennlp.data.data_loaders.data_collator.DefaultDataCollator object at 0x7f1035c88d60>


loading instances: 5000it [00:14, 337.45it/s]

2023-05-06 17:39:08,155 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 17:39:08,157 - INFO - allennlp.common.params - type = from_instances
2023-05-06 17:39:08,159 - INFO - allennlp.common.params - min_count = None
2023-05-06 17:39:08,161 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 17:39:08,166 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 17:39:08,168 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 17:39:08,169 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 17:39:08,170 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 17:39:08,171 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 17:39:08,172 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 17:39:08,173 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 17


building vocab: 40000it [00:04, 8816.70it/s] 


2023-05-06 17:39:12,890 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 17:39:12,891 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 17:39:12,897 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 17:39:12,901 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 17:39:12,903 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 145
2023-05-06 17:39:12,904 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 17:39:12,905 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 17:39:12,906 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 17:39:12,910 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 17:39:18,118 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 17:39:18,120 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 17:39:18,127 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9089, batch_loss: 0.0586, loss: 0.2410 ||: 100%|##########| 1250/1250 [00:30<00:00, 40.37it/s]

2023-05-06 17:39:48,953 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 17:39:48,970 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 17:39:48,972 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 17:39:48,979 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9214, batch_loss: 0.0435, loss: 0.2032 ||: 100%|##########| 157/157 [00:01<00:00, 124.05it/s]

2023-05-06 17:39:50,224 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:39:50,230 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.909  |     0.921
2023-05-06 17:39:50,234 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1068.417  |       N/A
2023-05-06 17:39:50,237 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.241  |     0.203
2023-05-06 17:39:50,238 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  44962.488  |       N/A





2023-05-06 17:39:50,894 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:32.911169
2023-05-06 17:39:50,895 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:50
2023-05-06 17:39:50,898 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 17:39:50,900 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 44G
2023-05-06 17:39:50,902 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 17:39:50,904 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9496, batch_loss: 0.2157, loss: 0.1299 ||: 100%|##########| 1250/1250 [00:30<00:00, 40.84it/s]

2023-05-06 17:40:21,520 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9314, batch_loss: 0.0178, loss: 0.1863 ||: 100%|##########| 157/157 [00:01<00:00, 102.90it/s]

2023-05-06 17:40:23,055 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:40:23,057 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.950  |     0.931
2023-05-06 17:40:23,059 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1083.775  |       N/A
2023-05-06 17:40:23,061 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.130  |     0.186
2023-05-06 17:40:23,065 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  45017.312  |       N/A





2023-05-06 17:40:23,995 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:33.096857
2023-05-06 17:40:23,997 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:20
2023-05-06 17:40:24,002 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 17:40:24,003 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 44G
2023-05-06 17:40:24,008 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 17:40:24,010 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9832, batch_loss: 0.0011, loss: 0.0485 ||: 100%|##########| 1250/1250 [00:30<00:00, 41.41it/s]

2023-05-06 17:40:54,203 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9258, batch_loss: 0.0097, loss: 0.2768 ||: 100%|##########| 157/157 [00:01<00:00, 131.62it/s]

2023-05-06 17:40:55,406 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:40:55,409 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.983  |     0.926
2023-05-06 17:40:55,412 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1083.900  |       N/A
2023-05-06 17:40:55,416 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.049  |     0.277
2023-05-06 17:40:55,419 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  45017.523  |       N/A





2023-05-06 17:40:56,134 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:32.132261
2023-05-06 17:40:56,135 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:47
2023-05-06 17:40:56,140 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 17:40:56,152 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 44G
2023-05-06 17:40:56,154 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 17:40:56,157 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9971, batch_loss: 0.1218, loss: 0.0109 ||: 100%|##########| 1250/1250 [00:30<00:00, 41.02it/s]

2023-05-06 17:41:26,633 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9280, batch_loss: 0.0022, loss: 0.3152 ||: 100%|##########| 157/157 [00:01<00:00, 128.46it/s]

2023-05-06 17:41:27,868 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:41:27,874 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.997  |     0.928
2023-05-06 17:41:27,877 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1083.716  |       N/A
2023-05-06 17:41:27,881 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.011  |     0.315
2023-05-06 17:41:27,882 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  45017.766  |       N/A





2023-05-06 17:41:28,571 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:32.430987
2023-05-06 17:41:28,573 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:14
2023-05-06 17:41:28,578 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 17:41:28,583 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 44G
2023-05-06 17:41:28,588 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 17:41:28,590 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9996, batch_loss: 0.0011, loss: 0.0023 ||: 100%|##########| 1250/1250 [00:30<00:00, 41.09it/s]

2023-05-06 17:41:59,016 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9240, batch_loss: 0.0054, loss: 0.3309 ||: 100%|##########| 157/157 [00:01<00:00, 105.90it/s]

2023-05-06 17:42:00,512 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:42:00,516 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.924
2023-05-06 17:42:00,518 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1083.885  |       N/A
2023-05-06 17:42:00,520 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.002  |     0.331
2023-05-06 17:42:00,524 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  45017.766  |       N/A





2023-05-06 17:42:01,406 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:32.827590
2023-05-06 17:42:01,408 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 17:42:01,493 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 45017.765625,
  "peak_gpu_0_memory_MB": 1083.89990234375,
  "training_duration": "0:02:42.528886",
  "epoch": 4,
  "training_accuracy": 0.9996,
  "training_loss": 0.0023321575312280404,
  "training_worker_0_memory_MB": 45017.765625,
  "training_gpu_0_memory_MB": 1083.88525390625,
  "validation_accuracy": 0.924,
  "validation_loss": 0.3309164696382685,
  "best_validation_accuracy": 0.9314,
  "best_validation_loss": 0.186251954098416
}
2023-05-06 17:42:01,497 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/62/model.tar.gz


[32m[I 2023-05-06 17:42:05,115][0m Trial 62 finished with value: 0.9314 and parameters: {'embedding_dim': 145, 'max_filter_size': 5, 'num_filters': 85}. Best is trial 41 with value: 0.9342.[0m


2023-05-06 17:42:05,236 - INFO - allennlp.common.params - evaluation = None
2023-05-06 17:42:05,238 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 17:42:05,243 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 17:42:05,244 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 17:42:05,246 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 17:42:05,250 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 17:42:05,251 - INFO - allennlp.common.params - type = default
2023-05-06 17:42:05,253 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 17:42:05,255 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 17:42:05,257 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 17:42:05,258 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 17:42:05,261 - INFO - all

loading instances: 40000it [01:36, 413.41it/s]

2023-05-06 17:43:42,083 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 17:43:42,086 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 17:43:42,092 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 17:43:42,095 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 17:43:42,101 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 17:43:42,102 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 17:43:42,103 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 17:43:42,104 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 17:43:42,105 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 17:43:42,106 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:14, 334.07it/s]

2023-05-06 17:43:57,080 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 17:43:57,086 - INFO - allennlp.common.params - type = from_instances
2023-05-06 17:43:57,088 - INFO - allennlp.common.params - min_count = None
2023-05-06 17:43:57,090 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 17:43:57,092 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 17:43:57,096 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 17:43:57,098 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 17:43:57,099 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 17:43:57,100 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 17:43:57,101 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 17:43:57,102 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 17


building vocab: 40000it [00:04, 8546.37it/s] 


2023-05-06 17:44:01,955 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 17:44:01,958 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 17:44:01,960 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 17:44:01,965 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 17:44:01,968 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 140
2023-05-06 17:44:01,970 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 17:44:01,972 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 17:44:01,973 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 17:44:01,977 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 17:44:07,350 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 17:44:07,352 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 17:44:07,358 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9108, batch_loss: 0.0745, loss: 0.2396 ||: 100%|##########| 1250/1250 [00:33<00:00, 37.83it/s]

2023-05-06 17:44:40,269 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 17:44:40,286 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 17:44:40,288 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 17:44:40,293 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9212, batch_loss: 0.0325, loss: 0.1983 ||: 100%|##########| 157/157 [00:01<00:00, 120.70it/s]

2023-05-06 17:44:41,579 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:44:41,581 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.911  |     0.921
2023-05-06 17:44:41,583 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1083.953  |       N/A
2023-05-06 17:44:41,585 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.240  |     0.198
2023-05-06 17:44:41,587 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  45600.266  |       N/A





2023-05-06 17:44:42,242 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:35.026275
2023-05-06 17:44:42,243 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:05:09
2023-05-06 17:44:42,246 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 17:44:42,248 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 45G
2023-05-06 17:44:42,251 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.2G
2023-05-06 17:44:42,253 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9509, batch_loss: 0.2057, loss: 0.1288 ||: 100%|##########| 1250/1250 [00:32<00:00, 39.04it/s]

2023-05-06 17:45:14,278 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9300, batch_loss: 0.0122, loss: 0.1874 ||: 100%|##########| 157/157 [00:01<00:00, 108.56it/s]

2023-05-06 17:45:15,733 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:45:15,735 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.951  |     0.930
2023-05-06 17:45:15,736 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1210.965  |       N/A
2023-05-06 17:45:15,738 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.129  |     0.187
2023-05-06 17:45:15,740 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  45653.520  |       N/A





2023-05-06 17:45:16,643 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:34.397304
2023-05-06 17:45:16,648 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:34
2023-05-06 17:45:16,651 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 17:45:16,653 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 45G
2023-05-06 17:45:16,658 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.2G
2023-05-06 17:45:16,661 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9846, batch_loss: 0.0008, loss: 0.0446 ||: 100%|##########| 1250/1250 [00:31<00:00, 39.22it/s]

2023-05-06 17:45:48,538 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9290, batch_loss: 0.0022, loss: 0.2622 ||: 100%|##########| 157/157 [00:01<00:00, 108.39it/s]

2023-05-06 17:45:49,995 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:45:49,999 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.985  |     0.929
2023-05-06 17:45:50,002 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1211.089  |       N/A
2023-05-06 17:45:50,003 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.045  |     0.262
2023-05-06 17:45:50,005 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  45653.520  |       N/A





2023-05-06 17:45:50,874 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:34.223219
2023-05-06 17:45:50,877 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:59
2023-05-06 17:45:50,880 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 17:45:50,884 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 45G
2023-05-06 17:45:50,886 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.2G
2023-05-06 17:45:50,887 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9978, batch_loss: 0.0370, loss: 0.0086 ||: 100%|##########| 1250/1250 [00:31<00:00, 39.17it/s]

2023-05-06 17:46:22,808 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9254, batch_loss: 0.0020, loss: 0.3021 ||: 100%|##########| 157/157 [00:01<00:00, 122.86it/s]

2023-05-06 17:46:24,098 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation





2023-05-06 17:46:24,107 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.998  |     0.925
2023-05-06 17:46:24,111 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1210.905  |       N/A
2023-05-06 17:46:24,115 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.009  |     0.302
2023-05-06 17:46:24,116 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  45653.520  |       N/A
2023-05-06 17:46:24,747 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:33.866832
2023-05-06 17:46:24,749 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:25
2023-05-06 17:46:24,755 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 17:46:24,759 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 45G
2023-05-06 17:46:24,762 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memo

accuracy: 0.9999, batch_loss: 0.0003, loss: 0.0012 ||: 100%|##########| 1250/1250 [00:32<00:00, 38.66it/s]

2023-05-06 17:46:57,104 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9292, batch_loss: 0.0009, loss: 0.3698 ||: 100%|##########| 157/157 [00:01<00:00, 120.64it/s]

2023-05-06 17:46:58,423 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:46:58,424 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.929
2023-05-06 17:46:58,426 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1211.075  |       N/A
2023-05-06 17:46:58,427 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.001  |     0.370
2023-05-06 17:46:58,429 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  45653.520  |       N/A





2023-05-06 17:46:59,128 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:34.373381
2023-05-06 17:46:59,130 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 17:46:59,208 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 45653.51953125,
  "peak_gpu_0_memory_MB": 1211.08935546875,
  "training_duration": "0:02:51.206241",
  "epoch": 4,
  "training_accuracy": 0.999875,
  "training_loss": 0.0012483369243578635,
  "training_worker_0_memory_MB": 45653.51953125,
  "training_gpu_0_memory_MB": 1211.07470703125,
  "validation_accuracy": 0.9292,
  "validation_loss": 0.3698301839356379,
  "best_validation_accuracy": 0.93,
  "best_validation_loss": 0.1873797204072593
}
2023-05-06 17:46:59,210 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/63/model.tar.gz


[32m[I 2023-05-06 17:47:02,150][0m Trial 63 finished with value: 0.93 and parameters: {'embedding_dim': 140, 'max_filter_size': 5, 'num_filters': 115}. Best is trial 41 with value: 0.9342.[0m


2023-05-06 17:47:02,264 - INFO - allennlp.common.params - evaluation = None
2023-05-06 17:47:02,266 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 17:47:02,269 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 17:47:02,275 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 17:47:02,276 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 17:47:02,277 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 17:47:02,278 - INFO - allennlp.common.params - type = default
2023-05-06 17:47:02,280 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 17:47:02,283 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 17:47:02,284 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 17:47:02,285 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 17:47:02,286 - INFO - all

loading instances: 40000it [01:42, 390.84it/s]

2023-05-06 17:48:44,685 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 17:48:44,688 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 17:48:44,690 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 17:48:44,691 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 17:48:44,693 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 17:48:44,695 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 17:48:44,696 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0





2023-05-06 17:48:44,699 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 17:48:44,699 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 17:48:44,700 - INFO - allennlp.common.params - validation_data_loader.cuda_device = None
2023-05-06 17:48:44,701 - INFO - allennlp.common.params - validation_data_loader.quiet = False
2023-05-06 17:48:44,702 - INFO - allennlp.common.params - validation_data_loader.collate_fn = <allennlp.data.data_loaders.data_collator.DefaultDataCollator object at 0x7f1035c88d60>


loading instances: 5000it [00:11, 421.09it/s]

2023-05-06 17:48:56,588 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 17:48:56,591 - INFO - allennlp.common.params - type = from_instances
2023-05-06 17:48:56,593 - INFO - allennlp.common.params - min_count = None
2023-05-06 17:48:56,594 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 17:48:56,595 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 17:48:56,597 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 17:48:56,598 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 17:48:56,599 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 17:48:56,600 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 17:48:56,601 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 17:48:56,602 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 17


building vocab: 40000it [00:04, 9171.71it/s] 


2023-05-06 17:49:01,143 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 17:49:01,144 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 17:49:01,150 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 17:49:01,153 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 17:49:01,156 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 129
2023-05-06 17:49:01,160 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 17:49:01,161 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 17:49:01,164 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 17:49:01,165 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 17:49:11,750 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 17:49:11,752 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 17:49:11,758 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9103, batch_loss: 0.0706, loss: 0.2388 ||: 100%|##########| 1250/1250 [00:38<00:00, 32.69it/s]

2023-05-06 17:49:49,867 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 17:49:49,885 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 17:49:49,887 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 17:49:49,892 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9200, batch_loss: 0.0320, loss: 0.2028 ||: 100%|##########| 157/157 [00:01<00:00, 101.52it/s]

2023-05-06 17:49:51,420 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:49:51,422 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.910  |     0.920
2023-05-06 17:49:51,424 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1211.143  |       N/A
2023-05-06 17:49:51,426 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.239  |     0.203
2023-05-06 17:49:51,428 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  46332.359  |       N/A





2023-05-06 17:49:52,047 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:40.430716
2023-05-06 17:49:52,049 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:05:58
2023-05-06 17:49:52,052 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 17:49:52,054 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 45G
2023-05-06 17:49:52,055 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.3G
2023-05-06 17:49:52,057 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9509, batch_loss: 0.2100, loss: 0.1274 ||: 100%|##########| 1250/1250 [00:38<00:00, 32.77it/s]

2023-05-06 17:50:30,212 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9290, batch_loss: 0.0072, loss: 0.2000 ||: 100%|##########| 157/157 [00:01<00:00, 104.53it/s]

2023-05-06 17:50:31,724 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:50:31,725 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.951  |     0.929
2023-05-06 17:50:31,727 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1357.872  |       N/A
2023-05-06 17:50:31,729 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.127  |     0.200
2023-05-06 17:50:31,730 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  46332.359  |       N/A





2023-05-06 17:50:32,399 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:40.347306
2023-05-06 17:50:32,407 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:05:20
2023-05-06 17:50:32,409 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 17:50:32,411 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 45G
2023-05-06 17:50:32,414 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.3G
2023-05-06 17:50:32,415 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9853, batch_loss: 0.0026, loss: 0.0419 ||: 100%|##########| 1250/1250 [00:38<00:00, 32.56it/s]

2023-05-06 17:51:10,817 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9280, batch_loss: 0.0007, loss: 0.2606 ||: 100%|##########| 157/157 [00:01<00:00, 103.86it/s]

2023-05-06 17:51:12,339 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:51:12,340 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.985  |     0.928





2023-05-06 17:51:12,345 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1357.997  |       N/A
2023-05-06 17:51:12,348 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.042  |     0.261
2023-05-06 17:51:12,351 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  46332.359  |       N/A
2023-05-06 17:51:12,983 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:40.573975
2023-05-06 17:51:12,985 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:41
2023-05-06 17:51:12,989 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 17:51:12,991 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 45G
2023-05-06 17:51:12,993 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.3G
2023-05-06 17:51:12,995 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9980, batch_loss: 0.2544, loss: 0.0079 ||: 100%|##########| 1250/1250 [00:38<00:00, 32.74it/s]

2023-05-06 17:51:51,184 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9224, batch_loss: 0.0003, loss: 0.3758 ||: 100%|##########| 157/157 [00:01<00:00, 97.61it/s] 

2023-05-06 17:51:52,803 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:51:52,805 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.998  |     0.922
2023-05-06 17:51:52,808 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1357.812  |       N/A
2023-05-06 17:51:52,810 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.008  |     0.376
2023-05-06 17:51:52,811 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  46332.359  |       N/A





2023-05-06 17:51:53,589 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:40.600605
2023-05-06 17:51:53,591 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:01
2023-05-06 17:51:53,597 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 17:51:53,598 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 45G
2023-05-06 17:51:53,600 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.3G
2023-05-06 17:51:53,602 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9999, batch_loss: 0.0002, loss: 0.0012 ||: 100%|##########| 1250/1250 [00:38<00:00, 32.26it/s]

2023-05-06 17:52:32,350 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9266, batch_loss: 0.0012, loss: 0.3641 ||: 100%|##########| 157/157 [00:01<00:00, 93.03it/s]

2023-05-06 17:52:34,052 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:52:34,054 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.927
2023-05-06 17:52:34,056 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1357.982  |       N/A
2023-05-06 17:52:34,058 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.001  |     0.364
2023-05-06 17:52:34,059 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  46332.359  |       N/A





2023-05-06 17:52:34,872 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:41.275401
2023-05-06 17:52:34,875 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 17:52:34,969 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 46332.359375,
  "peak_gpu_0_memory_MB": 1357.99658203125,
  "training_duration": "0:03:22.435233",
  "epoch": 4,
  "training_accuracy": 0.999875,
  "training_loss": 0.0012467848356813192,
  "training_worker_0_memory_MB": 46332.359375,
  "training_gpu_0_memory_MB": 1357.98193359375,
  "validation_accuracy": 0.9266,
  "validation_loss": 0.36407584173065033,
  "best_validation_accuracy": 0.929,
  "best_validation_loss": 0.20003380401320991
}
2023-05-06 17:52:34,973 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/64/model.tar.gz


[32m[I 2023-05-06 17:52:38,729][0m Trial 64 finished with value: 0.929 and parameters: {'embedding_dim': 129, 'max_filter_size': 5, 'num_filters': 150}. Best is trial 41 with value: 0.9342.[0m


2023-05-06 17:52:38,888 - INFO - allennlp.common.params - evaluation = None
2023-05-06 17:52:38,892 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 17:52:38,895 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 17:52:38,897 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 17:52:38,899 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 17:52:38,901 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 17:52:38,903 - INFO - allennlp.common.params - type = default
2023-05-06 17:52:38,906 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 17:52:38,908 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 17:52:38,909 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 17:52:38,910 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 17:52:38,911 - INFO - all

loading instances: 40000it [01:36, 415.59it/s]

2023-05-06 17:54:15,202 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 17:54:15,206 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 17:54:15,209 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 17:54:15,211 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 17:54:15,218 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 17:54:15,219 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 17:54:15,221 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 17:54:15,222 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 17:54:15,223 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 17:54:15,224 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:14, 342.93it/s]

2023-05-06 17:54:29,816 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 17:54:29,821 - INFO - allennlp.common.params - type = from_instances
2023-05-06 17:54:29,823 - INFO - allennlp.common.params - min_count = None
2023-05-06 17:54:29,829 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 17:54:29,830 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 17:54:29,832 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 17:54:29,837 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 17:54:29,838 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 17:54:29,840 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 17:54:29,840 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 17:54:29,841 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 17


building vocab: 40000it [00:06, 5927.09it/s]


2023-05-06 17:54:36,868 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 17:54:36,873 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 17:54:36,877 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 17:54:36,879 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 17:54:36,884 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 114
2023-05-06 17:54:36,886 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 17:54:36,888 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 17:54:36,889 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 17:54:36,890 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 17:54:43,362 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 17:54:43,364 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 17:54:43,370 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9099, batch_loss: 0.0567, loss: 0.2381 ||: 100%|##########| 1250/1250 [00:26<00:00, 47.06it/s]

2023-05-06 17:55:09,815 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 17:55:09,830 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 17:55:09,832 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 17:55:09,837 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9244, batch_loss: 0.0403, loss: 0.1979 ||: 100%|##########| 157/157 [00:01<00:00, 127.15it/s]

2023-05-06 17:55:11,057 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:55:11,060 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.910  |     0.924
2023-05-06 17:55:11,062 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1358.050  |       N/A
2023-05-06 17:55:11,063 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.238  |     0.198
2023-05-06 17:55:11,066 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  46880.281  |       N/A





2023-05-06 17:55:11,813 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:28.570657
2023-05-06 17:55:11,817 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:10
2023-05-06 17:55:11,819 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 17:55:11,821 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 46G
2023-05-06 17:55:11,823 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.0G
2023-05-06 17:55:11,825 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9538, batch_loss: 0.1760, loss: 0.1232 ||: 100%|##########| 1250/1250 [00:27<00:00, 44.90it/s]

2023-05-06 17:55:39,673 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9334, batch_loss: 0.0146, loss: 0.1840 ||: 100%|##########| 157/157 [00:01<00:00, 104.63it/s]

2023-05-06 17:55:41,184 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:55:41,190 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.954  |     0.933
2023-05-06 17:55:41,191 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1048.105  |       N/A
2023-05-06 17:55:41,193 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.123  |     0.184
2023-05-06 17:55:41,195 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  46923.902  |       N/A





2023-05-06 17:55:41,796 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:29.977272
2023-05-06 17:55:41,798 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:51
2023-05-06 17:55:41,803 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 17:55:41,805 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 46G
2023-05-06 17:55:41,807 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.0G
2023-05-06 17:55:41,808 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9859, batch_loss: 0.0017, loss: 0.0414 ||: 100%|##########| 1250/1250 [00:26<00:00, 47.45it/s]

2023-05-06 17:56:08,158 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9264, batch_loss: 0.0017, loss: 0.2436 ||: 100%|##########| 157/157 [00:01<00:00, 136.07it/s]

2023-05-06 17:56:09,318 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:56:09,319 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.986  |     0.926
2023-05-06 17:56:09,326 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1048.230  |       N/A
2023-05-06 17:56:09,328 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.041  |     0.244
2023-05-06 17:56:09,329 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  46923.902  |       N/A





2023-05-06 17:56:09,926 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:28.123226
2023-05-06 17:56:09,927 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:20
2023-05-06 17:56:09,935 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 17:56:09,938 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 46G
2023-05-06 17:56:09,942 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.0G
2023-05-06 17:56:09,946 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9982, batch_loss: 0.0131, loss: 0.0077 ||: 100%|##########| 1250/1250 [00:27<00:00, 46.28it/s]

2023-05-06 17:56:36,969 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9274, batch_loss: 0.0003, loss: 0.3102 ||: 100%|##########| 157/157 [00:01<00:00, 105.94it/s]

2023-05-06 17:56:38,458 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:56:38,461 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.998  |     0.927
2023-05-06 17:56:38,462 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1048.046  |       N/A
2023-05-06 17:56:38,464 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.008  |     0.310
2023-05-06 17:56:38,466 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  46923.902  |       N/A





2023-05-06 17:56:39,223 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:29.288619
2023-05-06 17:56:39,228 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:52
2023-05-06 17:56:39,232 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 17:56:39,236 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 46G
2023-05-06 17:56:39,238 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.0G
2023-05-06 17:56:39,241 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9999, batch_loss: 0.0006, loss: 0.0011 ||: 100%|##########| 1250/1250 [00:26<00:00, 46.38it/s]

2023-05-06 17:57:06,200 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9260, batch_loss: 0.0001, loss: 0.4262 ||: 100%|##########| 157/157 [00:01<00:00, 133.94it/s]

2023-05-06 17:57:07,381 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:57:07,387 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.926
2023-05-06 17:57:07,391 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1048.215  |       N/A
2023-05-06 17:57:07,394 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.001  |     0.426
2023-05-06 17:57:07,396 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  46923.902  |       N/A





2023-05-06 17:57:07,987 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:28.755236
2023-05-06 17:57:07,989 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 17:57:08,051 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 46923.90234375,
  "peak_gpu_0_memory_MB": 1358.0498046875,
  "training_duration": "0:02:24.138813",
  "epoch": 4,
  "training_accuracy": 0.999925,
  "training_loss": 0.0011457454329545727,
  "training_worker_0_memory_MB": 46923.90234375,
  "training_gpu_0_memory_MB": 1048.21533203125,
  "validation_accuracy": 0.926,
  "validation_loss": 0.4261700280597782,
  "best_validation_accuracy": 0.9334,
  "best_validation_loss": 0.1840460791614405
}
2023-05-06 17:57:08,054 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/65/model.tar.gz


[32m[I 2023-05-06 17:57:10,629][0m Trial 65 finished with value: 0.9334 and parameters: {'embedding_dim': 114, 'max_filter_size': 5, 'num_filters': 106}. Best is trial 41 with value: 0.9342.[0m


2023-05-06 17:57:10,825 - INFO - allennlp.common.params - evaluation = None
2023-05-06 17:57:10,827 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 17:57:10,829 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 17:57:10,835 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 17:57:10,837 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 17:57:10,838 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 17:57:10,840 - INFO - allennlp.common.params - type = default
2023-05-06 17:57:10,841 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 17:57:10,845 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 17:57:10,846 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 17:57:10,847 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 17:57:10,849 - INFO - all

loading instances: 40000it [01:39, 401.38it/s]

2023-05-06 17:58:50,545 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 17:58:50,549 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 17:58:50,554 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 17:58:50,555 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 17:58:50,556 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 17:58:50,559 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 17:58:50,561 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 17:58:50,564 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 17:58:50,567 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 17:58:50,569 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:16, 295.58it/s]

2023-05-06 17:59:07,493 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 17:59:07,496 - INFO - allennlp.common.params - type = from_instances
2023-05-06 17:59:07,498 - INFO - allennlp.common.params - min_count = None
2023-05-06 17:59:07,500 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 17:59:07,502 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 17:59:07,505 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 17:59:07,508 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 17:59:07,510 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 17:59:07,513 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 17:59:07,516 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 17:59:07,520 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 17


building vocab: 40000it [00:04, 9005.85it/s]


2023-05-06 17:59:12,205 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 17:59:12,211 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 17:59:12,213 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 17:59:12,219 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 17:59:12,221 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 116
2023-05-06 17:59:12,222 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 17:59:12,227 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 17:59:12,228 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 17:59:12,229 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 17:59:20,516 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 17:59:20,518 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 17:59:20,528 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9081, batch_loss: 0.0780, loss: 0.2438 ||: 100%|##########| 1250/1250 [00:26<00:00, 47.32it/s]

2023-05-06 17:59:46,791 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 17:59:46,807 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 17:59:46,808 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 17:59:46,814 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9236, batch_loss: 0.0304, loss: 0.2019 ||: 100%|##########| 157/157 [00:01<00:00, 131.22it/s]

2023-05-06 17:59:47,998 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 17:59:48,000 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.908  |     0.924
2023-05-06 17:59:48,002 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1048.283  |       N/A
2023-05-06 17:59:48,003 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.244  |     0.202
2023-05-06 17:59:48,006 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  47516.867  |       N/A





2023-05-06 17:59:48,609 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:28.245156
2023-05-06 17:59:48,622 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:08
2023-05-06 17:59:48,626 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 17:59:48,627 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 46G
2023-05-06 17:59:48,629 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.0G
2023-05-06 17:59:48,632 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9490, batch_loss: 0.2116, loss: 0.1335 ||: 100%|##########| 1250/1250 [00:26<00:00, 46.39it/s]

2023-05-06 18:00:15,582 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9322, batch_loss: 0.0141, loss: 0.1881 ||: 100%|##########| 157/157 [00:01<00:00, 111.34it/s]

2023-05-06 18:00:17,001 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:00:17,007 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.949  |     0.932
2023-05-06 18:00:17,009 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1033.631  |       N/A
2023-05-06 18:00:17,011 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.134  |     0.188
2023-05-06 18:00:17,012 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  47560.973  |       N/A





2023-05-06 18:00:17,793 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:29.167397
2023-05-06 18:00:17,797 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:46
2023-05-06 18:00:17,799 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 18:00:17,803 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 46G
2023-05-06 18:00:17,805 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.0G
2023-05-06 18:00:17,807 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9818, batch_loss: 0.0014, loss: 0.0516 ||: 100%|##########| 1250/1250 [00:26<00:00, 47.46it/s]

2023-05-06 18:00:44,154 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9250, batch_loss: 0.0033, loss: 0.2568 ||: 100%|##########| 157/157 [00:01<00:00, 137.63it/s]

2023-05-06 18:00:45,309 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:00:45,319 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.982  |     0.925
2023-05-06 18:00:45,321 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1033.755  |       N/A
2023-05-06 18:00:45,323 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.052  |     0.257
2023-05-06 18:00:45,327 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  47560.973  |       N/A





2023-05-06 18:00:45,914 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:28.114849
2023-05-06 18:00:45,915 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:18
2023-05-06 18:00:45,918 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 18:00:45,921 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 46G
2023-05-06 18:00:45,926 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.0G
2023-05-06 18:00:45,929 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9970, batch_loss: 0.0292, loss: 0.0114 ||: 100%|##########| 1250/1250 [00:26<00:00, 47.38it/s]

2023-05-06 18:01:12,314 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9238, batch_loss: 0.0006, loss: 0.3087 ||: 100%|##########| 157/157 [00:01<00:00, 115.03it/s]

2023-05-06 18:01:13,687 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:01:13,689 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.997  |     0.924
2023-05-06 18:01:13,691 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1033.571  |       N/A
2023-05-06 18:01:13,693 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.011  |     0.309
2023-05-06 18:01:13,695 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  47560.973  |       N/A





2023-05-06 18:01:14,402 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:28.484462
2023-05-06 18:01:14,406 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:49
2023-05-06 18:01:14,407 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 18:01:14,409 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 46G
2023-05-06 18:01:14,411 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.0G
2023-05-06 18:01:14,414 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9998, batch_loss: 0.0014, loss: 0.0019 ||: 100%|##########| 1250/1250 [00:27<00:00, 46.26it/s]

2023-05-06 18:01:41,443 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9238, batch_loss: 0.0005, loss: 0.3585 ||: 100%|##########| 157/157 [00:01<00:00, 137.44it/s]

2023-05-06 18:01:42,596 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:01:42,598 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.924
2023-05-06 18:01:42,600 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1033.741  |       N/A
2023-05-06 18:01:42,602 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.002  |     0.358
2023-05-06 18:01:42,604 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  47560.973  |       N/A





2023-05-06 18:01:43,204 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:28.796127
2023-05-06 18:01:43,209 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 18:01:43,277 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 47560.97265625,
  "peak_gpu_0_memory_MB": 1048.283203125,
  "training_duration": "0:02:22.231966",
  "epoch": 4,
  "training_accuracy": 0.999775,
  "training_loss": 0.0018858968270462355,
  "training_worker_0_memory_MB": 47560.97265625,
  "training_gpu_0_memory_MB": 1033.74072265625,
  "validation_accuracy": 0.9238,
  "validation_loss": 0.3584857153530888,
  "best_validation_accuracy": 0.9322,
  "best_validation_loss": 0.18808971539400754
}
2023-05-06 18:01:43,280 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/66/model.tar.gz


[32m[I 2023-05-06 18:01:45,758][0m Trial 66 finished with value: 0.9322 and parameters: {'embedding_dim': 116, 'max_filter_size': 5, 'num_filters': 101}. Best is trial 41 with value: 0.9342.[0m


2023-05-06 18:01:45,880 - INFO - allennlp.common.params - evaluation = None
2023-05-06 18:01:45,882 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 18:01:45,885 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 18:01:45,887 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 18:01:45,889 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 18:01:45,891 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 18:01:45,892 - INFO - allennlp.common.params - type = default
2023-05-06 18:01:45,895 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 18:01:45,897 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 18:01:45,898 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 18:01:45,899 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 18:01:45,900 - INFO - all

loading instances: 40000it [01:39, 400.79it/s]

2023-05-06 18:03:25,745 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 18:03:25,748 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 18:03:25,754 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 18:03:25,755 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 18:03:25,757 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 18:03:25,759 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 18:03:25,761 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 18:03:25,764 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 18:03:25,767 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 18:03:25,770 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:11, 429.48it/s]

2023-05-06 18:03:37,428 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 18:03:37,432 - INFO - allennlp.common.params - type = from_instances
2023-05-06 18:03:37,435 - INFO - allennlp.common.params - min_count = None
2023-05-06 18:03:37,439 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 18:03:37,440 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 18:03:37,445 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 18:03:37,446 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 18:03:37,447 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 18:03:37,448 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 18:03:37,449 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 18:03:37,450 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 18


building vocab: 40000it [00:05, 7760.42it/s]


2023-05-06 18:03:42,802 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 18:03:42,804 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 18:03:42,807 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 18:03:42,809 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 18:03:42,811 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 102
2023-05-06 18:03:42,813 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 18:03:42,815 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 18:03:42,816 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 18:03:42,817 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 18:03:48,195 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 18:03:48,198 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 18:03:48,204 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9058, batch_loss: 0.0947, loss: 0.2466 ||: 100%|##########| 1250/1250 [00:20<00:00, 61.60it/s]

2023-05-06 18:04:08,361 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 18:04:08,373 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 18:04:08,374 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 18:04:08,379 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9220, batch_loss: 0.0548, loss: 0.1980 ||: 100%|##########| 157/157 [00:00<00:00, 162.25it/s]

2023-05-06 18:04:09,337 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:04:09,341 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.906  |     0.922
2023-05-06 18:04:09,344 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1033.809  |       N/A
2023-05-06 18:04:09,345 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.247  |     0.198
2023-05-06 18:04:09,347 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  48158.648  |       N/A





2023-05-06 18:04:09,876 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:21.815780
2023-05-06 18:04:09,878 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:11
2023-05-06 18:04:09,880 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 18:04:09,882 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 47G
2023-05-06 18:04:09,884 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 901M
2023-05-06 18:04:09,886 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9470, batch_loss: 0.1975, loss: 0.1379 ||: 100%|##########| 1250/1250 [00:19<00:00, 62.50it/s]

2023-05-06 18:04:29,899 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9266, batch_loss: 0.0451, loss: 0.1907 ||: 100%|##########| 157/157 [00:00<00:00, 169.87it/s]

2023-05-06 18:04:30,839 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:04:30,842 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.947  |     0.927
2023-05-06 18:04:30,843 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   901.020  |       N/A
2023-05-06 18:04:30,845 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.138  |     0.191
2023-05-06 18:04:30,846 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  48197.648  |       N/A





2023-05-06 18:04:31,351 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:21.471250
2023-05-06 18:04:31,352 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:51
2023-05-06 18:04:31,355 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 18:04:31,359 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 47G
2023-05-06 18:04:31,362 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 901M
2023-05-06 18:04:31,365 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9789, batch_loss: 0.0028, loss: 0.0595 ||: 100%|##########| 1250/1250 [00:19<00:00, 63.17it/s]

2023-05-06 18:04:51,163 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9244, batch_loss: 0.0190, loss: 0.2425 ||: 100%|##########| 157/157 [00:00<00:00, 168.98it/s]

2023-05-06 18:04:52,100 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:04:52,101 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.979  |     0.924
2023-05-06 18:04:52,102 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   901.144  |       N/A
2023-05-06 18:04:52,105 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.059  |     0.243
2023-05-06 18:04:52,106 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  48197.648  |       N/A





2023-05-06 18:04:52,798 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:21.443276
2023-05-06 18:04:52,803 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:29
2023-05-06 18:04:52,805 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 18:04:52,807 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 47G
2023-05-06 18:04:52,809 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 901M
2023-05-06 18:04:52,815 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9961, batch_loss: 0.0144, loss: 0.0148 ||: 100%|##########| 1250/1250 [00:19<00:00, 63.41it/s]

2023-05-06 18:05:12,534 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9226, batch_loss: 0.0564, loss: 0.3130 ||: 100%|##########| 157/157 [00:01<00:00, 138.72it/s]

2023-05-06 18:05:13,676 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:05:13,678 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.996  |     0.923
2023-05-06 18:05:13,680 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   900.960  |       N/A
2023-05-06 18:05:13,681 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.015  |     0.313
2023-05-06 18:05:13,683 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  48197.648  |       N/A





2023-05-06 18:05:14,382 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:21.576659
2023-05-06 18:05:14,390 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:08
2023-05-06 18:05:14,391 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 18:05:14,393 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 47G
2023-05-06 18:05:14,395 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 901M
2023-05-06 18:05:14,399 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9996, batch_loss: 0.0017, loss: 0.0031 ||: 100%|##########| 1250/1250 [00:20<00:00, 61.54it/s]

2023-05-06 18:05:34,715 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9164, batch_loss: 0.1243, loss: 0.3486 ||: 100%|##########| 157/157 [00:01<00:00, 148.83it/s]

2023-05-06 18:05:35,777 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:05:35,780 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.916
2023-05-06 18:05:35,786 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   901.129  |       N/A
2023-05-06 18:05:35,787 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.003  |     0.349
2023-05-06 18:05:35,789 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  48197.648  |       N/A





2023-05-06 18:05:36,500 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:22.108207
2023-05-06 18:05:36,501 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 18:05:36,571 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 48197.6484375,
  "peak_gpu_0_memory_MB": 1033.80859375,
  "training_duration": "0:01:47.716708",
  "epoch": 4,
  "training_accuracy": 0.999575,
  "training_loss": 0.003089545537433878,
  "training_worker_0_memory_MB": 48197.6484375,
  "training_gpu_0_memory_MB": 901.12939453125,
  "validation_accuracy": 0.9164,
  "validation_loss": 0.34864423157659113,
  "best_validation_accuracy": 0.9266,
  "best_validation_loss": 0.19070817422192948
}
2023-05-06 18:05:36,574 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/67/model.tar.gz


[32m[I 2023-05-06 18:05:39,563][0m Trial 67 finished with value: 0.9266 and parameters: {'embedding_dim': 102, 'max_filter_size': 4, 'num_filters': 108}. Best is trial 41 with value: 0.9342.[0m


2023-05-06 18:05:39,726 - INFO - allennlp.common.params - evaluation = None
2023-05-06 18:05:39,729 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 18:05:39,731 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 18:05:39,733 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 18:05:39,735 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 18:05:39,737 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 18:05:39,738 - INFO - allennlp.common.params - type = default
2023-05-06 18:05:39,741 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 18:05:39,743 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 18:05:39,745 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 18:05:39,747 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 18:05:39,748 - INFO - all

loading instances: 40000it [01:39, 400.95it/s]

2023-05-06 18:07:19,568 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 18:07:19,570 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 18:07:19,575 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 18:07:19,578 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 18:07:19,582 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 18:07:19,586 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 18:07:19,588 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 18:07:19,590 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None





2023-05-06 18:07:19,595 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 18:07:19,597 - INFO - allennlp.common.params - validation_data_loader.cuda_device = None
2023-05-06 18:07:19,598 - INFO - allennlp.common.params - validation_data_loader.quiet = False
2023-05-06 18:07:19,601 - INFO - allennlp.common.params - validation_data_loader.collate_fn = <allennlp.data.data_loaders.data_collator.DefaultDataCollator object at 0x7f1035c88d60>


loading instances: 5000it [00:09, 549.07it/s]

2023-05-06 18:07:28,715 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 18:07:28,717 - INFO - allennlp.common.params - type = from_instances
2023-05-06 18:07:28,722 - INFO - allennlp.common.params - min_count = None
2023-05-06 18:07:28,724 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 18:07:28,726 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 18:07:28,728 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 18:07:28,729 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 18:07:28,732 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 18:07:28,733 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 18:07:28,734 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 18:07:28,735 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 18


building vocab: 40000it [00:06, 5731.76it/s]


2023-05-06 18:07:35,998 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 18:07:36,000 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 18:07:36,003 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 18:07:36,006 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 18:07:36,008 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 74
2023-05-06 18:07:36,009 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 18:07:36,011 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 18:07:36,014 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 18:07:36,018 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tok

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 18:07:42,291 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 18:07:42,293 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 18:07:42,299 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9038, batch_loss: 0.0653, loss: 0.2521 ||: 100%|##########| 1250/1250 [00:18<00:00, 68.17it/s]

2023-05-06 18:08:00,514 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 18:08:00,527 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 18:08:00,528 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 18:08:00,533 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9226, batch_loss: 0.0448, loss: 0.1967 ||: 100%|##########| 157/157 [00:00<00:00, 181.12it/s]

2023-05-06 18:08:01,387 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:08:01,389 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.904  |     0.923
2023-05-06 18:08:01,392 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   901.197  |       N/A
2023-05-06 18:08:01,394 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.252  |     0.197
2023-05-06 18:08:01,395 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  48793.215  |       N/A





2023-05-06 18:08:01,774 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:19.604823
2023-05-06 18:08:01,778 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:52
2023-05-06 18:08:01,780 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 18:08:01,783 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 48G
2023-05-06 18:08:01,788 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 637M
2023-05-06 18:08:01,790 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9444, batch_loss: 0.2259, loss: 0.1441 ||: 100%|##########| 1250/1250 [00:17<00:00, 70.11it/s]

2023-05-06 18:08:19,627 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9332, batch_loss: 0.0155, loss: 0.1794 ||: 100%|##########| 157/157 [00:00<00:00, 192.24it/s]

2023-05-06 18:08:20,455 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:08:20,465 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.944  |     0.933
2023-05-06 18:08:20,468 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   636.600  |       N/A
2023-05-06 18:08:20,471 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.144  |     0.179
2023-05-06 18:08:20,474 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  48876.992  |       N/A





2023-05-06 18:08:20,856 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:19.075709
2023-05-06 18:08:20,864 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:33
2023-05-06 18:08:20,866 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 18:08:20,870 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 48G
2023-05-06 18:08:20,872 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 637M
2023-05-06 18:08:20,874 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9760, batch_loss: 0.0041, loss: 0.0679 ||: 100%|##########| 1250/1250 [00:17<00:00, 70.93it/s]

2023-05-06 18:08:38,504 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9262, batch_loss: 0.0120, loss: 0.2260 ||: 100%|##########| 157/157 [00:00<00:00, 164.83it/s]

2023-05-06 18:08:39,462 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:08:39,464 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.976  |     0.926
2023-05-06 18:08:39,470 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   636.724  |       N/A
2023-05-06 18:08:39,471 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.068  |     0.226
2023-05-06 18:08:39,473 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  48877.383  |       N/A





2023-05-06 18:08:39,900 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:19.034401
2023-05-06 18:08:39,904 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:13
2023-05-06 18:08:39,906 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 18:08:39,909 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 48G
2023-05-06 18:08:39,912 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 637M
2023-05-06 18:08:39,916 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9940, batch_loss: 0.1048, loss: 0.0202 ||: 100%|##########| 1250/1250 [00:17<00:00, 72.66it/s]

2023-05-06 18:08:57,129 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9250, batch_loss: 0.0047, loss: 0.2976 ||: 100%|##########| 157/157 [00:01<00:00, 135.92it/s]

2023-05-06 18:08:58,294 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:08:58,295 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.994  |     0.925
2023-05-06 18:08:58,297 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   636.540  |       N/A
2023-05-06 18:08:58,298 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.020  |     0.298
2023-05-06 18:08:58,301 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  48877.383  |       N/A





2023-05-06 18:08:58,705 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:18.799313
2023-05-06 18:08:58,709 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:01:54
2023-05-06 18:08:58,713 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 18:08:58,714 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 48G
2023-05-06 18:08:58,720 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 637M
2023-05-06 18:08:58,724 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9992, batch_loss: 0.0034, loss: 0.0045 ||: 100%|##########| 1250/1250 [00:17<00:00, 72.65it/s]

2023-05-06 18:09:15,938 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9188, batch_loss: 0.0019, loss: 0.3347 ||: 100%|##########| 157/157 [00:01<00:00, 152.06it/s]

2023-05-06 18:09:16,981 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:09:16,983 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.999  |     0.919
2023-05-06 18:09:16,985 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   636.709  |       N/A
2023-05-06 18:09:16,987 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.005  |     0.335
2023-05-06 18:09:16,989 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  48877.383  |       N/A





2023-05-06 18:09:17,431 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:18.717891
2023-05-06 18:09:17,434 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 18:09:17,490 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 48877.3828125,
  "peak_gpu_0_memory_MB": 901.197265625,
  "training_duration": "0:01:34.810268",
  "epoch": 4,
  "training_accuracy": 0.999225,
  "training_loss": 0.004519424875781988,
  "training_worker_0_memory_MB": 48877.3828125,
  "training_gpu_0_memory_MB": 636.70947265625,
  "validation_accuracy": 0.9188,
  "validation_loss": 0.3346707482294292,
  "best_validation_accuracy": 0.9332,
  "best_validation_loss": 0.17940506334327588
}
2023-05-06 18:09:17,493 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/68/model.tar.gz


[32m[I 2023-05-06 18:09:19,643][0m Trial 68 finished with value: 0.9332 and parameters: {'embedding_dim': 74, 'max_filter_size': 5, 'num_filters': 60}. Best is trial 41 with value: 0.9342.[0m


2023-05-06 18:09:19,772 - INFO - allennlp.common.params - evaluation = None
2023-05-06 18:09:19,774 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 18:09:19,777 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 18:09:19,780 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 18:09:19,783 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 18:09:19,787 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 18:09:19,788 - INFO - allennlp.common.params - type = default
2023-05-06 18:09:19,793 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 18:09:19,796 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 18:09:19,797 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 18:09:19,799 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 18:09:19,802 - INFO - all

loading instances: 40000it [01:38, 404.34it/s]

2023-05-06 18:10:58,802 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 18:10:58,810 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 18:10:58,815 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 18:10:58,817 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 18:10:58,820 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 18:10:58,823 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 18:10:58,824 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 18:10:58,825 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 18:10:58,829 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 18:10:58,830 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:15, 327.07it/s]

2023-05-06 18:11:14,128 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 18:11:14,132 - INFO - allennlp.common.params - type = from_instances
2023-05-06 18:11:14,134 - INFO - allennlp.common.params - min_count = None
2023-05-06 18:11:14,136 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 18:11:14,138 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 18:11:14,141 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 18:11:14,143 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 18:11:14,145 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 18:11:14,147 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 18:11:14,149 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 18:11:14,151 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 18


building vocab: 40000it [00:05, 7310.50it/s]


2023-05-06 18:11:19,816 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 18:11:19,819 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 18:11:19,820 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 18:11:19,824 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 18:11:19,825 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 111
2023-05-06 18:11:19,827 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 18:11:19,829 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 18:11:19,830 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 18:11:19,831 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 18:11:25,284 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 18:11:25,287 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 18:11:25,292 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9072, batch_loss: 0.0744, loss: 0.2468 ||: 100%|##########| 1250/1250 [00:21<00:00, 59.27it/s]

2023-05-06 18:11:46,260 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 18:11:46,272 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 18:11:46,273 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 18:11:46,278 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9234, batch_loss: 0.0400, loss: 0.1951 ||: 100%|##########| 157/157 [00:00<00:00, 175.53it/s]

2023-05-06 18:11:47,160 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:11:47,161 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.907  |     0.923
2023-05-06 18:11:47,166 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   636.777  |       N/A
2023-05-06 18:11:47,172 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.247  |     0.195
2023-05-06 18:11:47,174 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  49444.098  |       N/A





2023-05-06 18:11:47,648 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:22.488446
2023-05-06 18:11:47,658 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:17
2023-05-06 18:11:47,661 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 18:11:47,663 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 48G
2023-05-06 18:11:47,669 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 770M
2023-05-06 18:11:47,671 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9468, batch_loss: 0.2271, loss: 0.1372 ||: 100%|##########| 1250/1250 [00:21<00:00, 58.52it/s]

2023-05-06 18:12:09,040 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9322, batch_loss: 0.0116, loss: 0.1861 ||: 100%|##########| 157/157 [00:00<00:00, 162.09it/s]

2023-05-06 18:12:10,015 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:12:10,016 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.947  |     0.932
2023-05-06 18:12:10,020 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   770.362  |       N/A
2023-05-06 18:12:10,022 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.137  |     0.186
2023-05-06 18:12:10,023 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  49486.629  |       N/A





2023-05-06 18:12:10,606 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:22.945080
2023-05-06 18:12:10,610 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:59
2023-05-06 18:12:10,616 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 18:12:10,621 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 48G
2023-05-06 18:12:10,623 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 770M
2023-05-06 18:12:10,630 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9790, batch_loss: 0.0019, loss: 0.0584 ||: 100%|##########| 1250/1250 [00:21<00:00, 58.74it/s]

2023-05-06 18:12:31,914 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9256, batch_loss: 0.0040, loss: 0.2441 ||: 100%|##########| 157/157 [00:01<00:00, 149.30it/s]

2023-05-06 18:12:32,974 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:12:32,976 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.979  |     0.926
2023-05-06 18:12:32,977 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   770.362  |       N/A
2023-05-06 18:12:32,979 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.058  |     0.244
2023-05-06 18:12:32,981 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  49486.629  |       N/A





2023-05-06 18:12:33,608 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:22.992284
2023-05-06 18:12:33,610 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:38
2023-05-06 18:12:33,612 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 18:12:33,617 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 48G
2023-05-06 18:12:33,622 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 770M
2023-05-06 18:12:33,623 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9959, batch_loss: 0.1023, loss: 0.0153 ||: 100%|##########| 1250/1250 [00:20<00:00, 60.76it/s]

2023-05-06 18:12:54,207 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9238, batch_loss: 0.0011, loss: 0.2937 ||: 100%|##########| 157/157 [00:01<00:00, 138.44it/s]

2023-05-06 18:12:55,349 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:12:55,350 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.996  |     0.924
2023-05-06 18:12:55,353 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   770.362  |       N/A
2023-05-06 18:12:55,354 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.015  |     0.294
2023-05-06 18:12:55,356 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  49486.629  |       N/A





2023-05-06 18:12:55,977 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:22.364959
2023-05-06 18:12:55,980 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:15
2023-05-06 18:12:55,984 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 18:12:55,986 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 48G
2023-05-06 18:12:55,988 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 770M
2023-05-06 18:12:55,990 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9996, batch_loss: 0.0017, loss: 0.0031 ||: 100%|##########| 1250/1250 [00:20<00:00, 59.86it/s]

2023-05-06 18:13:16,882 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9240, batch_loss: 0.0003, loss: 0.3714 ||: 100%|##########| 157/157 [00:01<00:00, 142.04it/s]

2023-05-06 18:13:17,995 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:13:17,997 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.924
2023-05-06 18:13:17,998 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   770.362  |       N/A
2023-05-06 18:13:18,002 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.003  |     0.371
2023-05-06 18:13:18,003 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  49486.629  |       N/A





2023-05-06 18:13:18,625 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:22.641238
2023-05-06 18:13:18,631 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 18:13:18,675 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 49486.62890625,
  "peak_gpu_0_memory_MB": 770.3623046875,
  "training_duration": "0:01:52.834081",
  "epoch": 4,
  "training_accuracy": 0.9996,
  "training_loss": 0.003076088566472754,
  "training_worker_0_memory_MB": 49486.62890625,
  "training_gpu_0_memory_MB": 770.3623046875,
  "validation_accuracy": 0.924,
  "validation_loss": 0.3714158395868815,
  "best_validation_accuracy": 0.9322,
  "best_validation_loss": 0.18614072065539422
}
2023-05-06 18:13:18,677 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/69/model.tar.gz


[32m[I 2023-05-06 18:13:21,518][0m Trial 69 finished with value: 0.9322 and parameters: {'embedding_dim': 111, 'max_filter_size': 5, 'num_filters': 52}. Best is trial 41 with value: 0.9342.[0m


2023-05-06 18:13:21,651 - INFO - allennlp.common.params - evaluation = None
2023-05-06 18:13:21,652 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 18:13:21,664 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 18:13:21,665 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 18:13:21,667 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 18:13:21,668 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 18:13:21,669 - INFO - allennlp.common.params - type = default
2023-05-06 18:13:21,672 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 18:13:21,675 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 18:13:21,678 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 18:13:21,679 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 18:13:21,681 - INFO - all

loading instances: 40000it [01:37, 409.20it/s]

2023-05-06 18:14:59,484 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 18:14:59,488 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 18:14:59,489 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 18:14:59,490 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 18:14:59,496 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 18:14:59,498 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 18:14:59,501 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 18:14:59,501 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 18:14:59,507 - INFO - allennlp.common.params - validation_data_loader.start_method = fork





2023-05-06 18:14:59,508 - INFO - allennlp.common.params - validation_data_loader.cuda_device = None
2023-05-06 18:14:59,509 - INFO - allennlp.common.params - validation_data_loader.quiet = False
2023-05-06 18:14:59,510 - INFO - allennlp.common.params - validation_data_loader.collate_fn = <allennlp.data.data_loaders.data_collator.DefaultDataCollator object at 0x7f1035c88d60>


loading instances: 5000it [00:15, 333.09it/s]

2023-05-06 18:15:14,527 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 18:15:14,531 - INFO - allennlp.common.params - type = from_instances
2023-05-06 18:15:14,532 - INFO - allennlp.common.params - min_count = None
2023-05-06 18:15:14,533 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 18:15:14,534 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 18:15:14,535 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 18:15:14,536 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 18:15:14,537 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 18:15:14,538 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 18:15:14,540 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 18:15:14,547 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 18


building vocab: 40000it [00:04, 8999.09it/s]


2023-05-06 18:15:19,179 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 18:15:19,180 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 18:15:19,184 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 18:15:19,186 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 18:15:19,190 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 124
2023-05-06 18:15:19,195 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 18:15:19,196 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 18:15:19,198 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 18:15:19,200 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 18:15:26,186 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 18:15:26,188 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 18:15:26,194 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9064, batch_loss: 0.0760, loss: 0.2428 ||: 100%|##########| 1250/1250 [00:25<00:00, 48.50it/s]

2023-05-06 18:15:51,820 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 18:15:51,835 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 18:15:51,840 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 18:15:51,845 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9234, batch_loss: 0.0290, loss: 0.1943 ||: 100%|##########| 157/157 [00:01<00:00, 139.77it/s]

2023-05-06 18:15:52,949 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:15:52,951 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.906  |     0.923
2023-05-06 18:15:52,955 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   770.362  |       N/A
2023-05-06 18:15:52,956 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.243  |     0.194
2023-05-06 18:15:52,958 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  50122.184  |       N/A





2023-05-06 18:15:53,559 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:27.523647
2023-05-06 18:15:53,561 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:02
2023-05-06 18:15:53,570 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 18:15:53,575 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 49G
2023-05-06 18:15:53,580 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 897M
2023-05-06 18:15:53,582 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9487, batch_loss: 0.2092, loss: 0.1322 ||: 100%|##########| 1250/1250 [00:25<00:00, 49.53it/s]

2023-05-06 18:16:18,824 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9320, batch_loss: 0.0147, loss: 0.1816 ||: 100%|##########| 157/157 [00:01<00:00, 142.70it/s]

2023-05-06 18:16:19,930 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:16:19,931 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.949  |     0.932
2023-05-06 18:16:19,933 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   897.469  |       N/A
2023-05-06 18:16:19,937 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.132  |     0.182
2023-05-06 18:16:19,939 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  50169.160  |       N/A





2023-05-06 18:16:20,535 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:26.965821
2023-05-06 18:16:20,537 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:35
2023-05-06 18:16:20,548 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 18:16:20,550 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 49G
2023-05-06 18:16:20,553 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 898M
2023-05-06 18:16:20,554 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9821, batch_loss: 0.0028, loss: 0.0508 ||: 100%|##########| 1250/1250 [00:25<00:00, 49.40it/s]

2023-05-06 18:16:45,867 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9262, batch_loss: 0.0028, loss: 0.2420 ||: 100%|##########| 157/157 [00:01<00:00, 104.25it/s]

2023-05-06 18:16:47,385 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:16:47,386 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.982  |     0.926
2023-05-06 18:16:47,388 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   897.593  |       N/A
2023-05-06 18:16:47,391 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.051  |     0.242
2023-05-06 18:16:47,394 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  50169.160  |       N/A





2023-05-06 18:16:48,235 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:27.687113
2023-05-06 18:16:48,238 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:09
2023-05-06 18:16:48,240 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 18:16:48,244 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 49G
2023-05-06 18:16:48,246 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 897M
2023-05-06 18:16:48,248 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9971, batch_loss: 0.0155, loss: 0.0109 ||: 100%|##########| 1250/1250 [00:25<00:00, 49.47it/s]

2023-05-06 18:17:13,522 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9242, batch_loss: 0.0012, loss: 0.3015 ||: 100%|##########| 157/157 [00:01<00:00, 145.61it/s]

2023-05-06 18:17:14,607 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:17:14,608 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.997  |     0.924
2023-05-06 18:17:14,610 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   897.409  |       N/A
2023-05-06 18:17:14,612 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.011  |     0.302
2023-05-06 18:17:14,614 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  50169.160  |       N/A





2023-05-06 18:17:15,222 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:26.982441
2023-05-06 18:17:15,224 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:42
2023-05-06 18:17:15,228 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 18:17:15,230 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 49G
2023-05-06 18:17:15,232 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 898M
2023-05-06 18:17:15,234 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9998, batch_loss: 0.0005, loss: 0.0020 ||: 100%|##########| 1250/1250 [00:25<00:00, 49.53it/s]

2023-05-06 18:17:40,475 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9242, batch_loss: 0.0010, loss: 0.3676 ||: 100%|##########| 157/157 [00:01<00:00, 146.54it/s]

2023-05-06 18:17:41,554 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:17:41,556 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.924
2023-05-06 18:17:41,557 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   897.579  |       N/A
2023-05-06 18:17:41,558 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.002  |     0.368
2023-05-06 18:17:41,559 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  50169.160  |       N/A





2023-05-06 18:17:42,209 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:26.980562
2023-05-06 18:17:42,211 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 18:17:42,274 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 50169.16015625,
  "peak_gpu_0_memory_MB": 897.59326171875,
  "training_duration": "0:02:15.518211",
  "epoch": 4,
  "training_accuracy": 0.9998,
  "training_loss": 0.0020047564954846168,
  "training_worker_0_memory_MB": 50169.16015625,
  "training_gpu_0_memory_MB": 897.57861328125,
  "validation_accuracy": 0.9242,
  "validation_loss": 0.36762831912221755,
  "best_validation_accuracy": 0.932,
  "best_validation_loss": 0.18159138572633646
}
2023-05-06 18:17:42,276 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/70/model.tar.gz


[32m[I 2023-05-06 18:17:45,734][0m Trial 70 finished with value: 0.932 and parameters: {'embedding_dim': 124, 'max_filter_size': 5, 'num_filters': 67}. Best is trial 41 with value: 0.9342.[0m


2023-05-06 18:17:45,935 - INFO - allennlp.common.params - evaluation = None
2023-05-06 18:17:45,940 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 18:17:45,944 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 18:17:45,945 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 18:17:45,948 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 18:17:45,953 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 18:17:45,955 - INFO - allennlp.common.params - type = default
2023-05-06 18:17:45,960 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 18:17:45,962 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 18:17:45,963 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 18:17:45,964 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 18:17:45,965 - INFO - all

loading instances: 40000it [01:38, 407.67it/s]

2023-05-06 18:19:24,135 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 18:19:24,137 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32





2023-05-06 18:19:24,143 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 18:19:24,145 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 18:19:24,146 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 18:19:24,147 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 18:19:24,148 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 18:19:24,149 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 18:19:24,150 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 18:19:24,151 - INFO - allennlp.common.params - validation_data_loader.cuda_device = None
2023-05-06 18:19:24,154 - INFO - allennlp.common.params - validation_data_loader.quiet = False
2023-05-06 18:19:24,155 - INFO - allennlp.common.params - validation_data_loader.collate_fn

loading instances: 5000it [00:12, 409.85it/s]

2023-05-06 18:19:36,361 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 18:19:36,363 - INFO - allennlp.common.params - type = from_instances
2023-05-06 18:19:36,365 - INFO - allennlp.common.params - min_count = None
2023-05-06 18:19:36,369 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 18:19:36,370 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 18:19:36,371 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 18:19:36,372 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 18:19:36,373 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 18:19:36,374 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 18:19:36,375 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 18:19:36,376 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 18


building vocab: 40000it [00:04, 9044.38it/s]


2023-05-06 18:19:40,982 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 18:19:40,984 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 18:19:40,989 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 18:19:40,994 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 18:19:40,996 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 132
2023-05-06 18:19:40,998 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 18:19:41,000 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 18:19:41,001 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 18:19:41,002 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 18:19:48,166 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 18:19:48,169 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 18:19:48,174 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9082, batch_loss: 0.0595, loss: 0.2417 ||: 100%|##########| 1250/1250 [00:29<00:00, 42.47it/s]

2023-05-06 18:20:17,450 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 18:20:17,465 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 18:20:17,467 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 18:20:17,472 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9224, batch_loss: 0.0326, loss: 0.1976 ||: 100%|##########| 157/157 [00:01<00:00, 129.66it/s]

2023-05-06 18:20:18,670 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:20:18,672 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.908  |     0.922
2023-05-06 18:20:18,676 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   897.646  |       N/A
2023-05-06 18:20:18,678 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.242  |     0.198
2023-05-06 18:20:18,679 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  50734.773  |       N/A





2023-05-06 18:20:19,343 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:31.333654
2023-05-06 18:20:19,345 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:35
2023-05-06 18:20:19,347 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 18:20:19,349 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 50G
2023-05-06 18:20:19,351 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.0G
2023-05-06 18:20:19,352 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9506, batch_loss: 0.1328, loss: 0.1310 ||: 100%|##########| 1250/1250 [00:29<00:00, 42.45it/s]

2023-05-06 18:20:48,809 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9302, batch_loss: 0.0074, loss: 0.1813 ||: 100%|##########| 157/157 [00:01<00:00, 110.73it/s]

2023-05-06 18:20:50,242 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:20:50,244 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.951  |     0.930
2023-05-06 18:20:50,246 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1035.891  |       N/A
2023-05-06 18:20:50,247 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.131  |     0.181
2023-05-06 18:20:50,250 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  50784.688  |       N/A





2023-05-06 18:20:50,997 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:31.650075
2023-05-06 18:20:50,999 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:08
2023-05-06 18:20:51,002 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 18:20:51,003 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 50G
2023-05-06 18:20:51,010 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.0G
2023-05-06 18:20:51,012 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9837, batch_loss: 0.0012, loss: 0.0491 ||: 100%|##########| 1250/1250 [00:29<00:00, 43.00it/s]

2023-05-06 18:21:20,086 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9252, batch_loss: 0.0021, loss: 0.2715 ||: 100%|##########| 157/157 [00:01<00:00, 132.49it/s]

2023-05-06 18:21:21,279 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:21:21,281 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.984  |     0.925
2023-05-06 18:21:21,282 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1036.015  |       N/A
2023-05-06 18:21:21,285 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.049  |     0.272





2023-05-06 18:21:21,289 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  50784.688  |       N/A
2023-05-06 18:21:22,061 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:31.059476
2023-05-06 18:21:22,063 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:37
2023-05-06 18:21:22,065 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 18:21:22,068 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 50G
2023-05-06 18:21:22,069 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.0G
2023-05-06 18:21:22,076 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9972, batch_loss: 0.0787, loss: 0.0106 ||: 100%|##########| 1250/1250 [00:29<00:00, 42.24it/s]

2023-05-06 18:21:51,675 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9242, batch_loss: 0.0005, loss: 0.2907 ||: 100%|##########| 157/157 [00:01<00:00, 134.77it/s]

2023-05-06 18:21:52,848 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:21:52,851 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.997  |     0.924
2023-05-06 18:21:52,853 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1035.831  |       N/A
2023-05-06 18:21:52,854 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.011  |     0.291
2023-05-06 18:21:52,855 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  50784.688  |       N/A





2023-05-06 18:21:53,562 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:31.496327
2023-05-06 18:21:53,569 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:07
2023-05-06 18:21:53,575 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 18:21:53,576 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 50G
2023-05-06 18:21:53,579 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.0G
2023-05-06 18:21:53,581 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9996, batch_loss: 0.0011, loss: 0.0020 ||: 100%|##########| 1250/1250 [00:29<00:00, 43.07it/s]

2023-05-06 18:22:22,612 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9256, batch_loss: 0.0001, loss: 0.3525 ||: 100%|##########| 157/157 [00:01<00:00, 120.42it/s]

2023-05-06 18:22:23,924 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation





2023-05-06 18:22:23,928 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.926
2023-05-06 18:22:23,930 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1036.000  |       N/A
2023-05-06 18:22:23,932 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.002  |     0.352
2023-05-06 18:22:23,935 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  50784.688  |       N/A
2023-05-06 18:22:24,695 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:31.120215
2023-05-06 18:22:24,700 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 18:22:24,766 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 50784.6875,
  "peak_gpu_0_memory_MB": 1036.01513671875,
  "training_duration": "0:02:35.914061",
  "epoch": 4,
  "training_accuracy": 0.999625,
  "training_loss": 0.0020

[32m[I 2023-05-06 18:22:28,402][0m Trial 71 finished with value: 0.9302 and parameters: {'embedding_dim': 132, 'max_filter_size': 5, 'num_filters': 87}. Best is trial 41 with value: 0.9342.[0m


2023-05-06 18:22:28,555 - INFO - allennlp.common.params - evaluation = None
2023-05-06 18:22:28,558 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 18:22:28,563 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 18:22:28,564 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 18:22:28,566 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 18:22:28,568 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 18:22:28,569 - INFO - allennlp.common.params - type = default
2023-05-06 18:22:28,571 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 18:22:28,572 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 18:22:28,574 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 18:22:28,575 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 18:22:28,577 - INFO - all

loading instances: 40000it [01:41, 392.48it/s]

2023-05-06 18:24:10,533 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 18:24:10,536 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 18:24:10,537 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 18:24:10,539 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 18:24:10,541 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 18:24:10,542 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 18:24:10,543 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 18:24:10,544 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 18:24:10,545 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 18:24:10,546 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:10, 483.65it/s]

2023-05-06 18:24:20,892 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 18:24:20,896 - INFO - allennlp.common.params - type = from_instances
2023-05-06 18:24:20,898 - INFO - allennlp.common.params - min_count = None
2023-05-06 18:24:20,903 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 18:24:20,905 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 18:24:20,907 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 18:24:20,908 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 18:24:20,909 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 18:24:20,913 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 18:24:20,914 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 18:24:20,915 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 18


building vocab: 40000it [00:06, 6266.35it/s]


2023-05-06 18:24:27,494 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 18:24:27,496 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 18:24:27,499 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 18:24:27,502 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 18:24:27,505 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 98
2023-05-06 18:24:27,506 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 18:24:27,509 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 18:24:27,511 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 18:24:27,514 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tok

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 18:24:32,853 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 18:24:32,855 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 18:24:32,860 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9082, batch_loss: 0.0955, loss: 0.2466 ||: 100%|##########| 1250/1250 [00:18<00:00, 66.14it/s]

2023-05-06 18:24:51,635 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 18:24:51,647 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 18:24:51,649 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 18:24:51,653 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9230, batch_loss: 0.0444, loss: 0.1947 ||: 100%|##########| 157/157 [00:00<00:00, 191.09it/s]

2023-05-06 18:24:52,464 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:24:52,467 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.908  |     0.923





2023-05-06 18:24:52,469 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1036.068  |       N/A
2023-05-06 18:24:52,470 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.247  |     0.195
2023-05-06 18:24:52,471 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  51336.648  |       N/A
2023-05-06 18:24:52,963 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:20.235313
2023-05-06 18:24:52,976 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:57
2023-05-06 18:24:52,978 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 18:24:52,983 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 50G
2023-05-06 18:24:52,985 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 668M
2023-05-06 18:24:52,986 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9479, batch_loss: 0.2430, loss: 0.1355 ||: 100%|##########| 1250/1250 [00:19<00:00, 65.74it/s]

2023-05-06 18:25:12,008 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9314, batch_loss: 0.0125, loss: 0.1827 ||: 100%|##########| 157/157 [00:00<00:00, 199.44it/s]

2023-05-06 18:25:12,802 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:25:12,803 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.948  |     0.931
2023-05-06 18:25:12,805 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   668.201  |       N/A
2023-05-06 18:25:12,810 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.136  |     0.183
2023-05-06 18:25:12,811 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  51373.898  |       N/A





2023-05-06 18:25:13,256 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:20.278044
2023-05-06 18:25:13,260 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:40
2023-05-06 18:25:13,262 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 18:25:13,263 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 50G
2023-05-06 18:25:13,265 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 668M
2023-05-06 18:25:13,267 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9799, batch_loss: 0.0010, loss: 0.0578 ||: 100%|##########| 1250/1250 [00:19<00:00, 65.24it/s]

2023-05-06 18:25:32,436 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9248, batch_loss: 0.0096, loss: 0.2564 ||: 100%|##########| 157/157 [00:00<00:00, 197.77it/s]

2023-05-06 18:25:33,237 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:25:33,239 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.980  |     0.925
2023-05-06 18:25:33,244 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   668.201  |       N/A
2023-05-06 18:25:33,245 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.058  |     0.256
2023-05-06 18:25:33,247 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  51373.898  |       N/A





2023-05-06 18:25:33,754 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:20.492695
2023-05-06 18:25:33,756 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:21
2023-05-06 18:25:33,762 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 18:25:33,766 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 50G
2023-05-06 18:25:33,770 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 668M
2023-05-06 18:25:33,774 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9958, batch_loss: 0.0773, loss: 0.0157 ||: 100%|##########| 1250/1250 [00:19<00:00, 65.57it/s]

2023-05-06 18:25:52,849 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9242, batch_loss: 0.0017, loss: 0.2762 ||: 100%|##########| 157/157 [00:00<00:00, 191.08it/s]

2023-05-06 18:25:53,679 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:25:53,681 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.996  |     0.924
2023-05-06 18:25:53,686 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   668.201  |       N/A
2023-05-06 18:25:53,687 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.016  |     0.276
2023-05-06 18:25:53,689 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  51373.898  |       N/A





2023-05-06 18:25:54,182 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:20.419593
2023-05-06 18:25:54,184 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:01
2023-05-06 18:25:54,187 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 18:25:54,190 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 50G
2023-05-06 18:25:54,192 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 668M
2023-05-06 18:25:54,194 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9997, batch_loss: 0.0014, loss: 0.0028 ||: 100%|##########| 1250/1250 [00:19<00:00, 65.67it/s]

2023-05-06 18:26:13,235 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9226, batch_loss: 0.0001, loss: 0.3188 ||: 100%|##########| 157/157 [00:00<00:00, 195.64it/s]

2023-05-06 18:26:14,044 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:26:14,045 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.923
2023-05-06 18:26:14,050 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   668.201  |       N/A
2023-05-06 18:26:14,052 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.003  |     0.319
2023-05-06 18:26:14,054 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  51373.898  |       N/A





2023-05-06 18:26:14,553 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:20.365628
2023-05-06 18:26:14,554 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 18:26:14,606 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 51373.8984375,
  "peak_gpu_0_memory_MB": 1036.068359375,
  "training_duration": "0:01:41.316306",
  "epoch": 4,
  "training_accuracy": 0.9997,
  "training_loss": 0.0028037595624278767,
  "training_worker_0_memory_MB": 51373.8984375,
  "training_gpu_0_memory_MB": 668.201171875,
  "validation_accuracy": 0.9226,
  "validation_loss": 0.31880222962048443,
  "best_validation_accuracy": 0.9314,
  "best_validation_loss": 0.18268614497249294
}
2023-05-06 18:26:14,608 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/72/model.tar.gz


[32m[I 2023-05-06 18:26:16,643][0m Trial 72 finished with value: 0.9314 and parameters: {'embedding_dim': 98, 'max_filter_size': 5, 'num_filters': 42}. Best is trial 41 with value: 0.9342.[0m


2023-05-06 18:26:16,809 - INFO - allennlp.common.params - evaluation = None
2023-05-06 18:26:16,812 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 18:26:16,814 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 18:26:16,816 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 18:26:16,818 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 18:26:16,819 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 18:26:16,822 - INFO - allennlp.common.params - type = default
2023-05-06 18:26:16,823 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 18:26:16,826 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 18:26:16,827 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 18:26:16,829 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 18:26:16,833 - INFO - all

loading instances: 40000it [01:37, 411.04it/s]

2023-05-06 18:27:54,224 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 18:27:54,226 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 18:27:54,231 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 18:27:54,232 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 18:27:54,233 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 18:27:54,234 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 18:27:54,235 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 18:27:54,236 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 18:27:54,239 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 18:27:54,240 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:11, 426.79it/s]

2023-05-06 18:28:05,965 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 18:28:05,968 - INFO - allennlp.common.params - type = from_instances
2023-05-06 18:28:05,970 - INFO - allennlp.common.params - min_count = None
2023-05-06 18:28:05,971 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 18:28:05,977 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 18:28:05,979 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 18:28:05,981 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 18:28:05,982 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 18:28:05,983 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 18:28:05,984 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 18:28:05,985 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 18


building vocab: 40000it [00:04, 8725.62it/s]


2023-05-06 18:28:10,769 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 18:28:10,770 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 18:28:10,774 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 18:28:10,776 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 18:28:10,778 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 122
2023-05-06 18:28:10,780 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 18:28:10,782 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 18:28:10,784 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 18:28:10,784 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 18:28:21,594 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 18:28:21,596 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 18:28:21,603 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9113, batch_loss: 0.0743, loss: 0.2389 ||: 100%|##########| 1250/1250 [00:28<00:00, 43.33it/s]

2023-05-06 18:28:50,310 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 18:28:50,331 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 18:28:50,334 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 18:28:50,341 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9218, batch_loss: 0.0226, loss: 0.1955 ||: 100%|##########| 157/157 [00:01<00:00, 117.38it/s]

2023-05-06 18:28:51,659 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:28:51,664 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.911  |     0.922
2023-05-06 18:28:51,665 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   668.201  |       N/A
2023-05-06 18:28:51,667 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.239  |     0.196
2023-05-06 18:28:51,669 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  51973.035  |       N/A





2023-05-06 18:28:52,337 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:30.883529
2023-05-06 18:28:52,339 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:31
2023-05-06 18:28:52,343 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 18:28:52,345 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 51G
2023-05-06 18:28:52,350 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 18:28:52,353 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9516, batch_loss: 0.2240, loss: 0.1265 ||: 100%|##########| 1250/1250 [00:29<00:00, 42.38it/s]

2023-05-06 18:29:21,852 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9314, batch_loss: 0.0105, loss: 0.1898 ||: 100%|##########| 157/157 [00:01<00:00, 105.74it/s]

2023-05-06 18:29:23,346 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:29:23,347 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.952  |     0.931
2023-05-06 18:29:23,354 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1165.050  |       N/A
2023-05-06 18:29:23,355 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.126  |     0.190
2023-05-06 18:29:23,356 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  52019.590  |       N/A





2023-05-06 18:29:23,975 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:31.632511
2023-05-06 18:29:23,989 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:07
2023-05-06 18:29:23,991 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 18:29:23,997 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 51G
2023-05-06 18:29:24,003 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 18:29:24,005 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9848, batch_loss: 0.0018, loss: 0.0443 ||: 100%|##########| 1250/1250 [00:28<00:00, 43.40it/s]

2023-05-06 18:29:52,816 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9274, batch_loss: 0.0003, loss: 0.2580 ||: 100%|##########| 157/157 [00:01<00:00, 113.40it/s]

2023-05-06 18:29:54,208 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:29:54,210 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.985  |     0.927
2023-05-06 18:29:54,212 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1165.175  |       N/A
2023-05-06 18:29:54,214 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.044  |     0.258
2023-05-06 18:29:54,216 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  52019.590  |       N/A





2023-05-06 18:29:55,092 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:31.100477
2023-05-06 18:29:55,093 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:36
2023-05-06 18:29:55,097 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 18:29:55,098 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 51G
2023-05-06 18:29:55,100 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 18:29:55,101 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9975, batch_loss: 0.0474, loss: 0.0091 ||: 100%|##########| 1250/1250 [00:29<00:00, 42.52it/s]

2023-05-06 18:30:24,505 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9212, batch_loss: 0.0002, loss: 0.3252 ||: 100%|##########| 157/157 [00:01<00:00, 123.45it/s]

2023-05-06 18:30:25,784 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:30:25,789 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.997  |     0.921
2023-05-06 18:30:25,793 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1164.991  |       N/A
2023-05-06 18:30:25,797 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.009  |     0.325





2023-05-06 18:30:25,798 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  52019.590  |       N/A
2023-05-06 18:30:26,418 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:31.321805
2023-05-06 18:30:26,421 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:06
2023-05-06 18:30:26,423 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 18:30:26,425 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 51G
2023-05-06 18:30:26,427 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 18:30:26,429 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9999, batch_loss: 0.0058, loss: 0.0013 ||: 100%|##########| 1250/1250 [00:28<00:00, 43.63it/s]

2023-05-06 18:30:55,082 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9192, batch_loss: 0.0000, loss: 0.3567 ||: 100%|##########| 157/157 [00:01<00:00, 115.12it/s]

2023-05-06 18:30:56,453 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:30:56,455 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.919
2023-05-06 18:30:56,457 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1165.160  |       N/A
2023-05-06 18:30:56,461 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.001  |     0.357
2023-05-06 18:30:56,462 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  52019.590  |       N/A





2023-05-06 18:30:57,165 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:30.742323
2023-05-06 18:30:57,167 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 18:30:57,228 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 52019.58984375,
  "peak_gpu_0_memory_MB": 1165.1748046875,
  "training_duration": "0:02:34.998203",
  "epoch": 4,
  "training_accuracy": 0.999875,
  "training_loss": 0.0013446044291111322,
  "training_worker_0_memory_MB": 52019.58984375,
  "training_gpu_0_memory_MB": 1165.16015625,
  "validation_accuracy": 0.9192,
  "validation_loss": 0.35672905833663515,
  "best_validation_accuracy": 0.9314,
  "best_validation_loss": 0.18978793323514567
}
2023-05-06 18:30:57,230 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/73/model.tar.gz


[32m[I 2023-05-06 18:31:00,760][0m Trial 73 finished with value: 0.9314 and parameters: {'embedding_dim': 122, 'max_filter_size': 5, 'num_filters': 122}. Best is trial 41 with value: 0.9342.[0m


2023-05-06 18:31:00,968 - INFO - allennlp.common.params - evaluation = None
2023-05-06 18:31:00,972 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 18:31:00,975 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 18:31:00,979 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 18:31:00,980 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 18:31:00,985 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 18:31:00,987 - INFO - allennlp.common.params - type = default
2023-05-06 18:31:00,988 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 18:31:00,990 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 18:31:00,994 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 18:31:00,995 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 18:31:00,996 - INFO - all

loading instances: 40000it [01:36, 414.39it/s]

2023-05-06 18:32:37,571 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 18:32:37,574 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 18:32:37,576 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 18:32:37,578 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 18:32:37,579 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 18:32:37,581 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 18:32:37,582 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 18:32:37,583 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 18:32:37,584 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 18:32:37,585 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:15, 317.03it/s]

2023-05-06 18:32:53,362 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 18:32:53,366 - INFO - allennlp.common.params - type = from_instances
2023-05-06 18:32:53,368 - INFO - allennlp.common.params - min_count = None
2023-05-06 18:32:53,370 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 18:32:53,372 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 18:32:53,375 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 18:32:53,377 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 18:32:53,380 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 18:32:53,382 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 18:32:53,383 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 18:32:53,386 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 18


building vocab: 40000it [00:05, 6971.00it/s]


2023-05-06 18:32:59,400 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 18:32:59,404 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 18:32:59,406 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 18:32:59,407 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 18:32:59,409 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 114
2023-05-06 18:32:59,412 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 18:32:59,413 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 18:32:59,414 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 18:32:59,416 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 18:33:06,959 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 18:33:06,964 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 18:33:06,971 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9090, batch_loss: 0.0618, loss: 0.2433 ||: 100%|##########| 1250/1250 [00:26<00:00, 46.80it/s]

2023-05-06 18:33:33,545 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 18:33:33,561 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 18:33:33,562 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 18:33:33,567 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9210, batch_loss: 0.0612, loss: 0.2026 ||: 100%|##########| 157/157 [00:01<00:00, 126.83it/s]

2023-05-06 18:33:34,790 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:33:34,794 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.909  |     0.921
2023-05-06 18:33:34,798 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1165.228  |       N/A
2023-05-06 18:33:34,799 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.243  |     0.203
2023-05-06 18:33:34,802 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  52614.559  |       N/A





2023-05-06 18:33:35,476 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:28.651232
2023-05-06 18:33:35,480 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:11
2023-05-06 18:33:35,482 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 18:33:35,486 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 51G
2023-05-06 18:33:35,488 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.0G
2023-05-06 18:33:35,489 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9493, batch_loss: 0.2306, loss: 0.1311 ||: 100%|##########| 1250/1250 [00:27<00:00, 45.16it/s]

2023-05-06 18:34:03,180 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9340, batch_loss: 0.0066, loss: 0.1870 ||: 100%|##########| 157/157 [00:01<00:00, 112.87it/s]

2023-05-06 18:34:04,579 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:34:04,581 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.949  |     0.934
2023-05-06 18:34:04,583 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1053.125  |       N/A
2023-05-06 18:34:04,585 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.131  |     0.187
2023-05-06 18:34:04,586 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  52657.930  |       N/A





2023-05-06 18:34:05,167 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:29.684740
2023-05-06 18:34:05,168 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:51
2023-05-06 18:34:05,172 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 18:34:05,178 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 51G
2023-05-06 18:34:05,182 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.0G
2023-05-06 18:34:05,185 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9841, batch_loss: 0.0013, loss: 0.0462 ||: 100%|##########| 1250/1250 [00:26<00:00, 46.61it/s]

2023-05-06 18:34:32,014 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9258, batch_loss: 0.0016, loss: 0.2760 ||: 100%|##########| 157/157 [00:01<00:00, 127.96it/s]

2023-05-06 18:34:33,248 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:34:33,250 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.984  |     0.926
2023-05-06 18:34:33,251 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1053.250  |       N/A
2023-05-06 18:34:33,252 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.046  |     0.276
2023-05-06 18:34:33,253 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  52657.930  |       N/A





2023-05-06 18:34:33,860 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:28.687132
2023-05-06 18:34:33,861 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:21
2023-05-06 18:34:33,864 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 18:34:33,867 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 51G
2023-05-06 18:34:33,869 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.0G
2023-05-06 18:34:33,871 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9974, batch_loss: 0.0207, loss: 0.0101 ||: 100%|##########| 1250/1250 [00:27<00:00, 45.69it/s]

2023-05-06 18:35:01,238 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9242, batch_loss: 0.0002, loss: 0.3102 ||: 100%|##########| 157/157 [00:01<00:00, 96.21it/s]

2023-05-06 18:35:02,877 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:35:02,878 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.997  |     0.924
2023-05-06 18:35:02,880 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1053.066  |       N/A
2023-05-06 18:35:02,884 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.010  |     0.310
2023-05-06 18:35:02,885 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  52657.930  |       N/A





2023-05-06 18:35:03,656 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:29.792586
2023-05-06 18:35:03,660 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:54
2023-05-06 18:35:03,661 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 18:35:03,663 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 51G
2023-05-06 18:35:03,667 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.0G
2023-05-06 18:35:03,669 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9999, batch_loss: 0.0019, loss: 0.0014 ||: 100%|##########| 1250/1250 [00:27<00:00, 45.75it/s]

2023-05-06 18:35:30,997 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9242, batch_loss: 0.0001, loss: 0.3570 ||: 100%|##########| 157/157 [00:01<00:00, 130.60it/s]

2023-05-06 18:35:32,210 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:35:32,215 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.924
2023-05-06 18:35:32,217 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1053.235  |       N/A
2023-05-06 18:35:32,218 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.001  |     0.357
2023-05-06 18:35:32,219 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  52657.930  |       N/A





2023-05-06 18:35:32,851 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:29.190199
2023-05-06 18:35:32,853 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 18:35:32,912 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 52657.9296875,
  "peak_gpu_0_memory_MB": 1165.22802734375,
  "training_duration": "0:02:25.385328",
  "epoch": 4,
  "training_accuracy": 0.9999,
  "training_loss": 0.0014286137114700977,
  "training_worker_0_memory_MB": 52657.9296875,
  "training_gpu_0_memory_MB": 1053.2353515625,
  "validation_accuracy": 0.9242,
  "validation_loss": 0.35698468265281974,
  "best_validation_accuracy": 0.934,
  "best_validation_loss": 0.18695884044311797
}
2023-05-06 18:35:32,914 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/74/model.tar.gz


[32m[I 2023-05-06 18:35:35,418][0m Trial 74 finished with value: 0.934 and parameters: {'embedding_dim': 114, 'max_filter_size': 5, 'num_filters': 107}. Best is trial 41 with value: 0.9342.[0m


2023-05-06 18:35:35,553 - INFO - allennlp.common.params - evaluation = None
2023-05-06 18:35:35,556 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 18:35:35,559 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 18:35:35,566 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 18:35:35,567 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 18:35:35,570 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 18:35:35,571 - INFO - allennlp.common.params - type = default
2023-05-06 18:35:35,572 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 18:35:35,574 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 18:35:35,575 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 18:35:35,576 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 18:35:35,577 - INFO - all

loading instances: 40000it [01:34, 423.11it/s]

2023-05-06 18:37:12,393 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 18:37:12,396 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 18:37:12,397 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 18:37:12,398 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 18:37:12,399 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 18:37:12,400 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 18:37:12,401 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 18:37:12,403 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 18:37:12,406 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 18:37:12,407 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:14, 350.47it/s]

2023-05-06 18:37:26,685 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 18:37:26,688 - INFO - allennlp.common.params - type = from_instances
2023-05-06 18:37:26,689 - INFO - allennlp.common.params - min_count = None
2023-05-06 18:37:26,690 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 18:37:26,692 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 18:37:26,692 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 18:37:26,693 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 18:37:26,697 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 18:37:26,700 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 18:37:26,701 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 18:37:26,703 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 18


building vocab: 40000it [00:04, 8830.74it/s]


2023-05-06 18:37:31,479 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 18:37:31,481 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 18:37:31,486 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 18:37:31,488 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 18:37:31,490 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 117
2023-05-06 18:37:31,493 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 18:37:31,497 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 18:37:31,498 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 18:37:31,499 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 18:37:39,528 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 18:37:39,531 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 18:37:39,538 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9093, batch_loss: 0.0707, loss: 0.2421 ||: 100%|##########| 1250/1250 [00:27<00:00, 46.21it/s]

2023-05-06 18:38:06,432 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 18:38:06,452 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 18:38:06,455 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 18:38:06,461 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9220, batch_loss: 0.0614, loss: 0.1969 ||: 100%|##########| 157/157 [00:01<00:00, 128.48it/s]

2023-05-06 18:38:07,665 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:38:07,673 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.909  |     0.922
2023-05-06 18:38:07,677 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1053.303  |       N/A
2023-05-06 18:38:07,680 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.242  |     0.197
2023-05-06 18:38:07,682 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  53240.281  |       N/A





2023-05-06 18:38:08,274 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:28.901823
2023-05-06 18:38:08,275 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:14
2023-05-06 18:38:08,280 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 18:38:08,287 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 52G
2023-05-06 18:38:08,291 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.0G
2023-05-06 18:38:08,295 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9490, batch_loss: 0.2690, loss: 0.1338 ||: 100%|##########| 1250/1250 [00:27<00:00, 45.86it/s]

2023-05-06 18:38:35,557 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9310, batch_loss: 0.0197, loss: 0.1847 ||: 100%|##########| 157/157 [00:01<00:00, 109.92it/s]

2023-05-06 18:38:36,999 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:38:37,000 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.949  |     0.931
2023-05-06 18:38:37,002 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1051.712  |       N/A
2023-05-06 18:38:37,004 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.134  |     0.185
2023-05-06 18:38:37,005 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  53285.086  |       N/A





2023-05-06 18:38:37,834 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:29.553280
2023-05-06 18:38:37,839 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:50
2023-05-06 18:38:37,841 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 18:38:37,842 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 52G
2023-05-06 18:38:37,844 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.0G
2023-05-06 18:38:37,846 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9825, batch_loss: 0.0035, loss: 0.0512 ||: 100%|##########| 1250/1250 [00:27<00:00, 46.26it/s]

2023-05-06 18:39:04,875 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9262, batch_loss: 0.0051, loss: 0.2434 ||: 100%|##########| 157/157 [00:01<00:00, 134.66it/s]

2023-05-06 18:39:06,048 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:39:06,049 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.982  |     0.926
2023-05-06 18:39:06,052 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1051.836  |       N/A





2023-05-06 18:39:06,058 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.051  |     0.243
2023-05-06 18:39:06,059 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  53285.086  |       N/A
2023-05-06 18:39:06,682 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:28.841247
2023-05-06 18:39:06,683 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:22
2023-05-06 18:39:06,687 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 18:39:06,690 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 52G
2023-05-06 18:39:06,692 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.0G
2023-05-06 18:39:06,694 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9968, batch_loss: 0.1207, loss: 0.0114 ||: 100%|##########| 1250/1250 [00:26<00:00, 46.57it/s]

2023-05-06 18:39:33,543 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9258, batch_loss: 0.0003, loss: 0.3266 ||: 100%|##########| 157/157 [00:01<00:00, 108.88it/s]

2023-05-06 18:39:34,992 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:39:34,994 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.997  |     0.926
2023-05-06 18:39:34,995 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1051.652  |       N/A
2023-05-06 18:39:34,997 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.011  |     0.327
2023-05-06 18:39:35,002 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  53285.086  |       N/A





2023-05-06 18:39:35,753 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:29.066126
2023-05-06 18:39:35,755 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:53
2023-05-06 18:39:35,757 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 18:39:35,760 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 52G
2023-05-06 18:39:35,762 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.0G
2023-05-06 18:39:35,765 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9998, batch_loss: 0.0002, loss: 0.0021 ||: 100%|##########| 1250/1250 [00:27<00:00, 45.34it/s]

2023-05-06 18:40:03,342 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9282, batch_loss: 0.0001, loss: 0.4047 ||: 100%|##########| 157/157 [00:01<00:00, 132.29it/s]

2023-05-06 18:40:04,545 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:40:04,547 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.928
2023-05-06 18:40:04,548 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1051.822  |       N/A
2023-05-06 18:40:04,549 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.002  |     0.405
2023-05-06 18:40:04,552 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  53285.086  |       N/A





2023-05-06 18:40:05,185 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:29.428100
2023-05-06 18:40:05,186 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 18:40:05,252 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 53285.0859375,
  "peak_gpu_0_memory_MB": 1053.30322265625,
  "training_duration": "0:02:25.172466",
  "epoch": 4,
  "training_accuracy": 0.99975,
  "training_loss": 0.00208754541665694,
  "training_worker_0_memory_MB": 53285.0859375,
  "training_gpu_0_memory_MB": 1051.82177734375,
  "validation_accuracy": 0.9282,
  "validation_loss": 0.4047338245970197,
  "best_validation_accuracy": 0.931,
  "best_validation_loss": 0.18465296564635578
}
2023-05-06 18:40:05,256 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/75/model.tar.gz


[32m[I 2023-05-06 18:40:07,759][0m Trial 75 finished with value: 0.931 and parameters: {'embedding_dim': 117, 'max_filter_size': 5, 'num_filters': 104}. Best is trial 41 with value: 0.9342.[0m


2023-05-06 18:40:07,891 - INFO - allennlp.common.params - evaluation = None
2023-05-06 18:40:07,892 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 18:40:07,899 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 18:40:07,903 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 18:40:07,906 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 18:40:07,909 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 18:40:07,913 - INFO - allennlp.common.params - type = default
2023-05-06 18:40:07,914 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 18:40:07,917 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 18:40:07,919 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 18:40:07,922 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 18:40:07,926 - INFO - all

loading instances: 40000it [01:36, 416.27it/s]

2023-05-06 18:41:46,037 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 18:41:46,039 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 18:41:46,041 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 18:41:46,048 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 18:41:46,049 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 18:41:46,052 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 18:41:46,054 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 18:41:46,055 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 18:41:46,056 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 18:41:46,059 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:15, 331.66it/s]


2023-05-06 18:42:01,144 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 18:42:01,149 - INFO - allennlp.common.params - type = from_instances
2023-05-06 18:42:01,151 - INFO - allennlp.common.params - min_count = None
2023-05-06 18:42:01,152 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 18:42:01,154 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 18:42:01,155 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 18:42:01,156 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 18:42:01,157 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 18:42:01,158 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 18:42:01,159 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 18:42:01,160 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 18

building vocab: 40000it [00:04, 8854.58it/s]


2023-05-06 18:42:05,889 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 18:42:05,891 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 18:42:05,898 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 18:42:05,899 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 18:42:05,902 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 128
2023-05-06 18:42:05,905 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 18:42:05,908 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 18:42:05,910 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 18:42:05,913 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 18:42:13,168 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 18:42:13,170 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 18:42:13,176 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9094, batch_loss: 0.0823, loss: 0.2402 ||: 100%|##########| 1250/1250 [00:29<00:00, 42.97it/s]

2023-05-06 18:42:42,104 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 18:42:42,124 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 18:42:42,127 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 18:42:42,135 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9204, batch_loss: 0.0343, loss: 0.2018 ||: 100%|##########| 157/157 [00:01<00:00, 116.60it/s]

2023-05-06 18:42:43,461 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:42:43,467 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.909  |     0.920
2023-05-06 18:42:43,474 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1051.890  |       N/A
2023-05-06 18:42:43,475 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.240  |     0.202
2023-05-06 18:42:43,477 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  53858.242  |       N/A





2023-05-06 18:42:44,153 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:31.150186
2023-05-06 18:42:44,155 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:34
2023-05-06 18:42:44,157 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 18:42:44,160 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 53G
2023-05-06 18:42:44,163 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 18:42:44,164 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9512, batch_loss: 0.2186, loss: 0.1308 ||: 100%|##########| 1250/1250 [00:29<00:00, 42.28it/s]

2023-05-06 18:43:13,732 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9308, batch_loss: 0.0117, loss: 0.1848 ||: 100%|##########| 157/157 [00:01<00:00, 103.47it/s]

2023-05-06 18:43:15,256 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:43:15,258 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.951  |     0.931
2023-05-06 18:43:15,259 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1156.647  |       N/A
2023-05-06 18:43:15,260 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.131  |     0.185
2023-05-06 18:43:15,261 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  53906.914  |       N/A





2023-05-06 18:43:16,055 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:31.897870
2023-05-06 18:43:16,059 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:09
2023-05-06 18:43:16,062 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 18:43:16,064 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 53G
2023-05-06 18:43:16,067 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 18:43:16,071 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9843, batch_loss: 0.0010, loss: 0.0461 ||: 100%|##########| 1250/1250 [00:28<00:00, 43.46it/s]

2023-05-06 18:43:44,841 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9262, batch_loss: 0.0018, loss: 0.2416 ||: 100%|##########| 157/157 [00:01<00:00, 125.76it/s]

2023-05-06 18:43:46,097 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:43:46,099 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.984  |     0.926
2023-05-06 18:43:46,102 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1156.772  |       N/A
2023-05-06 18:43:46,106 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.046  |     0.242
2023-05-06 18:43:46,109 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  53906.914  |       N/A





2023-05-06 18:43:46,739 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:30.675992
2023-05-06 18:43:46,741 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:37
2023-05-06 18:43:46,743 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 18:43:46,748 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 53G
2023-05-06 18:43:46,750 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 18:43:46,752 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9979, batch_loss: 0.0832, loss: 0.0089 ||: 100%|##########| 1250/1250 [00:29<00:00, 43.09it/s]

2023-05-06 18:44:15,774 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9278, batch_loss: 0.0024, loss: 0.3171 ||: 100%|##########| 157/157 [00:01<00:00, 101.59it/s]

2023-05-06 18:44:17,328 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:44:17,329 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.998  |     0.928
2023-05-06 18:44:17,330 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1156.588  |       N/A
2023-05-06 18:44:17,332 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.009  |     0.317
2023-05-06 18:44:17,335 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  53906.914  |       N/A





2023-05-06 18:44:18,062 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:31.318972
2023-05-06 18:44:18,063 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:06
2023-05-06 18:44:18,068 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 18:44:18,070 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 53G
2023-05-06 18:44:18,074 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 18:44:18,078 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9998, batch_loss: 0.0004, loss: 0.0014 ||: 100%|##########| 1250/1250 [00:28<00:00, 43.83it/s]

2023-05-06 18:44:46,603 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9276, batch_loss: 0.0004, loss: 0.3739 ||: 100%|##########| 157/157 [00:01<00:00, 117.57it/s]

2023-05-06 18:44:47,946 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:44:47,947 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.928
2023-05-06 18:44:47,949 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1156.757  |       N/A
2023-05-06 18:44:47,951 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.001  |     0.374
2023-05-06 18:44:47,953 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  53906.914  |       N/A





2023-05-06 18:44:48,736 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:30.668098
2023-05-06 18:44:48,741 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 18:44:48,844 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 53906.9140625,
  "peak_gpu_0_memory_MB": 1156.77197265625,
  "training_duration": "0:02:34.942469",
  "epoch": 4,
  "training_accuracy": 0.999775,
  "training_loss": 0.0014367872848688422,
  "training_worker_0_memory_MB": 53906.9140625,
  "training_gpu_0_memory_MB": 1156.75732421875,
  "validation_accuracy": 0.9276,
  "validation_loss": 0.37386421661692,
  "best_validation_accuracy": 0.9308,
  "best_validation_loss": 0.18480814404334803
}
2023-05-06 18:44:48,847 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/76/model.tar.gz


[32m[I 2023-05-06 18:44:52,485][0m Trial 76 finished with value: 0.9308 and parameters: {'embedding_dim': 128, 'max_filter_size': 5, 'num_filters': 115}. Best is trial 41 with value: 0.9342.[0m


2023-05-06 18:44:52,697 - INFO - allennlp.common.params - evaluation = None
2023-05-06 18:44:52,704 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 18:44:52,707 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 18:44:52,708 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 18:44:52,713 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 18:44:52,715 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 18:44:52,716 - INFO - allennlp.common.params - type = default
2023-05-06 18:44:52,718 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 18:44:52,719 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 18:44:52,724 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 18:44:52,725 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 18:44:52,727 - INFO - all

loading instances: 40000it [01:39, 400.35it/s]

2023-05-06 18:46:32,737 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 18:46:32,741 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 18:46:32,743 - INFO - allennlp.common.params - validation_data_loader.drop_last = False





2023-05-06 18:46:32,748 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 18:46:32,750 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 18:46:32,752 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 18:46:32,753 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 18:46:32,754 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 18:46:32,755 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 18:46:32,757 - INFO - allennlp.common.params - validation_data_loader.cuda_device = None
2023-05-06 18:46:32,758 - INFO - allennlp.common.params - validation_data_loader.quiet = False
2023-05-06 18:46:32,759 - INFO - allennlp.common.params - validation_data_loader.collate_fn = <allennlp.data.data_loaders.data_collator.DefaultDataCollator object at 0x7f1035c88d60>


loading instances: 5000it [00:13, 367.70it/s]

2023-05-06 18:46:46,365 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 18:46:46,367 - INFO - allennlp.common.params - type = from_instances
2023-05-06 18:46:46,369 - INFO - allennlp.common.params - min_count = None
2023-05-06 18:46:46,371 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 18:46:46,373 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 18:46:46,375 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 18:46:46,376 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 18:46:46,377 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 18:46:46,378 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 18:46:46,379 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 18:46:46,380 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 18


building vocab: 40000it [00:06, 6347.81it/s]


2023-05-06 18:46:52,940 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 18:46:52,944 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 18:46:52,950 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 18:46:52,952 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 18:46:52,954 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 106
2023-05-06 18:46:52,956 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 18:46:52,958 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 18:46:52,962 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 18:46:52,963 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 18:47:00,213 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 18:47:00,215 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 18:47:00,222 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9105, batch_loss: 0.0692, loss: 0.2424 ||: 100%|##########| 1250/1250 [00:25<00:00, 49.99it/s]

2023-05-06 18:47:25,082 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 18:47:25,102 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 18:47:25,104 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 18:47:25,109 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9210, batch_loss: 0.0334, loss: 0.1939 ||: 100%|##########| 157/157 [00:01<00:00, 137.70it/s]

2023-05-06 18:47:26,235 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:47:26,237 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.911  |     0.921
2023-05-06 18:47:26,238 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1156.825  |       N/A
2023-05-06 18:47:26,240 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.242  |     0.194
2023-05-06 18:47:26,241 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  54489.211  |       N/A





2023-05-06 18:47:26,787 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:26.728535
2023-05-06 18:47:26,789 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:55
2023-05-06 18:47:26,791 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 18:47:26,798 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 53G
2023-05-06 18:47:26,801 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 957M
2023-05-06 18:47:26,808 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9498, batch_loss: 0.2159, loss: 0.1306 ||: 100%|##########| 1250/1250 [00:25<00:00, 49.45it/s]

2023-05-06 18:47:52,094 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9300, batch_loss: 0.0103, loss: 0.1867 ||: 100%|##########| 157/157 [00:01<00:00, 109.79it/s]

2023-05-06 18:47:53,534 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:47:53,536 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.950  |     0.930
2023-05-06 18:47:53,538 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   957.016  |       N/A
2023-05-06 18:47:53,539 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.131  |     0.187
2023-05-06 18:47:53,542 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  54530.016  |       N/A





2023-05-06 18:47:54,248 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:27.457268
2023-05-06 18:47:54,252 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:33
2023-05-06 18:47:54,254 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 18:47:54,260 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 53G
2023-05-06 18:47:54,264 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 957M
2023-05-06 18:47:54,265 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9839, batch_loss: 0.0026, loss: 0.0487 ||: 100%|##########| 1250/1250 [00:25<00:00, 48.95it/s]

2023-05-06 18:48:19,809 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9254, batch_loss: 0.0060, loss: 0.2437 ||: 100%|##########| 157/157 [00:01<00:00, 142.53it/s]

2023-05-06 18:48:20,923 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:48:20,924 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.984  |     0.925
2023-05-06 18:48:20,925 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   957.141  |       N/A
2023-05-06 18:48:20,926 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.049  |     0.244
2023-05-06 18:48:20,927 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  54530.016  |       N/A





2023-05-06 18:48:21,469 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:27.215073
2023-05-06 18:48:21,471 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:08
2023-05-06 18:48:21,472 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 18:48:21,474 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 53G
2023-05-06 18:48:21,480 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 957M
2023-05-06 18:48:21,482 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9977, batch_loss: 0.0196, loss: 0.0107 ||: 100%|##########| 1250/1250 [00:24<00:00, 50.10it/s]

2023-05-06 18:48:46,441 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9264, batch_loss: 0.0017, loss: 0.2979 ||: 100%|##########| 157/157 [00:01<00:00, 141.02it/s]

2023-05-06 18:48:47,561 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:48:47,563 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.998  |     0.926
2023-05-06 18:48:47,564 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   956.957  |       N/A
2023-05-06 18:48:47,565 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.011  |     0.298
2023-05-06 18:48:47,566 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  54530.016  |       N/A





2023-05-06 18:48:48,157 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:26.685442
2023-05-06 18:48:48,159 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:41
2023-05-06 18:48:48,165 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 18:48:48,168 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 53G
2023-05-06 18:48:48,172 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 957M
2023-05-06 18:48:48,173 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9999, batch_loss: 0.0007, loss: 0.0018 ||: 100%|##########| 1250/1250 [00:25<00:00, 49.65it/s]

2023-05-06 18:49:13,358 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9274, batch_loss: 0.0750, loss: 0.3454 ||: 100%|##########| 157/157 [00:01<00:00, 110.29it/s]

2023-05-06 18:49:14,789 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:49:14,791 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.927
2023-05-06 18:49:14,793 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   957.126  |       N/A
2023-05-06 18:49:14,794 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.002  |     0.345
2023-05-06 18:49:14,796 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  54530.016  |       N/A





2023-05-06 18:49:15,493 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:27.328083
2023-05-06 18:49:15,496 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 18:49:15,571 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 54530.015625,
  "peak_gpu_0_memory_MB": 1156.8251953125,
  "training_duration": "0:02:14.730631",
  "epoch": 4,
  "training_accuracy": 0.99985,
  "training_loss": 0.0018166780418563575,
  "training_worker_0_memory_MB": 54530.015625,
  "training_gpu_0_memory_MB": 957.1259765625,
  "validation_accuracy": 0.9274,
  "validation_loss": 0.34536216950791465,
  "best_validation_accuracy": 0.93,
  "best_validation_loss": 0.1866824833711811
}
2023-05-06 18:49:15,576 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/77/model.tar.gz


[32m[I 2023-05-06 18:49:18,596][0m Trial 77 finished with value: 0.93 and parameters: {'embedding_dim': 106, 'max_filter_size': 5, 'num_filters': 95}. Best is trial 41 with value: 0.9342.[0m


2023-05-06 18:49:18,734 - INFO - allennlp.common.params - evaluation = None
2023-05-06 18:49:18,737 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 18:49:18,740 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 18:49:18,742 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 18:49:18,744 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 18:49:18,746 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 18:49:18,748 - INFO - allennlp.common.params - type = default
2023-05-06 18:49:18,751 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 18:49:18,753 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 18:49:18,754 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 18:49:18,755 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 18:49:18,760 - INFO - all

loading instances: 40000it [01:42, 391.29it/s]

2023-05-06 18:51:01,043 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 18:51:01,046 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 18:51:01,048 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 18:51:01,050 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 18:51:01,051 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 18:51:01,052 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 18:51:01,054 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 18:51:01,055 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 18:51:01,056 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 18:51:01,057 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:15, 322.71it/s]

2023-05-06 18:51:16,557 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 18:51:16,560 - INFO - allennlp.common.params - type = from_instances
2023-05-06 18:51:16,563 - INFO - allennlp.common.params - min_count = None
2023-05-06 18:51:16,565 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 18:51:16,567 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 18:51:16,569 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 18:51:16,571 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 18:51:16,572 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 18:51:16,573 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 18:51:16,574 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 18:51:16,575 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 18


building vocab: 40000it [00:04, 8760.83it/s]


2023-05-06 18:51:21,338 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 18:51:21,340 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 18:51:21,341 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 18:51:21,346 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 18:51:21,347 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 114
2023-05-06 18:51:21,349 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 18:51:21,351 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 18:51:21,353 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 18:51:21,356 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 18:51:27,852 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 18:51:27,853 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 18:51:27,860 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9109, batch_loss: 0.0575, loss: 0.2408 ||: 100%|##########| 1250/1250 [00:32<00:00, 38.19it/s]

2023-05-06 18:52:00,418 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 18:52:00,439 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 18:52:00,443 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 18:52:00,447 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9230, batch_loss: 0.0242, loss: 0.1942 ||: 100%|##########| 157/157 [00:01<00:00, 110.64it/s]

2023-05-06 18:52:01,849 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:52:01,853 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.911  |     0.923
2023-05-06 18:52:01,856 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   957.194  |       N/A
2023-05-06 18:52:01,858 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.241  |     0.194
2023-05-06 18:52:01,860 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  55135.184  |       N/A





2023-05-06 18:52:02,452 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:34.774483
2023-05-06 18:52:02,453 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:05:07
2023-05-06 18:52:02,458 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 18:52:02,462 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 54G
2023-05-06 18:52:02,465 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.2G
2023-05-06 18:52:02,467 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9499, batch_loss: 0.1540, loss: 0.1304 ||: 100%|##########| 1250/1250 [00:32<00:00, 38.44it/s]

2023-05-06 18:52:34,995 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9318, batch_loss: 0.0122, loss: 0.1838 ||: 100%|##########| 157/157 [00:01<00:00, 112.37it/s]

2023-05-06 18:52:36,397 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:52:36,398 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.950  |     0.932
2023-05-06 18:52:36,399 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1202.537  |       N/A
2023-05-06 18:52:36,400 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.130  |     0.184
2023-05-06 18:52:36,401 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  55178.582  |       N/A





2023-05-06 18:52:37,004 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:34.546217
2023-05-06 18:52:37,007 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:34
2023-05-06 18:52:37,010 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 18:52:37,012 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 54G
2023-05-06 18:52:37,017 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.2G
2023-05-06 18:52:37,019 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9839, batch_loss: 0.0010, loss: 0.0468 ||: 100%|##########| 1250/1250 [00:32<00:00, 38.79it/s]

2023-05-06 18:53:09,249 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9262, batch_loss: 0.0016, loss: 0.2719 ||: 100%|##########| 157/157 [00:01<00:00, 91.05it/s]

2023-05-06 18:53:10,984 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:53:10,985 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.984  |     0.926
2023-05-06 18:53:10,987 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1202.662  |       N/A
2023-05-06 18:53:10,990 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.047  |     0.272
2023-05-06 18:53:10,991 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  55178.582  |       N/A





2023-05-06 18:53:11,767 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:34.757385
2023-05-06 18:53:11,771 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:01
2023-05-06 18:53:11,772 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 18:53:11,774 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 54G
2023-05-06 18:53:11,776 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.2G
2023-05-06 18:53:11,778 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9972, batch_loss: 0.0324, loss: 0.0106 ||: 100%|##########| 1250/1250 [00:32<00:00, 39.03it/s]

2023-05-06 18:53:43,815 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9178, batch_loss: 0.0035, loss: 0.3067 ||: 100%|##########| 157/157 [00:01<00:00, 112.29it/s]

2023-05-06 18:53:45,221 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:53:45,222 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.997  |     0.918
2023-05-06 18:53:45,226 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1202.478  |       N/A
2023-05-06 18:53:45,227 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.011  |     0.307





2023-05-06 18:53:45,230 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  55178.582  |       N/A
2023-05-06 18:53:45,873 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:34.100740
2023-05-06 18:53:45,876 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:26
2023-05-06 18:53:45,878 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 18:53:45,879 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 54G
2023-05-06 18:53:45,881 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.2G
2023-05-06 18:53:45,885 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9997, batch_loss: 0.0011, loss: 0.0020 ||: 100%|##########| 1250/1250 [00:32<00:00, 38.33it/s]

2023-05-06 18:54:18,502 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9246, batch_loss: 0.0001, loss: 0.3995 ||: 100%|##########| 157/157 [00:01<00:00, 112.69it/s]

2023-05-06 18:54:19,914 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:54:19,915 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.925
2023-05-06 18:54:19,917 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1202.647  |       N/A
2023-05-06 18:54:19,918 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.002  |     0.400
2023-05-06 18:54:19,920 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  55178.582  |       N/A





2023-05-06 18:54:20,495 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:34.617052
2023-05-06 18:54:20,496 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 18:54:20,553 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 55178.58203125,
  "peak_gpu_0_memory_MB": 1202.66162109375,
  "training_duration": "0:02:52.235704",
  "epoch": 4,
  "training_accuracy": 0.9997,
  "training_loss": 0.0020487271904334192,
  "training_worker_0_memory_MB": 55178.58203125,
  "training_gpu_0_memory_MB": 1202.64697265625,
  "validation_accuracy": 0.9246,
  "validation_loss": 0.3995470730659209,
  "best_validation_accuracy": 0.9318,
  "best_validation_loss": 0.18376964176441454
}
2023-05-06 18:54:20,556 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/78/model.tar.gz


[32m[I 2023-05-06 18:54:22,989][0m Trial 78 finished with value: 0.9318 and parameters: {'embedding_dim': 114, 'max_filter_size': 5, 'num_filters': 133}. Best is trial 41 with value: 0.9342.[0m


2023-05-06 18:54:23,116 - INFO - allennlp.common.params - evaluation = None
2023-05-06 18:54:23,118 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 18:54:23,125 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 18:54:23,126 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 18:54:23,130 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 18:54:23,132 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 18:54:23,133 - INFO - allennlp.common.params - type = default
2023-05-06 18:54:23,138 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 18:54:23,139 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 18:54:23,143 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 18:54:23,144 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 18:54:23,145 - INFO - all

loading instances: 40000it [01:41, 394.69it/s]

2023-05-06 18:56:04,540 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 18:56:04,542 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 18:56:04,548 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 18:56:04,550 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 18:56:04,557 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 18:56:04,558 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 18:56:04,559 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 18:56:04,560 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 18:56:04,561 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 18:56:04,562 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:16, 304.69it/s]

2023-05-06 18:56:20,984 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 18:56:20,987 - INFO - allennlp.common.params - type = from_instances
2023-05-06 18:56:20,988 - INFO - allennlp.common.params - min_count = None
2023-05-06 18:56:20,991 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 18:56:20,993 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 18:56:20,995 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 18:56:20,997 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 18:56:20,998 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 18:56:21,001 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 18:56:21,004 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 18:56:21,006 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 18


building vocab: 40000it [00:04, 8748.09it/s]


2023-05-06 18:56:25,793 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 18:56:25,795 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 18:56:25,801 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 18:56:25,805 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 18:56:25,809 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 93
2023-05-06 18:56:25,813 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 18:56:25,815 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 18:56:25,817 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 18:56:25,819 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tok

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 18:56:33,871 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 18:56:33,873 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 18:56:33,880 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9074, batch_loss: 0.0815, loss: 0.2483 ||: 100%|##########| 1250/1250 [00:17<00:00, 70.96it/s]

2023-05-06 18:56:51,355 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 18:56:51,369 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 18:56:51,372 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 18:56:51,377 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9234, batch_loss: 0.0471, loss: 0.1928 ||: 100%|##########| 157/157 [00:01<00:00, 152.15it/s]

2023-05-06 18:56:52,397 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:56:52,399 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.907  |     0.923
2023-05-06 18:56:52,404 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1202.715  |       N/A
2023-05-06 18:56:52,405 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.248  |     0.193
2023-05-06 18:56:52,407 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  55761.355  |       N/A





2023-05-06 18:56:52,955 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:19.237751
2023-05-06 18:56:52,958 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:48
2023-05-06 18:56:52,962 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 18:56:52,966 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 54G
2023-05-06 18:56:52,968 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 738M
2023-05-06 18:56:52,969 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9454, batch_loss: 0.2368, loss: 0.1404 ||: 100%|##########| 1250/1250 [00:17<00:00, 69.48it/s]

2023-05-06 18:57:10,968 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9274, batch_loss: 0.0153, loss: 0.1886 ||: 100%|##########| 157/157 [00:01<00:00, 148.83it/s]

2023-05-06 18:57:12,033 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:57:12,034 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.945  |     0.927
2023-05-06 18:57:12,035 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   738.183  |       N/A
2023-05-06 18:57:12,037 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.140  |     0.189
2023-05-06 18:57:12,039 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  55797.031  |       N/A





2023-05-06 18:57:12,607 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:19.645076
2023-05-06 18:57:12,614 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:33
2023-05-06 18:57:12,615 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 18:57:12,617 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 54G
2023-05-06 18:57:12,619 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 738M
2023-05-06 18:57:12,621 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9792, batch_loss: 0.0027, loss: 0.0620 ||: 100%|##########| 1250/1250 [00:18<00:00, 69.34it/s]

2023-05-06 18:57:30,658 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9230, batch_loss: 0.0349, loss: 0.2321 ||: 100%|##########| 157/157 [00:00<00:00, 158.30it/s]

2023-05-06 18:57:31,656 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:57:31,660 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.979  |     0.923
2023-05-06 18:57:31,662 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   738.308  |       N/A
2023-05-06 18:57:31,663 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.062  |     0.232
2023-05-06 18:57:31,666 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  55797.031  |       N/A





2023-05-06 18:57:32,273 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:19.657864
2023-05-06 18:57:32,276 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:15
2023-05-06 18:57:32,279 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 18:57:32,281 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 54G
2023-05-06 18:57:32,283 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 738M
2023-05-06 18:57:32,284 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9959, batch_loss: 0.0250, loss: 0.0164 ||: 100%|##########| 1250/1250 [00:18<00:00, 69.42it/s]

2023-05-06 18:57:50,298 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9234, batch_loss: 0.0069, loss: 0.2889 ||: 100%|##########| 157/157 [00:00<00:00, 167.60it/s]

2023-05-06 18:57:51,241 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:57:51,245 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.996  |     0.923
2023-05-06 18:57:51,249 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   738.124  |       N/A
2023-05-06 18:57:51,250 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.016  |     0.289
2023-05-06 18:57:51,252 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  55797.031  |       N/A





2023-05-06 18:57:51,749 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:19.470356
2023-05-06 18:57:51,751 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:01:56
2023-05-06 18:57:51,753 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 18:57:51,754 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 54G
2023-05-06 18:57:51,757 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 738M
2023-05-06 18:57:51,758 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9995, batch_loss: 0.0078, loss: 0.0037 ||: 100%|##########| 1250/1250 [00:17<00:00, 70.11it/s]

2023-05-06 18:58:09,597 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9242, batch_loss: 0.0057, loss: 0.3506 ||: 100%|##########| 157/157 [00:00<00:00, 180.63it/s]

2023-05-06 18:58:10,477 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 18:58:10,478 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.924
2023-05-06 18:58:10,479 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   738.293  |       N/A
2023-05-06 18:58:10,481 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.004  |     0.351
2023-05-06 18:58:10,482 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  55797.031  |       N/A





2023-05-06 18:58:11,033 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:19.280544
2023-05-06 18:58:11,035 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 18:58:11,083 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 55797.03125,
  "peak_gpu_0_memory_MB": 1202.71484375,
  "training_duration": "0:01:36.759212",
  "epoch": 4,
  "training_accuracy": 0.9995,
  "training_loss": 0.0036654850289545723,
  "training_worker_0_memory_MB": 55797.03125,
  "training_gpu_0_memory_MB": 738.29296875,
  "validation_accuracy": 0.9242,
  "validation_loss": 0.35062223434993983,
  "best_validation_accuracy": 0.9274,
  "best_validation_loss": 0.18855172208255264
}
2023-05-06 18:58:11,085 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/79/model.tar.gz


[32m[I 2023-05-06 18:58:13,201][0m Trial 79 finished with value: 0.9274 and parameters: {'embedding_dim': 93, 'max_filter_size': 4, 'num_filters': 79}. Best is trial 41 with value: 0.9342.[0m


2023-05-06 18:58:13,369 - INFO - allennlp.common.params - evaluation = None
2023-05-06 18:58:13,373 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 18:58:13,375 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 18:58:13,377 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 18:58:13,378 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 18:58:13,380 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 18:58:13,382 - INFO - allennlp.common.params - type = default
2023-05-06 18:58:13,385 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 18:58:13,387 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 18:58:13,388 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 18:58:13,389 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 18:58:13,390 - INFO - all

loading instances: 40000it [01:45, 378.18it/s]

2023-05-06 18:59:59,203 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 18:59:59,205 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 18:59:59,206 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 18:59:59,209 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 18:59:59,210 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 18:59:59,213 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 18:59:59,214 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 18:59:59,217 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 18:59:59,219 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 18:59:59,221 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:14, 347.97it/s]


2023-05-06 19:00:13,599 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 19:00:13,603 - INFO - allennlp.common.params - type = from_instances
2023-05-06 19:00:13,605 - INFO - allennlp.common.params - min_count = None
2023-05-06 19:00:13,607 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 19:00:13,608 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 19:00:13,610 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 19:00:13,611 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 19:00:13,612 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 19:00:13,619 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 19:00:13,620 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 19:00:13,621 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 19

building vocab: 40000it [00:06, 5981.18it/s]


2023-05-06 19:00:20,506 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 19:00:20,508 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 19:00:20,511 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 19:00:20,514 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 19:00:20,518 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 161
2023-05-06 19:00:20,520 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 19:00:20,522 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 19:00:20,526 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 19:00:20,527 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 19:00:26,220 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 19:00:26,225 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 19:00:26,229 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9111, batch_loss: 0.0516, loss: 0.2410 ||: 100%|##########| 1250/1250 [00:28<00:00, 43.49it/s]

2023-05-06 19:00:54,853 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 19:00:54,870 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 19:00:54,873 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 19:00:54,880 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9214, batch_loss: 0.0246, loss: 0.1968 ||: 100%|##########| 157/157 [00:01<00:00, 117.21it/s]

2023-05-06 19:00:56,204 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:00:56,207 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.911  |     0.921
2023-05-06 19:00:56,209 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   738.361  |       N/A
2023-05-06 19:00:56,212 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.241  |     0.197
2023-05-06 19:00:56,214 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  56601.496  |       N/A





2023-05-06 19:00:57,253 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:31.158352
2023-05-06 19:00:57,255 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:30
2023-05-06 19:00:57,257 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 19:00:57,259 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 55G
2023-05-06 19:00:57,260 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 19:00:57,262 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9501, batch_loss: 0.1557, loss: 0.1295 ||: 100%|##########| 1250/1250 [00:28<00:00, 43.62it/s]

2023-05-06 19:01:25,924 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9316, batch_loss: 0.0100, loss: 0.1902 ||: 100%|##########| 157/157 [00:01<00:00, 151.08it/s]

2023-05-06 19:01:26,972 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:01:26,974 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.950  |     0.932





2023-05-06 19:01:26,977 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1076.048  |       N/A
2023-05-06 19:01:26,982 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.129  |     0.190
2023-05-06 19:01:26,983 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  56644.023  |       N/A
2023-05-06 19:01:27,725 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:30.468002
2023-05-06 19:01:27,727 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:03
2023-05-06 19:01:27,728 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 19:01:27,734 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 55G
2023-05-06 19:01:27,737 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 19:01:27,741 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9839, batch_loss: 0.0009, loss: 0.0464 ||: 100%|##########| 1250/1250 [00:28<00:00, 43.51it/s]

2023-05-06 19:01:56,476 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9240, batch_loss: 0.0021, loss: 0.2513 ||: 100%|##########| 157/157 [00:01<00:00, 112.27it/s]

2023-05-06 19:01:57,881 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:01:57,882 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.984  |     0.924
2023-05-06 19:01:57,885 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1076.048  |       N/A
2023-05-06 19:01:57,886 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.046  |     0.251
2023-05-06 19:01:57,887 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  56644.023  |       N/A





2023-05-06 19:01:58,954 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:31.225753
2023-05-06 19:01:58,957 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:34
2023-05-06 19:01:58,961 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 19:01:58,963 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 55G
2023-05-06 19:01:58,966 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 19:01:58,968 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9969, batch_loss: 0.1466, loss: 0.0113 ||: 100%|##########| 1250/1250 [00:28<00:00, 44.07it/s]

2023-05-06 19:02:27,336 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9244, batch_loss: 0.0001, loss: 0.3272 ||: 100%|##########| 157/157 [00:01<00:00, 153.82it/s]

2023-05-06 19:02:28,363 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:02:28,365 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.997  |     0.924
2023-05-06 19:02:28,371 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1076.048  |       N/A
2023-05-06 19:02:28,374 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.011  |     0.327
2023-05-06 19:02:28,376 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  56644.023  |       N/A





2023-05-06 19:02:29,157 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:30.196007
2023-05-06 19:02:29,161 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:03
2023-05-06 19:02:29,165 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 19:02:29,169 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 55G
2023-05-06 19:02:29,172 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 19:02:29,176 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9997, batch_loss: 0.0004, loss: 0.0020 ||: 100%|##########| 1250/1250 [00:28<00:00, 43.95it/s]

2023-05-06 19:02:57,623 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9236, batch_loss: 0.0001, loss: 0.3783 ||: 100%|##########| 157/157 [00:01<00:00, 113.82it/s]

2023-05-06 19:02:59,009 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:02:59,011 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.924
2023-05-06 19:02:59,012 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1076.048  |       N/A
2023-05-06 19:02:59,014 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.002  |     0.378
2023-05-06 19:02:59,016 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  56644.023  |       N/A





2023-05-06 19:03:00,073 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:30.907585
2023-05-06 19:03:00,080 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 19:03:00,172 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 56644.0234375,
  "peak_gpu_0_memory_MB": 1076.04833984375,
  "training_duration": "0:02:32.914704",
  "epoch": 4,
  "training_accuracy": 0.9997,
  "training_loss": 0.0020459490518602253,
  "training_worker_0_memory_MB": 56644.0234375,
  "training_gpu_0_memory_MB": 1076.04833984375,
  "validation_accuracy": 0.9236,
  "validation_loss": 0.3783439997881432,
  "best_validation_accuracy": 0.9316,
  "best_validation_loss": 0.19021394074698733
}
2023-05-06 19:03:00,174 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/80/model.tar.gz


[32m[I 2023-05-06 19:03:03,742][0m Trial 80 finished with value: 0.9316 and parameters: {'embedding_dim': 161, 'max_filter_size': 5, 'num_filters': 62}. Best is trial 41 with value: 0.9342.[0m


2023-05-06 19:03:03,881 - INFO - allennlp.common.params - evaluation = None
2023-05-06 19:03:03,882 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 19:03:03,889 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 19:03:03,893 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 19:03:03,897 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 19:03:03,899 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 19:03:03,901 - INFO - allennlp.common.params - type = default
2023-05-06 19:03:03,904 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 19:03:03,906 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 19:03:03,907 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 19:03:03,909 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 19:03:03,911 - INFO - all

loading instances: 40000it [01:45, 378.49it/s]

2023-05-06 19:04:49,660 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 19:04:49,667 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 19:04:49,669 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 19:04:49,671 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 19:04:49,674 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 19:04:49,675 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 19:04:49,677 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 19:04:49,679 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 19:04:49,680 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 19:04:49,682 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:11, 434.63it/s]

2023-05-06 19:05:01,195 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 19:05:01,199 - INFO - allennlp.common.params - type = from_instances
2023-05-06 19:05:01,200 - INFO - allennlp.common.params - min_count = None
2023-05-06 19:05:01,202 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 19:05:01,203 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 19:05:01,205 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 19:05:01,208 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 19:05:01,211 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 19:05:01,213 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 19:05:01,214 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 19:05:01,216 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 19


building vocab: 40000it [00:05, 7045.54it/s]


2023-05-06 19:05:07,082 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 19:05:07,085 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 19:05:07,087 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 19:05:07,090 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 19:05:07,092 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 136
2023-05-06 19:05:07,095 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 19:05:07,097 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 19:05:07,098 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 19:05:07,099 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 19:05:12,618 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 19:05:12,620 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 19:05:12,629 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9076, batch_loss: 0.0789, loss: 0.2415 ||: 100%|##########| 1250/1250 [00:28<00:00, 43.14it/s]

2023-05-06 19:05:41,476 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 19:05:41,495 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 19:05:41,496 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 19:05:41,501 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9216, batch_loss: 0.0358, loss: 0.1990 ||: 100%|##########| 157/157 [00:01<00:00, 113.47it/s]

2023-05-06 19:05:42,869 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:05:42,871 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.908  |     0.922
2023-05-06 19:05:42,873 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1076.048  |       N/A





2023-05-06 19:05:42,878 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.242  |     0.199
2023-05-06 19:05:42,879 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  57282.102  |       N/A
2023-05-06 19:05:43,766 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:31.277037
2023-05-06 19:05:43,769 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:33
2023-05-06 19:05:43,772 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 19:05:43,774 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 56G
2023-05-06 19:05:43,776 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 982M
2023-05-06 19:05:43,779 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9512, batch_loss: 0.2055, loss: 0.1298 ||: 100%|##########| 1250/1250 [00:28<00:00, 43.48it/s]

2023-05-06 19:06:12,537 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9312, batch_loss: 0.0136, loss: 0.1862 ||: 100%|##########| 157/157 [00:01<00:00, 134.87it/s]

2023-05-06 19:06:13,712 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:06:13,720 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.951  |     0.931
2023-05-06 19:06:13,722 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   981.809  |       N/A
2023-05-06 19:06:13,723 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.130  |     0.186
2023-05-06 19:06:13,727 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  57282.102  |       N/A





2023-05-06 19:06:14,392 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:30.619712
2023-05-06 19:06:14,399 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:04
2023-05-06 19:06:14,400 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 19:06:14,406 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 56G
2023-05-06 19:06:14,408 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 982M
2023-05-06 19:06:14,410 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9832, batch_loss: 0.0010, loss: 0.0486 ||: 100%|##########| 1250/1250 [00:28<00:00, 43.52it/s]

2023-05-06 19:06:43,141 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9274, batch_loss: 0.0056, loss: 0.2475 ||: 100%|##########| 157/157 [00:01<00:00, 110.70it/s]

2023-05-06 19:06:44,570 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:06:44,572 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.983  |     0.927
2023-05-06 19:06:44,575 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   981.933  |       N/A
2023-05-06 19:06:44,577 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.049  |     0.248
2023-05-06 19:06:44,578 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  57282.102  |       N/A





2023-05-06 19:06:45,495 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:31.094846
2023-05-06 19:06:45,498 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:34
2023-05-06 19:06:45,500 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 19:06:45,501 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 56G
2023-05-06 19:06:45,506 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 982M
2023-05-06 19:06:45,508 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9973, batch_loss: 0.0323, loss: 0.0101 ||: 100%|##########| 1250/1250 [00:28<00:00, 44.24it/s]

2023-05-06 19:07:13,770 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9226, batch_loss: 0.0022, loss: 0.3095 ||: 100%|##########| 157/157 [00:01<00:00, 136.78it/s]

2023-05-06 19:07:14,927 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:07:14,929 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.997  |     0.923
2023-05-06 19:07:14,930 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   981.749  |       N/A
2023-05-06 19:07:14,935 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.010  |     0.309
2023-05-06 19:07:14,938 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  57282.102  |       N/A





2023-05-06 19:07:15,627 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:30.127579
2023-05-06 19:07:15,629 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:03
2023-05-06 19:07:15,632 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 19:07:15,633 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 56G
2023-05-06 19:07:15,635 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 982M
2023-05-06 19:07:15,637 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9998, batch_loss: 0.0004, loss: 0.0017 ||: 100%|##########| 1250/1250 [00:28<00:00, 43.58it/s]

2023-05-06 19:07:44,334 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9278, batch_loss: 0.0056, loss: 0.3694 ||: 100%|##########| 157/157 [00:01<00:00, 111.78it/s]

2023-05-06 19:07:45,745 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:07:45,746 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.928
2023-05-06 19:07:45,750 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   981.918  |       N/A
2023-05-06 19:07:45,751 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.002  |     0.369
2023-05-06 19:07:45,752 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  57282.102  |       N/A





2023-05-06 19:07:46,538 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:30.905781
2023-05-06 19:07:46,542 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 19:07:46,617 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 57282.1015625,
  "peak_gpu_0_memory_MB": 1076.04833984375,
  "training_duration": "0:02:33.255983",
  "epoch": 4,
  "training_accuracy": 0.9998,
  "training_loss": 0.0017185949366037676,
  "training_worker_0_memory_MB": 57282.1015625,
  "training_gpu_0_memory_MB": 981.91845703125,
  "validation_accuracy": 0.9278,
  "validation_loss": 0.3693636906078192,
  "best_validation_accuracy": 0.9312,
  "best_validation_loss": 0.18617064225232335
}
2023-05-06 19:07:46,623 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/81/model.tar.gz


[32m[I 2023-05-06 19:07:49,804][0m Trial 81 finished with value: 0.9312 and parameters: {'embedding_dim': 136, 'max_filter_size': 5, 'num_filters': 73}. Best is trial 41 with value: 0.9342.[0m


2023-05-06 19:07:49,928 - INFO - allennlp.common.params - evaluation = None
2023-05-06 19:07:49,930 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 19:07:49,934 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 19:07:49,936 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 19:07:49,938 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 19:07:49,939 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 19:07:49,941 - INFO - allennlp.common.params - type = default
2023-05-06 19:07:49,944 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 19:07:49,946 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 19:07:49,947 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 19:07:49,948 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 19:07:49,949 - INFO - all

loading instances: 40000it [01:40, 396.08it/s]

2023-05-06 19:09:30,979 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess





2023-05-06 19:09:30,989 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 19:09:30,993 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 19:09:30,999 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 19:09:31,003 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 19:09:31,005 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 19:09:31,006 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 19:09:31,007 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 19:09:31,008 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 19:09:31,009 - INFO - allennlp.common.params - validation_data_loader.cuda_device = None
2023-05-06 19:09:31,010 - INFO - allennlp.common.params - validation_data_loader.quiet = 

loading instances: 5000it [00:10, 476.99it/s]

2023-05-06 19:09:41,498 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 19:09:41,502 - INFO - allennlp.common.params - type = from_instances
2023-05-06 19:09:41,505 - INFO - allennlp.common.params - min_count = None
2023-05-06 19:09:41,507 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 19:09:41,509 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 19:09:41,510 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 19:09:41,511 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 19:09:41,512 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 19:09:41,513 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 19:09:41,514 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 19:09:41,515 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 19


building vocab: 40000it [00:06, 6071.39it/s]


2023-05-06 19:09:48,313 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 19:09:48,315 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 19:09:48,317 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 19:09:48,321 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 19:09:48,328 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 109
2023-05-06 19:09:48,330 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 19:09:48,333 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 19:09:48,334 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 19:09:48,335 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 19:09:53,878 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 19:09:53,880 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 19:09:53,886 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9089, batch_loss: 0.0727, loss: 0.2467 ||: 100%|##########| 1250/1250 [00:20<00:00, 59.56it/s]

2023-05-06 19:10:14,753 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 19:10:14,764 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 19:10:14,766 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 19:10:14,772 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9246, batch_loss: 0.0261, loss: 0.1936 ||: 100%|##########| 157/157 [00:00<00:00, 168.89it/s]

2023-05-06 19:10:15,689 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation





2023-05-06 19:10:15,693 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.909  |     0.925
2023-05-06 19:10:15,695 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   981.986  |       N/A
2023-05-06 19:10:15,697 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.247  |     0.194
2023-05-06 19:10:15,698 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  57705.070  |       N/A
2023-05-06 19:10:16,243 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:22.482378
2023-05-06 19:10:16,246 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:17
2023-05-06 19:10:16,251 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 19:10:16,253 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 56G
2023-05-06 19:10:16,258 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memo

accuracy: 0.9470, batch_loss: 0.2447, loss: 0.1385 ||: 100%|##########| 1250/1250 [00:20<00:00, 60.97it/s]

2023-05-06 19:10:36,772 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9292, batch_loss: 0.0357, loss: 0.1836 ||: 100%|##########| 157/157 [00:00<00:00, 162.50it/s]

2023-05-06 19:10:37,746 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:10:37,748 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.947  |     0.929
2023-05-06 19:10:37,750 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   719.747  |       N/A
2023-05-06 19:10:37,751 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.138  |     0.184
2023-05-06 19:10:37,752 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  57746.477  |       N/A





2023-05-06 19:10:38,469 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:22.218431
2023-05-06 19:10:38,471 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:55
2023-05-06 19:10:38,473 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 19:10:38,474 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 56G
2023-05-06 19:10:38,476 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 720M
2023-05-06 19:10:38,480 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9786, batch_loss: 0.0037, loss: 0.0616 ||: 100%|##########| 1250/1250 [00:20<00:00, 61.18it/s]

2023-05-06 19:10:58,921 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9234, batch_loss: 0.0060, loss: 0.2335 ||: 100%|##########| 157/157 [00:00<00:00, 158.07it/s]

2023-05-06 19:10:59,928 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:10:59,929 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.979  |     0.923
2023-05-06 19:10:59,933 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   719.747  |       N/A
2023-05-06 19:10:59,934 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.062  |     0.234
2023-05-06 19:10:59,935 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  57746.477  |       N/A





2023-05-06 19:11:00,778 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:22.304769
2023-05-06 19:11:00,782 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:34
2023-05-06 19:11:00,783 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 19:11:00,787 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 56G
2023-05-06 19:11:00,788 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 720M
2023-05-06 19:11:00,790 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9951, batch_loss: 0.0335, loss: 0.0175 ||: 100%|##########| 1250/1250 [00:20<00:00, 61.57it/s]

2023-05-06 19:11:21,097 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9264, batch_loss: 0.0013, loss: 0.2921 ||: 100%|##########| 157/157 [00:01<00:00, 153.52it/s]

2023-05-06 19:11:22,126 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:11:22,130 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.995  |     0.926
2023-05-06 19:11:22,131 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   719.747  |       N/A
2023-05-06 19:11:22,133 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.018  |     0.292
2023-05-06 19:11:22,137 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  57746.477  |       N/A





2023-05-06 19:11:22,804 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:22.021288
2023-05-06 19:11:22,810 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:12
2023-05-06 19:11:22,815 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 19:11:22,816 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 56G
2023-05-06 19:11:22,818 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 720M
2023-05-06 19:11:22,819 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9994, batch_loss: 0.0014, loss: 0.0037 ||: 100%|##########| 1250/1250 [00:20<00:00, 61.73it/s]

2023-05-06 19:11:43,074 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9232, batch_loss: 0.0028, loss: 0.3298 ||: 100%|##########| 157/157 [00:01<00:00, 154.77it/s]

2023-05-06 19:11:44,099 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:11:44,101 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.999  |     0.923
2023-05-06 19:11:44,103 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   719.747  |       N/A
2023-05-06 19:11:44,104 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.004  |     0.330
2023-05-06 19:11:44,106 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  57746.477  |       N/A





2023-05-06 19:11:44,741 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:21.926709
2023-05-06 19:11:44,743 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 19:11:44,831 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 57746.4765625,
  "peak_gpu_0_memory_MB": 981.986328125,
  "training_duration": "0:01:50.338366",
  "epoch": 4,
  "training_accuracy": 0.999375,
  "training_loss": 0.0036827349623461485,
  "training_worker_0_memory_MB": 57746.4765625,
  "training_gpu_0_memory_MB": 719.74658203125,
  "validation_accuracy": 0.9232,
  "validation_loss": 0.3298341679797585,
  "best_validation_accuracy": 0.9292,
  "best_validation_loss": 0.1836323449091547
}
2023-05-06 19:11:44,833 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/82/model.tar.gz


[32m[I 2023-05-06 19:11:47,936][0m Trial 82 finished with value: 0.9292 and parameters: {'embedding_dim': 109, 'max_filter_size': 5, 'num_filters': 39}. Best is trial 41 with value: 0.9342.[0m


2023-05-06 19:11:48,102 - INFO - allennlp.common.params - evaluation = None
2023-05-06 19:11:48,105 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 19:11:48,108 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 19:11:48,110 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 19:11:48,112 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 19:11:48,114 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 19:11:48,115 - INFO - allennlp.common.params - type = default
2023-05-06 19:11:48,117 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 19:11:48,119 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 19:11:48,123 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 19:11:48,124 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 19:11:48,126 - INFO - all

loading instances: 40000it [01:42, 389.76it/s]

2023-05-06 19:13:30,846 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 19:13:30,853 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 19:13:30,855 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 19:13:30,856 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 19:13:30,862 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 19:13:30,863 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 19:13:30,864 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 19:13:30,865 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 19:13:30,866 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 19:13:30,869 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:09, 520.17it/s]

2023-05-06 19:13:40,490 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 19:13:40,492 - INFO - allennlp.common.params - type = from_instances
2023-05-06 19:13:40,493 - INFO - allennlp.common.params - min_count = None
2023-05-06 19:13:40,496 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 19:13:40,498 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 19:13:40,499 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 19:13:40,500 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 19:13:40,503 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 19:13:40,504 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 19:13:40,507 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 19:13:40,508 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 19


building vocab: 40000it [00:06, 6118.61it/s]


2023-05-06 19:13:47,306 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 19:13:47,309 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 19:13:47,310 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 19:13:47,313 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 19:13:47,315 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 140
2023-05-06 19:13:47,316 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 19:13:47,318 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 19:13:47,319 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 19:13:47,322 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 19:13:53,816 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 19:13:53,818 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 19:13:53,824 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9089, batch_loss: 0.0763, loss: 0.2410 ||: 100%|##########| 1250/1250 [00:31<00:00, 40.04it/s]

2023-05-06 19:14:24,922 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 19:14:24,942 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 19:14:24,943 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 19:14:24,949 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9216, batch_loss: 0.0524, loss: 0.1989 ||: 100%|##########| 157/157 [00:01<00:00, 100.80it/s]

2023-05-06 19:14:26,487 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:14:26,489 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.909  |     0.922
2023-05-06 19:14:26,490 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   719.747  |       N/A
2023-05-06 19:14:26,492 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.241  |     0.199
2023-05-06 19:14:26,493 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  58068.945  |       N/A





2023-05-06 19:14:27,348 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:33.655038
2023-05-06 19:14:27,351 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:55
2023-05-06 19:14:27,353 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 19:14:27,354 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 57G
2023-05-06 19:14:27,358 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 19:14:27,360 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9515, batch_loss: 0.1994, loss: 0.1290 ||: 100%|##########| 1250/1250 [00:32<00:00, 38.76it/s]

2023-05-06 19:14:59,617 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9294, batch_loss: 0.0094, loss: 0.1935 ||: 100%|##########| 157/157 [00:01<00:00, 114.80it/s]

2023-05-06 19:15:00,994 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:15:00,995 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.952  |     0.929
2023-05-06 19:15:00,998 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1175.781  |       N/A
2023-05-06 19:15:01,000 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.129  |     0.193
2023-05-06 19:15:01,002 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  58122.430  |       N/A





2023-05-06 19:15:01,844 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:34.490927
2023-05-06 19:15:01,847 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:29
2023-05-06 19:15:01,848 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 19:15:01,850 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 57G
2023-05-06 19:15:01,851 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 19:15:01,854 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9845, batch_loss: 0.0009, loss: 0.0457 ||: 100%|##########| 1250/1250 [00:32<00:00, 38.80it/s]

2023-05-06 19:15:34,077 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9258, batch_loss: 0.0022, loss: 0.2702 ||: 100%|##########| 157/157 [00:01<00:00, 122.28it/s]

2023-05-06 19:15:35,367 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:15:35,375 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.985  |     0.926
2023-05-06 19:15:35,376 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1175.905  |       N/A
2023-05-06 19:15:35,379 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.046  |     0.270
2023-05-06 19:15:35,382 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  58122.430  |       N/A





2023-05-06 19:15:36,211 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:34.363268
2023-05-06 19:15:36,212 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:57
2023-05-06 19:15:36,216 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 19:15:36,218 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 57G
2023-05-06 19:15:36,221 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 19:15:36,222 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9976, batch_loss: 0.0675, loss: 0.0091 ||: 100%|##########| 1250/1250 [00:31<00:00, 39.37it/s]

2023-05-06 19:16:07,982 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9238, batch_loss: 0.0003, loss: 0.3205 ||: 100%|##########| 157/157 [00:01<00:00, 103.74it/s]

2023-05-06 19:16:09,505 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:16:09,506 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.998  |     0.924
2023-05-06 19:16:09,507 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1175.721  |       N/A
2023-05-06 19:16:09,508 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.009  |     0.320
2023-05-06 19:16:09,511 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  58122.430  |       N/A





2023-05-06 19:16:10,437 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:34.221461
2023-05-06 19:16:10,439 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:23
2023-05-06 19:16:10,443 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 19:16:10,445 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 57G
2023-05-06 19:16:10,448 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 19:16:10,449 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9999, batch_loss: 0.0018, loss: 0.0013 ||: 100%|##########| 1250/1250 [00:31<00:00, 39.42it/s]

2023-05-06 19:16:42,162 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9240, batch_loss: 0.0002, loss: 0.3670 ||: 100%|##########| 157/157 [00:01<00:00, 110.90it/s]

2023-05-06 19:16:43,586 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:16:43,587 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.924
2023-05-06 19:16:43,589 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1175.891  |       N/A
2023-05-06 19:16:43,591 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.001  |     0.367
2023-05-06 19:16:43,592 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  58122.430  |       N/A





2023-05-06 19:16:44,467 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:34.023959
2023-05-06 19:16:44,471 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 19:16:44,551 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 58122.4296875,
  "peak_gpu_0_memory_MB": 1175.9052734375,
  "training_duration": "0:02:49.891871",
  "epoch": 4,
  "training_accuracy": 0.999875,
  "training_loss": 0.0013391772493378084,
  "training_worker_0_memory_MB": 58122.4296875,
  "training_gpu_0_memory_MB": 1175.890625,
  "validation_accuracy": 0.924,
  "validation_loss": 0.3670429172140589,
  "best_validation_accuracy": 0.9294,
  "best_validation_loss": 0.19347369743831408
}
2023-05-06 19:16:44,558 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/83/model.tar.gz


[32m[I 2023-05-06 19:16:48,605][0m Trial 83 finished with value: 0.9294 and parameters: {'embedding_dim': 140, 'max_filter_size': 5, 'num_filters': 108}. Best is trial 41 with value: 0.9342.[0m


2023-05-06 19:16:48,778 - INFO - allennlp.common.params - evaluation = None
2023-05-06 19:16:48,781 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 19:16:48,785 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 19:16:48,787 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 19:16:48,791 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 19:16:48,793 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 19:16:48,794 - INFO - allennlp.common.params - type = default
2023-05-06 19:16:48,796 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 19:16:48,797 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 19:16:48,802 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 19:16:48,803 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 19:16:48,804 - INFO - all

loading instances: 40000it [01:42, 391.30it/s]

2023-05-06 19:18:33,377 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 19:18:33,381 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 19:18:33,385 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 19:18:33,388 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 19:18:33,389 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 19:18:33,396 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 19:18:33,396 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 19:18:33,398 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 19:18:33,399 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 19:18:33,400 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:09, 515.65it/s]

2023-05-06 19:18:43,109 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 19:18:43,111 - INFO - allennlp.common.params - type = from_instances
2023-05-06 19:18:43,113 - INFO - allennlp.common.params - min_count = None
2023-05-06 19:18:43,119 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 19:18:43,121 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 19:18:43,122 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 19:18:43,124 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 19:18:43,125 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 19:18:43,126 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 19:18:43,127 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 19:18:43,131 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 19


building vocab: 40000it [00:06, 5927.33it/s]


2023-05-06 19:18:50,128 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 19:18:50,130 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 19:18:50,135 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 19:18:50,136 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 19:18:50,138 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 88
2023-05-06 19:18:50,140 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 19:18:50,142 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 19:18:50,143 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 19:18:50,145 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tok

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 19:18:56,498 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 19:18:56,500 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 19:18:56,505 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9092, batch_loss: 0.0562, loss: 0.2431 ||: 100%|##########| 1250/1250 [00:21<00:00, 57.76it/s]

2023-05-06 19:19:18,022 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 19:19:18,034 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 19:19:18,036 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 19:19:18,041 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9236, batch_loss: 0.0737, loss: 0.1978 ||: 100%|##########| 157/157 [00:01<00:00, 149.67it/s]

2023-05-06 19:19:19,077 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:19:19,078 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.909  |     0.924





2023-05-06 19:19:19,084 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1175.958  |       N/A
2023-05-06 19:19:19,086 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.243  |     0.198
2023-05-06 19:19:19,089 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  58681.770  |       N/A
2023-05-06 19:19:19,511 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:23.139554
2023-05-06 19:19:19,515 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:24
2023-05-06 19:19:19,520 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 19:19:19,522 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 57G
2023-05-06 19:19:19,527 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 852M
2023-05-06 19:19:19,529 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9471, batch_loss: 0.1960, loss: 0.1366 ||: 100%|##########| 1250/1250 [00:21<00:00, 57.68it/s]

2023-05-06 19:19:41,205 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9282, batch_loss: 0.0125, loss: 0.1848 ||: 100%|##########| 157/157 [00:01<00:00, 150.69it/s]

2023-05-06 19:19:42,254 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:19:42,256 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.947  |     0.928
2023-05-06 19:19:42,258 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   852.148  |       N/A
2023-05-06 19:19:42,258 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.137  |     0.185
2023-05-06 19:19:42,259 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  58715.277  |       N/A





2023-05-06 19:19:42,729 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:23.209651
2023-05-06 19:19:42,733 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:03
2023-05-06 19:19:42,735 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 19:19:42,739 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 57G
2023-05-06 19:19:42,742 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 852M
2023-05-06 19:19:42,749 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9804, batch_loss: 0.0015, loss: 0.0555 ||: 100%|##########| 1250/1250 [00:22<00:00, 56.30it/s]

2023-05-06 19:20:04,955 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9232, batch_loss: 0.0024, loss: 0.2644 ||: 100%|##########| 157/157 [00:01<00:00, 147.12it/s]

2023-05-06 19:20:06,037 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:20:06,039 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.980  |     0.923
2023-05-06 19:20:06,040 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   852.273  |       N/A
2023-05-06 19:20:06,041 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.056  |     0.264
2023-05-06 19:20:06,042 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  58715.277  |       N/A





2023-05-06 19:20:06,583 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:23.848574
2023-05-06 19:20:06,586 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:42
2023-05-06 19:20:06,589 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 19:20:06,591 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 57G
2023-05-06 19:20:06,594 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 852M
2023-05-06 19:20:06,596 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9961, batch_loss: 0.0756, loss: 0.0137 ||: 100%|##########| 1250/1250 [00:22<00:00, 56.01it/s]

2023-05-06 19:20:28,925 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9182, batch_loss: 0.0011, loss: 0.3130 ||: 100%|##########| 157/157 [00:01<00:00, 122.37it/s]

2023-05-06 19:20:30,214 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:20:30,216 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.996  |     0.918
2023-05-06 19:20:30,217 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   852.089  |       N/A
2023-05-06 19:20:30,219 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.014  |     0.313
2023-05-06 19:20:30,222 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  58715.277  |       N/A





2023-05-06 19:20:30,827 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:24.237048
2023-05-06 19:20:30,828 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:20
2023-05-06 19:20:30,829 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 19:20:30,832 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 57G
2023-05-06 19:20:30,835 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 852M
2023-05-06 19:20:30,836 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9996, batch_loss: 0.0011, loss: 0.0026 ||: 100%|##########| 1250/1250 [00:22<00:00, 56.06it/s]

2023-05-06 19:20:53,140 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9198, batch_loss: 0.0003, loss: 0.4005 ||: 100%|##########| 157/157 [00:01<00:00, 117.64it/s]

2023-05-06 19:20:54,482 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:20:54,484 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.920
2023-05-06 19:20:54,485 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   852.258  |       N/A
2023-05-06 19:20:54,486 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.003  |     0.401
2023-05-06 19:20:54,487 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  58715.277  |       N/A





2023-05-06 19:20:55,034 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:24.204817
2023-05-06 19:20:55,038 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 19:20:55,085 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 58715.27734375,
  "peak_gpu_0_memory_MB": 1175.95849609375,
  "training_duration": "0:01:58.109520",
  "epoch": 4,
  "training_accuracy": 0.99955,
  "training_loss": 0.0026041465866532233,
  "training_worker_0_memory_MB": 58715.27734375,
  "training_gpu_0_memory_MB": 852.25830078125,
  "validation_accuracy": 0.9198,
  "validation_loss": 0.4005008225407448,
  "best_validation_accuracy": 0.9282,
  "best_validation_loss": 0.18483563675693457
}
2023-05-06 19:20:55,090 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/84/model.tar.gz


[32m[I 2023-05-06 19:20:57,611][0m Trial 84 finished with value: 0.9282 and parameters: {'embedding_dim': 88, 'max_filter_size': 5, 'num_filters': 90}. Best is trial 41 with value: 0.9342.[0m


2023-05-06 19:20:57,780 - INFO - allennlp.common.params - evaluation = None
2023-05-06 19:20:57,782 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 19:20:57,785 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 19:20:57,786 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 19:20:57,788 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 19:20:57,790 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 19:20:57,795 - INFO - allennlp.common.params - type = default
2023-05-06 19:20:57,797 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 19:20:57,799 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 19:20:57,800 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 19:20:57,801 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 19:20:57,806 - INFO - all

loading instances: 40000it [01:41, 394.74it/s]

2023-05-06 19:22:39,190 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 19:22:39,194 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 19:22:39,200 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 19:22:39,201 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 19:22:39,204 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 19:22:39,205 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 19:22:39,206 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 19:22:39,207 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 19:22:39,208 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 19:22:39,209 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:14, 338.61it/s]

2023-05-06 19:22:53,982 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 19:22:53,986 - INFO - allennlp.common.params - type = from_instances
2023-05-06 19:22:53,988 - INFO - allennlp.common.params - min_count = None
2023-05-06 19:22:53,990 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 19:22:53,991 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 19:22:53,993 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 19:22:53,995 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 19:22:53,996 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 19:22:54,001 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 19:22:54,002 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 19:22:54,003 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 19


building vocab: 40000it [00:04, 8433.06it/s]


2023-05-06 19:22:58,963 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 19:22:58,965 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 19:22:58,967 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 19:22:58,972 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 19:22:58,975 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 79
2023-05-06 19:22:58,976 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 19:22:58,977 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 19:22:58,980 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 19:22:58,982 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tok

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 19:23:06,895 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 19:23:06,898 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 19:23:06,906 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9097, batch_loss: 0.0674, loss: 0.2416 ||: 100%|##########| 1250/1250 [00:32<00:00, 38.01it/s]

2023-05-06 19:23:39,616 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 19:23:39,633 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 19:23:39,637 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 19:23:39,644 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9216, batch_loss: 0.0422, loss: 0.1977 ||: 100%|##########| 157/157 [00:01<00:00, 85.86it/s]

2023-05-06 19:23:41,450 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:23:41,452 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.910  |     0.922
2023-05-06 19:23:41,453 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   852.326  |       N/A
2023-05-06 19:23:41,456 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.242  |     0.198
2023-05-06 19:23:41,457 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  59321.867  |       N/A





2023-05-06 19:23:41,840 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:35.120034
2023-05-06 19:23:41,849 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:05:12
2023-05-06 19:23:41,850 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 19:23:41,851 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 58G
2023-05-06 19:23:41,854 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.7G
2023-05-06 19:23:41,855 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9508, batch_loss: 0.2171, loss: 0.1312 ||: 100%|##########| 1250/1250 [00:33<00:00, 36.93it/s]

2023-05-06 19:24:15,715 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9280, batch_loss: 0.0142, loss: 0.2085 ||: 100%|##########| 157/157 [00:01<00:00, 94.52it/s]

2023-05-06 19:24:17,382 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:24:17,384 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.951  |     0.928
2023-05-06 19:24:17,386 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1758.281  |       N/A
2023-05-06 19:24:17,388 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.131  |     0.208
2023-05-06 19:24:17,389 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  59352.531  |       N/A





2023-05-06 19:24:17,690 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:35.839849
2023-05-06 19:24:17,694 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:42
2023-05-06 19:24:17,698 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 19:24:17,699 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 58G
2023-05-06 19:24:17,703 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.7G
2023-05-06 19:24:17,706 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9835, batch_loss: 0.0040, loss: 0.0477 ||: 100%|##########| 1250/1250 [00:34<00:00, 36.37it/s]

2023-05-06 19:24:52,084 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9260, batch_loss: 0.0025, loss: 0.2633 ||: 100%|##########| 157/157 [00:01<00:00, 96.07it/s] 

2023-05-06 19:24:53,725 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:24:53,730 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.984  |     0.926
2023-05-06 19:24:53,732 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1758.406  |       N/A





2023-05-06 19:24:53,736 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.048  |     0.263
2023-05-06 19:24:53,739 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  59352.531  |       N/A
2023-05-06 19:24:54,037 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:36.339345
2023-05-06 19:24:54,044 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:09
2023-05-06 19:24:54,045 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 19:24:54,047 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 58G
2023-05-06 19:24:54,049 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.7G
2023-05-06 19:24:54,051 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9972, batch_loss: 0.0388, loss: 0.0098 ||: 100%|##########| 1250/1250 [00:34<00:00, 36.07it/s]

2023-05-06 19:25:28,720 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9252, batch_loss: 0.0005, loss: 0.3910 ||: 100%|##########| 157/157 [00:01<00:00, 89.06it/s]

2023-05-06 19:25:30,493 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:25:30,494 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.997  |     0.925
2023-05-06 19:25:30,496 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1758.222  |       N/A
2023-05-06 19:25:30,498 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.010  |     0.391
2023-05-06 19:25:30,499 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  59352.531  |       N/A





2023-05-06 19:25:30,849 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:36.803358
2023-05-06 19:25:30,851 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 19:25:30,875 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 0,
  "peak_worker_0_memory_MB": 59352.53125,
  "peak_gpu_0_memory_MB": 1758.40576171875,
  "training_duration": "0:02:23.772649",
  "epoch": 3,
  "training_accuracy": 0.9972,
  "training_loss": 0.009798019742564066,
  "training_worker_0_memory_MB": 59352.53125,
  "training_gpu_0_memory_MB": 1758.2216796875,
  "validation_accuracy": 0.9252,
  "validation_loss": 0.390956040200589,
  "best_validation_accuracy": 0.9216,
  "best_validation_loss": 0.19771482571247656
}
2023-05-06 19:25:30,877 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/85/model.tar.gz


[32m[I 2023-05-06 19:25:32,644][0m Trial 85 finished with value: 0.9216 and parameters: {'embedding_dim': 79, 'max_filter_size': 5, 'num_filters': 246}. Best is trial 41 with value: 0.9342.[0m


2023-05-06 19:25:32,775 - INFO - allennlp.common.params - evaluation = None
2023-05-06 19:25:32,777 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 19:25:32,790 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 19:25:32,791 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 19:25:32,792 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 19:25:32,794 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 19:25:32,797 - INFO - allennlp.common.params - type = default
2023-05-06 19:25:32,799 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 19:25:32,800 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 19:25:32,802 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 19:25:32,803 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 19:25:32,805 - INFO - all

loading instances: 40000it [01:40, 398.94it/s]

2023-05-06 19:27:13,138 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 19:27:13,140 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 19:27:13,142 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 19:27:13,145 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 19:27:13,146 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 19:27:13,150 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 19:27:13,151 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 19:27:13,152 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 19:27:13,153 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 19:27:13,156 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:15, 325.13it/s]

2023-05-06 19:27:28,543 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 19:27:28,547 - INFO - allennlp.common.params - type = from_instances
2023-05-06 19:27:28,550 - INFO - allennlp.common.params - min_count = None
2023-05-06 19:27:28,552 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 19:27:28,554 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 19:27:28,556 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 19:27:28,558 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 19:27:28,560 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 19:27:28,561 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 19:27:28,563 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 19:27:28,565 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 19


building vocab: 40000it [00:05, 6989.22it/s]


2023-05-06 19:27:34,477 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 19:27:34,479 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 19:27:34,482 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 19:27:34,486 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 19:27:34,488 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 61
2023-05-06 19:27:34,494 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 19:27:34,494 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 19:27:34,498 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 19:27:34,499 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tok

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 19:27:39,969 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 19:27:39,972 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 19:27:39,977 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9070, batch_loss: 0.0946, loss: 0.2500 ||: 100%|##########| 1250/1250 [00:17<00:00, 73.33it/s]

2023-05-06 19:27:56,899 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 19:27:56,909 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 19:27:56,911 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 19:27:56,916 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9226, batch_loss: 0.0792, loss: 0.1925 ||: 100%|##########| 157/157 [00:00<00:00, 182.08it/s]

2023-05-06 19:27:57,769 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:27:57,773 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.907  |     0.923
2023-05-06 19:27:57,774 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1758.391  |       N/A
2023-05-06 19:27:57,778 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.250  |     0.193
2023-05-06 19:27:57,782 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  59975.145  |       N/A





2023-05-06 19:27:58,002 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:18.161355
2023-05-06 19:27:58,007 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:41
2023-05-06 19:27:58,010 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 19:27:58,014 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 59G
2023-05-06 19:27:58,018 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 518M
2023-05-06 19:27:58,021 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9459, batch_loss: 0.2200, loss: 0.1411 ||: 100%|##########| 1250/1250 [00:16<00:00, 75.56it/s]

2023-05-06 19:28:14,570 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9298, batch_loss: 0.0257, loss: 0.1806 ||: 100%|##########| 157/157 [00:00<00:00, 194.99it/s]

2023-05-06 19:28:15,389 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:28:15,394 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.946  |     0.930
2023-05-06 19:28:15,398 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   517.804  |       N/A
2023-05-06 19:28:15,400 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.141  |     0.181
2023-05-06 19:28:15,402 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  59975.875  |       N/A





2023-05-06 19:28:15,624 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:17.613000
2023-05-06 19:28:15,625 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:22
2023-05-06 19:28:15,627 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 19:28:15,628 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 59G
2023-05-06 19:28:15,632 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 518M
2023-05-06 19:28:15,633 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9764, batch_loss: 0.0028, loss: 0.0675 ||: 100%|##########| 1250/1250 [00:15<00:00, 81.17it/s]

2023-05-06 19:28:31,041 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9248, batch_loss: 0.0048, loss: 0.2321 ||: 100%|##########| 157/157 [00:01<00:00, 147.86it/s]

2023-05-06 19:28:32,112 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:28:32,113 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.976  |     0.925





2023-05-06 19:28:32,115 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   517.928  |       N/A
2023-05-06 19:28:32,117 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.068  |     0.232
2023-05-06 19:28:32,119 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  59975.875  |       N/A
2023-05-06 19:28:32,349 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:16.721784
2023-05-06 19:28:32,358 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:01
2023-05-06 19:28:32,367 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 19:28:32,368 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 59G
2023-05-06 19:28:32,372 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 518M
2023-05-06 19:28:32,373 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9934, batch_loss: 0.0640, loss: 0.0220 ||: 100%|##########| 1250/1250 [00:15<00:00, 82.25it/s]

2023-05-06 19:28:47,583 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9196, batch_loss: 0.0049, loss: 0.2816 ||: 100%|##########| 157/157 [00:00<00:00, 179.58it/s]

2023-05-06 19:28:48,465 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:28:48,467 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.993  |     0.920
2023-05-06 19:28:48,469 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   517.744  |       N/A
2023-05-06 19:28:48,472 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.022  |     0.282
2023-05-06 19:28:48,473 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  59975.875  |       N/A





2023-05-06 19:28:48,726 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:16.359190
2023-05-06 19:28:48,733 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:01:42
2023-05-06 19:28:48,735 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 19:28:48,740 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 59G
2023-05-06 19:28:48,746 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 518M
2023-05-06 19:28:48,748 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9993, batch_loss: 0.0078, loss: 0.0052 ||: 100%|##########| 1250/1250 [00:16<00:00, 76.86it/s]

2023-05-06 19:29:05,024 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9172, batch_loss: 0.0010, loss: 0.3342 ||: 100%|##########| 157/157 [00:00<00:00, 195.26it/s]

2023-05-06 19:29:05,837 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:29:05,839 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.999  |     0.917
2023-05-06 19:29:05,840 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   517.914  |       N/A
2023-05-06 19:29:05,843 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.005  |     0.334
2023-05-06 19:29:05,844 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  59975.875  |       N/A





2023-05-06 19:29:06,120 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:17.384255
2023-05-06 19:29:06,121 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 19:29:06,164 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 59975.875,
  "peak_gpu_0_memory_MB": 1758.39111328125,
  "training_duration": "0:01:25.996209",
  "epoch": 4,
  "training_accuracy": 0.999325,
  "training_loss": 0.005157328358053929,
  "training_worker_0_memory_MB": 59975.875,
  "training_gpu_0_memory_MB": 517.91357421875,
  "validation_accuracy": 0.9172,
  "validation_loss": 0.33420923370259936,
  "best_validation_accuracy": 0.9298,
  "best_validation_loss": 0.1805807486722234
}
2023-05-06 19:29:06,168 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/86/model.tar.gz


[32m[I 2023-05-06 19:29:07,785][0m Trial 86 finished with value: 0.9298 and parameters: {'embedding_dim': 61, 'max_filter_size': 5, 'num_filters': 48}. Best is trial 41 with value: 0.9342.[0m


2023-05-06 19:29:07,942 - INFO - allennlp.common.params - evaluation = None
2023-05-06 19:29:07,948 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 19:29:07,951 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 19:29:07,952 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 19:29:07,954 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 19:29:07,955 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 19:29:07,959 - INFO - allennlp.common.params - type = default
2023-05-06 19:29:07,961 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 19:29:07,963 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 19:29:07,966 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 19:29:07,968 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 19:29:07,970 - INFO - all

loading instances: 40000it [01:42, 390.17it/s]

2023-05-06 19:30:50,563 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 19:30:50,565 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 19:30:50,569 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 19:30:50,571 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 19:30:50,573 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 19:30:50,575 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 19:30:50,576 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 19:30:50,577 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 19:30:50,578 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 19:30:50,579 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:09, 536.92it/s]

2023-05-06 19:30:59,897 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 19:30:59,901 - INFO - allennlp.common.params - type = from_instances
2023-05-06 19:30:59,903 - INFO - allennlp.common.params - min_count = None
2023-05-06 19:30:59,909 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 19:30:59,910 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 19:30:59,911 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 19:30:59,912 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 19:30:59,913 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 19:30:59,914 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 19:30:59,915 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 19:30:59,916 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 19


building vocab: 40000it [00:06, 6582.66it/s]


2023-05-06 19:31:06,241 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 19:31:06,246 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 19:31:06,249 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 19:31:06,251 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 19:31:06,256 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 149
2023-05-06 19:31:06,257 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 19:31:06,259 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 19:31:06,260 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 19:31:06,263 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 19:31:13,960 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 19:31:13,962 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 19:31:13,968 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9077, batch_loss: 0.0591, loss: 0.2436 ||: 100%|##########| 1250/1250 [00:26<00:00, 47.72it/s]

2023-05-06 19:31:40,037 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 19:31:40,050 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 19:31:40,052 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 19:31:40,057 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9236, batch_loss: 0.0453, loss: 0.1965 ||: 100%|##########| 157/157 [00:01<00:00, 153.86it/s]

2023-05-06 19:31:41,064 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:31:41,066 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.908  |     0.924
2023-05-06 19:31:41,069 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   517.981  |       N/A
2023-05-06 19:31:41,070 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.244  |     0.197
2023-05-06 19:31:41,072 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  60656.449  |       N/A





2023-05-06 19:31:41,830 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:27.994491
2023-05-06 19:31:41,834 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:05
2023-05-06 19:31:41,836 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 19:31:41,837 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 59G
2023-05-06 19:31:41,839 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 989M
2023-05-06 19:31:41,841 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9487, batch_loss: 0.2014, loss: 0.1326 ||: 100%|##########| 1250/1250 [00:26<00:00, 47.20it/s]

2023-05-06 19:32:08,335 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9360, batch_loss: 0.0151, loss: 0.1849 ||: 100%|##########| 157/157 [00:01<00:00, 123.19it/s]

2023-05-06 19:32:09,615 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation





2023-05-06 19:32:09,618 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.949  |     0.936
2023-05-06 19:32:09,620 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   988.651  |       N/A
2023-05-06 19:32:09,622 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.133  |     0.185
2023-05-06 19:32:09,624 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  60713.250  |       N/A
2023-05-06 19:32:10,551 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:28.715274
2023-05-06 19:32:10,554 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:43
2023-05-06 19:32:10,556 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 19:32:10,558 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 59G
2023-05-06 19:32:10,560 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memo

accuracy: 0.9820, batch_loss: 0.0016, loss: 0.0500 ||: 100%|##########| 1250/1250 [00:26<00:00, 47.11it/s]

2023-05-06 19:32:37,099 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9300, batch_loss: 0.0035, loss: 0.2458 ||: 100%|##########| 157/157 [00:00<00:00, 159.62it/s]

2023-05-06 19:32:38,091 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:32:38,095 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.982  |     0.930
2023-05-06 19:32:38,098 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   988.651  |       N/A
2023-05-06 19:32:38,100 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.050  |     0.246
2023-05-06 19:32:38,103 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  60713.250  |       N/A





2023-05-06 19:32:38,920 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:28.363876
2023-05-06 19:32:38,921 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:16
2023-05-06 19:32:38,925 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 19:32:38,927 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 59G
2023-05-06 19:32:38,929 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 989M
2023-05-06 19:32:38,930 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9973, batch_loss: 0.0270, loss: 0.0116 ||: 100%|##########| 1250/1250 [00:25<00:00, 48.45it/s]

2023-05-06 19:33:04,736 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9288, batch_loss: 0.0012, loss: 0.3212 ||: 100%|##########| 157/157 [00:01<00:00, 155.78it/s]

2023-05-06 19:33:05,752 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:33:05,754 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.997  |     0.929
2023-05-06 19:33:05,758 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   988.651  |       N/A
2023-05-06 19:33:05,759 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.012  |     0.321
2023-05-06 19:33:05,761 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  60713.250  |       N/A





2023-05-06 19:33:06,532 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:27.607253
2023-05-06 19:33:06,533 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:47
2023-05-06 19:33:06,540 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 19:33:06,544 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 59G
2023-05-06 19:33:06,550 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 989M
2023-05-06 19:33:06,552 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9998, batch_loss: 0.0009, loss: 0.0021 ||: 100%|##########| 1250/1250 [00:26<00:00, 46.94it/s]

2023-05-06 19:33:33,189 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9256, batch_loss: 0.0017, loss: 0.3574 ||: 100%|##########| 157/157 [00:01<00:00, 129.46it/s]

2023-05-06 19:33:34,414 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:33:34,416 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.926
2023-05-06 19:33:34,417 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   988.651  |       N/A
2023-05-06 19:33:34,418 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.002  |     0.357
2023-05-06 19:33:34,420 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  60713.250  |       N/A





2023-05-06 19:33:35,403 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:28.863255
2023-05-06 19:33:35,405 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 19:33:35,488 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 60713.25,
  "peak_gpu_0_memory_MB": 988.65087890625,
  "training_duration": "0:02:20.576876",
  "epoch": 4,
  "training_accuracy": 0.999825,
  "training_loss": 0.002090123803749157,
  "training_worker_0_memory_MB": 60713.25,
  "training_gpu_0_memory_MB": 988.65087890625,
  "validation_accuracy": 0.9256,
  "validation_loss": 0.3573515321525514,
  "best_validation_accuracy": 0.936,
  "best_validation_loss": 0.1849305447855383
}
2023-05-06 19:33:35,492 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/87/model.tar.gz


[32m[I 2023-05-06 19:33:39,080][0m Trial 87 finished with value: 0.936 and parameters: {'embedding_dim': 149, 'max_filter_size': 5, 'num_filters': 55}. Best is trial 87 with value: 0.936.[0m


2023-05-06 19:33:39,214 - INFO - allennlp.common.params - evaluation = None
2023-05-06 19:33:39,216 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 19:33:39,219 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 19:33:39,225 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 19:33:39,225 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 19:33:39,227 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 19:33:39,228 - INFO - allennlp.common.params - type = default
2023-05-06 19:33:39,232 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 19:33:39,234 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 19:33:39,235 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 19:33:39,238 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 19:33:39,241 - INFO - all

loading instances: 40000it [01:44, 384.08it/s]

2023-05-06 19:35:23,452 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 19:35:23,459 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 19:35:23,461 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 19:35:23,463 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 19:35:23,464 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 19:35:23,465 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 19:35:23,466 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 19:35:23,467 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 19:35:23,468 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 19:35:23,469 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:10, 497.36it/s]

2023-05-06 19:35:33,529 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 19:35:33,531 - INFO - allennlp.common.params - type = from_instances
2023-05-06 19:35:33,535 - INFO - allennlp.common.params - min_count = None
2023-05-06 19:35:33,537 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 19:35:33,538 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 19:35:33,542 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 19:35:33,543 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 19:35:33,544 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 19:35:33,545 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 19:35:33,546 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 19:35:33,547 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 19


building vocab: 40000it [00:06, 5804.70it/s]


2023-05-06 19:35:40,759 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 19:35:40,761 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 19:35:40,765 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 19:35:40,767 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 19:35:40,769 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 99
2023-05-06 19:35:40,770 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 19:35:40,772 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 19:35:40,773 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 19:35:40,774 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tok

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 19:35:46,873 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 19:35:46,876 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 19:35:46,881 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9093, batch_loss: 0.0743, loss: 0.2440 ||: 100%|##########| 1250/1250 [00:20<00:00, 61.02it/s]

2023-05-06 19:36:07,242 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 19:36:07,254 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 19:36:07,256 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 19:36:07,261 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9248, batch_loss: 0.0270, loss: 0.1929 ||: 100%|##########| 157/157 [00:00<00:00, 173.38it/s]

2023-05-06 19:36:08,155 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:36:08,157 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.909  |     0.925
2023-05-06 19:36:08,160 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   988.651  |       N/A
2023-05-06 19:36:08,162 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.244  |     0.193
2023-05-06 19:36:08,163 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  61258.645  |       N/A





2023-05-06 19:36:08,644 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:21.895500
2023-05-06 19:36:08,648 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:12
2023-05-06 19:36:08,650 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 19:36:08,655 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 60G
2023-05-06 19:36:08,657 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 722M
2023-05-06 19:36:08,659 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9476, batch_loss: 0.1952, loss: 0.1351 ||: 100%|##########| 1250/1250 [00:20<00:00, 61.90it/s]

2023-05-06 19:36:28,859 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9330, batch_loss: 0.0158, loss: 0.1787 ||: 100%|##########| 157/157 [00:00<00:00, 172.20it/s]

2023-05-06 19:36:29,781 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:36:29,786 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.948  |     0.933
2023-05-06 19:36:29,787 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   721.745  |       N/A
2023-05-06 19:36:29,792 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.135  |     0.179
2023-05-06 19:36:29,794 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  61296.363  |       N/A





2023-05-06 19:36:30,298 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:21.647787
2023-05-06 19:36:30,302 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:52
2023-05-06 19:36:30,304 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 19:36:30,306 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 60G
2023-05-06 19:36:30,308 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 722M
2023-05-06 19:36:30,309 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9799, batch_loss: 0.0014, loss: 0.0570 ||: 100%|##########| 1250/1250 [00:19<00:00, 63.22it/s]

2023-05-06 19:36:50,089 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9316, batch_loss: 0.0022, loss: 0.2418 ||: 100%|##########| 157/157 [00:00<00:00, 185.07it/s]

2023-05-06 19:36:50,949 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:36:50,952 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.980  |     0.932
2023-05-06 19:36:50,956 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   721.870  |       N/A
2023-05-06 19:36:50,959 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.057  |     0.242
2023-05-06 19:36:50,960 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  61296.363  |       N/A





2023-05-06 19:36:51,527 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:21.222909
2023-05-06 19:36:51,529 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:29
2023-05-06 19:36:51,531 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 19:36:51,532 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 60G
2023-05-06 19:36:51,534 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 722M
2023-05-06 19:36:51,536 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9964, batch_loss: 0.0198, loss: 0.0144 ||: 100%|##########| 1250/1250 [00:20<00:00, 62.36it/s]

2023-05-06 19:37:11,585 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9282, batch_loss: 0.0016, loss: 0.2763 ||: 100%|##########| 157/157 [00:00<00:00, 181.56it/s]

2023-05-06 19:37:12,458 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:37:12,460 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.996  |     0.928
2023-05-06 19:37:12,461 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   721.686  |       N/A
2023-05-06 19:37:12,463 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.014  |     0.276
2023-05-06 19:37:12,465 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  61296.363  |       N/A





2023-05-06 19:37:12,993 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:21.462150
2023-05-06 19:37:12,994 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:08
2023-05-06 19:37:13,002 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 19:37:13,004 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 60G
2023-05-06 19:37:13,008 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 722M
2023-05-06 19:37:13,010 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9997, batch_loss: 0.0012, loss: 0.0027 ||: 100%|##########| 1250/1250 [00:19<00:00, 62.83it/s]

2023-05-06 19:37:32,913 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9294, batch_loss: 0.0016, loss: 0.3242 ||: 100%|##########| 157/157 [00:00<00:00, 182.67it/s]

2023-05-06 19:37:33,793 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:37:33,795 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.929
2023-05-06 19:37:33,797 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   721.855  |       N/A
2023-05-06 19:37:33,799 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.003  |     0.324
2023-05-06 19:37:33,801 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  61296.363  |       N/A





2023-05-06 19:37:34,327 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:21.324699
2023-05-06 19:37:34,331 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 19:37:34,383 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 61296.36328125,
  "peak_gpu_0_memory_MB": 988.65087890625,
  "training_duration": "0:01:47.043678",
  "epoch": 4,
  "training_accuracy": 0.999725,
  "training_loss": 0.002682677027447062,
  "training_worker_0_memory_MB": 61296.36328125,
  "training_gpu_0_memory_MB": 721.85498046875,
  "validation_accuracy": 0.9294,
  "validation_loss": 0.3242379550260023,
  "best_validation_accuracy": 0.933,
  "best_validation_loss": 0.17865513825112847
}
2023-05-06 19:37:34,385 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/88/model.tar.gz


[32m[I 2023-05-06 19:37:36,541][0m Trial 88 finished with value: 0.933 and parameters: {'embedding_dim': 99, 'max_filter_size': 5, 'num_filters': 54}. Best is trial 87 with value: 0.936.[0m


2023-05-06 19:37:36,705 - INFO - allennlp.common.params - evaluation = None
2023-05-06 19:37:36,707 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 19:37:36,714 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 19:37:36,715 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 19:37:36,717 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 19:37:36,722 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 19:37:36,723 - INFO - allennlp.common.params - type = default
2023-05-06 19:37:36,725 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 19:37:36,730 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 19:37:36,731 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 19:37:36,732 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 19:37:36,733 - INFO - all

loading instances: 40000it [01:44, 384.44it/s]

2023-05-06 19:39:20,827 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 19:39:20,832 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 19:39:20,837 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 19:39:20,838 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 19:39:20,840 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 19:39:20,841 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None





2023-05-06 19:39:20,844 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 19:39:20,846 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 19:39:20,847 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 19:39:20,849 - INFO - allennlp.common.params - validation_data_loader.cuda_device = None
2023-05-06 19:39:20,852 - INFO - allennlp.common.params - validation_data_loader.quiet = False
2023-05-06 19:39:20,854 - INFO - allennlp.common.params - validation_data_loader.collate_fn = <allennlp.data.data_loaders.data_collator.DefaultDataCollator object at 0x7f1035c88d60>


loading instances: 5000it [00:12, 396.03it/s]

2023-05-06 19:39:33,484 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 19:39:33,486 - INFO - allennlp.common.params - type = from_instances
2023-05-06 19:39:33,488 - INFO - allennlp.common.params - min_count = None
2023-05-06 19:39:33,489 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 19:39:33,491 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 19:39:33,494 - INFO - allennlp.common.params - pretrained_files = None





2023-05-06 19:39:33,501 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 19:39:33,502 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 19:39:33,503 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 19:39:33,504 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 19:39:33,505 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 19:39:33,507 - INFO - allennlp.data.vocabulary - Fitting token dictionary from dataset.


building vocab: 40000it [00:04, 8548.79it/s]


2023-05-06 19:39:38,387 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 19:39:38,388 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 19:39:38,395 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 19:39:38,399 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 19:39:38,403 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 150
2023-05-06 19:39:38,405 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 19:39:38,407 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 19:39:38,410 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 19:39:38,411 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 19:39:44,002 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 19:39:44,003 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 19:39:44,010 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9019, batch_loss: 0.1011, loss: 0.2589 ||: 100%|##########| 1250/1250 [00:17<00:00, 69.54it/s]

2023-05-06 19:40:01,814 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 19:40:01,824 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 19:40:01,828 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 19:40:01,835 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9162, batch_loss: 0.0604, loss: 0.2015 ||: 100%|##########| 157/157 [00:00<00:00, 201.96it/s]

2023-05-06 19:40:02,607 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:40:02,608 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.902  |     0.916
2023-05-06 19:40:02,609 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   721.923  |       N/A
2023-05-06 19:40:02,610 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.259  |     0.201
2023-05-06 19:40:02,612 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  61898.547  |       N/A





2023-05-06 19:40:03,385 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:19.557747
2023-05-06 19:40:03,394 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:48
2023-05-06 19:40:03,396 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 19:40:03,397 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 61G
2023-05-06 19:40:03,404 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 865M
2023-05-06 19:40:03,405 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9381, batch_loss: 0.2191, loss: 0.1585 ||: 100%|##########| 1250/1250 [00:17<00:00, 70.32it/s]

2023-05-06 19:40:21,186 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9246, batch_loss: 0.0205, loss: 0.1941 ||: 100%|##########| 157/157 [00:00<00:00, 222.19it/s]

2023-05-06 19:40:21,900 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:40:21,904 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.938  |     0.925





2023-05-06 19:40:21,907 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   864.809  |       N/A
2023-05-06 19:40:21,911 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.158  |     0.194
2023-05-06 19:40:21,916 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  61955.109  |       N/A
2023-05-06 19:40:22,757 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:19.361676
2023-05-06 19:40:22,773 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:32
2023-05-06 19:40:22,775 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 19:40:22,778 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 61G
2023-05-06 19:40:22,781 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 865M
2023-05-06 19:40:22,785 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9670, batch_loss: 0.0064, loss: 0.0903 ||: 100%|##########| 1250/1250 [00:17<00:00, 71.54it/s]

2023-05-06 19:40:40,265 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9282, batch_loss: 0.0124, loss: 0.2092 ||: 100%|##########| 157/157 [00:00<00:00, 228.94it/s]

2023-05-06 19:40:40,956 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:40:40,962 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.967  |     0.928
2023-05-06 19:40:40,963 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   864.809  |       N/A
2023-05-06 19:40:40,965 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.090  |     0.209
2023-05-06 19:40:40,966 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  61955.109  |       N/A





2023-05-06 19:40:41,770 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:18.995028
2023-05-06 19:40:41,771 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:13
2023-05-06 19:40:41,775 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 19:40:41,777 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 61G
2023-05-06 19:40:41,780 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 865M
2023-05-06 19:40:41,782 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9868, batch_loss: 0.0275, loss: 0.0394 ||: 100%|##########| 1250/1250 [00:17<00:00, 72.39it/s]

2023-05-06 19:40:59,061 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9180, batch_loss: 0.0080, loss: 0.2447 ||: 100%|##########| 157/157 [00:00<00:00, 230.86it/s]

2023-05-06 19:40:59,749 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:40:59,750 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.987  |     0.918
2023-05-06 19:40:59,752 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   864.809  |       N/A
2023-05-06 19:40:59,754 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.039  |     0.245
2023-05-06 19:40:59,755 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  61955.109  |       N/A





2023-05-06 19:41:00,490 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:18.714272
2023-05-06 19:41:00,491 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:01:53
2023-05-06 19:41:00,495 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 19:41:00,497 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 61G
2023-05-06 19:41:00,499 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 865M
2023-05-06 19:41:00,501 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9972, batch_loss: 0.0044, loss: 0.0122 ||: 100%|##########| 1250/1250 [00:17<00:00, 71.85it/s]

2023-05-06 19:41:17,904 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9098, batch_loss: 0.0202, loss: 0.3104 ||: 100%|##########| 157/157 [00:00<00:00, 215.99it/s]


2023-05-06 19:41:18,640 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:41:18,644 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.997  |     0.910
2023-05-06 19:41:18,645 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   864.809  |       N/A
2023-05-06 19:41:18,647 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.012  |     0.310
2023-05-06 19:41:18,649 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  61955.109  |       N/A
2023-05-06 19:41:19,504 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:19.008595
2023-05-06 19:41:19,505 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 19:41:19,579 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 61955.109375,
  "peak_gpu_0_memory_MB": 864.8

[32m[I 2023-05-06 19:41:22,633][0m Trial 89 finished with value: 0.9246 and parameters: {'embedding_dim': 150, 'max_filter_size': 3, 'num_filters': 37}. Best is trial 87 with value: 0.936.[0m


2023-05-06 19:41:22,773 - INFO - allennlp.common.params - evaluation = None
2023-05-06 19:41:22,774 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 19:41:22,781 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 19:41:22,785 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 19:41:22,787 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 19:41:22,791 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 19:41:22,793 - INFO - allennlp.common.params - type = default
2023-05-06 19:41:22,796 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 19:41:22,799 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 19:41:22,801 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 19:41:22,802 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 19:41:22,805 - INFO - all

loading instances: 40000it [01:35, 421.00it/s]


2023-05-06 19:42:59,808 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 19:42:59,810 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 19:42:59,812 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 19:42:59,814 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 19:42:59,815 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 19:42:59,817 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 19:42:59,819 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 19:42:59,820 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 19:42:59,821 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 19:42:59,822 - INFO - allennlp.common.params - validation_data_loader.cuda_de

loading instances: 5000it [00:14, 333.33it/s]

2023-05-06 19:43:14,828 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 19:43:14,831 - INFO - allennlp.common.params - type = from_instances
2023-05-06 19:43:14,832 - INFO - allennlp.common.params - min_count = None
2023-05-06 19:43:14,835 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 19:43:14,836 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 19:43:14,839 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 19:43:14,841 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 19:43:14,843 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 19:43:14,845 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 19:43:14,847 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 19:43:14,849 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@





2023-05-06 19:43:14,851 - INFO - allennlp.data.vocabulary - Fitting token dictionary from dataset.


building vocab: 40000it [00:04, 8907.48it/s]


2023-05-06 19:43:19,514 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 19:43:19,515 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 19:43:19,519 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 19:43:19,521 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 19:43:19,523 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 127
2023-05-06 19:43:19,525 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 19:43:19,527 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 19:43:19,529 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 19:43:19,530 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 19:43:25,085 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 19:43:25,087 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 19:43:25,093 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9076, batch_loss: 0.0778, loss: 0.2452 ||: 100%|##########| 1250/1250 [00:23<00:00, 53.35it/s]

2023-05-06 19:43:48,414 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 19:43:48,429 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 19:43:48,431 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 19:43:48,439 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9220, batch_loss: 0.0588, loss: 0.1944 ||: 100%|##########| 157/157 [00:01<00:00, 146.06it/s]

2023-05-06 19:43:49,494 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:43:49,498 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.908  |     0.922
2023-05-06 19:43:49,500 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   864.809  |       N/A
2023-05-06 19:43:49,503 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.245  |     0.194
2023-05-06 19:43:49,505 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  62513.570  |       N/A





2023-05-06 19:43:50,230 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:25.256459
2023-05-06 19:43:50,234 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:40
2023-05-06 19:43:50,238 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 19:43:50,239 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 61G
2023-05-06 19:43:50,244 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 896M
2023-05-06 19:43:50,246 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9490, batch_loss: 0.2302, loss: 0.1344 ||: 100%|##########| 1250/1250 [00:23<00:00, 54.17it/s]

2023-05-06 19:44:13,326 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9290, batch_loss: 0.0182, loss: 0.1844 ||: 100%|##########| 157/157 [00:01<00:00, 134.34it/s]

2023-05-06 19:44:14,505 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:44:14,507 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.949  |     0.929
2023-05-06 19:44:14,509 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   896.205  |       N/A
2023-05-06 19:44:14,510 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.134  |     0.184
2023-05-06 19:44:14,512 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  62561.902  |       N/A





2023-05-06 19:44:15,284 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:25.045824
2023-05-06 19:44:15,288 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:18
2023-05-06 19:44:15,289 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 19:44:15,292 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 61G
2023-05-06 19:44:15,294 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 896M
2023-05-06 19:44:15,297 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9814, batch_loss: 0.0041, loss: 0.0542 ||: 100%|##########| 1250/1250 [00:22<00:00, 55.31it/s]

2023-05-06 19:44:37,904 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9260, batch_loss: 0.0039, loss: 0.2442 ||: 100%|##########| 157/157 [00:01<00:00, 138.17it/s]

2023-05-06 19:44:39,050 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:44:39,052 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.981  |     0.926
2023-05-06 19:44:39,054 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   896.330  |       N/A
2023-05-06 19:44:39,056 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.054  |     0.244
2023-05-06 19:44:39,059 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  62561.902  |       N/A





2023-05-06 19:44:39,825 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:24.535618
2023-05-06 19:44:39,827 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:52
2023-05-06 19:44:39,829 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 19:44:39,830 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 61G
2023-05-06 19:44:39,833 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 896M
2023-05-06 19:44:39,835 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9967, batch_loss: 0.0460, loss: 0.0132 ||: 100%|##########| 1250/1250 [00:22<00:00, 54.52it/s]

2023-05-06 19:45:02,771 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9248, batch_loss: 0.0011, loss: 0.3081 ||: 100%|##########| 157/157 [00:00<00:00, 170.39it/s]

2023-05-06 19:45:03,701 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:45:03,702 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.997  |     0.925
2023-05-06 19:45:03,707 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   896.146  |       N/A
2023-05-06 19:45:03,709 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.013  |     0.308
2023-05-06 19:45:03,711 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  62561.902  |       N/A





2023-05-06 19:45:04,330 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:24.500783
2023-05-06 19:45:04,331 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:28
2023-05-06 19:45:04,339 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 19:45:04,340 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 61G
2023-05-06 19:45:04,342 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 896M
2023-05-06 19:45:04,348 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9996, batch_loss: 0.0005, loss: 0.0026 ||: 100%|##########| 1250/1250 [00:22<00:00, 54.57it/s]

2023-05-06 19:45:27,257 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9232, batch_loss: 0.0003, loss: 0.3618 ||: 100%|##########| 157/157 [00:00<00:00, 168.44it/s]

2023-05-06 19:45:28,195 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation





2023-05-06 19:45:28,202 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.923
2023-05-06 19:45:28,206 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   896.315  |       N/A
2023-05-06 19:45:28,208 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.003  |     0.362
2023-05-06 19:45:28,209 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  62561.902  |       N/A
2023-05-06 19:45:28,855 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:24.516082
2023-05-06 19:45:28,856 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 19:45:28,917 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 62561.90234375,
  "peak_gpu_0_memory_MB": 896.32958984375,
  "training_duration": "0:02:03.221667",
  "epoch": 4,
  "training_accuracy": 0.999575,
  "training_loss": 0.0

[32m[I 2023-05-06 19:45:31,558][0m Trial 90 finished with value: 0.929 and parameters: {'embedding_dim': 127, 'max_filter_size': 5, 'num_filters': 64}. Best is trial 87 with value: 0.936.[0m


2023-05-06 19:45:31,686 - INFO - allennlp.common.params - evaluation = None
2023-05-06 19:45:31,688 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 19:45:31,693 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 19:45:31,694 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 19:45:31,696 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 19:45:31,701 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 19:45:31,707 - INFO - allennlp.common.params - type = default
2023-05-06 19:45:31,708 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 19:45:31,711 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 19:45:31,712 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 19:45:31,713 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 19:45:31,714 - INFO - all

loading instances: 40000it [01:38, 406.68it/s]

2023-05-06 19:47:10,111 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 19:47:10,121 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 19:47:10,123 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 19:47:10,124 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 19:47:10,126 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 19:47:10,128 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 19:47:10,130 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 19:47:10,131 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 19:47:10,132 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 19:47:10,133 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:10, 471.84it/s]

2023-05-06 19:47:20,743 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 19:47:20,745 - INFO - allennlp.common.params - type = from_instances
2023-05-06 19:47:20,747 - INFO - allennlp.common.params - min_count = None





2023-05-06 19:47:20,752 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 19:47:20,753 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 19:47:20,758 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 19:47:20,759 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 19:47:20,760 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 19:47:20,761 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 19:47:20,762 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 19:47:20,763 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 19:47:20,764 - INFO - allennlp.data.vocabulary - Fitting token dictionary from dataset.


building vocab: 40000it [00:06, 6460.39it/s]


2023-05-06 19:47:27,127 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 19:47:27,129 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 19:47:27,136 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 19:47:27,141 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 19:47:27,143 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 122
2023-05-06 19:47:27,146 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 19:47:27,147 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 19:47:27,149 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 19:47:27,150 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 19:47:32,588 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 19:47:32,590 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 19:47:32,595 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9083, batch_loss: 0.0665, loss: 0.2429 ||: 100%|##########| 1250/1250 [00:22<00:00, 54.43it/s]

2023-05-06 19:47:55,447 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 19:47:55,461 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 19:47:55,465 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 19:47:55,472 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9238, batch_loss: 0.0431, loss: 0.1934 ||: 100%|##########| 157/157 [00:00<00:00, 162.83it/s]

2023-05-06 19:47:56,420 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:47:56,421 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.908  |     0.924
2023-05-06 19:47:56,426 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   896.383  |       N/A
2023-05-06 19:47:56,427 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.243  |     0.193
2023-05-06 19:47:56,429 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  63139.957  |       N/A





2023-05-06 19:47:57,118 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:24.646521
2023-05-06 19:47:57,119 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:35
2023-05-06 19:47:57,121 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 19:47:57,124 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 62G
2023-05-06 19:47:57,125 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 849M
2023-05-06 19:47:57,129 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9492, batch_loss: 0.2125, loss: 0.1323 ||: 100%|##########| 1250/1250 [00:22<00:00, 55.44it/s]

2023-05-06 19:48:19,682 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9320, batch_loss: 0.0208, loss: 0.1846 ||: 100%|##########| 157/157 [00:01<00:00, 143.79it/s]

2023-05-06 19:48:20,782 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:48:20,784 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.949  |     0.932
2023-05-06 19:48:20,787 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   849.223  |       N/A
2023-05-06 19:48:20,788 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.132  |     0.185





2023-05-06 19:48:20,790 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  63186.582  |       N/A
2023-05-06 19:48:21,546 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:24.425344
2023-05-06 19:48:21,550 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:13
2023-05-06 19:48:21,551 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 19:48:21,554 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 62G
2023-05-06 19:48:21,556 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 849M
2023-05-06 19:48:21,560 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9815, batch_loss: 0.0038, loss: 0.0534 ||: 100%|##########| 1250/1250 [00:22<00:00, 56.49it/s]

2023-05-06 19:48:43,706 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9230, batch_loss: 0.0271, loss: 0.2408 ||: 100%|##########| 157/157 [00:01<00:00, 145.23it/s]

2023-05-06 19:48:44,798 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:48:44,800 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.981  |     0.923
2023-05-06 19:48:44,803 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   849.223  |       N/A
2023-05-06 19:48:44,804 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.053  |     0.241
2023-05-06 19:48:44,807 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  63186.582  |       N/A





2023-05-06 19:48:45,560 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:24.008610
2023-05-06 19:48:45,564 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:48
2023-05-06 19:48:45,565 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 19:48:45,567 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 62G
2023-05-06 19:48:45,575 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 849M
2023-05-06 19:48:45,576 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9968, batch_loss: 0.0143, loss: 0.0128 ||: 100%|##########| 1250/1250 [00:22<00:00, 56.50it/s]

2023-05-06 19:49:07,710 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9228, batch_loss: 0.0360, loss: 0.3169 ||: 100%|##########| 157/157 [00:01<00:00, 128.41it/s]

2023-05-06 19:49:08,942 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:49:08,944 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.997  |     0.923
2023-05-06 19:49:08,945 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   849.223  |       N/A
2023-05-06 19:49:08,947 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.013  |     0.317
2023-05-06 19:49:08,949 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  63186.582  |       N/A





2023-05-06 19:49:09,683 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:24.118572
2023-05-06 19:49:09,686 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:24
2023-05-06 19:49:09,687 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 19:49:09,689 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 62G
2023-05-06 19:49:09,693 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 849M
2023-05-06 19:49:09,695 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9998, batch_loss: 0.0006, loss: 0.0022 ||: 100%|##########| 1250/1250 [00:22<00:00, 55.82it/s]

2023-05-06 19:49:32,096 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9206, batch_loss: 0.0616, loss: 0.3682 ||: 100%|##########| 157/157 [00:01<00:00, 142.17it/s]

2023-05-06 19:49:33,210 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:49:33,212 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.921
2023-05-06 19:49:33,213 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   849.223  |       N/A
2023-05-06 19:49:33,214 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.002  |     0.368
2023-05-06 19:49:33,217 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  63186.582  |       N/A





2023-05-06 19:49:33,984 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:24.296187
2023-05-06 19:49:33,985 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 19:49:34,048 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 63186.58203125,
  "peak_gpu_0_memory_MB": 896.3828125,
  "training_duration": "0:02:00.738405",
  "epoch": 4,
  "training_accuracy": 0.999775,
  "training_loss": 0.002245872289084946,
  "training_worker_0_memory_MB": 63186.58203125,
  "training_gpu_0_memory_MB": 849.22314453125,
  "validation_accuracy": 0.9206,
  "validation_loss": 0.368206497139422,
  "best_validation_accuracy": 0.932,
  "best_validation_loss": 0.18457842283662718
}
2023-05-06 19:49:34,051 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/91/model.tar.gz


[32m[I 2023-05-06 19:49:36,650][0m Trial 91 finished with value: 0.932 and parameters: {'embedding_dim': 122, 'max_filter_size': 5, 'num_filters': 58}. Best is trial 87 with value: 0.936.[0m


2023-05-06 19:49:36,773 - INFO - allennlp.common.params - evaluation = None
2023-05-06 19:49:36,775 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 19:49:36,778 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 19:49:36,780 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 19:49:36,784 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 19:49:36,787 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 19:49:36,791 - INFO - allennlp.common.params - type = default
2023-05-06 19:49:36,795 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 19:49:36,797 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 19:49:36,798 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 19:49:36,799 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 19:49:36,800 - INFO - all

loading instances: 40000it [01:37, 410.09it/s]

2023-05-06 19:51:14,382 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 19:51:14,385 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 19:51:14,389 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 19:51:14,391 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 19:51:14,395 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 19:51:14,397 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 19:51:14,398 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 19:51:14,400 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 19:51:14,401 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 19:51:14,402 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:10, 496.59it/s]

2023-05-06 19:51:24,482 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 19:51:24,484 - INFO - allennlp.common.params - type = from_instances
2023-05-06 19:51:24,486 - INFO - allennlp.common.params - min_count = None
2023-05-06 19:51:24,489 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 19:51:24,491 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 19:51:24,494 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 19:51:24,495 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 19:51:24,497 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 19:51:24,499 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 19:51:24,500 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 19:51:24,503 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 19


building vocab: 40000it [00:06, 6109.43it/s]


2023-05-06 19:51:31,265 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 19:51:31,269 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 19:51:31,271 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 19:51:31,275 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 19:51:31,276 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 132
2023-05-06 19:51:31,278 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 19:51:31,279 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 19:51:31,281 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 19:51:31,282 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 19:51:36,910 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 19:51:36,912 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 19:51:36,918 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9090, batch_loss: 0.0708, loss: 0.2442 ||: 100%|##########| 1250/1250 [00:24<00:00, 50.55it/s]

2023-05-06 19:52:01,527 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 19:52:01,540 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 19:52:01,542 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 19:52:01,547 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9214, batch_loss: 0.0519, loss: 0.1941 ||: 100%|##########| 157/157 [00:01<00:00, 151.01it/s]

2023-05-06 19:52:02,574 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:52:02,576 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.909  |     0.921
2023-05-06 19:52:02,578 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   849.223  |       N/A
2023-05-06 19:52:02,580 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.244  |     0.194
2023-05-06 19:52:02,581 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  63788.422  |       N/A





2023-05-06 19:52:03,414 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:26.623394
2023-05-06 19:52:03,416 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:52
2023-05-06 19:52:03,419 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 19:52:03,420 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 62G
2023-05-06 19:52:03,421 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 876M
2023-05-06 19:52:03,423 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9488, batch_loss: 0.2210, loss: 0.1338 ||: 100%|##########| 1250/1250 [00:24<00:00, 51.63it/s]

2023-05-06 19:52:27,641 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9312, batch_loss: 0.0236, loss: 0.1843 ||: 100%|##########| 157/157 [00:01<00:00, 123.56it/s]

2023-05-06 19:52:28,922 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:52:28,925 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.949  |     0.931
2023-05-06 19:52:28,927 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   875.542  |       N/A
2023-05-06 19:52:28,929 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.134  |     0.184
2023-05-06 19:52:28,932 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  63838.371  |       N/A





2023-05-06 19:52:29,786 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:26.367896
2023-05-06 19:52:29,790 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:28
2023-05-06 19:52:29,791 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 19:52:29,794 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 62G
2023-05-06 19:52:29,796 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 876M
2023-05-06 19:52:29,797 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9807, batch_loss: 0.0041, loss: 0.0547 ||: 100%|##########| 1250/1250 [00:24<00:00, 51.54it/s]

2023-05-06 19:52:54,058 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9286, batch_loss: 0.0033, loss: 0.2452 ||: 100%|##########| 157/157 [00:01<00:00, 123.16it/s]

2023-05-06 19:52:55,341 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:52:55,343 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.981  |     0.929
2023-05-06 19:52:55,344 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   875.542  |       N/A
2023-05-06 19:52:55,347 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.055  |     0.245
2023-05-06 19:52:55,349 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  63838.371  |       N/A





2023-05-06 19:52:56,255 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:26.464234
2023-05-06 19:52:56,260 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:03
2023-05-06 19:52:56,262 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 19:52:56,263 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 62G
2023-05-06 19:52:56,266 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 876M
2023-05-06 19:52:56,268 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9968, batch_loss: 0.0553, loss: 0.0131 ||: 100%|##########| 1250/1250 [00:23<00:00, 52.32it/s]

2023-05-06 19:53:20,166 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9262, batch_loss: 0.0004, loss: 0.3021 ||: 100%|##########| 157/157 [00:00<00:00, 169.23it/s]

2023-05-06 19:53:21,109 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:53:21,110 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.997  |     0.926
2023-05-06 19:53:21,115 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   875.542  |       N/A
2023-05-06 19:53:21,116 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.013  |     0.302
2023-05-06 19:53:21,118 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  63838.371  |       N/A





2023-05-06 19:53:21,778 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:25.516395
2023-05-06 19:53:21,780 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:36
2023-05-06 19:53:21,788 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 19:53:21,791 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 62G
2023-05-06 19:53:21,793 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 876M
2023-05-06 19:53:21,795 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9998, batch_loss: 0.0003, loss: 0.0023 ||: 100%|##########| 1250/1250 [00:24<00:00, 51.93it/s]

2023-05-06 19:53:45,875 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9248, batch_loss: 0.0001, loss: 0.3568 ||: 100%|##########| 157/157 [00:00<00:00, 173.54it/s]

2023-05-06 19:53:46,788 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:53:46,789 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.925
2023-05-06 19:53:46,790 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   875.542  |       N/A
2023-05-06 19:53:46,791 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.002  |     0.357
2023-05-06 19:53:46,792 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  63838.371  |       N/A





2023-05-06 19:53:47,506 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:25.718714
2023-05-06 19:53:47,508 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 19:53:47,576 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 63838.37109375,
  "peak_gpu_0_memory_MB": 875.5419921875,
  "training_duration": "0:02:09.997164",
  "epoch": 4,
  "training_accuracy": 0.999825,
  "training_loss": 0.0022934919555846135,
  "training_worker_0_memory_MB": 63838.37109375,
  "training_gpu_0_memory_MB": 875.5419921875,
  "validation_accuracy": 0.9248,
  "validation_loss": 0.35683306881753435,
  "best_validation_accuracy": 0.9312,
  "best_validation_loss": 0.18426247880716992
}
2023-05-06 19:53:47,578 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/92/model.tar.gz


[32m[I 2023-05-06 19:53:50,777][0m Trial 92 finished with value: 0.9312 and parameters: {'embedding_dim': 132, 'max_filter_size': 5, 'num_filters': 48}. Best is trial 87 with value: 0.936.[0m


2023-05-06 19:53:50,976 - INFO - allennlp.common.params - evaluation = None
2023-05-06 19:53:50,982 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 19:53:50,985 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 19:53:50,987 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 19:53:50,988 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 19:53:50,990 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 19:53:50,993 - INFO - allennlp.common.params - type = default
2023-05-06 19:53:50,996 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 19:53:51,000 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 19:53:51,001 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 19:53:51,002 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 19:53:51,004 - INFO - all

loading instances: 40000it [01:46, 375.29it/s]

2023-05-06 19:55:37,627 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 19:55:37,630 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 19:55:37,634 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 19:55:37,636 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 19:55:37,639 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 19:55:37,641 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 19:55:37,641 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 19:55:37,642 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 19:55:37,643 - INFO - allennlp.common.params - validation_data_loader.start_method = fork





2023-05-06 19:55:37,644 - INFO - allennlp.common.params - validation_data_loader.cuda_device = None
2023-05-06 19:55:37,645 - INFO - allennlp.common.params - validation_data_loader.quiet = False
2023-05-06 19:55:37,646 - INFO - allennlp.common.params - validation_data_loader.collate_fn = <allennlp.data.data_loaders.data_collator.DefaultDataCollator object at 0x7f1035c88d60>


loading instances: 5000it [00:09, 512.94it/s]

2023-05-06 19:55:47,399 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 19:55:47,407 - INFO - allennlp.common.params - type = from_instances
2023-05-06 19:55:47,409 - INFO - allennlp.common.params - min_count = None
2023-05-06 19:55:47,410 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 19:55:47,412 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 19:55:47,415 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 19:55:47,417 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 19:55:47,419 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 19:55:47,422 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 19:55:47,423 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 19:55:47,425 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 19


building vocab: 40000it [00:06, 5788.25it/s]


2023-05-06 19:55:54,594 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 19:55:54,598 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 19:55:54,602 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 19:55:54,607 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 19:55:54,609 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 172
2023-05-06 19:55:54,611 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 19:55:54,611 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 19:55:54,615 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 19:55:54,616 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 19:56:01,758 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 19:56:01,760 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 19:56:01,767 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9101, batch_loss: 0.0657, loss: 0.2400 ||: 100%|##########| 1250/1250 [00:34<00:00, 36.61it/s]

2023-05-06 19:56:35,778 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 19:56:35,797 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 19:56:35,799 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 19:56:35,806 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9238, batch_loss: 0.0283, loss: 0.1940 ||: 100%|##########| 157/157 [00:01<00:00, 99.35it/s] 

2023-05-06 19:56:37,366 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:56:37,367 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.910  |     0.924
2023-05-06 19:56:37,369 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   875.542  |       N/A
2023-05-06 19:56:37,371 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.240  |     0.194
2023-05-06 19:56:37,373 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  64445.902  |       N/A





2023-05-06 19:56:38,521 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:36.898905
2023-05-06 19:56:38,526 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:05:21
2023-05-06 19:56:38,530 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 19:56:38,532 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 63G
2023-05-06 19:56:38,535 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 19:56:38,537 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9504, batch_loss: 0.1780, loss: 0.1286 ||: 100%|##########| 1250/1250 [00:33<00:00, 37.38it/s]

2023-05-06 19:57:11,981 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9322, batch_loss: 0.0084, loss: 0.1873 ||: 100%|##########| 157/157 [00:01<00:00, 116.48it/s]

2023-05-06 19:57:13,336 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:57:13,337 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.950  |     0.932
2023-05-06 19:57:13,339 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1167.089  |       N/A
2023-05-06 19:57:13,341 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.129  |     0.187
2023-05-06 19:57:13,342 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  64511.234  |       N/A





2023-05-06 19:57:14,318 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:35.787478
2023-05-06 19:57:14,323 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:46
2023-05-06 19:57:14,325 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 19:57:14,327 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 63G
2023-05-06 19:57:14,329 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 19:57:14,331 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9846, batch_loss: 0.0033, loss: 0.0463 ||: 100%|##########| 1250/1250 [00:34<00:00, 36.56it/s]

2023-05-06 19:57:48,535 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9272, batch_loss: 0.0130, loss: 0.2435 ||: 100%|##########| 157/157 [00:01<00:00, 120.70it/s]

2023-05-06 19:57:49,845 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:57:49,847 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.985  |     0.927
2023-05-06 19:57:49,857 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1167.089  |       N/A
2023-05-06 19:57:49,858 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.046  |     0.244
2023-05-06 19:57:49,859 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  64511.234  |       N/A





2023-05-06 19:57:50,827 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:36.502339
2023-05-06 19:57:50,829 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:12
2023-05-06 19:57:50,832 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 19:57:50,834 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 63G
2023-05-06 19:57:50,836 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 19:57:50,838 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9970, batch_loss: 0.0337, loss: 0.0106 ||: 100%|##########| 1250/1250 [00:33<00:00, 36.78it/s]

2023-05-06 19:58:24,826 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9270, batch_loss: 0.0005, loss: 0.3033 ||: 100%|##########| 157/157 [00:01<00:00, 121.60it/s]

2023-05-06 19:58:26,130 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation





2023-05-06 19:58:26,132 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.997  |     0.927
2023-05-06 19:58:26,135 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1167.089  |       N/A
2023-05-06 19:58:26,136 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.011  |     0.303
2023-05-06 19:58:26,137 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  64511.234  |       N/A
2023-05-06 19:58:26,994 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:36.161953
2023-05-06 19:58:26,995 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:36
2023-05-06 19:58:27,000 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 19:58:27,002 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 63G
2023-05-06 19:58:27,004 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memo

accuracy: 0.9999, batch_loss: 0.0053, loss: 0.0016 ||: 100%|##########| 1250/1250 [00:34<00:00, 36.71it/s]

2023-05-06 19:59:01,065 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9250, batch_loss: 0.0061, loss: 0.3391 ||: 100%|##########| 157/157 [00:01<00:00, 100.70it/s]

2023-05-06 19:59:02,636 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 19:59:02,638 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.925
2023-05-06 19:59:02,639 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1167.089  |       N/A
2023-05-06 19:59:02,640 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.002  |     0.339
2023-05-06 19:59:02,643 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  64511.234  |       N/A





2023-05-06 19:59:03,745 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:36.744389
2023-05-06 19:59:03,749 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 19:59:03,838 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 64511.234375,
  "peak_gpu_0_memory_MB": 1167.0888671875,
  "training_duration": "0:03:01.012751",
  "epoch": 4,
  "training_accuracy": 0.9999,
  "training_loss": 0.0016115250857757929,
  "training_worker_0_memory_MB": 64511.234375,
  "training_gpu_0_memory_MB": 1167.0888671875,
  "validation_accuracy": 0.925,
  "validation_loss": 0.33911533179325143,
  "best_validation_accuracy": 0.9322,
  "best_validation_loss": 0.18729927242514055
}
2023-05-06 19:59:03,844 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/93/model.tar.gz


[32m[I 2023-05-06 19:59:07,510][0m Trial 93 finished with value: 0.9322 and parameters: {'embedding_dim': 172, 'max_filter_size': 5, 'num_filters': 72}. Best is trial 87 with value: 0.936.[0m


2023-05-06 19:59:07,644 - INFO - allennlp.common.params - evaluation = None
2023-05-06 19:59:07,646 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 19:59:07,649 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 19:59:07,651 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 19:59:07,652 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 19:59:07,654 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 19:59:07,657 - INFO - allennlp.common.params - type = default
2023-05-06 19:59:07,658 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 19:59:07,660 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 19:59:07,661 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 19:59:07,662 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 19:59:07,664 - INFO - all

loading instances: 40000it [01:45, 377.63it/s]

2023-05-06 20:00:53,644 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 20:00:53,650 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 20:00:53,651 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 20:00:53,654 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 20:00:53,655 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 20:00:53,657 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 20:00:53,659 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 20:00:53,662 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 20:00:53,665 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 20:00:53,667 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:13, 376.04it/s]

2023-05-06 20:01:06,976 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 20:01:06,978 - INFO - allennlp.common.params - type = from_instances
2023-05-06 20:01:06,980 - INFO - allennlp.common.params - min_count = None
2023-05-06 20:01:06,983 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 20:01:06,984 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 20:01:06,988 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 20:01:06,989 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 20:01:06,990 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 20:01:06,992 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 20:01:06,993 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 20:01:06,994 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 20


building vocab: 40000it [00:05, 7827.98it/s]


2023-05-06 20:01:12,310 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 20:01:12,313 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 20:01:12,323 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 20:01:12,325 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 20:01:12,327 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 148
2023-05-06 20:01:12,329 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 20:01:12,331 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 20:01:12,332 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 20:01:12,334 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 20:01:18,178 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 20:01:18,181 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 20:01:18,187 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9110, batch_loss: 0.0523, loss: 0.2387 ||: 100%|##########| 1250/1250 [00:33<00:00, 37.61it/s]

2023-05-06 20:01:51,299 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 20:01:51,320 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 20:01:51,321 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 20:01:51,328 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9242, batch_loss: 0.0250, loss: 0.1979 ||: 100%|##########| 157/157 [00:01<00:00, 115.50it/s]

2023-05-06 20:01:52,669 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 20:01:52,673 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.911  |     0.924
2023-05-06 20:01:52,674 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1167.089  |       N/A
2023-05-06 20:01:52,677 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.239  |     0.198
2023-05-06 20:01:52,680 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  65033.977  |       N/A





2023-05-06 20:01:53,449 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:35.394893
2023-05-06 20:01:53,456 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:05:11
2023-05-06 20:01:53,458 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 20:01:53,460 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 64G
2023-05-06 20:01:53,461 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 20:01:53,467 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9516, batch_loss: 0.1817, loss: 0.1259 ||: 100%|##########| 1250/1250 [00:32<00:00, 38.71it/s]

2023-05-06 20:02:25,766 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9300, batch_loss: 0.0147, loss: 0.1834 ||: 100%|##########| 157/157 [00:01<00:00, 107.09it/s]

2023-05-06 20:02:27,243 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 20:02:27,244 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.952  |     0.930
2023-05-06 20:02:27,247 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1158.140  |       N/A
2023-05-06 20:02:27,249 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.126  |     0.183
2023-05-06 20:02:27,251 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  65090.082  |       N/A





2023-05-06 20:02:28,278 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:34.820065
2023-05-06 20:02:28,283 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:36
2023-05-06 20:02:28,284 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 20:02:28,286 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 64G
2023-05-06 20:02:28,290 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 20:02:28,293 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9864, batch_loss: 0.0007, loss: 0.0412 ||: 100%|##########| 1250/1250 [00:32<00:00, 38.83it/s]

2023-05-06 20:03:00,496 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9264, batch_loss: 0.0021, loss: 0.2755 ||: 100%|##########| 157/157 [00:01<00:00, 120.70it/s]

2023-05-06 20:03:01,806 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 20:03:01,809 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.986  |     0.926
2023-05-06 20:03:01,812 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1158.264  |       N/A
2023-05-06 20:03:01,812 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.041  |     0.275
2023-05-06 20:03:01,813 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  65090.082  |       N/A





2023-05-06 20:03:02,675 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:34.390520
2023-05-06 20:03:02,677 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:02
2023-05-06 20:03:02,680 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 20:03:02,683 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 64G
2023-05-06 20:03:02,687 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 20:03:02,689 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9979, batch_loss: 0.0617, loss: 0.0085 ||: 100%|##########| 1250/1250 [00:33<00:00, 37.83it/s]

2023-05-06 20:03:35,738 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9244, batch_loss: 0.0004, loss: 0.3160 ||: 100%|##########| 157/157 [00:01<00:00, 122.55it/s]

2023-05-06 20:03:37,026 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 20:03:37,027 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.998  |     0.924





2023-05-06 20:03:37,034 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1158.080  |       N/A
2023-05-06 20:03:37,036 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.009  |     0.316
2023-05-06 20:03:37,040 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  65090.082  |       N/A
2023-05-06 20:03:37,799 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:35.118864
2023-05-06 20:03:37,803 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:28
2023-05-06 20:03:37,807 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 20:03:37,809 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 64G
2023-05-06 20:03:37,813 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 20:03:37,817 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 1.0000, batch_loss: 0.0011, loss: 0.0013 ||: 100%|##########| 1250/1250 [00:32<00:00, 38.78it/s]

2023-05-06 20:04:10,058 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9260, batch_loss: 0.0001, loss: 0.3679 ||: 100%|##########| 157/157 [00:01<00:00, 97.25it/s] 

2023-05-06 20:04:11,680 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 20:04:11,682 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.926
2023-05-06 20:04:11,686 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1158.250  |       N/A
2023-05-06 20:04:11,687 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.001  |     0.368
2023-05-06 20:04:11,689 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  65090.082  |       N/A





2023-05-06 20:04:12,595 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:34.787590
2023-05-06 20:04:12,601 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 20:04:12,683 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 65090.08203125,
  "peak_gpu_0_memory_MB": 1167.0888671875,
  "training_duration": "0:02:53.626174",
  "epoch": 4,
  "training_accuracy": 1.0,
  "training_loss": 0.001251511860456958,
  "training_worker_0_memory_MB": 65090.08203125,
  "training_gpu_0_memory_MB": 1158.24951171875,
  "validation_accuracy": 0.926,
  "validation_loss": 0.3678944479520425,
  "best_validation_accuracy": 0.93,
  "best_validation_loss": 0.18337613119963247
}
2023-05-06 20:04:12,685 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/94/model.tar.gz


[32m[I 2023-05-06 20:04:16,189][0m Trial 94 finished with value: 0.93 and parameters: {'embedding_dim': 148, 'max_filter_size': 5, 'num_filters': 97}. Best is trial 87 with value: 0.936.[0m


2023-05-06 20:04:16,331 - INFO - allennlp.common.params - evaluation = None
2023-05-06 20:04:16,333 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 20:04:16,336 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 20:04:16,338 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 20:04:16,340 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 20:04:16,342 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 20:04:16,344 - INFO - allennlp.common.params - type = default
2023-05-06 20:04:16,346 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 20:04:16,348 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 20:04:16,349 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 20:04:16,350 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 20:04:16,351 - INFO - all

loading instances: 40000it [01:46, 375.05it/s]

2023-05-06 20:06:03,044 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 20:06:03,048 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 20:06:03,053 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 20:06:03,055 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 20:06:03,057 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 20:06:03,060 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 20:06:03,061 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 20:06:03,062 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 20:06:03,063 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 20:06:03,064 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:12, 412.89it/s]

2023-05-06 20:06:15,183 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 20:06:15,185 - INFO - allennlp.common.params - type = from_instances
2023-05-06 20:06:15,188 - INFO - allennlp.common.params - min_count = None
2023-05-06 20:06:15,190 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 20:06:15,192 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 20:06:15,193 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 20:06:15,195 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 20:06:15,197 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 20:06:15,199 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 20:06:15,201 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 20:06:15,202 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 20


building vocab: 40000it [00:05, 6700.32it/s]


2023-05-06 20:06:21,375 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 20:06:21,377 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 20:06:21,383 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 20:06:21,385 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 20:06:21,387 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 154
2023-05-06 20:06:21,388 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 20:06:21,393 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 20:06:21,394 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 20:06:21,396 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 20:06:27,073 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 20:06:27,076 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 20:06:27,082 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9079, batch_loss: 0.0602, loss: 0.2411 ||: 100%|##########| 1250/1250 [00:32<00:00, 38.32it/s]

2023-05-06 20:06:59,575 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 20:06:59,592 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 20:06:59,595 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 20:06:59,602 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9194, batch_loss: 0.0348, loss: 0.2029 ||: 100%|##########| 157/157 [00:01<00:00, 109.67it/s]

2023-05-06 20:07:01,015 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 20:07:01,017 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.908  |     0.919
2023-05-06 20:07:01,019 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1158.317  |       N/A
2023-05-06 20:07:01,020 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.241  |     0.203
2023-05-06 20:07:01,021 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  65679.633  |       N/A





2023-05-06 20:07:01,786 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:34.843455
2023-05-06 20:07:01,791 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:05:06
2023-05-06 20:07:01,794 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 20:07:01,795 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 64G
2023-05-06 20:07:01,797 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 20:07:01,802 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9499, batch_loss: 0.1794, loss: 0.1305 ||: 100%|##########| 1250/1250 [00:31<00:00, 39.44it/s]

2023-05-06 20:07:33,505 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9310, batch_loss: 0.0079, loss: 0.1853 ||: 100%|##########| 157/157 [00:01<00:00, 116.33it/s]

2023-05-06 20:07:34,864 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 20:07:34,866 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.950  |     0.931
2023-05-06 20:07:34,868 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1088.259  |       N/A
2023-05-06 20:07:34,871 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.130  |     0.185
2023-05-06 20:07:34,873 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  65738.461  |       N/A





2023-05-06 20:07:35,786 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:33.992618
2023-05-06 20:07:35,792 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:31
2023-05-06 20:07:35,794 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 20:07:35,796 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 64G
2023-05-06 20:07:35,806 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 20:07:35,811 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9841, batch_loss: 0.0013, loss: 0.0471 ||: 100%|##########| 1250/1250 [00:31<00:00, 39.07it/s]

2023-05-06 20:08:07,811 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9260, batch_loss: 0.0009, loss: 0.2611 ||: 100%|##########| 157/157 [00:01<00:00, 121.96it/s]

2023-05-06 20:08:09,110 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 20:08:09,111 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.984  |     0.926
2023-05-06 20:08:09,117 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1088.383  |       N/A
2023-05-06 20:08:09,119 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.047  |     0.261
2023-05-06 20:08:09,122 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  65738.461  |       N/A





2023-05-06 20:08:09,899 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:34.104291
2023-05-06 20:08:09,900 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:58
2023-05-06 20:08:09,905 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 20:08:09,907 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 64G
2023-05-06 20:08:09,913 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 20:08:09,915 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9976, batch_loss: 0.0410, loss: 0.0098 ||: 100%|##########| 1250/1250 [00:31<00:00, 39.19it/s]

2023-05-06 20:08:41,818 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9246, batch_loss: 0.0041, loss: 0.3381 ||: 100%|##########| 157/157 [00:01<00:00, 108.75it/s]

2023-05-06 20:08:43,269 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 20:08:43,271 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.998  |     0.925





2023-05-06 20:08:43,273 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1088.199  |       N/A
2023-05-06 20:08:43,277 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.010  |     0.338
2023-05-06 20:08:43,280 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  65738.461  |       N/A
2023-05-06 20:08:44,211 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:34.305584
2023-05-06 20:08:44,214 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:24
2023-05-06 20:08:44,219 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 20:08:44,220 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 64G
2023-05-06 20:08:44,223 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 20:08:44,225 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9998, batch_loss: 0.0008, loss: 0.0016 ||: 100%|##########| 1250/1250 [00:31<00:00, 39.29it/s]

2023-05-06 20:09:16,042 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9270, batch_loss: 0.0003, loss: 0.3893 ||: 100%|##########| 157/157 [00:01<00:00, 123.91it/s]

2023-05-06 20:09:17,319 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 20:09:17,321 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.927
2023-05-06 20:09:17,324 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1088.369  |       N/A





2023-05-06 20:09:17,326 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.002  |     0.389
2023-05-06 20:09:17,327 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  65738.461  |       N/A
2023-05-06 20:09:18,251 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:34.032051
2023-05-06 20:09:18,255 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 20:09:18,373 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 65738.4609375,
  "peak_gpu_0_memory_MB": 1158.3173828125,
  "training_duration": "0:02:50.375435",
  "epoch": 4,
  "training_accuracy": 0.999775,
  "training_loss": 0.0016287934250212857,
  "training_worker_0_memory_MB": 65738.4609375,
  "training_gpu_0_memory_MB": 1088.36865234375,
  "validation_accuracy": 0.927,
  "validation_loss": 0.38934677306604926,
  "best_validation_accuracy": 0.931,
  "best_validation

[32m[I 2023-05-06 20:09:22,606][0m Trial 95 finished with value: 0.931 and parameters: {'embedding_dim': 154, 'max_filter_size': 5, 'num_filters': 78}. Best is trial 87 with value: 0.936.[0m


2023-05-06 20:09:22,781 - INFO - allennlp.common.params - evaluation = None
2023-05-06 20:09:22,783 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 20:09:22,786 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 20:09:22,790 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 20:09:22,791 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 20:09:22,794 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 20:09:22,796 - INFO - allennlp.common.params - type = default
2023-05-06 20:09:22,798 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 20:09:22,801 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 20:09:22,802 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 20:09:22,804 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 20:09:22,806 - INFO - all

loading instances: 40000it [01:44, 382.33it/s]

2023-05-06 20:11:07,484 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 20:11:07,487 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 20:11:07,488 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 20:11:07,490 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 20:11:07,492 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 20:11:07,494 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 20:11:07,494 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 20:11:07,495 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 20:11:07,496 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 20:11:07,497 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:15, 319.42it/s]

2023-05-06 20:11:23,158 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 20:11:23,161 - INFO - allennlp.common.params - type = from_instances
2023-05-06 20:11:23,165 - INFO - allennlp.common.params - min_count = None
2023-05-06 20:11:23,168 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 20:11:23,171 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 20:11:23,172 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 20:11:23,173 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 20:11:23,174 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 20:11:23,175 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 20:11:23,180 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 20:11:23,181 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 20


building vocab: 40000it [00:04, 8281.77it/s]


2023-05-06 20:11:28,224 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 20:11:28,225 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 20:11:28,227 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 20:11:28,229 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 20:11:28,233 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 119
2023-05-06 20:11:28,235 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 20:11:28,237 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 20:11:28,239 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 20:11:28,241 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 20:11:35,649 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 20:11:35,651 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 20:11:35,657 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9103, batch_loss: 0.0738, loss: 0.2407 ||: 100%|##########| 1250/1250 [00:26<00:00, 46.43it/s]

2023-05-06 20:12:02,425 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 20:12:02,439 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 20:12:02,441 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 20:12:02,446 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9210, batch_loss: 0.0225, loss: 0.1930 ||: 100%|##########| 157/157 [00:01<00:00, 125.79it/s]

2023-05-06 20:12:03,679 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 20:12:03,681 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.910  |     0.921
2023-05-06 20:12:03,682 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1088.437  |       N/A
2023-05-06 20:12:03,688 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.241  |     0.193
2023-05-06 20:12:03,690 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  66303.914  |       N/A





2023-05-06 20:12:04,282 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:28.790683
2023-05-06 20:12:04,291 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:13
2023-05-06 20:12:04,293 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 20:12:04,294 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 65G
2023-05-06 20:12:04,299 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 945M
2023-05-06 20:12:04,300 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9497, batch_loss: 0.1879, loss: 0.1303 ||: 100%|##########| 1250/1250 [00:26<00:00, 47.43it/s]

2023-05-06 20:12:30,666 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9306, batch_loss: 0.0105, loss: 0.1842 ||: 100%|##########| 157/157 [00:01<00:00, 129.70it/s]

2023-05-06 20:12:31,887 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 20:12:31,889 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.950  |     0.931
2023-05-06 20:12:31,893 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   945.192  |       N/A
2023-05-06 20:12:31,894 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.130  |     0.184
2023-05-06 20:12:31,896 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  66354.699  |       N/A





2023-05-06 20:12:32,662 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:28.369531
2023-05-06 20:12:32,666 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:45
2023-05-06 20:12:32,667 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 20:12:32,670 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 65G
2023-05-06 20:12:32,672 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 945M
2023-05-06 20:12:32,675 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9826, batch_loss: 0.0044, loss: 0.0507 ||: 100%|##########| 1250/1250 [00:26<00:00, 47.05it/s]

2023-05-06 20:12:59,254 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9272, batch_loss: 0.0099, loss: 0.2420 ||: 100%|##########| 157/157 [00:01<00:00, 108.04it/s]

2023-05-06 20:13:00,717 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 20:13:00,719 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.983  |     0.927
2023-05-06 20:13:00,723 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   945.317  |       N/A
2023-05-06 20:13:00,724 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.051  |     0.242
2023-05-06 20:13:00,728 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  66354.699  |       N/A





2023-05-06 20:13:01,518 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:28.851236
2023-05-06 20:13:01,523 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:18
2023-05-06 20:13:01,524 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 20:13:01,525 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 65G
2023-05-06 20:13:01,528 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 945M
2023-05-06 20:13:01,530 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9970, batch_loss: 0.0850, loss: 0.0117 ||: 100%|##########| 1250/1250 [00:26<00:00, 47.60it/s]

2023-05-06 20:13:27,797 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9184, batch_loss: 0.0020, loss: 0.2910 ||: 100%|##########| 157/157 [00:01<00:00, 137.19it/s]

2023-05-06 20:13:28,953 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 20:13:28,955 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.997  |     0.918
2023-05-06 20:13:28,957 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   945.133  |       N/A
2023-05-06 20:13:28,958 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.012  |     0.291
2023-05-06 20:13:28,960 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  66354.699  |       N/A





2023-05-06 20:13:29,567 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:28.043051
2023-05-06 20:13:29,568 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:50
2023-05-06 20:13:29,570 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 20:13:29,577 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 65G
2023-05-06 20:13:29,578 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 945M
2023-05-06 20:13:29,583 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9999, batch_loss: 0.0006, loss: 0.0018 ||: 100%|##########| 1250/1250 [00:26<00:00, 47.81it/s]

2023-05-06 20:13:55,739 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9270, batch_loss: 0.0020, loss: 0.3763 ||: 100%|##########| 157/157 [00:01<00:00, 130.21it/s]

2023-05-06 20:13:56,953 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 20:13:56,955 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.927
2023-05-06 20:13:56,956 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   945.302  |       N/A
2023-05-06 20:13:56,958 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.002  |     0.376
2023-05-06 20:13:56,961 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  66354.699  |       N/A





2023-05-06 20:13:57,706 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:28.135290
2023-05-06 20:13:57,708 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 20:13:57,809 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 66354.69921875,
  "peak_gpu_0_memory_MB": 1088.4365234375,
  "training_duration": "0:02:21.460924",
  "epoch": 4,
  "training_accuracy": 0.999875,
  "training_loss": 0.0017552249595508328,
  "training_worker_0_memory_MB": 66354.69921875,
  "training_gpu_0_memory_MB": 945.30224609375,
  "validation_accuracy": 0.927,
  "validation_loss": 0.3762985068005599,
  "best_validation_accuracy": 0.9306,
  "best_validation_loss": 0.1841679867653615
}
2023-05-06 20:13:57,811 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/96/model.tar.gz


[32m[I 2023-05-06 20:14:01,307][0m Trial 96 finished with value: 0.9306 and parameters: {'embedding_dim': 119, 'max_filter_size': 5, 'num_filters': 81}. Best is trial 87 with value: 0.936.[0m


2023-05-06 20:14:01,472 - INFO - allennlp.common.params - evaluation = None
2023-05-06 20:14:01,475 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 20:14:01,478 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 20:14:01,480 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 20:14:01,482 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 20:14:01,484 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 20:14:01,485 - INFO - allennlp.common.params - type = default
2023-05-06 20:14:01,487 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 20:14:01,488 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 20:14:01,490 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 20:14:01,491 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 20:14:01,492 - INFO - all

loading instances: 40000it [01:43, 387.20it/s]

2023-05-06 20:15:44,852 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 20:15:44,855 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 20:15:44,857 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 20:15:44,859 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 20:15:44,861 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 20:15:44,862 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 20:15:44,863 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 20:15:44,864 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 20:15:44,865 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 20:15:44,866 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:09, 506.17it/s]

2023-05-06 20:15:54,750 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 20:15:54,753 - INFO - allennlp.common.params - type = from_instances
2023-05-06 20:15:54,755 - INFO - allennlp.common.params - min_count = None
2023-05-06 20:15:54,757 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 20:15:54,759 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 20:15:54,760 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 20:15:54,761 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 20:15:54,762 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 20:15:54,763 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 20:15:54,764 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 20:15:54,765 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 20


building vocab: 40000it [00:06, 5920.38it/s]


2023-05-06 20:16:01,835 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 20:16:01,841 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 20:16:01,843 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 20:16:01,846 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 20:16:01,847 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 73
2023-05-06 20:16:01,850 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 20:16:01,852 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 20:16:01,853 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 20:16:01,855 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tok

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 20:16:08,725 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 20:16:08,727 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 20:16:08,732 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9051, batch_loss: 0.1047, loss: 0.2486 ||: 100%|##########| 1250/1250 [00:20<00:00, 60.82it/s]

2023-05-06 20:16:29,161 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 20:16:29,173 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 20:16:29,175 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 20:16:29,180 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9230, batch_loss: 0.0529, loss: 0.1930 ||: 100%|##########| 157/157 [00:00<00:00, 161.50it/s]

2023-05-06 20:16:30,139 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 20:16:30,141 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.905  |     0.923
2023-05-06 20:16:30,148 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   945.370  |       N/A
2023-05-06 20:16:30,151 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.249  |     0.193
2023-05-06 20:16:30,152 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  66939.941  |       N/A





2023-05-06 20:16:30,422 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:21.821846
2023-05-06 20:16:30,424 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:13
2023-05-06 20:16:30,429 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 20:16:30,431 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 65G
2023-05-06 20:16:30,433 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 662M
2023-05-06 20:16:30,435 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9463, batch_loss: 0.2283, loss: 0.1399 ||: 100%|##########| 1250/1250 [00:19<00:00, 62.96it/s]

2023-05-06 20:16:50,298 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9302, batch_loss: 0.0216, loss: 0.1794 ||: 100%|##########| 157/157 [00:00<00:00, 169.15it/s]

2023-05-06 20:16:51,233 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 20:16:51,238 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.946  |     0.930
2023-05-06 20:16:51,241 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   662.157  |       N/A
2023-05-06 20:16:51,244 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.140  |     0.179
2023-05-06 20:16:51,246 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  66968.301  |       N/A





2023-05-06 20:16:51,510 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:21.080918
2023-05-06 20:16:51,518 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:50
2023-05-06 20:16:51,523 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 20:16:51,524 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 65G
2023-05-06 20:16:51,527 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 662M
2023-05-06 20:16:51,533 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9780, batch_loss: 0.0050, loss: 0.0624 ||: 100%|##########| 1250/1250 [00:20<00:00, 62.13it/s]

2023-05-06 20:17:11,663 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9264, batch_loss: 0.0267, loss: 0.2185 ||: 100%|##########| 157/157 [00:00<00:00, 171.31it/s]

2023-05-06 20:17:12,589 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 20:17:12,591 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.978  |     0.926
2023-05-06 20:17:12,592 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   662.282  |       N/A
2023-05-06 20:17:12,594 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.062  |     0.218
2023-05-06 20:17:12,596 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  66968.301  |       N/A





2023-05-06 20:17:12,865 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:21.342094
2023-05-06 20:17:12,867 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:29
2023-05-06 20:17:12,869 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 20:17:12,872 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 65G
2023-05-06 20:17:12,877 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 662M
2023-05-06 20:17:12,881 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9960, batch_loss: 0.0398, loss: 0.0161 ||: 100%|##########| 1250/1250 [00:19<00:00, 62.62it/s]

2023-05-06 20:17:32,856 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9182, batch_loss: 0.1387, loss: 0.2718 ||: 100%|##########| 157/157 [00:00<00:00, 170.16it/s]

2023-05-06 20:17:33,784 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 20:17:33,786 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.996  |     0.918
2023-05-06 20:17:33,795 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   662.098  |       N/A
2023-05-06 20:17:33,797 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.016  |     0.272
2023-05-06 20:17:33,798 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  66968.301  |       N/A





2023-05-06 20:17:34,116 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:21.246980
2023-05-06 20:17:34,122 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:07
2023-05-06 20:17:34,123 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 20:17:34,125 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 65G
2023-05-06 20:17:34,127 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 662M
2023-05-06 20:17:34,130 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9995, batch_loss: 0.0009, loss: 0.0034 ||: 100%|##########| 1250/1250 [00:20<00:00, 62.09it/s]

2023-05-06 20:17:54,267 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9234, batch_loss: 0.0147, loss: 0.3311 ||: 100%|##########| 157/157 [00:00<00:00, 166.36it/s]

2023-05-06 20:17:55,217 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 20:17:55,219 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.923
2023-05-06 20:17:55,220 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   662.267  |       N/A
2023-05-06 20:17:55,221 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.003  |     0.331





2023-05-06 20:17:55,227 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  66968.301  |       N/A
2023-05-06 20:17:55,492 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:21.368963
2023-05-06 20:17:55,502 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 20:17:55,529 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 66968.30078125,
  "peak_gpu_0_memory_MB": 945.3701171875,
  "training_duration": "0:01:46.615166",
  "epoch": 4,
  "training_accuracy": 0.9995,
  "training_loss": 0.0033567046863463472,
  "training_worker_0_memory_MB": 66968.30078125,
  "training_gpu_0_memory_MB": 662.26708984375,
  "validation_accuracy": 0.9234,
  "validation_loss": 0.3311051866454873,
  "best_validation_accuracy": 0.9302,
  "best_validation_loss": 0.17941827902416135
}
2023-05-06 20:17:55,531 - INFO - allennlp.models.archival - archiving weights and vocabulary 

[32m[I 2023-05-06 20:17:57,169][0m Trial 97 finished with value: 0.9302 and parameters: {'embedding_dim': 73, 'max_filter_size': 5, 'num_filters': 66}. Best is trial 87 with value: 0.936.[0m


2023-05-06 20:17:57,303 - INFO - allennlp.common.params - evaluation = None
2023-05-06 20:17:57,305 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 20:17:57,311 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 20:17:57,312 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 20:17:57,314 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 20:17:57,316 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 20:17:57,318 - INFO - allennlp.common.params - type = default
2023-05-06 20:17:57,320 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 20:17:57,322 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 20:17:57,323 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 20:17:57,325 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 20:17:57,327 - INFO - all

loading instances: 40000it [01:47, 371.72it/s]

2023-05-06 20:19:45,003 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 20:19:45,005 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 20:19:45,006 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 20:19:45,009 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 20:19:45,011 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 20:19:45,012 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 20:19:45,014 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 20:19:45,016 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 20:19:45,017 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 20:19:45,021 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:12, 405.52it/s]

2023-05-06 20:19:57,373 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 20:19:57,376 - INFO - allennlp.common.params - type = from_instances
2023-05-06 20:19:57,378 - INFO - allennlp.common.params - min_count = None
2023-05-06 20:19:57,380 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 20:19:57,382 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 20:19:57,383 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 20:19:57,385 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 20:19:57,386 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 20:19:57,388 - INFO - allennlp.common.params - min_pretrained_embeddings = None





2023-05-06 20:19:57,389 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 20:19:57,390 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 20:19:57,391 - INFO - allennlp.data.vocabulary - Fitting token dictionary from dataset.


building vocab: 40000it [00:04, 8068.46it/s]


2023-05-06 20:20:02,548 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 20:20:02,559 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 20:20:02,565 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 20:20:02,569 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 20:20:02,572 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 163
2023-05-06 20:20:02,576 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 20:20:02,578 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 20:20:02,580 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 20:20:02,582 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 20:20:10,630 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 20:20:10,633 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 20:20:10,640 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9100, batch_loss: 0.0717, loss: 0.2421 ||: 100%|##########| 1250/1250 [00:27<00:00, 44.84it/s]

2023-05-06 20:20:38,364 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 20:20:38,381 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 20:20:38,384 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 20:20:38,391 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9260, batch_loss: 0.0251, loss: 0.1921 ||: 100%|##########| 157/157 [00:01<00:00, 149.90it/s]

2023-05-06 20:20:39,422 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 20:20:39,424 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.910  |     0.926
2023-05-06 20:20:39,427 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |   662.335  |       N/A
2023-05-06 20:20:39,428 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.242  |     0.192
2023-05-06 20:20:39,429 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  67652.676  |       N/A





2023-05-06 20:20:40,217 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:29.737734
2023-05-06 20:20:40,219 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:20
2023-05-06 20:20:40,226 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 20:20:40,229 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 66G
2023-05-06 20:20:40,233 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.0G
2023-05-06 20:20:40,236 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9498, batch_loss: 0.2023, loss: 0.1303 ||: 100%|##########| 1250/1250 [00:27<00:00, 45.66it/s]

2023-05-06 20:21:07,622 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9342, batch_loss: 0.0105, loss: 0.1830 ||: 100%|##########| 157/157 [00:01<00:00, 126.61it/s]

2023-05-06 20:21:08,870 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 20:21:08,871 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.950  |     0.934
2023-05-06 20:21:08,873 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1032.574  |       N/A
2023-05-06 20:21:08,875 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.130  |     0.183
2023-05-06 20:21:08,878 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  67714.660  |       N/A





2023-05-06 20:21:09,952 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:29.726485
2023-05-06 20:21:09,958 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:53
2023-05-06 20:21:09,959 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 20:21:09,960 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 66G
2023-05-06 20:21:09,963 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.0G
2023-05-06 20:21:09,964 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9821, batch_loss: 0.0021, loss: 0.0514 ||: 100%|##########| 1250/1250 [00:27<00:00, 44.96it/s]

2023-05-06 20:21:37,780 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9260, batch_loss: 0.0032, loss: 0.2352 ||: 100%|##########| 157/157 [00:00<00:00, 158.73it/s]

2023-05-06 20:21:38,776 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 20:21:38,778 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.982  |     0.926
2023-05-06 20:21:38,779 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1032.574  |       N/A
2023-05-06 20:21:38,781 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.051  |     0.235
2023-05-06 20:21:38,787 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  67714.660  |       N/A





2023-05-06 20:21:39,681 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:29.721983
2023-05-06 20:21:39,683 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:26
2023-05-06 20:21:39,685 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 20:21:39,687 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 66G
2023-05-06 20:21:39,690 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.0G
2023-05-06 20:21:39,691 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9967, batch_loss: 0.0680, loss: 0.0123 ||: 100%|##########| 1250/1250 [00:27<00:00, 46.03it/s]

2023-05-06 20:22:06,851 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9244, batch_loss: 0.0014, loss: 0.2972 ||: 100%|##########| 157/157 [00:00<00:00, 160.01it/s]

2023-05-06 20:22:07,850 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 20:22:07,852 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.997  |     0.924
2023-05-06 20:22:07,854 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1032.574  |       N/A
2023-05-06 20:22:07,855 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.012  |     0.297
2023-05-06 20:22:07,857 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  67714.660  |       N/A





2023-05-06 20:22:08,708 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:29.022851
2023-05-06 20:22:08,710 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:56
2023-05-06 20:22:08,712 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 20:22:08,715 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 66G
2023-05-06 20:22:08,718 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.0G
2023-05-06 20:22:08,720 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9996, batch_loss: 0.0024, loss: 0.0024 ||: 100%|##########| 1250/1250 [00:27<00:00, 45.61it/s]

2023-05-06 20:22:36,143 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9236, batch_loss: 0.0002, loss: 0.3717 ||: 100%|##########| 157/157 [00:01<00:00, 121.68it/s]

2023-05-06 20:22:37,445 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 20:22:37,447 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.924
2023-05-06 20:22:37,448 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1032.574  |       N/A
2023-05-06 20:22:37,450 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.002  |     0.372
2023-05-06 20:22:37,455 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  67714.660  |       N/A





2023-05-06 20:22:38,510 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:29.797992
2023-05-06 20:22:38,513 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 20:22:38,614 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 67714.66015625,
  "peak_gpu_0_memory_MB": 1032.57373046875,
  "training_duration": "0:02:26.964802",
  "epoch": 4,
  "training_accuracy": 0.999625,
  "training_loss": 0.00243513024370186,
  "training_worker_0_memory_MB": 67714.66015625,
  "training_gpu_0_memory_MB": 1032.57373046875,
  "validation_accuracy": 0.9236,
  "validation_loss": 0.3717114829213972,
  "best_validation_accuracy": 0.9342,
  "best_validation_loss": 0.18304556109319636
}
2023-05-06 20:22:38,616 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/98/model.tar.gz


[32m[I 2023-05-06 20:22:42,557][0m Trial 98 finished with value: 0.9342 and parameters: {'embedding_dim': 163, 'max_filter_size': 5, 'num_filters': 44}. Best is trial 87 with value: 0.936.[0m


2023-05-06 20:22:42,687 - INFO - allennlp.common.params - evaluation = None
2023-05-06 20:22:42,689 - INFO - allennlp.common.params - include_in_archive = None
2023-05-06 20:22:42,692 - INFO - allennlp.common.params - random_seed = 1
2023-05-06 20:22:42,693 - INFO - allennlp.common.params - numpy_seed = 1337
2023-05-06 20:22:42,699 - INFO - allennlp.common.params - pytorch_seed = 1
2023-05-06 20:22:42,700 - INFO - allennlp.common.checks - Pytorch version: 1.11.0+cu102
2023-05-06 20:22:42,702 - INFO - allennlp.common.params - type = default
2023-05-06 20:22:42,703 - INFO - allennlp.common.params - dataset_reader.type = text_classification_json
2023-05-06 20:22:42,704 - INFO - allennlp.common.params - dataset_reader.max_instances = None
2023-05-06 20:22:42,706 - INFO - allennlp.common.params - dataset_reader.manual_distributed_sharding = False
2023-05-06 20:22:42,709 - INFO - allennlp.common.params - dataset_reader.manual_multiprocess_sharding = False
2023-05-06 20:22:42,710 - INFO - all

loading instances: 40000it [01:45, 380.86it/s]

2023-05-06 20:24:27,815 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
2023-05-06 20:24:27,818 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32
2023-05-06 20:24:27,819 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
2023-05-06 20:24:27,821 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
2023-05-06 20:24:27,822 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
2023-05-06 20:24:27,824 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
2023-05-06 20:24:27,825 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
2023-05-06 20:24:27,827 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
2023-05-06 20:24:27,828 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
2023-05-06 20:24:27,829 - INFO - allennlp.common.params - validation_data_loader.cuda_de


loading instances: 5000it [00:15, 329.61it/s]

2023-05-06 20:24:43,009 - INFO - allennlp.commands.train - From dataset instances, train will be considered for vocabulary creation.
2023-05-06 20:24:43,012 - INFO - allennlp.common.params - type = from_instances
2023-05-06 20:24:43,016 - INFO - allennlp.common.params - min_count = None
2023-05-06 20:24:43,017 - INFO - allennlp.common.params - max_vocab_size = None
2023-05-06 20:24:43,020 - INFO - allennlp.common.params - non_padded_namespaces = ('*tags', '*labels')
2023-05-06 20:24:43,021 - INFO - allennlp.common.params - pretrained_files = None
2023-05-06 20:24:43,022 - INFO - allennlp.common.params - only_include_pretrained_words = False
2023-05-06 20:24:43,025 - INFO - allennlp.common.params - tokens_to_add = None
2023-05-06 20:24:43,027 - INFO - allennlp.common.params - min_pretrained_embeddings = None
2023-05-06 20:24:43,029 - INFO - allennlp.common.params - padding_token = @@PADDING@@
2023-05-06 20:24:43,030 - INFO - allennlp.common.params - oov_token = @@UNKNOWN@@
2023-05-06 20


building vocab: 40000it [00:05, 7067.40it/s]


2023-05-06 20:24:48,947 - INFO - allennlp.common.params - model.type = basic_classifier
2023-05-06 20:24:48,951 - INFO - allennlp.common.params - model.regularizer = None
2023-05-06 20:24:48,952 - INFO - allennlp.common.params - model.text_field_embedder.type = basic
2023-05-06 20:24:48,955 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.type = embedding
2023-05-06 20:24:48,957 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.embedding_dim = 184
2023-05-06 20:24:48,960 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.num_embeddings = None
2023-05-06 20:24:48,962 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.projection_dim = None
2023-05-06 20:24:48,963 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.tokens.weight = None
2023-05-06 20:24:48,964 - INFO - allennlp.common.params - model.text_field_embedder.token_embedders.to

  0%|          | 0/1250 [00:00<?, ?it/s]

2023-05-06 20:24:57,044 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 20:24:57,046 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 553)
tensor([[   76,     8,  1669,  ...,     0,     0,     0],
        [  649,     2,  4983,  ...,     0,     0,     0],
        [96526,  5778,  3597,  ...,     0,     0,     0],
        ...,
        [ 1629,    74,  2011,  ...,     0,     0,     0],
        [ 3612,    12, 16365,  ...,    42,    16,     4],
        [43864,     2, 19482,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 20:24:57,054 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9107, batch_loss: 0.0548, loss: 0.2407 ||: 100%|##########| 1250/1250 [00:29<00:00, 42.63it/s]

2023-05-06 20:25:26,235 - INFO - allennlp.training.gradient_descent_trainer - Validating



  0%|          | 0/157 [00:00<?, ?it/s]

2023-05-06 20:25:26,250 - INFO - allennlp.training.callbacks.console_logger - Batch inputs
2023-05-06 20:25:26,251 - INFO - allennlp.training.callbacks.console_logger - batch_input/tokens/tokens/tokens (Shape: 32 x 1197)
tensor([[ 1939,    12, 18824,  ...,     0,     0,     0],
        [ 2165,     8,  2168,  ...,     0,     0,     0],
        [  220,   709,   165,  ...,     0,     0,     0],
        ...,
        [  104,    14,     7,  ...,     0,     0,     0],
        [   37, 12076,     2,  ...,     0,     0,     0],
        [   37,    12,  1374,  ...,     0,     0,     0]], device='cuda:0')
2023-05-06 20:25:26,257 - INFO - allennlp.training.callbacks.console_logger - batch_input/label (Shape: 32)
tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')


accuracy: 0.9240, batch_loss: 0.0280, loss: 0.1961 ||: 100%|##########| 157/157 [00:01<00:00, 142.88it/s]

2023-05-06 20:25:27,344 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 20:25:27,347 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.911  |     0.924
2023-05-06 20:25:27,351 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1032.574  |       N/A
2023-05-06 20:25:27,353 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.241  |     0.196
2023-05-06 20:25:27,357 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  68286.246  |       N/A





2023-05-06 20:25:28,442 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:31.535627
2023-05-06 20:25:28,443 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:33
2023-05-06 20:25:28,445 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9
2023-05-06 20:25:28,447 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 67G
2023-05-06 20:25:28,451 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 20:25:28,452 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9512, batch_loss: 0.2046, loss: 0.1288 ||: 100%|##########| 1250/1250 [00:29<00:00, 41.67it/s]

2023-05-06 20:25:58,456 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9328, batch_loss: 0.0148, loss: 0.1869 ||: 100%|##########| 157/157 [00:01<00:00, 114.07it/s]

2023-05-06 20:25:59,841 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 20:25:59,842 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.951  |     0.933
2023-05-06 20:25:59,845 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1172.501  |       N/A
2023-05-06 20:25:59,846 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.129  |     0.187
2023-05-06 20:25:59,847 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  68356.246  |       N/A





2023-05-06 20:26:01,079 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:32.633804
2023-05-06 20:26:01,083 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:04:11
2023-05-06 20:26:01,087 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9
2023-05-06 20:26:01,089 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 67G
2023-05-06 20:26:01,093 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 20:26:01,095 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9834, batch_loss: 0.0009, loss: 0.0477 ||: 100%|##########| 1250/1250 [00:30<00:00, 41.63it/s]

2023-05-06 20:26:31,127 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9276, batch_loss: 0.0202, loss: 0.2614 ||: 100%|##########| 157/157 [00:01<00:00, 134.23it/s]

2023-05-06 20:26:32,305 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 20:26:32,308 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.983  |     0.928
2023-05-06 20:26:32,310 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1172.501  |       N/A
2023-05-06 20:26:32,312 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.048  |     0.261
2023-05-06 20:26:32,314 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  68356.492  |       N/A





2023-05-06 20:26:33,486 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:32.399241
2023-05-06 20:26:33,491 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:42
2023-05-06 20:26:33,492 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9
2023-05-06 20:26:33,493 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 67G
2023-05-06 20:26:33,495 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 20:26:33,498 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9968, batch_loss: 0.0321, loss: 0.0114 ||: 100%|##########| 1250/1250 [00:30<00:00, 40.91it/s]

2023-05-06 20:27:04,060 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9266, batch_loss: 0.0003, loss: 0.3004 ||: 100%|##########| 157/157 [00:01<00:00, 141.41it/s]

2023-05-06 20:27:05,176 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 20:27:05,178 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     0.997  |     0.927
2023-05-06 20:27:05,180 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1172.501  |       N/A
2023-05-06 20:27:05,182 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.011  |     0.300
2023-05-06 20:27:05,183 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  68356.492  |       N/A





2023-05-06 20:27:06,081 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:32.588685
2023-05-06 20:27:06,083 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:12
2023-05-06 20:27:06,085 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9
2023-05-06 20:27:06,087 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 67G
2023-05-06 20:27:06,089 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
2023-05-06 20:27:06,091 - INFO - allennlp.training.gradient_descent_trainer - Training


accuracy: 0.9997, batch_loss: 0.0012, loss: 0.0020 ||: 100%|##########| 1250/1250 [00:29<00:00, 41.98it/s]

2023-05-06 20:27:35,875 - INFO - allennlp.training.gradient_descent_trainer - Validating



accuracy: 0.9254, batch_loss: 0.0027, loss: 0.3618 ||: 100%|##########| 157/157 [00:01<00:00, 130.74it/s]

2023-05-06 20:27:37,084 - INFO - allennlp.training.callbacks.console_logger -                        Training |  Validation
2023-05-06 20:27:37,087 - INFO - allennlp.training.callbacks.console_logger - accuracy           |     1.000  |     0.925
2023-05-06 20:27:37,089 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB    |  1172.501  |       N/A
2023-05-06 20:27:37,090 - INFO - allennlp.training.callbacks.console_logger - loss               |     0.002  |     0.362
2023-05-06 20:27:37,092 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB |  68356.492  |       N/A





2023-05-06 20:27:38,321 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:32.235845
2023-05-06 20:27:38,323 - INFO - allennlp.training.gradient_descent_trainer - Ran out of patience. Stopping training.
2023-05-06 20:27:38,453 - INFO - allennlp.common.util - Metrics: {
  "best_epoch": 1,
  "peak_worker_0_memory_MB": 68356.4921875,
  "peak_gpu_0_memory_MB": 1172.50146484375,
  "training_duration": "0:02:40.176911",
  "epoch": 4,
  "training_accuracy": 0.9997,
  "training_loss": 0.00201418146918877,
  "training_worker_0_memory_MB": 68356.4921875,
  "training_gpu_0_memory_MB": 1172.50146484375,
  "validation_accuracy": 0.9254,
  "validation_loss": 0.36179180754271517,
  "best_validation_accuracy": 0.9328,
  "best_validation_loss": 0.18694678494101688
}
2023-05-06 20:27:38,455 - INFO - allennlp.models.archival - archiving weights and vocabulary to exp_amazon_reviews_optuna/trials/99/model.tar.gz


[32m[I 2023-05-06 20:27:43,423][0m Trial 99 finished with value: 0.9328 and parameters: {'embedding_dim': 184, 'max_filter_size': 5, 'num_filters': 52}. Best is trial 87 with value: 0.936.[0m


In [16]:
!cat amazon_reviews_best.json

{
    "data_loader": {
        "batch_size": 32,
        "shuffle": true
    },
    "dataset_reader": {
        "token_indexers": {
            "tokens": {
                "type": "single_id"
            }
        },
        "tokenizer": {
            "type": "mecab"
        },
        "type": "text_classification_json"
    },
    "datasets_for_vocab_creation": [
        "train"
    ],
    "model": {
        "seq2vec_encoder": {
            "conv_layer_activation": "relu",
            "embedding_dim": 149,
            "ngram_filter_sizes": [
                2,
                3,
                4,
                5
            ],
            "num_filters": 55,
            "type": "cnn"
        },
        "text_field_embedder": {
            "token_embedders": {
                "tokens": {
                    "embedding_dim": 149,
                    "type": "embedding"
                }
            }
        },
        "type": "basic_classifier"
    },
    "pytorch_seed": 1,
    "rando