We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Using backend: pytorch 2022-02-23 15:51:43.263 | Level 20 | dee.tasks.base_task:logging:196 - ====================Check Setting Validity==================== 2022-02-23 15:51:43.264 | Level 20 | dee.tasks.base_task:logging:196 - Setting: { "data_dir": "./Data", "model_dir": "./Exps/jiao/Model", "output_dir": "./Exps/jiao/Output", "bert_model": "bert", "train_file_name": "typed_train.json", "dev_file_name": "typed_dev.json", "test_file_name": "typed_test.json", "max_seq_len": 128, "train_batch_size": 16, "eval_batch_size": 2, "learning_rate": 0.0001, "num_train_epochs": 10, "warmup_proportion": 0.1, "no_cuda": false, "local_rank": -1, "seed": 99, "gradient_accumulation_steps": 8, "optimize_on_cpu": false, "fp16": false, "loss_scale": 128, "cpt_file_name": "Doc2EDAG", "summary_dir_name": "./Exps/jiao/Summary/Summary", "event_type_template": "jiao", "max_sent_len": 128, "max_sent_num": 64, "use_lr_scheduler": false, "lr_scheduler_step": 20, "use_bert": false, "use_biaffine_ner": false, "use_masked_crf": false, "only_master_logging": true, "resume_latest_cpt": true, "remove_last_cpt": false, "save_best_cpt": false, "model_type": "Doc2EDAG", "rearrange_sent": false, "use_crf_layer": true, "min_teacher_prob": 0.1, "schedule_epoch_start": 10, "schedule_epoch_length": 10, "loss_lambda": 0.05, "loss_gamma": 1.0, "add_greedy_dec": true, "use_token_role": true, "seq_reduce_type": "MaxPooling", "hidden_size": 768, "dropout": 0.1, "ff_size": 1024, "num_tf_layers": 4, "use_path_mem": true, "use_scheduled_sampling": true, "use_doc_enc": true, "neg_field_loss_scaling": 3.0, "gcn_layer": 3, "ner_num_tf_layers": 4, "num_lstm_layers": 1, "use_span_lstm": false, "span_lstm_num_layer": 1, "use_span_att": false, "span_att_heads": 4, "dot_att_head": 4, "comb_samp_min_num_span": 2, "comb_samp_num_samp": 100, "comb_samp_max_samp_times": 1000, "use_span_lstm_projection": false, "biaffine_hidden_size": 256, "triaffine_hidden_size": 150, "vi_max_iter": 3, "biaffine_hard_threshold": 0.5, 
"event_cls_loss_weight": 1.0, "smooth_attn_loss_weight": 1.0, "combination_loss_weight": 1.0, "comb_cls_loss_weight": 1.0, "comb_sim_loss_weight": 1.0, "span_cls_loss_weight": 1.0, "use_comb_cls_pred": false, "role_loss_weight": 1.0, "event_relevant_combination": false, "run_mode": "full", "drop_irr_ents": false, "at_least_one_comb": true, "include_complementary_ents": true, "filtered_data_types": "o2o", "ent_context_window": 20, "biaffine_grad_clip": false, "global_grad_clip": false, "ent_fix_mode": "n", "span_mention_sum": false, "add_adj_mat_weight_bias": false, "optimizer": "adam", "num_triggers": 1, "eval_num_triggers": 1, "with_left_trigger": true, "with_all_one_trigger_comb": false, "directed_trigger_graph": false, "adj_sim_head": 1, "adj_sim_agg": "mean", "adj_sim_split_head": false, "num_triggering_steps": 1, "use_shared_dropout_proj": false, "use_layer_norm_b4_biaffine": false, "remove_mention_type_layer_norm": false, "use_token_drop": false, "guessing_decode": false, "max_clique_decode": true, "try_to_make_up": false, "self_loop": false, "incremental_min_conn": -1, "use_span_self_att": false, "use_smooth_span_self_att": false, "ment_feature_type": "plus", "ment_type_hidden_size": 32, "num_mention_lstm_layer": 1, "gat_alpha": 0.2, "gat_num_heads": 4, "gat_num_layers": 2, "role_by_encoding": false, "use_mention_lstm": false, "mlp_before_adj_measure": false, "use_field_cls_mlp": false, "build_dense_connected_doc_graph": false, "stop_gradient": false, "doc_lang": "zh" } 2022-02-23 15:51:43.264 | Level 20 | dee.tasks.base_task:logging:196 - ====================Init Device==================== 2022-02-23 15:51:43.296 | Level 20 | dee.tasks.base_task:logging:196 - device cuda n_gpu 2 distributed training False 2022-02-23 15:51:43.296 | Level 20 | dee.tasks.base_task:logging:196 - ====================Reset Random Seed to 99==================== 2022-02-23 15:51:43.297 | Level 20 | dee.tasks.base_task:logging:196 - Init Summary Writer 
/root/anaconda3/envs/zhtorch/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:516: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'. _np_qint8 = np.dtype([("qint8", np.int8, 1)]) /root/anaconda3/envs/zhtorch/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:517: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'. _np_quint8 = np.dtype([("quint8", np.uint8, 1)]) /root/anaconda3/envs/zhtorch/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:518: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'. _np_qint16 = np.dtype([("qint16", np.int16, 1)]) /root/anaconda3/envs/zhtorch/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:519: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'. _np_quint16 = np.dtype([("quint16", np.uint16, 1)]) /root/anaconda3/envs/zhtorch/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:520: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'. _np_qint32 = np.dtype([("qint32", np.int32, 1)]) /root/anaconda3/envs/zhtorch/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:525: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'. 
np_resource = np.dtype([("resource", np.ubyte, 1)]) 2022-02-23 15:51:44.384 | Level 20 | dee.tasks.base_task:logging:196 - Writing summary into ./Exps/jiao/Summary/Summary-Feb23_15-51-43 2022-02-23 15:51:44.384 | Level 20 | dee.tasks.base_task:logging:196 - Initializing DEETask file bert/config.json not found The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. The tokenizer class you load from this checkpoint is 'BertTokenizer'. The class this function is called from is 'BertTokenizerForDocEE'. [('Build', ['CompanyName', 'Product', 'Address', 'StartTime', 'Country'], {1: ['CompanyName'], 2: ['CompanyName', 'StartTime'], 3: ['CompanyName', 'Product', 'StartTime'], 4: ['Address', 'CompanyName', 'Product', 'StartTime'], 5: ['Address', 'CompanyName', 'Country', 'Product', 'StartTime'], 'all': ['CompanyName', 'Product', 'Address', 'StartTime', 'Country']}, 5), ('Violated', ['CompanyName', 'Law', 'StartTime', 'Address', 'Character'], {1: ['CompanyName'], 2: ['CompanyName', 'StartTime'], 3: ['Character', 'CompanyName', 'StartTime'], 4: ['Address', 'Character', 'CompanyName', 'StartTime'], 5: ['Address', 'Character', 'CompanyName', 'Law', 'StartTime'], 'all': ['CompanyName', 'Law', 'StartTime', 'Address', 'Character']}, 5)] 2022-02-23 15:51:44.651 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_embedding.token_embedding.weight torch.Size([21128, 768]) 16226304 2022-02-23 15:51:44.651 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_embedding.pos_embedding.weight torch.Size([128, 768]) 98304 2022-02-23 15:51:44.651 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_embedding.layer_norm.gamma torch.Size([768]) 768 2022-02-23 15:51:44.652 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_embedding.layer_norm.beta torch.Size([768]) 768 2022-02-23 15:51:44.652 | INFO | dee.tasks.dee_task:init:377 - Trainable: 
ner_model.token_encoder.layers.0.self_attn.linears.0.weight torch.Size([768, 768]) 589824 2022-02-23 15:51:44.652 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.0.self_attn.linears.0.bias torch.Size([768]) 768 2022-02-23 15:51:44.652 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.0.self_attn.linears.1.weight torch.Size([768, 768]) 589824 2022-02-23 15:51:44.652 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.0.self_attn.linears.1.bias torch.Size([768]) 768 2022-02-23 15:51:44.653 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.0.self_attn.linears.2.weight torch.Size([768, 768]) 589824 2022-02-23 15:51:44.653 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.0.self_attn.linears.2.bias torch.Size([768]) 768 2022-02-23 15:51:44.653 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.0.self_attn.linears.3.weight torch.Size([768, 768]) 589824 2022-02-23 15:51:44.653 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.0.self_attn.linears.3.bias torch.Size([768]) 768 2022-02-23 15:51:44.653 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.0.feed_forward.w_1.weight torch.Size([1024, 768]) 786432 2022-02-23 15:51:44.653 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.0.feed_forward.w_1.bias torch.Size([1024]) 1024 2022-02-23 15:51:44.654 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.0.feed_forward.w_2.weight torch.Size([768, 1024]) 786432 2022-02-23 15:51:44.654 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.0.feed_forward.w_2.bias torch.Size([768]) 768 2022-02-23 15:51:44.654 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.0.sublayer.0.norm.gamma torch.Size([768]) 768 2022-02-23 
15:51:44.654 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.0.sublayer.0.norm.beta torch.Size([768]) 768 2022-02-23 15:51:44.654 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.0.sublayer.1.norm.gamma torch.Size([768]) 768 2022-02-23 15:51:44.654 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.0.sublayer.1.norm.beta torch.Size([768]) 768 2022-02-23 15:51:44.655 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.1.self_attn.linears.0.weight torch.Size([768, 768]) 589824 2022-02-23 15:51:44.655 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.1.self_attn.linears.0.bias torch.Size([768]) 768 2022-02-23 15:51:44.655 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.1.self_attn.linears.1.weight torch.Size([768, 768]) 589824 2022-02-23 15:51:44.655 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.1.self_attn.linears.1.bias torch.Size([768]) 768 2022-02-23 15:51:44.655 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.1.self_attn.linears.2.weight torch.Size([768, 768]) 589824 2022-02-23 15:51:44.655 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.1.self_attn.linears.2.bias torch.Size([768]) 768 2022-02-23 15:51:44.656 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.1.self_attn.linears.3.weight torch.Size([768, 768]) 589824 2022-02-23 15:51:44.656 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.1.self_attn.linears.3.bias torch.Size([768]) 768 2022-02-23 15:51:44.656 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.1.feed_forward.w_1.weight torch.Size([1024, 768]) 786432 2022-02-23 15:51:44.656 | INFO | dee.tasks.dee_task:init:377 - Trainable: 
ner_model.token_encoder.layers.1.feed_forward.w_1.bias torch.Size([1024]) 1024 2022-02-23 15:51:44.656 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.1.feed_forward.w_2.weight torch.Size([768, 1024]) 786432 2022-02-23 15:51:44.656 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.1.feed_forward.w_2.bias torch.Size([768]) 768 2022-02-23 15:51:44.657 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.1.sublayer.0.norm.gamma torch.Size([768]) 768 2022-02-23 15:51:44.657 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.1.sublayer.0.norm.beta torch.Size([768]) 768 2022-02-23 15:51:44.657 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.1.sublayer.1.norm.gamma torch.Size([768]) 768 2022-02-23 15:51:44.657 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.1.sublayer.1.norm.beta torch.Size([768]) 768 2022-02-23 15:51:44.657 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.2.self_attn.linears.0.weight torch.Size([768, 768]) 589824 2022-02-23 15:51:44.657 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.2.self_attn.linears.0.bias torch.Size([768]) 768 2022-02-23 15:51:44.658 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.2.self_attn.linears.1.weight torch.Size([768, 768]) 589824 2022-02-23 15:51:44.658 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.2.self_attn.linears.1.bias torch.Size([768]) 768 2022-02-23 15:51:44.658 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.2.self_attn.linears.2.weight torch.Size([768, 768]) 589824 2022-02-23 15:51:44.658 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.2.self_attn.linears.2.bias torch.Size([768]) 768 2022-02-23 15:51:44.658 | INFO | 
dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.2.self_attn.linears.3.weight torch.Size([768, 768]) 589824 2022-02-23 15:51:44.658 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.2.self_attn.linears.3.bias torch.Size([768]) 768 2022-02-23 15:51:44.659 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.2.feed_forward.w_1.weight torch.Size([1024, 768]) 786432 2022-02-23 15:51:44.659 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.2.feed_forward.w_1.bias torch.Size([1024]) 1024 2022-02-23 15:51:44.659 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.2.feed_forward.w_2.weight torch.Size([768, 1024]) 786432 2022-02-23 15:51:44.659 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.2.feed_forward.w_2.bias torch.Size([768]) 768 2022-02-23 15:51:44.659 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.2.sublayer.0.norm.gamma torch.Size([768]) 768 2022-02-23 15:51:44.660 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.2.sublayer.0.norm.beta torch.Size([768]) 768 2022-02-23 15:51:44.660 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.2.sublayer.1.norm.gamma torch.Size([768]) 768 2022-02-23 15:51:44.660 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.2.sublayer.1.norm.beta torch.Size([768]) 768 2022-02-23 15:51:44.660 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.3.self_attn.linears.0.weight torch.Size([768, 768]) 589824 2022-02-23 15:51:44.660 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.3.self_attn.linears.0.bias torch.Size([768]) 768 2022-02-23 15:51:44.660 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.3.self_attn.linears.1.weight torch.Size([768, 768]) 589824 
2022-02-23 15:51:44.661 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.3.self_attn.linears.1.bias torch.Size([768]) 768 2022-02-23 15:51:44.661 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.3.self_attn.linears.2.weight torch.Size([768, 768]) 589824 2022-02-23 15:51:44.661 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.3.self_attn.linears.2.bias torch.Size([768]) 768 2022-02-23 15:51:44.661 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.3.self_attn.linears.3.weight torch.Size([768, 768]) 589824 2022-02-23 15:51:44.661 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.3.self_attn.linears.3.bias torch.Size([768]) 768 2022-02-23 15:51:44.661 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.3.feed_forward.w_1.weight torch.Size([1024, 768]) 786432 2022-02-23 15:51:44.662 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.3.feed_forward.w_1.bias torch.Size([1024]) 1024 2022-02-23 15:51:44.662 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.3.feed_forward.w_2.weight torch.Size([768, 1024]) 786432 2022-02-23 15:51:44.662 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.3.feed_forward.w_2.bias torch.Size([768]) 768 2022-02-23 15:51:44.662 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.3.sublayer.0.norm.gamma torch.Size([768]) 768 2022-02-23 15:51:44.662 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.3.sublayer.0.norm.beta torch.Size([768]) 768 2022-02-23 15:51:44.662 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.3.sublayer.1.norm.gamma torch.Size([768]) 768 2022-02-23 15:51:44.663 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.3.sublayer.1.norm.beta 
torch.Size([768]) 768 2022-02-23 15:51:44.663 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.norm.gamma torch.Size([768]) 768 2022-02-23 15:51:44.663 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.norm.betatorch.Size([768]) 768 2022-02-23 15:51:44.663 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.crf_layer.trans_mat torch.Size([17, 17]) 289 2022-02-23 15:51:44.663 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.crf_layer.hidden2tag.weight torch.Size([17, 768]) 13056 2022-02-23 15:51:44.663 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.crf_layer.hidden2tag.bias torch.Size([17]) 17 2022-02-23 15:51:44.664 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.event_query torch.Size([1, 768]) 768 2022-02-23 15:51:44.664 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.event_cls.weight torch.Size([2, 768]) 1536 2022-02-23 15:51:44.664 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.event_cls.bias torch.Size([2]) 2 2022-02-23 15:51:44.664 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.field_cls_list.0.weight torch.Size([2, 768]) 1536 2022-02-23 15:51:44.664 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.field_cls_list.0.bias torch.Size([2]) 2 2022-02-23 15:51:44.664 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.field_cls_list.1.weight torch.Size([2, 768]) 1536 2022-02-23 15:51:44.665 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.field_cls_list.1.bias torch.Size([2]) 2 2022-02-23 15:51:44.665 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.field_cls_list.2.weight torch.Size([2, 768]) 1536 2022-02-23 15:51:44.665 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.field_cls_list.2.bias torch.Size([2]) 2 2022-02-23 15:51:44.665 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.field_cls_list.3.weight torch.Size([2, 768]) 
1536 2022-02-23 15:51:44.665 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.field_cls_list.3.bias torch.Size([2]) 2 2022-02-23 15:51:44.665 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.field_cls_list.4.weight torch.Size([2, 768]) 1536 2022-02-23 15:51:44.666 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.field_cls_list.4.bias torch.Size([2]) 2 2022-02-23 15:51:44.666 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.field_queries.0 torch.Size([1, 768]) 768 2022-02-23 15:51:44.666 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.field_queries.1 torch.Size([1, 768]) 768 2022-02-23 15:51:44.666 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.field_queries.2 torch.Size([1, 768]) 768 2022-02-23 15:51:44.666 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.field_queries.3 torch.Size([1, 768]) 768 2022-02-23 15:51:44.666 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.field_queries.4 torch.Size([1, 768]) 768 2022-02-23 15:51:44.667 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.event_query torch.Size([1, 768]) 768 2022-02-23 15:51:44.667 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.event_cls.weight torch.Size([2, 768]) 1536 2022-02-23 15:51:44.667 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.event_cls.bias torch.Size([2]) 2 2022-02-23 15:51:44.667 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.field_cls_list.0.weight torch.Size([2, 768]) 1536 2022-02-23 15:51:44.667 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.field_cls_list.0.bias torch.Size([2]) 2 2022-02-23 15:51:44.667 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.field_cls_list.1.weight torch.Size([2, 768]) 1536 2022-02-23 15:51:44.668 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.field_cls_list.1.bias torch.Size([2]) 2 2022-02-23 15:51:44.668 | INFO 
| dee.tasks.dee_task:init:377 - Trainable: event_tables.1.field_cls_list.2.weight torch.Size([2, 768]) 1536 2022-02-23 15:51:44.668 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.field_cls_list.2.bias torch.Size([2]) 2 2022-02-23 15:51:44.668 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.field_cls_list.3.weight torch.Size([2, 768]) 1536 2022-02-23 15:51:44.668 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.field_cls_list.3.bias torch.Size([2]) 2 2022-02-23 15:51:44.668 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.field_cls_list.4.weight torch.Size([2, 768]) 1536 2022-02-23 15:51:44.669 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.field_cls_list.4.bias torch.Size([2]) 2 2022-02-23 15:51:44.669 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.field_queries.0 torch.Size([1, 768]) 768 2022-02-23 15:51:44.669 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.field_queries.1 torch.Size([1, 768]) 768 2022-02-23 15:51:44.669 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.field_queries.2 torch.Size([1, 768]) 768 2022-02-23 15:51:44.669 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.field_queries.3 torch.Size([1, 768]) 768 2022-02-23 15:51:44.669 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.field_queries.4 torch.Size([1, 768]) 768 2022-02-23 15:51:44.670 | INFO | dee.tasks.dee_task:init:377 - Trainable: sent_pos_encoder.embedding.weighttorch.Size([64, 768]) 49152 2022-02-23 15:51:44.670 | INFO | dee.tasks.dee_task:init:377 - Trainable: sent_pos_encoder.layer_norm.gammatorch.Size([768]) 768 2022-02-23 15:51:44.670 | INFO | dee.tasks.dee_task:init:377 - Trainable: sent_pos_encoder.layer_norm.betatorch.Size([768]) 768 2022-02-23 15:51:44.670 | INFO | dee.tasks.dee_task:init:377 - Trainable: ment_type_encoder.embedding.weight torch.Size([15, 768]) 11520 2022-02-23 15:51:44.670 | INFO | 
dee.tasks.dee_task:init:377 - Trainable: ment_type_encoder.layer_norm.gamma torch.Size([768]) 768 2022-02-23 15:51:44.670 | INFO | dee.tasks.dee_task:init:377 - Trainable: ment_type_encoder.layer_norm.betatorch.Size([768]) 768 2022-02-23 15:51:44.671 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.0.self_attn.linears.0.weight torch.Size([768, 768]) 589824 2022-02-23 15:51:44.671 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.0.self_attn.linears.0.bias torch.Size([768]) 768 2022-02-23 15:51:44.671 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.0.self_attn.linears.1.weight torch.Size([768, 768]) 589824 2022-02-23 15:51:44.671 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.0.self_attn.linears.1.bias torch.Size([768]) 768 2022-02-23 15:51:44.671 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.0.self_attn.linears.2.weight torch.Size([768, 768]) 589824 2022-02-23 15:51:44.671 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.0.self_attn.linears.2.bias torch.Size([768]) 768 2022-02-23 15:51:44.672 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.0.self_attn.linears.3.weight torch.Size([768, 768]) 589824 2022-02-23 15:51:44.672 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.0.self_attn.linears.3.bias torch.Size([768]) 768 2022-02-23 15:51:44.672 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.0.feed_forward.w_1.weight torch.Size([1024, 768]) 786432 2022-02-23 15:51:44.672 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.0.feed_forward.w_1.bias torch.Size([1024]) 1024 2022-02-23 15:51:44.672 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.0.feed_forward.w_2.weight torch.Size([768, 1024]) 786432 2022-02-23 15:51:44.673 | INFO | dee.tasks.dee_task:init:377 - 
Trainable: doc_context_encoder.layers.0.feed_forward.w_2.bias torch.Size([768]) 768 2022-02-23 15:51:44.673 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.0.sublayer.0.norm.gamma torch.Size([768]) 768 2022-02-23 15:51:44.673 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.0.sublayer.0.norm.beta torch.Size([768]) 768 2022-02-23 15:51:44.673 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.0.sublayer.1.norm.gamma torch.Size([768]) 768 2022-02-23 15:51:44.673 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.0.sublayer.1.norm.beta torch.Size([768]) 768 2022-02-23 15:51:44.673 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.1.self_attn.linears.0.weight torch.Size([768, 768]) 589824 2022-02-23 15:51:44.674 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.1.self_attn.linears.0.bias torch.Size([768]) 768 2022-02-23 15:51:44.674 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.1.self_attn.linears.1.weight torch.Size([768, 768]) 589824 2022-02-23 15:51:44.674 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.1.self_attn.linears.1.bias torch.Size([768]) 768 2022-02-23 15:51:44.674 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.1.self_attn.linears.2.weight torch.Size([768, 768]) 589824 2022-02-23 15:51:44.674 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.1.self_attn.linears.2.bias torch.Size([768]) 768 2022-02-23 15:51:44.674 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.1.self_attn.linears.3.weight torch.Size([768, 768]) 589824 2022-02-23 15:51:44.675 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.1.self_attn.linears.3.bias torch.Size([768]) 768 2022-02-23 15:51:44.675 | INFO | dee.tasks.dee_task:init:377 - Trainable: 
doc_context_encoder.layers.1.feed_forward.w_1.weight torch.Size([1024, 768]) 786432 2022-02-23 15:51:44.675 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.1.feed_forward.w_1.bias torch.Size([1024]) 1024 2022-02-23 15:51:44.675 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.1.feed_forward.w_2.weight torch.Size([768, 1024]) 786432 2022-02-23 15:51:44.675 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.1.feed_forward.w_2.bias torch.Size([768]) 768 2022-02-23 15:51:44.675 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.1.sublayer.0.norm.gamma torch.Size([768]) 768 2022-02-23 15:51:44.676 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.1.sublayer.0.norm.beta torch.Size([768]) 768 2022-02-23 15:51:44.676 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.1.sublayer.1.norm.gamma torch.Size([768]) 768 2022-02-23 15:51:44.676 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.1.sublayer.1.norm.beta torch.Size([768]) 768 2022-02-23 15:51:44.676 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.2.self_attn.linears.0.weight torch.Size([768, 768]) 589824 2022-02-23 15:51:44.676 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.2.self_attn.linears.0.bias torch.Size([768]) 768 2022-02-23 15:51:44.676 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.2.self_attn.linears.1.weight torch.Size([768, 768]) 589824 2022-02-23 15:51:44.677 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.2.self_attn.linears.1.bias torch.Size([768]) 768 2022-02-23 15:51:44.677 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.2.self_attn.linears.2.weight torch.Size([768, 768]) 589824 2022-02-23 15:51:44.677 | INFO | dee.tasks.dee_task:init:377 - Trainable: 
doc_context_encoder.layers.2.self_attn.linears.2.bias torch.Size([768]) 768 2022-02-23 15:51:44.677 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.2.self_attn.linears.3.weight torch.Size([768, 768]) 589824 2022-02-23 15:51:44.677 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.2.self_attn.linears.3.bias torch.Size([768]) 768 2022-02-23 15:51:44.677 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.2.feed_forward.w_1.weight torch.Size([1024, 768]) 786432 2022-02-23 15:51:44.678 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.2.feed_forward.w_1.bias torch.Size([1024]) 1024 2022-02-23 15:51:44.678 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.2.feed_forward.w_2.weight torch.Size([768, 1024]) 786432 2022-02-23 15:51:44.678 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.2.feed_forward.w_2.bias torch.Size([768]) 768 2022-02-23 15:51:44.678 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.2.sublayer.0.norm.gamma torch.Size([768]) 768 2022-02-23 15:51:44.678 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.2.sublayer.0.norm.beta torch.Size([768]) 768 2022-02-23 15:51:44.678 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.2.sublayer.1.norm.gamma torch.Size([768]) 768 2022-02-23 15:51:44.679 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.2.sublayer.1.norm.beta torch.Size([768]) 768 2022-02-23 15:51:44.679 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.3.self_attn.linears.0.weight torch.Size([768, 768]) 589824 2022-02-23 15:51:44.679 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.3.self_attn.linears.0.bias torch.Size([768]) 768 2022-02-23 15:51:44.679 | INFO | dee.tasks.dee_task:init:377 - Trainable: 
doc_context_encoder.layers.3.self_attn.linears.1.weight torch.Size([768, 768]) 589824 2022-02-23 15:51:44.679 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.3.self_attn.linears.1.bias torch.Size([768]) 768 2022-02-23 15:51:44.679 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.3.self_attn.linears.2.weight torch.Size([768, 768]) 589824 2022-02-23 15:51:44.680 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.3.self_attn.linears.2.bias torch.Size([768]) 768 2022-02-23 15:51:44.680 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.3.self_attn.linears.3.weight torch.Size([768, 768]) 589824 2022-02-23 15:51:44.680 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.3.self_attn.linears.3.bias torch.Size([768]) 768 2022-02-23 15:51:44.680 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.3.feed_forward.w_1.weight torch.Size([1024, 768]) 786432 2022-02-23 15:51:44.680 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.3.feed_forward.w_1.bias torch.Size([1024]) 1024 2022-02-23 15:51:44.680 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.3.feed_forward.w_2.weight torch.Size([768, 1024]) 786432 2022-02-23 15:51:44.681 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.3.feed_forward.w_2.bias torch.Size([768]) 768 2022-02-23 15:51:44.681 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.3.sublayer.0.norm.gamma torch.Size([768]) 768 2022-02-23 15:51:44.681 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.3.sublayer.0.norm.beta torch.Size([768]) 768 2022-02-23 15:51:44.681 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.3.sublayer.1.norm.gamma torch.Size([768]) 768 2022-02-23 15:51:44.681 | INFO | dee.tasks.dee_task:init:377 - Trainable: 
doc_context_encoder.layers.3.sublayer.1.norm.beta torch.Size([768]) 768 2022-02-23 15:51:44.681 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.norm.gamma torch.Size([768]) 768 2022-02-23 15:51:44.682 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.norm.beta torch.Size([768]) 768 2022-02-23 15:51:44.682 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.0.self_attn.linears.0.weight torch.Size([768, 768]) 589824 2022-02-23 15:51:44.682 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.0.self_attn.linears.0.bias torch.Size([768]) 768 2022-02-23 15:51:44.682 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.0.self_attn.linears.1.weight torch.Size([768, 768]) 589824 2022-02-23 15:51:44.682 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.0.self_attn.linears.1.bias torch.Size([768]) 768 2022-02-23 15:51:44.682 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.0.self_attn.linears.2.weight torch.Size([768, 768]) 589824 2022-02-23 15:51:44.683 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.0.self_attn.linears.2.bias torch.Size([768]) 768 2022-02-23 15:51:44.683 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.0.self_attn.linears.3.weight torch.Size([768, 768]) 589824 2022-02-23 15:51:44.683 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.0.self_attn.linears.3.bias torch.Size([768]) 768 2022-02-23 15:51:44.683 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.0.feed_forward.w_1.weight torch.Size([1024, 768]) 786432 2022-02-23 15:51:44.683 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.0.feed_forward.w_1.bias torch.Size([1024]) 1024 2022-02-23 15:51:44.683 | INFO | dee.tasks.dee_task:init:377 - Trainable: 
field_context_encoder.layers.0.feed_forward.w_2.weight torch.Size([768, 1024]) 786432 2022-02-23 15:51:44.684 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.0.feed_forward.w_2.bias torch.Size([768]) 768 2022-02-23 15:51:44.684 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.0.sublayer.0.norm.gamma torch.Size([768]) 768 2022-02-23 15:51:44.684 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.0.sublayer.0.norm.beta torch.Size([768]) 768 2022-02-23 15:51:44.684 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.0.sublayer.1.norm.gamma torch.Size([768]) 768 2022-02-23 15:51:44.684 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.0.sublayer.1.norm.beta torch.Size([768]) 768 2022-02-23 15:51:44.684 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.1.self_attn.linears.0.weight torch.Size([768, 768]) 589824 2022-02-23 15:51:44.685 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.1.self_attn.linears.0.bias torch.Size([768]) 768 2022-02-23 15:51:44.685 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.1.self_attn.linears.1.weight torch.Size([768, 768]) 589824 2022-02-23 15:51:44.685 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.1.self_attn.linears.1.bias torch.Size([768]) 768 2022-02-23 15:51:44.685 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.1.self_attn.linears.2.weight torch.Size([768, 768]) 589824 2022-02-23 15:51:44.685 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.1.self_attn.linears.2.bias torch.Size([768]) 768 2022-02-23 15:51:44.686 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.1.self_attn.linears.3.weight torch.Size([768, 768]) 589824 2022-02-23 15:51:44.686 | INFO | dee.tasks.dee_task:init:377 - 
Trainable: field_context_encoder.layers.1.self_attn.linears.3.bias torch.Size([768]) 768 2022-02-23 15:51:44.686 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.1.feed_forward.w_1.weight torch.Size([1024, 768]) 786432 2022-02-23 15:51:44.686 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.1.feed_forward.w_1.bias torch.Size([1024]) 1024 2022-02-23 15:51:44.686 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.1.feed_forward.w_2.weight torch.Size([768, 1024]) 786432 2022-02-23 15:51:44.686 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.1.feed_forward.w_2.bias torch.Size([768]) 768 2022-02-23 15:51:44.687 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.1.sublayer.0.norm.gamma torch.Size([768]) 768 2022-02-23 15:51:44.687 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.1.sublayer.0.norm.beta torch.Size([768]) 768 2022-02-23 15:51:44.687 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.1.sublayer.1.norm.gamma torch.Size([768]) 768 2022-02-23 15:51:44.687 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.1.sublayer.1.norm.beta torch.Size([768]) 768 2022-02-23 15:51:44.687 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.2.self_attn.linears.0.weight torch.Size([768, 768]) 589824 2022-02-23 15:51:44.687 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.2.self_attn.linears.0.bias torch.Size([768]) 768 2022-02-23 15:51:44.688 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.2.self_attn.linears.1.weight torch.Size([768, 768]) 589824 2022-02-23 15:51:44.688 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.2.self_attn.linears.1.bias torch.Size([768]) 768 2022-02-23 15:51:44.688 | INFO | dee.tasks.dee_task:init:377 - 
Trainable: field_context_encoder.layers.2.self_attn.linears.2.weight torch.Size([768, 768]) 589824 2022-02-23 15:51:44.688 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.2.self_attn.linears.2.bias torch.Size([768]) 768 2022-02-23 15:51:44.688 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.2.self_attn.linears.3.weight torch.Size([768, 768]) 589824 2022-02-23 15:51:44.688 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.2.self_attn.linears.3.bias torch.Size([768]) 768 2022-02-23 15:51:44.689 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.2.feed_forward.w_1.weight torch.Size([1024, 768]) 786432 2022-02-23 15:51:44.689 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.2.feed_forward.w_1.bias torch.Size([1024]) 1024 2022-02-23 15:51:44.689 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.2.feed_forward.w_2.weight torch.Size([768, 1024]) 786432 2022-02-23 15:51:44.689 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.2.feed_forward.w_2.bias torch.Size([768]) 768 2022-02-23 15:51:44.689 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.2.sublayer.0.norm.gamma torch.Size([768]) 768 2022-02-23 15:51:44.689 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.2.sublayer.0.norm.beta torch.Size([768]) 768 2022-02-23 15:51:44.690 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.2.sublayer.1.norm.gamma torch.Size([768]) 768 2022-02-23 15:51:44.690 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.2.sublayer.1.norm.beta torch.Size([768]) 768 2022-02-23 15:51:44.690 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.3.self_attn.linears.0.weight torch.Size([768, 768]) 589824 2022-02-23 15:51:44.690 | INFO | 
dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.3.self_attn.linears.0.bias torch.Size([768]) 768 2022-02-23 15:51:44.690 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.3.self_attn.linears.1.weight torch.Size([768, 768]) 589824 2022-02-23 15:51:44.690 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.3.self_attn.linears.1.bias torch.Size([768]) 768 2022-02-23 15:51:44.691 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.3.self_attn.linears.2.weight torch.Size([768, 768]) 589824 2022-02-23 15:51:44.691 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.3.self_attn.linears.2.bias torch.Size([768]) 768 2022-02-23 15:51:44.691 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.3.self_attn.linears.3.weight torch.Size([768, 768]) 589824 2022-02-23 15:51:44.691 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.3.self_attn.linears.3.bias torch.Size([768]) 768 2022-02-23 15:51:44.691 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.3.feed_forward.w_1.weight torch.Size([1024, 768]) 786432 2022-02-23 15:51:44.691 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.3.feed_forward.w_1.bias torch.Size([1024]) 1024 2022-02-23 15:51:44.692 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.3.feed_forward.w_2.weight torch.Size([768, 1024]) 786432 2022-02-23 15:51:44.692 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.3.feed_forward.w_2.bias torch.Size([768]) 768 2022-02-23 15:51:44.692 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.3.sublayer.0.norm.gamma torch.Size([768]) 768 2022-02-23 15:51:44.692 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.3.sublayer.0.norm.beta torch.Size([768]) 768 2022-02-23 
15:51:44.692 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.3.sublayer.1.norm.gamma torch.Size([768]) 768 2022-02-23 15:51:44.692 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.3.sublayer.1.norm.beta torch.Size([768]) 768 2022-02-23 15:51:44.693 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.norm.gamma torch.Size([768]) 768 2022-02-23 15:51:44.693 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.norm.beta torch.Size([768]) 768 2022-02-23 15:51:44.693 | INFO | dee.tasks.dee_task:init:389 - #Total Trainable Parameters: 63716682 2022-02-23 15:51:44.693 | INFO | dee.tasks.dee_task:init:390 - #Total Fixed Parameters: 0 2022-02-23 15:51:44.693 | Level 20 | dee.tasks.base_task:logging:196 - ====================Decorate Model==================== Traceback (most recent call last): File "/home/jiaojiaxin/DocEE/run_dee_task.py", line 208, in parallel_decorate=in_argv.parallel_decorate, File "/home/jiaojiaxin/DocEE/dee/tasks/dee_task.py", line 392, in init self._decorate_model(parallel_decorate=parallel_decorate) File "/home/jiaojiaxin/DocEE/dee/tasks/base_task.py", line 474, in _decorate_model self.model.to(self.device) File "/root/anaconda3/envs/zhtorch/lib/python3.6/site-packages/torch/nn/modules/module.py", line 612, in to return self._apply(convert) File "/root/anaconda3/envs/zhtorch/lib/python3.6/site-packages/torch/nn/modules/module.py", line 359, in _apply module._apply(fn) File "/root/anaconda3/envs/zhtorch/lib/python3.6/site-packages/torch/nn/modules/module.py", line 359, in _apply module._apply(fn) File "/root/anaconda3/envs/zhtorch/lib/python3.6/site-packages/torch/nn/modules/module.py", line 359, in _apply module._apply(fn) File "/root/anaconda3/envs/zhtorch/lib/python3.6/site-packages/torch/nn/modules/module.py", line 381, in _apply param_applied = fn(param) File "/root/anaconda3/envs/zhtorch/lib/python3.6/site-packages/torch/nn/modules/module.py", line 
610, in convert return t.to(device, dtype if t.is_floating_point() else None, non_blocking) RuntimeError: CUDA error: out of memory
The text was updated successfully, but these errors were encountered:
嗨,您好。
LSTMMTL
LSTMMTL2CompleteGraphModel
建议使用dee/models/trigger_aware.py中的TriggerAwarePrunedCompleteGraph,也是我们论文中提到的PTPCG模型,训练速度快,且相较上述两个模型的最终效果好。参数可见scripts/run_ptpcg.sh。如果是自己的数据的话,可能需要再调调参,改改train_batch_size,gradient_accumulation_steps和learning_rate。
dee/models/trigger_aware.py
TriggerAwarePrunedCompleteGraph
scripts/run_ptpcg.sh
train_batch_size
gradient_accumulation_steps
learning_rate
Sorry, something went wrong.
感谢,已经在我数据集上运行完毕。速度果然是Doc2EDAG的n倍的n倍。我现在想使用CPU运行predict_one(),应该如何做?现在仍报CUDA out of memory错误,以下是我的修改:skip_train调为True,load_eval和load_test改为False,no_cuda都改为True,load_inference相关改为True(看着像是推理相关参数),model_type、save_cpt_file改为TriggerAwarePrunedCompleteGraph,然后在run_dee_task里200多行Build_dee_tasking之后加上了dee_task.predict_one(我的文本)运行。
嗨您好,load_inference是打比赛时用来预测线上测试集的,如果不需要的话也可以设置为False。如果想使用CPU预测的话,可以将CUDA_VISIBLE_DEVICES环境变量置空:CUDA_VISIBLE_DEVICES=""。另外需要在task实例化之后,将最佳模型权重导入:dee_task.resume_cpt_at(最佳轮次, resume_model=True),之后就可以调用dee_task.predict_one(字符串)方法进行推理了。
load_inference
CUDA_VISIBLE_DEVICES
CUDA_VISIBLE_DEVICES=""
dee_task.resume_cpt_at(最佳轮次, resume_model=True)
dee_task.predict_one(字符串)
经过实测,单事件文章上的效果确实比Doc2EDAG好,再次感谢您的开源
感谢您的关注和支持~
No branches or pull requests
Using backend: pytorch
2022-02-23 15:51:43.263 | Level 20 | dee.tasks.base_task:logging:196 - ====================Check Setting Validity====================
2022-02-23 15:51:43.264 | Level 20 | dee.tasks.base_task:logging:196 - Setting: {
"data_dir": "./Data",
"model_dir": "./Exps/jiao/Model",
"output_dir": "./Exps/jiao/Output",
"bert_model": "bert",
"train_file_name": "typed_train.json",
"dev_file_name": "typed_dev.json",
"test_file_name": "typed_test.json",
"max_seq_len": 128,
"train_batch_size": 16,
"eval_batch_size": 2,
"learning_rate": 0.0001,
"num_train_epochs": 10,
"warmup_proportion": 0.1,
"no_cuda": false,
"local_rank": -1,
"seed": 99,
"gradient_accumulation_steps": 8,
"optimize_on_cpu": false,
"fp16": false,
"loss_scale": 128,
"cpt_file_name": "Doc2EDAG",
"summary_dir_name": "./Exps/jiao/Summary/Summary",
"event_type_template": "jiao",
"max_sent_len": 128,
"max_sent_num": 64,
"use_lr_scheduler": false,
"lr_scheduler_step": 20,
"use_bert": false,
"use_biaffine_ner": false,
"use_masked_crf": false,
"only_master_logging": true,
"resume_latest_cpt": true,
"remove_last_cpt": false,
"save_best_cpt": false,
"model_type": "Doc2EDAG",
"rearrange_sent": false,
"use_crf_layer": true,
"min_teacher_prob": 0.1,
"schedule_epoch_start": 10,
"schedule_epoch_length": 10,
"loss_lambda": 0.05,
"loss_gamma": 1.0,
"add_greedy_dec": true,
"use_token_role": true,
"seq_reduce_type": "MaxPooling",
"hidden_size": 768,
"dropout": 0.1,
"ff_size": 1024,
"num_tf_layers": 4,
"use_path_mem": true,
"use_scheduled_sampling": true,
"use_doc_enc": true,
"neg_field_loss_scaling": 3.0,
"gcn_layer": 3,
"ner_num_tf_layers": 4,
"num_lstm_layers": 1,
"use_span_lstm": false,
"span_lstm_num_layer": 1,
"use_span_att": false,
"span_att_heads": 4,
"dot_att_head": 4,
"comb_samp_min_num_span": 2,
"comb_samp_num_samp": 100,
"comb_samp_max_samp_times": 1000,
"use_span_lstm_projection": false,
"biaffine_hidden_size": 256,
"triaffine_hidden_size": 150,
"vi_max_iter": 3,
"biaffine_hard_threshold": 0.5,
"event_cls_loss_weight": 1.0,
"smooth_attn_loss_weight": 1.0,
"combination_loss_weight": 1.0,
"comb_cls_loss_weight": 1.0,
"comb_sim_loss_weight": 1.0,
"span_cls_loss_weight": 1.0,
"use_comb_cls_pred": false,
"role_loss_weight": 1.0,
"event_relevant_combination": false,
"run_mode": "full",
"drop_irr_ents": false,
"at_least_one_comb": true,
"include_complementary_ents": true,
"filtered_data_types": "o2o",
"ent_context_window": 20,
"biaffine_grad_clip": false,
"global_grad_clip": false,
"ent_fix_mode": "n",
"span_mention_sum": false,
"add_adj_mat_weight_bias": false,
"optimizer": "adam",
"num_triggers": 1,
"eval_num_triggers": 1,
"with_left_trigger": true,
"with_all_one_trigger_comb": false,
"directed_trigger_graph": false,
"adj_sim_head": 1,
"adj_sim_agg": "mean",
"adj_sim_split_head": false,
"num_triggering_steps": 1,
"use_shared_dropout_proj": false,
"use_layer_norm_b4_biaffine": false,
"remove_mention_type_layer_norm": false,
"use_token_drop": false,
"guessing_decode": false,
"max_clique_decode": true,
"try_to_make_up": false,
"self_loop": false,
"incremental_min_conn": -1,
"use_span_self_att": false,
"use_smooth_span_self_att": false,
"ment_feature_type": "plus",
"ment_type_hidden_size": 32,
"num_mention_lstm_layer": 1,
"gat_alpha": 0.2,
"gat_num_heads": 4,
"gat_num_layers": 2,
"role_by_encoding": false,
"use_mention_lstm": false,
"mlp_before_adj_measure": false,
"use_field_cls_mlp": false,
"build_dense_connected_doc_graph": false,
"stop_gradient": false,
"doc_lang": "zh"
}
2022-02-23 15:51:43.264 | Level 20 | dee.tasks.base_task:logging:196 - ====================Init Device====================
2022-02-23 15:51:43.296 | Level 20 | dee.tasks.base_task:logging:196 - device cuda n_gpu 2 distributed training False
2022-02-23 15:51:43.296 | Level 20 | dee.tasks.base_task:logging:196 - ====================Reset Random Seed to 99====================
2022-02-23 15:51:43.297 | Level 20 | dee.tasks.base_task:logging:196 - Init Summary Writer
/root/anaconda3/envs/zhtorch/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:516: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
_np_qint8 = np.dtype([("qint8", np.int8, 1)])
/root/anaconda3/envs/zhtorch/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:517: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
_np_quint8 = np.dtype([("quint8", np.uint8, 1)])
/root/anaconda3/envs/zhtorch/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:518: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
_np_qint16 = np.dtype([("qint16", np.int16, 1)])
/root/anaconda3/envs/zhtorch/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:519: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
_np_quint16 = np.dtype([("quint16", np.uint16, 1)])
/root/anaconda3/envs/zhtorch/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:520: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
_np_qint32 = np.dtype([("qint32", np.int32, 1)])
/root/anaconda3/envs/zhtorch/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:525: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
np_resource = np.dtype([("resource", np.ubyte, 1)])
2022-02-23 15:51:44.384 | Level 20 | dee.tasks.base_task:logging:196 - Writing summary into ./Exps/jiao/Summary/Summary-Feb23_15-51-43
2022-02-23 15:51:44.384 | Level 20 | dee.tasks.base_task:logging:196 - Initializing DEETask
file bert/config.json not found
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization.
The tokenizer class you load from this checkpoint is 'BertTokenizer'.
The class this function is called from is 'BertTokenizerForDocEE'.
[('Build', ['CompanyName', 'Product', 'Address', 'StartTime', 'Country'], {1: ['CompanyName'], 2: ['CompanyName', 'StartTime'], 3: ['CompanyName', 'Product', 'StartTime'], 4: ['Address', 'CompanyName', 'Product', 'StartTime'], 5: ['Address', 'CompanyName', 'Country', 'Product', 'StartTime'], 'all': ['CompanyName', 'Product', 'Address', 'StartTime', 'Country']}, 5), ('Violated', ['CompanyName', 'Law', 'StartTime', 'Address', 'Character'], {1: ['CompanyName'], 2: ['CompanyName', 'StartTime'], 3: ['Character', 'CompanyName', 'StartTime'], 4: ['Address', 'Character', 'CompanyName', 'StartTime'], 5: ['Address', 'Character', 'CompanyName', 'Law', 'StartTime'], 'all': ['CompanyName', 'Law', 'StartTime', 'Address', 'Character']}, 5)]
2022-02-23 15:51:44.651 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_embedding.token_embedding.weight torch.Size([21128, 768]) 16226304
2022-02-23 15:51:44.651 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_embedding.pos_embedding.weight torch.Size([128, 768]) 98304
2022-02-23 15:51:44.651 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_embedding.layer_norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.652 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_embedding.layer_norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.652 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.0.self_attn.linears.0.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.652 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.0.self_attn.linears.0.bias torch.Size([768]) 768
2022-02-23 15:51:44.652 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.0.self_attn.linears.1.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.652 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.0.self_attn.linears.1.bias torch.Size([768]) 768
2022-02-23 15:51:44.653 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.0.self_attn.linears.2.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.653 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.0.self_attn.linears.2.bias torch.Size([768]) 768
2022-02-23 15:51:44.653 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.0.self_attn.linears.3.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.653 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.0.self_attn.linears.3.bias torch.Size([768]) 768
2022-02-23 15:51:44.653 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.0.feed_forward.w_1.weight torch.Size([1024, 768]) 786432
2022-02-23 15:51:44.653 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.0.feed_forward.w_1.bias torch.Size([1024]) 1024
2022-02-23 15:51:44.654 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.0.feed_forward.w_2.weight torch.Size([768, 1024]) 786432
2022-02-23 15:51:44.654 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.0.feed_forward.w_2.bias torch.Size([768]) 768
2022-02-23 15:51:44.654 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.0.sublayer.0.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.654 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.0.sublayer.0.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.654 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.0.sublayer.1.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.654 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.0.sublayer.1.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.655 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.1.self_attn.linears.0.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.655 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.1.self_attn.linears.0.bias torch.Size([768]) 768
2022-02-23 15:51:44.655 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.1.self_attn.linears.1.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.655 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.1.self_attn.linears.1.bias torch.Size([768]) 768
2022-02-23 15:51:44.655 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.1.self_attn.linears.2.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.655 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.1.self_attn.linears.2.bias torch.Size([768]) 768
2022-02-23 15:51:44.656 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.1.self_attn.linears.3.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.656 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.1.self_attn.linears.3.bias torch.Size([768]) 768
2022-02-23 15:51:44.656 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.1.feed_forward.w_1.weight torch.Size([1024, 768]) 786432
2022-02-23 15:51:44.656 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.1.feed_forward.w_1.bias torch.Size([1024]) 1024
2022-02-23 15:51:44.656 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.1.feed_forward.w_2.weight torch.Size([768, 1024]) 786432
2022-02-23 15:51:44.656 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.1.feed_forward.w_2.bias torch.Size([768]) 768
2022-02-23 15:51:44.657 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.1.sublayer.0.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.657 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.1.sublayer.0.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.657 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.1.sublayer.1.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.657 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.1.sublayer.1.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.657 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.2.self_attn.linears.0.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.657 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.2.self_attn.linears.0.bias torch.Size([768]) 768
2022-02-23 15:51:44.658 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.2.self_attn.linears.1.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.658 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.2.self_attn.linears.1.bias torch.Size([768]) 768
2022-02-23 15:51:44.658 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.2.self_attn.linears.2.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.658 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.2.self_attn.linears.2.bias torch.Size([768]) 768
2022-02-23 15:51:44.658 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.2.self_attn.linears.3.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.658 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.2.self_attn.linears.3.bias torch.Size([768]) 768
2022-02-23 15:51:44.659 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.2.feed_forward.w_1.weight torch.Size([1024, 768]) 786432
2022-02-23 15:51:44.659 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.2.feed_forward.w_1.bias torch.Size([1024]) 1024
2022-02-23 15:51:44.659 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.2.feed_forward.w_2.weight torch.Size([768, 1024]) 786432
2022-02-23 15:51:44.659 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.2.feed_forward.w_2.bias torch.Size([768]) 768
2022-02-23 15:51:44.659 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.2.sublayer.0.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.660 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.2.sublayer.0.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.660 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.2.sublayer.1.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.660 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.2.sublayer.1.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.660 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.3.self_attn.linears.0.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.660 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.3.self_attn.linears.0.bias torch.Size([768]) 768
2022-02-23 15:51:44.660 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.3.self_attn.linears.1.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.661 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.3.self_attn.linears.1.bias torch.Size([768]) 768
2022-02-23 15:51:44.661 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.3.self_attn.linears.2.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.661 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.3.self_attn.linears.2.bias torch.Size([768]) 768
2022-02-23 15:51:44.661 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.3.self_attn.linears.3.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.661 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.3.self_attn.linears.3.bias torch.Size([768]) 768
2022-02-23 15:51:44.661 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.3.feed_forward.w_1.weight torch.Size([1024, 768]) 786432
2022-02-23 15:51:44.662 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.3.feed_forward.w_1.bias torch.Size([1024]) 1024
2022-02-23 15:51:44.662 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.3.feed_forward.w_2.weight torch.Size([768, 1024]) 786432
2022-02-23 15:51:44.662 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.3.feed_forward.w_2.bias torch.Size([768]) 768
2022-02-23 15:51:44.662 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.3.sublayer.0.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.662 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.3.sublayer.0.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.662 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.3.sublayer.1.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.663 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.3.sublayer.1.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.663 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.663 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.663 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.crf_layer.trans_mat torch.Size([17, 17]) 289
2022-02-23 15:51:44.663 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.crf_layer.hidden2tag.weight torch.Size([17, 768]) 13056
2022-02-23 15:51:44.663 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.crf_layer.hidden2tag.bias torch.Size([17]) 17
2022-02-23 15:51:44.664 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.event_query torch.Size([1, 768]) 768
2022-02-23 15:51:44.664 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.event_cls.weight torch.Size([2, 768]) 1536
2022-02-23 15:51:44.664 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.event_cls.bias torch.Size([2]) 2
2022-02-23 15:51:44.664 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.field_cls_list.0.weight torch.Size([2, 768]) 1536
2022-02-23 15:51:44.664 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.field_cls_list.0.bias torch.Size([2]) 2
2022-02-23 15:51:44.664 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.field_cls_list.1.weight torch.Size([2, 768]) 1536
2022-02-23 15:51:44.665 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.field_cls_list.1.bias torch.Size([2]) 2
2022-02-23 15:51:44.665 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.field_cls_list.2.weight torch.Size([2, 768]) 1536
2022-02-23 15:51:44.665 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.field_cls_list.2.bias torch.Size([2]) 2
2022-02-23 15:51:44.665 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.field_cls_list.3.weight torch.Size([2, 768]) 1536
2022-02-23 15:51:44.665 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.field_cls_list.3.bias torch.Size([2]) 2
2022-02-23 15:51:44.665 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.field_cls_list.4.weight torch.Size([2, 768]) 1536
2022-02-23 15:51:44.666 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.field_cls_list.4.bias torch.Size([2]) 2
2022-02-23 15:51:44.666 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.field_queries.0 torch.Size([1, 768]) 768
2022-02-23 15:51:44.666 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.field_queries.1 torch.Size([1, 768]) 768
2022-02-23 15:51:44.666 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.field_queries.2 torch.Size([1, 768]) 768
2022-02-23 15:51:44.666 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.field_queries.3 torch.Size([1, 768]) 768
2022-02-23 15:51:44.666 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.field_queries.4 torch.Size([1, 768]) 768
2022-02-23 15:51:44.667 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.event_query torch.Size([1, 768]) 768
2022-02-23 15:51:44.667 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.event_cls.weight torch.Size([2, 768]) 1536
2022-02-23 15:51:44.667 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.event_cls.bias torch.Size([2]) 2
2022-02-23 15:51:44.667 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.field_cls_list.0.weight torch.Size([2, 768]) 1536
2022-02-23 15:51:44.667 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.field_cls_list.0.bias torch.Size([2]) 2
2022-02-23 15:51:44.667 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.field_cls_list.1.weight torch.Size([2, 768]) 1536
2022-02-23 15:51:44.668 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.field_cls_list.1.bias torch.Size([2]) 2
2022-02-23 15:51:44.668 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.field_cls_list.2.weight torch.Size([2, 768]) 1536
2022-02-23 15:51:44.668 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.field_cls_list.2.bias torch.Size([2]) 2
2022-02-23 15:51:44.668 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.field_cls_list.3.weight torch.Size([2, 768]) 1536
2022-02-23 15:51:44.668 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.field_cls_list.3.bias torch.Size([2]) 2
2022-02-23 15:51:44.668 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.field_cls_list.4.weight torch.Size([2, 768]) 1536
2022-02-23 15:51:44.669 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.field_cls_list.4.bias torch.Size([2]) 2
2022-02-23 15:51:44.669 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.field_queries.0 torch.Size([1, 768]) 768
2022-02-23 15:51:44.669 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.field_queries.1 torch.Size([1, 768]) 768
2022-02-23 15:51:44.669 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.field_queries.2 torch.Size([1, 768]) 768
2022-02-23 15:51:44.669 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.field_queries.3 torch.Size([1, 768]) 768
2022-02-23 15:51:44.669 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.field_queries.4 torch.Size([1, 768]) 768
2022-02-23 15:51:44.670 | INFO | dee.tasks.dee_task:init:377 - Trainable: sent_pos_encoder.embedding.weight torch.Size([64, 768]) 49152
2022-02-23 15:51:44.670 | INFO | dee.tasks.dee_task:init:377 - Trainable: sent_pos_encoder.layer_norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.670 | INFO | dee.tasks.dee_task:init:377 - Trainable: sent_pos_encoder.layer_norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.670 | INFO | dee.tasks.dee_task:init:377 - Trainable: ment_type_encoder.embedding.weight torch.Size([15, 768]) 11520
2022-02-23 15:51:44.670 | INFO | dee.tasks.dee_task:init:377 - Trainable: ment_type_encoder.layer_norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.670 | INFO | dee.tasks.dee_task:init:377 - Trainable: ment_type_encoder.layer_norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.671 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.0.self_attn.linears.0.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.671 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.0.self_attn.linears.0.bias torch.Size([768]) 768
2022-02-23 15:51:44.671 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.0.self_attn.linears.1.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.671 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.0.self_attn.linears.1.bias torch.Size([768]) 768
2022-02-23 15:51:44.671 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.0.self_attn.linears.2.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.671 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.0.self_attn.linears.2.bias torch.Size([768]) 768
2022-02-23 15:51:44.672 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.0.self_attn.linears.3.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.672 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.0.self_attn.linears.3.bias torch.Size([768]) 768
2022-02-23 15:51:44.672 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.0.feed_forward.w_1.weight torch.Size([1024, 768]) 786432
2022-02-23 15:51:44.672 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.0.feed_forward.w_1.bias torch.Size([1024]) 1024
2022-02-23 15:51:44.672 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.0.feed_forward.w_2.weight torch.Size([768, 1024]) 786432
2022-02-23 15:51:44.673 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.0.feed_forward.w_2.bias torch.Size([768]) 768
2022-02-23 15:51:44.673 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.0.sublayer.0.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.673 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.0.sublayer.0.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.673 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.0.sublayer.1.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.673 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.0.sublayer.1.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.673 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.1.self_attn.linears.0.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.674 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.1.self_attn.linears.0.bias torch.Size([768]) 768
2022-02-23 15:51:44.674 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.1.self_attn.linears.1.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.674 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.1.self_attn.linears.1.bias torch.Size([768]) 768
2022-02-23 15:51:44.674 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.1.self_attn.linears.2.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.674 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.1.self_attn.linears.2.bias torch.Size([768]) 768
2022-02-23 15:51:44.674 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.1.self_attn.linears.3.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.675 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.1.self_attn.linears.3.bias torch.Size([768]) 768
2022-02-23 15:51:44.675 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.1.feed_forward.w_1.weight torch.Size([1024, 768]) 786432
2022-02-23 15:51:44.675 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.1.feed_forward.w_1.bias torch.Size([1024]) 1024
2022-02-23 15:51:44.675 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.1.feed_forward.w_2.weight torch.Size([768, 1024]) 786432
2022-02-23 15:51:44.675 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.1.feed_forward.w_2.bias torch.Size([768]) 768
2022-02-23 15:51:44.675 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.1.sublayer.0.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.676 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.1.sublayer.0.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.676 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.1.sublayer.1.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.676 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.1.sublayer.1.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.676 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.2.self_attn.linears.0.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.676 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.2.self_attn.linears.0.bias torch.Size([768]) 768
2022-02-23 15:51:44.676 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.2.self_attn.linears.1.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.677 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.2.self_attn.linears.1.bias torch.Size([768]) 768
2022-02-23 15:51:44.677 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.2.self_attn.linears.2.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.677 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.2.self_attn.linears.2.bias torch.Size([768]) 768
2022-02-23 15:51:44.677 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.2.self_attn.linears.3.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.677 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.2.self_attn.linears.3.bias torch.Size([768]) 768
2022-02-23 15:51:44.677 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.2.feed_forward.w_1.weight torch.Size([1024, 768]) 786432
2022-02-23 15:51:44.678 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.2.feed_forward.w_1.bias torch.Size([1024]) 1024
2022-02-23 15:51:44.678 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.2.feed_forward.w_2.weight torch.Size([768, 1024]) 786432
2022-02-23 15:51:44.678 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.2.feed_forward.w_2.bias torch.Size([768]) 768
2022-02-23 15:51:44.678 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.2.sublayer.0.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.678 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.2.sublayer.0.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.678 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.2.sublayer.1.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.679 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.2.sublayer.1.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.679 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.3.self_attn.linears.0.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.679 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.3.self_attn.linears.0.bias torch.Size([768]) 768
2022-02-23 15:51:44.679 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.3.self_attn.linears.1.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.679 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.3.self_attn.linears.1.bias torch.Size([768]) 768
2022-02-23 15:51:44.679 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.3.self_attn.linears.2.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.680 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.3.self_attn.linears.2.bias torch.Size([768]) 768
2022-02-23 15:51:44.680 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.3.self_attn.linears.3.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.680 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.3.self_attn.linears.3.bias torch.Size([768]) 768
2022-02-23 15:51:44.680 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.3.feed_forward.w_1.weight torch.Size([1024, 768]) 786432
2022-02-23 15:51:44.680 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.3.feed_forward.w_1.bias torch.Size([1024]) 1024
2022-02-23 15:51:44.680 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.3.feed_forward.w_2.weight torch.Size([768, 1024]) 786432
2022-02-23 15:51:44.681 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.3.feed_forward.w_2.bias torch.Size([768]) 768
2022-02-23 15:51:44.681 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.3.sublayer.0.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.681 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.3.sublayer.0.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.681 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.3.sublayer.1.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.681 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.3.sublayer.1.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.681 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.682 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.682 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.0.self_attn.linears.0.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.682 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.0.self_attn.linears.0.bias torch.Size([768]) 768
2022-02-23 15:51:44.682 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.0.self_attn.linears.1.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.682 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.0.self_attn.linears.1.bias torch.Size([768]) 768
2022-02-23 15:51:44.682 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.0.self_attn.linears.2.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.683 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.0.self_attn.linears.2.bias torch.Size([768]) 768
2022-02-23 15:51:44.683 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.0.self_attn.linears.3.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.683 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.0.self_attn.linears.3.bias torch.Size([768]) 768
2022-02-23 15:51:44.683 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.0.feed_forward.w_1.weight torch.Size([1024, 768]) 786432
2022-02-23 15:51:44.683 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.0.feed_forward.w_1.bias torch.Size([1024]) 1024
2022-02-23 15:51:44.683 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.0.feed_forward.w_2.weight torch.Size([768, 1024]) 786432
2022-02-23 15:51:44.684 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.0.feed_forward.w_2.bias torch.Size([768]) 768
2022-02-23 15:51:44.684 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.0.sublayer.0.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.684 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.0.sublayer.0.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.684 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.0.sublayer.1.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.684 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.0.sublayer.1.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.684 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.1.self_attn.linears.0.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.685 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.1.self_attn.linears.0.bias torch.Size([768]) 768
2022-02-23 15:51:44.685 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.1.self_attn.linears.1.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.685 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.1.self_attn.linears.1.bias torch.Size([768]) 768
2022-02-23 15:51:44.685 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.1.self_attn.linears.2.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.685 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.1.self_attn.linears.2.bias torch.Size([768]) 768
2022-02-23 15:51:44.686 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.1.self_attn.linears.3.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.686 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.1.self_attn.linears.3.bias torch.Size([768]) 768
2022-02-23 15:51:44.686 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.1.feed_forward.w_1.weight torch.Size([1024, 768]) 786432
2022-02-23 15:51:44.686 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.1.feed_forward.w_1.bias torch.Size([1024]) 1024
2022-02-23 15:51:44.686 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.1.feed_forward.w_2.weight torch.Size([768, 1024]) 786432
2022-02-23 15:51:44.686 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.1.feed_forward.w_2.bias torch.Size([768]) 768
2022-02-23 15:51:44.687 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.1.sublayer.0.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.687 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.1.sublayer.0.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.687 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.1.sublayer.1.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.687 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.1.sublayer.1.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.687 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.2.self_attn.linears.0.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.687 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.2.self_attn.linears.0.bias torch.Size([768]) 768
2022-02-23 15:51:44.688 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.2.self_attn.linears.1.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.688 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.2.self_attn.linears.1.bias torch.Size([768]) 768
2022-02-23 15:51:44.688 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.2.self_attn.linears.2.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.688 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.2.self_attn.linears.2.bias torch.Size([768]) 768
2022-02-23 15:51:44.688 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.2.self_attn.linears.3.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.688 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.2.self_attn.linears.3.bias torch.Size([768]) 768
2022-02-23 15:51:44.689 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.2.feed_forward.w_1.weight torch.Size([1024, 768]) 786432
2022-02-23 15:51:44.689 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.2.feed_forward.w_1.bias torch.Size([1024]) 1024
2022-02-23 15:51:44.689 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.2.feed_forward.w_2.weight torch.Size([768, 1024]) 786432
2022-02-23 15:51:44.689 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.2.feed_forward.w_2.bias torch.Size([768]) 768
2022-02-23 15:51:44.689 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.2.sublayer.0.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.689 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.2.sublayer.0.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.690 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.2.sublayer.1.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.690 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.2.sublayer.1.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.690 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.3.self_attn.linears.0.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.690 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.3.self_attn.linears.0.bias torch.Size([768]) 768
2022-02-23 15:51:44.690 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.3.self_attn.linears.1.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.690 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.3.self_attn.linears.1.bias torch.Size([768]) 768
2022-02-23 15:51:44.691 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.3.self_attn.linears.2.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.691 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.3.self_attn.linears.2.bias torch.Size([768]) 768
2022-02-23 15:51:44.691 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.3.self_attn.linears.3.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.691 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.3.self_attn.linears.3.bias torch.Size([768]) 768
2022-02-23 15:51:44.691 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.3.feed_forward.w_1.weight torch.Size([1024, 768]) 786432
2022-02-23 15:51:44.691 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.3.feed_forward.w_1.bias torch.Size([1024]) 1024
2022-02-23 15:51:44.692 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.3.feed_forward.w_2.weight torch.Size([768, 1024]) 786432
2022-02-23 15:51:44.692 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.3.feed_forward.w_2.bias torch.Size([768]) 768
2022-02-23 15:51:44.692 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.3.sublayer.0.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.692 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.3.sublayer.0.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.692 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.3.sublayer.1.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.692 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.3.sublayer.1.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.693 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.693 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.693 | INFO | dee.tasks.dee_task:init:389 - #Total Trainable Parameters: 63716682
2022-02-23 15:51:44.693 | INFO | dee.tasks.dee_task:init:390 - #Total Fixed Parameters: 0
2022-02-23 15:51:44.693 | Level 20 | dee.tasks.base_task:logging:196 - ====================Decorate Model====================
Traceback (most recent call last):
File "/home/jiaojiaxin/DocEE/run_dee_task.py", line 208, in <module>
parallel_decorate=in_argv.parallel_decorate,
File "/home/jiaojiaxin/DocEE/dee/tasks/dee_task.py", line 392, in __init__
self._decorate_model(parallel_decorate=parallel_decorate)
File "/home/jiaojiaxin/DocEE/dee/tasks/base_task.py", line 474, in _decorate_model
self.model.to(self.device)
File "/root/anaconda3/envs/zhtorch/lib/python3.6/site-packages/torch/nn/modules/module.py", line 612, in to
return self._apply(convert)
File "/root/anaconda3/envs/zhtorch/lib/python3.6/site-packages/torch/nn/modules/module.py", line 359, in _apply
module._apply(fn)
File "/root/anaconda3/envs/zhtorch/lib/python3.6/site-packages/torch/nn/modules/module.py", line 359, in _apply
module._apply(fn)
File "/root/anaconda3/envs/zhtorch/lib/python3.6/site-packages/torch/nn/modules/module.py", line 359, in _apply
module._apply(fn)
File "/root/anaconda3/envs/zhtorch/lib/python3.6/site-packages/torch/nn/modules/module.py", line 381, in _apply
param_applied = fn(param)
File "/root/anaconda3/envs/zhtorch/lib/python3.6/site-packages/torch/nn/modules/module.py", line 610, in convert
return t.to(device, dtype if t.is_floating_point() else None, non_blocking)
RuntimeError: CUDA error: out of memory
The text was updated successfully, but these errors were encountered: