In [1]:
from src.models import FtModel
from src.config import parse_args
from src.dataset import FinetuneDataset
from src.utils import *

from torch.utils.data import DataLoader, Dataset, RandomSampler, SequentialSampler, Subset, WeightedRandomSampler
import os
from tqdm import tqdm,trange
import pandas as pd
import torch
import sys
import gc

# Start from the project's default arguments, then pin the settings this
# inference notebook relies on (applied in the same order as listed).
args = parse_args()
for _opt_name, _opt_value in (
    ("gpu_ids", '2'),
    ("bert_seq_length", 128),
    ("ema_start", 0),
    ("ema_decay", 0.99),
    ("result_file", "result.tsv"),
    ("bert_dir", 'digitalepidemiologylab/covid-twitter-bert'),
):
    setattr(args, _opt_name, _opt_value)

# Expose only the selected GPU; this is exported before the project's device
# and seed helpers are called.
os.environ.update(CUDA_VISIBLE_DEVICES=args.gpu_ids)
setup_device(args)
setup_seed(args)

# Test split, read in its stored order so predictions line up with
# `test_dataset.data` row by row.
# NOTE(review): the third positional argument (True) is presumably a
# test-mode flag — confirm against FinetuneDataset.
test_dataset = FinetuneDataset(args, args.test_path, True)
test_sampler = SequentialSampler(test_dataset)
_loader_options = dict(
    batch_size=args.val_batch_size,
    sampler=test_sampler,
    drop_last=False,
    pin_memory=True,
)
test_dataloader = DataLoader(test_dataset, **_loader_options)

# len() of a DataLoader is the number of batches, not the number of samples.
print('The test data length: ',len(test_dataloader))

The test data length:  32


In [None]:
# Five-fold ensemble inference: every fold checkpoint scores the whole test
# set, the model outputs are summed across folds, and the argmax of the
# averaged output is written to the result file as id / text / label.
ckpoint_list = ['data/checkpoint/fold0/model_epoch_1_f1_0.9292_1100.bin',
                'data/checkpoint/fold1/model_epoch_1_f1_0.9282_900.bin',
                'data/checkpoint/fold2/model_epoch_1_f1_0.9350_1000.bin',
                'data/checkpoint/fold3/model_epoch_1_f1_0.9407_1000.bin',
                'data/checkpoint/fold4/model_epoch_1_f1_0.9358_1000.bin']

probability = None  # running sum of the per-fold outputs
for ckpoint_path in ckpoint_list:
    model = FtModel(args).to(args.device)
    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    ckpoint = torch.load(ckpoint_path)
    model.load_state_dict(ckpoint['model_state_dict'])
    print("The epoch {} and the best mean f1 {:.4f} of the validation set.".format(ckpoint['epoch'],ckpoint['mean_f1']))

    if args.ema_start >= 0:
        # Restore the EMA state saved with the checkpoint and apply its
        # shadow weights to the model before predicting.
        ema = EMA(model, args.ema_decay)
        ema.resume(ckpoint['shadow'][0], ckpoint['backup'][0])
        ema.apply_shadow()

    model.eval()
    batch_outputs = []
    with torch.no_grad():
        for batch in tqdm(test_dataloader, desc="Evaluating"):
            # Move inputs to the same device the model was placed on.
            for k in batch:
                batch[k] = batch[k].to(args.device)
            batch_outputs.append(model(batch, True))
    # Concatenate once per fold instead of re-allocating after every batch.
    probabilities = torch.cat(batch_outputs)

    probability = probabilities if probability is None else probability + probabilities

# Bug fix: average the *accumulated* `probability` over all folds. The
# previous code took the argmax of `probabilities`, i.e. the last fold only,
# so the ensemble was silently ignored.
pred_label_id = torch.argmax(probability / len(ckpoint_list), dim=1)
predictions = pred_label_id.cpu().tolist()

# NOTE(review): `text` is written verbatim; a tab or newline inside a tweet
# would break the TSV layout — confirm the dataset is already sanitised.
with open(f"data/{args.result_file}", "w", encoding="utf-8") as f:
    print('task a')
    f.write("id\ttext\tlabel\n")
    for i in trange(len(predictions)):
        i_d = test_dataset.data['id'].iloc[i]
        text = test_dataset.data['text'].iloc[i]
        label = int(predictions[i])

        f.write(f"{i_d}\t{text}\t{label}\n")

In [2]:
# Five-fold ensemble inference: every fold checkpoint scores the whole test
# set, the model outputs are summed across folds, and the argmax of the
# averaged output is written to the result file as id / label.
ckpoint_list = ['data/checkpoint/fold0/model_epoch_1_f1_0.9292_600.bin',
                'data/checkpoint/fold1/model_epoch_2_f1_0.9319_1400.bin',
                'data/checkpoint/fold2/model_epoch_3_f1_0.9367_1600.bin',
                'data/checkpoint/fold3/model_epoch_3_f1_0.9365_1800.bin',
                'data/checkpoint/fold4/model_epoch_3_f1_0.9271_1600.bin']

probability = None  # running sum of the per-fold outputs
for ckpoint_path in ckpoint_list:
    model = FtModel(args).to(args.device)
    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    ckpoint = torch.load(ckpoint_path)
    model.load_state_dict(ckpoint['model_state_dict'])
    print("The epoch {} and the best mean f1 {:.4f} of the validation set.".format(ckpoint['epoch'],ckpoint['mean_f1']))

    if args.ema_start >= 0:
        # Restore the EMA state saved with the checkpoint and apply its
        # shadow weights to the model before predicting.
        ema = EMA(model, args.ema_decay)
        ema.resume(ckpoint['shadow'][0], ckpoint['backup'][0])
        ema.apply_shadow()

    model.eval()
    batch_outputs = []
    with torch.no_grad():
        for batch in tqdm(test_dataloader, desc="Evaluating"):
            # Move inputs to the same device the model was placed on.
            for k in batch:
                batch[k] = batch[k].to(args.device)
            batch_outputs.append(model(batch, True))
    # Concatenate once per fold instead of re-allocating after every batch.
    probabilities = torch.cat(batch_outputs)

    probability = probabilities if probability is None else probability + probabilities

# Bug fix: average the *accumulated* `probability` over all folds. The
# previous code took the argmax of `probabilities`, i.e. the last fold only,
# so the ensemble was silently ignored.
pred_label_id = torch.argmax(probability / len(ckpoint_list), dim=1)
predictions = pred_label_id.cpu().tolist()

# Only the id and the predicted label are written; the tweet text is omitted.
with open(f"data/{args.result_file}", "w", encoding="utf-8") as f:
    print('task a')
    f.write("id\tlabel\n")
    for i in trange(len(predictions)):
        i_d = test_dataset.data['id'].iloc[i]
        label = int(predictions[i])

        f.write(f"{i_d}\t{label}\n")

Some weights of the model checkpoint at digitalepidemiologylab/covid-twitter-bert were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


The epoch 1 and the best mean f1 0.9292 of the validation set.


  probability = nn.functional.softmax(logits)
Evaluating: 100%|██████████| 32/32 [00:14<00:00,  2.25it/s]
Some weights of the model checkpoint at digitalepidemiologylab/covid-twitter-bert were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).

The epoch 2 and the best mean f1 0.9319 of the validation set.


Evaluating: 100%|██████████| 32/32 [00:14<00:00,  2.24it/s]
Some weights of the model checkpoint at digitalepidemiologylab/covid-twitter-bert were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


The epoch 3 and the best mean f1 0.9367 of the validation set.


Evaluating: 100%|██████████| 32/32 [00:14<00:00,  2.23it/s]
Some weights of the model checkpoint at digitalepidemiologylab/covid-twitter-bert were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


The epoch 3 and the best mean f1 0.9365 of the validation set.


Evaluating: 100%|██████████| 32/32 [00:14<00:00,  2.21it/s]
Some weights of the model checkpoint at digitalepidemiologylab/covid-twitter-bert were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


The epoch 3 and the best mean f1 0.9271 of the validation set.


Evaluating: 100%|██████████| 32/32 [00:14<00:00,  2.20it/s]


task a


100%|██████████| 2000/2000 [00:00<00:00, 84974.60it/s]
