In [None]:
!pip install transformers==4.11.3.

Collecting transformers==4.11.3.
  Downloading transformers-4.11.3-py3-none-any.whl (2.9 MB)
[K     |████████████████████████████████| 2.9 MB 29.5 MB/s 
Collecting sacremoses
  Downloading sacremoses-0.0.49-py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 58.6 MB/s 
Collecting huggingface-hub>=0.0.17
  Downloading huggingface_hub-0.5.1-py3-none-any.whl (77 kB)
[K     |████████████████████████████████| 77 kB 7.9 MB/s 
Collecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 62.0 MB/s 
Collecting tokenizers<0.11,>=0.10.1
  Downloading tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)
[K     |████████████████████████████████| 3.3 MB 57.6 MB/s 
Installing collected packages: pyyaml, tokenizers, sacremoses, huggingface-hub, transformers
  Attem

In [None]:
import sys
from google.colab import drive
# Mount Google Drive into the Colab filesystem so the data folder used below is reachable.
drive.mount('/content/gdrive/')
# Add the data folder to the module search path so Python files stored there can be imported.
sys.path.append('/content/gdrive/MyDrive/data')

Mounted at /content/gdrive/


In [None]:
import torch
import random
import numpy as np

# Root folder (on the mounted Drive) that holds the CSV files and the BERT checkpoint.
SYSPATH = '/content/gdrive/MyDrive/data/'

# Experiment settings shared by the data, training and prediction cells.
config = dict(
    # Data and checkpoint locations.
    train_file_path=f'{SYSPATH}train.csv',
    test_file_path=f'{SYSPATH}test.csv',
    train_val_ratio=0.1,   # fraction of the training file held out for validation
    model_path=f'{SYSPATH}BERT_model',

    # Optimisation settings.
    batch_size=16,
    num_epochs=2,
    learning_rate=2e-5,
    logging_step=500,      # evaluate and log every N optimizer steps
    seed=2021,
)

# Use the GPU when one is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
    config['device'] = 'cuda'
else:
    config['device'] = 'cpu'

def seed_everything(seed):
    """Seed Python, NumPy and PyTorch (CPU and all CUDA devices) with ``seed``.

    Args:
        seed: Integer seed applied to every random number generator.

    Returns:
        The same ``seed`` value, unchanged.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)
    return seed

# Seed every RNG up front, before any data is loaded or any model is built.
seed_everything(config['seed'])

2021

In [None]:
import pandas as pd
from tqdm import tqdm
from collections import defaultdict
def _append_encoding(features, encoded):
    """Append one tokenizer output to the per-field lists held in ``features``."""
    # The 'inputs_ids' spelling is the key NEWSData and collate_fn look up.
    features['inputs_ids'].append(encoded['input_ids'])
    features['token_type_ids'].append(encoded['token_type_ids'])
    features['attention_mask'].append(encoded['attention_mask'])


def read_data(config, tokenizer, mode='train'):
    """Read a CSV file and tokenize its sentences.

    The sentence is taken from the last column of the file. In ``'train'``
    mode the label is taken from the second column and the first
    ``int(len(df) * config['train_val_ratio'])`` rows become the validation
    split; in any other mode every row gets the dummy label ``0``.

    Args:
        config: Dict providing ``config[mode + '_file_path']`` and, in train
            mode, ``config['train_val_ratio']``.
        tokenizer: Object exposing ``encode_plus`` that returns ``input_ids``,
            ``token_type_ids`` and ``attention_mask``.
        mode: ``'train'`` for the labelled file; anything else is treated as
            an unlabelled file (the notebook uses ``'test'``).

    Returns:
        Train mode: ``(X_train, y_train, X_val, y_val, label2id, id2label)``
        where the ``X_*`` are dicts of token lists and the ``y_*`` are long
        tensors of label ids.
        Other modes: ``(X_test, y_test)`` with ``y_test`` all zeros.
    """
    df = pd.read_csv(config[mode + '_file_path'], sep=',')
    is_train = mode == 'train'

    # Explicit positional column access: integer keys on a string-labelled
    # Series (``row[1]`` / ``row[-1]``) are deprecated in recent pandas.
    sentences = df.iloc[:, -1].tolist()
    labels = df.iloc[:, 1].tolist() if is_train else [0] * len(df)
    num_val = int(len(df) * config['train_val_ratio']) if is_train else 0

    X_train, y_train = defaultdict(list), []
    X_val, y_val = defaultdict(list), []
    X_test, y_test = defaultdict(list), []

    for position, (sentence, label) in enumerate(zip(sentences, labels)):
        # add_special_tokens inserts the [CLS] and [SEP] markers.
        encoded = tokenizer.encode_plus(sentence, add_special_tokens=True,
                                        return_token_type_ids=True, return_attention_mask=True)

        if not is_train:
            features, targets = X_test, y_test
        elif position < num_val:
            # The leading rows of the training file form the validation split.
            features, targets = X_val, y_val
        else:
            features, targets = X_train, y_train

        _append_encoding(features, encoded)
        targets.append(label)

    if is_train:
        # Build the label vocabulary from both splits so a class that only
        # occurs in the validation rows does not raise KeyError below.
        label2id = {label: idx for idx, label in enumerate(np.unique(y_train + y_val))}
        id2label = {idx: label for label, idx in label2id.items()}
        y_train = torch.tensor([label2id[label] for label in y_train], dtype=torch.long)
        y_val = torch.tensor([label2id[label] for label in y_val], dtype=torch.long)
        return X_train, y_train, X_val, y_val, label2id, id2label

    y_test = torch.tensor(y_test, dtype=torch.long)
    return X_test, y_test

In [None]:
from torch.utils.data import Dataset
class NEWSData(Dataset):
    """Map-style dataset pairing tokenized sentences with their label ids.

    ``X`` is a mapping with the keys ``'inputs_ids'``, ``'token_type_ids'``
    and ``'attention_mask'``, each indexable by example position. ``y`` is
    indexable the same way and must provide ``size(0)`` for the length.
    """

    def __init__(self, X, y):
        self.x, self.y = X, y

    def __getitem__(self, idx):
        """Return the un-padded fields of example ``idx`` as a dict."""
        example = {'inputs_ids': self.x['inputs_ids'][idx]}
        example['label'] = self.y[idx]
        example['token_type_ids'] = self.x['token_type_ids'][idx]
        example['attention_mask'] = self.x['attention_mask'][idx]
        return example

    def __len__(self):
        """Number of examples, taken from the first dimension of ``y``."""
        num_examples = self.y.size(0)
        return num_examples

In [None]:
def collate_fn(examples):
    """Zero-pad a list of examples to a common length and stack them.

    Args:
        examples: Iterable of dicts with the keys ``'inputs_ids'``,
            ``'label'``, ``'token_type_ids'`` and ``'attention_mask'``.

    Returns:
        Dict of long tensors: ``'input_ids'``, ``'token_type_ids'`` and
        ``'attention_mask'`` of shape (batch, longest sequence), plus
        ``'labels'`` of shape (batch,).
    """
    # Pull the four fields out of every example in a single pass.
    rows = [
        (ex['inputs_ids'], ex['label'], ex['token_type_ids'], ex['attention_mask'])
        for ex in examples
    ]

    # Every sequence is padded up to the longest one in this batch.
    longest = max(len(ids) for ids, _, _, _ in rows)
    input_ids_tensor = torch.zeros((len(rows), longest), dtype=torch.long)
    token_type_ids_tensor = torch.zeros_like(input_ids_tensor)
    attention_mask_tensor = torch.zeros_like(input_ids_tensor)

    # Copy each sequence into the left part of its row; the rest stays 0.
    for row, (ids, _, type_ids, mask) in enumerate(rows):
        end = len(ids)
        input_ids_tensor[row, :end] = torch.tensor(ids, dtype=torch.long)
        token_type_ids_tensor[row, :end] = torch.tensor(type_ids, dtype=torch.long)
        attention_mask_tensor[row, :end] = torch.tensor(mask, dtype=torch.long)

    labels_tensor = torch.tensor([label for _, label, _, _ in rows], dtype=torch.long)
    return {
        'input_ids': input_ids_tensor,
        'labels': labels_tensor,
        'token_type_ids': token_type_ids_tensor,
        'attention_mask': attention_mask_tensor,
    }

In [None]:
from transformers import BertTokenizer
from torch.utils.data import DataLoader
def build_dataloader(config):
    """Tokenize the train/test CSV files and wrap them in DataLoaders.

    Args:
        config: Dict providing ``'model_path'`` (tokenizer location),
            ``'batch_size'`` and the keys ``read_data`` needs. An optional
            ``'num_workers'`` entry overrides the number of loader worker
            processes; by default it is capped at the CPU count so small
            hosts are not oversubscribed.

    Returns:
        ``(train_dataloader, val_dataloader, test_dataloader, id2label)``.
        Only the training loader shuffles.
    """
    import os  # local import: only needed to size the worker pool

    # get bert pretrain
    tokenizer = BertTokenizer.from_pretrained(config['model_path'])
    X_train, y_train, X_val, y_val, _, id2label = read_data(config, tokenizer, mode='train')
    X_test, y_test = read_data(config, tokenizer, mode='test')

    # Previously hard-coded to 4, which exceeds the CPU count on small hosts
    # and makes PyTorch emit a warning for every loader iteration.
    num_workers = config.get('num_workers', min(4, os.cpu_count() or 1))

    def make_loader(features, targets, shuffle):
        # All three loaders share batch size, worker count and collation.
        return DataLoader(NEWSData(features, targets), batch_size=config['batch_size'],
                          num_workers=num_workers, shuffle=shuffle, collate_fn=collate_fn)

    train_dataloader = make_loader(X_train, y_train, shuffle=True)
    val_dataloader = make_loader(X_val, y_val, shuffle=False)
    test_dataloader = make_loader(X_test, y_test, shuffle=False)

    return train_dataloader, val_dataloader, test_dataloader, id2label

# Tokenize both CSV files once; the loaders are reused by training, evaluation and prediction.
train_dataloader, val_dataloader, test_dataloader, id2label = build_dataloader(config)

  cpuset_checked))


In [None]:
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score

def evaluation(config, model, val_dataloader):
    """Run ``model`` over ``val_dataloader`` and compute loss and metrics.

    Args:
        config: Dict providing ``'device'``, the device batches are moved to.
        model: Model called as ``model(**batch)`` whose first two outputs are
            the loss and the logits.
        val_dataloader: Iterable of batches (dicts of tensors) that include a
            ``'labels'`` entry.

    Returns:
        ``[avg_val_loss, accuracy, recall, precision, f1]``; recall,
        precision and F1 are macro-averaged, and the loss is averaged over
        the number of batches.
    """
    model.eval()
    gold_chunks, pred_chunks = [], []
    running_loss = 0.
    progress = tqdm(val_dataloader, desc='Evaluation', total=len(val_dataloader))

    with torch.no_grad():
        for batch in progress:
            # Keep the reference labels on the CPU before moving the batch.
            gold_chunks.append(batch['labels'])
            on_device = {name: tensor.to(config['device']) for name, tensor in batch.items()}
            outputs = model(**on_device)
            loss, logits = outputs[:2]

            running_loss += loss.item()
            batch_preds = logits.argmax(dim=-1)
            pred_chunks.append(batch_preds.detach().cpu())

    avg_val_loss = running_loss / len(val_dataloader)
    y_true = torch.cat(gold_chunks, dim=0).numpy()
    y_pred = torch.cat(pred_chunks, dim=0).numpy()

    return [
        avg_val_loss,
        accuracy_score(y_true, y_pred),
        recall_score(y_true, y_pred, average='macro'),
        precision_score(y_true, y_pred, average='macro'),
        f1_score(y_true, y_pred, average='macro'),
    ]

In [None]:
from transformers import BertConfig, BertForSequenceClassification
from transformers import AdamW
from tqdm import trange

def train(config, id2label, train_dataloader, val_dataloader):
    """Fine-tune a BERT sequence classifier and return the trained model.

    Args:
        config: Dict providing ``'model_path'``, ``'learning_rate'``,
            ``'device'``, ``'num_epochs'`` and ``'logging_step'``.
        id2label: Mapping of class id to label; its length sets the number
            of output classes.
        train_dataloader: Loader yielding training batches (dicts of tensors
            passed to the model as keyword arguments).
        val_dataloader: Loader used for the periodic validation runs.

    Returns:
        The fine-tuned ``BertForSequenceClassification`` model.
    """
    bert_config = BertConfig.from_pretrained(config['model_path'])
    bert_config.num_labels = len(id2label)
    model = BertForSequenceClassification.from_pretrained(config['model_path'], config=bert_config)

    # define optimizer
    optimizer = AdamW(model.parameters(), lr=config['learning_rate'])
    model.to(config['device'])
    global_steps = 0
    train_loss = 0.
    logging_loss = 0.

    for _epoch in trange(config['num_epochs']):
        train_iterator = tqdm(train_dataloader, desc='Training', total=len(train_dataloader))
        model.train()
        # Iterate the tqdm wrapper (not the raw loader) so the bar advances.
        for batch in train_iterator:
            batch = {item: value.to(config['device']) for item, value in batch.items()}
            loss = model(**batch)[0]
            model.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            global_steps += 1

            if global_steps % config['logging_step'] == 0:
                # Mean training loss over the steps since the previous report.
                print_train_loss = (train_loss - logging_loss) / config['logging_step']
                logging_loss = train_loss
                result = evaluation(config, model, val_dataloader)
                avg_val_loss, accuracy = result[0], result[1]
                print_log = f'>>> training loss: {print_train_loss:.4f}, valid loss: {avg_val_loss:.4f}, ' \
                            f'valid accuracy score: {accuracy:.4f}'
                print(print_log)
                # evaluation() switches the model to eval mode; switch back.
                model.train()

    return model

In [None]:
# Fine-tune the classifier; validation metrics are printed every config['logging_step'] steps.
model = train(config, id2label, train_dataloader, val_dataloader)

Some weights of the model checkpoint at /content/gdrive/MyDrive/data/BERT_model were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from t

>>> training loss: 1.6510, valid loss: 1.3553, valid accuracy score: 0.5480




  cpuset_checked))


Evaluation:   0%|          | 1/334 [00:00<01:07,  4.97it/s][A[A

Evaluation:   1%|          | 4/334 [00:00<00:22, 14.57it/s][A[A

Evaluation:   2%|▏         | 8/334 [00:00<00:14, 21.78it/s][A[A

Evaluation:   3%|▎         | 11/334 [00:00<00:13, 23.87it/s][A[A

Evaluation:   4%|▍         | 15/334 [00:00<00:12, 26.52it/s][A[A

Evaluation:   6%|▌         | 19/334 [00:00<00:11, 28.14it/s][A[A

Evaluation:   7%|▋         | 22/334 [00:00<00:11, 28.33it/s][A[A

Evaluation:   7%|▋         | 25/334 [00:01<00:11, 27.87it/s][A[A

Evaluation:   8%|▊         | 28/334 [00:01<00:10, 28.13it/s][A[A

Evaluation:   9%|▉         | 31/334 [00:01<00:10, 27.84it/s][A[A

Evaluation:  10%|█         | 34/334 [00:01<00:10, 27.59it/s][A[A

Evaluation:  11%|█         | 37/334 [00:01<00:10, 27.73it/s][A[A

Evaluation:  12%|█▏        | 41/334 [00:01<00:10, 28.92it/s][A[A

Evaluation:  13%|█▎        | 44/334 [00:01<00:10, 27.86it/s][A[A

Evaluation:  14%|█▍        |

>>> training loss: 1.3626, valid loss: 1.3106, valid accuracy score: 0.5560




  cpuset_checked))


Evaluation:   0%|          | 1/334 [00:00<01:09,  4.77it/s][A[A

Evaluation:   1%|          | 4/334 [00:00<00:23, 14.03it/s][A[A

Evaluation:   2%|▏         | 8/334 [00:00<00:15, 21.15it/s][A[A

Evaluation:   3%|▎         | 11/334 [00:00<00:13, 23.47it/s][A[A

Evaluation:   4%|▍         | 14/334 [00:00<00:12, 24.80it/s][A[A

Evaluation:   5%|▌         | 18/334 [00:00<00:11, 27.14it/s][A[A

Evaluation:   6%|▋         | 21/334 [00:00<00:11, 27.47it/s][A[A

Evaluation:   7%|▋         | 24/334 [00:01<00:11, 27.30it/s][A[A

Evaluation:   8%|▊         | 27/334 [00:01<00:11, 27.17it/s][A[A

Evaluation:   9%|▉         | 30/334 [00:01<00:11, 26.86it/s][A[A

Evaluation:  10%|▉         | 33/334 [00:01<00:11, 26.50it/s][A[A

Evaluation:  11%|█         | 36/334 [00:01<00:11, 26.72it/s][A[A

Evaluation:  12%|█▏        | 40/334 [00:01<00:10, 28.04it/s][A[A

Evaluation:  13%|█▎        | 43/334 [00:01<00:10, 27.65it/s][A[A

Evaluation:  14%|█▍        |

>>> training loss: 1.2917, valid loss: 1.2984, valid accuracy score: 0.5476




  cpuset_checked))


Evaluation:   0%|          | 1/334 [00:00<01:11,  4.65it/s][A[A

Evaluation:   1%|          | 4/334 [00:00<00:24, 13.59it/s][A[A

Evaluation:   2%|▏         | 8/334 [00:00<00:15, 20.48it/s][A[A

Evaluation:   3%|▎         | 11/334 [00:00<00:13, 23.09it/s][A[A

Evaluation:   4%|▍         | 14/334 [00:00<00:12, 24.98it/s][A[A

Evaluation:   5%|▌         | 18/334 [00:00<00:11, 27.25it/s][A[A

Evaluation:   6%|▋         | 21/334 [00:00<00:11, 27.26it/s][A[A

Evaluation:   7%|▋         | 24/334 [00:01<00:11, 27.14it/s][A[A

Evaluation:   8%|▊         | 27/334 [00:01<00:11, 26.55it/s][A[A

Evaluation:   9%|▉         | 30/334 [00:01<00:11, 26.50it/s][A[A

Evaluation:  10%|▉         | 33/334 [00:01<00:11, 25.92it/s][A[A

Evaluation:  11%|█         | 36/334 [00:01<00:11, 26.14it/s][A[A

Evaluation:  12%|█▏        | 39/334 [00:01<00:10, 26.95it/s][A[A

Evaluation:  13%|█▎        | 42/334 [00:01<00:10, 27.62it/s][A[A

Evaluation:  13%|█▎        |

>>> training loss: 1.2650, valid loss: 1.2645, valid accuracy score: 0.5579




  cpuset_checked))


Evaluation:   0%|          | 1/334 [00:00<01:11,  4.69it/s][A[A

Evaluation:   1%|          | 4/334 [00:00<00:23, 14.10it/s][A[A

Evaluation:   2%|▏         | 8/334 [00:00<00:15, 20.75it/s][A[A

Evaluation:   3%|▎         | 11/334 [00:00<00:14, 22.99it/s][A[A

Evaluation:   4%|▍         | 14/334 [00:00<00:12, 24.77it/s][A[A

Evaluation:   5%|▌         | 17/334 [00:00<00:12, 26.32it/s][A[A

Evaluation:   6%|▌         | 20/334 [00:00<00:11, 27.05it/s][A[A

Evaluation:   7%|▋         | 23/334 [00:00<00:11, 26.81it/s][A[A

Evaluation:   8%|▊         | 26/334 [00:01<00:11, 26.10it/s][A[A

Evaluation:   9%|▊         | 29/334 [00:01<00:11, 26.70it/s][A[A

Evaluation:  10%|▉         | 32/334 [00:01<00:11, 25.71it/s][A[A

Evaluation:  10%|█         | 35/334 [00:01<00:11, 25.81it/s][A[A

Evaluation:  11%|█▏        | 38/334 [00:01<00:11, 26.89it/s][A[A

Evaluation:  12%|█▏        | 41/334 [00:01<00:10, 27.53it/s][A[A

Evaluation:  13%|█▎        |

>>> training loss: 1.2761, valid loss: 1.2482, valid accuracy score: 0.5545




  cpuset_checked))


Evaluation:   0%|          | 1/334 [00:00<01:12,  4.62it/s][A[A

Evaluation:   1%|          | 4/334 [00:00<00:24, 13.58it/s][A[A

Evaluation:   2%|▏         | 7/334 [00:00<00:17, 19.13it/s][A[A

Evaluation:   3%|▎         | 10/334 [00:00<00:14, 22.19it/s][A[A

Evaluation:   4%|▍         | 13/334 [00:00<00:13, 24.16it/s][A[A

Evaluation:   5%|▌         | 17/334 [00:00<00:11, 26.52it/s][A[A

Evaluation:   6%|▌         | 20/334 [00:00<00:11, 27.23it/s][A[A

Evaluation:   7%|▋         | 23/334 [00:01<00:11, 26.88it/s][A[A

Evaluation:   8%|▊         | 26/334 [00:01<00:11, 26.63it/s][A[A

Evaluation:   9%|▊         | 29/334 [00:01<00:11, 27.26it/s][A[A

Evaluation:  10%|▉         | 32/334 [00:01<00:11, 25.79it/s][A[A

Evaluation:  10%|█         | 35/334 [00:01<00:11, 25.76it/s][A[A

Evaluation:  11%|█▏        | 38/334 [00:01<00:11, 26.76it/s][A[A

Evaluation:  12%|█▏        | 41/334 [00:01<00:10, 27.53it/s][A[A

Evaluation:  13%|█▎        |

>>> training loss: 1.2516, valid loss: 1.2397, valid accuracy score: 0.5527


 50%|█████     | 1/2 [07:21<07:21, 441.76s/it]

Training:   0%|          | 0/3002 [07:21<?, ?it/s]
  cpuset_checked))

Evaluation:   0%|          | 0/334 [00:00<?, ?it/s][A
Evaluation:   0%|          | 1/334 [00:00<01:08,  4.85it/s][A
Evaluation:   1%|          | 4/334 [00:00<00:23, 13.84it/s][A
Evaluation:   2%|▏         | 8/334 [00:00<00:15, 20.88it/s][A
Evaluation:   3%|▎         | 11/334 [00:00<00:13, 23.26it/s][A
Evaluation:   4%|▍         | 14/334 [00:00<00:12, 24.85it/s][A
Evaluation:   5%|▌         | 18/334 [00:00<00:11, 26.94it/s][A
Evaluation:   6%|▋         | 21/334 [00:00<00:11, 27.11it/s][A
Evaluation:   7%|▋         | 24/334 [00:01<00:11, 27.06it/s][A
Evaluation:   8%|▊         | 27/334 [00:01<00:11, 26.69it/s][A
Evaluation:   9%|▉         | 30/334 [00:01<00:11, 26.66it/s][A
Evaluation:  10%|▉         | 33/334 [00:01<00:11, 26.06it/s][A
Evaluation:  11%|█         | 36/334 [00:01<00:11, 26.20it/s][A
Evaluation:  12%|█▏        | 40/334 [00:01<00:10, 27.71it/s]

>>> training loss: 1.0262, valid loss: 1.2530, valid accuracy score: 0.5506



  cpuset_checked))

Evaluation:   0%|          | 1/334 [00:00<01:07,  4.91it/s][A
Evaluation:   1%|          | 4/334 [00:00<00:23, 13.98it/s][A
Evaluation:   2%|▏         | 7/334 [00:00<00:16, 19.39it/s][A
Evaluation:   3%|▎         | 10/334 [00:00<00:14, 22.42it/s][A
Evaluation:   4%|▍         | 13/334 [00:00<00:13, 24.38it/s][A
Evaluation:   5%|▍         | 16/334 [00:00<00:12, 26.00it/s][A
Evaluation:   6%|▌         | 19/334 [00:00<00:11, 26.90it/s][A
Evaluation:   7%|▋         | 22/334 [00:00<00:11, 27.26it/s][A
Evaluation:   7%|▋         | 25/334 [00:01<00:11, 25.80it/s][A
Evaluation:   8%|▊         | 28/334 [00:01<00:11, 26.23it/s][A
Evaluation:   9%|▉         | 31/334 [00:01<00:11, 26.15it/s][A
Evaluation:  10%|█         | 34/334 [00:01<00:11, 25.76it/s][A
Evaluation:  11%|█         | 37/334 [00:01<00:11, 26.27it/s][A
Evaluation:  12%|█▏        | 40/334 [00:01<00:10, 27.11it/s][A
Evaluation:  13%|█▎        | 43/334 [00:01<00:10, 26.59it/s][A
Evaluation:  14%|█▍   

>>> training loss: 1.0259, valid loss: 1.2588, valid accuracy score: 0.5459



  cpuset_checked))

Evaluation:   0%|          | 1/334 [00:00<01:11,  4.68it/s][A
Evaluation:   1%|          | 4/334 [00:00<00:23, 13.79it/s][A
Evaluation:   2%|▏         | 7/334 [00:00<00:17, 19.09it/s][A
Evaluation:   3%|▎         | 10/334 [00:00<00:14, 22.41it/s][A
Evaluation:   4%|▍         | 13/334 [00:00<00:13, 24.29it/s][A
Evaluation:   5%|▌         | 17/334 [00:00<00:11, 26.64it/s][A
Evaluation:   6%|▌         | 20/334 [00:00<00:11, 27.37it/s][A
Evaluation:   7%|▋         | 23/334 [00:01<00:11, 26.39it/s][A
Evaluation:   8%|▊         | 26/334 [00:01<00:11, 26.38it/s][A
Evaluation:   9%|▊         | 29/334 [00:01<00:11, 27.03it/s][A
Evaluation:  10%|▉         | 32/334 [00:01<00:11, 25.75it/s][A
Evaluation:  10%|█         | 35/334 [00:01<00:11, 25.71it/s][A
Evaluation:  11%|█▏        | 38/334 [00:01<00:11, 26.85it/s][A
Evaluation:  12%|█▏        | 41/334 [00:01<00:10, 27.46it/s][A
Evaluation:  13%|█▎        | 44/334 [00:01<00:10, 26.66it/s][A
Evaluation:  14%|█▍   

>>> training loss: 1.0276, valid loss: 1.2626, valid accuracy score: 0.5536



  cpuset_checked))

Evaluation:   0%|          | 1/334 [00:00<01:08,  4.86it/s][A
Evaluation:   1%|          | 4/334 [00:00<00:23, 14.02it/s][A
Evaluation:   2%|▏         | 7/334 [00:00<00:16, 19.46it/s][A
Evaluation:   3%|▎         | 10/334 [00:00<00:14, 22.45it/s][A
Evaluation:   4%|▍         | 13/334 [00:00<00:13, 23.99it/s][A
Evaluation:   5%|▍         | 16/334 [00:00<00:12, 25.74it/s][A
Evaluation:   6%|▌         | 19/334 [00:00<00:11, 26.52it/s][A
Evaluation:   7%|▋         | 22/334 [00:00<00:11, 26.90it/s][A
Evaluation:   7%|▋         | 25/334 [00:01<00:11, 25.93it/s][A
Evaluation:   8%|▊         | 28/334 [00:01<00:11, 26.57it/s][A
Evaluation:   9%|▉         | 31/334 [00:01<00:11, 26.39it/s][A
Evaluation:  10%|█         | 34/334 [00:01<00:11, 26.33it/s][A
Evaluation:  11%|█         | 37/334 [00:01<00:11, 26.38it/s][A
Evaluation:  12%|█▏        | 40/334 [00:01<00:10, 27.33it/s][A
Evaluation:  13%|█▎        | 43/334 [00:01<00:10, 27.05it/s][A
Evaluation:  14%|█▍   

>>> training loss: 1.0172, valid loss: 1.2763, valid accuracy score: 0.5499



  cpuset_checked))

Evaluation:   0%|          | 1/334 [00:00<01:08,  4.89it/s][A
Evaluation:   1%|          | 4/334 [00:00<00:23, 13.91it/s][A
Evaluation:   2%|▏         | 7/334 [00:00<00:16, 19.29it/s][A
Evaluation:   3%|▎         | 10/334 [00:00<00:14, 22.34it/s][A
Evaluation:   4%|▍         | 13/334 [00:00<00:13, 24.21it/s][A
Evaluation:   5%|▌         | 17/334 [00:00<00:11, 26.55it/s][A
Evaluation:   6%|▌         | 20/334 [00:00<00:11, 27.08it/s][A
Evaluation:   7%|▋         | 23/334 [00:00<00:11, 26.83it/s][A
Evaluation:   8%|▊         | 26/334 [00:01<00:11, 26.74it/s][A
Evaluation:   9%|▊         | 29/334 [00:01<00:11, 27.30it/s][A
Evaluation:  10%|▉         | 32/334 [00:01<00:11, 25.77it/s][A
Evaluation:  10%|█         | 35/334 [00:01<00:11, 25.78it/s][A
Evaluation:  11%|█▏        | 38/334 [00:01<00:11, 26.82it/s][A
Evaluation:  12%|█▏        | 41/334 [00:01<00:10, 27.25it/s][A
Evaluation:  13%|█▎        | 44/334 [00:01<00:11, 26.04it/s][A
Evaluation:  14%|█▍   

>>> training loss: 1.0577, valid loss: 1.2486, valid accuracy score: 0.5543



  cpuset_checked))

Evaluation:   0%|          | 1/334 [00:00<01:09,  4.79it/s][A
Evaluation:   1%|          | 4/334 [00:00<00:23, 14.01it/s][A
Evaluation:   2%|▏         | 7/334 [00:00<00:16, 19.61it/s][A
Evaluation:   3%|▎         | 10/334 [00:00<00:14, 22.65it/s][A
Evaluation:   4%|▍         | 13/334 [00:00<00:13, 24.54it/s][A
Evaluation:   5%|▍         | 16/334 [00:00<00:12, 26.20it/s][A
Evaluation:   6%|▌         | 20/334 [00:00<00:11, 27.79it/s][A
Evaluation:   7%|▋         | 23/334 [00:00<00:11, 27.09it/s][A
Evaluation:   8%|▊         | 26/334 [00:01<00:11, 26.84it/s][A
Evaluation:   9%|▊         | 29/334 [00:01<00:11, 27.28it/s][A
Evaluation:  10%|▉         | 32/334 [00:01<00:11, 25.76it/s][A
Evaluation:  10%|█         | 35/334 [00:01<00:11, 25.93it/s][A
Evaluation:  12%|█▏        | 39/334 [00:01<00:10, 27.45it/s][A
Evaluation:  13%|█▎        | 42/334 [00:01<00:10, 27.85it/s][A
Evaluation:  13%|█▎        | 45/334 [00:01<00:10, 26.78it/s][A
Evaluation:  14%|█▍   

>>> training loss: 1.0398, valid loss: 1.2086, valid accuracy score: 0.5663


100%|██████████| 2/2 [14:49<00:00, 444.60s/it]
Training:   0%|          | 0/3002 [07:27<?, ?it/s]


In [None]:
def predict(config, id2label, model, test_dataloader):
    """Predict a label for every test row and return them as a DataFrame.

    Args:
        config: Dict providing ``'device'`` and ``'test_file_path'``.
        id2label: Mapping from predicted class id back to the original label.
        model: Trained model called as ``model(**batch)``.
        test_dataloader: Loader yielding the test batches in file order.

    Returns:
        The test CSV as a DataFrame with a ``'label'`` column inserted at
        position 1 and the text column (the file's last column) removed.
    """
    test_iterator = tqdm(test_dataloader, desc='Predicting', total=len(test_dataloader))
    model.eval()
    test_preds = []
    with torch.no_grad():
        for batch in test_iterator:
            batch = {item: value.to(config['device']) for item, value in batch.items()}
            # Batches carry a 'labels' entry, so output 0 is the loss and
            # output 1 holds the logits.
            logits = model(**batch)[1]

            test_preds.append(logits.argmax(dim=-1).detach().cpu())
    test_preds = torch.cat(test_preds, dim=0).numpy()
    predicted_labels = [id2label[id_] for id_ in test_preds]

    test_df = pd.read_csv(config['test_file_path'], sep=',')
    # read_data treats the last column as the sentence; drop that same column
    # here rather than relying on it being named 'sentence'.
    sentence_column = test_df.columns[-1]
    test_df.insert(1, column='label', value=predicted_labels)

    return test_df.drop(columns=[sentence_column])

In [None]:
# Predict labels for the test set, then load the reference labels used for scoring below.
prediction = predict(config, id2label, model, test_dataloader)
actual = pd.read_csv(SYSPATH  + 'solution.csv')

  cpuset_checked))
Predicting: 100%|██████████| 625/625 [00:22<00:00, 27.71it/s]


In [None]:
# Accuracy of the predictions against the reference labels.
# NOTE(review): rows are compared by position, so this assumes solution.csv is in the same order as test.csv — confirm.
accuracy_score(actual['label'], prediction['label'])

0.5723

In [None]:
# Support-weighted recall. With average='weighted' this is mathematically equal to accuracy,
# which is why it prints the same value as the accuracy cell.
recall_score(actual['label'], prediction['label'], average='weighted')

0.5723

In [None]:
# Support-weighted precision across classes.
precision_score(actual['label'], prediction['label'], average='weighted')

0.5742307101239282

In [None]:
# Support-weighted F1 across classes.
f1_score(actual['label'], prediction['label'], average='weighted')

0.5722309621762148