In [None]:
!pip install -q transformers
!pip install -q torch_optimizer

[K     |████████████████████████████████| 3.1 MB 5.4 MB/s 
[K     |████████████████████████████████| 895 kB 35.2 MB/s 
[K     |████████████████████████████████| 59 kB 4.2 MB/s 
[K     |████████████████████████████████| 596 kB 35.8 MB/s 
[K     |████████████████████████████████| 3.3 MB 32.1 MB/s 
[K     |████████████████████████████████| 61 kB 403 kB/s 
[?25h

In [None]:
import numpy as np
import pandas as pd

from sklearn import metrics
from sklearn.metrics import log_loss

import transformers
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import set_seed

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler

import os
import random
import re

import warnings
warnings.filterwarnings('ignore')

import torch_optimizer
from sklearn.model_selection import KFold

In [None]:
from torch import cuda
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
# `device` is read as a notebook-level global by the training and validation code below.
device = 'cuda' if cuda.is_available() else 'cpu'

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
def seed_everything(seed: int):
    """Seed every random number generator this notebook relies on.

    Covers Python's ``random``, NumPy, PyTorch (CPU and every CUDA device) and
    the ``transformers`` ``set_seed`` helper, and configures cuDNN for
    deterministic behaviour.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # Only influences interpreters started after this point; the hash seed of
    # the already-running kernel was fixed when it started.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # Also seed the remaining GPUs (harmless when CUDA is unavailable).
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Auto-tuning may select different kernels from run to run, so switch it off.
    torch.backends.cudnn.benchmark = False
    set_seed(seed)


SEED = 3407
seed_everything(SEED)

In [None]:
!nvidia-smi

Wed Nov 17 16:32:27 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 495.44       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   54C    P8    29W / 149W |      3MiB / 11441MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

# Training & Validating

In [None]:
!wget -q -O train.csv https://getfile.dokpub.com/yandex/get/https://disk.yandex.ru/d/fmWGQJvwU5ejog

In [None]:
# Load the training table and pack every column from the third one onwards
# into a single list-valued 'list' column (one list per row).
train_df = pd.read_csv("train.csv")
train_df['list'] = train_df[train_df.columns[2:]].values.tolist()

# Independent two-column copy used for training; train_df itself is kept
# because its column names are reused when the submission is assembled.
new_train_df = train_df[['text', 'list']].copy()
new_train_df.head()

Unnamed: 0,text,list
0,"Корова, видимо вставая, раздавила себе сосок. ...","[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0]"
1,Корове 8 лет! Месяц назад промеж четвертей вым...,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1]"
2,"Молоко течёт само у коровы. Что делать, если у...","[0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0]"
3,У нетели болячки на вымени.\nЗдравствуйте. Нет...,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1]"
4,"У меня первотелка, на днях отёл, у неё левый п...","[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0]"


In [None]:
!wget -q -O train_labels.json https://getfile.dokpub.com/yandex/get/https://disk.yandex.ru/d/0nJ2QTRb9-U7tA
!wget -q -O labeled_train_by_hand.json https://getfile.dokpub.com/yandex/get/https://disk.yandex.ru/d/Oq154PAxDGeIFg

In [None]:
# Span annotations for the training texts; the JSON is transposed after loading.
train_labels = pd.read_json('train_labels.json').T
# Read the file that the download cell actually saves. The previous name
# ('labeled_by_hand_version_2.json') is never fetched by this notebook, so the
# read failed on a fresh runtime.
# NOTE(review): confirm the downloaded file is the intended annotation export.
labeled_by_hand_df = pd.read_json('labeled_train_by_hand.json')
# Shift the ids down by one and store them as integers.
# NOTE(review): presumably converts 1-based annotation ids to 0-based row positions — confirm.
labeled_by_hand_df['id'] = (labeled_by_hand_df['id'] - 1).astype(int)

In [None]:
# Drop the rows whose (shifted) id is 0 and renumber the remaining rows from 0.
# drop=True discards the old index instead of inserting it as an extra column,
# so re-running this cell no longer accumulates stray index columns.
labeled_by_hand_df = labeled_by_hand_df[labeled_by_hand_df.id != 0].reset_index(drop=True)

In [None]:
# Collect, per text, the list of annotated span strings:
# dct['text_id'][k] is a text id and dct['span_text'][k] the span strings of that text.
dct = {'text_id': [], 'span_text': []}

# Hand-labelled annotations: each result entry already carries the span text.
for i in range(len(labeled_by_hand_df)):
    annotation_results = labeled_by_hand_df['annotations'][i][0]['result']
    dct['span_text'].append([result['value']['text'] for result in annotation_results])
    # The id is appended directly instead of being bound to a variable called
    # `id`, which shadowed the built-in id() for the rest of the session.
    dct['text_id'].append(labeled_by_hand_df.id[i])

# First 30 training texts: each span is a pair of character offsets used to slice the text.
# NOTE(review): the reason for the hard-coded 30 is not visible here — confirm.
for i in range(30):
    source_text = train_df.text[i]
    dct['span_text'].append([source_text[span[0]:span[1]] for span in train_labels.span[i]])
    dct['text_id'].append(i)

In [None]:
# Turn the collected lists into a DataFrame with 'text_id' and 'span_text' columns.
dct = pd.DataFrame(dct)

In [None]:
# (Stray keystroke removed: the lone "ы" raised NameError and stopped "Run All" at this cell.)

In [None]:

# Name of the pretrained checkpoint; reused below when the classifier is loaded.
model_checkpoint = "cointegrated/rubert-tiny2"
# Tokenizer that belongs to the checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

Downloading:   0%|          | 0.00/401 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.03M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.66M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/112 [00:00<?, ?B/s]

In [None]:
# Token length that CustomDataset pads/truncates every text to.
MAX_LEN = 1200
# Batch sizes of the training and validation DataLoaders.
TRAIN_BATCH_SIZE = 8
VALID_BATCH_SIZE = 4

In [None]:
class CustomDataset(Dataset):
    """Tokenizes the ``text`` column of a DataFrame on the fly for multi-label training.

    Every item is a dict of tensors: ``ids``, ``mask`` and ``token_type_ids``
    (``torch.long``, as returned by the tokenizer for the requested maximum
    length) plus ``targets`` (``torch.float``) taken from the ``list`` column.

    Args:
        dataframe: DataFrame with a ``text`` column and a ``list`` column of
            per-row label vectors. Rows are fetched with ``series[index]``, so
            the frame should carry a 0..n-1 index (callers reset it first).
        tokenizer: Tokenizer object exposing ``encode_plus``.
        max_len: Sequence length to pad/truncate to. ``None`` falls back to the
            notebook-level ``MAX_LEN`` constant, which is the length every
            item used before this argument was honoured.
    """

    def __init__(self, dataframe, tokenizer, max_len):
        self.tokenizer = tokenizer
        self.data = dataframe
        self.text = dataframe.text
        self.targets = self.data.list
        self.max_len = max_len

    def __len__(self):
        """Number of rows in the wrapped frame."""
        return len(self.text)

    def __getitem__(self, index):
        """Tokenize row ``index`` and return its tensors together with the targets."""
        # Collapse every run of whitespace (newlines included) into one space.
        text = " ".join(str(self.text[index]).split())

        # Previously the constructor argument was stored but never used here.
        max_length = MAX_LEN if self.max_len is None else self.max_len

        inputs = self.tokenizer.encode_plus(
            text,
            None,
            add_special_tokens=True,
            max_length=max_length,
            padding='max_length',  # replaces the deprecated pad_to_max_length=True
            truncation=True,
            return_token_type_ids=True
        )

        return {
            'ids': torch.tensor(inputs['input_ids'], dtype=torch.long),
            'mask': torch.tensor(inputs['attention_mask'], dtype=torch.long),
            'token_type_ids': torch.tensor(inputs['token_type_ids'], dtype=torch.long),
            'targets': torch.tensor(self.targets[index], dtype=torch.float)
        }

In [None]:
# With train_size = 1 the whole frame is drawn (in shuffled order) into the
# training split, so the validation split built from the remaining rows is empty.
train_size = 1
train_dataset = new_train_df.sample(frac=train_size, random_state=200)

# Rows that were not drawn into the training sample form the validation split.
valid_dataset = new_train_df.drop(train_dataset.index).reset_index(drop=True)

train_dataset = train_dataset.reset_index(drop=True)


print("FULL Dataset: {}".format(new_train_df.shape))
print("TRAIN Dataset: {}".format(train_dataset.shape))
print("VALID Dataset: {}".format(valid_dataset.shape))

# The third argument (max_len) is passed as None here.
training_set = CustomDataset(train_dataset, tokenizer, None)
validating_set = CustomDataset(valid_dataset, tokenizer, None)

FULL Dataset: (294, 2)
TRAIN Dataset: (294, 2)
VALID Dataset: (0, 2)


In [None]:
def seed_worker(worker_id):
    """Seed NumPy and ``random`` from the current PyTorch seed.

    Meant to be passed as ``worker_init_fn`` to a ``DataLoader``. The seed was
    previously computed and then discarded, so the function had no effect.

    Args:
        worker_id: Worker index supplied by the DataLoader; not used.
    """
    # Reduce to 32 bits: np.random.seed only accepts values below 2**32.
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)

# Dedicated generator, seeded with SEED, that is handed to the DataLoaders
# below through their 'generator' argument.
g = torch.Generator()
g.manual_seed(SEED)

<torch._C.Generator at 0x7f573f3438f0>

In [None]:
# Keyword arguments shared by every training DataLoader in this notebook.
train_params = {'batch_size': TRAIN_BATCH_SIZE,
                'shuffle': True,
                'num_workers': 0,
                'worker_init_fn' : seed_worker,
                'generator': g
                }

# Keyword arguments for validation DataLoaders.
# NOTE(review): shuffling is enabled for validation as well and draws from the
# same generator `g` as training — confirm this is intended.
valid_params = {'batch_size': VALID_BATCH_SIZE,
                'shuffle': True,
                'num_workers': 0,
                'worker_init_fn' : seed_worker,
                'generator': g
                }

training_loader = DataLoader(training_set, **train_params)
# A validation loader is only built when part of the data was held out.
if train_size != 1:
    validating_loader = DataLoader(validating_set, **valid_params)

In [None]:
from lsep_loss import LSEPLoss
def lsep_fn(outputs, targets):
    """Apply a freshly constructed ``LSEPLoss`` to ``outputs`` and ``targets``."""
    criterion = LSEPLoss()
    return criterion(outputs, targets)

def loss_fn(outputs, targets):
    """Multi-label soft-margin loss of ``outputs`` against ``targets`` (default settings)."""
    criterion = nn.MultiLabelSoftMarginLoss()
    return criterion(outputs, targets)

def bce_fn(outputs, targets):
    """Binary cross-entropy on raw logits ``outputs`` against ``targets`` (default settings)."""
    criterion = nn.BCEWithLogitsLoss()
    return criterion(outputs, targets)

In [None]:
# Optimisation hyper-parameters: Adam learning rate and number of training epochs.
LR = 0.0001
EPOCH = 16

# Sequence classifier with 11 output labels loaded from the pretrained
# checkpoint; attention maps are requested in the model output as well.
model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=11, output_attentions=True)

model.to(device);

# Notebook-level optimizer; train() picks it up as a global.
optimizer = torch.optim.Adam(params=model.parameters(), lr=LR)
# optimizer = torch_optimizer.RAdam(params=model.parameters(), lr=LR)

# optimizer = torch.optim.Adam([
#                 {'params': model.bert.parameters()},
#                 {'params': model.classifier.parameters(), 'lr': 1e-3}
#             ], lr=1e-4)

Downloading:   0%|          | 0.00/715 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/112M [00:00<?, ?B/s]

Some weights of the model checkpoint at cointegrated/rubert-tiny2 were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not 

In [None]:
def train(epoch, loader, model, optimizer=None, criterion=None):
    """Train ``model`` for ``epoch`` passes over ``loader``.

    Args:
        epoch: Number of passes over ``loader``.
        loader: Iterable of batches; each batch is a mapping with the tensors
            ``ids``, ``mask``, ``token_type_ids`` and ``targets``.
        model: Module called as ``model(ids, mask, token_type_ids)`` whose
            result exposes the logits under the ``'logits'`` key.
        optimizer: Optimizer that updates ``model``. When omitted, the
            notebook-level ``optimizer`` variable is used, as before.
        criterion: Callable ``(outputs, targets) -> loss``. When omitted,
            ``lsep_fn`` is used, as before.

    Raises:
        NameError: If no optimizer is passed and no notebook-level
            ``optimizer`` exists.

    Prints the mean batch loss of every epoch (``nan`` for an empty loader).
    """
    if optimizer is None:
        # Backward-compatible fallback to the notebook-level optimizer.
        optimizer = globals().get('optimizer')
        if optimizer is None:
            raise NameError("no optimizer passed and no notebook-level `optimizer` is defined")
    if criterion is None:
        criterion = lsep_fn

    # Move batches to wherever the model lives instead of relying on a global.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device('cpu')

    for ep in range(1, epoch + 1):
        model.train()
        running_loss = 0.0
        n_batches = 0
        for data in loader:
            ids = data['ids'].to(run_device, dtype=torch.long)
            mask = data['mask'].to(run_device, dtype=torch.long)
            token_type_ids = data['token_type_ids'].to(run_device, dtype=torch.long)
            targets = data['targets'].to(run_device, dtype=torch.float)

            outputs = model(ids, mask, token_type_ids)['logits']
            loss = criterion(outputs, targets)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            n_batches += 1

        # Average over the epoch: the loss of the last batch alone is noisy,
        # and it was undefined when the loader yielded no batches.
        mean_loss = running_loss / n_batches if n_batches else float('nan')
        print(f'Epoch: {ep}, Loss:  {mean_loss}')

In [None]:
def log_loss_score(gt, pr, n_labels=10):
    """Mean per-label binary log loss over the first ``n_labels`` label columns.

    Args:
        gt: 2-D array-like of 0/1 ground-truth labels, one row per sample.
        pr: 2-D array-like of predicted probabilities with the same layout.
        n_labels: Number of leading columns to score. Defaults to 10, the
            value that used to be hard-coded.

    Returns:
        The average of ``log_loss`` over the scored columns.
    """
    gt = np.asarray(gt)
    pr = np.asarray(pr)

    # labels=[0, 1] keeps log_loss from raising when a column of the ground
    # truth contains only one class (easy to hit with small validation folds).
    per_label = [
        log_loss(gt[:, i], pr[:, i], labels=[0, 1])
        for i in range(n_labels)
    ]

    return sum(per_label) / n_labels

    
def validation(model, loader):
    """Run ``model`` over ``loader`` in eval mode and gather sigmoid outputs.

    Args:
        model: Module called as ``model(ids, mask, token_type_ids)`` whose
            result exposes the logits under the ``'logits'`` key.
        loader: Iterable of batches with ``ids``, ``mask``, ``token_type_ids``
            and ``targets`` tensors.

    Returns:
        ``(outputs, targets)``: two lists with one entry per sample, holding
        the sigmoid of the logits and the corresponding targets.
    """
    model.eval()

    collected_probs, collected_targets = [], []

    with torch.no_grad():
        for batch in loader:
            input_ids = batch['ids'].to(device, dtype=torch.long)
            attention_mask = batch['mask'].to(device, dtype=torch.long)
            segment_ids = batch['token_type_ids'].to(device, dtype=torch.long)
            labels = batch['targets'].to(device, dtype=torch.float)

            logits = model(input_ids, attention_mask, segment_ids)['logits']
            probs = torch.sigmoid(logits)

            collected_targets += labels.cpu().detach().numpy().tolist()
            collected_probs += probs.cpu().detach().numpy().tolist()

    return collected_probs, collected_targets

In [None]:
# Number of cross-validation folds and the shuffled, seeded splitter built from it.
n = 5
kf = KFold(n_splits=n, random_state=200, shuffle=True)

In [None]:
# K-fold cross-validation over new_train_df (splits come from `kf`): every
# fold trains a fresh model and scores it on the held-out rows.
score_list = []
for train_index, val_index in kf.split(new_train_df):
    train_d = new_train_df.iloc[train_index].reset_index(drop=True)
    val_d = new_train_df.iloc[val_index].reset_index(drop=True)

    training_d_set = CustomDataset(train_d, tokenizer, None)
    validating_d_set = CustomDataset(val_d, tokenizer, None)

    training_d_loader = DataLoader(training_d_set, **train_params)
    validating_d_loader = DataLoader(validating_d_set, **valid_params)

    # Start every fold from the pretrained checkpoint.
    model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=11, output_attentions=True)
    model.to(device)

    # Rebound at notebook level for every fold because train() picks the
    # optimizer up from there. LR replaces a duplicated 1e-4 literal (same value).
    optimizer = torch.optim.Adam(params=model.parameters(), lr=LR)

    train(EPOCH, training_d_loader, model)

    outputs, targets = validation(model, validating_d_loader)
    outputs = np.array(outputs)
    lg = log_loss_score(targets, outputs)

    # NOTE(review): the 0.8 factor is presumably the weight of this component
    # in the target metric — confirm.
    score = (1 - lg) * 0.8
    print(score)
    score_list.append(score)

Some weights of the model checkpoint at cointegrated/rubert-tiny2 were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not 

Epoch: 1, Loss:  4.221324920654297, Val: 0
Epoch: 2, Loss:  3.839358329772949, Val: 0
Epoch: 3, Loss:  2.786884307861328, Val: 0
Epoch: 4, Loss:  3.629636764526367, Val: 0
Epoch: 5, Loss:  2.422412395477295, Val: 0
Epoch: 6, Loss:  2.261437177658081, Val: 0
Epoch: 7, Loss:  1.5427753925323486, Val: 0
Epoch: 8, Loss:  0.9135900735855103, Val: 0
Epoch: 9, Loss:  1.135256290435791, Val: 0
Epoch: 10, Loss:  0.8144914507865906, Val: 0
Epoch: 11, Loss:  0.49697932600975037, Val: 0
Epoch: 12, Loss:  0.3323259949684143, Val: 0
Epoch: 13, Loss:  0.9386532306671143, Val: 0
Epoch: 14, Loss:  0.26677531003952026, Val: 0
Epoch: 15, Loss:  0.09529383480548859, Val: 0
Epoch: 16, Loss:  0.5036966800689697, Val: 0
0.6203218316624427


Some weights of the model checkpoint at cointegrated/rubert-tiny2 were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not 

Epoch: 1, Loss:  3.8330276012420654, Val: 0
Epoch: 2, Loss:  3.518420934677124, Val: 0
Epoch: 3, Loss:  2.786388397216797, Val: 0
Epoch: 4, Loss:  2.3488965034484863, Val: 0
Epoch: 5, Loss:  2.945274591445923, Val: 0
Epoch: 6, Loss:  2.3112375736236572, Val: 0
Epoch: 7, Loss:  1.1420576572418213, Val: 0
Epoch: 8, Loss:  1.4509105682373047, Val: 0
Epoch: 9, Loss:  1.9048237800598145, Val: 0
Epoch: 10, Loss:  0.29596054553985596, Val: 0
Epoch: 11, Loss:  0.6307318210601807, Val: 0
Epoch: 12, Loss:  0.582968533039093, Val: 0
Epoch: 13, Loss:  0.1424594670534134, Val: 0
Epoch: 14, Loss:  0.0691007599234581, Val: 0
Epoch: 15, Loss:  0.08400192856788635, Val: 0
Epoch: 16, Loss:  0.048614803701639175, Val: 0
0.6106682564735867


Some weights of the model checkpoint at cointegrated/rubert-tiny2 were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not 

Epoch: 1, Loss:  3.856786012649536, Val: 0
Epoch: 2, Loss:  3.5951344966888428, Val: 0
Epoch: 3, Loss:  3.6209750175476074, Val: 0
Epoch: 4, Loss:  2.385484457015991, Val: 0
Epoch: 5, Loss:  1.6929454803466797, Val: 0
Epoch: 6, Loss:  2.2049877643585205, Val: 0
Epoch: 7, Loss:  0.9845533967018127, Val: 0
Epoch: 8, Loss:  0.744962215423584, Val: 0
Epoch: 9, Loss:  0.8585320711135864, Val: 0
Epoch: 10, Loss:  1.5561460256576538, Val: 0
Epoch: 11, Loss:  1.1341124773025513, Val: 0
Epoch: 12, Loss:  0.1724914163351059, Val: 0
Epoch: 13, Loss:  0.6010246276855469, Val: 0
Epoch: 14, Loss:  0.16780896484851837, Val: 0
Epoch: 15, Loss:  0.08547326922416687, Val: 0
Epoch: 16, Loss:  0.08445053547620773, Val: 0
0.6186972263090921


Some weights of the model checkpoint at cointegrated/rubert-tiny2 were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not 

Epoch: 1, Loss:  3.484072208404541, Val: 0
Epoch: 2, Loss:  3.103679656982422, Val: 0
Epoch: 3, Loss:  3.1949474811553955, Val: 0
Epoch: 4, Loss:  2.2991726398468018, Val: 0
Epoch: 5, Loss:  1.5728552341461182, Val: 0
Epoch: 6, Loss:  1.9519362449645996, Val: 0
Epoch: 7, Loss:  1.6056864261627197, Val: 0
Epoch: 8, Loss:  1.1315994262695312, Val: 0
Epoch: 9, Loss:  1.060767650604248, Val: 0
Epoch: 10, Loss:  0.6715308427810669, Val: 0
Epoch: 11, Loss:  0.4026953876018524, Val: 0
Epoch: 12, Loss:  0.44787079095840454, Val: 0
Epoch: 13, Loss:  0.47051146626472473, Val: 0
Epoch: 14, Loss:  0.17706897854804993, Val: 0
Epoch: 15, Loss:  0.07454472780227661, Val: 0
Epoch: 16, Loss:  0.47691094875335693, Val: 0
0.5504753069634457


Some weights of the model checkpoint at cointegrated/rubert-tiny2 were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not 

Epoch: 1, Loss:  4.178610324859619, Val: 0
Epoch: 2, Loss:  3.8612241744995117, Val: 0
Epoch: 3, Loss:  3.3726377487182617, Val: 0
Epoch: 4, Loss:  2.693899393081665, Val: 0
Epoch: 5, Loss:  2.9555013179779053, Val: 0
Epoch: 6, Loss:  3.001298189163208, Val: 0
Epoch: 7, Loss:  2.3781933784484863, Val: 0
Epoch: 8, Loss:  1.4363811016082764, Val: 0
Epoch: 9, Loss:  1.1124293804168701, Val: 0
Epoch: 10, Loss:  0.9060420989990234, Val: 0
Epoch: 11, Loss:  0.7493139505386353, Val: 0
Epoch: 12, Loss:  0.5001494884490967, Val: 0
Epoch: 13, Loss:  0.2776065766811371, Val: 0
Epoch: 14, Loss:  1.033622145652771, Val: 0
Epoch: 15, Loss:  0.471049964427948, Val: 0
Epoch: 16, Loss:  0.14120103418827057, Val: 0
0.6469610693668513


In [None]:
# Average of the per-fold scores collected above.
np.mean(score_list)

0.6094247381550837

# Submission


In [None]:
!wget -q -O test.csv https://getfile.dokpub.com/yandex/get/https://disk.yandex.ru/d/Wo70d4_PAwujqA

In [None]:
# Test table downloaded by the cell above.
test = pd.read_csv('test.csv')
# NOTE(review): the two JSON files below are not downloaded by any cell visible
# in this notebook — they must already be present in the working directory.
# Spans used to build the classifier input text.
test_spans = pd.read_json('test_df_with_spans247_final.json')
# Spans written to the submission file.
test_spans_not_for_clf = pd.read_json('test_df_with_spans247_test_like_229_cased.json')

In [None]:
# Collect, for every test text, the distinct span strings cut out of it.
test_dct = {'text_id': [], 'span_text': []}

for i in range(len(test_spans)):
    source_text = test_spans.text[i]
    # Each span is a pair of character offsets used to slice the text.
    span_texts = [source_text[span[0]:span[1]] for span in test_spans.span[i]]

    # dict.fromkeys drops repeated span strings while keeping first-seen order.
    test_dct['span_text'].append(list(dict.fromkeys(span_texts)))
    # Appended directly instead of going through a variable named `id`, which
    # shadowed the built-in id() for the rest of the session.
    test_dct['text_id'].append(test_spans.text_id[i])

test_dct = pd.DataFrame(test_dct)

In [None]:
# Same layout as the training frame: pack every column from the third one
# onwards into 'list' (the displayed output shows empty lists for the test set).
test['list'] = test[test.columns[2:]].values.tolist()

# Independent two-column copy that is fed to the dataset below.
new_df_test = test[['text', 'list']].copy()
new_df_test.head()

Unnamed: 0,text,list
0,Понос у месячных телят. Подскажите методы и сп...,[]
1,"Понос у телят, чем лечить? \nЧем можно вылечит...",[]
2,По какой причине у телёнка отнимаются ноги?\nП...,[]
3,"Срочно! Ребятки, помогите, корову что-то укуси...",[]
4,"Сгустки у коровы.\nЗдравствуйте, помогите пожа...",[]


In [None]:
# Prefix every test text with its span strings: "<span> <span> ... | <original text>".
# The column is rebuilt in one assignment from the untouched `test['text']`:
#   * no chained indexing (`frame['text'][i] = ...`), which pandas warns about
#     and which may write to a temporary copy instead of the frame;
#   * the cell is idempotent — re-running it no longer stacks another prefix.
# NOTE(review): rows are paired by position; confirm test_dct follows the row order of `test`.
assert len(test_dct) >= len(new_df_test), "span lists are missing for some test rows"
new_df_test['text'] = [
    " ".join(spans) + " | " + text
    for spans, text in zip(test_dct['span_text'], test['text'])
]

In [None]:
# Renumber the rows from 0 so the dataset can fetch them by index.
test_dataset = new_df_test.reset_index(drop=True)

print("TEST Dataset: {}".format(test_dataset.shape))

# The third argument (max_len) is passed as None here.
testing_set = CustomDataset(test_dataset, tokenizer, None)

TEST Dataset: (99, 2)


In [None]:
# Test loader settings: no shuffling, so predictions keep the row order of the test frame.
test_params = {'batch_size': 4,
                'shuffle': False,
                'num_workers': 0,
                'worker_init_fn' : seed_worker,
                'generator': g
                }

testing_loader = DataLoader(testing_set, **test_params)

In [None]:
# Predict label probabilities for the test set with the shared validation()
# helper: it switches the model to eval mode, disables gradients and applies
# the sigmoid, exactly like the loop that used to be duplicated here. Only its
# first return value (the probabilities) is needed; the test set has no targets.
# NOTE(review): as far as this notebook shows, `model` is the one trained in
# the final cross-validation fold — confirm that this is the intended model.
prediction = validation(model, testing_loader)[0]

In [None]:
# Column names of the submission: 'text_id' followed by the training columns
# from the third one up to (not including) the last two.
# NOTE(review): this slice assumes the 'list' column was already appended to
# train_df, so it drops 'list' plus one more trailing column — confirm.
submission_columns = ['text_id'] + list(train_df.columns[2:-2])
# Keep only the first 10 predicted probabilities of every row next to its text_id.
submission = pd.concat([test['text_id'], pd.DataFrame(np.array(prediction)[:, :10])], axis=1)
submission.columns = submission_columns

In [None]:
# Build {text_id (as str): {"span": ..., "label": [...]}} for every submission row.
# The span comes from test_spans_not_for_clf; .item() requires exactly one
# matching row per text_id.
submission_json = {str(k): {"span": test_spans_not_for_clf[test_spans_not_for_clf.text_id == k].span.item(), "label": list(v.values())} \
                   for k,v in submission.set_index('text_id').to_dict('index').items()}

In [None]:
import json
# Write the submission dictionary to disk as indented JSON.
with open('submisson_final_vers4.json', 'w') as final_submit:
    json.dump(submission_json, final_submit, indent=4)