In [1]:
import pandas as pd
import torch
from torch import nn
import numpy as np
from transformers import AutoTokenizer,AutoModelForSequenceClassification
from torch.utils.data import DataLoader, Dataset
from sklearn.metrics import f1_score
import random
import time

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
train_data = pd.read_csv('dataset/train.tsv', sep='\t')
val_data = pd.read_csv('dataset/val.tsv', sep='\t')
test_data = pd.read_csv('dataset/test.tsv', sep='\t')
train_data = pd.concat([train_data, val_data], axis=0)
train_data

Unnamed: 0,id,source,utterance,classes
0,356,6,Start by reading the preliminary information,OTHER
1,357,6,"""Hello Ms. Klein, I am responsible for you in ...",OTHER
2,358,6,Since yesterday afternoon?,OTHER
3,359,6,"Hmm, but the shortness of breath only came whi...",OTHER
4,360,6,Have you ever laid down during the day when yo...,OTHER
...,...,...,...,...
402,1526,23,Is this the right career for you? Do you have ...,SF
403,1527,23,Your nightmares aren't about your work either?...,OTHER
404,1528,23,But that started at some point? So it's been m...,OTHER
405,1529,23,Are there any illnesses in your family?,SF


In [3]:
num_classes = 6 
model_name = "microsoft/deberta-v3-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
tag_list=["AM","MS","OTHER","PH","SF","SR"]
tag_to_idx={}
for i in range(len(tag_list)):
    tag_to_idx[tag_list[i]]=i
    
class CustomDataset(Dataset):
    def __init__(self, data, tokenizer, max_len):
        self.data = data
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        utterance = str(self.data.iloc[idx]['utterance'])
        labels = self.data.iloc[idx]['classes'] 
        labels = self.label_encoded(labels)
        encoding = self.tokenizer.encode_plus(
            utterance,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(labels, dtype=torch.float32)
        }
    def label_encoded(self,labels):
        encode_label = np.zeros(6,dtype=int)
        list_label = labels.split(",")
        for i in list_label:
            idx = tag_to_idx[i]
            encode_label[idx] = 1
        return encode_label 

max_len = 256
batch_size = 8

train_dataset = CustomDataset(train_data, tokenizer, max_len)
val_dataset = CustomDataset(val_data, tokenizer, max_len)



train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)





In [4]:
import torch

if torch.cuda.is_available():       
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [5]:
from transformers import  get_cosine_schedule_with_warmup
from torch.optim import AdamW 
def init_model(model,epochs):
    model.to(device)
    optimizer = AdamW(model.parameters(),lr=2e-5,eps=1e-6,betas=(0.9, 0.999))
    total_steps = len(train_dataloader) * epochs
    scheduler = get_cosine_schedule_with_warmup(optimizer,num_warmup_steps=500,num_training_steps=total_steps)
    return model , optimizer , scheduler

In [6]:
weights = [1,1,0.2,1,1,1]
weights = torch.tensor(weights).to(device)
loss_fn = nn.BCEWithLogitsLoss()
def evalute(dataloader,model):
    model.eval()
    y_pred = []
    y_target = []
    with torch.no_grad():
        for _, batch in enumerate(dataloader):
            b_input_ids = batch['input_ids'].to(device)
            b_attn_mask = batch['attention_mask'].to(device)
            b_labels = batch['labels'].to(device)
            logits = model(b_input_ids, b_attn_mask)
            y_pred.extend(torch.sigmoid(logits.logits).cpu().detach().numpy().tolist())         
            y_target.extend(b_labels.cpu().detach().numpy().tolist())
    y_preds = (np.array(y_pred)>0.5).astype(int)
    marco_f1= f1_score(y_target,y_preds,average='macro')
    # print("marco f1 score : ",marco_f1)
    return marco_f1
def train(model,train_dataloader,val_dataloader,optimizer,scheduler,path,epochs,evaluation):
    max_score = 0
    print(f"{'Epoch':^7} | {'Train Loss':^12} | {'F1 score':^9} | {'Elapsed':^9}")
    for epoch_i in range(epochs):
        t0_epoch = time.time()
        total_loss = 0
        for step, batch in enumerate(train_dataloader):
            model.train()
            input_ids = batch['input_ids'].to(device)
            attn_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            outputs = model(input_ids, attention_mask=attn_mask, labels=labels)
            logits = outputs.logits
            loss = loss_fn(logits, labels)
            total_loss += loss.item()
            loss.backward()

            # Clip the norm of the gradients to 1.0 to prevent "exploding gradients"
            # torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

            optimizer.step()
            scheduler.step()    
            model.zero_grad()
            
        avg_train_loss = total_loss / len(train_dataloader)
        if evaluation == True:
            time_elapsed = time.time() - t0_epoch
            score = evalute(val_dataloader,model)
            if score > max_score:
                torch.save(model.state_dict(),path)
                # print('save model')
                max_score = score
            print(f"{epoch_i + 1:^7} | {avg_train_loss:^12.6f} | {score:^9.6f} | {time_elapsed:^9.2f}")
        print("")
    print("best score: ",max_score)



In [7]:
model_list = []
for i in range(15):
    epochs = 10
    model = AutoModelForSequenceClassification.from_pretrained(model_name,num_labels=num_classes)
    bert_classifier, optimizer, scheduler = init_model(model,epochs=epochs)
    path = f"w_weight{i}.pth"
    train(bert_classifier, train_dataloader, val_dataloader,optimizer,scheduler ,path,epochs=epochs, evaluation=True)
    model_list.append(model)

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


 Epoch  |  Train Loss  | F1 score  |  Elapsed 
   1    |   0.455426   | 0.359184  |   25.30  

   2    |   0.267134   | 0.579066  |   25.12  

   3    |   0.186373   | 0.802905  |   25.12  

   4    |   0.139381   | 0.857821  |   24.91  

   5    |   0.112082   | 0.893845  |   24.87  

   6    |   0.082744   | 0.929405  |   24.99  

   7    |   0.067112   | 0.950980  |   24.99  

   8    |   0.056745   | 0.958547  |   25.04  

   9    |   0.050904   | 0.959204  |   25.02  

  10    |   0.047139   | 0.963462  |   25.09  

best score:  0.9634623000657211


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


 Epoch  |  Train Loss  | F1 score  |  Elapsed 
   1    |   0.445409   | 0.153328  |   25.08  

   2    |   0.280010   | 0.697994  |   25.08  

   3    |   0.190338   | 0.810380  |   25.08  

   4    |   0.139049   | 0.869351  |   25.05  

   5    |   0.101937   | 0.896669  |   24.66  

   6    |   0.080233   | 0.945969  |   25.07  

   7    |   0.064078   | 0.962625  |   25.04  

   8    |   0.055440   | 0.962510  |   25.11  

   9    |   0.050203   | 0.964070  |   25.02  

  10    |   0.045236   | 0.961114  |   24.39  

best score:  0.9640699658605593


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


 Epoch  |  Train Loss  | F1 score  |  Elapsed 
   1    |   0.443088   | 0.123077  |   25.00  

   2    |   0.279471   | 0.673096  |   25.05  

   3    |   0.190385   | 0.781777  |   24.97  

   4    |   0.138291   | 0.848288  |   25.00  

   5    |   0.105600   | 0.896505  |   25.03  

   6    |   0.079883   | 0.934363  |   25.07  

   7    |   0.066515   | 0.949486  |   25.06  

   8    |   0.055982   | 0.956073  |   25.04  

   9    |   0.048644   | 0.965029  |   25.05  

  10    |   0.046596   | 0.963675  |   24.94  

best score:  0.9650287425240235


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


 Epoch  |  Train Loss  | F1 score  |  Elapsed 
   1    |   0.457994   | 0.124066  |   24.81  

   2    |   0.298475   | 0.675603  |   25.03  

   3    |   0.196961   | 0.835213  |   25.01  

   4    |   0.136942   | 0.888423  |   25.05  

   5    |   0.102976   | 0.912664  |   25.05  

   6    |   0.079759   | 0.929790  |   25.06  

   7    |   0.064073   | 0.940005  |   25.01  

   8    |   0.055600   | 0.947066  |   25.00  

   9    |   0.047919   | 0.954752  |   25.02  

  10    |   0.044308   | 0.956464  |   25.03  

best score:  0.9564637251926119


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


 Epoch  |  Train Loss  | F1 score  |  Elapsed 
   1    |   0.438998   | 0.196548  |   24.99  

   2    |   0.277621   | 0.619761  |   25.06  

   3    |   0.192805   | 0.790778  |   25.00  

   4    |   0.145628   | 0.825238  |   24.86  

   5    |   0.114507   | 0.889540  |   25.01  

   6    |   0.089674   | 0.921067  |   25.04  

   7    |   0.072389   | 0.944456  |   25.01  

   8    |   0.060238   | 0.955345  |   25.08  

   9    |   0.053861   | 0.963382  |   25.07  

  10    |   0.050157   | 0.963382  |   25.17  

best score:  0.9633815040016871


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


 Epoch  |  Train Loss  | F1 score  |  Elapsed 
   1    |   0.445202   | 0.121274  |   24.90  

   2    |   0.292268   | 0.598174  |   25.09  

   3    |   0.198114   | 0.771699  |   24.64  

   4    |   0.142934   | 0.852276  |   25.08  

   5    |   0.105097   | 0.888667  |   25.13  

   6    |   0.086121   | 0.931431  |   25.17  

   7    |   0.067958   | 0.949588  |   25.11  

   8    |   0.057961   | 0.955989  |   25.06  

   9    |   0.050002   | 0.956051  |   25.08  

  10    |   0.048514   | 0.956860  |   25.13  

best score:  0.9568600361168675


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


 Epoch  |  Train Loss  | F1 score  |  Elapsed 
   1    |   0.450384   | 0.120198  |   24.87  

   2    |   0.288392   | 0.658644  |   24.21  

   3    |   0.192405   | 0.792902  |   25.01  

   4    |   0.140690   | 0.847920  |   25.01  

   5    |   0.111932   | 0.883597  |   24.92  

   6    |   0.085108   | 0.914420  |   24.76  

   7    |   0.069786   | 0.937754  |   24.90  

   8    |   0.061098   | 0.951267  |   24.98  

   9    |   0.053326   | 0.959681  |   24.66  

  10    |   0.050003   | 0.959681  |   24.60  

best score:  0.9596813182443104


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


 Epoch  |  Train Loss  | F1 score  |  Elapsed 
   1    |   0.457152   | 0.116366  |   25.03  

   2    |   0.291832   | 0.660338  |   25.04  

   3    |   0.193489   | 0.779600  |   24.89  

   4    |   0.140483   | 0.823199  |   25.03  

   5    |   0.103745   | 0.902313  |   25.02  

   6    |   0.080398   | 0.929390  |   25.03  

   7    |   0.066071   | 0.954399  |   25.00  

   8    |   0.053470   | 0.959024  |   25.03  

   9    |   0.048846   | 0.966702  |   25.04  

  10    |   0.044158   | 0.968325  |   25.05  

best score:  0.9683248182768422


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


 Epoch  |  Train Loss  | F1 score  |  Elapsed 
   1    |   0.450990   | 0.123667  |   25.11  

   2    |   0.324525   | 0.448498  |   25.14  

   3    |   0.224309   | 0.758295  |   25.08  

   4    |   0.163174   | 0.822042  |   25.01  

   5    |   0.123893   | 0.860791  |   25.10  

   6    |   0.097112   | 0.899014  |   25.00  

   7    |   0.081784   | 0.930565  |   25.08  

   8    |   0.066890   | 0.945297  |   25.13  

   9    |   0.058132   | 0.949810  |   25.04  

  10    |   0.056099   | 0.951750  |   25.02  

best score:  0.9517497704316634


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


 Epoch  |  Train Loss  | F1 score  |  Elapsed 
   1    |   0.452114   | 0.155369  |   25.01  

   2    |   0.282009   | 0.680537  |   25.07  

   3    |   0.194315   | 0.802372  |   25.03  

   4    |   0.137307   | 0.836478  |   24.67  

   5    |   0.107195   | 0.910047  |   25.07  

   6    |   0.083758   | 0.933651  |   25.07  

   7    |   0.066623   | 0.945339  |   25.10  

   8    |   0.057326   | 0.961374  |   25.12  

   9    |   0.050581   | 0.964471  |   25.11  

  10    |   0.047286   | 0.965493  |   24.84  

best score:  0.9654931742772148


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


 Epoch  |  Train Loss  | F1 score  |  Elapsed 
   1    |   0.456252   | 0.094595  |   24.70  

   2    |   0.310408   | 0.574401  |   25.10  

   3    |   0.221612   | 0.742196  |   25.10  

   4    |   0.168353   | 0.819720  |   24.91  

   5    |   0.132952   | 0.845227  |   24.29  

   6    |   0.104168   | 0.893921  |   24.97  

   7    |   0.081875   | 0.909027  |   25.04  

   8    |   0.068690   | 0.931946  |   24.92  

   9    |   0.060670   | 0.931568  |   25.08  

  10    |   0.057032   | 0.931643  |   24.87  

best score:  0.9319456393143078


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


 Epoch  |  Train Loss  | F1 score  |  Elapsed 
   1    |   0.447816   | 0.114996  |   25.09  

   2    |   0.296782   | 0.551500  |   25.07  

   3    |   0.202058   | 0.769935  |   25.10  

   4    |   0.156782   | 0.834250  |   25.08  

   5    |   0.117233   | 0.888918  |   25.05  

   6    |   0.094090   | 0.897027  |   25.10  

   7    |   0.078933   | 0.920986  |   24.93  

   8    |   0.066387   | 0.936963  |   24.94  

   9    |   0.059546   | 0.949267  |   25.11  

  10    |   0.056836   | 0.947685  |   25.09  

best score:  0.949267198664789


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


 Epoch  |  Train Loss  | F1 score  |  Elapsed 
   1    |   0.454921   | 0.115717  |   25.11  

   2    |   0.290890   | 0.636284  |   25.03  

   3    |   0.196743   | 0.775691  |   25.08  

   4    |   0.153940   | 0.840033  |   25.09  

   5    |   0.120065   | 0.860979  |   25.05  

   6    |   0.091093   | 0.906707  |   24.98  

   7    |   0.069914   | 0.941420  |   24.99  

   8    |   0.060896   | 0.952266  |   25.03  

   9    |   0.052625   | 0.955804  |   24.78  

  10    |   0.049422   | 0.957530  |   24.70  

best score:  0.9575303385887938


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


 Epoch  |  Train Loss  | F1 score  |  Elapsed 
   1    |   0.458347   | 0.120459  |   24.97  

   2    |   0.289441   | 0.528455  |   24.89  

   3    |   0.210908   | 0.774943  |   25.11  

   4    |   0.162373   | 0.845530  |   25.13  

   5    |   0.119272   | 0.881308  |   25.12  

   6    |   0.091046   | 0.906263  |   25.14  

   7    |   0.078660   | 0.940096  |   25.07  

   8    |   0.065308   | 0.947408  |   25.14  

   9    |   0.056325   | 0.951958  |   25.13  

  10    |   0.052300   | 0.951958  |   25.00  

best score:  0.9519579009409477


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


 Epoch  |  Train Loss  | F1 score  |  Elapsed 
   1    |   0.450514   | 0.174143  |   25.03  

   2    |   0.298316   | 0.509090  |   25.03  

   3    |   0.203539   | 0.744770  |   24.80  

   4    |   0.159381   | 0.822267  |   25.21  

   5    |   0.120653   | 0.868048  |   25.14  

   6    |   0.095058   | 0.915715  |   25.14  

   7    |   0.077401   | 0.927647  |   25.17  

   8    |   0.067333   | 0.940945  |   25.06  

   9    |   0.060306   | 0.944802  |   25.07  

  10    |   0.054952   | 0.944802  |   25.08  

best score:  0.9448023213641658


In [7]:
def essem_evalute(dataloader,model_list):
    for i in range(len(model_list)):
        model_list[i].eval()
    y_pred = []
    y_target = []
    with torch.no_grad():
        for step, batch in enumerate(dataloader):
            b_input_ids = batch['input_ids'].to(device)
            b_attn_mask = batch['attention_mask'].to(device)
            b_labels = batch['labels'].to(device)
            tmp = 0
            for i in range(len(model_list)):
                logits = model_list[i](b_input_ids, b_attn_mask)
                tmp += torch.sigmoid(logits.logits)
            tmp = tmp / len(model_list)
            y_pred.extend(tmp.cpu().detach().numpy().tolist())         
            y_target.extend(b_labels.cpu().detach().numpy().tolist())
    y_preds = (np.array(y_pred)>0.5).astype(int)
    marco_f1= f1_score(y_target,y_preds,average='macro')
    # print("marco f1 score : ",marco_f1)
    return marco_f1
print(essem_evalute(val_dataloader,model_list))

NameError: name 'model_list' is not defined

In [8]:
class TestDataset(Dataset):
    def __init__(self, data, tokenizer, max_len):
        self.data = data
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        ids = self.data.iloc[idx]['id']
        utterance = str(self.data.iloc[idx]['utterance'])
        encoding = self.tokenizer.encode_plus(
            utterance,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        return {
            'id' : ids,
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten()
        }
test_dataset = TestDataset(test_data, tokenizer, max_len)
test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=False)

In [9]:
prdict_model_list=[]
for i in range(15):
    path = f'./w_weight{i}.pth'
    bert_classifier = AutoModelForSequenceClassification.from_pretrained(model_name,num_labels=num_classes).to(device)
    bert_classifier.load_state_dict(torch.load(path))
    prdict_model_list.append(bert_classifier)
    bert_classifier.eval()
y_pred = []
ids = []
with torch.no_grad():
    for batch  in test_dataloader:
        id_0 = batch['id'].cpu().item()
        ids.append(id_0)
        b_input_ids = batch['input_ids'].to(device)
        b_attn_mask = batch['attention_mask'].to(device)
        tmp = 0
        for i in range(len(prdict_model_list)):
            logits = prdict_model_list[i](b_input_ids, b_attn_mask)
            
            tmp += torch.sigmoid(logits.logits)
        tmp = tmp / len(prdict_model_list)
        y_pred.extend(tmp.cpu().detach().numpy().tolist())  

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.weight', 'classifier.bias', 'pooler.dense.weight', 'pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.weight', 'classifier.bias', 'pooler.dense.weight', 'pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.weight', 'classifier.bias', 'pooler.dense.weight', 'pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to

In [14]:
y_preds = (np.array(y_pred) > 0.6).astype(int)
# y_preds

In [15]:
df = pd.DataFrame(y_preds, columns=tag_list)
df_id =pd.DataFrame(ids, columns=["id"])
merged_df = pd.concat([df_id, df], axis=1)
# merged_df

In [16]:
data_rows = merged_df.to_dict(orient='records')

In [17]:
import csv
with open("submission.csv", 'w', newline='') as csvfile:
    fieldnames=["id","AM","MS","OTHER","PH","SF","SR"]
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(data_rows)