https://luv-bansal.medium.com/fine-tuning-bert-for-text-classification-in-pytorch-503d97342db2 (code below)


https://huggingface.co/transformers/v3.2.0/custom_datasets.html 

In [11]:
import pandas as pd
import numpy as np
import transformers
import torch
import torch.nn as nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch.optim as optim
import torch.nn.functional as F
from torchsummary import summary #what is that, it works ?
from tqdm import tqdm
from sklearn.model_selection import train_test_split


In [12]:
#import the dataset and create a trainig and testing and validation set


isarcasm_data = pd.read_csv('isarcasm2022.csv')

text = isarcasm_data.iloc[: , 1]
isitsarcasm = isarcasm_data.iloc[: , 2]

print(text)
print(isitsarcasm)



0       The only thing I got from college is a caffein...
1       I love it when professors draw a big question ...
2       Remember the hundred emails from companies whe...
3       Today my pop-pop told me I was not “forced” to...
4       @VolphanCarol @littlewhitty @mysticalmanatee I...
                              ...                        
3462    The population spike in Chicago in 9 months is...
3463    You'd think in the second to last English clas...
3464    I’m finally surfacing after a holiday to Scotl...
3465    Couldn't be prouder today. Well done to every ...
3466    Overheard as my 13 year old games with a frien...
Name: tweet, Length: 3467, dtype: object
0       1
1       1
2       1
3       1
4       1
       ..
3462    0
3463    0
3464    0
3465    0
3466    0
Name: sarcastic, Length: 3467, dtype: int64


In [13]:
#create the class that represent the dataset

class IsarcasmDataset(Dataset):
   
    def __init__(self, csv_file, root_dir, transform=None, train_size=0.7, test_size=0.15, val_size=0.15):
        """
        Arguments:
            csv_file (string): Path to the csv file with annotations.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.rawisarcasmData = pd.read_csv(csv_file)
        self.isarcasmData = self.rawisarcasmData.sample(frac=1).reset_index(drop=True) #randomly shuffle the set
        self.root_dir = root_dir
        self.transform = transform
        
        train_data, temp_data = train_test_split(self.isarcasmData, test_size=test_size+val_size, random_state=42)
        test_data, val_data = train_test_split(temp_data, test_size=val_size/(test_size+val_size), random_state=42)

        self.train_set = train_data
        self.test_set = test_data
        self.validation_set = val_data

        self.save_sets()
        
       
    def __len__(self):
        return len(self.isarcasmData)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        text = self.isarcasmData.iloc[idx , 1]
        label = self.isarcasmData.iloc[idx , 2]
        
        sample = {'tweet': text, 'label': label}

        if self.transform:
            sample = self.transform(sample)

        return sample
    
    def save_sets(self):
       
        # Save the datasets to separate CSV files
        self.train_set.to_csv('train_dataset.csv', index=False)
        self.test_set.to_csv('test_dataset.csv', index=False)
        self.validation_set.to_csv('val_dataset.csv', index=False)


In [14]:
#instantiate the class

Isarcasm_dataset = IsarcasmDataset(csv_file='isarcasm2022.csv',
                                    root_dir='/')

print(Isarcasm_dataset.__len__()) 
Isarcasm_dataset.__getitem__(4)


#print(Isarcasm_dataset.test_set)

3467


{'tweet': 'white bitches vomiting on my shoes,, please take a step back before i elbow your face and one inch punch the fetus growing inside of you',
 'label': 0}

In [15]:
#dataloader 

#dataloader = DataLoader(transformed_dataset, batch_size=4,
                        #shuffle=True, num_workers=0)

In [16]:
class BertDataset(Dataset):
    def __init__(self, tokenizer,max_length):
        super(BertDataset, self).__init__()
        #self.root_dir=root_dir
        self.train_csv=pd.read_csv('val_dataset.csv')
        self.tokenizer=tokenizer
        self.target=self.train_csv.iloc[:,2] #label in column 2
        self.max_length=max_length
        
    def __len__(self):
        return len(self.train_csv)
    
    def __getitem__(self, index):
        
        text1 = self.train_csv.iloc[index,1] #text im column 1
        
        inputs = self.tokenizer.encode_plus( 
            
        #tokenizer settings https://stackoverflow.com/questions/61708486/whats-difference-between-tokenizer-encode-and-tokenizer-encode-plus-in-hugging
        #https://huggingface.co/docs/transformers/en/main_classes/tokenizer   
        #https://huggingface.co/docs/transformers/v4.40.1/en/internal/tokenization_utils#transformers.PreTrainedTokenizerBase.encode_plus
        
            text1 , #sequnce to be encoded
            None,  #text_pair ??
            pad_to_max_length=True,
            add_special_tokens=True, #https://stackoverflow.com/questions/71679626/what-is-so-special-about-special-tokens
            return_attention_mask=True,
            max_length=self.max_length,
        )
        
        # encodes the text using the BERT tokenizer's encode_plus method, which returns the input IDs, token type IDs, and attention mask.
        ids = inputs["input_ids"] #not really understood this part...
        token_type_ids = inputs["token_type_ids"] #DOES THIS PART NEED TO BE CHANGED FOR THE TEST TO WORK ?
        mask = inputs["attention_mask"]

        return {
            'ids': torch.tensor(ids, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.long),
            'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
            'target': torch.tensor(self.train_csv.iloc[index, 2], dtype=torch.long) #label im column 2
            }
tokenizer = transformers.BertTokenizer.from_pretrained("bert-base-uncased") 

'''
#in an other project they use that
DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')
train_encodings = tokenizer(train_texts, truncation=True, padding=True)
val_encodings = tokenizer(val_texts, truncation=True, padding=True)
test_encodings = tokenizer(test_texts, truncation=True, padding=True)

'''


dataset= BertDataset(tokenizer, max_length=100) #instanciate the dataset, adapt
dataloader=DataLoader(dataset=dataset,batch_size=32) #from, torch adapt

In line 4, we have initialized our pre-trained ‘bert-base-uncased’ BERT model from Hugging face library and followed by initializing our linear dense layer for classifying movie reviews.

Here, we use BCEWithLogitsLoss which combines a Sigmoid layer and the BCELoss in one single class because this version is more numerically stable than using a plain Sigmoid followed by a BCELoss

In [17]:
#https://huggingface.co/docs/transformers/en/model_doc/bert
class BERT(nn.Module):
    def __init__(self):
        super(BERT, self).__init__()
        self.bert_model = transformers.BertModel.from_pretrained("bert-base-uncased")
        self.out = nn.Linear(768, 1)
        
    def forward(self,ids,mask,token_type_ids):
        _,o2= self.bert_model(ids,attention_mask=mask,token_type_ids=token_type_ids, return_dict=False)
        
        out= self.out(o2)
        
        return out
    
model=BERT()

loss_fn = nn.BCEWithLogitsLoss()

#Initialize Optimizer
optimizer= optim.Adam(model.parameters(),lr= 0.0001)


irst, we do not retrain our pre-trained BERT and train only the last linear dense layer.
For this, we need to define it as follows :

In [18]:
for param in model.bert_model.parameters():
    param.requires_grad = False #fine tuning only not from 0

In [19]:
def finetune(epochs,dataloader,model,loss_fn,optimizer):
    model.train()
    for  epoch in range(epochs):
        print(epoch)
        
        loop=tqdm(enumerate(dataloader),leave=False,total=len(dataloader))
        for batch, dl in loop:
            ids=dl['ids']
            token_type_ids=dl['token_type_ids']
            mask= dl['mask']
            label=dl['target']
            label = label.unsqueeze(1)
            
            optimizer.zero_grad()
            
            output=model(
                ids=ids,
                mask=mask,
                token_type_ids=token_type_ids)
            label = label.type_as(output)

            loss=loss_fn(output,label)
            loss.backward()
            
            optimizer.step()
            
            pred = np.where(output >= 0, 1, 0)

            num_correct = sum(1 for a, b in zip(pred, label) if a[0] == b[0])
            num_samples = pred.shape[0]
            accuracy = num_correct/num_samples
            
            print(f'Got {num_correct} / {num_samples} with accuracy {float(num_correct)/float(num_samples)*100:.2f}')
            
            # Show progress while training
            loop.set_description(f'Epoch={epoch}/{epochs}')
            loop.set_postfix(loss=loss.item(),acc=accuracy)

    return model

In [20]:
model=finetune(5, dataloader, model, loss_fn, optimizer)

0


  0%|          | 0/17 [00:00<?, ?it/s]Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Epoch=0/5:   6%|▌         | 1/17 [00:04<01:08,  4.27s/it, acc=0.781, loss=0.607]

Got 25 / 32 with accuracy 78.12


Epoch=0/5:  12%|█▏        | 2/17 [00:08<01:03,  4.21s/it, acc=0.875, loss=0.565]

Got 28 / 32 with accuracy 87.50


Epoch=0/5:  18%|█▊        | 3/17 [00:12<00:59,  4.23s/it, acc=0.719, loss=0.629]

Got 23 / 32 with accuracy 71.88


Epoch=0/5:  24%|██▎       | 4/17 [00:16<00:53,  4.14s/it, acc=0.688, loss=0.633]

Got 22 / 32 with accuracy 68.75


Epoch=0/5:  29%|██▉       | 5/17 [00:21<00:50,  4.20s/it, acc=0.844, loss=0.56] 

Got 27 / 32 with accuracy 84.38


Epoch=0/5:  35%|███▌      | 6/17 [00:26<00:52,  4.74s/it, acc=0.75, loss=0.609]

Got 24 / 32 with accuracy 75.00


Epoch=0/5:  41%|████      | 7/17 [00:32<00:49,  4.96s/it, acc=0.688, loss=0.623]

Got 22 / 32 with accuracy 68.75


Epoch=0/5:  47%|████▋     | 8/17 [00:39<00:50,  5.65s/it, acc=0.844, loss=0.535]

Got 27 / 32 with accuracy 84.38


Epoch=0/5:  53%|█████▎    | 9/17 [00:44<00:45,  5.65s/it, acc=0.812, loss=0.541]

Got 26 / 32 with accuracy 81.25


Epoch=0/5:  59%|█████▉    | 10/17 [00:50<00:39,  5.58s/it, acc=0.781, loss=0.561]

Got 25 / 32 with accuracy 78.12


Epoch=0/5:  65%|██████▍   | 11/17 [00:55<00:32,  5.49s/it, acc=0.781, loss=0.566]

Got 25 / 32 with accuracy 78.12


Epoch=0/5:  71%|███████   | 12/17 [01:01<00:27,  5.46s/it, acc=0.719, loss=0.613]

Got 23 / 32 with accuracy 71.88


Epoch=0/5:  76%|███████▋  | 13/17 [01:06<00:22,  5.58s/it, acc=0.75, loss=0.584] 

Got 24 / 32 with accuracy 75.00


Epoch=0/5:  82%|████████▏ | 14/17 [01:12<00:16,  5.51s/it, acc=0.688, loss=0.623]

Got 22 / 32 with accuracy 68.75


Epoch=0/5:  88%|████████▊ | 15/17 [01:17<00:10,  5.48s/it, acc=0.812, loss=0.53] 

Got 26 / 32 with accuracy 81.25


Epoch=0/5:  94%|█████████▍| 16/17 [01:22<00:05,  5.40s/it, acc=0.781, loss=0.553]

Got 25 / 32 with accuracy 78.12


                                                                                 

Got 7 / 9 with accuracy 77.78
1


Epoch=1/5:   6%|▌         | 1/17 [00:05<01:22,  5.16s/it, acc=0.781, loss=0.543]

Got 25 / 32 with accuracy 78.12


Epoch=1/5:  12%|█▏        | 2/17 [00:11<01:25,  5.73s/it, acc=0.875, loss=0.454]

Got 28 / 32 with accuracy 87.50


Epoch=1/5:  18%|█▊        | 3/17 [00:16<01:18,  5.58s/it, acc=0.719, loss=0.581]

Got 23 / 32 with accuracy 71.88


Epoch=1/5:  24%|██▎       | 4/17 [00:22<01:11,  5.53s/it, acc=0.719, loss=0.609]

Got 23 / 32 with accuracy 71.88


Epoch=1/5:  29%|██▉       | 5/17 [00:27<01:06,  5.51s/it, acc=0.844, loss=0.48] 

Got 27 / 32 with accuracy 84.38


Epoch=1/5:  35%|███▌      | 6/17 [00:33<01:00,  5.53s/it, acc=0.75, loss=0.587]

Got 24 / 32 with accuracy 75.00


Epoch=1/5:  41%|████      | 7/17 [00:38<00:54,  5.48s/it, acc=0.688, loss=0.599]

Got 22 / 32 with accuracy 68.75


Epoch=1/5:  47%|████▋     | 8/17 [00:44<00:49,  5.47s/it, acc=0.844, loss=0.477]

Got 27 / 32 with accuracy 84.38


Epoch=1/5:  53%|█████▎    | 9/17 [00:49<00:43,  5.43s/it, acc=0.812, loss=0.512]

Got 26 / 32 with accuracy 81.25


Epoch=1/5:  59%|█████▉    | 10/17 [00:54<00:37,  5.42s/it, acc=0.781, loss=0.534]

Got 25 / 32 with accuracy 78.12


Epoch=1/5:  65%|██████▍   | 11/17 [00:59<00:32,  5.36s/it, acc=0.781, loss=0.547]

Got 25 / 32 with accuracy 78.12


Epoch=1/5:  71%|███████   | 12/17 [01:05<00:26,  5.33s/it, acc=0.719, loss=0.61] 

Got 23 / 32 with accuracy 71.88


Epoch=1/5:  76%|███████▋  | 13/17 [01:11<00:22,  5.55s/it, acc=0.75, loss=0.574]

Got 24 / 32 with accuracy 75.00


Epoch=1/5:  82%|████████▏ | 14/17 [01:16<00:16,  5.49s/it, acc=0.688, loss=0.614]

Got 22 / 32 with accuracy 68.75


Epoch=1/5:  88%|████████▊ | 15/17 [01:22<00:10,  5.46s/it, acc=0.812, loss=0.482]

Got 26 / 32 with accuracy 81.25


Epoch=1/5:  94%|█████████▍| 16/17 [01:27<00:05,  5.40s/it, acc=0.781, loss=0.551]

Got 25 / 32 with accuracy 78.12


                                                                                 

Got 7 / 9 with accuracy 77.78
2


Epoch=2/5:   6%|▌         | 1/17 [00:05<01:21,  5.11s/it, acc=0.781, loss=0.541]

Got 25 / 32 with accuracy 78.12


Epoch=2/5:  12%|█▏        | 2/17 [00:10<01:18,  5.22s/it, acc=0.875, loss=0.44] 

Got 28 / 32 with accuracy 87.50


Epoch=2/5:  18%|█▊        | 3/17 [00:15<01:14,  5.30s/it, acc=0.719, loss=0.593]

Got 23 / 32 with accuracy 71.88


Epoch=2/5:  24%|██▎       | 4/17 [00:21<01:10,  5.44s/it, acc=0.719, loss=0.61] 

Got 23 / 32 with accuracy 71.88


Epoch=2/5:  29%|██▉       | 5/17 [00:26<01:05,  5.46s/it, acc=0.844, loss=0.451]

Got 27 / 32 with accuracy 84.38


Epoch=2/5:  35%|███▌      | 6/17 [00:32<01:00,  5.54s/it, acc=0.75, loss=0.578] 

Got 24 / 32 with accuracy 75.00


Epoch=2/5:  41%|████      | 7/17 [00:38<00:55,  5.54s/it, acc=0.688, loss=0.623]

Got 22 / 32 with accuracy 68.75


Epoch=2/5:  47%|████▋     | 8/17 [00:44<00:52,  5.80s/it, acc=0.844, loss=0.446]

Got 27 / 32 with accuracy 84.38


Epoch=2/5:  53%|█████▎    | 9/17 [00:50<00:46,  5.76s/it, acc=0.812, loss=0.49] 

Got 26 / 32 with accuracy 81.25


Epoch=2/5:  59%|█████▉    | 10/17 [00:55<00:39,  5.68s/it, acc=0.781, loss=0.529]

Got 25 / 32 with accuracy 78.12


Epoch=2/5:  65%|██████▍   | 11/17 [01:01<00:34,  5.68s/it, acc=0.781, loss=0.544]

Got 25 / 32 with accuracy 78.12


Epoch=2/5:  71%|███████   | 12/17 [01:06<00:28,  5.62s/it, acc=0.719, loss=0.619]

Got 23 / 32 with accuracy 71.88


Epoch=2/5:  76%|███████▋  | 13/17 [01:12<00:22,  5.66s/it, acc=0.75, loss=0.567] 

Got 24 / 32 with accuracy 75.00


Epoch=2/5:  82%|████████▏ | 14/17 [01:18<00:16,  5.65s/it, acc=0.688, loss=0.639]

Got 22 / 32 with accuracy 68.75


Epoch=2/5:  88%|████████▊ | 15/17 [01:24<00:11,  5.68s/it, acc=0.812, loss=0.48] 

Got 26 / 32 with accuracy 81.25


Epoch=2/5:  94%|█████████▍| 16/17 [01:29<00:05,  5.64s/it, acc=0.781, loss=0.544]

Got 25 / 32 with accuracy 78.12


                                                                                 

Got 7 / 9 with accuracy 77.78
3


Epoch=3/5:   6%|▌         | 1/17 [00:05<01:23,  5.22s/it, acc=0.781, loss=0.525]

Got 25 / 32 with accuracy 78.12


Epoch=3/5:  12%|█▏        | 2/17 [00:10<01:18,  5.20s/it, acc=0.875, loss=0.431]

Got 28 / 32 with accuracy 87.50


Epoch=3/5:  18%|█▊        | 3/17 [00:16<01:18,  5.62s/it, acc=0.719, loss=0.595]

Got 23 / 32 with accuracy 71.88


Epoch=3/5:  24%|██▎       | 4/17 [00:22<01:13,  5.63s/it, acc=0.719, loss=0.619]

Got 23 / 32 with accuracy 71.88


Epoch=3/5:  29%|██▉       | 5/17 [00:27<01:07,  5.66s/it, acc=0.844, loss=0.442]

Got 27 / 32 with accuracy 84.38


Epoch=3/5:  35%|███▌      | 6/17 [00:33<01:02,  5.64s/it, acc=0.75, loss=0.583] 

Got 24 / 32 with accuracy 75.00


Epoch=3/5:  41%|████      | 7/17 [00:39<00:56,  5.65s/it, acc=0.688, loss=0.619]

Got 22 / 32 with accuracy 68.75


Epoch=3/5:  47%|████▋     | 8/17 [00:44<00:49,  5.55s/it, acc=0.844, loss=0.44] 

Got 27 / 32 with accuracy 84.38


Epoch=3/5:  53%|█████▎    | 9/17 [00:49<00:44,  5.51s/it, acc=0.812, loss=0.493]

Got 26 / 32 with accuracy 81.25


Epoch=3/5:  59%|█████▉    | 10/17 [00:55<00:37,  5.42s/it, acc=0.781, loss=0.523]

Got 25 / 32 with accuracy 78.12


Epoch=3/5:  65%|██████▍   | 11/17 [01:00<00:32,  5.40s/it, acc=0.781, loss=0.543]

Got 25 / 32 with accuracy 78.12


Epoch=3/5:  71%|███████   | 12/17 [01:05<00:26,  5.35s/it, acc=0.719, loss=0.627]

Got 23 / 32 with accuracy 71.88


Epoch=3/5:  76%|███████▋  | 13/17 [01:11<00:21,  5.44s/it, acc=0.75, loss=0.572] 

Got 24 / 32 with accuracy 75.00


Epoch=3/5:  82%|████████▏ | 14/17 [01:17<00:16,  5.53s/it, acc=0.688, loss=0.632]

Got 22 / 32 with accuracy 68.75


Epoch=3/5:  88%|████████▊ | 15/17 [01:22<00:10,  5.47s/it, acc=0.812, loss=0.475]

Got 26 / 32 with accuracy 81.25


Epoch=3/5:  94%|█████████▍| 16/17 [01:27<00:05,  5.41s/it, acc=0.781, loss=0.532]

Got 25 / 32 with accuracy 78.12


                                                                                 

Got 7 / 9 with accuracy 77.78
4


Epoch=4/5:   6%|▌         | 1/17 [00:05<01:20,  5.05s/it, acc=0.781, loss=0.536]

Got 25 / 32 with accuracy 78.12


Epoch=4/5:  12%|█▏        | 2/17 [00:10<01:16,  5.11s/it, acc=0.875, loss=0.426]

Got 28 / 32 with accuracy 87.50


Epoch=4/5:  18%|█▊        | 3/17 [00:15<01:12,  5.18s/it, acc=0.719, loss=0.597]

Got 23 / 32 with accuracy 71.88


Epoch=4/5:  24%|██▎       | 4/17 [00:20<01:08,  5.24s/it, acc=0.719, loss=0.616]

Got 23 / 32 with accuracy 71.88


Epoch=4/5:  29%|██▉       | 5/17 [00:26<01:03,  5.25s/it, acc=0.844, loss=0.448]

Got 27 / 32 with accuracy 84.38


Epoch=4/5:  35%|███▌      | 6/17 [00:31<00:57,  5.24s/it, acc=0.75, loss=0.585] 

Got 24 / 32 with accuracy 75.00


Epoch=4/5:  41%|████      | 7/17 [00:36<00:52,  5.26s/it, acc=0.688, loss=0.607]

Got 22 / 32 with accuracy 68.75


Epoch=4/5:  47%|████▋     | 8/17 [00:41<00:47,  5.29s/it, acc=0.844, loss=0.456]

Got 27 / 32 with accuracy 84.38


Epoch=4/5:  53%|█████▎    | 9/17 [00:47<00:44,  5.53s/it, acc=0.812, loss=0.489]

Got 26 / 32 with accuracy 81.25


Epoch=4/5:  59%|█████▉    | 10/17 [00:53<00:38,  5.48s/it, acc=0.781, loss=0.527]

Got 25 / 32 with accuracy 78.12


Epoch=4/5:  65%|██████▍   | 11/17 [00:58<00:32,  5.45s/it, acc=0.781, loss=0.536]

Got 25 / 32 with accuracy 78.12


Epoch=4/5:  71%|███████   | 12/17 [01:04<00:27,  5.48s/it, acc=0.719, loss=0.61] 

Got 23 / 32 with accuracy 71.88


Epoch=4/5:  76%|███████▋  | 13/17 [01:10<00:22,  5.55s/it, acc=0.75, loss=0.565]

Got 24 / 32 with accuracy 75.00


Epoch=4/5:  82%|████████▏ | 14/17 [01:15<00:16,  5.57s/it, acc=0.688, loss=0.635]

Got 22 / 32 with accuracy 68.75


Epoch=4/5:  88%|████████▊ | 15/17 [01:21<00:11,  5.57s/it, acc=0.812, loss=0.488]

Got 26 / 32 with accuracy 81.25


Epoch=4/5:  94%|█████████▍| 16/17 [01:27<00:05,  5.66s/it, acc=0.781, loss=0.525]

Got 25 / 32 with accuracy 78.12


                                                                                 

Got 7 / 9 with accuracy 77.78




In [43]:

#https://discuss.huggingface.co/t/how-to-test-my-text-classification-model-after-training-it/6689  IF IT CAN HELP..
model.eval()
#torch.no_grad()

sentence = 'ah yes, of the course the summer rain pours when im on foot.' 
# encode sentence (i.e. create input_ids, attention_mask)
encoding = tokenizer(sentence)
# make sure the keys of the "encoding" dict are on the same device as the model
#encoding = {k: v.to(args.device) for k, v in encoding.items()} #WHAT WAS THE PURPOSE OF THAT ?
# forward pass through the model
with torch.no_grad():
    print(encoding)
    print('hello')
    print(**encoding)
    #outputs = model(encoding)

    outputs = model(**encoding)


logits = outputs.logits 
print("Predicted class index:", logits.argmax(-1))




{'input_ids': [101, 2017, 2298, 2204, 2651, 1010, 2054, 2079, 2017, 2228, 2055, 2008, 1029, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}
hello


TypeError: 'input_ids' is an invalid keyword argument for print()