- Load the dataset

In [1]:
# Load the data set
from datasets import load_dataset
dataset = load_dataset("imdb")

  from pandas.core.computation.check import NUMEXPR_INSTALLED
Reusing dataset imdb (/home/lli23/.cache/huggingface/datasets/imdb/plain_text/1.0.0/2fdd8b9bcadd6e7055e742a706876ba43f19faee861df134affd7a3f60fc38a1)


  0%|          | 0/3 [00:00<?, ?it/s]

In [2]:
'''
# Extract text and label
train = dataset['train']
train_texts, train_labels = train['text'],train['label']
print('The size of training set is %d' % len(train))
'''

The size of training set is 25000


In [31]:
#type(train['text'])

- We now have a train and test dataset, but let’s also also create a validation set which we can use for for evaluation and tuning without training our test set results. Sklearn has a convenient utility for creating such splits:

In [4]:
'''
from sklearn.model_selection import train_test_split
train_texts, val_texts, train_labels, val_labels = train_test_split(train_texts, train_labels, test_size=.2)
print('The size of training/validation set is %d/%d' % (len(train_texts),len(val_texts)))
'''

The size of training/validation set is 20000/5000


- Tokenize
 - Now we can simply pass our texts to the tokenizer. We’ll pass truncation=True and padding=True, which will ensure that all of our sequences are padded to the same length and are truncated to be no longer model’s maximum input length. This will allow us to feed batches of sequences into the model at the same time.



In [5]:
'''
from transformers import DistilBertTokenizerFast
tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')
train_encodings = tokenizer(train_texts, truncation=True, padding="max_length",max_length=50)
val_encodings = tokenizer(val_texts, truncation=True, padding="max_length",max_length=50)
# train_encodings has 'input_ids','attention_mask'
#{'input_ids': [[101, 1045, 2572, 1037, 2879, 102], [101, 1045, 2572, 1037, 2611, 102]], 
# 'attention_mask': [[1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1]]}
'''

In [40]:
from transformers import DistilBertTokenizerFast
def my_tokenize(pre_train,train_texts,val_texts,max_len):
    if pre_train == 'distilbert':
        tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')
    train_encodings = tokenizer(train_texts, truncation=True, padding=True,max_length=max_len)
    val_encodings = tokenizer(val_texts, truncation=True, padding=True,max_length=max_len)
    return train_encodings,val_encodings

def my_tokenizeCV(pre_train,train_texts,max_len):
    if pre_train == 'distilbert':
        tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')
    train_encodings = tokenizer(train_texts, truncation=True, padding=True,max_length=max_len)
    return train_encodings

In [4]:
#len(train_encodings['input_ids']),len(train_encodings['attention_mask'])

- Now, let’s turn our labels and encodings into a Dataset object. 
  - In PyTorch, this is done by subclassing a torch.utils.data.Dataset object and implementing \__len__ and \__getitem__. 
  - We put the data in this format so that the data can be easily batched such that each key in the batch encoding corresponds to a named parameter of the forward() method of the model we will train.


In [6]:

from torch.utils.data import Dataset
import torch
# Transfer the dataset into torch dataset
class my_dataset(Dataset):
    def __init__(self,encoding,labels):
        self.encoding = encoding
        self.labels = labels
        self.length = len(labels)
    
    def __getitem__(self,idx):
        item = {key:torch.tensor(val[idx]) for key,val in self.encoding.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item
        
    def __len__(self):
        return self.length
    
#train_dataset = my_dataset(train_encodings, train_labels)
#val_dataset = my_dataset(val_encodings, val_labels)


In [7]:
'''
from torch.utils.data import Dataset
from transformers import DistilBertTokenizerFast
import torch
# Built a class to cover all above
class My_Data(Dataset):
    '''
    Create the torch dataset from our original dataset
    '''
    def __init__(self,comments,labels,pre_train='distilbert',max_length=50):
        '''
        comments: list of texts, lables: list of label, pre_train: tokenize model type
        '''
        self.comments = comments
        self.labels = labels
        self.pre_train = pre_train
        self.max_length = max_length
        self.tokenizer = None
        if pre_train == 'distilbert':
            self.tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')
        self.size = len(labels)
        # self.preprocessor = TextPreprocessor(model_type=pre_trained)
    def __len__(self):
        """
        Return the length of the dataset
        """
        return self.size
    def __getitem__(self,idx):
        '''
        Return a instance in dictionary
        '''
        # text = self.preprocessor.preprocess(self.comments)
        train_encodings = self.tokenizer(self.comments,truncation = True, padding = True,max_length = self.max_length)
        item = {key:torch.tensor(val[idx]) for key,val in train_encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item
    
# train_dataset_ = My_Data(train_texts, train_labels,'distilbert',max_length=50) 
# val_dataset_ = My_Data(val_texts, val_labels,'distilbert',max_length=50)
'''

In [8]:
#len(train_dataset_),len(val_dataset_)

- Fine-tuning with native PyTorch
 - model
 - GPU
 - DataLoader
 - Training loop

In [145]:
'''
from torch.utils.data import DataLoader
from transformers import DistilBertForSequenceClassification, AdamW

# GPU statement
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# import the training model
model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased')
model.to(device)
model.train()
# trining set to batch
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
optim = AdamW(model.parameters(), lr=5e-5)

# Training loop
for epoch in range(3):
    for batch in train_loader:
        optim.zero_grad()
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model(input_ids,attention_mask = attention_mask, labels = labels)
        loss = outputs[0]
        loss.backward()
        optim.step()
model.eval()
'''       

loading configuration file https://huggingface.co/distilbert-base-uncased/resolve/main/config.json from cache at /home/lli23/.cache/huggingface/transformers/23454919702d26495337f3da04d1655c7ee010d5ec9d77bdb9e399e00302c0a1.91b885ab15d631bf9cee9dc9d25ece0afd932f2f5130eba28f2055b2220c0333
Model config DistilBertConfig {
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "initializer_range": 0.02,
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "transformers_version": "4.18.0",
  "vocab_size": 30522
}

loading weights file https://huggingface.co/distilbert-base-uncased/resolve/main/pytorch_model.bin from cache at /home/lli23/.cache/huggingface/transformers/9c169103d7e5a73936dd2b627e42851bec0831212b677c6

DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0): TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
       

In [114]:
from torch.utils.data import DataLoader
from transformers import DistilBertForSequenceClassification, AdamW
from tqdm import tqdm
def training_network(dataloader,pre_train,device):
    if pre_train == 'distilbert':
        model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased')
    model.to(device)
    model.train()
    optimizer = AdamW(model.parameters(), lr=5e-5)
    # Training loop
    for epoch in range(3):
        print('epoch%d:'% epoch)
        for batch in tqdm(dataloader):
            optimizer.zero_grad()
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            outputs = model(input_ids,attention_mask = attention_mask, labels = labels)
            loss = outputs[0]
            loss.backward()
            optimizer.step()
    #model.eval()
    return model



In [150]:
from sklearn.metrics import classification_report
import torch.nn.functional as F
import numpy as np
from sklearn.metrics import roc_auc_score
def test_network(dataloader,model,device):
    """
    """
    acc = 0.0
    preds_ = []
    preds_proba_ = []
    truths_ = []
    model.eval()
    with torch.no_grad():
        for t_idx,t_batch in enumerate(dataloader):
            feats = t_batch['input_ids']
            labels = t_batch['labels']
            
            inputs = feats.to(device)
            labels = labels.to(device)
            outs = model(inputs)
            
            preds = F.softmax(outs.logits,dim=1)
            preds_labels = torch.argmax(preds,dim=1)
            
            preds_proba_.append(preds[:,1].detach().cpu().numpy())
            preds_.append(preds_labels.detach().cpu().numpy())
            truths_.append(labels.data.cpu().numpy())


    preds_ = np.concatenate(preds_)
    preds_proba_ = np.concatenate(preds_proba_)
    truths_ = np.concatenate(truths_)
    
    auc_score = roc_auc_score(truths_,preds_proba_)
    print(classification_report(truths_,preds_,zero_division=1,digits=4))
    print(f"ROC AUC : {auc_score : .4}")
    return preds_,preds_proba_,truths_,auc_score
    


## Combine all:
- 1. Prepare dataset
- 2. Load pretrained Tokenizer, call it with dataset -> encoding
- 3. Build PyTorch Dataset with encodings
- 4. Load pretrained Model
- 5. Native PyTorch training loop

In [139]:
# Define a function to combine all
def fine_turn_bert(texts,labels,pre_train,device):
    '''
    texts: list of reviews, labels: list of label,pre_train: model name
    '''
    # Prepare dataset
    train_texts, train_labels = texts,labels
    print('The size of training set is %d' % len(texts))
    train_texts, val_texts, train_labels, val_labels = train_test_split(train_texts, train_labels, test_size=.2)
    print('The size of training/validation set is %d/%d' % (len(train_texts),len(val_texts)))

    # Tokenize
    train_encodings,val_encodings = my_tokenize(pre_train,train_texts,val_texts,max_len=50)
    # Create PyTorch Dataset object using word encoding
    train_dataset = my_dataset(train_encodings, train_labels)
    val_dataset = my_dataset(val_encodings, val_labels)

    # DataLoader: training/validation set to batch
    train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=16, shuffle=True)
    print('Training...')
    model = training_network(train_loader,pre_train,device)
    print('Validation...')
    preds_,preds_proba_,auc_score = test_network(val_loader,model,device)
    return model

In [140]:
# Original test
train = dataset['train']
texts,labels = train['text'],train['label']
pre_train = 'distilbert'
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model = fine_turn_bert(texts,labels,pre_train,device)

The size of training set is 25000
The size of training/validation set is 20000/5000
Training...


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_projector.bias', 'vocab_layer_norm.bias', 'vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_projector.weight']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.bias', 'classifier.bias', 'classifier.w

epoch0:


100%|██████████| 1250/1250 [00:27<00:00, 46.19it/s]


epoch1:


100%|██████████| 1250/1250 [00:27<00:00, 45.52it/s]


epoch2:


100%|██████████| 1250/1250 [00:27<00:00, 45.18it/s]


Validation...
              precision    recall  f1-score   support

           0     0.8183    0.7705    0.7937      2536
           1     0.7772    0.8239    0.7998      2464

    accuracy                         0.7968      5000
   macro avg     0.7977    0.7972    0.7968      5000
weighted avg     0.7980    0.7968    0.7967      5000

ROC AUC :  0.8849


In [152]:
from sklearn.model_selection import KFold
import random
# Shuffle two lists with same order
def shuffle_2list(test_list1,test_list2):
    # Using zip() + * operator + shuffle()
    temp = list(zip(test_list1, test_list2))
    random.shuffle(temp)
    res1, res2 = zip(*temp)
    # res1 and res2 come out as tuples, and so must be converted to lists.
    res1, res2 = list(res1), list(res2)
    return res1, res2


In [153]:
# Apply Cross Validation
def fine_turn_bert_CV5(texts,labels,pre_train,device):
    '''
    texts: list of reviews, labels: list of label,pre_train: model name
    '''
    # Prepare dataset
    train_texts, train_labels = texts,labels
    print('The size of training set is %d' % len(texts))
    # Tokenize
    train_encodings = my_tokenizeCV(pre_train,train_texts,max_len=50)
    
    kf = KFold(n_splits=5)
    preds_text = []
    probas_text = []
    truths = []
    aucs_text = []
    print('cross validation...')
    i = 1
    for traini, testi in kf.split(train_texts):
        print('CV iter %d:' % i)
        # Split training/validation set
        tra_encod = {'input_ids':train_encodings['input_ids'][traini[0]:traini[-1]+1],
                     'attention_mask':train_encodings['attention_mask'][traini[0]:traini[-1]+1]}
        val_encod = {'input_ids':train_encodings['input_ids'][testi[0]:testi[-1]+1],
                     'attention_mask':train_encodings['attention_mask'][testi[0]:testi[-1]+1]}
        train_dataset = my_dataset(tra_encod, train_labels[traini[0]:traini[-1]+1])
        val_dataset = my_dataset(val_encod, train_labels[testi[0]:testi[-1]+1])
        # DataLoader: training/validation set to batch
        train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=16, shuffle=True)
        # Train the model
        model = training_network(train_loader,pre_train,device)
        # Validate
        preds_,preds_proba_,truths_,auc_score = test_network(val_loader,model,device)
        preds_text.append(preds_)
        probas_text.append(preds_proba_)
        truths.append(truths_)
        aucs_text.append(auc_score)
        i = i + 1
    preds_text = np.concatenate(preds_text)
    truths = np.concatenate(truths)
    print('CV5 result:')
    print(classification_report(preds_text, truths))
    return {'CV5_auc_mean': np.mean(aucs_text), 
            'CV5_auc_std': np.std(aucs_text)}
        
    

In [154]:
# CV5 test
train = dataset['train']
texts,labels = train['text'],train['label']
texts,labels = shuffle_2list(texts,labels)
pre_train = 'distilbert'
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
result = fine_turn_bert_CV5(texts,labels,pre_train,device)

The size of training set is 25000
cross validation...
CV iter 1:


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_projector.bias', 'vocab_layer_norm.bias', 'vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_projector.weight']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.bias', 'classifier.bias', 'classifier.w

epoch0:


100%|██████████| 1250/1250 [00:27<00:00, 46.27it/s]


epoch1:


100%|██████████| 1250/1250 [00:27<00:00, 45.57it/s]


epoch2:


100%|██████████| 1250/1250 [00:27<00:00, 45.30it/s]


              precision    recall  f1-score   support

           0     0.8111    0.7740    0.7921      2513
           1     0.7817    0.8179    0.7994      2487

    accuracy                         0.7958      5000
   macro avg     0.7964    0.7959    0.7957      5000
weighted avg     0.7965    0.7958    0.7957      5000

ROC AUC :  0.8827
CV iter 2:


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_projector.bias', 'vocab_layer_norm.bias', 'vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_projector.weight']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.bias', 'classifier.bias', 'classifier.w

epoch0:


100%|██████████| 1563/1563 [00:34<00:00, 45.24it/s]


epoch1:


100%|██████████| 1563/1563 [00:34<00:00, 45.08it/s]


epoch2:


100%|██████████| 1563/1563 [00:34<00:00, 45.09it/s]


              precision    recall  f1-score   support

           0     0.9819    0.9960    0.9889      2508
           1     0.9959    0.9815    0.9887      2492

    accuracy                         0.9888      5000
   macro avg     0.9889    0.9888    0.9888      5000
weighted avg     0.9889    0.9888    0.9888      5000

ROC AUC :  0.9994
CV iter 3:


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_projector.bias', 'vocab_layer_norm.bias', 'vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_projector.weight']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.bias', 'classifier.bias', 'classifier.w

epoch0:


100%|██████████| 1563/1563 [00:34<00:00, 45.15it/s]


epoch1:


100%|██████████| 1563/1563 [00:34<00:00, 45.14it/s]


epoch2:


100%|██████████| 1563/1563 [00:34<00:00, 45.08it/s]


              precision    recall  f1-score   support

           0     0.9932    0.9873    0.9902      2519
           1     0.9872    0.9931    0.9902      2481

    accuracy                         0.9902      5000
   macro avg     0.9902    0.9902    0.9902      5000
weighted avg     0.9902    0.9902    0.9902      5000

ROC AUC :  0.999
CV iter 4:


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_projector.bias', 'vocab_layer_norm.bias', 'vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_projector.weight']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.bias', 'classifier.bias', 'classifier.w

epoch0:


100%|██████████| 1563/1563 [00:34<00:00, 45.13it/s]


epoch1:


100%|██████████| 1563/1563 [00:34<00:00, 45.13it/s]


epoch2:


100%|██████████| 1563/1563 [00:34<00:00, 45.11it/s]


              precision    recall  f1-score   support

           0     0.9858    0.9905    0.9881      2517
           1     0.9903    0.9855    0.9879      2483

    accuracy                         0.9880      5000
   macro avg     0.9880    0.9880    0.9880      5000
weighted avg     0.9880    0.9880    0.9880      5000

ROC AUC :  0.9991
CV iter 5:


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_projector.bias', 'vocab_layer_norm.bias', 'vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_projector.weight']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.bias', 'classifier.bias', 'classifier.w

epoch0:


100%|██████████| 1250/1250 [00:27<00:00, 45.16it/s]


epoch1:


100%|██████████| 1250/1250 [00:27<00:00, 45.12it/s]


epoch2:


100%|██████████| 1250/1250 [00:27<00:00, 45.08it/s]


              precision    recall  f1-score   support

           0     0.7887    0.8207    0.8044      2443
           1     0.8218    0.7900    0.8056      2557

    accuracy                         0.8050      5000
   macro avg     0.8053    0.8054    0.8050      5000
weighted avg     0.8057    0.8050    0.8050      5000

ROC AUC :  0.892
CV5 result:
              precision    recall  f1-score   support

           0       0.91      0.91      0.91     12517
           1       0.91      0.91      0.91     12483

    accuracy                           0.91     25000
   macro avg       0.91      0.91      0.91     25000
weighted avg       0.91      0.91      0.91     25000



In [155]:
result

{'CV5_auc_mean': 0.954427351055178, 'CV5_auc_std': 0.054851539216691225}

In [28]:
'''
# train_texts, val_texts, train_labels, val_labels are defined
# Tokenize
pre_train = 'distilbert'
train_encodings,val_encodings = my_tokenize(pre_train,train_texts,val_texts,max_len=50)
# Create dataset object using word encoding
train_dataset = my_dataset(train_encodings, train_labels)
val_dataset = my_dataset(val_encodings, val_labels)

# DataLoader: training/validation set to batch
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=True)


# GPU statement
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print('Training...')
model = training_network(train_loader,pre_train,device)
print('Validation...')
test_network(val_loader,model,device)
'''

"\n# train_texts, val_texts, train_labels, val_labels are defined\n# Tokenize\npre_train = 'distilbert'\ntrain_encodings,val_encodings = my_tokenize(pre_train,train_texts,val_texts,max_len=50)\n# Create dataset object using word encoding\ntrain_dataset = my_dataset(train_encodings, train_labels)\nval_dataset = my_dataset(val_encodings, val_labels)\n\n# DataLoader: training/validation set to batch\ntrain_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)\nval_loader = DataLoader(val_dataset, batch_size=16, shuffle=True)\n\n\n# GPU statement\ndevice = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')\nprint('Training...')\nmodel = training_network(train_loader,pre_train,device)\nprint('Validation...')\ntest_network(val_loader,model,device)\n"