In [3]:
from pathlib import Path

def read_imdb_split(split_dir):
    split_dir = Path(split_dir)
    texts = []
    labels = []
    for label_dir in ["pos", "neg"]:
        for text_file in (split_dir/label_dir).iterdir():
            texts.append(text_file.read_text(encoding="utf-8"))
            labels.append(0 if label_dir is "neg" else 1)

    return texts, labels

train_texts, train_labels = read_imdb_split('aclImdb/train')
test_texts, test_labels = read_imdb_split('aclImdb/test')

In [4]:
from sklearn.model_selection import train_test_split
train_texts, val_texts, train_labels, val_labels = train_test_split(train_texts, train_labels, test_size=.2)

In [5]:
from transformers import DistilBertTokenizerFast
tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')

In [6]:
train_encodings = tokenizer(train_texts, truncation=True, padding=True)
val_encodings = tokenizer(val_texts, truncation=True, padding=True)
test_encodings = tokenizer(test_texts, truncation=True, padding=True)

In [7]:
import torch

class IMDbDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

train_dataset = IMDbDataset(train_encodings, train_labels)
val_dataset = IMDbDataset(val_encodings, val_labels)
test_dataset = IMDbDataset(test_encodings, test_labels)

In [8]:
from transformers import DistilBertForSequenceClassification, Trainer, TrainingArguments

training_args = TrainingArguments(
    output_dir='./results',          # output directory
    num_train_epochs=3,              # total number of training epochs
    per_device_train_batch_size=16,  # batch size per device during training
    per_device_eval_batch_size=64,   # batch size for evaluation
    warmup_steps=500,                # number of warmup steps for learning rate scheduler
    weight_decay=0.01,               # strength of weight decay
    logging_dir='./logs',            # directory for storing logs
    logging_steps=10,
)

model = DistilBertForSequenceClassification.from_pretrained("distilbert-base-uncased")

trainer = Trainer(
    model=model,                         # the instantiated 🤗 Transformers model to be trained
    args=training_args,                  # training arguments, defined above
    train_dataset=train_dataset,         # training dataset
    eval_dataset=val_dataset             # evaluation dataset
)

trainer.train()

I0904 05:05:40.411082  6692 filelock.py:274] Lock 1908298639816 acquired on C:\Users\User/.cache\torch\transformers\a41e817d5c0743e29e86ff85edc8c257e61bc8d88e4271bb1b243b6e7614c633.8949e27aafafa845a18d98a0e3a88bc2d248bbc32a1b75947366664658f23b1c.lock


HBox(children=(FloatProgress(value=0.0, description='Downloading', max=442.0, style=ProgressStyle(description_…

I0904 05:05:41.280439  6692 filelock.py:318] Lock 1908298639816 released on C:\Users\User/.cache\torch\transformers\a41e817d5c0743e29e86ff85edc8c257e61bc8d88e4271bb1b243b6e7614c633.8949e27aafafa845a18d98a0e3a88bc2d248bbc32a1b75947366664658f23b1c.lock





I0904 05:05:41.824819  6692 filelock.py:274] Lock 1908299549000 acquired on C:\Users\User/.cache\torch\transformers\ae9df7a8d658c4f3e1917a471a8a21cf678fa1d4cb91e7702dfe0598dbdcf354.c2015533705b9dff680ae707e205a35e2860e8d148b45d35085419d74fe57ac5.lock


HBox(children=(FloatProgress(value=0.0, description='Downloading', max=267967963.0, style=ProgressStyle(descri…

I0904 05:05:48.716256  6692 filelock.py:318] Lock 1908299549000 released on C:\Users\User/.cache\torch\transformers\ae9df7a8d658c4f3e1917a471a8a21cf678fa1d4cb91e7702dfe0598dbdcf354.c2015533705b9dff680ae707e205a35e2860e8d148b45d35085419d74fe57ac5.lock





Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=3.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Iteration', max=1250.0, style=ProgressStyle(description_w…

{'loss': 0.6864083290100098, 'learning_rate': 1.0000000000000002e-06, 'epoch': 0.008, 'step': 10}
{'loss': 0.694526195526123, 'learning_rate': 2.0000000000000003e-06, 'epoch': 0.016, 'step': 20}
{'loss': 0.6883115768432617, 'learning_rate': 3e-06, 'epoch': 0.024, 'step': 30}
{'loss': 0.6869350433349609, 'learning_rate': 4.000000000000001e-06, 'epoch': 0.032, 'step': 40}
{'loss': 0.6809392929077148, 'learning_rate': 5e-06, 'epoch': 0.04, 'step': 50}
{'loss': 0.6674755096435547, 'learning_rate': 6e-06, 'epoch': 0.048, 'step': 60}
{'loss': 0.6616851806640625, 'learning_rate': 7.000000000000001e-06, 'epoch': 0.056, 'step': 70}
{'loss': 0.6221721649169922, 'learning_rate': 8.000000000000001e-06, 'epoch': 0.064, 'step': 80}
{'loss': 0.5116352081298828, 'learning_rate': 9e-06, 'epoch': 0.072, 'step': 90}
{'loss': 0.45932960510253906, 'learning_rate': 1e-05, 'epoch': 0.08, 'step': 100}
{'loss': 0.3264110565185547, 'learning_rate': 1.1000000000000001e-05, 'epoch': 0.088, 'step': 110}
{'loss': 0



{'loss': 0.2766021728515625, 'learning_rate': 4.984615384615385e-05, 'epoch': 0.408, 'step': 510}
{'loss': 0.426080322265625, 'learning_rate': 4.969230769230769e-05, 'epoch': 0.416, 'step': 520}
{'loss': 0.415313720703125, 'learning_rate': 4.953846153846154e-05, 'epoch': 0.424, 'step': 530}
{'loss': 0.337548828125, 'learning_rate': 4.9384615384615384e-05, 'epoch': 0.432, 'step': 540}
{'loss': 0.2964202880859375, 'learning_rate': 4.923076923076924e-05, 'epoch': 0.44, 'step': 550}
{'loss': 0.347186279296875, 'learning_rate': 4.907692307692308e-05, 'epoch': 0.448, 'step': 560}
{'loss': 0.259088134765625, 'learning_rate': 4.892307692307693e-05, 'epoch': 0.456, 'step': 570}
{'loss': 0.27161712646484376, 'learning_rate': 4.876923076923077e-05, 'epoch': 0.464, 'step': 580}
{'loss': 0.38050384521484376, 'learning_rate': 4.861538461538462e-05, 'epoch': 0.472, 'step': 590}
{'loss': 0.30739288330078124, 'learning_rate': 4.846153846153846e-05, 'epoch': 0.48, 'step': 600}
{'loss': 0.293260192871093

HBox(children=(FloatProgress(value=0.0, description='Iteration', max=1250.0, style=ProgressStyle(description_w…

{'loss': 0.2479034423828125, 'learning_rate': 3.830769230769231e-05, 'epoch': 1.008, 'step': 1260}
{'loss': 0.1425445556640625, 'learning_rate': 3.8153846153846153e-05, 'epoch': 1.016, 'step': 1270}
{'loss': 0.09515380859375, 'learning_rate': 3.8e-05, 'epoch': 1.024, 'step': 1280}
{'loss': 0.2122589111328125, 'learning_rate': 3.784615384615385e-05, 'epoch': 1.032, 'step': 1290}
{'loss': 0.1574859619140625, 'learning_rate': 3.769230769230769e-05, 'epoch': 1.04, 'step': 1300}
{'loss': 0.1630462646484375, 'learning_rate': 3.753846153846154e-05, 'epoch': 1.048, 'step': 1310}
{'loss': 0.06728515625, 'learning_rate': 3.738461538461538e-05, 'epoch': 1.056, 'step': 1320}
{'loss': 0.155352783203125, 'learning_rate': 3.723076923076923e-05, 'epoch': 1.064, 'step': 1330}
{'loss': 0.241119384765625, 'learning_rate': 3.707692307692308e-05, 'epoch': 1.072, 'step': 1340}
{'loss': 0.1820404052734375, 'learning_rate': 3.692307692307693e-05, 'epoch': 1.08, 'step': 1350}
{'loss': 0.1498992919921875, 'lear

{'loss': 0.290887451171875, 'learning_rate': 2.5538461538461538e-05, 'epoch': 1.6720000000000002, 'step': 2090}
{'loss': 0.11649169921875, 'learning_rate': 2.5384615384615383e-05, 'epoch': 1.6800000000000002, 'step': 2100}
{'loss': 0.198101806640625, 'learning_rate': 2.523076923076923e-05, 'epoch': 1.688, 'step': 2110}
{'loss': 0.20836181640625, 'learning_rate': 2.5076923076923077e-05, 'epoch': 1.696, 'step': 2120}
{'loss': 0.06085205078125, 'learning_rate': 2.4923076923076926e-05, 'epoch': 1.704, 'step': 2130}
{'loss': 0.1255615234375, 'learning_rate': 2.476923076923077e-05, 'epoch': 1.712, 'step': 2140}
{'loss': 0.16280517578125, 'learning_rate': 2.461538461538462e-05, 'epoch': 1.72, 'step': 2150}
{'loss': 0.1289306640625, 'learning_rate': 2.4461538461538465e-05, 'epoch': 1.728, 'step': 2160}
{'loss': 0.13631591796875, 'learning_rate': 2.430769230769231e-05, 'epoch': 1.736, 'step': 2170}
{'loss': 0.206427001953125, 'learning_rate': 2.4153846153846155e-05, 'epoch': 1.744, 'step': 2180

HBox(children=(FloatProgress(value=0.0, description='Iteration', max=1250.0, style=ProgressStyle(description_w…

{'loss': 0.03902587890625, 'learning_rate': 1.9076923076923077e-05, 'epoch': 2.008, 'step': 2510}
{'loss': 0.057373046875, 'learning_rate': 1.8923076923076925e-05, 'epoch': 2.016, 'step': 2520}
{'loss': 0.123223876953125, 'learning_rate': 1.876923076923077e-05, 'epoch': 2.024, 'step': 2530}
{'loss': 0.041717529296875, 'learning_rate': 1.8615384615384616e-05, 'epoch': 2.032, 'step': 2540}
{'loss': 0.1138916015625, 'learning_rate': 1.8461538461538465e-05, 'epoch': 2.04, 'step': 2550}
{'loss': 0.02467041015625, 'learning_rate': 1.830769230769231e-05, 'epoch': 2.048, 'step': 2560}
{'loss': 0.01309814453125, 'learning_rate': 1.8153846153846155e-05, 'epoch': 2.056, 'step': 2570}
{'loss': 0.024267578125, 'learning_rate': 1.8e-05, 'epoch': 2.064, 'step': 2580}
{'loss': 0.039697265625, 'learning_rate': 1.7846153846153846e-05, 'epoch': 2.072, 'step': 2590}
{'loss': 0.01412353515625, 'learning_rate': 1.7692307692307694e-05, 'epoch': 2.08, 'step': 2600}
{'loss': 0.099114990234375, 'learning_rate':

{'loss': 0.08291015625, 'learning_rate': 6.153846153846155e-06, 'epoch': 2.68, 'step': 3350}
{'loss': 0.01361083984375, 'learning_rate': 6e-06, 'epoch': 2.6879999999999997, 'step': 3360}
{'loss': 0.0572265625, 'learning_rate': 5.846153846153846e-06, 'epoch': 2.6959999999999997, 'step': 3370}
{'loss': 0.094732666015625, 'learning_rate': 5.692307692307692e-06, 'epoch': 2.7039999999999997, 'step': 3380}
{'loss': 0.050836181640625, 'learning_rate': 5.5384615384615385e-06, 'epoch': 2.7119999999999997, 'step': 3390}
{'loss': 0.129412841796875, 'learning_rate': 5.3846153846153855e-06, 'epoch': 2.7199999999999998, 'step': 3400}
{'loss': 0.032586669921875, 'learning_rate': 5.230769230769231e-06, 'epoch': 2.7279999999999998, 'step': 3410}
{'loss': 0.074041748046875, 'learning_rate': 5.076923076923077e-06, 'epoch': 2.7359999999999998, 'step': 3420}
{'loss': 0.041302490234375, 'learning_rate': 4.923076923076923e-06, 'epoch': 2.7439999999999998, 'step': 3430}
{'loss': 0.06114501953125, 'learning_ra

TrainOutput(global_step=3750, training_loss=0.183282177734375)

In [122]:
model = DistilBertForSequenceClassification.from_pretrained("distilbert-base-uncased")
model.eval()

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0): TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
       

In [136]:
from torch.utils.data import DataLoader
from transformers import DistilBertForSequenceClassification, AdamW

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased')
model.to(device)
model.train()

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)

optim = AdamW(model.parameters(), lr=5e-5)

for epoch in range(3):
    for batch in train_loader:
        optim.zero_grad()
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs[0]
        loss.backward()
        optim.step()

model.eval()

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0): TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
       

In [10]:
torch.cuda.is_available()

True

In [147]:
model.save_pretrained("bert model")
tokenizer.save_pretrained("bert model")

('bert model\\vocab.txt',
 'bert model\\special_tokens_map.json',
 'bert model\\added_tokens.json')

In [150]:
from transformers import AutoTokenizer, AutoModel
tokenizer = AutoTokenizer.from_pretrained("bert model")
model = DistilBertForSequenceClassification.from_pretrained("bert model")
model.to(device)

DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0): TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
       

In [143]:
def get_predictions(model, dataloader, compute_acc=False):
    predictions = None
    correct = 0
    total = 0
      
    with torch.no_grad():
        # 遍巡整個資料集
        for data in dataloader:
            # 將所有 tensors 移到 GPU 上
            #if next(model.parameters()).is_cuda:
            #    data = [t.to(device) for t in data if t is not None]
            
            
            # 別忘記前 3 個 tensors 分別為 tokens, segments 以及 masks
            # 且強烈建議在將這些 tensors 丟入 `model` 時指定對應的參數名稱
            #tokens_tensors, segments_tensors, masks_tensors = data[:3]
            #outputs = model(input_ids=tokens_tensors, 
            #                token_type_ids=segments_tensors, 
            #                attention_mask=masks_tensors)
            optim.zero_grad()
            input_ids = data['input_ids'].to(device)
            attention_mask = data['attention_mask'].to(device)
            #labels = data['labels'].to(device)
            outputs = model(input_ids, attention_mask=attention_mask)
            
            logits = outputs[0]
            _, pred = torch.max(logits.data, 1)
            
            # 用來計算訓練集的分類準確率
            if compute_acc:
                labels = data[3]
                total += labels.size(0)
                correct += (pred == labels).sum().item()
                
            # 將當前 batch 記錄下來
            if predictions is None:
                predictions = pred
            else:
                predictions = torch.cat((predictions, pred))
    
    if compute_acc:
        acc = correct / total
        return predictions, acc
    return predictions

testloader = DataLoader(test_dataset, batch_size=16)
predictions = get_predictions(model, testloader)
predictions

tensor([1, 1, 1,  ..., 0, 0, 0], device='cuda:0')

In [139]:
model.eval()

DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0): TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
       

In [141]:
text = 'this is good.'
encoded = tokenizer(text, truncation=True, padding=True)
encodedd = DataLoader(encoded, batch_size=1)
for batch in encodedd:
    batch[index_ids]

KeyError: 'Indexing with integers (to access backend Encoding for a given batch index) is not available when using Python based tokenizers'

In [166]:
from torch.nn.functional import softmax
#text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
text = 'not so bad.'
#text = 'I viewed the movie together with a homophobic friend, my wife and her female friend. So I had views from all kinds of directions. Mainly, the film made me laugh, the sexual tension was not really there and the only noticeable actors were Tudor Chirila and Maria Popistasu. Yes, I do think she played her role well, even if the script was not appropriate. There were good Romanian actors around, they just didn\'t have complex roles. I applaud Puya\'s entering the movie business. I don\'t know why, but I think he\'s a good guy, I just hope he\'ll be a good actor.<br /><br />The wife loved the movie, though, and I think there might have been chords being played and to which I had no ear for. If the film tried to present uncommon sexual behaviors and their consequences in todays Romania, then it failed miserably. There were no consequences. Just imagine that the girls are actually a boy and a girl, and the same story becomes just a boring, uninteresting plot.<br /><br />I have no idea why it got all those BAFTA awards. In my book, it should have gotten the "Better luck next time" award.'
#text = 'no good'
#encoded = tokenizer(text, truncation=True, padding=True)
#encodedd = DataLoader(encoded)
encoded = tokenizer.encode_plus(text, return_tensors='pt').to(device)
seq_relationship_logits = model(**encoded)[0]
probs = softmax(seq_relationship_logits, dim=1)
_, preds1 = torch.max(probs.data, dim=1)
#_, preds2 = torch.max(probs, dim=1)
preds1.tolist()
#predicted_tokens = tokenizer.convert_ids_to_tokens(preds1[0])
#predicted_tokens

[1]

In [131]:
train_texts[0], train_labels[0]

('I viewed the movie together with a homophobic friend, my wife and her female friend. So I had views from all kinds of directions. Mainly, the film made me laugh, the sexual tension was not really there and the only noticeable actors were Tudor Chirila and Maria Popistasu. Yes, I do think she played her role well, even if the script was not appropriate. There were good Romanian actors around, they just didn\'t have complex roles. I applaud Puya\'s entering the movie business. I don\'t know why, but I think he\'s a good guy, I just hope he\'ll be a good actor.<br /><br />The wife loved the movie, though, and I think there might have been chords being played and to which I had no ear for. If the film tried to present uncommon sexual behaviors and their consequences in todays Romania, then it failed miserably. There were no consequences. Just imagine that the girls are actually a boy and a girl, and the same story becomes just a boring, uninteresting plot.<br /><br />I have no idea why i

In [95]:
#encodedd = tokenizer(text, truncation=True, padding=True)
#encodedd, encoded
model(**encoded)

(tensor([[[-0.0865, -0.7266,  0.2697,  ...,  0.7230, -0.1993, -0.2673],
          [ 0.0124, -0.6388,  0.2815,  ...,  0.6794, -0.2216, -0.1828],
          [ 0.0114, -0.6859,  0.3307,  ...,  0.6306, -0.2857, -0.1467],
          [-0.0340, -0.5850,  0.5152,  ...,  0.6593, -0.4234, -0.4688],
          [-0.0872, -0.6707,  0.1509,  ...,  0.6818, -0.3874, -0.4071],
          [-0.0176, -0.6539,  0.2366,  ...,  0.7779, -0.5380, -0.3840]]],
        grad_fn=<NativeLayerNormBackward>),)

In [58]:
trainer.evaluate(encoded)

HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=1.0, style=ProgressStyle(description_wid…




KeyError: 'Indexing with integers (to access backend Encoding for a given batch index) is not available when using Python based tokenizers'