In [None]:
!pip install --q transformers
!pip install --q evaluate
!pip install --q accelerate
!pip install --q transformers[torch]

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import random
import gensim
import gensim.downloader
from gensim.models import KeyedVectors
import matplotlib.pyplot as plt
from tqdm import tqdm
from tabulate import tabulate

#pytorch
import torch
import torch.nn as nn
from torch.optim import Adam
from torch.optim import AdamW

#tf model
import tensorflow_hub as hub

#misc
import datasets
import evaluate
from transformers import TrainingArguments, Trainer

#transformers
from transformers import BertForSequenceClassification, AutoModelForSeq2SeqLM, AutoTokenizer, AutoConfig, BertModel, XLNetForSequenceClassification,YosoForSequenceClassification


#sklearn
from sklearn.metrics import classification_report, confusion_matrix

caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so: undefined symbol: _ZN3tsl6StatusC1EN10tensorflow5error4CodeESt17basic_string_viewIcSt11char_traitsIcEENS_14SourceLocationE']
caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io.so: undefined symbol: _ZTVN10tensorflow13GcsFileSystemE']


In [None]:
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
DEVICE

device(type='cuda')

**Data Preparation**

In [3]:
!wget -O "amazon.zip" "https://drive.google.com/uc?export=download&id=1MO1EYtIeCsJliaeCxrkqrIWKsowsd3X8&confirm=t&uuid=70bad3da-836c-43dd-90bc-c21da0dfb131&at=AB6BwCAx5FEYhB_nNxz511oJwkhT:1692183100774"

--2023-08-16 10:53:37--  https://drive.google.com/uc?export=download&id=1MO1EYtIeCsJliaeCxrkqrIWKsowsd3X8&confirm=t&uuid=70bad3da-836c-43dd-90bc-c21da0dfb131&at=AB6BwCAx5FEYhB_nNxz511oJwkhT:1692183100774
Resolving drive.google.com (drive.google.com)... 172.217.219.100, 172.217.219.113, 172.217.219.101, ...
Connecting to drive.google.com (drive.google.com)|172.217.219.100|:443... connected.
HTTP request sent, awaiting response... 303 See Other
Location: https://doc-14-0g-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/fh2p171qci8rbr449ak7vk6bpsqg8o2g/1692183150000/16787182300288898320/*/1MO1EYtIeCsJliaeCxrkqrIWKsowsd3X8?e=download&uuid=70bad3da-836c-43dd-90bc-c21da0dfb131 [following]
--2023-08-16 10:53:37--  https://doc-14-0g-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/fh2p171qci8rbr449ak7vk6bpsqg8o2g/1692183150000/16787182300288898320/*/1MO1EYtIeCsJliaeCxrkqrIWKsowsd3X8?e=download&uuid=70bad3da-836c-43dd-90bc-c21da0dfb131
Resolvin

In [4]:
!unzip "amazon.zip"

Archive:  amazon.zip
  inflating: test.csv                
  inflating: train.csv               
  inflating: validation.csv          
  inflating: amazon_translated_body_and_title_with_originals.csv  
  inflating: amazon_translated_body_and_title_with_originals_all_stars.csv  


In [None]:
SEED = 111

# Set the random seed for Python to SEED
random.seed(SEED)

# Set the random seed for numpy to SEED
np.random.seed(SEED)

# Set the random seed for torch to SEED
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

In [None]:
def prepareData(sentences, labels, tokenizer, max_length=256, batch_size=32):
    encoded_inputs = tokenizer(list(sentences), padding='max_length', truncation=True, max_length=max_length, return_tensors='pt')
    input_ids = encoded_inputs['input_ids']
    attention_mask = encoded_inputs['attention_mask']
    dataset = torch.utils.data.TensorDataset(input_ids, attention_mask, torch.tensor(labels))
    dataLoader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)
    return dataLoader

In [None]:
def prepareDataTruncation(sentences, labels, tokenizer, N, M, max_length=256, batch_size=32):
    tokenized_inputs = [tokenizer.tokenize(sentences[i]) for i in range(len(list(sentences)))]
    for i in range(len(tokenized_inputs)):
        if len(tokenized_inputs[i]) > max_length:
            tokenized_inputs[i] = tokenized_inputs[i][:N] + tokenized_inputs[i][-M:]
        tokenized_inputs[i] = tokenizer.convert_tokens_to_string(tokenized_inputs[i])
    return prepareData(tokenized_inputs, labels, tokenizer, max_length=max_length, batch_size=batch_size)

**IMDB Dataset Training**

**Dataset Evaluation**

In [None]:
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

In [None]:
def tokenize_function(examples):
    tokenized_inputs = [tokenizer.tokenize(examples[reviewType][i]) for i in range(len(list(examples[reviewType])))]
    for i in range(len(tokenized_inputs)):
        if len(tokenized_inputs[i]) > N+M:
            tokenized_inputs[i] = tokenized_inputs[i][:N] + tokenized_inputs[i][-M:]
        tokenized_inputs[i] = tokenizer.convert_tokens_to_string(tokenized_inputs[i])
    tokenized_inputs = tokenizer(tokenized_inputs,padding="max_length", truncation=True, max_length=N+M)
    return tokenized_inputs

In [None]:
def print_using_tabulate(data):
    table_data = []
    for key, values in data.items():
        if key != 'macro avg' and key != 'weighted avg':
            if isinstance(values, dict):
                row = [key, values['precision'], values['recall'], values['f1-score'], values['support']]
                table_data.append(row)

    # Print the classification report using tabulate
    headers = ['Class', 'Precision', 'Recall', 'F1-Score', 'Support']
    m_table = tabulate(table_data, headers=headers, tablefmt='psql',floatfmt=".4f")
    print(m_table)

In [None]:
def evaluateModel(model, test_data, target_names):
    classificationReports = []
    model.eval()
    np_predictions = []
    np_y_eval = []
    for batch in test_data:
        input_ids, attention_mask, labels = batch
        input_ids = input_ids.to(DEVICE)
        attention_mask = attention_mask.to(DEVICE)
        predictions = model(input_ids=input_ids, attention_mask=attention_mask)
        _, outclass_predictions = torch.max(predictions.logits, 1)
        np_predictions.extend(outclass_predictions.cpu().numpy())
        np_y_eval.extend(labels.cpu().numpy())
    np_predictions = np.array(np_predictions)
    np_y_eval = np.array(np_y_eval)
    classificationReports = classification_report(np_y_eval, np_predictions, target_names=target_names, output_dict=True)
    # Confusion matrix for each label
    return classificationReports

In [None]:
def train(model, training_args, dataset, numOfExamples, imdbOrAmazon="imdb"):
    tokenized_datasets = dataset.map(tokenize_function, batched=True)
    small_train_dataset = tokenized_datasets["train"].shuffle(seed=SEED).select(range(numOfExamples))
    if imdbOrAmazon == "imdb":
        trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=small_train_dataset,
        eval_dataset=tokenized_datasets['test'],
        compute_metrics=compute_metrics,
        )
    else:
        trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=small_train_dataset,
        eval_dataset=tokenized_datasets['validation'],
        compute_metrics=compute_metrics,
        )
    trainer.train()

In [None]:
def evaluateTranslatedTestSetAndRegularTestSet(model, tokenizer, dataset, translatedDataset, targetNames, addedText, batch_size=8):
    tranlsatedTestSetTrunc = prepareDataTruncation(translatedDataset['translated_body'], translatedDataset['labels'], tokenizer, N, M, max_length=N+M, batch_size=batch_size)
    classificationReports = evaluateModel(model, tranlsatedTestSetTrunc, targetNames)
    print(f"\033[1m{addedText}\033[0m")
    print("Classification Report Translated Test Set")
    print_using_tabulate(classificationReports)
    testSet = prepareDataTruncation(dataset['test'][reviewType], dataset['test']['labels'], tokenizer, N, M, max_length=N+M, batch_size=batch_size)
    print("Classification Report Test Set")
    classificationReports = evaluateModel(model, testSet, targetNames)
    print_using_tabulate(classificationReports)

**IMDB Dataset Start & End Only Trial**

Logically, when looking at reviews, most likely the sentiment will be at the start or the end (or both) of the review and the more in-depth details of the product/movie/etc will be in the middle.
Ideally, we would have liked to process all the review, mark N start tokens with "start_token ... \start_token" and mark M end tokens with "end_token ... \end_token".
Then, train bert while pooling out these indices from the last hidden layer. Then, through K fully connected layers, normalization, dropout, etc ...  and finally a classifier.
This operation will be too expensive, practically infeasible in our case due to limited resources and very long reviews.
Therefore, what we will do instead is preprocess the dataset such that the first N tokens are concatenated to the last M tokens to form a new review with the same sentiment as before.

In [None]:
saveStrategy = "no"
targetNames = ['negative', 'positive']

In [None]:
translatedTestSet = pd.read_csv('amazon_translated_body_and_title_with_originals.csv')
translatedTestSet.head(5)

Unnamed: 0.1,Unnamed: 0,stars,review_body,review_title,language,translated_title,translated_body
0,0,0,"Leider, leider nach einmal waschen ausgebliche...",Leider nicht zu empfehlen,de,Unfortunately not recommended,"Unfortunately, unfortunately faded after one w..."
1,1,0,zunächst macht der Anker Halter einen soliden ...,Gummierung nach 6 Monaten kaputt,de,Rubber broken after 6 months,"first of all, the anchor holder makes a solid ..."
2,2,0,Siegel sowie Verpackung war beschädigt und war...,Flohmarkt ware,de,flea market goods,Seal and packaging was damaged and item was us...
3,3,0,Habe dieses Produkt NIE erhalten und das Geld ...,Katastrophe,de,catastrophe,NEVER received this product and the money was ...
4,4,0,Die Träger sind schnell abgerissen,Reißverschluss klemmt,de,Zipper is stuck,The straps ripped off quickly


In [None]:
translatedTestSet.drop(columns=['Unnamed: 0'], axis=0, inplace=True)
translatedTestSet.rename(columns={'stars':'labels'}, inplace=True)
translatedTestSet.head(5)

Unnamed: 0,labels,review_body,review_title,language,translated_title,translated_body
0,0,"Leider, leider nach einmal waschen ausgebliche...",Leider nicht zu empfehlen,de,Unfortunately not recommended,"Unfortunately, unfortunately faded after one w..."
1,0,zunächst macht der Anker Halter einen soliden ...,Gummierung nach 6 Monaten kaputt,de,Rubber broken after 6 months,"first of all, the anchor holder makes a solid ..."
2,0,Siegel sowie Verpackung war beschädigt und war...,Flohmarkt ware,de,flea market goods,Seal and packaging was damaged and item was us...
3,0,Habe dieses Produkt NIE erhalten und das Geld ...,Katastrophe,de,catastrophe,NEVER received this product and the money was ...
4,0,Die Träger sind schnell abgerissen,Reißverschluss klemmt,de,Zipper is stuck,The straps ripped off quickly


**BERT CASED**

****N = 128, M = 382; Overall 510 tokens****

In [None]:
N = 128
M = 382
reviewType = "text"

In [None]:
from datasets import load_dataset
imdbDataset = load_dataset('imdb')

Downloading builder script:   0%|          | 0.00/1.79k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/1.05k [00:00<?, ?B/s]

Downloading and preparing dataset imdb/plain_text (download: 80.23 MiB, generated: 127.02 MiB, post-processed: Unknown size, total: 207.25 MiB) to /root/.cache/huggingface/datasets/imdb/plain_text/1.0.0/2fdd8b9bcadd6e7055e742a706876ba43f19faee861df134affd7a3f60fc38a1...


Downloading data:   0%|          | 0.00/84.1M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating unsupervised split:   0%|          | 0/50000 [00:00<?, ? examples/s]

Dataset imdb downloaded and prepared to /root/.cache/huggingface/datasets/imdb/plain_text/1.0.0/2fdd8b9bcadd6e7055e742a706876ba43f19faee861df134affd7a3f60fc38a1. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

In [None]:
imdbDataset

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 25000
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 25000
    })
    unsupervised: Dataset({
        features: ['text', 'label'],
        num_rows: 50000
    })
})

In [None]:
imdbDataset = imdbDataset.rename_column("label", "labels")

In [None]:
metric = evaluate.load("accuracy")
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

Downloading builder script:   0%|          | 0.00/4.20k [00:00<?, ?B/s]

In [None]:
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")

In [None]:
enLanguageModelStartEndTrial_N_128_M_382 = BertForSequenceClassification.from_pretrained("bert-base-cased", num_labels=2)

In [None]:
train(enLanguageModelStartEndTrial_N_128_M_382, training_args, imdbDataset, imdbDataset['train'].num_rows)

  0%|          | 0/25 [00:00<?, ?ba/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (519 > 512). Running this sequence through the model will result in indexing errors


  0%|          | 0/25 [00:00<?, ?ba/s]

  0%|          | 0/50 [00:00<?, ?ba/s]

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

  ········································


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3124,0.253203,0.91136


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(enLanguageModelStartEndTrial_N_128_M_382, tokenizer, imdbDataset, targetNames, "bert-base-cased-2-labels-510-tokens-head+tail", batch_size=8)

[1mbert-base-cased-2-labels-510-tokens-head+tail[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| negative |      0.7753 |   0.9565 |     0.8564 |      6000 |
| positive |      0.9432 |   0.7228 |     0.8185 |      6000 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| negative |      0.8732 |   0.9625 |     0.9157 |     12500 |
| positive |      0.9582 |   0.8602 |     0.9066 |     12500 |
+----------+-------------+----------+------------+-----------+


****repeat with N=M=64; overall 128 tokens****

In [None]:
N = 64
M = 64
reviewType = "text"

In [None]:
enLanguageModelStartEndTrial_N_64_M_64 = BertForSequenceClassification.from_pretrained("bert-base-cased", num_labels=2)

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(enLanguageModelStartEndTrial_N_64_M_64, training_args, imdbDataset, imdbDataset['train'].num_rows)

  0%|          | 0/25 [00:00<?, ?ba/s]

  0%|          | 0/25 [00:00<?, ?ba/s]

  0%|          | 0/50 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,0.3429,0.262426,0.8982


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(enLanguageModelStartEndTrial_N_64_M_64, tokenizer, imdbDataset, translatedTestSet, targetNames, "bert-base-cased-2-labels-N-64-M-64-head+tail", batch_size=8)

[1mbert-base-cased-2-labels-N-64-M-64-head+tail[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| negative |      0.8283 |   0.8925 |     0.8592 |      6000 |
| positive |      0.8835 |   0.8150 |     0.8479 |      6000 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| negative |      0.8899 |   0.9088 |     0.8993 |     12500 |
| positive |      0.9068 |   0.8876 |     0.8971 |     12500 |
+----------+-------------+----------+------------+-----------+


****repeat with N=510, M=0; overall 510 tokens****

In [None]:
N = 510
M = 0
reviewType = "text"

In [None]:
enLanguageModelStartEndTrial_N_510_M_0 = BertForSequenceClassification.from_pretrained("bert-base-cased", num_labels=2)

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(enLanguageModelStartEndTrial_N_510_M_0, training_args, imdbDataset, imdbDataset['train'].num_rows)

  0%|          | 0/25 [00:00<?, ?ba/s]

  0%|          | 0/25 [00:00<?, ?ba/s]

  0%|          | 0/50 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,0.3093,0.26856,0.90288


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(enLanguageModelStartEndTrial_N_510_M_0, tokenizer, imdbDataset, translatedTestSet, targetNames, "bert-base-cased-2-labels-N-510-M-0-head-only", batch_size=8)

[1mbert-base-cased-2-labels-N-510-M-0-head-only[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| negative |      0.7999 |   0.9517 |     0.8692 |      6000 |
| positive |      0.9404 |   0.7620 |     0.8418 |      6000 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| negative |      0.8611 |   0.9608 |     0.9082 |     12500 |
| positive |      0.9557 |   0.8450 |     0.8969 |     12500 |
+----------+-------------+----------+------------+-----------+


****repeat with N=0, M=510; overall 510 tokens****

In [None]:
N = 0
M = 510
reviewType = "text"

In [None]:
enLanguageModelStartEndTrial_N_0_M_510 = BertForSequenceClassification.from_pretrained("bert-base-cased", num_labels=2)

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(enLanguageModelStartEndTrial_N_0_M_510, training_args, imdbDataset, imdbDataset['train'].num_rows)

  0%|          | 0/25 [00:00<?, ?ba/s]

  0%|          | 0/25 [00:00<?, ?ba/s]

  0%|          | 0/50 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,0.3105,0.31947,0.90752


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(enLanguageModelStartEndTrial_N_0_M_510, tokenizer, imdbDataset, translatedTestSet, targetNames, "bert-base-cased-2-labels-N-0-M-510-tail-only", batch_size=8)

[1mbert-base-cased-2-labels-N-0-M-510-tail-only[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| negative |      0.8453 |   0.8678 |     0.8564 |      6000 |
| positive |      0.8642 |   0.8412 |     0.8525 |      6000 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| negative |      0.8700 |   0.9582 |     0.9120 |     12500 |
| positive |      0.9535 |   0.8568 |     0.9026 |     12500 |
+----------+-------------+----------+------------+-----------+


**BERT UNCASED**

****N = 128, M = 382; Overall 510 tokens****

In [None]:
N = 128
M = 382
reviewType = "text"

In [None]:
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

In [None]:
enLanguageModelStartEndTrial_N_128_M_382 = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(enLanguageModelStartEndTrial_N_128_M_382, training_args, imdbDataset, imdbDataset['train'].num_rows)

  0%|          | 0/25 [00:00<?, ?ba/s]

  0%|          | 0/25 [00:00<?, ?ba/s]

  0%|          | 0/50 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,0.2757,0.237274,0.93


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(enLanguageModelStartEndTrial_N_128_M_382, tokenizer, imdbDataset, translatedTestSet, targetNames, "bert-base-uncased-2-labels-N-128-M-382-head+tail", batch_size=8)

[1mbert-base-uncased-2-labels-N-128-M-382-head+tail[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| negative |      0.9038 |   0.8193 |     0.8595 |      6000 |
| positive |      0.8348 |   0.9128 |     0.8721 |      6000 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| negative |      0.9201 |   0.9418 |     0.9308 |     12500 |
| positive |      0.9404 |   0.9182 |     0.9292 |     12500 |
+----------+-------------+----------+------------+-----------+


**N=64, M=64; overall 128 tokens**

In [None]:
N = 64
M = 64
reviewType = "text"

In [None]:
enLanguageModelStartEndTrial_N_64_M_64 = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(enLanguageModelStartEndTrial_N_64_M_64, training_args, imdbDataset, imdbDataset['train'].num_rows)

In [None]:
train(enLanguageModelStartEndTrial_N_64_M_64, training_args, imdbDataset, imdbDataset['train'].num_rows)

  0%|          | 0/25 [00:00<?, ?ba/s]

  0%|          | 0/25 [00:00<?, ?ba/s]

  0%|          | 0/50 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,0.3205,0.283606,0.89408


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(enLanguageModelStartEndTrial_N_64_M_64, tokenizer, imdbDataset, translatedTestSet, targetNames, "bert-base-uncased-2-labels-N-64-M-64-head+tail", batch_size=8)

[1mbert-base-uncased-2-labels-N-64-M-64-head+tail[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| negative |      0.8246 |   0.9112 |     0.8657 |      6000 |
| positive |      0.9007 |   0.8062 |     0.8508 |      6000 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| negative |      0.8560 |   0.9476 |     0.8995 |     12500 |
| positive |      0.9413 |   0.8406 |     0.8881 |     12500 |
+----------+-------------+----------+------------+-----------+


**N=510, M=0; overall 510 tokens**

In [None]:
N = 510
M = 0
reviewType = "text"

In [None]:
enLanguageModelStartEndTrial_N_510_M_0 = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(enLanguageModelStartEndTrial_N_510_M_0, training_args, imdbDataset, imdbDataset['train'].num_rows)

  0%|          | 0/25 [00:00<?, ?ba/s]

  0%|          | 0/25 [00:00<?, ?ba/s]

  0%|          | 0/50 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,0.2903,0.242058,0.92732


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(enLanguageModelStartEndTrial_N_510_M_0, tokenizer, imdbDataset, translatedTestSet, targetNames, "bert-base-uncased-2-labels-N-510-M-0-head-only", batch_size=8)

[1mbert-base-uncased-2-labels-N-510-M-0-head-only[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| negative |      0.9049 |   0.8307 |     0.8662 |      6000 |
| positive |      0.8435 |   0.9127 |     0.8767 |      6000 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| negative |      0.9402 |   0.9127 |     0.9262 |     12500 |
| positive |      0.9152 |   0.9419 |     0.9284 |     12500 |
+----------+-------------+----------+------------+-----------+


**N=0, M=510; overall 510 tokens**

In [None]:
N = 0
M = 510
reviewType = "text"

In [None]:
enLanguageModelStartEndTrial_N_0_M_510 = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(enLanguageModelStartEndTrial_N_0_M_510, training_args, imdbDataset, imdbDataset['train'].num_rows)

  0%|          | 0/25 [00:00<?, ?ba/s]

  0%|          | 0/25 [00:00<?, ?ba/s]

  0%|          | 0/50 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,0.2901,0.255441,0.929


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(enLanguageModelStartEndTrial_N_0_M_510, tokenizer, imdbDataset, translatedTestSet, targetNames, "bert-base-uncased-2-labels-N-0-M-510-tail-only", batch_size=8)

[1mbert-base-uncased-2-labels-N-0-M-510-tail-only[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| negative |      0.9006 |   0.8498 |     0.8745 |      6000 |
| positive |      0.8578 |   0.9062 |     0.8813 |      6000 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| negative |      0.9247 |   0.9341 |     0.9294 |     12500 |
| positive |      0.9334 |   0.9239 |     0.9286 |     12500 |
+----------+-------------+----------+------------+-----------+


**XLNET IMDB**

**N=128, M=382; overall 510 tokens**

In [None]:
N = 128
M = 382
reviewType = "text"

In [None]:
tokenizer = AutoTokenizer.from_pretrained("xlnet-base-cased")
XLNETModel_N_128_M_382 = XLNetForSequenceClassification.from_pretrained("xlnet-base-cased", num_labels=2)

Downloading (…)lve/main/config.json:   0%|          | 0.00/760 [00:00<?, ?B/s]

Downloading (…)ve/main/spiece.model:   0%|          | 0.00/798k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.38M [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/467M [00:00<?, ?B/s]

Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'sequence_summary.summary.bias', 'logits_proj.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(XLNETModel_N_128_M_382, training_args, imdbDataset, imdbDataset['train'].num_rows)

  0%|          | 0/25 [00:00<?, ?ba/s]

  0%|          | 0/25 [00:00<?, ?ba/s]

  0%|          | 0/50 [00:00<?, ?ba/s]

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

  ········································


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Epoch,Training Loss,Validation Loss,Accuracy
1,0.2745,0.290929,0.94192


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(XLNETModel_N_128_M_382, tokenizer, imdbDataset, translatedTestSet, targetNames, "xlnet-base-cased-2-labels-N-128-M-382-head+tail", batch_size=8)

[1mxlnet-base-cased-2-labels-N-128-M-382-head+tail[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| negative |      0.8253 |   0.8875 |     0.8553 |      6000 |
| positive |      0.8783 |   0.8122 |     0.8440 |      6000 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| negative |      0.9231 |   0.9636 |     0.9429 |     12500 |
| positive |      0.9619 |   0.9198 |     0.9404 |     12500 |
+----------+-------------+----------+------------+-----------+


**N=64, M=64, overall 128 tokens**

In [None]:
N = 64
M = 64
reviewType = "text"

In [None]:
XLNETModel_N_64_M_64 = XLNetForSequenceClassification.from_pretrained("xlnet-base-cased", num_labels=2)

Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'sequence_summary.summary.bias', 'logits_proj.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(XLNETModel_N_64_M_64, training_args, imdbDataset, imdbDataset['train'].num_rows)

  0%|          | 0/25 [00:00<?, ?ba/s]

  0%|          | 0/25 [00:00<?, ?ba/s]

  0%|          | 0/50 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,0.3254,0.30657,0.91436


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(XLNETModel_N_64_M_64, tokenizer, imdbDataset, translatedTestSet, targetNames, "xlnet-base-cased-2-labels-N-64-M-64-head+tail", batch_size=8)

[1mxlnet-base-cased-2-labels-N-64-M-64-head+tail[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| negative |      0.8829 |   0.8800 |     0.8815 |      6000 |
| positive |      0.8804 |   0.8833 |     0.8819 |      6000 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| negative |      0.8892 |   0.9465 |     0.9170 |     12500 |
| positive |      0.9428 |   0.8821 |     0.9114 |     12500 |
+----------+-------------+----------+------------+-----------+


**N=510, M=0; overall 510 tokens**

In [None]:
N = 510
M = 0
reviewType = "text"

In [None]:
XLNETModel_N_510_M_0 = XLNetForSequenceClassification.from_pretrained("xlnet-base-cased", num_labels=2)

Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'sequence_summary.summary.bias', 'logits_proj.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(XLNETModel_N_510_M_0, training_args, imdbDataset, imdbDataset['train'].num_rows)

Epoch,Training Loss,Validation Loss,Accuracy
1,0.3254,0.30657,0.91436


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(XLNETModel_N_510_M_0, tokenizer, imdbDataset, translatedTestSet, targetNames, "xlnet-base-cased-2-labels-N-510-M-0-head-only", batch_size=8)

[1mxlnet-base-cased-2-labels-N-510-M-0-head-only[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| negative |      0.8821 |   0.8800 |     0.8810 |      6000 |
| positive |      0.8803 |   0.8823 |     0.8813 |      6000 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| negative |      0.8878 |   0.9755 |     0.9296 |     12500 |
| positive |      0.9728 |   0.8767 |     0.9223 |     12500 |
+----------+-------------+----------+------------+-----------+


**N=0, M=510; overall 510 tokens**

In [None]:
N = 0
M = 510
reviewType = "text"

In [None]:
XLNETModel_N_0_M_510 = XLNetForSequenceClassification.from_pretrained("xlnet-base-cased", num_labels=2)

Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'sequence_summary.summary.bias', 'logits_proj.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(XLNETModel_N_0_M_510, training_args, imdbDataset, imdbDataset['train'].num_rows)

Epoch,Training Loss,Validation Loss,Accuracy
1,0.3299,0.242945,0.91808


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(XLNETModel_N_0_M_510, tokenizer, imdbDataset, translatedTestSet, targetNames, "xlnet-base-cased-2-labels-N-0-M-510-tail-only", batch_size=8)

[1mxlnet-base-cased-2-labels-N-0-M-510-tail-only[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| negative |      0.9089 |   0.8217 |     0.8631 |      6000 |
| positive |      0.8373 |   0.9177 |     0.8756 |      6000 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| negative |      0.9446 |   0.9526 |     0.9486 |     12500 |
| positive |      0.9522 |   0.9442 |     0.9481 |     12500 |
+----------+-------------+----------+------------+-----------+


**Amazon En Dataset**

In [None]:
def dropColumns(df):
    df.drop(columnsToDrop, axis=1, inplace=True)
    return df

In [None]:
translatedTestSetAllStars = pd.read_csv('amazon_translated_body_and_title_with_originals_all_stars.csv')
translatedTestSetAllStars.head(5)

In [None]:
translatedTestSetAllStars.drop(columns=['Unnamed: 0'], axis=0, inplace=True)
translatedTestSetAllStars.rename(columns={'stars':'labels'}, inplace=True)

In [None]:
trainingSetAmazon = pd.read_csv('train.csv')
validationSetAmazon = pd.read_csv('validation.csv')
testSetAmazon = pd.read_csv('test.csv')

In [None]:
columnsToDrop = ['Unnamed: 0', 'review_id', 'product_id', 'reviewer_id', 'product_category']
trainingSetAmazon = dropColumns(trainingSetAmazon)
validationSetAmazon = dropColumns(validationSetAmazon)
testSetAmazon = dropColumns(testSetAmazon)

In [None]:
def extractLanguage(language, df):
    new_df = df.copy()
    exclude = np.where(new_df['language'] != language)
    new_df.drop(exclude[0],axis=0, inplace=True)
    return new_df
enOnlyTrain = extractLanguage('en', trainingSetAmazon)
enOnlyDev = extractLanguage('en', validationSetAmazon)
enOnlyTest = extractLanguage('en', testSetAmazon)

In [None]:
enOnlyTrain.head(5)

In [None]:
enOnlyTrain = datasets.Dataset.from_pandas(enOnlyTrain)
enOnlyDev = datasets.Dataset.from_pandas(enOnlyDev)
enOnlyTest = datasets.Dataset.from_pandas(enOnlyTest)

In [None]:
en_only_dataset = datasets.DatasetDict({"train": enOnlyTrain, 'validation': enOnlyDev, 'test': enOnlyTest})
en_only_dataset

In [None]:
en_only_dataset = en_only_dataset.rename_column('stars', 'labels')

In [None]:
en_only_dataset = en_only_dataset.remove_columns('__index_level_0__')

In [None]:
def reduceOne(examples):
    examples['labels'] = examples['labels'] - 1
    return examples

In [None]:
en_only_dataset = en_only_dataset.map(reduceOne)

In [None]:
translatedTestSetAllStars['labels'] = translatedTestSetAllStars['labels'] - 1

**Amazon Dataset English Only Model**

**BERT CASED**

**Five labels; N=128, M=382; overall 510 tokens**

In [None]:
metric = evaluate.load("accuracy")
numLabels = 5
targetNames = ['Negative', 'Somewhat Negative', 'Neutral', 'Somewhat Positive', 'Positive']

In [None]:
N = 128
M = 382
reviewType = "review_body"

In [None]:
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased", num_labels=numLabels)

In [None]:
amazonEnLanguageModel_five_labels_N_128_M_382 = BertForSequenceClassification.from_pretrained("bert-base-cased", num_labels=numLabels)

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-4, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(amazonEnLanguageModel_five_labels_N_128_M_382, training_args, en_only_dataset, 30000, "amazon")

  0%|          | 0/200 [00:00<?, ?ba/s]

  0%|          | 0/5 [00:00<?, ?ba/s]

  0%|          | 0/5 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,1.0447,1.036176,0.5436


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(amazonEnLanguageModel_five_labels_N_128_M_382, tokenizer, en_only_dataset, translatedTestSetAllStars, targetNames, "bert-base-cased-5-labels-N-128-M-382-head+tail", batch_size=8)

[1mbert-base-cased-5-labels-N-128-M-382-head+tail[0m
Classification Report Translated Test Set
+-------------------+-------------+----------+------------+-----------+
| Class             |   Precision |   Recall |   F1-Score |   Support |
|-------------------+-------------+----------+------------+-----------|
| Negative          |      0.5956 |   0.7550 |     0.6659 |      3000 |
| Somewhat Negative |      0.4342 |   0.4330 |     0.4336 |      3000 |
| Neutral           |      0.4614 |   0.3343 |     0.3877 |      3000 |
| Somewhat Positive |      0.4706 |   0.3917 |     0.4275 |      3000 |
| Positive          |      0.6188 |   0.7290 |     0.6694 |      3000 |
+-------------------+-------------+----------+------------+-----------+
Classification Report Test Set
+-------------------+-------------+----------+------------+-----------+
| Class             |   Precision |   Recall |   F1-Score |   Support |
|-------------------+-------------+----------+------------+-----------|
| Negati

**Five labels; N=64, M=64; overall 128 tokens**

In [None]:
N = 64
M = 64
reviewType = "review_body"

In [None]:
amazonEnLanguageModel_five_labels_N_64_M_64 = BertForSequenceClassification.from_pretrained("bert-base-cased", num_labels=numLabels)

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(amazonEnLanguageModel_five_labels_N_64_M_64, training_args, en_only_dataset, 30000, "amazon")

  0%|          | 0/200 [00:00<?, ?ba/s]

  0%|          | 0/5 [00:00<?, ?ba/s]

  0%|          | 0/5 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,1.0443,1.051336,0.5474


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(amazonEnLanguageModel_five_labels_N_64_M_64, tokenizer, en_only_dataset, translatedTestSetAllStars, targetNames, "bert-base-cased-5-labels-N-64-M-64-head+tail", batch_size=8)

[1mbert-base-cased-5-labels-N-64-M-64-head+tail[0m
Classification Report Translated Test Set
+-------------------+-------------+----------+------------+-----------+
| Class             |   Precision |   Recall |   F1-Score |   Support |
|-------------------+-------------+----------+------------+-----------|
| Negative          |      0.6153 |   0.7310 |     0.6682 |      3000 |
| Somewhat Negative |      0.4284 |   0.5133 |     0.4670 |      3000 |
| Neutral           |      0.4371 |   0.3267 |     0.3739 |      3000 |
| Somewhat Positive |      0.4677 |   0.3590 |     0.4062 |      3000 |
| Positive          |      0.6292 |   0.6913 |     0.6588 |      3000 |
+-------------------+-------------+----------+------------+-----------+
Classification Report Test Set
+-------------------+-------------+----------+------------+-----------+
| Class             |   Precision |   Recall |   F1-Score |   Support |
|-------------------+-------------+----------+------------+-----------|
| Negative

**Five labels; N=510, M=0; overall 510 tokens**

In [None]:
N = 510
M = 0
reviewType = "review_body"

In [None]:
amazonEnLanguageModel_five_labels_N_510_M_0 = BertForSequenceClassification.from_pretrained("bert-base-cased", num_labels=numLabels)

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(amazonEnLanguageModel_five_labels_N_510_M_0, training_args, en_only_dataset, 30000, "amazon")

  0%|          | 0/200 [00:00<?, ?ba/s]

  0%|          | 0/5 [00:00<?, ?ba/s]

  0%|          | 0/5 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,1.0412,1.032473,0.5504


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(amazonEnLanguageModel_five_labels_N_510_M_0, tokenizer, en_only_dataset, translatedTestSetAllStars, targetNames, "bert-base-cased-5-labels-N-510-M-0-head-only", batch_size=8)

[1mbert-base-cased-5-labels-N-510-M-0-head-only[0m
Classification Report Translated Test Set
+-------------------+-------------+----------+------------+-----------+
| Class             |   Precision |   Recall |   F1-Score |   Support |
|-------------------+-------------+----------+------------+-----------|
| Negative          |      0.6126 |   0.7373 |     0.6692 |      3000 |
| Somewhat Negative |      0.4390 |   0.4270 |     0.4329 |      3000 |
| Neutral           |      0.4577 |   0.3657 |     0.4065 |      3000 |
| Somewhat Positive |      0.4590 |   0.4033 |     0.4294 |      3000 |
| Positive          |      0.6248 |   0.7160 |     0.6673 |      3000 |
+-------------------+-------------+----------+------------+-----------+
Classification Report Test Set
+-------------------+-------------+----------+------------+-----------+
| Class             |   Precision |   Recall |   F1-Score |   Support |
|-------------------+-------------+----------+------------+-----------|
| Negative

**Five labels; N=0, M=510; overall 510 tokens**

In [None]:
N = 0
M = 510
reviewType = "review_body"

In [None]:
amazonEnLanguageModel_five_labels_N_0_M_510 = BertForSequenceClassification.from_pretrained("bert-base-cased", num_labels=numLabels)

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(amazonEnLanguageModel_five_labels_N_0_M_510, training_args, en_only_dataset, 30000, "amazon")

  0%|          | 0/200 [00:00<?, ?ba/s]

  0%|          | 0/5 [00:00<?, ?ba/s]

  0%|          | 0/5 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,1.0399,1.031578,0.5522


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(amazonEnLanguageModel_five_labels_N_0_M_510, tokenizer, en_only_dataset, translatedTestSetAllStars, targetNames, "bert-base-cased-5-labels-N-0-M-510-tail-only", batch_size=8)

[1mbert-base-cased-5-labels-N-0-M-510-tail-only[0m
Classification Report Translated Test Set
+-------------------+-------------+----------+------------+-----------+
| Class             |   Precision |   Recall |   F1-Score |   Support |
|-------------------+-------------+----------+------------+-----------|
| Negative          |      0.6030 |   0.7433 |     0.6659 |      3000 |
| Somewhat Negative |      0.4382 |   0.4407 |     0.4394 |      3000 |
| Neutral           |      0.4645 |   0.3553 |     0.4026 |      3000 |
| Somewhat Positive |      0.4602 |   0.4050 |     0.4309 |      3000 |
| Positive          |      0.6290 |   0.7023 |     0.6636 |      3000 |
+-------------------+-------------+----------+------------+-----------+
Classification Report Test Set
+-------------------+-------------+----------+------------+-----------+
| Class             |   Precision |   Recall |   F1-Score |   Support |
|-------------------+-------------+----------+------------+-----------|
| Negative

**BERT UNCASED**

In [None]:
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased", num_labels=numLabels)

**Five labels; N=128, M=382; overall 510 tokens**

In [None]:
N = 128
M = 382
reviewType = "review_body"

In [None]:
amazonEnLanguageModel_five_labels_N_128_M_382 = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=numLabels)

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(amazonEnLanguageModel_five_labels_N_128_M_382, training_args, en_only_dataset, 30000, "amazon")

  0%|          | 0/200 [00:00<?, ?ba/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (520 > 512). Running this sequence through the model will result in indexing errors


  0%|          | 0/5 [00:00<?, ?ba/s]

  0%|          | 0/5 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,1.0365,1.091112,0.5326


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(amazonEnLanguageModel_five_labels_N_128_M_382, tokenizer, en_only_dataset, translatedTestSetAllStars, targetNames, "bert-base-uncased-5-labels-N-128-M-382-head+tail", batch_size=8)

[1mbert-base-uncased-5-labels-N-128-M-382-head+tail[0m
Classification Report Translated Test Set
+-------------------+-------------+----------+------------+-----------+
| Class             |   Precision |   Recall |   F1-Score |   Support |
|-------------------+-------------+----------+------------+-----------|
| Negative          |      0.5702 |   0.7960 |     0.6644 |      3000 |
| Somewhat Negative |      0.4091 |   0.4657 |     0.4355 |      3000 |
| Neutral           |      0.4553 |   0.2493 |     0.3222 |      3000 |
| Somewhat Positive |      0.4785 |   0.3563 |     0.4085 |      3000 |
| Positive          |      0.6315 |   0.7410 |     0.6819 |      3000 |
+-------------------+-------------+----------+------------+-----------+
Classification Report Test Set
+-------------------+-------------+----------+------------+-----------+
| Class             |   Precision |   Recall |   F1-Score |   Support |
|-------------------+-------------+----------+------------+-----------|
| Nega

**Five labels; N=64, M=64; overall 128 tokens**

In [None]:
N = 64
M = 64
reviewType = "review_body"

In [None]:
amazonEnLanguageModel_five_labels_N_64_M_64 = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=numLabels)

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(amazonEnLanguageModel_five_labels_N_64_M_64, training_args, en_only_dataset, 30000, "amazon")

  0%|          | 0/200 [00:00<?, ?ba/s]

  0%|          | 0/5 [00:00<?, ?ba/s]

  0%|          | 0/5 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,1.0314,1.040522,0.554


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(amazonEnLanguageModel_five_labels_N_64_M_64, tokenizer, en_only_dataset, translatedTestSetAllStars, targetNames, "bert-base-uncased-5-labels-N-64-M-64-head+tail", batch_size=8)

[1mbert-base-uncased-5-labels-N-64-M-64-head+tail[0m
Classification Report Translated Test Set
+-------------------+-------------+----------+------------+-----------+
| Class             |   Precision |   Recall |   F1-Score |   Support |
|-------------------+-------------+----------+------------+-----------|
| Negative          |      0.6293 |   0.7107 |     0.6675 |      3000 |
| Somewhat Negative |      0.4371 |   0.4840 |     0.4593 |      3000 |
| Neutral           |      0.4600 |   0.3473 |     0.3958 |      3000 |
| Somewhat Positive |      0.4650 |   0.3920 |     0.4254 |      3000 |
| Positive          |      0.6293 |   0.7333 |     0.6773 |      3000 |
+-------------------+-------------+----------+------------+-----------+
Classification Report Test Set
+-------------------+-------------+----------+------------+-----------+
| Class             |   Precision |   Recall |   F1-Score |   Support |
|-------------------+-------------+----------+------------+-----------|
| Negati

**Five labels; N=510, M=0; overall 510 tokens**

In [None]:
N = 510
M = 0
reviewType = "review_body"

In [None]:
amazonEnLanguageModel_five_labels_N_510_M_0 = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=numLabels)

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(amazonEnLanguageModel_five_labels_N_510_M_0, training_args, en_only_dataset, 30000, "amazon")

  0%|          | 0/200 [00:00<?, ?ba/s]

  0%|          | 0/5 [00:00<?, ?ba/s]

  0%|          | 0/5 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,1.0363,1.065287,0.5384


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(amazonEnLanguageModel_five_labels_N_510_M_0, tokenizer, en_only_dataset, translatedTestSetAllStars, targetNames, "bert-base-uncased-5-labels-N-510-M-0-head-only", batch_size=8)

[1mbert-base-uncased-5-labels-N-510-M-0-head-only[0m
Classification Report Translated Test Set
+-------------------+-------------+----------+------------+-----------+
| Class             |   Precision |   Recall |   F1-Score |   Support |
|-------------------+-------------+----------+------------+-----------|
| Negative          |      0.5932 |   0.7613 |     0.6669 |      3000 |
| Somewhat Negative |      0.4270 |   0.4907 |     0.4566 |      3000 |
| Neutral           |      0.4665 |   0.2580 |     0.3323 |      3000 |
| Somewhat Positive |      0.4736 |   0.3920 |     0.4290 |      3000 |
| Positive          |      0.6310 |   0.7490 |     0.6850 |      3000 |
+-------------------+-------------+----------+------------+-----------+
Classification Report Test Set
+-------------------+-------------+----------+------------+-----------+
| Class             |   Precision |   Recall |   F1-Score |   Support |
|-------------------+-------------+----------+------------+-----------|
| Negati

**At this point we got discoureged to keep going as the accuracy is really low**

In [None]:
def dropColumns(df):
    df.drop(columnsToDrop, axis=1, inplace=True)
    return df

In [None]:
translatedTestSetAllStars = pd.read_csv('amazon_translated_body_and_title_with_originals_all_stars.csv')
translatedTestSetAllStars.head(5)

Unnamed: 0.1,Unnamed: 0,stars,review_body,review_title,language,translated_title,translated_body
0,0,1,"Leider, leider nach einmal waschen ausgebliche...",Leider nicht zu empfehlen,de,Unfortunately not recommended,"Unfortunately, unfortunately faded after one w..."
1,1,1,zunächst macht der Anker Halter einen soliden ...,Gummierung nach 6 Monaten kaputt,de,Rubber broken after 6 months,"first of all, the anchor holder makes a solid ..."
2,2,1,Siegel sowie Verpackung war beschädigt und war...,Flohmarkt ware,de,flea market goods,Seal and packaging was damaged and item was us...
3,3,1,Habe dieses Produkt NIE erhalten und das Geld ...,Katastrophe,de,catastrophe,NEVER received this product and the money was ...
4,4,1,Die Träger sind schnell abgerissen,Reißverschluss klemmt,de,Zipper is stuck,The straps ripped off quickly


In [None]:
translatedTestSetAllStars.drop(columns=['Unnamed: 0'], axis=0, inplace=True)
translatedTestSetAllStars.rename(columns={'stars':'labels'}, inplace=True)

In [None]:
trainingSetAmazon = pd.read_csv('train.csv')
validationSetAmazon = pd.read_csv('validation.csv')
testSetAmazon = pd.read_csv('test.csv')

In [None]:
columnsToDrop = ['Unnamed: 0', 'review_id', 'product_id', 'reviewer_id', 'product_category']
trainingSetAmazon = dropColumns(trainingSetAmazon)
validationSetAmazon = dropColumns(validationSetAmazon)
testSetAmazon = dropColumns(testSetAmazon)

In [None]:
def extractLanguage(language, df):
    new_df = df.copy()
    exclude = np.where(new_df['language'] != language)
    new_df.drop(exclude[0],axis=0, inplace=True)
    return new_df
enOnlyTrain = extractLanguage('en', trainingSetAmazon)
enOnlyDev = extractLanguage('en', validationSetAmazon)
enOnlyTest = extractLanguage('en', testSetAmazon)

In [None]:
def changeToBinary(example):
    if example['labels'] >= threshold:
        example['labels'] = 1
    else:
        example['labels'] = 0
    return example

In [None]:
def removeNeutrals(examples):
    df = pd.DataFrame(examples)
    exclude = np.where(df['labels'] == 3)
    df.drop(exclude[0],axis=0, inplace=True)
    return df

**Train on Amazon, Test on IMDB**

In [None]:
threshold = 3 # neutrals are included
numLabels = 2
targetNames = ['Negative', 'Positive']
metric = evaluate.load("accuracy")

In [None]:
updatedDataset = en_only_dataset.map(changeToBinary)

  0%|          | 0/200000 [00:00<?, ?ex/s]

  0%|          | 0/5000 [00:00<?, ?ex/s]

  0%|          | 0/5000 [00:00<?, ?ex/s]

**BERT CASED**

In [None]:
tokenizer = AutoTokenizer.from_pretrained('bert-base-cased', num_labels=numLabels)

Downloading (…)okenizer_config.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]

**Two labels with Neutral; N=128, M=382; overall 510 tokens**

In [None]:
N = 128
M = 382
reviewType = "review_body"

In [None]:
amazonEnLanguageModel_two_labels_N_128_M_382 = BertForSequenceClassification.from_pretrained("bert-base-cased", num_labels=numLabels)

Downloading model.safetensors:   0%|          | 0.00/436M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initi

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-4, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(amazonEnLanguageModel_two_labels_N_128_M_382, training_args, updatedDataset, 30000, "amazon")

  0%|          | 0/200 [00:00<?, ?ba/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (529 > 512). Running this sequence through the model will result in indexing errors


  0%|          | 0/5 [00:00<?, ?ba/s]

  0%|          | 0/5 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,0.3933,0.40117,0.8218


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(amazonEnLanguageModel_two_labels_N_128_M_382, tokenizer, updatedDataset, imdbDataset['test'], targetNames, "bert-base-cased-2-labels-N-128-M-382-head+tail", "Amazon", batch_size=8)

[1mbert-base-cased-2-labels-N-128-M-382-head+tail[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.8143 |   0.8682 |     0.8404 |     12500 |
| Positive |      0.8588 |   0.8021 |     0.8295 |     12500 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.7439 |   0.8640 |     0.7994 |      2000 |
| Positive |      0.8984 |   0.8017 |     0.8473 |      3000 |
+----------+-------------+----------+------------+-----------+


**Two labels with Neutral; N=64, M=64; overall 128 tokens**

In [None]:
N = 64
M = 64
reviewType = "review_body"

In [None]:
amazonEnLanguageModel_two_labels_N_64_M_64 = BertForSequenceClassification.from_pretrained("bert-base-cased", num_labels=numLabels)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initi

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(amazonEnLanguageModel_two_labels_N_64_M_64, training_args, updatedDataset, 30000, "amazon")

  0%|          | 0/200 [00:00<?, ?ba/s]

  0%|          | 0/5 [00:00<?, ?ba/s]

  0%|          | 0/5 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,0.3963,0.408553,0.8168


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(amazonEnLanguageModel_two_labels_N_64_M_64, tokenizer, updatedDataset, imdbDataset['test'], targetNames, "bert-base-cased-2-labels-N-64-M-64-head+tail", "Amazon", batch_size=8)

[1mbert-base-cased-5-labels-N-64-M-64-head+tail[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.7915 |   0.8822 |     0.8344 |     12500 |
| Positive |      0.8669 |   0.7676 |     0.8142 |     12500 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.7217 |   0.9090 |     0.8046 |      2000 |
| Positive |      0.9266 |   0.7663 |     0.8389 |      3000 |
+----------+-------------+----------+------------+-----------+


**Two labels with Neutral; N=510, M=0; overall 510 tokens**

In [None]:
N = 510
M = 0
reviewType = "review_body"

In [None]:
amazonEnLanguageModel_two_labels_N_510_M_0 = BertForSequenceClassification.from_pretrained("bert-base-cased", num_labels=numLabels)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initi

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(amazonEnLanguageModel_two_labels_N_510_M_0, training_args, updatedDataset, 30000, "amazon")

  0%|          | 0/200 [00:00<?, ?ba/s]

  0%|          | 0/5 [00:00<?, ?ba/s]

  0%|          | 0/5 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,0.3931,0.402589,0.821


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(amazonEnLanguageModel_two_labels_N_510_M_0, tokenizer, updatedDataset, imdbDataset['test'], targetNames, "bert-base-cased-2-labels-N-510-M-0-head-only", "Amazon", batch_size=8)

[1mbert-base-cased-2-labels-N-510-M-0-head-only[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.8019 |   0.8908 |     0.8440 |     12500 |
| Positive |      0.8772 |   0.7799 |     0.8257 |     12500 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.7460 |   0.8680 |     0.8024 |      2000 |
| Positive |      0.9012 |   0.8030 |     0.8493 |      3000 |
+----------+-------------+----------+------------+-----------+


**Two labels with Neutral; N=0, M=510; overall 510 tokens**

In [None]:
N = 0
M = 510
reviewType = "review_body"

In [None]:
amazonEnLanguageModel_two_labels_N_0_M_510 = BertForSequenceClassification.from_pretrained("bert-base-cased", num_labels=numLabels)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initi

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(amazonEnLanguageModel_two_labels_N_0_M_510, training_args, updatedDataset, 30000, "amazon")

  0%|          | 0/200 [00:00<?, ?ba/s]

  0%|          | 0/5 [00:00<?, ?ba/s]

  0%|          | 0/5 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,0.3931,0.402855,0.821


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(amazonEnLanguageModel_two_labels_N_0_M_510, tokenizer, updatedDataset, imdbDataset['test'], targetNames, "bert-base-cased-2-labels-N-0-M-510-tail-only", "Amazon", batch_size=8)

[1mbert-base-cased-2-labels-N-0-M-510-tail-only[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.8139 |   0.8874 |     0.8491 |     12500 |
| Positive |      0.8763 |   0.7971 |     0.8348 |     12500 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.7472 |   0.8690 |     0.8035 |      2000 |
| Positive |      0.9020 |   0.8040 |     0.8502 |      3000 |
+----------+-------------+----------+------------+-----------+


**BERT UNCASED**

In [None]:
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased", num_labels=numLabels)

**Two labels with Neutral; N=128, M=382; overall 510 tokens**

In [None]:
N = 128
M = 382
reviewType = "review_body"

In [None]:
amazonEnLanguageModel_two_labels_N_128_M_382 = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=numLabels)

Downloading model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly i

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(amazonEnLanguageModel_two_labels_N_128_M_382, training_args, updatedDataset, 30000, "amazon")

  0%|          | 0/200 [00:00<?, ?ba/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (520 > 512). Running this sequence through the model will result in indexing errors


  0%|          | 0/5 [00:00<?, ?ba/s]

  0%|          | 0/5 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,0.3955,0.384687,0.8298


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(amazonEnLanguageModel_two_labels_N_128_M_382, tokenizer, updatedDataset, imdbDataset['test'], targetNames, "bert-base-uncased-2-labels-N-128-M-382-head+tail", "Amazon", batch_size=8)

[1mbert-base-uncased-2-labels-N-128-M-382-head+tail[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.8746 |   0.8441 |     0.8591 |     12500 |
| Positive |      0.8493 |   0.8790 |     0.8639 |     12500 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.7538 |   0.8590 |     0.8030 |      2000 |
| Positive |      0.8964 |   0.8130 |     0.8526 |      3000 |
+----------+-------------+----------+------------+-----------+


**Two labels with Neutral; N=64, M=64; overall 128 tokens**

In [None]:
N = 64
M = 64
reviewType = "review_body"

In [None]:
amazonEnLanguageModel_two_labels_N_64_M_64 = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=numLabels)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly i

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(amazonEnLanguageModel_two_labels_N_64_M_64, training_args, updatedDataset, 30000, "amazon")

  0%|          | 0/200 [00:00<?, ?ba/s]

  0%|          | 0/5 [00:00<?, ?ba/s]

  0%|          | 0/5 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,0.3888,0.385671,0.829


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(amazonEnLanguageModel_two_labels_N_64_M_64, tokenizer, updatedDataset, imdbDataset['test'], targetNames, "bert-base-uncased-2-labels-N-64-M-64-head+tail", "Amazon", batch_size=8)

[1mbert-base-uncased-2-labels-N-64-M-64-head+tail[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.8729 |   0.7597 |     0.8124 |     12500 |
| Positive |      0.7873 |   0.8894 |     0.8352 |     12500 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.7610 |   0.8455 |     0.8010 |      2000 |
| Positive |      0.8888 |   0.8230 |     0.8546 |      3000 |
+----------+-------------+----------+------------+-----------+


**Two labels with Neutral; N=510, M=0; overall 510 tokens**

In [None]:
N = 510
M = 0
reviewType = "review_body"

In [None]:
amazonEnLanguageModel_two_labels_N_510_M_0 = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=numLabels)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly i

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(amazonEnLanguageModel_two_labels_N_510_M_0, training_args, updatedDataset, 30000, "amazon")

  0%|          | 0/200 [00:00<?, ?ba/s]

  0%|          | 0/5 [00:00<?, ?ba/s]

  0%|          | 0/5 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,0.3919,0.387862,0.823


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(amazonEnLanguageModel_two_labels_N_510_M_0, tokenizer, updatedDataset, imdbDataset['test'], targetNames, "bert-base-uncased-2-labels-N-510-M-0-head-only", "Amazon", batch_size=8)

[1mbert-base-uncased-2-labels-N-510-M-0-head-only[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.8427 |   0.8821 |     0.8619 |     12500 |
| Positive |      0.8763 |   0.8354 |     0.8553 |     12500 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.7395 |   0.8730 |     0.8007 |      2000 |
| Positive |      0.9038 |   0.7950 |     0.8459 |      3000 |
+----------+-------------+----------+------------+-----------+


**Two labels with Neutral; N=0, M=510; overall 510 tokens**

In [None]:
N = 0
M = 510
reviewType = "review_body"

In [None]:
amazonEnLanguageModel_two_labels_N_0_M_510 = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=numLabels)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly i

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(amazonEnLanguageModel_two_labels_N_0_M_510, training_args, updatedDataset, 30000, "amazon")

  0%|          | 0/200 [00:00<?, ?ba/s]

  0%|          | 0/5 [00:00<?, ?ba/s]

  0%|          | 0/5 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,0.3918,0.386515,0.8236


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(amazonEnLanguageModel_two_labels_N_0_M_510, tokenizer, updatedDataset, imdbDataset['test'], targetNames, "bert-base-uncased-2-labels-N-0-M-510-tail-only", "Amazon", batch_size=8)

[1mbert-base-uncased-2-labels-N-0-M-510-tail-only[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.8581 |   0.8830 |     0.8704 |     12500 |
| Positive |      0.8795 |   0.8540 |     0.8665 |     12500 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.7305 |   0.8865 |     0.8010 |      2000 |
| Positive |      0.9118 |   0.7820 |     0.8419 |      3000 |
+----------+-------------+----------+------------+-----------+


**XLNET AMAZON TWO LABELS WITH NEUTRAL**

**Five labels; N=128, M=382; overall 510 tokens**

In [None]:
N = 128
M = 382
reviewType = "review_body"

In [None]:
tokenizer = AutoTokenizer.from_pretrained("xlnet-base-cased", num_labels=numLabels)
XLNETModel_N_128_M_382 = XLNetForSequenceClassification.from_pretrained("xlnet-base-cased", num_labels=numLabels)

Downloading (…)lve/main/config.json:   0%|          | 0.00/760 [00:00<?, ?B/s]

Downloading (…)ve/main/spiece.model:   0%|          | 0.00/798k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.38M [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/467M [00:00<?, ?B/s]

Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(XLNETModel_N_128_M_382, training_args, updatedDataset, 30000, "amazon")

  0%|          | 0/200 [00:00<?, ?ba/s]

  0%|          | 0/5 [00:00<?, ?ba/s]

  0%|          | 0/5 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,0.3977,0.391486,0.8326


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(XLNETModel_N_128_M_382, tokenizer, updatedDataset, imdbDataset['test'], targetNames, "xlnet-base-cased-2-labels-N-128-M-382-head+tail", "Amazon", batch_size=8)

[1mxlnet-base-cased-2-labels-N-128-M-382-head+tail[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.9487 |   0.8040 |     0.8704 |     12500 |
| Positive |      0.8299 |   0.9566 |     0.8888 |     12500 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.7925 |   0.8210 |     0.8065 |      2000 |
| Positive |      0.8777 |   0.8567 |     0.8671 |      3000 |
+----------+-------------+----------+------------+-----------+


**Two labels with Neutral; N=64, M=64; overall 128 tokens**

In [None]:
N = 64
M = 64
reviewType = "review_body"

In [None]:
XLNETModel_N_64_M_64 = XLNetForSequenceClassification.from_pretrained("xlnet-base-cased", num_labels=numLabels)

Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(XLNETModel_N_64_M_64, training_args, updatedDataset, 30000, "amazon")

  0%|          | 0/200 [00:00<?, ?ba/s]

  0%|          | 0/5 [00:00<?, ?ba/s]

  0%|          | 0/5 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,0.4105,0.395386,0.8202


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(XLNETModel_N_64_M_64, tokenizer, updatedDataset, imdbDataset['test'], targetNames, "xlnet-base-cased-2-labels-N-64-M-64-head+tail", "Amazon", batch_size=8)

[1mxlnet-base-cased-2-labels-N-64-M-64-head+tail[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.8208 |   0.9102 |     0.8632 |     12500 |
| Positive |      0.8993 |   0.8013 |     0.8474 |     12500 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.7345 |   0.8730 |     0.7978 |      2000 |
| Positive |      0.9032 |   0.7897 |     0.8426 |      3000 |
+----------+-------------+----------+------------+-----------+


**Two labels with Neutral; N=510, M=0; overall 510 tokens**

In [None]:
N = 510
M = 0
reviewType = "review_body"

In [None]:
XLNETModel_N_510_M_0 = XLNetForSequenceClassification.from_pretrained("xlnet-base-cased", num_labels=numLabels)

Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(XLNETModel_N_510_M_0, training_args, updatedDataset, 30000, "amazon")

  0%|          | 0/200 [00:00<?, ?ba/s]

  0%|          | 0/5 [00:00<?, ?ba/s]

  0%|          | 0/5 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,0.3957,0.417117,0.8354


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(XLNETModel_N_510_M_0, tokenizer, updatedDataset, imdbDataset['test'], targetNames, "xlnet-base-cased-2-labels-N-510-M-0-head-only", "Amazon", batch_size=8)

[1mxlnet-base-cased-2-labels-N-510-M-0-head-only[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.9582 |   0.7593 |     0.8472 |     12500 |
| Positive |      0.8007 |   0.9669 |     0.8760 |     12500 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.8119 |   0.7815 |     0.7964 |      2000 |
| Positive |      0.8579 |   0.8793 |     0.8685 |      3000 |
+----------+-------------+----------+------------+-----------+


**Two labels with Neutral; N=0, M=510; overall 510 tokens**

In [None]:
N = 0
M = 510
reviewType = "review_body"

In [None]:
XLNETModel_N_0_M_510 = XLNetForSequenceClassification.from_pretrained("xlnet-base-cased", num_labels=numLabels)

Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(XLNETModel_N_0_M_510, training_args, updatedDataset, 30000, "amazon")

  0%|          | 0/200 [00:00<?, ?ba/s]

  0%|          | 0/5 [00:00<?, ?ba/s]

  0%|          | 0/5 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,0.4125,0.387716,0.8322


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(XLNETModel_N_0_M_510, tokenizer, updatedDataset, imdbDataset['test'], targetNames, "xlnet-base-cased-2-labels-N-0-M-510-tail-only", "Amazon", batch_size=8)

[1mxlnet-base-cased-2-labels-N-0-M-510-tail-only[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.9416 |   0.8350 |     0.8851 |     12500 |
| Positive |      0.8518 |   0.9482 |     0.8974 |     12500 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.7910 |   0.7870 |     0.7890 |      2000 |
| Positive |      0.8585 |   0.8613 |     0.8599 |      3000 |
+----------+-------------+----------+------------+-----------+


**labels 1,2,4,5 only**

In [None]:
amazonEnLanguageModelNoNeutrals = BertForSequenceClassification.from_pretrained("bert-base-cased", num_labels=2)

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/436M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initi

In [None]:
en_only_dataset = datasets.DatasetDict({"train": enOnlyTrain, 'validation': enOnlyDev, 'test': enOnlyTest})

In [None]:
en_only_dataset = en_only_dataset.rename_column('stars', 'labels')

In [None]:
en_only_dataset = en_only_dataset.remove_columns('__index_level_0__')

In [None]:
en_only_dataset['train'] = datasets.Dataset.from_pandas(removeNeutrals(en_only_dataset['train']))
en_only_dataset['validation'] = datasets.Dataset.from_pandas(removeNeutrals(en_only_dataset['validation']))
en_only_dataset['test'] = datasets.Dataset.from_pandas(removeNeutrals(en_only_dataset['test']))

In [None]:
updatedDataset = en_only_dataset.map(changeToBinary)

  0%|          | 0/160000 [00:00<?, ?ex/s]

  0%|          | 0/4000 [00:00<?, ?ex/s]

  0%|          | 0/4000 [00:00<?, ?ex/s]

**BERT CASED**

In [None]:
tokenizer = AutoTokenizer.from_pretrained('bert-base-cased', num_labels=numLabels)

Downloading (…)okenizer_config.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]

**Two labels No Neutral; N=128, M=382; overall 510 tokens**

In [None]:
N = 128
M = 382
reviewType = "review_body"

In [None]:
amazonEnLanguageModel_two_labels_N_128_M_382 = BertForSequenceClassification.from_pretrained("bert-base-cased", num_labels=numLabels)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initi

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-4, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(amazonEnLanguageModel_two_labels_N_128_M_382, training_args, updatedDataset, 30000, "amazon")

  0%|          | 0/160 [00:00<?, ?ba/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (529 > 512). Running this sequence through the model will result in indexing errors


  0%|          | 0/4 [00:00<?, ?ba/s]

  0%|          | 0/4 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,0.308,0.32972,0.8965


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(amazonEnLanguageModel_two_labels_N_128_M_382, tokenizer, updatedDataset, imdbDataset['test'], targetNames, "bert-base-cased-2-labels-No-Neutral-N-128-M-382-head+tail", "Amazon", batch_size=8)

[1mbert-base-cased-2-labels-No-Neutral-N-128-M-382-head+tail[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.8088 |   0.8934 |     0.8490 |     12500 |
| Positive |      0.8810 |   0.7888 |     0.8323 |     12500 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.8617 |   0.9500 |     0.9037 |      2000 |
| Positive |      0.9443 |   0.8475 |     0.8933 |      2000 |
+----------+-------------+----------+------------+-----------+


**Two labels No Neutral; N=64, M=64; overall 128 tokens**

In [None]:
N = 64
M = 64
reviewType = "review_body"

In [None]:
amazonEnLanguageModel_two_labels_N_64_M_64 = BertForSequenceClassification.from_pretrained("bert-base-cased", num_labels=numLabels)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initi

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(amazonEnLanguageModel_two_labels_N_64_M_64, training_args, updatedDataset, 30000, "amazon")

  0%|          | 0/160 [00:00<?, ?ba/s]

  0%|          | 0/4 [00:00<?, ?ba/s]

  0%|          | 0/4 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,0.2992,0.342118,0.9035


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(amazonEnLanguageModel_two_labels_N_64_M_64, tokenizer, updatedDataset, imdbDataset['test'], targetNames, "bert-base-cased-2-labels-No-Neutral-N-64-M-64-head+tail", "Amazon", batch_size=8)

[1mbert-base-cased-2-labels-No-Neutral-N-64-M-64-head+tail[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.7595 |   0.8519 |     0.8031 |     12500 |
| Positive |      0.8314 |   0.7302 |     0.7775 |     12500 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.8773 |   0.9330 |     0.9043 |      2000 |
| Positive |      0.9285 |   0.8695 |     0.8980 |      2000 |
+----------+-------------+----------+------------+-----------+


**Two labels No Neutral; N=510, M=0; overall 510 tokens**

In [None]:
N = 510
M = 0
reviewType = "review_body"

In [None]:
amazonEnLanguageModel_two_labels_N_510_M_0 = BertForSequenceClassification.from_pretrained("bert-base-cased", num_labels=numLabels)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initi

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(amazonEnLanguageModel_two_labels_N_510_M_0, training_args, updatedDataset, 30000, "amazon")

  0%|          | 0/160 [00:00<?, ?ba/s]

  0%|          | 0/4 [00:00<?, ?ba/s]

  0%|          | 0/4 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,0.311,0.321706,0.9


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(amazonEnLanguageModel_two_labels_N_510_M_0, tokenizer, updatedDataset, imdbDataset['test'], targetNames, "bert-base-cased-2-labels-No-Neutral-N-510-M-0-head-only", "Amazon", batch_size=8)

[1mbert-base-cased-2-labels-No-Neutral-N-510-M-0-head-only[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.7941 |   0.9106 |     0.8484 |     12500 |
| Positive |      0.8953 |   0.7638 |     0.8243 |     12500 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.8700 |   0.9535 |     0.9098 |      2000 |
| Positive |      0.9486 |   0.8575 |     0.9007 |      2000 |
+----------+-------------+----------+------------+-----------+


**Two labels No Neutral; N=0, M=510; overall 510 tokens**

In [None]:
N = 0
M = 510
reviewType = "review_body"

In [None]:
amazonEnLanguageModel_two_labels_N_0_M_510 = BertForSequenceClassification.from_pretrained("bert-base-cased", num_labels=numLabels)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initi

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(amazonEnLanguageModel_two_labels_N_0_M_510, training_args, updatedDataset, 30000, "amazon")

  0%|          | 0/160 [00:00<?, ?ba/s]

  0%|          | 0/4 [00:00<?, ?ba/s]

  0%|          | 0/4 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,0.3079,0.299242,0.90325


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(amazonEnLanguageModel_two_labels_N_0_M_510, tokenizer, updatedDataset, imdbDataset['test'], targetNames, "bert-base-cased-2-labels-No-Neutral-N-0-M-510-tail-only", "Amazon", batch_size=8)

[1mbert-base-cased-2-labels-No-Neutral-N-0-M-510-tail-only[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.8337 |   0.8861 |     0.8591 |     12500 |
| Positive |      0.8784 |   0.8233 |     0.8500 |     12500 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.8733 |   0.9545 |     0.9121 |      2000 |
| Positive |      0.9498 |   0.8615 |     0.9035 |      2000 |
+----------+-------------+----------+------------+-----------+


**BERT UNCASED**

In [None]:
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased", num_labels=numLabels)

Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

**Two labels No Neutral; N=128, M=382; overall 510 tokens**

In [None]:
N = 128
M = 382
reviewType = "review_body"

In [None]:
amazonEnLanguageModel_two_labels_N_128_M_382 = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=numLabels)

Downloading model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly i

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(amazonEnLanguageModel_two_labels_N_128_M_382, training_args, updatedDataset, 30000, "amazon")

  0%|          | 0/160 [00:00<?, ?ba/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (520 > 512). Running this sequence through the model will result in indexing errors


  0%|          | 0/4 [00:00<?, ?ba/s]

  0%|          | 0/4 [00:00<?, ?ba/s]

[34m[1mwandb[0m: Currently logged in as: [33mnadav-talmon[0m ([33mnadav_igor[0m). Use [1m`wandb login --relogin`[0m to force relogin


Epoch,Training Loss,Validation Loss,Accuracy
1,0.2887,0.293609,0.91125


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(amazonEnLanguageModel_two_labels_N_128_M_382, tokenizer, updatedDataset, imdbDataset['test'], targetNames, "bert-base-uncased-2-labels-No-Neutral-N-128-M-382-head+tail", "Amazon", batch_size=8)

[1mbert-base-uncased-2-labels-No-Neutral-N-128-M-382-head+tail[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.7933 |   0.9484 |     0.8639 |     12500 |
| Positive |      0.9359 |   0.7529 |     0.8345 |     12500 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.8770 |   0.9630 |     0.9180 |      2000 |
| Positive |      0.9590 |   0.8650 |     0.9096 |      2000 |
+----------+-------------+----------+------------+-----------+


**Two labels No Neutral; N=64, M=64; overall 128 tokens**

In [None]:
N = 64
M = 64
reviewType = "review_body"

In [None]:
amazonEnLanguageModel_two_labels_N_64_M_64 = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=numLabels)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly i

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(amazonEnLanguageModel_two_labels_N_64_M_64, training_args, updatedDataset, 30000, "amazon")

  0%|          | 0/160 [00:00<?, ?ba/s]

  0%|          | 0/4 [00:00<?, ?ba/s]

  0%|          | 0/4 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,0.2955,0.29271,0.91075


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(amazonEnLanguageModel_two_labels_N_64_M_64, tokenizer, updatedDataset, imdbDataset['test'], targetNames, "bert-base-uncased-2-labels-No-Neutral-N-64-M-64-head+tail", "Amazon", batch_size=8)

[1mbert-base-uncased-2-labels-No-Neutral-N-64-M-64-head+tail[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.7934 |   0.8887 |     0.8384 |     12500 |
| Positive |      0.8735 |   0.7686 |     0.8177 |     12500 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.8894 |   0.9445 |     0.9161 |      2000 |
| Positive |      0.9408 |   0.8825 |     0.9107 |      2000 |
+----------+-------------+----------+------------+-----------+


**Two labels No Neutral; N=510, M=0; overall 510 tokens**

In [None]:
N = 510
M = 0
reviewType = "review_body"

In [None]:
amazonEnLanguageModel_two_labels_N_510_M_0 = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=numLabels)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly i

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(amazonEnLanguageModel_two_labels_N_510_M_0, training_args, updatedDataset, 30000, "amazon")

  0%|          | 0/160 [00:00<?, ?ba/s]

  0%|          | 0/4 [00:00<?, ?ba/s]

  0%|          | 0/4 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,0.2947,0.278233,0.91075


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(amazonEnLanguageModel_two_labels_N_510_M_0, tokenizer, updatedDataset, imdbDataset['test'], targetNames, "bert-base-uncased-2-labels-No-Neutral-N-510-M-0-head-only", "Amazon", batch_size=8)

[1mbert-base-uncased-2-labels-No-Neutral-N-510-M-0-head-only[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.8389 |   0.9046 |     0.8705 |     12500 |
| Positive |      0.8965 |   0.8263 |     0.8600 |     12500 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.8840 |   0.9560 |     0.9186 |      2000 |
| Positive |      0.9521 |   0.8745 |     0.9116 |      2000 |
+----------+-------------+----------+------------+-----------+


**Two labels No Neutral; N=0, M=510; overall 510 tokens**

In [None]:
N = 0
M = 510
reviewType = "review_body"

In [None]:
amazonEnLanguageModel_two_labels_N_0_M_510 = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=numLabels)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly i

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(amazonEnLanguageModel_two_labels_N_0_M_510, training_args, updatedDataset, 30000, "amazon")

  0%|          | 0/160 [00:00<?, ?ba/s]

  0%|          | 0/4 [00:00<?, ?ba/s]

  0%|          | 0/4 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,0.3019,0.274651,0.91275


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(amazonEnLanguageModel_two_labels_N_0_M_510, tokenizer, updatedDataset, imdbDataset['test'], targetNames, "bert-base-uncased-2-labels-No-Neutral-N-0-M-510-tail-only", "Amazon", batch_size=8)

[1mbert-base-uncased-2-labels-No-Neutral-N-0-M-510-tail-only[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.8305 |   0.9261 |     0.8757 |     12500 |
| Positive |      0.9165 |   0.8110 |     0.8605 |     12500 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.8927 |   0.9525 |     0.9216 |      2000 |
| Positive |      0.9491 |   0.8855 |     0.9162 |      2000 |
+----------+-------------+----------+------------+-----------+


**XLNET AMAZON TWO LABELS NO NEUTRAL**

**Two labels No Neutral; N=128, M=382; overall 510 tokens**

In [None]:
N = 128
M = 382
reviewType = "review_body"

In [None]:
tokenizer = AutoTokenizer.from_pretrained("xlnet-base-cased", num_labels=numLabels)
XLNETModel_N_128_M_382 = XLNetForSequenceClassification.from_pretrained("xlnet-base-cased", num_labels=numLabels)

Downloading (…)lve/main/config.json:   0%|          | 0.00/760 [00:00<?, ?B/s]

Downloading (…)ve/main/spiece.model:   0%|          | 0.00/798k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.38M [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/467M [00:00<?, ?B/s]

Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'sequence_summary.summary.bias', 'logits_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(XLNETModel_N_128_M_382, training_args, updatedDataset, 30000, "amazon")

  0%|          | 0/160 [00:00<?, ?ba/s]

  0%|          | 0/4 [00:00<?, ?ba/s]

  0%|          | 0/4 [00:00<?, ?ba/s]

[34m[1mwandb[0m: Currently logged in as: [33mnadav-talmon[0m ([33mnadav_igor[0m). Use [1m`wandb login --relogin`[0m to force relogin


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3225,0.246511,0.922


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(XLNETModel_N_128_M_382, tokenizer, updatedDataset, imdbDataset['test'], targetNames, "xlnet-base-cased-2-labels-No-Neutral-N-128-M-382-head+tail", "Amazon", batch_size=8)

[1mxlnet-base-cased-2-labels-No-Neutral-N-128-M-382-head+tail[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.8878 |   0.9385 |     0.9124 |     12500 |
| Positive |      0.9348 |   0.8814 |     0.9073 |     12500 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.8950 |   0.9590 |     0.9259 |      2000 |
| Positive |      0.9558 |   0.8875 |     0.9204 |      2000 |
+----------+-------------+----------+------------+-----------+


**Two labels No Neutral; N=64, M=64; overall 128 tokens**

In [None]:
N = 64
M = 64
reviewType = "review_body"

In [None]:
XLNETModel_N_64_M_64 = XLNetForSequenceClassification.from_pretrained("xlnet-base-cased", num_labels=numLabels)

Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'sequence_summary.summary.bias', 'logits_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(XLNETModel_N_64_M_64, training_args, updatedDataset, 30000, "amazon")

  0%|          | 0/160 [00:00<?, ?ba/s]

  0%|          | 0/4 [00:00<?, ?ba/s]

  0%|          | 0/4 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,0.3253,0.331385,0.908


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(XLNETModel_N_64_M_64, tokenizer, updatedDataset, imdbDataset['test'], targetNames, "xlnet-base-cased-2-labels-No-Neutral-N-64-M-64-head+tail", "Amazon", batch_size=8)

[1mxlnet-base-cased-2-labels-No-Neutral-N-64-M-64-head+tail[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.8038 |   0.9362 |     0.8650 |     12500 |
| Positive |      0.9237 |   0.7714 |     0.8407 |     12500 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.8577 |   0.9765 |     0.9133 |      2000 |
| Positive |      0.9727 |   0.8380 |     0.9003 |      2000 |
+----------+-------------+----------+------------+-----------+


**Two labels No Neutral; N=510, M=0; overall 510 tokens**

In [None]:
N = 510
M = 0
reviewType = "review_body"

In [None]:
XLNETModel_N_510_M_0 = XLNetForSequenceClassification.from_pretrained("xlnet-base-cased", num_labels=numLabels)

Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'sequence_summary.summary.bias', 'logits_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(XLNETModel_N_510_M_0, training_args, updatedDataset, 30000, "amazon")

  0%|          | 0/160 [00:00<?, ?ba/s]

  0%|          | 0/4 [00:00<?, ?ba/s]

  0%|          | 0/4 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,0.3339,0.276015,0.923


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(XLNETModel_N_510_M_0, tokenizer, updatedDataset, imdbDataset['test'], targetNames, "xlnet-base-cased-2-labels-No-Neutral-N-510-M-0-head-only", "Amazon", batch_size=8)

[1mxlnet-base-cased-2-labels-No-Neutral-N-510-M-0-head-only[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.9025 |   0.9238 |     0.9131 |     12500 |
| Positive |      0.9220 |   0.9002 |     0.9110 |     12500 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.9040 |   0.9510 |     0.9269 |      2000 |
| Positive |      0.9483 |   0.8990 |     0.9230 |      2000 |
+----------+-------------+----------+------------+-----------+


**Two labels No Neutral; N=0, M=510; overall 510 tokens**

In [None]:
N = 256
M = 256
reviewType = "review_body"

In [None]:
XLNETModel_N_0_M_510 = XLNetForSequenceClassification.from_pretrained("xlnet-base-cased", num_labels=numLabels)

Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.bias', 'sequence_summary.summary.bias', 'logits_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(XLNETModel_N_0_M_510, training_args, updatedDataset, 30000, "amazon")

  0%|          | 0/160 [00:00<?, ?ba/s]

  0%|          | 0/4 [00:00<?, ?ba/s]

  0%|          | 0/4 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,0.3436,0.257167,0.92525


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(XLNETModel_N_0_M_510, tokenizer, updatedDataset, imdbDataset['test'], targetNames, "xlnet-base-cased-2-labels-No-Neutral-N-0-M-510-tail-only", "Amazon", batch_size=8)

[1mxlnet-base-cased-2-labels-No-Neutral-N-0-M-510-tail-only[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.9244 |   0.8998 |     0.9119 |     12500 |
| Positive |      0.9024 |   0.9264 |     0.9142 |     12500 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.8958 |   0.9495 |     0.9218 |      2000 |
| Positive |      0.9463 |   0.8895 |     0.9170 |      2000 |
+----------+-------------+----------+------------+-----------+


**MultiLingual Model**

In [None]:
threshold = 3 # neutrals are included
numLabels = 2
targetNames = ['Negative', 'Positive']

In [None]:
trainingSetAmazon.head(5)

Unnamed: 0,stars,review_body,review_title,language
0,1,Armband ist leider nach 1 Jahr kaputt gegangen,Leider nach 1 Jahr kaputt,de
1,1,In der Lieferung war nur Ein Akku!,EINS statt ZWEI Akkus!!!,de
2,1,"Ein Stern, weil gar keine geht nicht. Es hande...",Achtung Abzocke,de
3,1,"Dachte, das wären einfach etwas festere Binden...",Zu viel des Guten,de
4,1,Meine Kinder haben kaum damit gespielt und nac...,Qualität sehr schlecht,de


In [None]:
def extractMultipleLanguages(langugages, df):
    dfList = []
    for language in languages:
        dfList.append(extractLanguage(language, df))
    newdf = pd.concat(dfList)
    return newdf

In [None]:
languages = ['en', 'de', 'fr', 'es']
trainingMultiSet = extractMultipleLanguages(languages, trainingSetAmazon)
validationMultiSet = extractMultipleLanguages(languages, validationSetAmazon)
testMultiSet = extractMultipleLanguages(languages, testSetAmazon)

In [None]:
np.unique(trainingMultiSet['language'])

array(['de', 'en', 'es', 'fr'], dtype=object)

In [None]:
multiTrain = datasets.Dataset.from_pandas(trainingMultiSet)
multiValidation = datasets.Dataset.from_pandas(validationMultiSet)
multiTest = datasets.Dataset.from_pandas(testMultiSet)

In [None]:
multi_dataset = datasets.DatasetDict({"train": multiTrain, 'validation': multiValidation, 'test': multiTest})

In [None]:
multi_dataset

DatasetDict({
    train: Dataset({
        features: ['stars', 'review_body', 'review_title', 'language', '__index_level_0__'],
        num_rows: 800000
    })
    validation: Dataset({
        features: ['stars', 'review_body', 'review_title', 'language', '__index_level_0__'],
        num_rows: 20000
    })
    test: Dataset({
        features: ['stars', 'review_body', 'review_title', 'language', '__index_level_0__'],
        num_rows: 20000
    })
})

In [None]:
multi_dataset = multi_dataset.rename_column('stars', 'labels')
multi_dataset = multi_dataset.remove_columns('__index_level_0__')

In [None]:
updatedDatasetMulti = multi_dataset.map(changeToBinary)

  0%|          | 0/800000 [00:00<?, ?ex/s]

  0%|          | 0/20000 [00:00<?, ?ex/s]

  0%|          | 0/20000 [00:00<?, ?ex/s]

***BERT MULTILINGUAL***

In [None]:
tokenizer = AutoTokenizer.from_pretrained("bert-base-multilingual-cased")
numLabels = 2
targetNames = ['Negative', 'Positive']

Downloading (…)okenizer_config.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/625 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/996k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.96M [00:00<?, ?B/s]

**Two labels, With Neutrals; N=128, M=382; overall 510 tokens**

In [None]:
N = 128
M = 382
reviewType = "review_body"

In [None]:
bertMultiLingualModel_N_128_M_382 = BertForSequenceClassification.from_pretrained("bert-base-multilingual-cased", num_labels=2)

Downloading model.safetensors:   0%|          | 0.00/714M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(bertMultiLingualModel_N_128_M_382, training_args, updatedDatasetMulti, 120000, "amazon")

  0%|          | 0/800 [00:00<?, ?ba/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (568 > 512). Running this sequence through the model will result in indexing errors


  0%|          | 0/20 [00:00<?, ?ba/s]

  0%|          | 0/20 [00:00<?, ?ba/s]

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

  ········································


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Epoch,Training Loss,Validation Loss,Accuracy
1,0.4091,0.4023,0.8209


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(bertMultiLingualModel_N_128_M_382, tokenizer, updatedDatasetMulti, translatedTestSet, targetNames, "bert-base-multilingual-cased-2-labels-With-Neutrals-N-128-M-382-head+tail", batch_size=8)

[1mbert-base-multilingual-cased-2-labels-With-Neutrals-N-128-M-382-head+tail[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.9368 |   0.7653 |     0.8424 |      6000 |
| Positive |      0.8016 |   0.9483 |     0.8688 |      6000 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.7825 |   0.7865 |     0.7845 |      8000 |
| Positive |      0.8572 |   0.8542 |     0.8557 |     12000 |
+----------+-------------+----------+------------+-----------+


**Two labels, With Neutrals; N=64, M=64; overall 128 tokens**

In [None]:
N = 64
M = 64
reviewType = "review_body"

In [None]:
bertMultiLingualModel_N_64_M_64 = BertForSequenceClassification.from_pretrained("bert-base-multilingual-cased", num_labels=numLabels)

Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(bertMultiLingualModel_N_64_M_64, training_args, updatedDatasetMulti, 120000, "amazon")

  0%|          | 0/800 [00:00<?, ?ba/s]

  0%|          | 0/20 [00:00<?, ?ba/s]

  0%|          | 0/20 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,0.4264,0.41241,0.81375


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(bertMultiLingualModel_N_64_M_64, tokenizer, updatedDatasetMulti, translatedTestSet, targetNames, "bert-base-multilingual-cased-2-labels-With-Neutrals-N-64-M-64-head+tail", batch_size=8)

[1mbert-base-multilingual-cased-2-labels-With-Neutrals-N-64-M-64-head+tail[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.9382 |   0.7212 |     0.8155 |      6000 |
| Positive |      0.7736 |   0.9525 |     0.8537 |      6000 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.7875 |   0.7595 |     0.7732 |      8000 |
| Positive |      0.8434 |   0.8633 |     0.8532 |     12000 |
+----------+-------------+----------+------------+-----------+


**Two labels, With Neutrals; N=510, M=0; overall 510 tokens**

In [None]:
N = 510
M = 0
reviewType = "review_body"

In [None]:
bertMultiLingualModel_N_510_M_0 = BertForSequenceClassification.from_pretrained("bert-base-multilingual-cased", num_labels=numLabels)

Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(bertMultiLingualModel_N_510_M_0, training_args, updatedDatasetMulti, 120000, "amazon")

  0%|          | 0/800 [00:00<?, ?ba/s]

  0%|          | 0/20 [00:00<?, ?ba/s]

  0%|          | 0/20 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,0.4871,0.474261,0.7866


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(bertMultiLingualModel_N_510_M_0, tokenizer, updatedDatasetMulti, translatedTestSet, targetNames, "bert-base-multilingual-cased-2-labels-With-Neutrals-N-510-M-0-head-only", batch_size=8)

[1mbert-base-multilingual-cased-2-labels-With-Neutrals-N-510-M-0-head-only[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.9058 |   0.6908 |     0.7839 |      6000 |
| Positive |      0.7501 |   0.9282 |     0.8297 |      6000 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.7521 |   0.7271 |     0.7394 |      8000 |
| Positive |      0.8220 |   0.8403 |     0.8310 |     12000 |
+----------+-------------+----------+------------+-----------+


**Two labels, With Neutrals; N=0, M=510; overall 510 tokens**

In [None]:
N = 0
M = 510
reviewType = "review_body"

In [None]:
bertMultiLingualModel_N_0_M_510 = BertForSequenceClassification.from_pretrained("bert-base-multilingual-cased", num_labels=numLabels)

Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(bertMultiLingualModel_N_0_M_510, training_args, updatedDatasetMulti, 120000, "amazon")

  0%|          | 0/800 [00:00<?, ?ba/s]

  0%|          | 0/20 [00:00<?, ?ba/s]

  0%|          | 0/20 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,0.4562,0.440904,0.80505


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(bertMultiLingualModel_N_0_M_510, tokenizer, updatedDatasetMulti, translatedTestSet, targetNames, "bert-base-multilingual-cased-2-labels-With-Neutrals-N-0-M-510-tail-only", batch_size=8)

[1mbert-base-multilingual-cased-2-labels-With-Neutrals-N-0-M-510-tail-only[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.9193 |   0.7463 |     0.8238 |      6000 |
| Positive |      0.7865 |   0.9345 |     0.8541 |      6000 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.7520 |   0.7924 |     0.7716 |      8000 |
| Positive |      0.8564 |   0.8257 |     0.8408 |     12000 |
+----------+-------------+----------+------------+-----------+


**BERT MULTILINGUAL UNCASED**

In [None]:
tokenizer = AutoTokenizer.from_pretrained("bert-base-multilingual-uncased")

Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/625 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/872k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.72M [00:00<?, ?B/s]

**Two labels, With Neutrals; N=128, M=382; overall 510 tokens**

In [None]:
N = 128
M = 382
reviewType = "review_body"

In [None]:
bertMultiLingualModelUncased_N_128_M_382 = BertForSequenceClassification.from_pretrained("bert-base-multilingual-uncased", num_labels=numLabels)

Downloading model.safetensors:   0%|          | 0.00/672M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingu

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(bertMultiLingualModelUncased_N_128_M_382, training_args, updatedDatasetMulti, 120000, "amazon")

  0%|          | 0/800 [00:00<?, ?ba/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (544 > 512). Running this sequence through the model will result in indexing errors


  0%|          | 0/20 [00:00<?, ?ba/s]

  0%|          | 0/20 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,0.3971,0.397917,0.82395


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(bertMultiLingualModelUncased_N_128_M_382, tokenizer, updatedDatasetMulti, translatedTestSet, targetNames, "bert-base-multilingual-uncased-2-labels-With-Neutrals-N-128-M-382-head+tail", batch_size=8)

[1mbert-base-multilingual-uncased-2-labels-With-Neutrals-N-128-M-382-head+tail[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.9367 |   0.7547 |     0.8359 |      6000 |
| Positive |      0.7946 |   0.9490 |     0.8650 |      6000 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.7885 |   0.7841 |     0.7863 |      8000 |
| Positive |      0.8566 |   0.8598 |     0.8582 |     12000 |
+----------+-------------+----------+------------+-----------+


**Two labels, With Neutrals; N=64, M=64; overall 128 tokens**

In [None]:
N = 64
M = 64
reviewType = "review_body"

In [None]:
bertMultiLingualModelUncased_N_64_M_64 = BertForSequenceClassification.from_pretrained("bert-base-multilingual-uncased", num_labels=numLabels)

Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingu

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(bertMultiLingualModelUncased_N_64_M_64, training_args, updatedDatasetMulti, 120000, "amazon")

  0%|          | 0/800 [00:00<?, ?ba/s]

  0%|          | 0/20 [00:00<?, ?ba/s]

  0%|          | 0/20 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,0.4224,0.40673,0.8199


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(bertMultiLingualModelUncased_N_128_M_382, tokenizer, updatedDatasetMulti, translatedTestSet, targetNames, "bert-base-multilingual-uncased-2-labels-With-Neutrals-N-64-M-64-head+tail", batch_size=8)

[1mbert-base-multilingual-uncased-2-labels-With-Neutrals-N-64-M-64-head+tail[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.9362 |   0.7557 |     0.8363 |      6000 |
| Positive |      0.7952 |   0.9485 |     0.8651 |      6000 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.7880 |   0.7837 |     0.7859 |      8000 |
| Positive |      0.8563 |   0.8594 |     0.8579 |     12000 |
+----------+-------------+----------+------------+-----------+


**Two labels, With Neutrals; N=510, M=0; overall 510 tokens**

In [None]:
N = 510
M = 0
reviewType = "review_body"

In [None]:
bertMultiLingualModelUncased_N_510_M_0 = BertForSequenceClassification.from_pretrained("bert-base-multilingual-uncased", num_labels=numLabels)

Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingu

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(bertMultiLingualModelUncased_N_510_M_0, training_args, updatedDatasetMulti, 120000, "amazon")

  0%|          | 0/800 [00:00<?, ?ba/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (544 > 512). Running this sequence through the model will result in indexing errors


  0%|          | 0/20 [00:00<?, ?ba/s]

  0%|          | 0/20 [00:00<?, ?ba/s]

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

  ········································


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Epoch,Training Loss,Validation Loss,Accuracy
1,0.6303,0.58697,0.69485


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(bertMultiLingualModelUncased_N_510_M_0, tokenizer, updatedDatasetMulti, translatedTestSet, targetNames, "bert-base-multilingual-uncased-2-labels-With-Neutrals-N-510-M-0-head-only", batch_size=8)

[1mbert-base-multilingual-uncased-2-labels-With-Neutrals-N-510-M-0-head-only[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.8410 |   0.6058 |     0.7043 |      6000 |
| Positive |      0.6920 |   0.8855 |     0.7769 |      6000 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.6258 |   0.6472 |     0.6364 |      8000 |
| Positive |      0.7593 |   0.7420 |     0.7506 |     12000 |
+----------+-------------+----------+------------+-----------+


**Two labels, With Neutrals; N=0, M=510; overall 510 tokens**

In [None]:
N = 0
M = 510
reviewType = "review_body"

In [None]:
bertMultiLingualModelUncased_N_0_M_510 = BertForSequenceClassification.from_pretrained("bert-base-multilingual-uncased", num_labels=numLabels)

Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingu

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(bertMultiLingualModelUncased_N_0_M_510, training_args, updatedDatasetMulti, 120000, "amazon")

  0%|          | 0/800 [00:00<?, ?ba/s]

  0%|          | 0/20 [00:00<?, ?ba/s]

  0%|          | 0/20 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,0.4047,0.394215,0.82265


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(bertMultiLingualModelUncased_N_0_M_510, tokenizer, updatedDatasetMulti, translatedTestSet, targetNames, "bert-base-multilingual-uncased-2-labels-With-Neutrals-N-0-M-510-tail-only", batch_size=8)

[1mbert-base-multilingual-uncased-2-labels-With-Neutrals-N-0-M-510-tail-only[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.9330 |   0.7897 |     0.8554 |      6000 |
| Positive |      0.8177 |   0.9433 |     0.8760 |      6000 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.7779 |   0.8176 |     0.7972 |      8000 |
| Positive |      0.8741 |   0.8443 |     0.8590 |     12000 |
+----------+-------------+----------+------------+-----------+


**NO NEUTRALS**

In [None]:
multi_dataset['train'] = datasets.Dataset.from_pandas(removeNeutrals(multi_dataset['train']))
multi_dataset['validation'] = datasets.Dataset.from_pandas(removeNeutrals(multi_dataset['validation']))
multi_dataset['test'] = datasets.Dataset.from_pandas(removeNeutrals(multi_dataset['test']))

In [None]:
updatedDatasetMulti = multi_dataset.map(changeToBinary)

  0%|          | 0/640000 [00:00<?, ?ex/s]

  0%|          | 0/16000 [00:00<?, ?ex/s]

  0%|          | 0/16000 [00:00<?, ?ex/s]

In [None]:
tokenizer = AutoTokenizer.from_pretrained("bert-base-multilingual-cased")
numLabels = 2
targetNames = ['Negative', 'Positive']

**Two labels, No Neutrals; N=128, M=382; overall 510 tokens**

In [None]:
N = 128
M = 382
reviewType = "review_body"

In [None]:
bertMultiLingualModel_N_128_M_382 = BertForSequenceClassification.from_pretrained("bert-base-multilingual-cased", num_labels=2)

Downloading model.safetensors:   0%|          | 0.00/714M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(bertMultiLingualModel_N_128_M_382, training_args, updatedDatasetMulti, 120000, "amazon")

  0%|          | 0/640 [00:00<?, ?ba/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (568 > 512). Running this sequence through the model will result in indexing errors


  0%|          | 0/16 [00:00<?, ?ba/s]

  0%|          | 0/16 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,0.356,0.341616,0.886687


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(bertMultiLingualModel_N_128_M_382, tokenizer, updatedDatasetMulti, translatedTestSet, targetNames, "bert-base-multilingual-cased-2-labels-No-Neutrals-N-128-M-382-head+tail", batch_size=8)

[1mbert-base-multilingual-cased-2-labels-No-Neutrals-N-128-M-382-head+tail[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.8591 |   0.9320 |     0.8941 |      6000 |
| Positive |      0.9257 |   0.8472 |     0.8847 |      6000 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.8566 |   0.9289 |     0.8913 |      8000 |
| Positive |      0.9223 |   0.8445 |     0.8817 |      8000 |
+----------+-------------+----------+------------+-----------+


**Two labels, No Neutrals; N=64, M=64; overall 128 tokens**

In [None]:
N = 64
M = 64
reviewType = "review_body"

In [None]:
bertMultiLingualModel_N_64_M_64 = BertForSequenceClassification.from_pretrained("bert-base-multilingual-cased", num_labels=numLabels)

Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(bertMultiLingualModel_N_64_M_64, training_args, updatedDatasetMulti, 120000, "amazon")

  0%|          | 0/640 [00:00<?, ?ba/s]

  0%|          | 0/16 [00:00<?, ?ba/s]

  0%|          | 0/16 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,0.3235,0.298463,0.901875


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(bertMultiLingualModel_N_64_M_64, tokenizer, updatedDatasetMulti, translatedTestSet, targetNames, "bert-base-multilingual-cased-2-labels-No-Neutrals-N-64-M-64-head+tail", batch_size=8)

[1mbert-base-multilingual-cased-2-labels-No-Neutrals-N-64-M-64-head+tail[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.8870 |   0.9240 |     0.9051 |      6000 |
| Positive |      0.9207 |   0.8823 |     0.9011 |      6000 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.8903 |   0.9276 |     0.9086 |      8000 |
| Positive |      0.9245 |   0.8858 |     0.9047 |      8000 |
+----------+-------------+----------+------------+-----------+


**Two labels, No Neutrals; N=510, M=0; overall 510 tokens**

In [None]:
N = 510
M = 0
reviewType = "review_body"

In [None]:
bertMultiLingualModel_N_510_M_0 = BertForSequenceClassification.from_pretrained("bert-base-multilingual-cased", num_labels=numLabels)

Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(bertMultiLingualModel_N_510_M_0, training_args, updatedDatasetMulti, 120000, "amazon")

  0%|          | 0/640 [00:00<?, ?ba/s]

  0%|          | 0/16 [00:00<?, ?ba/s]

  0%|          | 0/16 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,0.3567,0.315001,0.87525


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(bertMultiLingualModel_N_510_M_0, tokenizer, updatedDatasetMulti, translatedTestSet, targetNames, "bert-base-multilingual-cased-2-labels-No-Neutrals-N-510-M-0-head-only", batch_size=8)

[1mbert-base-multilingual-cased-2-labels-No-Neutrals-N-510-M-0-head-only[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.8408 |   0.9270 |     0.8818 |      6000 |
| Positive |      0.9187 |   0.8245 |     0.8690 |      6000 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.8370 |   0.9346 |     0.8831 |      8000 |
| Positive |      0.9260 |   0.8180 |     0.8687 |      8000 |
+----------+-------------+----------+------------+-----------+


**Two labels, No Neutrals; N=0, M=510; overall 510 tokens**

In [None]:
N = 0
M = 510
reviewType = "review_body"

In [None]:
bertMultiLingualModel_N_0_M_510 = BertForSequenceClassification.from_pretrained("bert-base-multilingual-cased", num_labels=numLabels)

Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(bertMultiLingualModel_N_0_M_510, training_args, updatedDatasetMulti, 120000, "amazon")

  0%|          | 0/640 [00:00<?, ?ba/s]

  0%|          | 0/16 [00:00<?, ?ba/s]

  0%|          | 0/16 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,0.3411,0.304153,0.897813


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(bertMultiLingualModel_N_0_M_510, tokenizer, updatedDatasetMulti, translatedTestSet, targetNames, "bert-base-multilingual-cased-2-labels-No-Neutrals-N-0-M-510-tail-only", batch_size=8)

[1mbert-base-multilingual-cased-2-labels-No-Neutrals-N-0-M-510-tail-only[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.8680 |   0.9335 |     0.8995 |      6000 |
| Positive |      0.9281 |   0.8580 |     0.8917 |      6000 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.8670 |   0.9434 |     0.9036 |      8000 |
| Positive |      0.9379 |   0.8552 |     0.8947 |      8000 |
+----------+-------------+----------+------------+-----------+


**BERT MULTILINGUAL UNCASED**

In [None]:
tokenizer = AutoTokenizer.from_pretrained("bert-base-multilingual-uncased")

**Two labels, No Neutrals; N=128, M=382; overall 510 tokens**

In [None]:
N = 128
M = 382
reviewType = "review_body"

In [None]:
bertMultiLingualModelUncased_N_128_M_382 = BertForSequenceClassification.from_pretrained("bert-base-multilingual-uncased", num_labels=numLabels)

Downloading model.safetensors:   0%|          | 0.00/672M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingu

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(bertMultiLingualModelUncased_N_128_M_382, training_args, updatedDatasetMulti, 120000, "amazon")

  0%|          | 0/640 [00:00<?, ?ba/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (544 > 512). Running this sequence through the model will result in indexing errors


  0%|          | 0/16 [00:00<?, ?ba/s]

  0%|          | 0/16 [00:00<?, ?ba/s]

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

  ········································


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3102,0.295687,0.903813


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(bertMultiLingualModelUncased_N_128_M_382, tokenizer, updatedDatasetMulti, translatedTestSet, targetNames, "bert-base-multilingual-uncased-2-labels-No-Neutrals-N-128-M-382-head+tail", batch_size=8)

[1mbert-base-multilingual-uncased-2-labels-No-Neutrals-N-128-M-382-head+tail[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.8864 |   0.9277 |     0.9066 |      6000 |
| Positive |      0.9241 |   0.8812 |     0.9021 |      6000 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.8895 |   0.9327 |     0.9106 |      8000 |
| Positive |      0.9293 |   0.8841 |     0.9062 |      8000 |
+----------+-------------+----------+------------+-----------+


**Two labels, No Neutrals; N=64, M=64; overall 128 tokens**

In [None]:
N = 64
M = 64
reviewType = "review_body"

In [None]:
bertMultiLingualModelUncased_N_64_M_64 = BertForSequenceClassification.from_pretrained("bert-base-multilingual-uncased", num_labels=numLabels)

Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingu

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(bertMultiLingualModelUncased_N_64_M_64, training_args, updatedDatasetMulti, 120000, "amazon")

  0%|          | 0/640 [00:00<?, ?ba/s]

  0%|          | 0/16 [00:00<?, ?ba/s]

  0%|          | 0/16 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,0.3212,0.289871,0.90525


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(bertMultiLingualModelUncased_N_64_M_64, tokenizer, updatedDatasetMulti, translatedTestSet, targetNames, "bert-base-multilingual-uncased-2-labels-No-Neutrals-N-64-M-64-head+tail", batch_size=8)

[1mbert-base-multilingual-uncased-2-labels-No-Neutrals-N-64-M-64-head+tail[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.8878 |   0.9232 |     0.9051 |      6000 |
| Positive |      0.9200 |   0.8833 |     0.9013 |      6000 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.8955 |   0.9291 |     0.9120 |      8000 |
| Positive |      0.9264 |   0.8916 |     0.9087 |      8000 |
+----------+-------------+----------+------------+-----------+


**Two labels, No Neutrals; N=510, M=0; overall 510 tokens**

In [None]:
N = 510
M = 0
reviewType = "review_body"

In [None]:
bertMultiLingualModelUncased_N_510_M_0 = BertForSequenceClassification.from_pretrained("bert-base-multilingual-uncased", num_labels=numLabels)

Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingu

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(bertMultiLingualModelUncased_N_510_M_0, training_args, updatedDatasetMulti, 120000, "amazon")

  0%|          | 0/640 [00:00<?, ?ba/s]

  0%|          | 0/16 [00:00<?, ?ba/s]

  0%|          | 0/16 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,0.3099,0.281954,0.910625


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(bertMultiLingualModelUncased_N_510_M_0, tokenizer, updatedDatasetMulti, translatedTestSet, targetNames, "bert-base-multilingual-uncased-2-labels-No-Neutrals-N-510-M-0-head-only", batch_size=8)

[1mbert-base-multilingual-uncased-2-labels-No-Neutrals-N-510-M-0-head-only[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.8925 |   0.9283 |     0.9101 |      6000 |
| Positive |      0.9253 |   0.8882 |     0.9064 |      6000 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.8923 |   0.9354 |     0.9133 |      8000 |
| Positive |      0.9321 |   0.8871 |     0.9091 |      8000 |
+----------+-------------+----------+------------+-----------+


**Two labels, No Neutrals; N=0, M=510; overall 510 tokens**

In [None]:
N = 0
M = 510
reviewType = "review_body"

In [None]:
bertMultiLingualModelUncased_N_0_M_510 = BertForSequenceClassification.from_pretrained("bert-base-multilingual-uncased", num_labels=numLabels)

Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingu

In [None]:
training_args = TrainingArguments(output_dir="test_trainer", optim="adamw_torch", warmup_steps=10000, num_train_epochs=1, weight_decay=1e-1, evaluation_strategy="epoch", save_strategy = saveStrategy)

In [None]:
train(bertMultiLingualModelUncased_N_0_M_510, training_args, updatedDatasetMulti, 120000, "amazon")

  0%|          | 0/640 [00:00<?, ?ba/s]

  0%|          | 0/16 [00:00<?, ?ba/s]

  0%|          | 0/16 [00:00<?, ?ba/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,0.3091,0.279787,0.909937


In [None]:
evaluateTranslatedTestSetAndRegularTestSet(bertMultiLingualModelUncased_N_0_M_510, tokenizer, updatedDatasetMulti, translatedTestSet, targetNames, "bert-base-multilingual-uncased-2-labels-No-Neutrals-N-0-M-510-tail-only", batch_size=8)

[1mbert-base-multilingual-uncased-2-labels-No-Neutrals-N-0-M-510-tail-only[0m
Classification Report Translated Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.8965 |   0.9257 |     0.9109 |      6000 |
| Positive |      0.9232 |   0.8932 |     0.9079 |      6000 |
+----------+-------------+----------+------------+-----------+
Classification Report Test Set
+----------+-------------+----------+------------+-----------+
| Class    |   Precision |   Recall |   F1-Score |   Support |
|----------+-------------+----------+------------+-----------|
| Negative |      0.8985 |   0.9290 |     0.9135 |      8000 |
| Positive |      0.9265 |   0.8950 |     0.9105 |      8000 |
+----------+-------------+----------+------------+-----------+
