# Roberta Classifier on Yelp: baseline


In [1]:
import json
import os
from typing import List

%pip install datasets
%pip install transformers
import torch
from datasets import load_dataset
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from torch.utils.data import DataLoader
from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments


ROOT_DIR = "drive/My Drive/Colab Notebooks/nlp/results/yelp_baseline"
if not os.path.exists(ROOT_DIR):
    os.mkdir(ROOT_DIR)



In [2]:
def get_datasets(dataset_name, train_size, val_size=1_000, test_size=None, random_seed: int = 42):
    """Returns """
    dataset = load_dataset(dataset_name, split="train")
    test_dataset = load_dataset(dataset_name, split="test")
    # We want test and validation data to be the same for every experiment
    if test_size:
        test_dataset = test_dataset.train_test_split(test_size=test_size, seed=random_seed)["test"]
    train_val_split = dataset.train_test_split(test_size=val_size, seed=random_seed)
    # Validation and test sets
    train_dataset = train_val_split["train"].train_test_split(train_size=train_size, seed=random_seed)["train"]
    val_dataset = train_val_split["test"]
    return train_dataset, val_dataset, test_dataset


class DataCollator:
    def __init__(self, tokenizer):
        self.tokenizer = tokenizer
        
    def __call__(self, examples: List[dict]):
        labels = [example['label'] for example in examples]
        texts = [example['text'] for example in examples]
        tokenizer_output = self.tokenizer(texts, truncation=True, padding=True)
        return {
            'labels': torch.tensor(labels), 
            'input_ids': torch.tensor(tokenizer_output['input_ids']), 
            'attention_mask': torch.tensor(tokenizer_output['attention_mask'])
            }



def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='binary')
    acc = accuracy_score(labels, preds)
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }

In [3]:
tokenizer = AutoTokenizer.from_pretrained('roberta-base', use_fast=True)
model = AutoModelForSequenceClassification.from_pretrained('roberta-base', return_dict=True)
data_collator = DataCollator(tokenizer)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifie

In [4]:
TRAIN_SIZES = [20, 100, 1_000, 10_000]
for train_size in TRAIN_SIZES:
    train_dataset, val_dataset, test_dataset = get_datasets("yelp_polarity", train_size, val_size=1_000, test_size=5_000)
    print(f"Train size: {len(train_dataset)}, Validation size: {len(val_dataset)}, Test size: {len(test_dataset)}")
    print(train_dataset[0])
    print(val_dataset[0])
    print(test_dataset[0])
    output_dir = os.path.join(ROOT_DIR, f"train_size_{train_size}")

    # https://huggingface.co/transformers/main_classes/trainer.html#trainingarguments
    training_args = TrainingArguments(
        learning_rate=3e-5,
        weight_decay=0.01,
        output_dir=output_dir,
        num_train_epochs=6,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        gradient_accumulation_steps=2,  # actual batch size: 16 (as suggested in Bert paper)
        warmup_steps=0,  # don't have any intuition for the right value here
        logging_dir=output_dir,
        logging_steps=10,
        load_best_model_at_end=True,
        evaluation_strategy='epoch',
        remove_unused_columns=False,
        no_cuda=False,
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        data_collator=data_collator,
        compute_metrics=compute_metrics
        
    )

    trainer.train()

    test_result = trainer.evaluate(test_dataset)

    print(test_result)

    with open(os.path.join(output_dir, 'test_result.json'), 'w') as f:
        json.dump(test_result, f, indent=4)

Reusing dataset yelp_polarity (/root/.cache/huggingface/datasets/yelp_polarity/plain_text/1.0.0/2b33212d89209ed1ea0522001bccc5f5a5c920dd9c326f3c828e67a22c51a98c)
Reusing dataset yelp_polarity (/root/.cache/huggingface/datasets/yelp_polarity/plain_text/1.0.0/2b33212d89209ed1ea0522001bccc5f5a5c920dd9c326f3c828e67a22c51a98c)
Loading cached split indices for dataset at /root/.cache/huggingface/datasets/yelp_polarity/plain_text/1.0.0/2b33212d89209ed1ea0522001bccc5f5a5c920dd9c326f3c828e67a22c51a98c/cache-07a3bc09247d2cd5.arrow and /root/.cache/huggingface/datasets/yelp_polarity/plain_text/1.0.0/2b33212d89209ed1ea0522001bccc5f5a5c920dd9c326f3c828e67a22c51a98c/cache-c589a9143254ed8c.arrow
Loading cached split indices for dataset at /root/.cache/huggingface/datasets/yelp_polarity/plain_text/1.0.0/2b33212d89209ed1ea0522001bccc5f5a5c920dd9c326f3c828e67a22c51a98c/cache-f3c2c50366d14000.arrow and /root/.cache/huggingface/datasets/yelp_polarity/plain_text/1.0.0/2b33212d89209ed1ea0522001bccc5f5a5c920

Train size: 20, Validation size: 1000, Test size: 5000
{'label': 0, 'text': 'For a minute I thought the commuter terminal at PHX didn\'t even have a bar, then I stumbled into \\""All Star Sports Bar.\\""  After spending about an hour here I wish there was no bar at all. \\n\\nThis place fails on almost every level imaginable. \\n\\nFirst of all, it\'s not anywhere close to being a sports bar.  2 TV\'s and bartenders in referee uniforms doesn\'t make you a sports bar.  Furthermore, the food here could not be worse.  \\n\\nLook - I\'m not opposed to the plastic cheese from a can on nachos, but if you can\'t even make that taste good, I can\'t even begin to imagine what the rest of the food here is like. \\n\\nUnlike the other reviewers I actually found the service (at the bar) to be fine, if not indifferent, and the one good thing I will say about this place is that they do make a pretty good Bloody Mary. You can credit that to the Ocean Spray mix they used so it\'s not exactly rocket sc

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
0,No log,0.694498,0.505,0.671096,0.505,1.0
1,No log,0.690641,0.505,0.671096,0.505,1.0
2,No log,0.689164,0.517,0.67649,0.511134,1.0
3,No log,0.68778,0.62,0.696,0.583893,0.861386
4,No log,0.686923,0.63,0.614583,0.648352,0.584158
5,No log,0.68644,0.617,0.566251,0.661376,0.49505


Reusing dataset yelp_polarity (/root/.cache/huggingface/datasets/yelp_polarity/plain_text/1.0.0/2b33212d89209ed1ea0522001bccc5f5a5c920dd9c326f3c828e67a22c51a98c)
Reusing dataset yelp_polarity (/root/.cache/huggingface/datasets/yelp_polarity/plain_text/1.0.0/2b33212d89209ed1ea0522001bccc5f5a5c920dd9c326f3c828e67a22c51a98c)
Loading cached split indices for dataset at /root/.cache/huggingface/datasets/yelp_polarity/plain_text/1.0.0/2b33212d89209ed1ea0522001bccc5f5a5c920dd9c326f3c828e67a22c51a98c/cache-07a3bc09247d2cd5.arrow and /root/.cache/huggingface/datasets/yelp_polarity/plain_text/1.0.0/2b33212d89209ed1ea0522001bccc5f5a5c920dd9c326f3c828e67a22c51a98c/cache-c589a9143254ed8c.arrow
Loading cached split indices for dataset at /root/.cache/huggingface/datasets/yelp_polarity/plain_text/1.0.0/2b33212d89209ed1ea0522001bccc5f5a5c920dd9c326f3c828e67a22c51a98c/cache-f3c2c50366d14000.arrow and /root/.cache/huggingface/datasets/yelp_polarity/plain_text/1.0.0/2b33212d89209ed1ea0522001bccc5f5a5c920

Train size: 100, Validation size: 1000, Test size: 5000
{'label': 1, 'text': 'In town for a short weekend trip and wanted a quick lunch.  Saw the reviews on Yelp and figured we had to give it a try.   Well, we loved it.    The service is amazing and friendly.   Joe, the owner, and his daughter, were lovely to chat with.     \\n\\nFood is fresh and prepared when you order.'}
{'label': 1, 'text': "Decent size, decent selection, decent staff.\\n\\nI guess that can wholly sum this place up, it's decent.  As with many other stores that are like this, the product rotates depending on what doesn't sale well at other stores.  Can always snag a deal here.  I was able to pick up a pretty sweet Puma jacket for $10, can't beat that, right?\\n\\nThat being said, there are those times that you may not find anything as well.  So really don't get your hopes up if you are looking for a specific item."}
{'label': 1, 'text': "Nightclub rating only...\\n\\nWe got lucky because we happened to arrive during

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
0,No log,0.671638,0.782,0.809107,0.725275,0.914851
1,0.711242,0.569951,0.747,0.670143,0.980916,0.508911
2,0.711242,0.379999,0.896,0.898438,0.88632,0.910891
3,0.499623,0.27309,0.915,0.909478,0.983871,0.845545
4,0.200273,0.23631,0.925,0.920467,0.990868,0.859406
5,0.200273,0.188462,0.943,0.942132,0.966667,0.918812


Reusing dataset yelp_polarity (/root/.cache/huggingface/datasets/yelp_polarity/plain_text/1.0.0/2b33212d89209ed1ea0522001bccc5f5a5c920dd9c326f3c828e67a22c51a98c)
Reusing dataset yelp_polarity (/root/.cache/huggingface/datasets/yelp_polarity/plain_text/1.0.0/2b33212d89209ed1ea0522001bccc5f5a5c920dd9c326f3c828e67a22c51a98c)
Loading cached split indices for dataset at /root/.cache/huggingface/datasets/yelp_polarity/plain_text/1.0.0/2b33212d89209ed1ea0522001bccc5f5a5c920dd9c326f3c828e67a22c51a98c/cache-07a3bc09247d2cd5.arrow and /root/.cache/huggingface/datasets/yelp_polarity/plain_text/1.0.0/2b33212d89209ed1ea0522001bccc5f5a5c920dd9c326f3c828e67a22c51a98c/cache-c589a9143254ed8c.arrow
Loading cached split indices for dataset at /root/.cache/huggingface/datasets/yelp_polarity/plain_text/1.0.0/2b33212d89209ed1ea0522001bccc5f5a5c920dd9c326f3c828e67a22c51a98c/cache-f3c2c50366d14000.arrow and /root/.cache/huggingface/datasets/yelp_polarity/plain_text/1.0.0/2b33212d89209ed1ea0522001bccc5f5a5c920

Train size: 1000, Validation size: 1000, Test size: 5000
{'label': 0, 'text': "Let me just start by saying DO NOT BOOK THIS HOTEL... I will give you our experience is short then explain everything in full. Within the first 10 hours we were switched to 3 different rooms, treated like crap by all the employees and rooms smell either like cat piss or dirty trash!!! Okay here it goes:\\nSo this is everything off the top of my head... mix in extreme attitude from the staff as if they hate their jobs and are being forced to work there\\n1. We had to walk through the whole casino floor with 9 bags to the check in at 9:45pm\\n2. Check in line was 45 minutes which we understand is not their fault they were busy but at least be nice and smile \\n3. We go to the far tower with 9 bags to our room and our key doesnt work\\n4. We wait 25 minutes in a hot hallway for security to open our door you then acts like it isnt our room as if we are stupid and went to the wrong door and knocks over and over o

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
0,0.124411,0.312827,0.926,0.931227,0.877408,0.992079
1,0.158581,0.207483,0.955,0.956438,0.935606,0.978218
2,0.111012,0.338308,0.93,0.933837,0.893309,0.978218
3,0.028893,0.402189,0.937,0.94051,0.898917,0.986139
4,0.00035,0.253589,0.961,0.961039,0.969758,0.952475
5,0.000307,0.252051,0.961,0.960883,0.973577,0.948515


Reusing dataset yelp_polarity (/root/.cache/huggingface/datasets/yelp_polarity/plain_text/1.0.0/2b33212d89209ed1ea0522001bccc5f5a5c920dd9c326f3c828e67a22c51a98c)
Reusing dataset yelp_polarity (/root/.cache/huggingface/datasets/yelp_polarity/plain_text/1.0.0/2b33212d89209ed1ea0522001bccc5f5a5c920dd9c326f3c828e67a22c51a98c)
Loading cached split indices for dataset at /root/.cache/huggingface/datasets/yelp_polarity/plain_text/1.0.0/2b33212d89209ed1ea0522001bccc5f5a5c920dd9c326f3c828e67a22c51a98c/cache-07a3bc09247d2cd5.arrow and /root/.cache/huggingface/datasets/yelp_polarity/plain_text/1.0.0/2b33212d89209ed1ea0522001bccc5f5a5c920dd9c326f3c828e67a22c51a98c/cache-c589a9143254ed8c.arrow
Loading cached split indices for dataset at /root/.cache/huggingface/datasets/yelp_polarity/plain_text/1.0.0/2b33212d89209ed1ea0522001bccc5f5a5c920dd9c326f3c828e67a22c51a98c/cache-f3c2c50366d14000.arrow and /root/.cache/huggingface/datasets/yelp_polarity/plain_text/1.0.0/2b33212d89209ed1ea0522001bccc5f5a5c920

Train size: 10000, Validation size: 1000, Test size: 5000
{'label': 1, 'text': 'This place is awesome the food is good..'}
{'label': 1, 'text': "Decent size, decent selection, decent staff.\\n\\nI guess that can wholly sum this place up, it's decent.  As with many other stores that are like this, the product rotates depending on what doesn't sale well at other stores.  Can always snag a deal here.  I was able to pick up a pretty sweet Puma jacket for $10, can't beat that, right?\\n\\nThat being said, there are those times that you may not find anything as well.  So really don't get your hopes up if you are looking for a specific item."}
{'label': 1, 'text': "Nightclub rating only...\\n\\nWe got lucky because we happened to arrive during Kris Humphries' (new husband of Kim Kardashian) bachelor party.\\n\\nI also saw Jordan Farmar, Lamar Odom and Scott.\\n\\nPlace was packed on a Saturday night and we didn't want to wait in line so we did bottle service. Cost us $575 total for 5 guys and

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.141533,0.129239,0.96,0.960396,0.960396,0.960396
2,0.079735,0.120618,0.972,0.972056,0.979879,0.964356
3,0.049576,0.194458,0.966,0.966797,0.953757,0.980198
4,0.015181,0.22305,0.969,0.969277,0.970238,0.968317
5,0.028983,0.230698,0.968,0.968127,0.973948,0.962376
6,0.000154,0.238671,0.971,0.971029,0.979839,0.962376
