In [None]:
import functools
import importlib.metadata

import numpy as np
import pandas as pd
import torch
from datasets import Dataset, DatasetDict, load_dataset, load_metric
from huggingface_hub import notebook_login
from sklearn.model_selection import train_test_split
from transformers import AutoTokenizer, DataCollatorWithPadding, AutoModelForSequenceClassification, TrainingArguments, Trainer

# Report the installed version of every library this notebook relies on, so a
# run can be reproduced later. importlib.metadata replaces the deprecated
# pkg_resources API; a distribution that is not installed is reported instead
# of aborting the whole cell. The duplicated 'transformers' entry is dropped.
packages = ['pandas','numpy','scikit-learn','datasets','transformers','huggingface_hub','torch']
for package in packages:
    try:
        print(package, importlib.metadata.version(package))
    except importlib.metadata.PackageNotFoundError:
        print(package, 'is not installed')

To refamiliarize ourselves with the `DatasetDict` and `Dataset` objects that transformers works with, let's load the packaged IMDB dataset and take a look at a sample of it:

In [None]:
# Load the packaged IMDB DatasetDict and keep a reproducible 3,000-row sample
# of the shuffled training split; the bare `example` on the last line renders it.
imdb = load_dataset('imdb')
# select() accepts any iterable of indices, so range() is sufficient; there is
# no need to materialise the indices through list() plus a comprehension.
example = imdb["train"].shuffle(seed=42).select(range(3000))
example

Free the memory held by these example objects before building our own dataset:

In [None]:
# Drop both exploratory objects in one statement so their memory can be reclaimed.
del imdb, example

I wanted to forgo `load_dataset` because many real-world problems do not come with training data prepackaged.  
This is actually my biggest gripe with computer vision tutorials: everything is already pre-cleaned for you.  
Instead, I will build the dataset myself, which also lets me control how it is split into train and test.

In [None]:
# Read the raw reviews from a local CSV instead of the packaged dataset.
imdb = pd.read_csv('IMDB Dataset.csv')
x = imdb['review']
y = imdb['sentiment']

# A 2-label classification head needs integer class ids.
# NOTE(review): the CSV is presumed to store sentiment as 'positive'/'negative'
# strings; confirm against the file. Labels that are already numeric skip this
# branch and are left untouched.
if not pd.api.types.is_numeric_dtype(y):
    y = y.str.strip().str.lower().map({'negative': 0, 'positive': 1})
    if y.isna().any():
        # Fail here with a clear message rather than deep inside the training loop.
        raise ValueError("sentiment column contains values other than 'positive'/'negative'")
    y = y.astype('int64')

df_length = len(x)

In [None]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.20,
    random_state=42,
)

# Each split becomes a two-column frame with 'text' and 'label' columns.
train_df = pd.DataFrame({'text': x_train, 'label': y_train})
test_df = pd.DataFrame({'text': x_test, 'label': y_test})

# The "unsupervised" frame is the complete CSV under the same column names.
unsupervised_df = (
    imdb[['review', 'sentiment']]
    .rename(columns={'review': 'text', 'sentiment': 'label'})
)

In [None]:
# Match the layout of a typical Hugging Face DatasetDict.
# preserve_index=False stops the shuffled pandas index from being stored as an
# '__index_level_0__' column, so nothing has to be removed afterwards, and the
# cell no longer raises when a frame happens to carry a plain RangeIndex.
train_dataset = Dataset.from_pandas(train_df, preserve_index=False)
test_dataset = Dataset.from_pandas(test_df, preserve_index=False)
unsupervised_dataset = Dataset.from_pandas(unsupervised_df, preserve_index=False)

dataset_dict = DatasetDict({
    'train': train_dataset,
    'test': test_dataset,
    'unsupervised': unsupervised_dataset,
})
dataset_dict

Now that we have an object that the transformers package expects, we can continue.  
Also, we now have code that we can repurpose for custom datasets.

In [None]:
# Load the tokenizer for the "distilbert-base-uncased" checkpoint.
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

In [None]:
def preprocess_function(examples):
    """Tokenize the ``text`` field of a batch with truncation enabled.

    Args:
        examples: Mapping whose ``"text"`` entry holds the raw inputs.

    Returns:
        Whatever the module-level ``tokenizer`` produces for those inputs.
    """
    texts = examples["text"]
    encoded = tokenizer(texts, truncation=True)
    return encoded
 
# Tokenize the train and test splits in batches with the same preprocessing function.
tokenized_train, tokenized_test = (
    dataset_dict[split].map(preprocess_function, batched=True)
    for split in ('train', 'test')
)

# Padding collator built around the same tokenizer.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

In [None]:
# Load the "distilbert-base-uncased" checkpoint with a 2-label classification head.
model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=2)
# Use the GPU when one is available and fall back to the CPU otherwise; a
# hard-coded 'cuda' raises on machines without CUDA.
model.to('cuda' if torch.cuda.is_available() else 'cpu')

Taking heed of the warning: it is expected, given that we are changing the output layer to 2 possible labels.

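Now let's define our metrics: accuracy and F1. (I also wanted loss, as I don't understand why the example does not include it; `compute_metrics` below does not return it, but `trainer.evaluate()` reports it as `eval_loss` alongside these metrics.)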

In [None]:
@functools.lru_cache(maxsize=None)
def _load_metric_once(name):
    """Load the metric called ``name`` once and reuse it on later calls."""
    return load_metric(name)


def compute_metrics(eval_pred):
    """Compute accuracy and F1 for one evaluation pass.

    Args:
        eval_pred: A ``(logits, labels)`` pair. The predicted class of each row
            is the argmax of ``logits`` over its last axis.

    Returns:
        A dict with the keys ``"accuracy"`` and ``"f1"``.
    """
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)

    # The metric objects are fetched through the cache above, so load_metric
    # runs once per metric name instead of on every evaluation call.
    accuracy = _load_metric_once("accuracy").compute(
        predictions=predictions, references=labels
    )["accuracy"]
    f1 = _load_metric_once("f1").compute(
        predictions=predictions, references=labels
    )["f1"]
    return {"accuracy": accuracy, "f1": f1}

In [None]:
# Log in to the Hugging Face Hub from inside the notebook; the training
# arguments further down set push_to_hub=True.
# NOTE(review): notebook_login is already imported in the first cell; the
# import below repeats it.
from huggingface_hub import notebook_login
notebook_login()

Let's train for 5 epochs (`num_train_epochs=5` below):

In [None]:
# Output directory for the run. push_to_hub is enabled below.
# NOTE(review): presumably this is also the Hub repository name; confirm.
repo_name = "finetuning-sentiment-model-imdb-full"

# Hyperparameters for fine-tuning; a checkpoint is saved at the end of every epoch.
training_args = TrainingArguments(
    output_dir=repo_name,
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=5,
    weight_decay=0.01,
    save_strategy="epoch",
    push_to_hub=True,
    # Mixed precision needs a CUDA device; enabling it unconditionally makes
    # TrainingArguments raise on CPU-only machines. On a GPU this is still True.
    fp16=torch.cuda.is_available(),
)

# Wire the model, the tokenized splits, the padding collator and the metric
# function into a single Trainer.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_test,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

In [None]:
# Run fine-tuning with the Trainer configured from training_args.
trainer.train()

In [None]:
# Evaluate on the Trainer's eval_dataset (tokenized_test); compute_metrics
# contributes the accuracy and F1 values.
trainer.evaluate()

In [None]:
from transformers import pipeline

# Build a pipeline from the fine-tuned model id and score two hand-written
# reviews; the call on the last line renders the predictions.
sentiment_model = pipeline(model="slyle/finetuning-sentiment-model-imdb-full")
sentiment_model(
    [
        "I wish this movie had more action.  If there was more action, it'd be good.  But as it stands, this movie left a lot to be desired.  I wouldn't watch it again.",
        "This movie was okay, and I'd watch it again.  I think there was enough in it to make it interesting and fun.",
    ]
)