# Travel agency's reviews - classification with BERT

Implement and evaluate a classifier of user reviews with BERT.

In [None]:
!pip install transformers

In [2]:
import pandas as pd

reviews = pd.read_csv('https://raw.githubusercontent.com/mlcollege/natural-language-processing/master/data/en_reviews.csv', sep='\t', header=None, names =['rating', 'text'])
reviews[35:45]

Unnamed: 0,rating,text
35,5,I bought the cheapest tickets through this ser...
36,5,Such a pleasure to know that you will be prope...
37,5,I always use this website to look for flights ...
38,2,A startup that finds discount flight tickets '...
39,5,"Excellent customer service, fast and kind. Wan..."
40,4,very good service from Quan Costa to help me w...
41,3,.@Skypickercom Finds Cheap Flights 'Hidden' On...
42,5,I have a problem with my tickets skypicker don...
43,4,Even though it took a bit time untill an agent...
44,5,Today I had a great experience with one of Kiw...


## Preparation of train and test data sets
Separate and rename target values.

In [3]:
target = reviews['rating']
data = reviews['text']

print(data[:5])
print(target[:5])

0    A voucher to nowhere #skypickerfail 2400 out o...
1    I booked with Kiwi for the first time, just a ...
2    I would like to say THANKS YOU for your custom...
3    I just noticed 2 hours before my flight that I...
4    This is the first time I have dealt with Skypi...
Name: text, dtype: object
0    2
1    5
2    5
3    5
4    2
Name: rating, dtype: int64


Import the BERT model and tokenizer

In [4]:
from transformers import BertTokenizer, TFBertForSequenceClassification

In [None]:
bert_tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
bert_model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased',num_labels=5)

Split the data to train and test parts.

In [6]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.1)
print('Train size: {}'.format(len(X_train)))
print('Test size: {}'.format(len(X_test)))

Train size: 7013
Test size: 780


Tokenize the documents and create attention masks.

In [None]:
import numpy as np

train_ids=[]
train_masks=[]
test_ids=[]
test_masks=[]


for doc in X_train:
    bert_inp = bert_tokenizer.encode_plus(doc, add_special_tokens = True, pad_to_max_length = True, max_length = 64, return_attention_mask = True)
    train_ids.append(np.array(bert_inp['input_ids']))
    train_masks.append(np.array(bert_inp['attention_mask']))

for doc in X_test:
    bert_inp = bert_tokenizer.encode_plus(doc, add_special_tokens = True, pad_to_max_length = True, max_length = 64, return_attention_mask = True)
    test_ids.append(np.array(bert_inp['input_ids']))
    test_masks.append(np.array(bert_inp['attention_mask']))

train_ids = np.asarray(train_ids)
train_masks = np.asarray(train_masks)
test_ids = np.asarray(test_ids)
test_masks = np.asarray(test_masks)

print (train_ids.shape)
print (test_ids.shape)

One-hot encode the target values.

In [8]:
from tensorflow.python.keras.utils import np_utils

n_classes = 5
y_train = np_utils.to_categorical(y_train-1, n_classes)
y_test = np_utils.to_categorical(y_test-1, n_classes)

Compile the model.

In [9]:
import tensorflow as tf

loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.Adam(learning_rate=2e-5, epsilon=1e-08)
bert_model.compile(loss=loss,optimizer=optimizer,metrics=["accuracy"])

In [10]:
bert_model.fit([train_ids, train_masks], y_train, batch_size=32, epochs=3, validation_data=([test_ids, test_masks], y_test))

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x7fbf8abc4eb8>

## Evaluate the model

In [11]:
from sklearn import metrics
from sklearn.metrics import accuracy_score

y_pred = bert_model.predict([test_ids, test_masks])
y_test_class = np.argmax(y_test, axis=1)
y_pred_class = np.argmax(y_pred[0], axis=1)

print ("Test accuracy: {:.4f}".format(accuracy_score(y_test_class, y_pred_class)))
print ()
print(metrics.classification_report(y_test_class, y_pred_class, digits=4))

Test accuracy: 0.8141

              precision    recall  f1-score   support

           0     0.8036    0.8036    0.8036        56
           1     0.7778    0.5385    0.6364        26
           2     0.6757    0.7353    0.7042        34
           3     0.4118    0.1308    0.1986       107
           4     0.8457    0.9641    0.9010       557

    accuracy                         0.8141       780
   macro avg     0.7029    0.6345    0.6487       780
weighted avg     0.7735    0.8141    0.7803       780

