In [1]:
pip install transformers

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score
import torch
from transformers import TrainingArguments, Trainer
from transformers import BertTokenizer, BertForSequenceClassification
from transformers import EarlyStoppingCallback

In [3]:
# from google.colab import files
# uploaded = files.upload()

In [4]:
data = pd.read_csv("c_500_modified.csv")

In [5]:
# data.drop(data.index[20:], inplace=True)

In [6]:
data

Unnamed: 0,Label,Comment
0,0,I had an accident with an Uber driver in Mexic...
1,1,I have had my account completely hacked to whe...
2,1,I requested an 8 mile ride in Boston on a Satu...
3,1,Uber is overcharging for Toll fees. When In Fl...
4,1,I had an airport flight today. Uber would not ...
...,...,...
494,0,"Our drivers, two different ones, were very pol..."
495,0,Baltimore drivers are not as courteous as Dall...
496,1,"As a disabled person, when in SF, Uber and Lyf..."
497,0,I had no problem with my Uber experience excep...


In [7]:
model_name = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=2)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [8]:
data.rename(columns = {'Label':'sentiment'}, inplace = True)
data.rename(columns = {'Comment':'review'}, inplace = True)

In [9]:
data

Unnamed: 0,sentiment,review
0,0,I had an accident with an Uber driver in Mexic...
1,1,I have had my account completely hacked to whe...
2,1,I requested an 8 mile ride in Boston on a Satu...
3,1,Uber is overcharging for Toll fees. When In Fl...
4,1,I had an airport flight today. Uber would not ...
...,...,...
494,0,"Our drivers, two different ones, were very pol..."
495,0,Baltimore drivers are not as courteous as Dall...
496,1,"As a disabled person, when in SF, Uber and Lyf..."
497,0,I had no problem with my Uber experience excep...


In [10]:
X = list(data["review"])
y = list(data["sentiment"])
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2)
X_train_tokenized = tokenizer(X_train, padding=True, truncation=True, max_length=512)
X_val_tokenized = tokenizer(X_val, padding=True, truncation=True, max_length=512)

In [11]:
class Dataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels=None):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        if self.labels:
            item["labels"] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.encodings["input_ids"])

In [12]:
train_dataset = Dataset(X_train_tokenized, y_train)
val_dataset = Dataset(X_val_tokenized, y_val)

In [13]:
def compute_metrics(p):
    print('Inside compute metrics')
    pred, labels = p
    pred = np.argmax(pred, axis=1)

    accuracy = accuracy_score(y_true=labels, y_pred=pred)
    recall = recall_score(y_true=labels, y_pred=pred)
    precision = precision_score(y_true=labels, y_pred=pred)
    f1 = f1_score(y_true=labels, y_pred=pred)

    return {"accuracy": accuracy, "precision": precision, "recall": recall, "f1": f1}

In [14]:
args = TrainingArguments(
    output_dir="output",
    evaluation_strategy="steps",
    eval_steps=500,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    seed=0,
    load_best_model_at_end=True,
)

In [15]:
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],
)

In [16]:
trainer.train()



Step,Training Loss,Validation Loss


TrainOutput(global_step=150, training_loss=0.6187407430013021, metrics={'train_runtime': 28.4962, 'train_samples_per_second': 42.006, 'train_steps_per_second': 5.264, 'total_flos': 314943933265920.0, 'train_loss': 0.6187407430013021, 'epoch': 3.0})

In [18]:
test_data = pd.read_csv("c_500_modified.csv")

In [19]:
# test_data.drop(test_data.index[0:20], inplace=True)
test_data.drop(test_data.index[40:], inplace=True)

In [22]:
test_data.rename(columns = {'Label':'sentiment'}, inplace = True)
test_data.rename(columns = {'Comment':'review'}, inplace = True)
test_data

Unnamed: 0,sentiment,review
0,0,I had an accident with an Uber driver in Mexic...
1,1,I have had my account completely hacked to whe...
2,1,I requested an 8 mile ride in Boston on a Satu...
3,1,Uber is overcharging for Toll fees. When In Fl...
4,1,I had an airport flight today. Uber would not ...
5,1,In July of this year I had sushi delivered to ...
6,1,"My driver, Rohan was nice, but when I tried to..."
7,1,I had seven fraudulent Uber transactions over ...
8,0,Our driver never showed up and Uber cancelled ...
9,1,"When the service worked, it was good, and tech..."


In [23]:
test_data = test_data
X_test = list(test_data["review"])
X_test_tokenized = tokenizer(X_test, padding=True, truncation=True, max_length=512)

In [39]:
print(test_data['review'][0])

I had an accident with an Uber driver in Mexico City. The car that I got into had no side mirror. The Brakes were not working properly either. I almost got into an accident twice. The driver's conversation was unpleasant. Being a foreigner he was very curious to ask where I am from and what brought me to Mexico. I replied to be a tourist and through that conversation is over. He became very rude and asked me if I came to look for a Mexican husband. I never answered and kept quiet. He took the wrong route and made several in requested stops. Having in mind it was uberX. He continued asked me whom I sleep with. He literally stopped the car and asked me to wait for him text someone. I asked him to let me go and take a different driver but he locked the doors and didn't allow me.


In [24]:
test_dataset = Dataset(X_test_tokenized)

In [25]:
raw_pred, _, _ = trainer.predict(test_dataset)

In [26]:
y_pred = np.argmax(raw_pred, axis=1)

In [44]:
if(y_pred[2] == 0):
    print('Prediction - Fair review')
if(y_pred[2] == 1):
    print('Prediction - Unfair review')
print('Review - ', test_data['review'][2])


Prediction - Unfair review
Review -  I requested an 8 mile ride in Boston on a Saturday night around 1:00 am that was coming up around $55 on the app and Lyft was around the same rate. Ended up taking a regular cab that cost me $20. Beware of high rates.


In [28]:
test_labels = test_data['sentiment'].values

In [29]:
print(test_labels)

[0 1 1 1 1 1 1 1 0 1 1 0 0 0 0 0 1 0 1 1 0 1 1 0 1 1 0 1 1 1 1 1 0 1 1 1 1
 1 1 0]


In [46]:
print('Accuracy - ', accuracy_score(y_true=test_labels, y_pred=y_pred))
print('Recall score -', recall_score(y_true=test_labels, y_pred=y_pred))
print('Precision score - ', precision_score(y_true=test_labels, y_pred=y_pred))
print('F1 score - ', f1_score(y_true=test_labels, y_pred=y_pred))

Accuracy -  0.875
Recall score - 0.8518518518518519
Precision score -  0.9583333333333334
F1 score -  0.9019607843137256


In [45]:
import numpy as np
mse = np.mean((test_labels - y_pred) ** 2)
print('The mean squared error for test dataset was found to be - ', mse)

The mean squared error for test dataset was found to be -  0.125
