In [1]:
# data visualisation and manipulation
import pandas as pd #import the Pandas library to read the CSV and save it in a data frame
import numpy  as np

# vectorizers for creating the document-term-matrix (DTM)
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.preprocessing            import normalize

In [2]:
# Read the reviews CSV (it already carries a `cluster` column) into a DataFrame
clustered_reviews = pd.read_csv(
    filepath_or_buffer="reviews_taxi_data-clustered.csv",
    encoding="utf-8",
)
# Preview the first five rows (five is the default for head())
clustered_reviews.head()

Unnamed: 0,order_id,feedback_id,rider_id,driver_id,created_at,stars,custom_comment,clean_custom_comment,cluster
0,0d47c51b-a1df-4901-97eb-58ee193d9655,b89cf504-f2fb-4ad4-a113-1112f6da5265,9d9e0f25-a383-444e-afe7-01ed085d216f,d6c92134-8f65-4da4-b6ab-a2a5c4bbaf9f,2023-03-01 01:49:32,1,very not polite,very not polite,4
1,ae23c8d2-496a-47a8-8fb4-bd48f3e6f342,93748ca6-e77a-46e9-8460-f9c65b3781f6,576e7518-c1af-4697-9182-4251a488cbec,a635ef51-6da4-446a-9161-45d0a48ec2d2,2023-03-01 03:40:14,3,not a pleasant interaction overall. The driver...,not pleasant interaction overall The driver se...,4
2,def53af0-8c66-4759-ac16-7143b9e116d5,7edf5cf5-aa9f-4be6-8ca7-cd03c1aa66c0,e24c35a0-1cbf-43a1-9cae-171a89d1e7c1,57c5de49-2a44-46f8-800c-71a39490ea92,2023-03-01 06:46:58,1,Driver is a theif. Our travel cost is 154 and ...,Driver theif Our travel cost 154 and gave 200....,2
3,365c9450-344e-4608-8abb-a3e8b6ac7019,32e24028-cb9e-4499-bb48-142a0920b61b,b106a159-2d28-4019-bc7f-1feb31d0536e,254a79db-0421-4c3a-afae-3d93fd5fd5ac,2023-03-01 07:44:43,1,he was rude and did not drop me to my destination,rude and did not drop destination,4
4,52a81772-00cb-4ef0-8935-53ce4cd4c5e2,4372a23a-e506-4918-8006-71238e7eefa4,76311bc2-abbf-4c92-b07c-4716ad41c1ee,cee9eab5-9dc8-4565-b795-b4467059db7d,2023-03-01 07:55:51,1,Drove recklessly. Said he was a race driver. W...,Drove recklessly Said race driver Was very rud...,2


#### **Run sentiment-analysis predictions with the Hugging Face `pipeline` API**

In [3]:
import spacy

# Load spaCy's small English pipeline; it is used below to split reviews into sentences
nlp = spacy.load(name="en_core_web_sm")

In [4]:
# sentence splitting (every sentence is kept; no length-based filtering is applied)

def splitting_spacy(text):
    """Split a review into sentences using the module-level spaCy pipeline ``nlp``.

    Parameters
    ----------
    text : str
        Raw review text. ``None`` and NaN (how pandas represents a missing
        comment) are accepted and treated as "no text".

    Returns
    -------
    list of str
        The text of each sentence spaCy detects, in document order.
        An empty list is returned for missing input.
    """
    # A missing comment arrives as None or as float NaN; handing either to
    # `nlp` would raise and abort the whole `.apply`, so short-circuit here.
    # NaN is the only float that is not equal to itself.
    if text is None or (isinstance(text, float) and text != text):
        return []
    return [sent.text for sent in nlp(text).sents]


In [5]:
# Store each review's list of sentences in a new column
# (element-wise map: one call to splitting_spacy per comment)

clustered_reviews['splitted_custom_comment'] = clustered_reviews['custom_comment'].map(splitting_spacy)

In [6]:
# Preview the first five rows, now including the sentence-split column
clustered_reviews.head()

Unnamed: 0,order_id,feedback_id,rider_id,driver_id,created_at,stars,custom_comment,clean_custom_comment,cluster,splitted_custom_comment
0,0d47c51b-a1df-4901-97eb-58ee193d9655,b89cf504-f2fb-4ad4-a113-1112f6da5265,9d9e0f25-a383-444e-afe7-01ed085d216f,d6c92134-8f65-4da4-b6ab-a2a5c4bbaf9f,2023-03-01 01:49:32,1,very not polite,very not polite,4,[very not polite]
1,ae23c8d2-496a-47a8-8fb4-bd48f3e6f342,93748ca6-e77a-46e9-8460-f9c65b3781f6,576e7518-c1af-4697-9182-4251a488cbec,a635ef51-6da4-446a-9161-45d0a48ec2d2,2023-03-01 03:40:14,3,not a pleasant interaction overall. The driver...,not pleasant interaction overall The driver se...,4,"[not a pleasant interaction overall., The driv..."
2,def53af0-8c66-4759-ac16-7143b9e116d5,7edf5cf5-aa9f-4be6-8ca7-cd03c1aa66c0,e24c35a0-1cbf-43a1-9cae-171a89d1e7c1,57c5de49-2a44-46f8-800c-71a39490ea92,2023-03-01 06:46:58,1,Driver is a theif. Our travel cost is 154 and ...,Driver theif Our travel cost 154 and gave 200....,2,"[Driver is a theif., Our travel cost is 154 an..."
3,365c9450-344e-4608-8abb-a3e8b6ac7019,32e24028-cb9e-4499-bb48-142a0920b61b,b106a159-2d28-4019-bc7f-1feb31d0536e,254a79db-0421-4c3a-afae-3d93fd5fd5ac,2023-03-01 07:44:43,1,he was rude and did not drop me to my destination,rude and did not drop destination,4,[he was rude and did not drop me to my destina...
4,52a81772-00cb-4ef0-8935-53ce4cd4c5e2,4372a23a-e506-4918-8006-71238e7eefa4,76311bc2-abbf-4c92-b07c-4716ad41c1ee,cee9eab5-9dc8-4565-b795-b4467059db7d,2023-03-01 07:55:51,1,Drove recklessly. Said he was a race driver. W...,Drove recklessly Said race driver Was very rud...,2,"[Drove recklessly., Said he was a race driver...."


In [5]:
# Requires the `transformers` package; uncomment to install it into the kernel's environment:
# %pip install -q transformers

In [7]:
# Use the `pipeline` helper to run a sentiment model from the Hub over every review
from transformers import pipeline

# Name the checkpoint explicitly. With no `model` argument the library falls back
# to an implicit default (and warns about it), so results could silently change if
# that default changes. This is the checkpoint the default resolved to when this
# notebook was originally run.
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)
custom_comment_column = clustered_reviews['splitted_custom_comment']

# Each cell holds a list of sentences, so each prediction is a list of
# {'label': ..., 'score': ...} dicts for that review.
sentiment_predictions = custom_comment_column.map(sentiment_pipeline)

clustered_reviews['sentiment'] = sentiment_predictions


No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english)


In [9]:
# Preview the first five rows with the new `sentiment` column
clustered_reviews.head()

Unnamed: 0,order_id,feedback_id,rider_id,driver_id,created_at,stars,custom_comment,clean_custom_comment,cluster,splitted_custom_comment,sentiment
0,0d47c51b-a1df-4901-97eb-58ee193d9655,b89cf504-f2fb-4ad4-a113-1112f6da5265,9d9e0f25-a383-444e-afe7-01ed085d216f,d6c92134-8f65-4da4-b6ab-a2a5c4bbaf9f,2023-03-01 01:49:32,1,very not polite,very not polite,4,[very not polite],"[{'label': 'NEGATIVE', 'score': 0.999755680561..."
1,ae23c8d2-496a-47a8-8fb4-bd48f3e6f342,93748ca6-e77a-46e9-8460-f9c65b3781f6,576e7518-c1af-4697-9182-4251a488cbec,a635ef51-6da4-446a-9161-45d0a48ec2d2,2023-03-01 03:40:14,3,not a pleasant interaction overall. The driver...,not pleasant interaction overall The driver se...,4,"[not a pleasant interaction overall., The driv...","[{'label': 'NEGATIVE', 'score': 0.999800860881..."
2,def53af0-8c66-4759-ac16-7143b9e116d5,7edf5cf5-aa9f-4be6-8ca7-cd03c1aa66c0,e24c35a0-1cbf-43a1-9cae-171a89d1e7c1,57c5de49-2a44-46f8-800c-71a39490ea92,2023-03-01 06:46:58,1,Driver is a theif. Our travel cost is 154 and ...,Driver theif Our travel cost 154 and gave 200....,2,"[Driver is a theif., Our travel cost is 154 an...","[{'label': 'POSITIVE', 'score': 0.780716121196..."
3,365c9450-344e-4608-8abb-a3e8b6ac7019,32e24028-cb9e-4499-bb48-142a0920b61b,b106a159-2d28-4019-bc7f-1feb31d0536e,254a79db-0421-4c3a-afae-3d93fd5fd5ac,2023-03-01 07:44:43,1,he was rude and did not drop me to my destination,rude and did not drop destination,4,[he was rude and did not drop me to my destina...,"[{'label': 'NEGATIVE', 'score': 0.964007675647..."
4,52a81772-00cb-4ef0-8935-53ce4cd4c5e2,4372a23a-e506-4918-8006-71238e7eefa4,76311bc2-abbf-4c92-b07c-4716ad41c1ee,cee9eab5-9dc8-4565-b795-b4467059db7d,2023-03-01 07:55:51,1,Drove recklessly. Said he was a race driver. W...,Drove recklessly Said race driver Was very rud...,2,"[Drove recklessly., Said he was a race driver....","[{'label': 'NEGATIVE', 'score': 0.978695690631..."
5,f22e9d1c-e0fd-47b1-83f8-435a915469b0,5f083470-cb3e-4da3-8a8d-81ca467a725d,2e202b30-9611-44f1-82f9-d804f07e4a07,808f6bdf-df81-4849-bd94-027ce22bec44,2023-03-01 07:56:38,2,rude and angry,rude and angry,3,[rude and angry],"[{'label': 'NEGATIVE', 'score': 0.999692678451..."
6,1a99c30a-7302-4603-bd6b-d6625de64907,3ece09b4-3382-42db-98c0-bdd85dbba3b8,66572fc6-4a96-4f2b-87c1-274283016e34,ee7739a3-e388-40c2-b4d4-caab08c59f56,2023-03-01 08:04:31,2,i ask the driver to drop me 50 metres from the...,ask driver drop metre destination point said w...,2,[i ask the driver to drop me 50 metres from th...,"[{'label': 'NEGATIVE', 'score': 0.999069392681..."
7,abd015cd-1a64-4bf2-937e-22c87c27788b,514c6855-b1ec-4443-930f-9e95d9143442,4a047d6c-5b3d-4637-a247-51f4bac929ec,d754a31c-964b-48ac-937c-7b6b3f6260b3,2023-03-01 11:30:45,2,Driver didn’t bothered to arrive to the comfor...,Driver didn bothered arrive comfortable pickup...,9,[Driver didn’t bothered to arrive to the comfo...,"[{'label': 'POSITIVE', 'score': 0.931552112102..."
8,18491c39-be20-4870-9ecd-fa1067b4177f,d11c40de-03f7-4e11-8e0d-ded9873e16b9,a2d80ba0-e3fc-4ca0-9758-b65ffef020bc,d5424622-4cca-4887-b12a-5479e648965a,2023-03-01 11:39:45,3,Was very slow,Was very slow,2,[Was very slow],"[{'label': 'NEGATIVE', 'score': 0.999605119228..."
9,71b8b974-ff3f-4033-9bc5-6fa48943f386,7f3d78c7-81f8-4fad-908d-5dc7e180f6c9,3a7699a5-f3e1-4a80-b9b7-15919ad83a16,49a1337f-d084-4e01-94f0-18904a5ef5d8,2023-03-01 12:05:51,2,Driver cheated me - Taxi price was 172 and dr...,Driver cheated Taxi price 172 and driver charg...,2,[Driver cheated me - Taxi price was 172 and d...,"[{'label': 'NEGATIVE', 'score': 0.998717904090..."


**Use a specific sentiment analysis model available on the Hub by specifying its name**

In [10]:
# Build a second sentiment pipeline backed by an explicitly named Hub checkpoint
specific_model = pipeline(
    task="sentiment-analysis",
    model="siebert/sentiment-roberta-large-english",
)

In [11]:
# Score the same sentence lists with the explicitly chosen model and keep the
# results in a separate column so both models can be compared side by side.
custom_comment_column2 = clustered_reviews['splitted_custom_comment']

# No placeholder list is needed: `.map` returns a new Series directly.
sentiment_predictions2 = custom_comment_column2.map(specific_model)

clustered_reviews['sentiment2'] = sentiment_predictions2

In [12]:
# Show up to the first 25 rows to compare the `sentiment` and `sentiment2` columns
clustered_reviews.iloc[:25]

Unnamed: 0,order_id,feedback_id,rider_id,driver_id,created_at,stars,custom_comment,clean_custom_comment,cluster,splitted_custom_comment,sentiment,sentiment2
0,0d47c51b-a1df-4901-97eb-58ee193d9655,b89cf504-f2fb-4ad4-a113-1112f6da5265,9d9e0f25-a383-444e-afe7-01ed085d216f,d6c92134-8f65-4da4-b6ab-a2a5c4bbaf9f,2023-03-01 01:49:32,1,very not polite,very not polite,4,[very not polite],"[{'label': 'NEGATIVE', 'score': 0.999755680561...","[{'label': 'NEGATIVE', 'score': 0.999442279338..."
1,ae23c8d2-496a-47a8-8fb4-bd48f3e6f342,93748ca6-e77a-46e9-8460-f9c65b3781f6,576e7518-c1af-4697-9182-4251a488cbec,a635ef51-6da4-446a-9161-45d0a48ec2d2,2023-03-01 03:40:14,3,not a pleasant interaction overall. The driver...,not pleasant interaction overall The driver se...,4,"[not a pleasant interaction overall., The driv...","[{'label': 'NEGATIVE', 'score': 0.999800860881...","[{'label': 'NEGATIVE', 'score': 0.999502658843..."
2,def53af0-8c66-4759-ac16-7143b9e116d5,7edf5cf5-aa9f-4be6-8ca7-cd03c1aa66c0,e24c35a0-1cbf-43a1-9cae-171a89d1e7c1,57c5de49-2a44-46f8-800c-71a39490ea92,2023-03-01 06:46:58,1,Driver is a theif. Our travel cost is 154 and ...,Driver theif Our travel cost 154 and gave 200....,2,"[Driver is a theif., Our travel cost is 154 an...","[{'label': 'POSITIVE', 'score': 0.780716121196...","[{'label': 'NEGATIVE', 'score': 0.999367058277..."
3,365c9450-344e-4608-8abb-a3e8b6ac7019,32e24028-cb9e-4499-bb48-142a0920b61b,b106a159-2d28-4019-bc7f-1feb31d0536e,254a79db-0421-4c3a-afae-3d93fd5fd5ac,2023-03-01 07:44:43,1,he was rude and did not drop me to my destination,rude and did not drop destination,4,[he was rude and did not drop me to my destina...,"[{'label': 'NEGATIVE', 'score': 0.964007675647...","[{'label': 'NEGATIVE', 'score': 0.999499559402..."
4,52a81772-00cb-4ef0-8935-53ce4cd4c5e2,4372a23a-e506-4918-8006-71238e7eefa4,76311bc2-abbf-4c92-b07c-4716ad41c1ee,cee9eab5-9dc8-4565-b795-b4467059db7d,2023-03-01 07:55:51,1,Drove recklessly. Said he was a race driver. W...,Drove recklessly Said race driver Was very rud...,2,"[Drove recklessly., Said he was a race driver....","[{'label': 'NEGATIVE', 'score': 0.978695690631...","[{'label': 'NEGATIVE', 'score': 0.999479591846..."
5,f22e9d1c-e0fd-47b1-83f8-435a915469b0,5f083470-cb3e-4da3-8a8d-81ca467a725d,2e202b30-9611-44f1-82f9-d804f07e4a07,808f6bdf-df81-4849-bd94-027ce22bec44,2023-03-01 07:56:38,2,rude and angry,rude and angry,3,[rude and angry],"[{'label': 'NEGATIVE', 'score': 0.999692678451...","[{'label': 'NEGATIVE', 'score': 0.998189389705..."
6,1a99c30a-7302-4603-bd6b-d6625de64907,3ece09b4-3382-42db-98c0-bdd85dbba3b8,66572fc6-4a96-4f2b-87c1-274283016e34,ee7739a3-e388-40c2-b4d4-caab08c59f56,2023-03-01 08:04:31,2,i ask the driver to drop me 50 metres from the...,ask driver drop metre destination point said w...,2,[i ask the driver to drop me 50 metres from th...,"[{'label': 'NEGATIVE', 'score': 0.999069392681...","[{'label': 'NEGATIVE', 'score': 0.999502420425..."
7,abd015cd-1a64-4bf2-937e-22c87c27788b,514c6855-b1ec-4443-930f-9e95d9143442,4a047d6c-5b3d-4637-a247-51f4bac929ec,d754a31c-964b-48ac-937c-7b6b3f6260b3,2023-03-01 11:30:45,2,Driver didn’t bothered to arrive to the comfor...,Driver didn bothered arrive comfortable pickup...,9,[Driver didn’t bothered to arrive to the comfo...,"[{'label': 'POSITIVE', 'score': 0.931552112102...","[{'label': 'NEGATIVE', 'score': 0.999497056007..."
8,18491c39-be20-4870-9ecd-fa1067b4177f,d11c40de-03f7-4e11-8e0d-ded9873e16b9,a2d80ba0-e3fc-4ca0-9758-b65ffef020bc,d5424622-4cca-4887-b12a-5479e648965a,2023-03-01 11:39:45,3,Was very slow,Was very slow,2,[Was very slow],"[{'label': 'NEGATIVE', 'score': 0.999605119228...","[{'label': 'NEGATIVE', 'score': 0.999446094036..."
9,71b8b974-ff3f-4033-9bc5-6fa48943f386,7f3d78c7-81f8-4fad-908d-5dc7e180f6c9,3a7699a5-f3e1-4a80-b9b7-15919ad83a16,49a1337f-d084-4e01-94f0-18904a5ef5d8,2023-03-01 12:05:51,2,Driver cheated me - Taxi price was 172 and dr...,Driver cheated Taxi price 172 and driver charg...,2,[Driver cheated me - Taxi price was 172 and d...,"[{'label': 'NEGATIVE', 'score': 0.998717904090...","[{'label': 'NEGATIVE', 'score': 0.999464452266..."


In [13]:
# Write the reviews with their cluster and both sentiment columns to a CSV file.
# The row index is not written. The list-valued columns are saved as their
# string representation.
clustered_reviews.to_csv(
    path_or_buf='reviews_taxi_data-sentiment.csv',
    encoding='utf-8',
    index=False,
)