In [1]:
# Import necessary packages
import pandas as pd
import torch

from transformers import logging
logging.set_verbosity(logging.WARNING)

In [9]:
# Load the labelled reviews; the CSV uses ';' as its field separator
car_reviews_df = pd.read_csv("data/car_reviews.csv", sep=';')

# Integer encoding for the textual sentiment labels (a label missing from the mapping raises KeyError)
class_mapping = {'POSITIVE':1, 'NEGATIVE':0}

# Review texts and their encoded labels, kept as parallel lists
car_reviews_text = car_reviews_df['Review'].to_list()
car_reviews_class = list(map(class_mapping.__getitem__, car_reviews_df['Class'].to_list()))

In [10]:
# Sentiment Classification
from transformers import pipeline

def getSentiment(review):
    """Run sentiment classification on ``review`` and return the raw pipeline output.

    A fresh ``text-classification`` pipeline backed by the
    ``distilbert/distilbert-base-uncased-finetuned-sst-2-english`` checkpoint
    is built on every call, so each call pays the model-loading cost.

    Args:
        review: Input forwarded unchanged to the pipeline (the notebook
            passes a list of review strings).

    Returns:
        Whatever the pipeline returns for ``review``; callers in this
        notebook read the ``'label'`` key of each returned item.
    """
    classifier = pipeline(
        "text-classification",
        model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
    )
    return classifier(review)

# Classify every review, then translate the predicted label names into the
# same integer encoding used for the ground-truth classes
predicted_labels = getSentiment(car_reviews_text)
predictions = list(map(lambda output: class_mapping[output['label']], predicted_labels))

# Evaluate results
import evaluate

f1 = evaluate.load("f1")
accuracy = evaluate.load("accuracy")

# Both metrics are scored against the same references/predictions pair
metric_inputs = {"references": car_reviews_class, "predictions": predictions}
f1_result = f1.compute(**metric_inputs)['f1']
accuracy_result = accuracy.compute(**metric_inputs)['accuracy']
print(f1_result, accuracy_result)

Device set to use cpu


0.8571428571428571 0.8


In [11]:
# Translating first two sentences of first review
def translateEnToSpanish(sentences):
    """Translate ``sentences`` with the ``Helsinki-NLP/opus-mt-en-es`` model.

    A new ``translation`` pipeline is created on each call. Only the first
    element of the pipeline output is used.

    Args:
        sentences: Input forwarded unchanged to the translation pipeline.

    Returns:
        The ``'translation_text'`` value of the first pipeline result.
    """
    translator = pipeline("translation", model="Helsinki-NLP/opus-mt-en-es")
    first_result = translator(sentences)[0]
    return first_result['translation_text']

def findNOccurrence(text, keyword=".", time=2):
    """Return the index just past the ``time``-th occurrence of ``keyword``.

    The returned value is suitable as a slice end: ``text[:result]`` keeps
    everything up to and including the ``time``-th ``keyword``.

    Args:
        text: String to search.
        keyword: Substring to look for (non-overlapping matches, scanned
            left to right). Defaults to ``"."``.
        time: Which occurrence to locate, counted from 1. Defaults to 2.

    Returns:
        The index immediately after the ``time``-th occurrence, or ``-1``
        when ``text`` contains fewer than ``time`` occurrences, when
        ``keyword`` is empty, or when ``time`` is negative. ``time == 0``
        yields ``0``. Callers must check for ``-1`` before slicing, since
        ``text[:-1]`` would silently drop the last character.
    """
    if time < 0 or not keyword:
        return -1

    end = 0
    for _ in range(time):
        hit = text.find(keyword, end)
        if hit == -1:
            # Ran out of occurrences before reaching the requested one
            return -1
        end = hit + len(keyword)
    # A match that ends exactly at len(text) is still a match, so the
    # result is not compared against the text length here.
    return end

# Keep the first review up to and including its second '.'.
# findNOccurrence reports "not found" as -1; using that directly as a slice
# end would silently drop the last character, so fall back to the whole review.
first_review = car_reviews_text[0]
sentence_end = findNOccurrence(first_review)
sentences = first_review if sentence_end == -1 else first_review[:sentence_end]
translated_review = translateEnToSpanish(sentences)

# Read one reference translation per line. The encoding is pinned so the
# accented Spanish characters decode the same way on every platform
# (assumes the file is stored as UTF-8 — confirm against the data source).
with open("data/reference_translations.txt", "r", encoding="utf-8") as f:
    ref_text_content = [line.strip() for line in f]

# All reference lines are alternative references for the single prediction
bleu = evaluate.load("bleu")
bleu_score = bleu.compute(references=[ref_text_content], predictions=[translated_review])
print(bleu_score['bleu'])

Device set to use cpu


0.7794483794144497


In [14]:
# QA LLM
def getAnswer(question, context):
    """Answer ``question`` from ``context`` using ``deepset/minilm-uncased-squad2``.

    A new ``question-answering`` pipeline is created on each call.

    Args:
        question: Question passed to the pipeline under the ``'question'`` key.
        context: Passage passed to the pipeline under the ``'context'`` key.

    Returns:
        The ``'answer'`` field of the pipeline's result.
    """
    qa_model = pipeline("question-answering", model="deepset/minilm-uncased-squad2")
    qa_input = {'question': question, 'context': context}
    prediction = qa_model(qa_input)
    return prediction['answer']

# The second review is the passage the answer is extracted from
context = car_reviews_text[1]
question = "What did he like about the brand?"

answer = getAnswer(question=question, context=context)
print(answer)

Some weights of the model checkpoint at deepset/minilm-uncased-squad2 were not used when initializing BertForQuestionAnswering: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Fetching 0 files: 0it [00:00, ?it/s]

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

Fetching 0 files: 0it [00:00, ?it/s]

Device set to use cpu


ride quality, reliability


In [13]:
# Summarize last review
def summarizeText(text, max_new_tokens=55):
    """Summarize ``text`` with the ``facebook/bart-large-cnn`` model.

    A new ``summarization`` pipeline is created on each call. Only the first
    element of the pipeline output is used.

    Args:
        text: Input forwarded unchanged to the summarization pipeline.
        max_new_tokens: Generation-length limit handed to the pipeline at
            construction time. Defaults to 55, the value previously
            hard-coded here; a low limit can cut the summary off
            mid-sentence.

    Returns:
        The ``'summary_text'`` value of the first pipeline result.
    """
    summarizer = pipeline(
        "summarization",
        model="facebook/bart-large-cnn",
        max_new_tokens=max_new_tokens,
    )
    return summarizer(text)[0]['summary_text']

# The final review in the dataset is the one being condensed
text = car_reviews_text[-1]
summarized_text = summarizeText(text=text)
print(summarized_text)

Device set to use cpu


The Nissan Rogue provides me with the desired SUV experience without burdening me with an exorbitant payment. Handling and styling are great; I have hauled 12 bags of mulch in the back with the seats down and could have held more. The engine delivers strong performance
