In [77]:
# Import all required libraries and set up the OpenAI API key

import os
import openai
import pandas as pd 
import matplotlib.pyplot as plt
import numpy as np
from openai import OpenAI

from dotenv import load_dotenv, find_dotenv

# Load variables from the .env file located by find_dotenv() into the environment;
# the return value is assigned to `_` so the cell prints nothing.
_ = load_dotenv(find_dotenv())

# Read the key eagerly: os.environ[...] raises KeyError right here if
# OPENAI_API_KEY is not set.
openai.api_key = os.environ['OPENAI_API_KEY']


In [49]:
# Load the smaller reviews dataset from the data folder into a DataFrame.
# The path is relative, so it is resolved against the kernel's working directory.

reviews_set_small = pd.read_csv('data/Amazon_Product_Reviews_Edited.csv')

In [None]:
# List the column labels of the dataset loaded above
reviews_set_small.columns

In [51]:
# Add a column that joins the review title and the review text; this is the text
# that is sent to the model.
# - Missing titles/texts are replaced by '' first: NaN + str is NaN, so a single
#   missing field would otherwise wipe out the whole combined review.
# - A space separator keeps the last word of the title from fusing with the first
#   word of the text.
# - The outer strip removes the dangling separator when one side is empty.
reviews_set_small["combined"] = (
    reviews_set_small['reviews.title'].fillna('').astype(str).str.strip()
    + ' '
    + reviews_set_small['reviews.text'].fillna('').astype(str).str.strip()
).str.strip()

In [None]:
# Check the combined column next to the two columns it was built from.
# Only the first rows of the relevant columns are displayed instead of the whole frame.
reviews_set_small[['reviews.title', 'reviews.text', 'combined']].head()

In [None]:
# Look at the rating column on its own (kept as a one-column DataFrame)

reviews_set_small[['reviews_rating']]

In [None]:
# Count the reviews per rating value; this distribution is used later to compare
# against the sentiment labels.

df = (
    reviews_set_small
    .groupby('reviews_rating')['reviews_rating']
    .count()
    .reset_index(name='count')
)
df


In [None]:
# Bar chart of how many reviews fall under each rating

fig, ax = plt.subplots()

ax.bar(df['reviews_rating'], df['count'], width=0.50)

# Label both axes so the figure can be read on its own
ax.set_xlabel('Rating')
ax.set_ylabel('Count of each rating')
# The trailing semicolon keeps the Text(...) repr out of the cell output
ax.set_title('Ratings Distribution');

In [78]:

# Create an OpenAI client. No arguments are passed, so the client is expected to
# pick up OPENAI_API_KEY from the environment (see the openai library docs).
client = OpenAI()


In [79]:
def get_completion(prompt, model="gpt-3.5-turbo", temperature=0):
    """Send a single-turn user prompt to the chat model and return the reply text.

    Args:
        prompt: Text sent as the only message, with role "user".
        model: Name of the chat model to query.
        temperature: Degree of randomness of the model's output. Defaults to 0,
            the value that used to be hard-coded, so existing calls behave the same.

    Returns:
        The ``content`` of the first choice of the completion.
    """
    messages = [{"role": "user", "content": prompt}]
    # `client` is the module-level OpenAI client created earlier in the notebook.
    completion = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=temperature,
    )
    return completion.choices[0].message.content

In [None]:
# Write a prompt and pass one review as "Review text".
# `product_review` is not defined by any cell above, so on a fresh kernel this cell
# raised NameError; use the first combined review of the dataset as the sample.
# NOTE(review): swap in another review here if a specific one was intended.
product_review = reviews_set_small['combined'].iloc[0]

# The delimiter named in the instruction has to match the one actually wrapped
# around the review (triple single quotes), otherwise the instruction refers to
# a delimiter that never appears in the prompt.
prompt = f"""
What is the sentiment of the following product review,
which is delimited with triple single quotes?

Review text: '''{product_review}'''
"""
response = get_completion(prompt)
print(response)

In [81]:
# A single negative review used for evaluation.
# Each line ends with " \" (space, backslash, newline): the backslash must be the
# very last character for the line continuation to work, and the space before it
# keeps the words of adjacent lines from fusing together.
product_review_n = """
Freetime makes me angry. So, so angry. 3 \
As the Kids Edition is nothing more than \
a basic 50 7 Kindle Fire with some extras, \
this review is primarily concerned with the \
50-worth of extras bundled with the Kids \
Edition. Primarily, the year of included Freetime \
....................
"""

In [47]:
# A single positive review used for evaluation.
# Each line ends with " \" so the continuation joins the lines with exactly one
# space; without it words ran together (e.g. "listening tothe news").
product_review_p = """
Great upgrade for aesthetics \
I'm a huge fan of the Echo family. \
I have two Echo's, three Dots and one Tap. \
No they aren't essential to my life \
but I love playing music, listening to \
the news and controlling the Philips Hue \
............................
"""

In [None]:
# Label every review with a sentiment and collect the answers in a list that stays
# aligned row-for-row with reviews_set_small.
# This makes one API call per review, so it may take some time; the results can be
# saved to a local CSV file afterwards.

sentiments_array = []
for review in reviews_set_small['combined']:
    # A missing or blank review has nothing to classify. Record None (keeping the
    # list aligned with the rows) instead of paying for a call that would label
    # the literal text "nan".
    if pd.isna(review) or not str(review).strip():
        sentiments_array.append(None)
        continue

    prompt = f"""
    What is the sentiment of the following product review? \
    Give your answer in a single word as 'positive', 'negative', 'neutral'. \
    Review text: '''{review}'''
    """
    response = get_completion(prompt)

    # Normalise case, surrounding whitespace, quotes and a trailing period so that
    # answers such as "Positive." and "positive" end up as the same label.
    label = (response or '').strip().strip('.\'"').lower()
    sentiments_array.append(label or None)

# Preview only the first labels rather than printing the whole list
sentiments_array[:10]

In [56]:
# Save the sentiment labels alongside the reviews in a CSV file for later use.
# `df_sentiment` is not created by any cell above, so on a fresh kernel this cell
# raised NameError; build it here from the reviews and the collected labels
# (sentiments_array holds one entry per row of reviews_set_small).
# NOTE(review): the column name 'sentiment' is chosen here — adjust it if later
# analysis expects a different name.
df_sentiment = reviews_set_small.assign(sentiment=sentiments_array)
df_sentiment.to_csv('data/sentiment_analysis_19012024.csv')