In [1]:
# Get necessary import statements
import json
import os
import pandas as pd
from langchain.llms import OpenAI
from langchain.callbacks import get_openai_callback
from langchain.prompts import PromptTemplate

In [2]:
# Load API key from environment variable
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
ada_model_name = "text-ada-001"
davinci_model_name = "text-davinci-003"
gpt_35_turbo = "gpt-3.5-turbo"

In [None]:
#@ title: Explore pythonic way to clean up json
def parse_dict(**kwargs) -> dict:
    if not kwargs:
        raise ValueError("No kwargs")
    

def sanitize_extracted_sentiment(x: str) -> dict:
    try:
        return json.loads(x)
    except Exception:
        if not x.startswith("{"):
            if "{" not in x:
                raise ValueError(x)
            *_, x = x.partition("{")
        if not x.endswith("}"):
            if "}" not in x:
                raise ValueError(x)
            x, *_ = x.rpartition("}")
        try:
            return json.loads(x)
        except Exception:
            raise RuntimeError(x)

In [None]:
#@title: Desired Sample output
{
    "aspects": {
        "Value": {
            "sentiment": "positive",
            "justification": "Great value for the money",
        },
        "Product Quality": {
            "sentiment": "neutral",
            "justification": "No information provided",
        },
        "OLED Screen": {
            "sentiment": "positive",
            "justification": "Impression that the product includes an OLED screen",
        },
        "Waiting Time": {
            "sentiment": "neutral",
            "justification": "No sentiment expressed regarding waiting time",
        },
    }
}

In [3]:
# read reviews_csv.csv into pandas dataframe called reviews
reviews_df = pd.read_csv('reviews_csv.csv')
print(reviews_df.head())

         asin               date              id               name  rating   
0  B09R95CGSW     March 23, 2023   RU6F758GNRXII            Harold        5  \
1  B09R95CGSW      April 8, 2023  R35VYWRYIAMWKD  Brandon Goldsmith       5   
2  B09R95CGSW     March 28, 2023  R2GN4ETLMJULLX            Patrick       5   
3  B09R95CGSW  February 24, 2023  R1S7A5IHRSWEY8         G Springer       4   
4  B09R95CGSW     March 26, 2023  R14S60Q1Q5V2P1               Cole       4   

                                              review   
0  This XR65A80K I purchased was to replace an XR...  \
1  My LED TV developed some sort of light leakage...   
2  with the recent price drop (probably due to ne...   
3  First I am a Sony tv owner for 7 years now, an...   
4  Well with all the crazy reviews this company h...   

                                  title  verified_purchase   
0  Impressive TV, especially for movies               True  \
1                          Bravo, Sony!               True   
2 

In [10]:
#@ Title: I cleaned 3 extracted reviews format that didn't follow the same format as the other reviews
# Saved the results to the reviews_raw_sentiment file
# import json
# # read raw_sentiment_dict.json
# with open('raw_sentiment_dict.json') as f:
#     raw_sentiment_dict = json.load(f)
#     print(raw_sentiment_dict)

# reviews_df['extracted_raw_sentiment'] = list(raw_sentiment_dict.values())
# reviews_df.to_csv('reviews_raw_sentiment.csv',index = False)

In [7]:
template = """Analyze the provided review delimited by <> and extract product-related aspects, sentiments (positive/negative/neutral), and justifications. Format the result as a JSON object with the following structure: a key named 'aspects' containing a nested object with keys for each aspect, and each aspect key having an object with 'sentiment' and 'justification' keys. Exclude non-product discussions. If no sentiment is expressed for an aspect, return 'neutral' as the sentiment and 'No information provided' as the justification. If information isn't present, use 'unknown' as the value. If information can't be retrieved, return an empty JSON object. Review: <{review}>"""

#@title: Function to get sentiment from OpenAI for one review
def get_llm_sentiment(model, prompt_template, review):
    prompt = prompt_template.format(review=review)
    result = model(prompt)
    return result

# desired model name
gpt_35_turbo = "gpt-3.5-turbo"

prompt_template = PromptTemplate(input_variables=["review"], template=template)

openAIturbo = OpenAI(model_name=gpt_35_turbo, openai_api_key=OPENAI_API_KEY)

reviews_ls = reviews_df.total_review.to_list()

#@title: Loop through each review and extract sentiment for each review
sentiment_dict = {}
for review in reviews_ls:
    try:
        sentiment_dict[review] = get_llm_sentiment(
            model=openAIturbo, prompt_template=prompt_template, review=review
        )
    except Exception:
        pass



In [13]:
#@title: dump the setiment_dict into json file
import json
from pathlib import Path # dealing with file paths

reviews_df = pd.read_csv('reviews_raw_sentiment.csv')
#@title: Create a dictionary and json dump for extracted sentiments from LLM
# Save extracted sentiment to the dataframe
# reviews_df['extracted_sentiment'] = [value for key, value in sentiment_dict.items()]
# create sentiment dictionary with review_id as key and extracted sentiment as value using zip
sentiment_dict_review_id = dict(zip(reviews_df.id, reviews_df.extracted_raw_sentiment))

# Incase the response.json doesn't exist, create it
# if not Path('./raw_sentiment_dict.json').exists():
#     Path('./raw_sentiment_dict.json').touch()

# # Write the json file to local 
# Path('./raw_sentiment_dict.json').write_text(json.dumps(sentiment_dict_review_id))
# #check with path the file was saved to
# print(Path('./raw_sentiment_dict.json').absolute())

In [14]:
# with open(Path('./raw_sentiment_dict.json').absolute(), 'r') as file:
#     sentiment_dict = json.load(file)
#@title: Extract each aspect and sentiment from the json file
def get_sentiments_todf(data: dict) -> pd.DataFrame:
    sentimentdatals = []
    for key, value in data.items():
        sentimentdatals.append([key, value['sentiment'],value['justification']])
    return pd.DataFrame(sentimentdatals, columns= ['aspect', 'sentiment', 'justification'])

sentiment_df_clean = []
for id, sentiment_extracted in sentiment_dict_review_id.items():
    try:
        mydict = json.loads(sentiment_extracted)
    except Exception as e:
        print(f"Exception: {e}")
        print(sentiment_extracted)
        break
    tempdf = get_sentiments_todf(mydict['aspects'])
    tempdf['id'] = id
    sentiment_df_clean.append(tempdf)

Exception: Expecting ',' delimiter: line 11 column 2 (char 202)
{"Aspects":
{
  "fit": {
    "sentiment": "positive",
    "justification": "Perfect fit"
  },
  "entertainment": {
    "sentiment": "positive",
    "justification": "Perfect fit for entertainment"
  }
}


In [16]:
sentiment_df_clean = pd.concat(sentiment_df_clean)
sentiment_df_clean.head()

Unnamed: 0,aspect,sentiment,justification,id
0,Infinite contrast,positive,OLED technology provides inky blacks and clear...,RU6F758GNRXII
1,Color accuracy,positive,Out of the box color accuracy is amazing with ...,RU6F758GNRXII
2,Processing,positive,Additional video noise reduction and motion pr...,RU6F758GNRXII
3,Upscaling,positive,Sony's processing is second to none making ups...,RU6F758GNRXII
4,Sound,positive,The sound from the TV itself was surprisingly ...,RU6F758GNRXII


In [20]:
# merge sentiment_df_clean and reviews_df on id, inner join
sentiment_df_clean = pd.merge(reviews_df, sentiment_df_clean, on='id', how='inner')
# Write the sentiment_df_clean to csv
sentiment_df_clean.to_csv('reviews_sentiment_clean.csv',index = False)

In [21]:
sentiment_df_clean.head()

Unnamed: 0,asin,date,id,name,rating,review,title,verified_purchase,total_review,extracted_raw_sentiment,aspect,sentiment,justification
0,B09R95CGSW,23-Mar-23,RU6F758GNRXII,Harold,5,This XR65A80K I purchased was to replace an XR...,"Impressive TV, especially for movies",True,"Title: Impressive TV, especially for movies Re...","{\n ""aspects"": {\n ""Infinite contrast"": {\...",Infinite contrast,positive,OLED technology provides inky blacks and clear...
1,B09R95CGSW,23-Mar-23,RU6F758GNRXII,Harold,5,This XR65A80K I purchased was to replace an XR...,"Impressive TV, especially for movies",True,"Title: Impressive TV, especially for movies Re...","{\n ""aspects"": {\n ""Infinite contrast"": {\...",Color accuracy,positive,Out of the box color accuracy is amazing with ...
2,B09R95CGSW,23-Mar-23,RU6F758GNRXII,Harold,5,This XR65A80K I purchased was to replace an XR...,"Impressive TV, especially for movies",True,"Title: Impressive TV, especially for movies Re...","{\n ""aspects"": {\n ""Infinite contrast"": {\...",Processing,positive,Additional video noise reduction and motion pr...
3,B09R95CGSW,23-Mar-23,RU6F758GNRXII,Harold,5,This XR65A80K I purchased was to replace an XR...,"Impressive TV, especially for movies",True,"Title: Impressive TV, especially for movies Re...","{\n ""aspects"": {\n ""Infinite contrast"": {\...",Upscaling,positive,Sony's processing is second to none making ups...
4,B09R95CGSW,23-Mar-23,RU6F758GNRXII,Harold,5,This XR65A80K I purchased was to replace an XR...,"Impressive TV, especially for movies",True,"Title: Impressive TV, especially for movies Re...","{\n ""aspects"": {\n ""Infinite contrast"": {\...",Sound,positive,The sound from the TV itself was surprisingly ...
