# Example AI App - Analysis of web shop reviews

In [None]:
import os
import openai
from IPython.display import display, HTML
from pprint import pprint

from dotenv import load_dotenv, find_dotenv
# Load the .env file located by find_dotenv(); the return value is discarded.
_ = load_dotenv(find_dotenv()) # read local .env file
# Hand the API key to the openai module; raises KeyError if OPENAI_API_KEY is not set.
openai.api_key = os.environ['OPENAI_API_KEY']

# NOTE(review): not referenced anywhere else in the visible notebook.
total_cost = 0.0

---

## The task:

Write code that automatically analyses customer reviews published on an online electronics web shop. Example reviews are loaded by the code in the cell below.

---

In [None]:
reviews_csv_filepath = "Reviews3.csv"

# Every line of the file is taken as one review; trailing whitespace
# (including the line terminator) is stripped from each.
with open(reviews_csv_filepath, mode='r', encoding='UTF-8') as reviews_file:
    reviews = list(map(str.rstrip, reviews_file))

# Echo each review next to its list index so later cells can pick reviews by number.
for index, text in enumerate(reviews):
    print(f"{index}: {text}")

---

The function `analyse_review` takes a string containing one review and returns a dictionary with the following keys and values.
    
| Key | Value type | Description |
| :---: | :---: | :--- |
| "in_english" | string | review translated to English, or with corrected spelling, grammar and punctuation |
| "language" | string | language of the review |
| "is_valid_review" | boolean | True if the review is a valid and legitimate Customer review |
| "customer" | string | name of the Customer signing the review |
| "rating" | integer | sentiment rating, 1: very negative to 5: very positive (0 if not a valid review) |
| "product" | string | product name extracted from the review |
| "logistics" | boolean | True if review mentions logistics in positive or negative sense |
| "pricing" | boolean | True if review mentions pricing in positive or negative sense |
| "quality" | boolean | True if review mentions product quality in positive or negative sense |
| "top_positive" | list of strings | up to 3 positive comments in the review |
| "top_negative" | list of strings | up to 3 negative comments in the review |

If the input review is not a legitimate and valid customer review, then rating should be set to 0, customer and product to "Unrecognized", all booleans set to false and both lists must be empty.

The output dictionary is later converted to JSON and stored in a database. The Product Management, Pricing, Logistics and Quality departments use the database to analyse reviews, querying by the keys above.

---

In [None]:
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate

# One (name, type, description) row per output field. The description doubles
# as the instruction given to the model for that field.
_FIELD_SPECS = (
    (
        "in_english",
        "str",
        "Translate the text to English if text is in language other than English. Otherwise correct spelling, grammar and punctuation. Do not put output in quotes or any other delimiters.",
    ),
    (
        "language",
        "str",
        "Identify language of the review text. Answer with one word, tell Unrecognized if you don't know or unsure.",
    ),
    (
        "is_valid_review",
        "bool",
        "Please analyse review text and output single word true only if the review is a legitimate product review, otherwise output single word false.",
    ),
    (
        "customer",
        "str",
        "If the text is a legitimate product review, then please extract name of a person signing the review. Output Unrecognized if not a review or you don't know or unsure.",
    ),
    (
        "rating",
        "int",
        "If the text is a legitimate product review, then please rate the sentiment expressed in the review on a scale of 1 to 5, where 1 is very negative, 3 is balanced and neutral, 5 is very positive. Output single digit answer. Output 0 if the text is not a legitimate product review or sentiment cannot be rated or unsure.",
    ),
    (
        "product",
        "str",
        "If the text is a legitimate product review, then extract product name from review. Output Unrecognized if the text is not a legitimate product review or you don't know exact product name or unsure.",
    ),
    (
        "logistics",
        "bool",
        "If the text is a legitimate product review, then please analyse review text and output single word true only if the review mentions comment about delivery time or damaged goods, otherwise output single word false.",
    ),
    (
        "pricing",
        "bool",
        "If the text is a legitimate product review, then please analyse review text and output single word true only if the review mentions positive or negative comment about price, otherwise output single word false.",
    ),
    (
        "quality",
        "bool",
        "If the text is a legitimate product review, then please analyse review text and output single word true only if the review mentions positive or negative comment about product quality, otherwise output single word false.",
    ),
    (
        "top_positive",
        "List[str]",
        "If the text is a legitimate product review, then please analyse review text and output list of up to 3 positive comments in it. Otherwise output an empty list.",
    ),
    (
        "top_negative",
        "List[str]",
        "If the text is a legitimate product review, then please analyse review text and output list of up to 3 negative comments in it. Otherwise output an empty list.",
    ),
)

# Build the schema objects in the same order as the rows above.
response_schemas = [
    ResponseSchema(name=field_name, type=field_type, description=field_description)
    for field_name, field_type, field_description in _FIELD_SPECS
]

# The parser supplies the format instructions that are embedded in the prompt.
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
format_instructions = output_parser.get_format_instructions()

print(format_instructions)

In [None]:
# Single human message: the instruction, the parser's format instructions,
# then the review text wrapped in <review> tags.
review_template = "Please analyse the review below.\n\n{format_instructions}\n\n<review>\n{review}\n</review>"
human_message = HumanMessagePromptTemplate.from_template(review_template)

# `format_instructions` is bound up front, leaving `review` as the only
# variable to supply per call.
prompt = ChatPromptTemplate(
    partial_variables={"format_instructions": format_instructions},
    input_variables=["review"],
    messages=[human_message],
)

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.chains import LLMChain
from langchain.prompts import ChatPromptTemplate
from langchain.callbacks import OpenAICallbackHandler

# One shared callback handler is passed to every chain call below, and the
# last notebook cell reports it as the session total.
totals_cb = OpenAICallbackHandler()

print(totals_cb)

review_chain = LLMChain(
    llm=ChatOpenAI(temperature=0.0),
    prompt=prompt,
    output_parser=output_parser
)


def _flags_valid_review(value):
    """Return True when an `is_valid_review` value means "valid".

    The model is asked for a JSON boolean, but a quoted "true"/"false" string
    is tolerated as well.
    """
    if isinstance(value, str):
        return value.strip().lower() == "true"
    return bool(value)


def _enforce_invalid_review_defaults(analysis):
    """Return a copy of `analysis` with the fixed values required for non-reviews."""
    cleaned = dict(analysis)
    cleaned.update(
        is_valid_review=False,
        customer="Unrecognized",
        rating=0,
        product="Unrecognized",
        logistics=False,
        pricing=False,
        quality=False,
        # Fresh lists on every call so results never share mutable state.
        top_positive=[],
        top_negative=[],
    )
    return cleaned


def analyse_review(x):
    """Analyse one review and return the parsed result.

    Args:
        x: the review text (a single string).

    Returns:
        The value stored under the chain's 'text' output key, i.e. the
        dictionary produced by the output parser. When that dictionary marks
        the text as not being a valid review, the result is normalised:
        rating is 0, customer and product are "Unrecognized", all boolean
        flags are False and both comment lists are empty. The model is only
        *asked* to follow that rule in the prompt, so it is enforced here.
        "in_english" and "language" are always left as returned.
    """
    analysis = review_chain(x, callbacks=[totals_cb])['text']
    # Anything that is not a dict is passed through untouched.
    if not isinstance(analysis, dict):
        return analysis
    if _flags_valid_review(analysis.get("is_valid_review")):
        return analysis
    return _enforce_invalid_review_defaults(analysis)

In [None]:
# First review in the file - reviews[0]
sample_review = reviews[0]
print(f"Review text: \n{sample_review}\n")

output_dict = analyse_review(sample_review)
pprint(output_dict)

In [None]:
# reviews[2]: mixed review written in French, no logistics issue
sample_review = reviews[2]
print(f"Review text: \n{sample_review}\n")

output_dict = analyse_review(sample_review)
pprint(output_dict)

In [None]:
# reviews[4]: review with a logistics issue
sample_review = reviews[4]
print(f"Review text: \n{sample_review}\n")

output_dict = analyse_review(sample_review)
pprint(output_dict)

In [None]:
# reviews[5]: negative review with misspellings
sample_review = reviews[5]
print(f"Review text: \n{sample_review}\n")

output_dict = analyse_review(sample_review)
pprint(output_dict)

In [None]:
# reviews[8]: a weather report (not a product review), positive in tone
sample_review = reviews[8]
print(f"Review text: \n{sample_review}\n")

output_dict = analyse_review(sample_review)
pprint(output_dict)

In [None]:
# reviews[10]: Don Quixote text
sample_review = reviews[10]
print(f"Review text: \n{sample_review}\n")

output_dict = analyse_review(sample_review)
pprint(output_dict)

In [None]:
# Interpolating the handler object itself after "$" would print its whole
# usage summary there; report the accumulated numeric cost instead.
print(f"Total cost: ${totals_cb.total_cost:.4f}")