In [12]:
import os
import json
import random
from pprint import pprint
from dotenv import load_dotenv
import openai
import pandas as pd


In [3]:
# Load settings via python-dotenv, then read the OpenAI key from the
# OPENAI_KEY environment variable and hand it to the openai client.
load_dotenv()
api_key = os.environ.get('OPENAI_KEY')  # None when the variable is not set
openai.api_key = api_key


In [4]:
def get_completion(instructions, review):
    """Send one review to the chat model and return its reply with a cost estimate.

    Parameters
    ----------
    instructions : str
        Task description placed at the top of the prompt.
    review : str
        Review text. It is wrapped in triple single quotes in the prompt so
        it is delimited from the instructions.

    Returns
    -------
    dict
        'content': message content of the first choice in the response.
        'cost': total_tokens / 1000 * 0.002.
    """
    # Real line breaks around the separator. The earlier version wrote '/n',
    # which put the two literal characters "/n" into the prompt instead of
    # starting a new line.
    prompt = f"{instructions}\n==========\nReview text: '''{review}'''"
    message = [{'role': 'user', 'content': prompt}]
    response = openai.ChatCompletion.create(
        model='gpt-3.5-turbo',
        temperature=0,
        messages=message
    )

    # 0.002 is applied per 1,000 tokens of the reported total usage.
    # NOTE(review): presumably the gpt-3.5-turbo USD price -- verify against
    # current pricing before relying on this figure.
    output = {'content': response.choices[0].message['content'],
              'cost': response.usage['total_tokens'] / 1000 * 0.002}
    return output
    

In [5]:
# Prompt instructions that get_completion() places ahead of the review text.
# They ask for a one-sentence summary, an angry True/False flag and a 1-5
# sentiment score for each listed topic, with the answer formatted as JSON.
instructions = """
Your task is to summarize salient points under each topic from
a hotel review in one sentence. Evaluate if the guest was angry 
about that aspect of their stay, True or False, and also evaluate 
the sentiment on a scale of 1 to 5:

1 - very negative
2 - negative
3 - neutral
4 - positive
5 - very positive

Topics: frontdesk service, cleanliness, condition of hotel, value

Summarize the review below as instructed above, delimited by triple 
ticks. Format as JSON.
"""

In [6]:
# Load all reviews, then draw 10 random rows with Overall >= 4 and 10 random
# rows with Overall <= 2, keeping only the Title and Content columns.
# The path is assembled with os.path.join so it resolves on any OS; the
# earlier literal used backslash separators and only worked on Windows.
# NOTE: sample() is not seeded, so a different set of reviews is drawn on
# every run.
df_reviews = pd.read_csv(os.path.join('..', 'data', 'reviews.csv'))

df_positive = df_reviews.loc[df_reviews['Overall'] >= 4, ['Title', 'Content']].sample(10)
df_negative = df_reviews.loc[df_reviews['Overall'] <= 2, ['Title', 'Content']].sample(10)

df_positive

Unnamed: 0,Title,Content
327234,“Nice play to stay”,Very nice hotel with a friendly and helpful st...
33647,“Overall Pleased”,My first visit to Vegas. I stayed at the Bella...
440,"“Phenomenal service, beautiful hotel”",I highly recommend this hotel--from the time I...
206559,“High-energy relaxation!”,"We stayed at the Hard Rock for a week, and wer..."
56138,“Service makes this four star feel like a five...,I was here on a trip with my parents. They had...
205599,“If you are going to Playa Del Carmen - this i...,I stayed at the Playacar Palace from 8/17-8/26...
11122,“Fabulous!”,This was our first stay at the hotel (we have ...
418695,“Very nice hotel !!!!!!”,we've been there for 4 nights first part of ou...
402283,“Comfortable and good location”,We stayed at hotel unico over the easter weeke...
29929,“Ocean Maya has Award Winning Staff”,My husband and I just returned from this wonde...


In [7]:
# Display the sampled reviews whose Overall rating is 2 or lower.
df_negative

Unnamed: 0,Title,Content
152999,“Worst Experience Ever”,We stayed here for 10 nights at the start of o...
426318,“Would not recommend this one”,Read some reviews here and changed to this hot...
18485,“Very bad customer service”,Very shocked at how they treated us. They disr...
210910,"“Not professional, chaos at breakfast”",Arriving late after a very long conference on ...
436342,“BE CAREFUL WHAT YOU WISH FOR”,"This is an ""add on"" to the 5 star resorts that..."
295213,“You have got to be kidding me...depressing an...,"Summary: pros: new carpet, staff is great cons..."
423407,"“Nice Staff, Terrible Rooms”",Our choice to book this hotel was based on the...
164857,“Won't go there again”,We were given a room that was extremely small ...
431464,“disgusting”,as Dubai is famed for its high standard of cus...
74731,“Loud Night Club on 1st Floor of Hotel”,There is a loud night club on the 1st floor of...


In [13]:
%%time

i = random.randint(0, 9)

review = df_positive.iloc[i][['Title', 'Content']].values
review = ' '.join(review)

response = get_completion(instructions, review)
pprint(json.loads(response['content']))
print(f'Cost: {response["cost"]}')

{'cleanliness': {'angry': False, 'sentiment': 3, 'summary': 'Not mentioned.'},
 'condition of hotel': {'angry': False,
                        'sentiment': 4,
                        'summary': 'The rooms are spacious and elegantly '
                                   'decorated, and the double pane windows '
                                   'kept the busy street quiet.'},
 'frontdesk service': {'angry': False,
                       'sentiment': 5,
                       'summary': 'The staff was fantastic, greeted the guest '
                                  'and took the time to sit down with them to '
                                  'make sure they were seeing all the '
                                  'important historic sites, and were always '
                                  'prompt and courteous with email requests.'},
 'value': {'angry': False, 'sentiment': 3, 'summary': 'Not mentioned.'}}
Cost: 0.00109
CPU times: total: 15.6 ms
Wall time: 14 s


In [14]:
%%time

i = random.randint(0, 9)

review = df_negative.iloc[i][['Title', 'Content']].values
review = ' '.join(review)

response = get_completion(instructions, review)
pprint(json.loads(response['content']))
print(f'Cost: {response["cost"]}')

{'cleanliness': {'angry': True,
                 'sentiment': 1,
                 'summary': 'The room was dusty, tired, and smelled of '
                            'cigarette smoke. The surfaces were dirty, the '
                            'balcony was stained with mold, and the bedspread, '
                            'carpet, and air conditioning unit were filthy.'},
 'condition of hotel': {'angry': True,
                        'sentiment': 1,
                        'summary': 'The building looked like a converted '
                                   'apartment circa 1962, the elevators were '
                                   'very small and slow, the ceilings were '
                                   'low, the hallways narrow, and the exterior '
                                   'was depressing.'},
 'frontdesk service': {'angry': False,
                       'sentiment': 4,
                       'summary': 'The staff was extremely nice and helpful.'},
 'value': {'angry': F