# Booking review analysis: Azure Cognitive Services vs. GPT-3

## Step 0: Set credentials, install packages, and define helper functions

In [None]:
# Credentials ---
# Secrets are read from environment variables so that real keys never have to
# be saved inside the notebook. The placeholder defaults below are used when a
# variable is unset, which keeps the previous "edit the value here" workflow.
import os

## Azure Credentials
## Azure Cognitive Services Text Analytics key and endpoint
## (environment variables AZURE_KEY and AZURE_ENDPOINT):
AZURE_KEY = os.environ.get("AZURE_KEY", "xxxxxxxxxxxxxxxxxxx")
AZURE_ENDPOINT = os.environ.get(
    "AZURE_ENDPOINT",
    "https://xxxxxxxxxxxxxxxxxxxx.cognitiveservices.azure.com/",
)

## OpenAI Credentials
## OpenAI API key (environment variable OPENAI_API_KEY). If a real key is ever
## pasted here instead, delete it again before sharing the notebook.
OPENAI_KEY = os.environ.get("OPENAI_API_KEY", "sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx")

# Packages ---
%pip install azure-ai-textanalytics==5.1.0
%pip install openai
import pandas as pd
import os
from azure.ai.textanalytics import TextAnalyticsClient
from azure.core.credentials import AzureKeyCredential
import os
import openai

# Azure Functions ----
## Authenticate client
def authenticate_client():
    """Build a Text Analytics client from the notebook-level Azure settings.

    Reads the module-level ``AZURE_KEY`` and ``AZURE_ENDPOINT`` values.

    Returns:
        A ``TextAnalyticsClient`` created with that endpoint and an
        ``AzureKeyCredential`` wrapping the key.
    """
    return TextAnalyticsClient(
        endpoint=AZURE_ENDPOINT,
        credential=AzureKeyCredential(AZURE_KEY),
    )

# Create the client once; the Azure cells below pass it to recognize_key_phrases.
client = authenticate_client()

## Key Phrases Function
def recognize_key_phrases(documents, client):
    """Extract key phrases for a batch of documents, skipping failed ones.

    Args:
        documents: Batch handed unchanged to ``client.extract_key_phrases``.
        client: Object exposing ``extract_key_phrases`` (the notebook passes
            a ``TextAnalyticsClient``).

    Returns:
        A list with the per-document results whose ``is_error`` flag is
        falsy, in the order the client returned them.
    """
    successful = []
    for doc in client.extract_key_phrases(documents):
        if doc.is_error:
            continue
        successful.append(doc)
    return successful

# OpenAI Functions ---

## Keywords extraction
def recognize_key_phrases_openai(text):
    """Ask the OpenAI completion endpoint for complaint keywords in a review.

    Args:
        text: Review text. It is concatenated into the prompt, so it must be
            a string.

    Returns:
        The response of ``openai.Completion.create``, unmodified.
    """
    instruction = "Extract the complaints as keywords from this negative review:\n\n"
    output_format = "\n\nFormat output as a comma separated list."
    request_options = {
        "model": "text-davinci-003",
        "prompt": instruction + text + output_format,
        "temperature": 0.5,
        "max_tokens": 60,
        "top_p": 1.0,
        "frequency_penalty": 0.8,
        "presence_penalty": 0.0,
    }
    return openai.Completion.create(**request_options)

# Utility Functions ---

## Get successive n-sized chunks from list.
def chunks(lst, n):
    """Yield consecutive slices of ``lst`` holding at most ``n`` items each.

    Args:
        lst: Any object supporting ``len()`` and slicing.
        n: Slice length, used as the step of the underlying ``range``.

    Yields:
        ``lst[start:start + n]`` for ``start`` = 0, n, 2n, ...; the last
        slice may be shorter than ``n``.
    """
    starts = range(0, len(lst), n)
    yield from (lst[start:start + n] for start in starts)



Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


## Step 1: Load data

Download here: [Kaggle Booking.com reviews](https://www.kaggle.com/datasets/michelhatab/hotel-reviews-bookingcom)

In [None]:
# Load the review export; the relative path means the CSV must sit in the
# notebook's working directory.
reviews = pd.read_csv("La_Veranda_Reviews-2023-01-16.csv")

In [None]:
# Preview the loaded reviews.
reviews

Unnamed: 0,Title,PositiveReview,NegativeReview,Score,GuestName,GuestCountry,RoomType,NumberOfNights,VisitDate,GroupType,PropertyResponse
0,Wonderful place to stay.,"New, comfortable apartments, close to the airp...",Nothing at all.,10.0,Olga,Norway,Budget Twin Room,1 night,June 2022,Solo traveler,
1,It was superb,We had a really pleasant stay! The staff was v...,,10.0,Iwona,Poland,Double Room,3 nights,December 2022,Family,
2,Very Good,the location is great and near the airport. bu...,,8.0,Ruijia,Sweden,Double Room,1 night,December 2022,Solo traveler,
3,Wonderful,Great stuff\nGreat Quality/price\nClean,,9.0,Theprincem,United Kingdom,Double Room with Balcony,2 nights,September 2022,Solo traveler,
4,"Fantastic value for a new, modern and spotless...","Clean and modern with very comfortable beds, i...",,10.0,M,Switzerland,Family Suite with Balcony,1 night,October 2022,Family,
...,...,...,...,...,...,...,...,...,...,...,...
1518,Exceptional,,,10.0,Anonymous,Israel,,5 nights,September 2021,Couple,
1519,Wonderful,,,9.0,Anonymous,Czech Republic,,1 night,September 2021,Group,
1520,Very Good,,,8.0,Anonymous,Sweden,,1 night,August 2021,Couple,
1521,Wonderful,,,9.0,Anonymous,Germany,,3 nights,September 2021,Solo traveler,


In [None]:
# Save a copy that includes the DataFrame index as a column: the cells below
# use that index as the review id in the key-phrase outputs.
reviews.to_csv("La_Veranda_Reviews-2023-01-16_with-index.csv", index = True)

In [None]:
# Keep only stays whose VisitDate mentions 2022. `na=False` treats a missing
# VisitDate as "no match"; without it the mask would contain NaN and boolean
# indexing would raise a ValueError.
reviews_2022 = reviews[reviews['VisitDate'].str.contains("2022", na=False)]

## Step 2: Call the AI services

In [None]:
# Negative-review text of the 2022 stays; rows without a negative review are
# dropped. The original DataFrame index is kept and is used as the review id
# in the cells below.
reviews_text = reviews_2022['NegativeReview'].dropna()

### 2.1 Azure

In [None]:
# Split the reviews into batches of 10 so that each API call sends several
# documents at once instead of one request per review.
# NOTE(review): the service presumably also caps the number of documents per
# request - confirm the limit before raising the batch size.
batches = list(chunks(reviews_text, 10))

In [None]:
# Create list of documents which will be sent to the API: one list of
# {"id", "text"} dicts per batch.
# Comprehensions keep the loop variables out of the notebook's global scope,
# so the builtin `id` is no longer shadowed for later cells.
documents = [
    [
        {
            # The review's DataFrame index identifies the document. It is
            # sent as a string, which is how the SDK documents the "id" field.
            "id": str(review_index),
            # Normalise the text: trim surrounding whitespace, lower-case.
            "text": review.strip().lower(),
        }
        for review, review_index in zip(batch, batch.index)
    ]
    for batch in batches
]

In [None]:
# Call the AI service, one request per batch of documents
results = []
for batch_documents in documents:
    batch_result = recognize_key_phrases(batch_documents, client)
    results.append(batch_result)

# Print the first result
results[0]



In [None]:
# Merge the per-batch result lists into one flat list
results_flat = []
for batch_result in results:
    results_flat.extend(batch_result)

# Keep only the documents that are not flagged as errors
doc_result = list(filter(lambda doc: not doc.is_error, results_flat))

In [None]:
# Collect the document ids and their key-phrase lists as two parallel lists
ids = []
key_phrases = []
for document in doc_result:
    ids.append(document.id)
    key_phrases.append(document.key_phrases)

In [None]:
# One row per document: its id and the list of key phrases extracted for it.
key_phrases_df_azure = pd.DataFrame({"Id": ids, "KeyPhrases": key_phrases})

In [None]:
# Preview the table while KeyPhrases still holds one list per document.
key_phrases_df_azure

Unnamed: 0,Id,KeyPhrases
0,0,[]
1,7,[]
2,8,"[fresh fruit, possibility, veg]"
3,11,[]
4,13,"[different one, receptionist, english, feeling..."
...,...,...
271,760,"[receptionist, english, staff, information, bu..."
272,761,"[lack, adapters, sockets]"
273,774,[one window]
274,776,"[different sockets, difficulties, devices, ada..."


In [None]:
# Reshape to one row per (Id, key phrase). explode() turns an empty phrase list
# into a NaN row, which dropna() then removes, so documents without key phrases
# disappear from the table.
key_phrases_df_azure = key_phrases_df_azure.explode("KeyPhrases").dropna()

In [None]:
# Preview the table after reshaping to one key phrase per row.
key_phrases_df_azure

Unnamed: 0,Id,KeyPhrases
2,8,fresh fruit
2,8,possibility
2,8,veg
4,13,different one
4,13,receptionist
...,...,...
273,774,one window
274,776,different sockets
274,776,difficulties
274,776,devices


In [None]:
# Export the Azure key phrases. The DataFrame index is omitted because the
# review id is already stored in the "Id" column.
key_phrases_df_azure.to_csv("key_phrases_azure.csv", index = False)

### 2.2 OpenAI

In [None]:
# Hand the key to the openai module before any completion request is made.
openai.api_key = OPENAI_KEY

In [None]:
keywords = []
ids = []

# Query OpenAI once per review. A failed request must not abort the whole run,
# so the failure is reported and the review is simply left out of `ids`; the
# following cells detect such gaps and retry them.
# `except Exception` (instead of a bare `except`) lets KeyboardInterrupt stop
# the loop, and the loop variables no longer shadow the builtin `id`.
for review_text, review_id in zip(reviews_text, reviews_text.index):
    try:
        response = recognize_key_phrases_openai(review_text)
        keyword_text = response["choices"][0]["text"]
    except Exception as err:
        print(f"Review {review_id}: keyword extraction failed ({err!r})")
    else:
        # Append both lists together so they stay aligned.
        keywords.append(keyword_text)
        ids.append(review_id)

In [None]:
# Check missing ids - rerun this cell and the retry cell below until the
# output is 0.
# TODO: Implement loop
# Set membership avoids rescanning the whole `ids` list for every review.
processed_ids = set(ids)
# Positions (not labels) of the reviews that have no keywords yet.
missing_ids = [
    position
    for position, review_id in enumerate(reviews_text.index)
    if review_id not in processed_ids
]
missing_reviews = reviews_text.iloc[missing_ids]
len(missing_reviews)

0

In [None]:
# Retry the reviews that are still missing keywords. Failures are reported
# instead of silently ignored; a review that fails again stays out of `ids`
# and is picked up by the next run of the missing-id check.
# `except Exception` (instead of a bare `except`) lets KeyboardInterrupt stop
# the loop, and the loop variables no longer shadow the builtin `id`.
for review_text, review_id in zip(missing_reviews, missing_reviews.index):
    try:
        response = recognize_key_phrases_openai(review_text)
        keyword_text = response["choices"][0]["text"]
    except Exception as err:
        print(f"Review {review_id}: keyword extraction failed ({err!r})")
    else:
        # Append both lists together so they stay aligned.
        keywords.append(keyword_text)
        ids.append(review_id)

In [None]:
# Parse each completion text into a list of keywords.
# Newlines are removed first, as before.
keywords = [item.replace("\n", "") for item in keywords]
# Splitting on "," alone keeps the space after each comma (" veg") and yields
# empty entries for stray commas, so each keyword is stripped and blanks are
# dropped. Otherwise identical phrases are exported as different values.
keywords = [
    [phrase.strip() for phrase in sub.split(",") if phrase.strip()]
    for sub in keywords
]
key_phrases_df_openai = pd.DataFrame({"Id": ids, "KeyPhrases": keywords})
# explode() gives one row per keyword; a review left with no keywords becomes a
# NaN row and is removed by dropna().
key_phrases_df_openai = key_phrases_df_openai.explode("KeyPhrases").dropna()
key_phrases_df_openai.to_csv("key_phrases_openai.csv", index = False)

In [None]:
# Preview the OpenAI keywords, one keyword per row.
key_phrases_df_openai

Unnamed: 0,Id,KeyPhrases
0,7,nothing
1,8,Fresh fruit
1,8,veg
1,8,purchasing
2,13,Receptionist
...,...,...
273,749,bath
274,507,positive experience
274,507,use again
274,507,next time
