# KT Yelp Review Auto-Responder

In [1]:
# Scheduled to run every Tuesday and Thursday at 10am PST

In [29]:
import base64
import html
import json
import os
import random
import re
from datetime import datetime, timedelta
from email.message import EmailMessage

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns
import spacy
from bs4 import BeautifulSoup
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from spacytextblob.spacytextblob import SpacyTextBlob

In [19]:
# Clinic metadata. The Yelp_Bus_Id column drives the scraper and the Clinic
# column is merged back onto the scraped reviews later in the notebook.
locations_df_temp = pd.read_excel("KT Locations Data.xlsx")
locations_df = locations_df_temp.copy()  # working copy; the frame as loaded stays untouched
business_ids = locations_df['Yelp_Bus_Id'].tolist()
reviews_data = []  # NOTE(review): not referenced by any later cell visible here

# Only reviews newer than this cutoff get an automatic response.
current_date = datetime.now()
three_months_ago = current_date - timedelta(days=90)

# The key file is parsed as a CSV and the key is taken from the first column
# name, i.e. the first comma-separated field of the file's first line.
GOOGLE_API_KEY = pd.read_csv("GOOGLE_API_Key.txt").columns[0]

# This used to be a bare `YELP_API_KEY` expression, which raises NameError on a
# fresh kernel. Bind it from the environment instead (None when unset) so the
# cell survives Restart & Run All and no secret is stored in the notebook.
YELP_API_KEY = os.environ.get("YELP_API_KEY")

## Web Scraping

In [4]:
# given a 'list', list_search searches for the given string 'term' and will output whatever is in the position 
# 'num' off of the position of 'term'
def list_search(list, term, num):
    """Return the element located `num` positions after the first `term`.

    Parameters
    ----------
    list : sequence
        Sequence to search. The name shadows the builtin; it is kept so that
        existing keyword/positional callers keep working.
    term : object
        Value to look for; elements are compared with `==`.
    num : int
        Offset added to the index of the first match.

    Returns
    -------
    object
        The element at ``first_match_index + num``.

    Raises
    ------
    IndexError
        If `term` does not occur in `list` (the message names the missing
        term), or if the offset position lies past the end of `list`.
    """
    # Stop at the first hit instead of collecting every matching index.
    for index, item in enumerate(list):
        if item == term:
            return list[index + num]

    raise IndexError(f"{term!r} not found in list")

In [5]:
# pulls the 10 most recent reviews from yelp page given business_id and outputs them as a df
def yelp_review_scraper(business_id):
    """Scrape the newest reviews from a Yelp business page into a DataFrame.

    The page is requested sorted by date (newest first), the embedded
    ``"reviews":[...]`` JSON fragment is located with a regular expression, and
    each review fragment is tokenised on double quotes so that individual
    fields can be picked out with `list_search`.

    Parameters
    ----------
    business_id : str
        Yelp business slug, interpolated into ``https://www.yelp.com/biz/<id>``.

    Returns
    -------
    pandas.DataFrame or None
        One row per parsed review with the columns ``names``, ``review_text``,
        ``date`` (datetime), ``rating`` (int), ``review_id`` and
        ``already_replied`` (bool). ``None`` is returned when the page does not
        answer with HTTP 200 or when the review pattern is not found, so the
        caller can skip the business.

    Raises
    ------
    IndexError, ValueError
        Propagated from `list_search`, `datetime.strptime` or `int` when a
        review fragment does not have the expected layout.
    """
    search_url = f"https://www.yelp.com/biz/{business_id}?sort_by=date_desc"
    # A timeout keeps an unattended scheduled run from hanging on a stalled connection.
    search_response = requests.get(search_url, timeout=30)

    # Previously a non-200 answer fell through and crashed on the unbound `soup`.
    if search_response.status_code != 200:
        print(f"Skipping {business_id}: Yelp returned HTTP {search_response.status_code}")
        return None

    soup = BeautifulSoup(search_response.text, 'html.parser')

    # find matches for set pattern
    soup_string = str(soup)
    # JSON nulls become the quoted marker "%", so that after splitting on '"'
    # a null value shows up as the standalone token '%'.
    soup_string = soup_string.replace('null', '"%"')
    pattern = r'"reviews":\[(.*?)\](.*?)\](.*?)\[(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\[(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\[(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\[(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\[(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\[(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\[(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\[(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\[(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\[(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\[(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\[(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\[(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\[(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\[(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\[(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\[(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\[(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\[(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\[(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\}(.*?)\['
    matches = re.findall(pattern, soup_string)

    # Previously an unmatched page crashed on `matches_new[0]`.
    if not matches:
        print(f"Skipping {business_id}: review data pattern not found in page")
        return None

    # convert tuple matches in match to list
    matches_new = [list(t) for t in matches]

    # cut lists at stop list to get only the data we need
    stop_phrase = '"tags":'

    new_list = []
    current_sublist = []

    # Only the first regex match is used; its captured groups are regrouped so
    # that each sublist ends with the group containing the stop phrase.
    for item in matches_new[0]:
        if stop_phrase in item:
            if current_sublist:
                current_sublist.append(item)
                new_list.append(current_sublist)
            current_sublist = []
        else:
            current_sublist.append(item)

    if current_sublist:
        new_list.append(current_sublist)

    # Join every sublist into one string and keep only those that contain the
    # stop phrase (the trailing leftovers without it are dropped).
    joined_fragments = ["".join(sublist) for sublist in new_list]
    review_fragments = [fragment for fragment in joined_fragments if stop_phrase in fragment]

    # extract the data we actually want from individual review JSON data
    names = []
    review_texts = []
    dates = []
    ratings = []
    review_ids = []
    already_replieds = []

    for fragment in review_fragments:
        # Tokenise on double quotes; bare ',' separators are discarded so the
        # fixed offsets passed to list_search line up with key/value positions.
        tokens = [item for item in fragment.split('"') if item != ',']

        name = list_search(tokens, 'markupDisplayName', 2)
        review_text = list_search(tokens, 'text', 2)
        date = datetime.strptime(list_search(tokens, 'localizedDate', 2), '%m/%d/%Y')
        # The rating is an unquoted number, so it sits in the token right after
        # the key, wrapped in ':' and ','.
        rating = int(list_search(tokens, 'rating', 1).strip(':,'))
        review_id = list_search(tokens, 'id', 2)

        # '%' is the marker substituted for JSON null above: no owner reply yet.
        businessOwnerReplies = list_search(tokens, 'businessOwnerReplies', 2)
        already_replied = businessOwnerReplies != '%'

        names.append(name)
        review_texts.append(review_text)
        dates.append(date)
        ratings.append(rating)
        review_ids.append(review_id)
        already_replieds.append(already_replied)

    data = {
        "names": names,
        "review_text": review_texts,
        "date": dates,
        "rating": ratings,
        "review_id": review_ids,
        "already_replied": already_replieds
    }

    df = pd.DataFrame(data)

    return df

In [6]:
# Scrape every business, retrying each one on its own.
# Previously a single failing business restarted the scrape of *all*
# businesses, and after the last failed attempt `dfs` was silently left with
# whatever had been collected before the failure.
max_attempts = 3
dfs = []
failed_business_ids = []

for business_id in business_ids:
    for _ in range(max_attempts):
        try:
            df = yelp_review_scraper(business_id)
        except Exception as e:
            # Best effort: report and retry this business only.
            print(f"An error occurred: {e}")
            continue

        if df is not None:
            df['business_id'] = business_id
            dfs.append(df)
            break
    else:
        # Every attempt either raised or returned None.
        failed_business_ids.append(business_id)

if failed_business_ids:
    print(f"No reviews scraped for: {failed_business_ids}")

In [7]:
# Concatenate all the per-business DataFrames in the list
yelp_reviews = pd.concat(dfs, ignore_index=True)

# Attach the clinic name for every review via its Yelp business id
merged_df = yelp_reviews.merge(locations_df[['Yelp_Bus_Id', 'Clinic']], left_on='business_id', right_on='Yelp_Bus_Id', how='left')

# The merge leaves both key columns; keep a single one named 'business_id'
merged_df = (
    merged_df
    .drop(columns=['business_id'])
    .rename(columns={'Yelp_Bus_Id': 'business_id'})
)
yelp_reviews = merged_df

# Literal clean-ups of the scraped review text. Order matters: the '&' entity
# is handled last. regex=False makes the literal intent explicit.
text_replacements = [
    ('\xa0', ''),
    ('&amp;#39;', "'"),
    ('<br&gt;', ''),
    ('&amp;#34;', '"'),
    ('&amp;amp;', '&'),
]
for old, new in text_replacements:
    yelp_reviews['review_text'] = yelp_reviews['review_text'].str.replace(old, new, regex=False)

# Drop the trailing last-name initial (e.g. "Maria G." -> "Maria").
# regex=True must be explicit: pandas >= 2.0 defaults to regex=False, which
# would treat the pattern as a literal string and leave the names unchanged.
yelp_reviews['names'] = yelp_reviews['names'].str.replace(r'\s[A-Za-z]\.\s*', '', regex=True)
yelp_reviews['review_text'] = yelp_reviews['review_text'].str.lower()



  yelp_reviews = pd.concat(dfs, ignore_index=True)
  yelp_reviews['names'] = yelp_reviews['names'].str.replace(r'\s[A-Za-z]\.\s*', '')


In [8]:
yelp_reviews

Unnamed: 0,names,review_text,date,rating,review_id,already_replied,business_id,Clinic
0,Elizabeth,this office new staff is horrible. i used to l...,2023-09-22,1.0,YF6jJ3EZtimxRcpWDgRM0Q,0.0,kids-and-teens-medical-group-northridge-northr...,Northridge
1,Martha,dr benjamin is the best she so nice and patien...,2023-03-19,5.0,4zb5jC0Y8vium32lovjqpQ,1.0,kids-and-teens-medical-group-northridge-northr...,Northridge
2,Ehis,"this place should be closed honestly , the sta...",2023-03-07,1.0,uxNYDRtjCawALeK64CkE6w,1.0,kids-and-teens-medical-group-northridge-northr...,Northridge
3,Richard,"they don't deserve 1 star, yelp should allow 0...",2022-12-28,1.0,xkyt-QoHzSvsvbWYsJnWDw,1.0,kids-and-teens-medical-group-northridge-northr...,Northridge
4,Diana,we had to do a blood test for our daughter and...,2022-11-18,5.0,OhwsOar1NOrXrwwJu4cCxw,1.0,kids-and-teens-medical-group-northridge-northr...,Northridge
...,...,...,...,...,...,...,...,...
97,Maria,i scheduled an appointment and specified and a...,2023-08-18,1.0,WBckyR48WhpjM5jVnPr75A,1.0,kids-and-teens-medical-group-pico-rivera-pico-...,Pico Rivera
98,Emily,i was in yesterday to see dr man and let me te...,2023-06-28,5.0,z5xb3-6aBisThXgNhARyCA,0.0,kids-and-teens-medical-group-pico-rivera-pico-...,Pico Rivera
99,Monica,this 3rd party shit is the stupidest thing i'v...,2023-06-09,1.0,mCXcwAb-gICj3maVPJEGjw,0.0,kids-and-teens-medical-group-pico-rivera-pico-...,Pico Rivera
100,Liza,i recently have gone to this doctor. i have an...,2021-09-08,1.0,Rk6XLnfoCqdnrYrjGYYI6g,0.0,kids-and-teens-medical-group-arcadia-arcadia,Arcadia


## Generate Review Responses

5 - Generic thank you, parse through 5 different ones with rng

4 - Generic thank you; if there's any issue: email

1 - If there's any issue, email; scan the review text for words like 'wait' and respond with a personalized reply

Hi F_name,
Thank you....

In [42]:
# Review response banks.
# Each bank is a list of canned replies. The literal placeholder "[Name]" is
# substituted with the reviewer's name when a reply is generated; entries
# without the placeholder are used as written.
# NOTE(review): if the response-selection cell indexes these lists with fixed
# random ranges, keep the list lengths in sync with those ranges.

# Replies for 5-star reviews.
Five_Star = ["Thank you for visiting us, [Name]! Your kind words brighten our day. Should you ever need our services in the future, please don't hesitate to reach out. Best regards, Dr. De Silva",
             "Wow, [Name], we can't thank you enough for your generous feedback! It truly warms our hearts. Feel free to return whenever you require our care. Warm regards, Dr. De Silva", 
             "Dear [Name], we're immensely grateful for your support and the wonderful review. Your children's well-being is our top priority, so please remember that we're here for you whenever you need us. Wishing you all the best, Dr. De Silva",
             "Your kind words mean the world to us, [Name]. We're here to provide the best care possible for your children. If you ever require our assistance again, please reach out. Thank you, Dr. De Silva",
             "Thank you so much, [Name], for your heartwarming review! Please know that you're always welcome back whenever the need arises. And don't forget, you can easily schedule appointments online through our portal at ktdoctor.com. Best wishes, Dr. De Silva",
             "[Name], your positive feedback is greatly appreciated. We'll be sure to share your kind words with our team. If you have any further questions or require our services, please don't hesitate to contact us. Warm regards, Dr. De Silva",
             "Thank you for your review; it means a lot to us. For any inquiries or assistance, please feel free to connect with our office at lacanada@ktdoctor.com. We look forward to continuing to provide you with exceptional care. Kind regards, Dr. De Silva",
             "Your support is invaluable, [Name]! We can't wait to welcome you and your children back whenever you need us. Your well-being is our priority. Stay safe and reach out to us anytime at lacanada@ktdoctor.com. Warm regards, Dr. De Silva",
             "Thank You, [Name]! Your kind words motivate us to keep providing top-notch care. As your children grow, know that we're here to support you along the way. Best, Dr. De Silva, drdesilva@ktdoctor.com"]
# Apology replies for low-star reviews that do not mention waiting.
One_Star = ["I'm truly sorry for the inconvenience you experienced during your visit. Your feedback is important to us. Please reach out to us at drdesilva@ktdoctor.com, and we'll do our best to address your concerns and provide you with better service. Thank you for bringing this to our attention.",
            "We're genuinely sorry to hear about your negative experience. Please contact us at drdesilva@ktdoctor.com. Thank you for your review.",
            "I apologize for the experience you had with us. We genuinely want to assist you better and address your concerns. Please email us directly at drdesilva@ktdoctor.com with your contact information, and I will personally reach out to you. Thank you for bringing this to our attention.",
            "Hello [Name], I'm sincerely sorry for the negative experience you encountered. Your feedback is crucial to us, and we want to make things right. You can contact us 24/7 for non-urgent matters by texting 626-298-7121 or emailing drdesilva@ktdoctor.com. We are committed to improving your experience.",
            "I'm truly sorry that you had a disappointing experience with us. We value your feedback, and we're dedicated to making improvements. Please email me directly at drdesilva@ktdoctor.com. Your satisfaction is our priority, and we appreciate your review."]

# Neutral thank-you replies for three- and four-star reviews.
T_F_Star = ["Hello [Name], thank you for sharing your thoughts about your recent experience with us. We appreciate your input and take every review seriously. Your feedback helps us continually improve our services. If you have any additional insights or suggestions, please feel free to share them with us at drdesilva@ktdoctor.com. We're here to serve you better.",
            "Dear [Name], we're grateful for your review and for choosing our practice for your child's healthcare needs. Your feedback is important to us, and it helps us better understand our patients' experiences. If there are any specific areas you'd like us to focus on or if you have more details to provide, please don't hesitate to reach out to us at drdesilva@ktdoctor.com. Thank you for entrusting us with your child's care.",
            "Hi [Name], thank you for taking the time to leave your feedback about our practice. We're glad to hear about your recent experience with our care. We value all feedback, and it's important in our ongoing efforts to serve you better. If there are any additional insights or details you'd like to share or if you have any questions, please feel free to contact us at drdesilva@ktdoctor.com. Your input is greatly appreciated.",
            "Dear [Name], your review is appreciated, and we're pleased to have had the opportunity to serve your child's healthcare needs. We're always looking for ways to enhance our services, and your feedback is instrumental in this process. If there are any specific aspects of your child's visit that you'd like to discuss further or any suggestions you may have, please reach out to us at drdesilva@ktdoctor.com. Thank you for choosing us for your child's healthcare."]

# Apology replies for critical reviews whose text mentions "wait".
Neg_Wait_Time = ["I apologize for any inconvenience you experienced due to wait times during your visit. Your time is valuable to us, and we're committed to improving our efficiency. To help us serve you better, please consider using our online portal at ktdoctor.com for scheduling or secure messaging with your doctor. Thank you for your feedback; it helps us make positive changes.",
                 "I'm genuinely sorry for the wait time you encountered [Name]. Your time is important to us, and we understand how frustrating long waits can be. We're actively working on streamlining our processes. If you have any further concerns or would like to provide additional feedback, please reach out to us at drdesilva@ktdoctor.com. We appreciate your patience and feedback.",
                 "I regret that your visit was marred by extended wait times. We're taking your feedback seriously and are dedicated to improving our services. To minimize wait times, you can self-schedule appointments or securely message your doctor through our website at ktdoctor.com. Thank you for bringing this to our attention; it helps us make necessary improvements.",
                 "I'm deeply sorry for any inconvenience caused by wait times during your visit. We understand how valuable your time is, and we're actively working to reduce wait times. For immediate assistance and to provide us with more insights into your experience, please email us at drdesilva@ktdoctor.com. Your feedback is crucial in helping us enhance our service quality."]


In [10]:
def check_word(string, word):
    """Tell whether `word` occurs in `string`.

    This is a plain containment test (``word in string``), so for text it
    matches substrings as well as whole words. Returns True or False.
    """
    return word in string

In [44]:
# Build one canned reply for every recent review that has no owner reply yet.
# `responses` maps review_id -> reply text; the parallel lists feed the
# Reviews_Responded summary frame used for the email report.
responses = {}

names = []
ratings = []
review_texts = []
clinics = []

for _, row in yelp_reviews.iterrows():
    already_replied = row['already_replied']
    date = pd.to_datetime(row['date'])
    name = row['names']
    rating = row['rating']
    r_id = row['review_id']
    text = row['review_text']
    clinic = row['Clinic']

    # Only unanswered reviews from the last three months are handled.
    if not ((already_replied == 0) and (date > three_months_ago)):
        continue

    wait_time = check_word(text, "wait")

    # Pick the bank of canned replies for this rating. Reviews of three stars
    # or fewer that mention "wait" get the wait-time apology instead.
    if rating == 5:
        bank = Five_Star
    elif rating in (1, 2):
        bank = Neg_Wait_Time if wait_time else One_Star
    elif rating == 3:
        bank = Neg_Wait_Time if wait_time else T_F_Star
    elif rating == 4:
        bank = T_F_Star
    else:
        # Previously an unexpected rating reused the reply chosen for the
        # previous review (or crashed on the first row); skip it instead.
        print(f"Skipping review {r_id}: unexpected rating {rating!r}")
        continue

    # random.choice follows the bank's actual length, so editing a bank can
    # no longer push a hard-coded index out of range.
    responses[r_id] = random.choice(bank).replace("[Name]", name)

    names.append(name)
    review_texts.append(text)
    ratings.append(rating)
    clinics.append(clinic)

# Built once after the loop, so it exists (possibly empty) even when no review
# qualified, instead of being rebuilt on every iteration.
data = {
    "names": names,
    "rating": ratings,
    "review_text": review_texts,
    "clinic": clinics
}

Reviews_Responded = pd.DataFrame(data)

## Push Responses to Yelp

In [45]:
responses

{'YF6jJ3EZtimxRcpWDgRM0Q': "We're genuinely sorry to hear about your negative experience. Please contact us at drdesilva@ktdoctor.com. Thank you for your review.",
 'FOXKjGBuMzWIHB92Rh6nGQ': "Thank you so much, LuzMarina, for your heartwarming review! Please know that you're always welcome back whenever the need arises. And don't forget, you can easily schedule appointments online through our portal at ktdoctor.com. Best wishes, Dr. De Silva",
 '5iTKGjR14S-GH1HNNalqoQ': "Thank you so much, Edna, for your heartwarming review! Please know that you're always welcome back whenever the need arises. And don't forget, you can easily schedule appointments online through our portal at ktdoctor.com. Best wishes, Dr. De Silva",
 'n5CpJFhvgba1MbL0sUYgaA': 'Thank you for your review; it means a lot to us. For any inquiries or assistance, please feel free to connect with our office at lacanada@ktdoctor.com. We look forward to continuing to provide you with exceptional care. Kind regards, Dr. De Silv

## Email Summary

In [20]:
# Short alias for the Google API key, passed as developerKey when the Gmail
# service is built.
# NOTE(review): the recorded output of the send cell is an HTTP 401 stating
# that API keys are not supported by this API and OAuth2 credentials are expected.
key = GOOGLE_API_KEY

In [16]:
# Compose the plain-text summary email: a greeting, then one section per
# clinic (in order of first appearance) listing every review answered.
email_text = "Hello, \n \nHere are the Yelp Reviews that I responded to today: \n\n"

# clinic name -> list of {'name', 'rating', 'review_text'} entries
grouped_reviews = {}
for _, responded in Reviews_Responded.iterrows():
    entry = {
        'name': responded['names'],
        'rating': f"{int(responded['rating'])} Star(s)",
        'review_text': responded['review_text'],
    }
    grouped_reviews.setdefault(responded['clinic'], []).append(entry)

# Collect the pieces and join once at the end.
email_parts = [email_text]
for clinic_name, clinic_reviews in grouped_reviews.items():
    email_parts.append('\n----------------------------------------------------------------------------------------------------------- \n\n' + clinic_name + ':')
    for entry in clinic_reviews:
        email_parts.append(f"\n{entry['name']}: {entry['rating']}:\n")
        email_parts.append(f"{entry['review_text']}\n")

email_text = "".join(email_parts)


In [17]:
print(email_text)

Hello, 
 
Here are the Yelp Reviews that I responded to today: 


----------------------------------------------------------------------------------------------------------- 

Northridge:
Elizabeth: 1 Star(s):
this office new staff is horrible. i used to love coming to this office.i travel from far about 40 miles just to get here.they didn't want to give me paper to fill out, because i must use my phone.and they would reschedule.apparently, resources are scarce.the location manager suwan sarabia refused to write down his name, i had to write it down. they're no help. i remember when the office used to be for the patients, helping families without hesitation. i'm disappointed i've been a customer for about a decade and i might just leave this company. not worth the stress.

----------------------------------------------------------------------------------------------------------- 

Pasadena:
LuzMarina: 5 Star(s):
reason for visit getting a refill on my sons allergy medicine nurse angeli

In [31]:
# Send the summary email through the Gmail API.
#
# NOTE(review): still waiting for OAuth 2.0 credentials. The recorded output
# of this cell shows Gmail rejecting the API key with HTTP 401, so the send
# keeps failing until `build` is given OAuth2 credentials instead of
# developerKey. That error text also contains the request URL including the
# key; clear the cell output before sharing the notebook.

# Build the Gmail API service
service = build('gmail', 'v1', developerKey=key)

# users.messages.send expects `raw` to hold a complete RFC 2822 message
# (headers + body), base64url-encoded. Top-level 'to'/'subject' keys are not
# part of the request body, so the recipient and subject go into the MIME
# headers instead.
mime_message = EmailMessage()
mime_message['To'] = 'schwa1238@gmail.com'
mime_message['Subject'] = 'Bi-Weekly Kids & Teens Yelp Bot'
mime_message.set_content(email_text)

message = {
    'raw': base64.urlsafe_b64encode(mime_message.as_bytes()).decode('utf-8')
}

# Send the email
try:
    sent_message = service.users().messages().send(userId='me', body=message).execute()
    print(f"Message sent! Message Id: {sent_message['id']}")
except HttpError as error:
    print(f"An error occurred: {error}")

An error occurred: <HttpError 401 when requesting https://gmail.googleapis.com/gmail/v1/users/me/messages/send?key=AIzaSyBffchwDCUUCuy9gU6yipWkxDGWv_aGSPk&alt=json returned "API keys are not supported by this API. Expected OAuth2 access token or other authentication credentials that assert a principal. See https://cloud.google.com/docs/authentication". Details: "[{'message': 'Login Required.', 'domain': 'global', 'reason': 'required', 'location': 'Authorization', 'locationType': 'header'}]">
