In [2]:
import warnings

# Silence every warning raised in this kernel.
# NOTE(review): this is a blanket filter, so pandas warnings such as
# SettingWithCopyWarning are hidden too -- consider narrowing it by category.
warnings.filterwarnings('ignore')

In [10]:
from rapidfuzz import process, fuzz
import pandas as pd

# Fuzzy text lookup helper
def fuzzy_match(x, match_to, threshold=90):
    """Return the entry of ``match_to`` that best matches ``x``, or None.

    Parameters
    ----------
    x : str
        Text to look up.
    match_to : iterable of str
        Candidate strings to compare ``x`` against.
    threshold : int or float, default 90
        Minimum ``fuzz.WRatio`` score for the best candidate to be accepted.

    Returns
    -------
    str or None
        The best-scoring candidate when its score is >= ``threshold``,
        otherwise None.
    """
    best = process.extractOne(x, match_to, scorer=fuzz.WRatio)
    # extractOne returns None instead of a (match, score, index) tuple when it
    # has nothing to return (e.g. an empty candidate list); unpacking that
    # directly would raise TypeError, so treat it as "no match".
    if best is None:
        return None
    match, score, _ = best
    return match if score >= threshold else None

In [12]:
# Brand names for Darden and its competitors, keyed by parent company.
# Several brands are listed under more than one spelling.
brands = {
    # Darden brands
    'darden': [
        "Olive Garden Italian Restaurant",
        "Olive Garden",
        "LongHorn Steakhouse",
        "Cheddar's Scratch Kitchen",
        "Yard House",
        "The Capital Grille",  # original note: not in the dataset
        "Seasons 52",
        "Bahama Breeze",
        "Eddie V's",  # original note: not in the dataset (TODO confirm)
        "Ruth's Chris Steak House"
    ],
    # Bloomin brands
    'bloomin': [
        "Outback Steakhouse",
        "Carrabba's Italian Grill",
        "Bonefish Grill",
        "Fleming's Prime Steakhouse & Wine Bar",
        "Aussie Grill",
        "Aussie Grill - "
    ],
    # Brinker brands
    'brinker': [
        "Chili's",  # appears under two different names
        "Chili's Grill & Bar",  # appears under two different names
        "Maggiano's Little Italy",
        "It's Just Wings"
    ],
    # Texas Roadhouse brands
    'texasroadhouse': [
        "Texas Roadhouse",
        "Bubba's 33",
        # "Jaggers"  # fast food -- exclude? (open question)
    ]
}

In [6]:
# Load the cleaned business table from disk
df_business = pd.read_parquet('../data/clean/y_business.parquet')

# Concatenate the per-company brand lists into one flat list of names
# (same order as iterating the dict and each list in turn)
lista_brands = sum(brands.values(), [])

In [13]:
# Fuzzy-match every business name against the full brand list
# (Darden and competitors alike); non-matches come back as null.
df_business['name_match'] = df_business['name'].apply(fuzzy_match, match_to=lista_brands)

# Keep only the matched rows and discard the helper column.
# `.drop()` without `inplace=True` returns a new frame, so we never mutate a
# slice of `df_business` (the in-place version raised SettingWithCopyWarning).
df_darden_y_comp = (
    df_business[df_business['name_match'].notnull()]
    .drop(columns=['name_match'])
)
df_darden_y_comp['name'].value_counts()

name
Chili's                                  15
Outback Steakhouse                       12
Olive Garden Italian Restaurant          11
LongHorn Steakhouse                       9
Bonefish Grill                            8
Texas Roadhouse                           6
Carrabba's Italian Grill                  5
Maggiano's Little Italy                   2
Bahama Breeze                             2
Seasons 52                                2
Yard House                                2
Cheddar's Scratch Kitchen                 2
Ruth's Chris Steak House                  2
Aussie Grill - Brandon                    1
Aussie Grill                              1
Fleming’s Prime Steakhouse & Wine Bar     1
Aussie Grill by Outback                   1
Eddie V's Prime Seafood                   1
It's Just Wings                           1
Aussie Grill - Clearwater                 1
Name: count, dtype: int64

In [31]:
# Business columns to carry along when merging with reviews
cols_para_merge_reviews = [
    'name',
    'business_id',
    'city',
    'state',
    'postal_code',
    'coordinates',
]

# Restrict the matched businesses to those columns (all rows, no sampling)
business_filtrado = df_darden_y_comp.loc[:, cols_para_merge_reviews]
business_filtrado

Unnamed: 0,name,business_id,city,state,postal_code,coordinates
1093,Texas Roadhouse,T9n_LqUUhC2b1ALhg57Y5Q,Bensalem,PA,19020,"40.09165,-74.93997"
2597,LongHorn Steakhouse,CS_GzUYlEPa6QHTRY224wQ,Norristown,FL,19403,"40.1274771937,-75.4040751479"
3454,Chili's,y3iKFTk_sgIXCT6fNcBn_Q,Tampa,PA,33612,"28.0550259,-82.4280458"
6085,Outback Steakhouse,U0ICWpbd1C0GD9SpIv3bSA,Brandon,NJ,33511,"27.9384740106,-82.3183461208"
7736,Aussie Grill by Outback,P3GeRGkqeW-uL4yT-kuGtw,Tampa,FL,33607,"27.965651,-82.521205"
...,...,...,...,...,...,...
146717,Chili's,jTCBK1BS8O_Iy9N0vL88uw,Fairless Hills,PA,19030,"40.1858497,-74.8680285"
147397,Chili's,U50T86i8wyNWGWxsP7GIRw,Philadelphia,NJ,19150,"40.074768681,-75.1579621104"
147799,Olive Garden Italian Restaurant,baxAe39nPgmAhkNwqvoHnQ,Tampa,FL,33625,"28.065588,-82.574715"
148253,Carrabba's Italian Grill,TiywUz1q7Yw8JL4daD4fKQ,Bensalem,DE,19020,"40.1153418721,-74.9590072"


In [18]:
import os

import requests

# Yelp Fusion API key. Read from the YELP_API_KEY environment variable so the
# secret never has to be pasted into (and saved with) the notebook; falls back
# to the previous empty-string value when the variable is not set.
API_KEY = os.environ.get("YELP_API_KEY", "")

def get_yelp_reviews(business_id, timeout=10):
    """Fetch the reviews of one business from the Yelp Fusion API.

    Parameters
    ----------
    business_id : str
        Yelp business id, interpolated into the endpoint URL.
    timeout : float, default 10
        Seconds to wait for the server before ``requests`` raises a timeout
        error (without it a stalled connection would block the cell forever).

    Returns
    -------
    list
        The ``'reviews'`` entry of the JSON response on HTTP 200
        (an empty list if the key is absent).
    str
        The sentinel ``"ACCESS_LIMIT_REACHED"`` on HTTP 429.
    None
        On any other status code, after printing the code.

    Raises
    ------
    requests.RequestException
        Propagated from ``requests.get`` (connection errors, timeouts).
    """
    url = f'https://api.yelp.com/v3/businesses/{business_id}/reviews'
    # Let requests build/encode the query string instead of hand-writing it
    params = {'limit': 50, 'sort_by': 'yelp_sort'}
    headers = {'Authorization': f'Bearer {API_KEY}'}

    respuesta = requests.get(url, headers=headers, params=params, timeout=timeout)

    if respuesta.status_code == 200:
        # Parse the JSON body and pull out the list of reviews
        return respuesta.json().get('reviews', [])

    if respuesta.status_code == 429:
        # Sentinel for "too many requests"; callers must check for it before
        # iterating over the result.
        return "ACCESS_LIMIT_REACHED"

    print(f'Error: {respuesta.status_code}')
    return None


In [32]:
# Business id of the first row of the filtered table, used as a test case
b_id = business_filtrado['business_id'].iloc[0]
b_id

'T9n_LqUUhC2b1ALhg57Y5Q'

In [20]:
reviews = get_yelp_reviews(b_id)

# get_yelp_reviews returns a list only on success; it returns the string
# "ACCESS_LIMIT_REACHED" on HTTP 429 and None on other errors. Looping over
# those would print single characters or raise TypeError, so check first.
if isinstance(reviews, list):
    for r in reviews:
        print(r)
else:
    print(f'No reviews fetched for {b_id}: {reviews!r}')

{'id': '6AKeEcA8-v3n_sf3NkOpYA', 'url': 'https://www.yelp.com/biz/bahama-breeze-tampa-2?adjust_creative=HyPG4wMfXQKDAtlq7HpPvw&hrid=6AKeEcA8-v3n_sf3NkOpYA&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_reviews&utm_source=HyPG4wMfXQKDAtlq7HpPvw', 'text': 'Food, service AND atmosphere were amazing!!\n\nAfter spending two weeks in Florida, we decided to spend our last night out here and no regrets! Everything...', 'rating': 5, 'time_created': '2024-04-25 09:55:55', 'user': {'id': 'LihIHTVGZl_qQhWen0VxkQ', 'profile_url': 'https://www.yelp.com/user_details?userid=LihIHTVGZl_qQhWen0VxkQ', 'image_url': None, 'name': 'Erin S.'}}
{'id': 'tXz3p0txwlJPSc_bTBGrqw', 'url': 'https://www.yelp.com/biz/bahama-breeze-tampa-2?adjust_creative=HyPG4wMfXQKDAtlq7HpPvw&hrid=tXz3p0txwlJPSc_bTBGrqw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_reviews&utm_source=HyPG4wMfXQKDAtlq7HpPvw', 'text': 'Okay, first thing first. This place is loud and crowded (Sunday evening). I was lucky that I was by myself

In [21]:
# Display the first fetched review to inspect the fields of a single record
reviews[0]

{'id': '6AKeEcA8-v3n_sf3NkOpYA',
 'url': 'https://www.yelp.com/biz/bahama-breeze-tampa-2?adjust_creative=HyPG4wMfXQKDAtlq7HpPvw&hrid=6AKeEcA8-v3n_sf3NkOpYA&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_reviews&utm_source=HyPG4wMfXQKDAtlq7HpPvw',
 'text': 'Food, service AND atmosphere were amazing!!\n\nAfter spending two weeks in Florida, we decided to spend our last night out here and no regrets! Everything...',
 'rating': 5,
 'time_created': '2024-04-25 09:55:55',
 'user': {'id': 'LihIHTVGZl_qQhWen0VxkQ',
  'profile_url': 'https://www.yelp.com/user_details?userid=LihIHTVGZl_qQhWen0VxkQ',
  'image_url': None,
  'name': 'Erin S.'}}

In [27]:
# Load the existing reviews table and show one random row to see its columns
df_reviews = pd.read_parquet('../data/clean/reviews_darden.parquet')
df_reviews.sample(n=1)

Unnamed: 0,name,review_id,user_id,business_id,stars,text,date,day,text_reply,city,state,postal_code,coordinates
3648,LongHorn Steakhouse,aSfEDavM_aepvPZRnzdhjw,ao2DWGjcU2hhPNWcCXfK0w,epVHdJqIFN0dtAiFvnVJVg,1.0,This was the worst experience. I came here to ...,2018-08-17,Friday,,Philadelphia,PA,19114,"40.0844101,-75.0240676748"


In [26]:
# Extract the review fields from one API response item.
# NOTE: `r` is the review dict left over from the loop that printed `reviews`.
review_id = r['id']
user_id = r['user']['id']
stars = float(r['rating'])
text = r['text'].replace('\n', ' ')
date = r['time_created']
# English day-of-week name ('Monday' ... 'Sunday') of the review timestamp
day = pd.to_datetime(date).day_name()

# Business attributes come from the filtered business table, looked up by the
# id the reviews were requested for (previously these values were left blank,
# which made the cell a SyntaxError).
negocio = business_filtrado.loc[business_filtrado['business_id'] == b_id].iloc[0]

# NOTE(review): the reviews table also shows a `text_reply` column and a
# date-only `date`; confirm how those should be filled before appending.
nueva_fila_tabla_reviews = {
    'name': negocio['name'],
    'review_id': review_id,
    'user_id': user_id,
    'business_id': b_id,
    'stars': stars,
    'text': text,
    'date': date,
    'day': day,
    'city': negocio['city'],
    'state': negocio['state'],
    'postal_code': negocio['postal_code'],
    'coordinates': negocio['coordinates']
}


"I don't know which was better: the food or Sage, our server.   I had a phenomenal non-alcoholic drink and the Jamaican dish with sweet plantains. Delicious...."