# MODELO DE RECOMENDACIÓN DE NEGOCIOS - MACHINE LEARNING

In [1]:
# Importar librerias
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics.pairwise import cosine_similarity
from scipy.spatial.distance import cdist


In [5]:
# Load the restaurant dataset from a CSV file located one directory above the notebook.
df_business = pd.read_csv('../restaurantes_california.csv')
# A bare name as the last expression makes the notebook render the DataFrame.
df_business

Unnamed: 0.1,Unnamed: 0,gmap_id,name,latitude,longitude,combined_categories,num_of_reviews,avg_rating,cluster,primary_category
0,1,0x80c2c778e3b73d33:0xbdc58662a4a97d49,San Soo Dang,34.058092,-118.292130,Korean restaurant,18,4.4,14,restaurant
1,5,0x80dd2b4c8555edb7:0xfc33d65c4bdbef42,Vons Chicken,33.916402,-118.010855,Restaurant,18,4.5,14,restaurant
2,46,0x808f879f35b5088b:0xe3541cec7a95bd88,TACOS LA CABANA,37.789076,-122.233884,Taco restaurant,2,5.0,14,restaurant
3,48,0x808f87f90c1f661f:0xf384e804a61e0c0b,Mariscos el poblano,37.764203,-122.214647,Restaurant,3,5.0,14,restaurant
4,59,0x80dcd95d192d988b:0x68795f58e35bf888,Off The Hoof,33.748329,-117.866045,Restaurant,3,4.0,14,restaurant
...,...,...,...,...,...,...,...,...,...,...
23905,299713,grBPIq_eJCT_SGuhvMrUZQ,Blaze Pizza,34.440160,-119.752421,"Fast Food, Restaurants, Pizza, Salad",146,3.5,12,restaurant
23906,300342,XQY1_EorK7FMCffiftN0fA,Papa John's Pizza,34.441749,-119.823092,"Restaurants, Food, Food Delivery Services, Pizza",52,2.5,12,restaurant
23907,300449,v5jxpGnKbx3vVxs28dgokQ,Saigon Vietnamese Restaurant,34.439514,-119.751631,"Restaurants, Vietnamese, Vegetarian",192,3.0,4,restaurant
23908,300630,TCWnTa69sJ3vGQsdEe3Esw,Uncle Roccos Famous NY Pizza,34.416585,-119.695533,"Pizza, Italian, Fast Food, Restaurants",317,2.5,12,restaurant


In [6]:
# Preprocess the data: one-hot encode the `combined_categories` column.
# Every distinct string in the column becomes its own indicator column, so a
# multi-label value such as "Pizza, Italian, Fast Food, Restaurants" is encoded
# as a single category of its own, not as four separate labels.
# NOTE(review): confirm that whole-string matching is the intended behaviour.
encoder = OneHotEncoder()
encoded_categories = encoder.fit_transform(df_business[['combined_categories']])

In [7]:
# Build the feature matrix: the dense one-hot category columns followed by
# avg_rating as one extra (unscaled) trailing column.
features = np.column_stack(
    (
        encoded_categories.toarray(),
        df_business['avg_rating'].to_numpy(),
    )
)

In [8]:
# Compute the pairwise cosine similarity between all rows of `features`;
# entry [i, j] compares business i with business j.
similarity_matrix = cosine_similarity(features)

In [9]:
# Pairwise distance between businesses, computed as the plain Euclidean
# distance on the raw (latitude, longitude) pairs -- i.e. the values are in
# degrees, not kilometres.
coords = df_business[['latitude', 'longitude']].to_numpy()
distance_matrix = cdist(coords, coords)  # cdist's default metric is 'euclidean'


In [10]:
# Recommendation function
def recommend_business(id_business, top_n=5, max_distance=0.9):
    """Return the names of up to ``top_n`` businesses recommended for a business.

    Relies on three notebook-level objects that must be row-aligned:
    ``df_business`` (columns ``gmap_id``, ``name``, ``avg_rating``),
    ``similarity_matrix`` and ``distance_matrix`` (entry ``[i, j]`` relates
    row ``i`` to row ``j`` of ``df_business``).

    Steps:
      1. Order every other business by similarity to the query (descending).
      2. Keep only those whose distance to the query is ``<= max_distance``.
         If fewer than ``top_n`` remain, the distance filter is dropped
         entirely and all other businesses are considered.
      3. Re-order the candidates by ``avg_rating`` (descending). The sort is
         stable, so similarity only decides between equally rated candidates.

    Args:
        id_business: ``gmap_id`` value of the query business. If it occurs
            more than once, the first matching row is used.
        top_n: maximum number of names to return.
        max_distance: distance threshold, in the units of ``distance_matrix``.

    Returns:
        list: business names, best recommendation first. The query business
        itself is never included.

    Raises:
        IndexError: if no row of ``df_business`` has the given ``gmap_id``.
    """
    # Resolve the query to a row *position*: both matrices and .iloc are
    # positional, so an index label would be wrong on a non-default index.
    matches = np.flatnonzero((df_business['gmap_id'] == id_business).to_numpy())
    if matches.size == 0:
        raise IndexError(f"no business with gmap_id {id_business!r} in df_business")
    business_pos = int(matches[0])

    # Rank all rows by similarity, highest first (stable: ties keep row order).
    similarities = np.asarray(similarity_matrix[business_pos])
    by_similarity = np.argsort(-similarities, kind='stable')

    # Exclude the query business by position. Dropping "the first result" is
    # not safe: a business with identical features ties with the query and may
    # be sorted ahead of it, which would leave the query in its own results.
    by_similarity = by_similarity[by_similarity != business_pos]

    # Geographic filter.
    distances = np.asarray(distance_matrix[business_pos])
    candidates = by_similarity[distances[by_similarity] <= max_distance]

    # Not enough nearby businesses: ignore the distance restriction altogether.
    if candidates.size < top_n:
        candidates = by_similarity

    # Prefer the best rated candidates; the stable sort keeps the similarity
    # order among candidates with equal avg_rating.
    ratings = df_business['avg_rating'].to_numpy()
    by_rating = candidates[np.argsort(-ratings[candidates], kind='stable')]

    # Return the business names as a plain list.
    return df_business['name'].iloc[by_rating[:top_n]].tolist()




# Recomendaciones con negocios

In [11]:
# Example 1: recommendations for "San Soo Dang", a Korean restaurant.
# Despite its name, `business_index` holds a gmap_id string, not a row index.
business_index = '0x80c2c778e3b73d33:0xbdc58662a4a97d49'  # gmap_id of the Korean restaurant
# Use a tighter distance threshold (0.5) than the function's default (0.9).
recommended_business_names = recommend_business(business_index, max_distance=0.5)
recommended_business_names

['Jook Hyang II',
 'Genghis Khan',
 'Sulga',
 'Western Soondae',
 'Choo Choo Train Udon']

In [16]:
# Example 2: recommendations for "TACOS LA CABANA", a taco restaurant.
# `business_index` holds the business's gmap_id string, not a row index.
business_index = '0x808f879f35b5088b:0xe3541cec7a95bd88'
# Use a tighter distance threshold (0.5) than the function's default (0.9).
recommended_business_names = recommend_business(business_index, max_distance=0.5)
recommended_business_names

['Tacos El Último Baile',
 'Comalito',
 'Tacos Zaragoza',
 'Mariscos el poblano',
 'Bridge Kitchen']

# ¡Muchas gracias!
# Equipo Jupix

In [None]:
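## Precision and recall helpers (disabled draft: every line below in this cell is commented out)
## NOTE(review): recommend_business returns business names while relevant_by_category stores row index labels, so the set intersection in precision/recall would be empty as written.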
#def precision_at_k(recommended, relevant, k):
#    recommended_at_k = recommended[:k]
#    relevant_at_k = set(relevant)  # Convertir a conjunto para facilitar comparación
#    hits = len(set(recommended_at_k) & relevant_at_k)
#    return hits / len(recommended_at_k) if recommended_at_k else 0

# def recall_at_k(recommended, relevant, k):
#     recommended_at_k = recommended[:k]
#     relevant_at_k = set(relevant)
#     hits = len(set(recommended_at_k) & relevant_at_k)
#     return hits / len(relevant_at_k) if relevant_at_k else 0

# # Separar datos en entrenamiento y prueba
# from sklearn.model_selection import train_test_split

# # Separar datos en entrenamiento y prueba
# df_train, df_test = train_test_split(df_business, test_size=0.2, random_state=42)

# #  Definir la función para obtener recomendaciones
# def get_recommendations_for_business(id_business, top_n=5, max_distance=0.9):
#     return recommend_business(id_business, top_n=top_n, max_distance=max_distance)

# # Crear un diccionario para almacenar recomendaciones
# recommendations = {}

# for index, row in df_test.iterrows():
#     id_business = row['gmap_id']
#     recommended_businesses = get_recommendations_for_business(id_business)
#     recommendations[id_business] = recommended_businesses

# #  Calcular Precision y Recall
# precision_scores = []
# recall_scores = []
# top_n = 5

# relevant_by_category = {cat: df_train[df_train['combined_categories'] == cat].index.tolist() for cat in df_train['combined_categories'].unique()}

# for index, row in df_test.iterrows():
#     id_business = row['gmap_id']
#     recommended_businesses = recommendations.get(id_business, [])
    
#     relevant_businesses = relevant_by_category.get(row['combined_categories'], [])
    
#     precision = precision_at_k(recommended_businesses, relevant_businesses, top_n)
#     recall = recall_at_k(recommended_businesses, relevant_businesses, top_n)
    
#     precision_scores.append(precision)
#     recall_scores.append(recall)

# #  Calcula el promedio de precisión y recall
# average_precision = np.mean(precision_scores)
# average_recall = np.mean(recall_scores)

# print(f"Precision {top_n}: {average_precision}")
# print(f"Recall {top_n}: {average_recall}")


