# Main objective
Recommend which type of food business (restaurant) to open at a given location, and show where the main competitors are.

## How it works
This recommender uses offer, demand, satisfaction and trends to calculate a score.

* Offer <-- Inegi
* Demand <-- Google Trends
* Satisfaction <-- Yelp
* Trends <-- Google Trends Interest Over Time

#### Score = Demand + Trends - Offer - Satisfaction
The food type with the highest score is the recommendation.

# Main libraries

In [1]:
from dotenv import load_dotenv
load_dotenv()

import os
import pandas as pd

# Inegi
In this section we are getting the offer.

We are using an Inegi database from https://inegi.org.mx/app/descarga/default.html

Currently, we are using a preprocessed database from an attached notebook.

In [2]:
def prepareInegi(path='inegi.csv'):
    """Load the preprocessed Inegi table and discard the columns not used here.

    Parameters
    ----------
    path : str, default 'inegi.csv'
        CSV file to read. The default keeps the original behaviour of reading
        'inegi.csv' from the working directory.

    Returns
    -------
    pandas.DataFrame
        The table without the columns listed in ``unused_columns``.

    Raises
    ------
    KeyError
        If any of the columns to discard is missing from the file.
    """
    unused_columns = ['nom_estab', 'per_ocu', 'fecha_alta', 'nom_processed', 'tipoUniEco', 'localidad']
    return pd.read_csv(path).drop(columns=unused_columns)

In [3]:
inegi = prepareInegi()
inegi.entidad.value_counts()

VERACRUZ DE IGNACIO DE LA LLAVE             48459
YUCATAN                                     17180
SAN LUIS POTOSÍ                             15655
TAMAULIPAS                                  15576
SINALOA                                     15321
SONORA                                      13198
QUERÉTARO                                   12448
TABASCO                                     11959
QUINTANA ROO                                11275
ZACATECAS                                    8474
TLAXCALA                                     8267
Name: entidad, dtype: int64

## Filtering by state and city

In [4]:
def getInegiByState(state):
    """Return the rows of the module-level ``inegi`` table whose 'entidad' equals ``state``."""
    in_state = inegi['entidad'].eq(state)
    return inegi[in_state]

### As the use case for this demonstration we select TAMAULIPAS

In [5]:
sinegi = getInegiByState('TAMAULIPAS')
sinegi.head()

Unnamed: 0,entidad,municipio,latitud,longitud,tipo,Cocina Económica,Antojitos,Pizza,Hamburguesas,Hot Dogs,...,Yucateca,Tabasqueña,Arrachera,Pastor,Birria,Barbacoa,Carnero,Pozole,Enchiladas,Chilaquiles
79856,TAMAULIPAS,Matamoros,25.869956,-97.514709,"['burg', 'burgu', 'hamburgues']",0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
79857,TAMAULIPAS,Altamira,22.391528,-97.930704,[],0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
79858,TAMAULIPAS,Valle Hermoso,25.670395,-97.815718,[],0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
79859,TAMAULIPAS,Victoria,23.729669,-99.152546,[],0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
79860,TAMAULIPAS,Altamira,22.33495,-97.865727,['restaur'],0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [6]:
sinegi['municipio'].value_counts()

Matamoros             2384
Tampico               2161
Reynosa               1972
Nuevo Laredo          1757
Victoria              1550
Ciudad Madero         1156
Altamira              1053
El Mante               667
Río Bravo              542
Valle Hermoso          380
Miguel Alemán          222
González               211
San Fernando           171
Tula                   154
Aldama                 134
Camargo                109
Gustavo Díaz Ordaz     108
Xicoténcatl            104
Soto la Marina          95
Jaumave                 87
Padilla                 85
Ocampo                  74
Hidalgo                 61
Mier                    48
Antiguo Morelos         37
Abasolo                 36
Llera                   36
Jiménez                 30
Nuevo Morelos           29
Gómez Farías            27
Guerrero                27
Villagrán               11
Güémez                  11
San Carlos              10
Burgos                   9
Miquihuana               7
Bustamante               5
P

In [7]:
def inegiFilterByMunicipio(municipio, df):
    """Return the rows of ``df`` whose 'municipio' equals ``municipio``.

    The rows are selected with a boolean mask rather than by dropping index
    labels, so a matching row is kept even when it shares its index label
    with a non-matching row. ``df`` itself is left unchanged; a copy of the
    selected rows is returned.
    """
    matches = df['municipio'] == municipio
    return df[matches].copy()

### As the use case we select Tampico as the city

In [8]:
sinegi = inegiFilterByMunicipio('Tampico', sinegi)
sinegi.head()

Unnamed: 0,entidad,municipio,latitud,longitud,tipo,Cocina Económica,Antojitos,Pizza,Hamburguesas,Hot Dogs,...,Yucateca,Tabasqueña,Arrachera,Pastor,Birria,Barbacoa,Carnero,Pozole,Enchiladas,Chilaquiles
79866,TAMAULIPAS,Tampico,22.273236,-97.874833,[],0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
79867,TAMAULIPAS,Tampico,22.255264,-97.861039,['com'],1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
79868,TAMAULIPAS,Tampico,22.262607,-97.859094,['com'],1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
79874,TAMAULIPAS,Tampico,22.213607,-97.849838,[],0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
79885,TAMAULIPAS,Tampico,22.302357,-97.860012,[],0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## Obtaining offer by food type

In [9]:
# Food types considered by the recommender. Each name is used as a column
# name of the Inegi table (summed in getInegiResults) and as the search term
# sent to Google Trends and to Yelp.
foods = ['Cocina Económica', 'Antojitos', 'Pizza', 'Hamburguesas', 'Hot Dogs', 'Sushi', 'Tamales', 'Mariscos', 
         'Pescado', 'Tacos', 'Carne', 'Asada', 'Panuchos', 'Cochinita', 'Pollo', 'Desayunos', 'Tortas', 'Mondongo', 
         'Menudo', 'Memelas', 'Empanadas', 'Chicharrón', 'Gorditas', 'Costillas', 'Carnitas', 'Doraditas', 
         'Baguettes', 'Parrilla', 'Huaraches', 'Rosticería', 'China', 'Yucateca', 'Tabasqueña', 'Arrachera', 
         'Pastor', 'Birria', 'Barbacoa', 'Carnero', 'Pozole', 'Enchiladas', 'Chilaquiles']

In [10]:
def getInegiResults(df):
    """Sum the column of every food type in the module-level ``foods`` list.

    Returns a dict mapping each food name to the sum of ``df[food]``, in the
    order of ``foods``.
    """
    return {name: df[name].sum() for name in foods}

In [11]:
inegi_results = getInegiResults(sinegi)
inegi_results

{'Cocina Económica': 185,
 'Antojitos': 156,
 'Pizza': 31,
 'Hamburguesas': 31,
 'Hot Dogs': 23,
 'Sushi': 2,
 'Tamales': 39,
 'Mariscos': 45,
 'Pescado': 11,
 'Tacos': 344,
 'Carne': 0,
 'Asada': 69,
 'Panuchos': 0,
 'Cochinita': 9,
 'Pollo': 113,
 'Desayunos': 2,
 'Tortas': 252,
 'Mondongo': 1,
 'Menudo': 0,
 'Memelas': 0,
 'Empanadas': 9,
 'Chicharrón': 1,
 'Gorditas': 111,
 'Costillas': 1,
 'Carnitas': 38,
 'Doraditas': 0,
 'Baguettes': 5,
 'Parrilla': 11,
 'Huaraches': 1,
 'Rosticería': 5,
 'China': 0,
 'Yucateca': 3,
 'Tabasqueña': 0,
 'Arrachera': 2,
 'Pastor': 10,
 'Birria': 0,
 'Barbacoa': 26,
 'Carnero': 0,
 'Pozole': 0,
 'Enchiladas': 1,
 'Chilaquiles': 1}

## Adding offer to the main dataframe df

In [12]:
def getInegiDfResult(inegi_results):
    """Turn the ``{food: count}`` mapping into a one-column DataFrame.

    The food names become the index (named 'Index') and the counts go into
    the 'InegiCount' column.
    """
    counts = pd.DataFrame(inegi_results.items(), columns=['Index', 'InegiCount'])
    return counts.set_index('Index')

In [13]:
df = getInegiDfResult(inegi_results)
df.head()

Unnamed: 0_level_0,InegiCount
Index,Unnamed: 1_level_1
Cocina Económica,185
Antojitos,156
Pizza,31
Hamburguesas,31
Hot Dogs,23


# Google Trends
In this section we are getting the demand.

We are using the Google Trends endpoint https://trends.google.com/trends by means of the Pytrends API https://pypi.org/project/pytrends/


We check how much each food type is trending in Mexico. Google Trends reports this interest by state.

In [14]:
from pytrends.request import TrendReq

In [15]:
def getGoogleTrends(df):
    """Fetch the Google Trends interest by region for every entry of ``foods``.

    ``df`` is accepted but not used. Each food name is printed before it is
    queried. Returns a list with one pytrends ``interest_by_region`` result
    per food, in the order of ``foods``.
    """
    client = TrendReq(hl='es-MX', tz=360)

    def _regional_interest(term):
        print(term)
        # cat=71 is presumably the Google Trends food category - TODO confirm.
        client.build_payload([term], cat=71, geo='MX', gprop='')
        return client.interest_by_region(resolution='REGION', inc_low_vol=True, inc_geo_code=False)

    return [_regional_interest(term) for term in foods]

In [16]:
google_trends = getGoogleTrends(sinegi)
google_trends

Cocina Económica
Antojitos
Pizza
Hamburguesas
Hot Dogs
Sushi
Tamales
Mariscos
Pescado
Tacos
Carne
Asada
Panuchos
Cochinita
Pollo
Desayunos
Tortas
Mondongo
Menudo
Memelas
Empanadas
Chicharrón
Gorditas
Costillas
Carnitas
Doraditas
Baguettes
Parrilla
Huaraches
Rosticería
China
Yucateca
Tabasqueña
Arrachera
Pastor
Birria
Barbacoa
Carnero
Pozole
Enchiladas
Chilaquiles


[                      Cocina Económica
 geoName                               
 Aguascalientes                       0
 Baja California                      0
 Baja California Sur                  0
 Campeche                             0
 Chiapas                              0
 Chihuahua                            0
 Ciudad de México                    21
 Coahuila de Zaragoza                 0
 Colima                               0
 Durango                              0
 Estado de México                    29
 Guanajuato                           0
 Guerrero                             0
 Hidalgo                              0
 Jalisco                              0
 Michoacán                            0
 Morelos                              0
 Nayarit                              0
 Nuevo León                           0
 Oaxaca                               0
 Puebla                               0
 Querétaro                            0
 Quintana Roo                         0


### Filtering by state

In [17]:
def getTrends(google_trends, state):
    """Return the interest of every food type in ``state``, highest first.

    ``google_trends`` is a list of per-food DataFrames indexed by region. They
    are joined side by side and transposed, so that the regions become the
    columns and the column for ``state`` can be selected.
    """
    first = google_trends[0]
    others = google_trends[1:]
    by_region = first.join(others)
    state_interest = by_region.T[state]
    return state_interest.sort_values(ascending=False)

In [18]:
trends =  getTrends(google_trends, 'Tamaulipas')
trends

Tamales             100
Pollo                89
Gorditas             82
Carne                81
Chicharrón           75
Menudo               70
Asada                66
Pizza                62
Hamburguesas         59
Empanadas            59
Pescado              56
Enchiladas           56
Desayunos            50
Costillas            50
Hot Dogs             45
Pastor               44
Mariscos             44
Carnitas             44
Cochinita            41
Chilaquiles          37
Arrachera            36
Barbacoa             35
China                32
Antojitos            29
Tacos                28
Mondongo             27
Huaraches            22
Tortas               21
Pozole               21
Carnero              19
Parrilla             18
Memelas              18
Panuchos             12
Birria               12
Sushi                 8
Yucateca              2
Doraditas             0
Tabasqueña            0
Rosticería            0
Baguettes             0
Cocina Económica      0
Name: Tamaulipas

## Adding demand to the main dataframe df

In [19]:
df['GoogleTrend'] = trends
df

Unnamed: 0_level_0,InegiCount,GoogleTrend
Index,Unnamed: 1_level_1,Unnamed: 2_level_1
Cocina Económica,185,0
Antojitos,156,29
Pizza,31,62
Hamburguesas,31,59
Hot Dogs,23,45
Sushi,2,8
Tamales,39,100
Mariscos,45,44
Pescado,11,56
Tacos,344,28


## Interest over time

In [20]:
def getGoogleIOT():
    """Fetch the Google Trends interest over time for every entry of ``foods``.

    Each food name is printed before it is queried. Returns a dict mapping
    the food name to the pytrends ``interest_over_time`` result.
    """
    client = TrendReq(hl='es-MX', tz=360)

    def _interest_over_time(term):
        print(term)
        client.build_payload([term], cat=71, geo='MX', gprop='')
        return client.interest_over_time()

    return {term: _interest_over_time(term) for term in foods}

In [None]:
iot = getGoogleIOT()
iot

Cocina Económica
Antojitos
Pizza
Hamburguesas
Hot Dogs
Sushi
Tamales
Mariscos
Pescado
Tacos
Carne
Asada
Panuchos
Cochinita
Pollo
Desayunos
Tortas


In [None]:
import matplotlib.pyplot as plt

# 1-based sample positions used as the x axis of the interest-over-time plots.
x = list(range(1, len(iot['Tortas']['Tortas']) + 1))

plt.figure(figsize=(16,4))

plt.subplot(1,2,1)
plt.title('Tortas')
plt.plot(x, iot['Tortas']['Tortas'].values);

plt.subplot(1,2,2)
# This panel shows the 'Tacos' series, so it is titled accordingly.
plt.title('Tacos')
plt.plot(x, iot['Tacos']['Tacos'].values);

In [None]:
import numpy as np
from sklearn.linear_model import LinearRegression

def getTrendSlope(foods, iot):
    """Estimate the linear trend of each food's interest-over-time series.

    For every name in ``foods`` the series ``iot[food][food]`` is fitted by
    ordinary least squares against the sample positions 1..n of that same
    series, and the slope of the fitted line is kept.

    Parameters
    ----------
    foods : iterable of str
        Food names to process; each must be a key of ``iot``.
    iot : dict
        Maps a food name to a DataFrame holding a column of the same name.

    Returns
    -------
    dict
        ``{food: slope}``. The slope is NaN when the column is missing or
        the series has fewer than two samples, since no line can be fitted.
    """
    slope = {}
    for food in foods:
        frame = iot[food]
        if food not in frame or len(frame[food]) < 2:
            slope[food] = float('nan')
            continue

        y = np.asarray(frame[food].values, dtype='float64')
        # Use each series' own length instead of assuming that every series
        # is as long as one particular food's.
        x = np.arange(1, len(y) + 1, dtype='float64')

        # Closed-form least-squares slope: cov(x, y) / var(x).
        x_centered = x - x.mean()
        slope[food] = np.dot(x_centered, y - y.mean()) / np.dot(x_centered, x_centered)

    return slope

In [None]:
slope = getTrendSlope(foods, iot)
slope

In [None]:
# Overlay the fitted linear trend on the raw interest-over-time series.
plt.figure(figsize=(16,4))

for panel, food in enumerate(['Tortas', 'Tacos'], start=1):
    series = iot[food][food].values
    positions = list(range(1, len(series) + 1))

    plt.subplot(1, 2, panel)
    plt.plot(positions, series)
    plt.title(food)

    print(food + ' slope:', slope[food])
    # A least-squares line passes through (mean(x), mean(y)); the mean of the
    # positions 1..n is (n + 1) / 2. Deriving the intercept this way keeps the
    # line correct when the downloaded data changes.
    intercept = series.mean() - slope[food] * (len(series) + 1) / 2
    yslope = [intercept + slope[food] * position for position in positions]
    plt.plot(positions, yslope)

## Adding trending to the main dataframe df

In [None]:
def addSlopeToDf(df, slopes=None):
    """Add a float 'slope' column to ``df`` and return ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame indexed by food name; it is modified in place.
    slopes : dict, optional
        ``{food: slope}`` mapping. When omitted, the module-level ``slope``
        dict is used, as in the original one-argument call.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object. Rows whose index has no entry in ``slopes``
        get NaN.
    """
    if slopes is None:
        slopes = slope

    # Assigning an index-aligned Series avoids the chained assignment
    # df['slope'].loc[key] = value, which pandas does not guarantee to
    # write back into df.
    df['slope'] = pd.Series(slopes, dtype='float64')
    return df

In [None]:
df = addSlopeToDf(df)
df.head()

# User location geocode with GeoPy

In [None]:
from geopy.geocoders import Nominatim
#user_input = input('Introduce lugar:')

def userLocationGeocoding(string):
    """Geocode the free-text place ``string`` with Nominatim and return the result."""
    return Nominatim(user_agent="food-business-recommender").geocode(string)

### As the use case we select 'unidad modelo' as the neighborhood

In [None]:
location = userLocationGeocoding('unidad modelo tampico tamaulipas')
location

# Yelp
In this section we are getting the satisfaction.

We are using the endpoint https://api.yelp.com/v3/businesses/search from the Yelp API https://www.yelp.com/developers

In [None]:
import requests
from bs4 import BeautifulSoup as bs
import json

## Searching for food business on the user location

In [None]:
def searchYelp(foods, location):
    """Search Yelp for businesses of every food type around ``location``.

    Parameters
    ----------
    foods : iterable of str
        Search terms, one request per term.
    location : object
        Must expose ``latitude`` and ``longitude`` attributes.

    Returns
    -------
    list of pandas.DataFrame
        One frame per term, built from the 'businesses' entry of the JSON
        response, with an added 'tipo' column holding the term.

    Raises
    ------
    RuntimeError
        If the YELP_API_KEY environment variable is not set.
    requests.HTTPError
        If Yelp answers with an error status.
    """
    api_key = os.getenv("YELP_API_KEY")
    if not api_key:
        raise RuntimeError('The YELP_API_KEY environment variable is not set')

    endpoint = 'https://api.yelp.com/v3/businesses/search'
    headers = {'Authorization': 'Bearer ' + api_key}

    yelp_search = []
    for comida in foods:
        print('term={}&'.format(comida))

        # Passing the query as params lets requests URL-encode terms that
        # contain spaces or accented characters.
        params = {
            'term': comida,
            'latitude': location.latitude,
            'longitude': location.longitude,
            'locale': 'es_MX',
            'radius': 3000,
            'limit': 50,
        }
        res = requests.get(endpoint, params=params, headers=headers, timeout=30)
        res.raise_for_status()

        # The body is JSON, so it is decoded directly instead of being pushed
        # through an HTML parser first.
        ydf = pd.DataFrame(res.json()['businesses'])
        ydf['tipo'] = comida
        yelp_search.append(ydf)

    return yelp_search

In [None]:
yelp_search = searchYelp(foods, location)
yelp_search

In [None]:
type(yelp_search[0])

## Creating a dataframe with the Yelp searches

In [None]:
def getYelpDf(yelp_search):
    """Stack the per-food Yelp frames into a single DataFrame.

    Parameters
    ----------
    yelp_search : list of pandas.DataFrame
        The frames produced by ``searchYelp``.

    Returns
    -------
    pandas.DataFrame
        All rows under a fresh 0..n-1 index. The previous per-search row
        number is kept in an 'index' column, and the columns listed in
        ``unused_columns`` are removed when present.
    """
    unused_columns = ['alias', 'display_phone', 'location', 'id', 'image_url',
                      'is_closed', 'phone', 'transactions', 'url']

    yf = pd.concat(yelp_search, sort=False)
    # errors='ignore' because a search without results yields a frame that
    # lacks these columns.
    yf = yf.drop(columns=unused_columns, errors='ignore')
    return yf.reset_index()

In [None]:
yf = getYelpDf(yelp_search)
yf

## Adding satisfaction to the main dataframe df

In [None]:
# Average Yelp rating per food type. Only the 'rating' column is aggregated,
# because averaging the whole frame fails on its non-numeric columns in
# recent pandas versions.
df['YelpRating'] = yf.groupby('tipo')['rating'].mean()
df

# Score

In [None]:
df.head()

## Changing column names in order to understand them better

In [None]:
df.rename(columns={'InegiCount':'Offer', 'GoogleTrend':'Demand', 'slope':'Trending', 'YelpRating':'Satisfaction'}, inplace=True)
df.head()

In [None]:
# Rescale every column by its own maximum so that the four criteria are
# comparable before they are combined into a score.
# NOTE(review): a column whose maximum is 0 or negative (possible for
# 'Trending', which holds regression slopes) is not meaningfully normalised
# by this division - confirm this is acceptable.
for name in list(df.columns):
    peak = df[name].max()
    df[name] = df[name].div(peak)

df.head()

## Calculating score

In [None]:
def calculateScore(df, top=3):
    """Score every food type and return the best-scoring rows.

    The score is ``Demand + Trending - Satisfaction - Offer``. It is stored
    in a new 'score' column of ``df``, which is modified in place.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the 'Demand', 'Trending', 'Satisfaction' and 'Offer'
        columns.
    top : int, default 3
        Number of best-scoring rows to return.

    Returns
    -------
    pandas.DataFrame
        The ``top`` rows with the highest score, highest first. A row with a
        missing value in any of the four columns gets a NaN score and is
        sorted last.
    """
    df['score'] = df.Demand + df.Trending - df.Satisfaction - df.Offer
    return df.sort_values(by='score', ascending=False).head(top)

In [None]:
df = calculateScore(df)
df

# Yelp top competitors by location

In [None]:
from pandas.io.json import json_normalize

## Extracting coordinates from the Yelp dataframe

In [None]:
def flatLatLong(yf):
    """Expand the 'coordinates' dicts of ``yf`` into two numeric columns.

    Parameters
    ----------
    yf : pandas.DataFrame
        Must contain a 'coordinates' column of dicts with 'latitude' and
        'longitude' keys. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``yf`` object with 'latitude' and 'longitude' columns added.
        An entry that is not a dict, or lacks a key, produces a missing value.
    """
    records = [c if isinstance(c, dict) else {} for c in yf['coordinates']]
    # Selecting the keys by name makes the result independent of the key
    # order inside each dict.
    coords = pd.DataFrame(records, columns=['latitude', 'longitude'])

    # Assign by position so the values land on the right rows whatever the
    # index of yf is.
    yf['latitude'] = coords['latitude'].to_numpy()
    yf['longitude'] = coords['longitude'].to_numpy()
    return yf

In [None]:
yf = flatLatLong(yf)
yf

## Looking for the top competitors

In [None]:
def getTopYf(yf, scores=None):
    """Collect the best-rated Yelp businesses of the top-scoring food types.

    Parameters
    ----------
    yf : pandas.DataFrame
        Yelp businesses with at least the 'tipo' and 'rating' columns.
    scores : pandas.DataFrame, optional
        Frame indexed by food type with a 'score' column. When omitted, the
        module-level ``df`` is used, as in the original one-argument call.

    Returns
    -------
    pandas.DataFrame
        For each of the (at most five) highest-scoring food types, its ten
        best-rated rows of ``yf``, keeping the index labels of ``yf``. The
        result is empty, with the columns of ``yf``, when nothing matches.
    """
    if scores is None:
        scores = df

    top_types = scores.sort_values(by='score', ascending=False).head().index

    pieces = []
    for top in top_types:
        best = yf[yf.tipo == top].sort_values(by='rating', ascending=False).head(10)
        if not best.empty:
            pieces.append(best)

    if not pieces:
        return yf.iloc[0:0].copy()

    # A single concat keeps the column dtypes of yf, unlike growing an
    # initially empty frame inside the loop.
    return pd.concat(pieces)

In [None]:
top_yf = getTopYf(yf)
top_yf

In [None]:
def getTopCompetitors(top_yf):
    """List the competitors of ``top_yf`` as plain dicts.

    Parameters
    ----------
    top_yf : pandas.DataFrame
        Must contain the 'tipo', 'name', 'latitude' and 'longitude' columns.

    Returns
    -------
    list of dict
        One ``{'tipo', 'name', 'coords': [latitude, longitude]}`` dict per
        row, grouped by food type in order of first appearance.
    """
    top_yf_list = []
    for tipo in top_yf.tipo.unique():
        rows = top_yf[top_yf.tipo == tipo]
        names = rows['name'].tolist()
        latitudes = rows['latitude'].tolist()
        longitudes = rows['longitude'].tolist()
        for name, latitude, longitude in zip(names, latitudes, longitudes):
            # A new dict per competitor: reusing one dict per type would make
            # every list entry point at the last competitor of that type.
            top_yf_list.append({'tipo': tipo, 'name': name, 'coords': [latitude, longitude]})

    return top_yf_list

In [None]:
getTopCompetitors(top_yf)

# Foursquare

In [None]:
import foursquare

In [None]:
def getFsResults(top_yf):
    """Search Foursquare venues for the first food type of ``top_yf``.

    The search is centred on the module-level ``location``. Returns a dict
    mapping that food type to the raw search response.

    NOTE: this file defines ``getFsResults`` a second time further down; when
    the cells run in order, that later definition replaces this one.
    """
    credentials = {
        'client_id': os.getenv("FS_CLIENT_ID"),
        'client_secret': os.getenv("FS_CLIENT_SECRET"),
    }
    client = foursquare.Foursquare(**credentials)

    comida = top_yf.tipo.unique()[0]
    search_params = {
        'query': comida,
        'intent': 'checkin',
        'll': str(location.latitude)+', '+str(location.longitude),
        'radius': 3000,
        'limit': 10,
    }
    return {comida: client.venues.search(params=search_params)}

In [None]:
def getFsResults(top_yf):
    """Return the Foursquare venues found for the first food type of ``top_yf``.

    The search is centred on the module-level ``location``. Only the 'venues'
    entry of the search response is returned.
    """
    credentials = {
        'client_id': os.getenv("FS_CLIENT_ID"),
        'client_secret': os.getenv("FS_CLIENT_SECRET"),
    }
    client = foursquare.Foursquare(**credentials)

    comida = top_yf.tipo.unique()[0]
    search_params = {
        'query': comida,
        'intent': 'checkin',
        'll': str(location.latitude)+', '+str(location.longitude),
        'radius': 3000,
        'limit': 10,
    }
    response = client.venues.search(params=search_params)
    return response['venues']

In [None]:
fs_results = getFsResults(top_yf)
fs_results

In [None]:
'''
def getFsLocs(fs_results):
    fs_locs = {}
    for result in fs_results:
        result_locs = []
        for i in range(len(fs_results[result]['venues'])):
            loc = {}
            loc[fs_results[result]['venues'][i]['name']] = []
            loc[fs_results[result]['venues'][i]['name']].append(fs_results[result]['venues'][i]['location']['lat'])
            loc[fs_results[result]['venues'][i]['name']].append(fs_results[result]['venues'][i]['location']['lng'])
            result_locs.append(loc)
        fs_locs[result] = result_locs

    return fs_locs
'''

In [None]:
def getFsLocs(fs_results):
    """Reduce each Foursquare venue to its name and its [lat, lng] coordinates."""
    return [
        {
            'name': venue['name'],
            'coords': [venue['location']['lat'], venue['location']['lng']],
        }
        for venue in fs_results
    ]

In [None]:
fs_locs = getFsLocs(fs_results)
fs_locs

## Folium map

In [None]:
import folium

In [None]:
top_types = df.sort_values(by='score', ascending=False).head().index

# Map centred on the user location.
mapa=folium.Map(location=[location.latitude, location.longitude], tiles='openstreetmap',zoom_start=12)

# Marker colour per recommended food type. The palette is cycled so that
# every type gets a colour even when there are more types than colours.
five_colors = ['red', 'orange', 'green']
tipos = list(top_yf.tipo.unique())
colors = {tipo: five_colors[position % len(five_colors)] for position, tipo in enumerate(tipos)}

# Circle with the same 3000 radius that is sent to the Yelp and Foursquare
# searches. A circle takes no icon, so none is passed.
folium.Circle([location.latitude, location.longitude], radius=3000).add_to(mapa)

# Foursquare venues (searched for the first food type only) are drawn in gray.
for loc in fs_locs:
    folium.Marker(list(loc['coords']), popup='<b>'+ loc['name'] +'</b>', tooltip=tipos[0], icon=folium.Icon(color='gray', icon='info-sign')).add_to(mapa)

# Yelp competitors, coloured by food type.
for _, competitor in top_yf.iterrows():
    # A marker cannot be placed without coordinates, so such rows are skipped.
    if pd.isna(competitor['latitude']) or pd.isna(competitor['longitude']):
        continue
    folium.Marker([competitor['latitude'], competitor['longitude']], popup='<b>'+competitor['name']+'</b>', tooltip=competitor['tipo'], icon=folium.Icon(color=colors[competitor['tipo']], icon='info-sign')).add_to(mapa)

mapa