In [25]:
#Mount my Google Drive
from google.colab import drive
drive.mount("/content/drive")
import os
directory = '/content/drive/My Drive/CSC575 Final Project'
os.chdir(directory)

#Ensure the files are there (in the folder)
!pwd

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/My Drive/CSC575 Final Project


In [26]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, HTML
from google.colab import files
#Text preprocessing.
import re
import nltk
from nltk.corpus import stopwords
nltk.download('stopwords')
nltk.download('punkt')
#Warnings.
import warnings
warnings.filterwarnings("ignore")
#Geo location.
import folium
from geopy.geocoders import Nominatim
#String process.
import ast

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


## **Task 1: Business Recommendation by Table Lookup**

In [None]:
businessData_original = pd.read_csv('businesses.csv', sep='\t', encoding = 'ISO-8859-1')
print(f'Business data: Row - {businessData_original.shape[0]} | Column - {businessData_original.shape[1]}')

Business data: Row - 100000 | Column - 15


In [None]:
businessData_original.head()

Unnamed: 0.1,Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,attributes,categories,hours
0,0,Pns2l4eNsfO8kk83dixA6A,"Abby Rappoport, LAC, CMQ","1616 Chapala St, Ste 2",Santa Barbara,CA,93101,34.426679,-119.711197,5.0,7,0,{'ByAppointmentOnly': 'True'},"Doctors, Traditional Chinese Medicine, Naturop...",
1,1,mpf3x-BjTdTEA3yCZrAYPw,The UPS Store,87 Grasso Plaza Shopping Center,Affton,MO,63123,38.551126,-90.335695,3.0,15,1,{'BusinessAcceptsCreditCards': 'True'},"Shipping Centers, Local Services, Notaries, Ma...","{'Monday': '0:0-0:0', 'Tuesday': '8:0-18:30', ..."
2,2,tUFrWirKiKi_TAnsVWINQQ,Target,5255 E Broadway Blvd,Tucson,AZ,85711,32.223236,-110.880452,3.5,22,0,"{'BikeParking': 'True', 'BusinessAcceptsCredit...","Department Stores, Shopping, Fashion, Home & G...","{'Monday': '8:0-22:0', 'Tuesday': '8:0-22:0', ..."
3,3,MTSW4McQd7CbVtyjqoe9mw,St Honore Pastries,935 Race St,Philadelphia,PA,19107,39.955505,-75.155564,4.0,80,1,"{'RestaurantsDelivery': 'False', 'OutdoorSeati...","Restaurants, Food, Bubble Tea, Coffee & Tea, B...","{'Monday': '7:0-20:0', 'Tuesday': '7:0-20:0', ..."
4,4,mWMc6_wTdE0EUBKIGXDVfA,Perkiomen Valley Brewery,101 Walnut St,Green Lane,PA,18054,40.338183,-75.471659,4.5,13,1,"{'BusinessAcceptsCreditCards': 'True', 'Wheelc...","Brewpubs, Breweries, Food","{'Wednesday': '14:0-22:0', 'Thursday': '16:0-2..."


#### **Convert 'categories' column for query search. For example 'Local Services' will become 'LocalServices'.**

In [None]:
#Make a copy.
businessData = businessData_original
def processCategories(x):
  '''Convert column for query search.'''

  if isinstance(x, str):
    return ', '.join(category.replace(' ', '') for category in x.split(','))
  else:
    return np.nan
businessData['categories'] = businessData['categories'].apply(processCategories)
businessData.head()

Unnamed: 0.1,Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,attributes,categories,hours
0,0,Pns2l4eNsfO8kk83dixA6A,"Abby Rappoport, LAC, CMQ","1616 Chapala St, Ste 2",Santa Barbara,CA,93101,34.426679,-119.711197,5.0,7,0,{'ByAppointmentOnly': 'True'},"Doctors, TraditionalChineseMedicine, Naturopat...",
1,1,mpf3x-BjTdTEA3yCZrAYPw,The UPS Store,87 Grasso Plaza Shopping Center,Affton,MO,63123,38.551126,-90.335695,3.0,15,1,{'BusinessAcceptsCreditCards': 'True'},"ShippingCenters, LocalServices, Notaries, Mail...","{'Monday': '0:0-0:0', 'Tuesday': '8:0-18:30', ..."
2,2,tUFrWirKiKi_TAnsVWINQQ,Target,5255 E Broadway Blvd,Tucson,AZ,85711,32.223236,-110.880452,3.5,22,0,"{'BikeParking': 'True', 'BusinessAcceptsCredit...","DepartmentStores, Shopping, Fashion, Home&Gard...","{'Monday': '8:0-22:0', 'Tuesday': '8:0-22:0', ..."
3,3,MTSW4McQd7CbVtyjqoe9mw,St Honore Pastries,935 Race St,Philadelphia,PA,19107,39.955505,-75.155564,4.0,80,1,"{'RestaurantsDelivery': 'False', 'OutdoorSeati...","Restaurants, Food, BubbleTea, Coffee&Tea, Bake...","{'Monday': '7:0-20:0', 'Tuesday': '7:0-20:0', ..."
4,4,mWMc6_wTdE0EUBKIGXDVfA,Perkiomen Valley Brewery,101 Walnut St,Green Lane,PA,18054,40.338183,-75.471659,4.5,13,1,"{'BusinessAcceptsCreditCards': 'True', 'Wheelc...","Brewpubs, Breweries, Food","{'Wednesday': '14:0-22:0', 'Thursday': '16:0-2..."


### **Recommendation function.**

In [None]:
def getRec(query, numRec=100000, data=businessData):
    '''Get recommendation places based on a query.
    Accepting template | Best Categories1 Categories2 in City that have/has/are Attribute
    'Best' and Categories1 are optional.'''

    queryList = query.split(" ")

    establishmentType_List = None
    city = None
    attribute = None

    #If the query starts with 'Best'.
    if queryList[0] == 'Best':
        establishment_endIndex = queryList.index('in')
        establishmentType_List = queryList[1 : establishment_endIndex]

    #Query starts with business type.
    else:
        establishment_endIndex = queryList.index('in')
        establishmentType_List = queryList[0 : establishment_endIndex]

    #Iterate through queryList to extract city and services.
    for i, word in enumerate(queryList):
        if word == 'in' and i + 1 < len(queryList):
            #Extract the city after 'in'.
            city_startIndex = i + 1
            city_endIndex = queryList.index('that') if 'that' in queryList else len(queryList)
            city = ' '.join(queryList[city_startIndex : city_endIndex]).title()
            print("City:", city)

        #If services included in the query.
        elif word == 'that' and i + 2 < len(queryList):
            #Extract the services after 'that'.
            attribute = ' '.join(queryList[i + 2:])
            print("Services:", attribute)

    #Capitalize to look up.
    establishmentType_List = [estType.lower() for estType in establishmentType_List]
    print("Establishment Types:", establishmentType_List)

    if establishmentType_List or city or attribute:
      #Get city & 1st category.
      queryRes = data[(data['city'] == city) & data['categories'].str.lower().str.contains(establishmentType_List[0], na=False)]
      #Get attribute.
      if attribute is not None:
        queryRes = queryRes[data['attributes'].astype(str).str.contains(fr"'{attribute}': 'True'")]

      #Get 2nd category.
      if len(establishmentType_List) > 1:
        queryRes = queryRes[data['categories'].str.lower().str.contains(establishmentType_List[1], na=False)]

      if 'Best' in queryList:
        #If 'Best' is in the query, filter by stars > 4.0
        queryRes = queryRes[queryRes['stars'] > 4.0]

      #Sorted according to the 'stars', then 'review_count', then 'name' (alphabetically).
      #Also extract 'latitude' & 'longitude' for geo map.
      queryRes = queryRes.sort_values(by=['stars', 'review_count', 'name'],
                 ascending=[False, False, True]).head(numRec)[['stars', 'name', 'review_count', 'categories', 'latitude', 'longitude']]

      return queryRes, city

#### **Best restaurants in Toronto --> No Toronto City entry. Have double check on Kaggle.**

In [None]:
queryRes_1, city = getRec('Best restaurants in Toronto', 5)
display(queryRes_1)

City: Toronto
Establishment Types: ['restaurants']


Unnamed: 0,stars,name,review_count,categories,latitude,longitude


In [None]:
#Plain query.
queryPlain_1 = businessData_original[
    (businessData_original['city'] == 'Toronto') &
    (businessData_original['categories'].str.contains('Restaurants'))]

queryPlain_1 = queryPlain_1.sort_values(by=['stars', 'review_count', 'name'],
               ascending=[False, False, True]).head(5)[['stars', 'name', 'review_count', 'categories']]
queryPlain_1

Unnamed: 0,stars,name,review_count,categories


#### **Best Chinese restaurants in Las Vegas --> No Las Vegas City entry. . Have double check on Kaggle.**

In [None]:
queryRes_2, city = getRec('Best Chinese restaurants in Las Vegas', 5)
display(queryRes_2)

City: Las Vegas
Establishment Types: ['chinese', 'restaurants']


Unnamed: 0,stars,name,review_count,categories,latitude,longitude


In [None]:
#Plain query.
queryPlain_2 = businessData_original[
    (businessData_original['city'] == 'Las Vegas') &
    (businessData_original['categories'].str.contains('Restaurants', na=False) &
    (businessData_original['categories'].str.contains('Chinese', na=False)))]

queryPlain_2 = queryPlain_2.sort_values(by=['stars', 'review_count', 'name'],
               ascending=[False, False, True]).head(5)[['stars', 'name', 'review_count', 'categories']]
display(queryPlain_2)

Unnamed: 0,stars,name,review_count,categories


#### **Pubs in Philadelphia that are WheelchairAccessible.**

In [None]:
queryRes_3, city = getRec('Pubs in Philadelphia that are WheelchairAccessible', 5)
display(queryRes_3)

City: Philadelphia
Services: WheelchairAccessible
Establishment Types: ['pubs']


Unnamed: 0,stars,name,review_count,categories,latitude,longitude
94471,4.5,Bar Hygge,387,"Food, Restaurants, Breweries, ComfortFood, Bre...",39.967125,-75.166124
53651,4.5,Glory Beer Bar & Kitchen,203,"American(New), LocalFlavor, Bars, Restaurants,...",39.948179,-75.143545
79730,4.5,Love City Brewing Company,162,"Brewpubs, Breweries, Nightlife, Bars, Food, Ba...",39.96031,-75.155415
1106,4.5,Chase's Hop Shop,116,"ChickenWings, Nightlife, Bars, Delis, Food, Be...",40.060386,-75.08459
78757,4.5,Original 13 Ciderworks,65,"American(Traditional), Food, Restaurants, Bars...",39.974598,-75.140215


In [None]:
queryPlain_3 = businessData[
    (businessData['city'] == 'Philadelphia') &
    (businessData['categories'].str.contains('pubs', case=False)) &
    businessData['attributes'].astype(str).str.contains(r"'WheelchairAccessible': 'True'")]

queryPlain_3 = queryPlain_3.sort_values(by=['stars', 'review_count', 'name'],
               ascending=[False, False, True]).head(5)[['stars', 'name', 'review_count', 'categories', 'attributes', 'city']]
display(queryPlain_3)

Unnamed: 0,stars,name,review_count,categories,attributes,city
94471,4.5,Bar Hygge,387,"Food, Restaurants, Breweries, ComfortFood, Bre...","{'DogsAllowed': 'False', 'WheelchairAccessible...",Philadelphia
53651,4.5,Glory Beer Bar & Kitchen,203,"American(New), LocalFlavor, Bars, Restaurants,...","{'OutdoorSeating': 'True', 'RestaurantsTakeOut...",Philadelphia
79730,4.5,Love City Brewing Company,162,"Brewpubs, Breweries, Nightlife, Bars, Food, Ba...","{'BusinessAcceptsBitcoin': 'False', 'BikeParki...",Philadelphia
1106,4.5,Chase's Hop Shop,116,"ChickenWings, Nightlife, Bars, Delis, Food, Be...","{'RestaurantsReservations': 'False', 'Alcohol'...",Philadelphia
78757,4.5,Original 13 Ciderworks,65,"American(Traditional), Food, Restaurants, Bars...","{'BikeParking': 'True', 'RestaurantsDelivery':...",Philadelphia


#### **Best Chinese restaurants in Philadelphia.**

In [None]:
queryRes_4, city = getRec('Best Chinese restaurants in Philadelphia', 5)
display(queryRes_4)

City: Philadelphia
Establishment Types: ['chinese', 'restaurants']


Unnamed: 0,stars,name,review_count,categories,latitude,longitude
41818,5.0,Far East Chinese Cuisine,28,"Cantonese, Chinese, Restaurants",40.019695,-75.174272
67395,5.0,House of Chen,10,"Restaurants, Chinese",39.955173,-75.155511
23052,5.0,Peking Garden,9,"American(Traditional), Chinese, Restaurants",39.986448,-75.155695
63522,5.0,Paradise Inn,8,"Chinese, Restaurants",39.974099,-75.181294
12494,5.0,Jade Palace,6,"Restaurants, Chinese",39.926865,-75.154242


In [None]:
#Plain query.
queryPlain_4 = businessData_original[
    (businessData_original['city'] == 'Philadelphia') &
    (businessData_original['categories'].str.contains('Restaurants', na=False) &
    (businessData_original['categories'].str.contains('Chinese', na=False)))]

queryPlain_4 = queryPlain_4.sort_values(by=['stars', 'review_count', 'name'],
               ascending=[False, False, True]).head(5)[['stars', 'name', 'review_count', 'categories']]
display(queryPlain_4)

Unnamed: 0,stars,name,review_count,categories
41818,5.0,Far East Chinese Cuisine,28,"Cantonese, Chinese, Restaurants"
67395,5.0,House of Chen,10,"Restaurants, Chinese"
23052,5.0,Peking Garden,9,"American(Traditional), Chinese, Restaurants"
63522,5.0,Paradise Inn,8,"Chinese, Restaurants"
12494,5.0,Jade Palace,6,"Restaurants, Chinese"


### **Follow-up from the previous query.**
#### **Business hours for "DeSandro on Main" in Philadelphia for Friday.**

In [None]:
desandroEst = businessData_original[(businessData_original['city'] == 'Philadelphia') &
                                    (businessData_original['name'] == 'DeSandro on Main')]
display(desandroEst)

desandroEst_hoursString = desandroEst['hours'].iloc[0]
print(f"DeSandro on Main full hours: {desandroEst_hoursString}")
desandroEst_hoursDict = ast.literal_eval(desandroEst_hoursString) if isinstance(desandroEst_hoursString, str) else {}
fridayHours = desandroEst_hoursDict.get('Friday', 'Placeholder')
print(f"DeSandro on Main Friday hours: {fridayHours}")


Unnamed: 0.1,Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,attributes,categories,hours
31,31,Mjboz24M9NlBeiOJKLEd_Q,DeSandro on Main,4105 Main St,Philadelphia,PA,19127,40.022466,-75.218314,3.0,41,0,"{'RestaurantsReservations': 'False', 'Caters':...","Pizza, Restaurants, Salad, Soup","{'Tuesday': '17:0-21:30', 'Wednesday': '17:0-1..."


DeSandro on Main full hours: {'Tuesday': '17:0-21:30', 'Wednesday': '17:0-1:30', 'Thursday': '17:0-1:30', 'Friday': '17:0-0:30', 'Saturday': '17:0-0:30', 'Sunday': '17:0-21:30'}
DeSandro on Main Friday hours: 17:0-0:30


#### **Business hours and Pros/Cons for "North Avenue Collective" in Chicago.**
We chose this just because of 'Chicago' - We want to look up businesses in the city.

In [None]:
northAve_Collective_Est = businessData_original[(businessData_original['city'] == 'Chicago')]
display(northAve_Collective_Est)

print(f"Business found in Chicago City: {northAve_Collective_Est['name'].iloc[0]}")
northAve_Collective_Est_hoursString = northAve_Collective_Est['hours'].iloc[0]
print(f"Full hours: {northAve_Collective_Est_hoursString} \n")

attributesString = northAve_Collective_Est['attributes'].iloc[0]
attributesDict = ast.literal_eval(attributesString) if isinstance(attributesString, str) else {}
consList = []
prosList = []
for key, value in attributesDict.items():
  if key == 'BusinessParking':
    parkingDict = value if isinstance(value, dict) else ast.literal_eval(value)
    for babyKey, babyValue in parkingDict.items():
        if babyValue == 'True':
          prosList.append(babyKey)
        elif babyValue == 'False':
          consList.append(babyKey)
  else:
     if value == 'True':
          prosList.append(key)
     elif value == 'False':
          consList.append(key)

print(f'Business Pros/Advantages: {prosList}')
print(f'Business Cons/Disadvantages: {consList}')

Unnamed: 0.1,Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,attributes,categories,hours
58315,58315,PbxGUBMCnQydOwAu4FAzNA,North Avenue Collective,2511 W North Ave,Chicago,IL,62035,38.924188,-90.201,5.0,11,0,"{'ByAppointmentOnly': 'False', 'RestaurantsPri...","Used, Vintage&Consignment, Shopping, HomeDecor...","{'Thursday': '10:0-18:0', 'Friday': '10:0-18:0..."


Business found in Chicago City: North Avenue Collective
Full hours: {'Thursday': '10:0-18:0', 'Friday': '10:0-18:0', 'Saturday': '10:0-18:0', 'Sunday': '12:0-16:0'} 

Business Pros/Advantages: ['WheelchairAccessible', 'BusinessAcceptsCreditCards', 'DogsAllowed', 'BikeParking']
Business Cons/Disadvantages: ['ByAppointmentOnly', 'BusinessAcceptsBitcoin']


### **Geo Map.**

In [None]:
def geoRec(query, numRec):
  '''Show recommendation as map.'''

  queryRes, city = getRec(query, numRec)
  display(queryRes)
  print('\n')
  geolocator = Nominatim(user_agent="my_geocoder")

  #Get the latitude and longitude of the city.
  cityLocation = geolocator.geocode(city, timeout=None)
  cityLatitude, cityLongitude = cityLocation.latitude, cityLocation.longitude

  #Folium map using city as the center.
  mapCenter = [cityLatitude, cityLongitude]
  mapCity = folium.Map(location=mapCenter, zoom_start=12, width=700, height=500)

  #Add a marker for the city.
  folium.Marker(location=mapCenter, popup=f"City: {city}").add_to(mapCity)

  #Map each establishment in queryRes.
  for index, row in queryRes.iterrows():
    #Use respective latitude and longitude from businesses data.
    establishmentLocation = [row['latitude'], row['longitude']]
    folium.Marker(location=establishmentLocation, popup=row['name']).add_to(mapCity)

  display(mapCity)

In [None]:
geoRec('Pubs in Philadelphia that are WheelchairAccessible', 5)

City: Philadelphia
Services: WheelchairAccessible
Establishment Types: ['pubs']


Unnamed: 0,stars,name,review_count,categories,latitude,longitude
94471,4.5,Bar Hygge,387,"Food, Restaurants, Breweries, ComfortFood, Bre...",39.967125,-75.166124
53651,4.5,Glory Beer Bar & Kitchen,203,"American(New), LocalFlavor, Bars, Restaurants,...",39.948179,-75.143545
79730,4.5,Love City Brewing Company,162,"Brewpubs, Breweries, Nightlife, Bars, Food, Ba...",39.96031,-75.155415
1106,4.5,Chase's Hop Shop,116,"ChickenWings, Nightlife, Bars, Delis, Food, Be...",40.060386,-75.08459
78757,4.5,Original 13 Ciderworks,65,"American(Traditional), Food, Restaurants, Bars...",39.974598,-75.140215






## **Come up with 5 queries.**

In [None]:
geoRec('Health&Medical in Reno that are AcceptsInsurance', 5)

City: Reno
Services: AcceptsInsurance
Establishment Types: ['health&medical']


Unnamed: 0,stars,name,review_count,categories,latitude,longitude
38349,5.0,Advanced Health Chiropractic,56,"PhysicalTherapy, Health&Medical, Beauty&Spas, ...",39.514546,-119.859537
5071,5.0,Northern Nevada Chiropractic,49,"MassageTherapy, Chiropractors, DiagnosticImagi...",39.47322,-119.766852
9299,5.0,Peak Performance Chiropractic,44,"Chiropractors, MassageTherapy, Health&Medical",39.489066,-119.808813
96681,5.0,Alford Pediatric & General Dentistry,35,"GeneralDentistry, Dentists, CosmeticDentists, ...",39.517828,-119.882745
26034,5.0,Active Health Spine & Sport,30,"SportsMedicine, Health&Medical, Chiropractors,...",39.51366,-119.80721






In [None]:
geoRec('Best Beauty&Spas in Nashville that are BusinessAcceptsCreditCards', 5)

City: Nashville
Services: BusinessAcceptsCreditCards
Establishment Types: ['beauty&spas']


Unnamed: 0,stars,name,review_count,categories,latitude,longitude
33028,5.0,Spa Haus Nashville,161,"Cosmetics&BeautySupply, Reiki, SkinCare, Eyela...",36.115331,-86.765821
28969,5.0,East Nails & Spa,90,"Beauty&Spas, NailSalons, SkinCare, HairRemoval...",36.182924,-86.749573
99996,5.0,61Five Health & Wellness,78,"IVHydration, Beauty&Spas, MedicalCenters, Nutr...",36.153587,-86.799671
27761,5.0,Bucca Reflexology And Foot Spa,76,"Beauty&Spas, Reflexology, Massage, Health&Medical",36.127826,-86.842759
95022,5.0,The Wax Pot Studio,65,"Massage, EyelashService, Beauty&Spas, HairRemo...",36.144598,-86.795255






In [None]:
geoRec('FastFood Restaurants in Ashland City', 5)

City: Ashland City
Establishment Types: ['fastfood', 'restaurants']


Unnamed: 0,stars,name,review_count,categories,latitude,longitude
43975,2.0,McDonald's,14,"Restaurants, FastFood, Burgers, Coffee&Tea, Food",36.273119,-87.062329
5,2.0,Sonic Drive-In,6,"Burgers, FastFood, Sandwiches, Food, IceCream&...",36.269593,-87.058943
67798,1.5,KFC,6,"Restaurants, ChickenWings, FastFood, ChickenShop",36.270909,-87.059858
64266,1.0,Burger King,9,"Restaurants, FastFood, Burgers",36.249847,-87.033478






In [None]:
#This query will return empty since there is no fastfood restaurants in Ashland City with stars >= 4.0.
geoRec('Best FastFood Restaurants in Ashland City', 5)

City: Ashland City
Establishment Types: ['fastfood', 'restaurants']


Unnamed: 0,stars,name,review_count,categories,latitude,longitude






In [None]:
geoRec('Italian Restaurants in Affton that do Caters', 5)

City: Affton
Services: Caters
Establishment Types: ['italian', 'restaurants']


Unnamed: 0,stars,name,review_count,categories,latitude,longitude
8,3.0,Tsevi's Pub And Grill,19,"Pubs, Restaurants, Italian, Bars, American(Tra...",38.565165,-90.321087
30500,2.5,Olive Garden Italian Restaurant,87,"Bars, Restaurants, WineBars, Nightlife, Soup, ...",38.523708,-90.356832






In [27]:
!jupyter nbconvert --to html CSS575-Final-Project-Task1.ipynb

[NbConvertApp] Converting notebook CSS575-Final-Project-Task1.ipynb to html
[NbConvertApp] Writing 833409 bytes to CSS575-Final-Project-Task1.html
