## A Recommendation System for a Tourism Agency in Downtown Toronto

In [1]:
#import the packages
import json
import os

import folium
import geocoder
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from pandas.io.json import json_normalize

In [4]:
# Load the Toronto postal codes together with their geo-coordinates.
# The frame is read first and its first column dropped afterwards: the original
# one-liner referenced `df_toronto` inside its own first assignment, which raises
# NameError on a fresh kernel (Restart & Run All).
toronto_raw = pd.read_csv("df_tornto_coords.csv")
df_toronto = toronto_raw.drop(columns=toronto_raw.columns[0])
df_toronto.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
1,M4W,Downtown Toronto,Rosedale,43.679563,-79.377529
2,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor",43.628841,-79.520999
3,M9M,North York,"Emery, Humberlea",43.724766,-79.532242
4,M1R,Scarborough,"Maryvale, Wexford",43.750071,-79.295849


### Let's Visualize Toronto on the World Map

In [5]:
# Coordinates used to centre the map on Toronto
toronto_latitude = 43.6932
toronto_longitude = -79.3832

map_toronto = folium.Map(location=[toronto_latitude, toronto_longitude], zoom_start=10)

# Appearance shared by every marker on this map
toronto_marker_style = dict(radius=5, color='blue', fill=True, fill_color='#3186cc', fill_opacity=0.7)

# One circle marker per row, with "<neighbourhood>, <borough>" as its popup text
toronto_points = zip(df_toronto['Latitude'], df_toronto['Longitude'],
                     df_toronto['Borough'], df_toronto['Neighbourhood'])
for lat, lng, borough, neighborhood in toronto_points:
    popup = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    folium.CircleMarker([lat, lng], popup=popup, **toronto_marker_style).add_to(map_toronto)

map_toronto

### The map above shows all the neighborhoods of each borough in the City of Toronto. We would like to explore the "Downtown Toronto" borough and its neighborhoods in more detail.

In [10]:
# Keep only the rows of the "Downtown Toronto" borough and renumber them from 0
downtown_data = (
    df_toronto
    .loc[df_toronto['Borough'].eq('Downtown Toronto')]
    .reset_index(drop=True)
)
downtown_data.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
1,M4W,Downtown Toronto,Rosedale,43.679563,-79.377529
2,M5V,Downtown Toronto,"CN Tower, King and Spadina, Railway Lands, Sou...",43.628947,-79.39442
3,M5L,Downtown Toronto,Commerce Court,43.648198,-79.379817
4,M5B,Downtown Toronto,Ryerson,43.657162,-79.378937


## Visualize Downtown Toronto and Its Neighborhoods on the World Map

In [14]:
# Coordinates used to centre the map on Downtown Toronto
latitude_downtown = 43.6543
longitude_downtown = -79.3860

map_Downtown = folium.Map(location=[latitude_downtown, longitude_downtown], zoom_start=11)

# Appearance shared by every marker on this map
downtown_marker_style = dict(radius=10, color='blue', fill=True, fill_color='#3186cc', fill_opacity=0.7)

# One circle marker per downtown row, with the neighbourhood name as its popup
downtown_points = zip(downtown_data['Latitude'], downtown_data['Longitude'], downtown_data['Neighbourhood'])
for lat, lng, neighbourhood_name in downtown_points:
    folium.CircleMarker(
        [lat, lng],
        popup=folium.Popup(neighbourhood_name, parse_html=True),
        **downtown_marker_style
    ).add_to(map_Downtown)

map_Downtown

## Use the Foursquare API to Get the Nearest Venues and Other Places around Downtown

In [15]:
# Foursquare developer credentials.
# Secrets must never be committed with the notebook, so they are read from the
# environment. Export FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before
# starting the kernel. Keys that were previously hardcoded here should be
# regenerated in the Foursquare developer console.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')  # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')  # your Foursquare Secret
VERSION = '20181213'  # Foursquare API version

if not CLIENT_ID or not CLIENT_SECRET:
    # Report the problem here rather than as an opaque API error several cells later
    print('Foursquare credentials are not set: define FOURSQUARE_CLIENT_ID and '
          'FOURSQUARE_CLIENT_SECRET in the environment before calling the API.')

In [16]:
def foursquare_crawler(postal_code_list, neighborhood_list, lat_list, lng_list, LIMIT = 500, radius = 1000, timeout = 30):
    """Query the Foursquare "venues/explore" endpoint once per neighbourhood.

    The four list arguments are walked in parallel (``zip``), so entry ``i`` of
    each list describes the same neighbourhood.

    Parameters
    ----------
    postal_code_list, neighborhood_list : iterable
        Postal code and neighbourhood name of each location; copied into the
        result unchanged.
    lat_list, lng_list : iterable
        Latitude / longitude sent to Foursquare as the ``ll`` parameter.
    LIMIT : int, default 500
        Value sent as the ``limit`` query parameter.
    radius : int, default 1000
        Value sent as the ``radius`` query parameter.
    timeout : float, default 30
        Seconds to wait for each HTTP response, so a stalled connection
        cannot hang the notebook indefinitely.

    Returns
    -------
    list of dict
        One dict per location with the keys ``'Postal Code'``,
        ``'Neighborhood(s)'``, ``'Latitude'``, ``'Longitude'`` and
        ``'Crawling_result'`` (the ``response.groups[0].items`` list of the
        JSON payload).

    Raises
    ------
    requests.HTTPError
        If Foursquare answers with an error status code.
    KeyError, IndexError
        If the payload does not contain ``response.groups[0].items``.
    """
    endpoint = 'https://api.foursquare.com/v2/venues/explore'
    result_ds = []

    for postal_code, neighborhood, lat, lng in zip(postal_code_list, neighborhood_list, lat_list, lng_list):
        # Let requests build and URL-encode the query string
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }

        response = requests.get(endpoint, params=params, timeout=timeout)
        # Fail loudly on HTTP errors instead of a confusing KeyError on the payload
        response.raise_for_status()
        results = response.json()['response']['groups'][0]['items']

        result_ds.append({
            'Postal Code': postal_code,
            'Neighborhood(s)': neighborhood,
            'Latitude': lat,
            'Longitude': lng,
            'Crawling_result': results,
        })
        print('Details received for {} SUCCESSFULLY.'.format(neighborhood))

    return result_ds


In [19]:
# Crawl Foursquare for the venues around every Downtown Toronto neighbourhood
downtown_foursquare_dataset = foursquare_crawler(
    postal_code_list=list(downtown_data['PostalCode']),
    neighborhood_list=list(downtown_data['Neighbourhood']),
    lat_list=list(downtown_data['Latitude']),
    lng_list=list(downtown_data['Longitude']),
)


Details received for Berczy Park SUCCESSFULLY.
Details received for Rosedale SUCCESSFULLY.
Details received for CN Tower, King and Spadina, Railway Lands, South Niagara SUCCESSFULLY.
Details received for Commerce Court SUCCESSFULLY.
Details received for Ryerson SUCCESSFULLY.
Details received for Toronto Islands, Union Station SUCCESSFULLY.
Details received for Design Exchange, Toronto Dominion Centre SUCCESSFULLY.
Details received for University of Toronto SUCCESSFULLY.
Details received for Adelaide, King SUCCESSFULLY.
Details received for Church and Wellesley SUCCESSFULLY.
Details received for St. James Town SUCCESSFULLY.
Details received for First Canadian Place, Underground city SUCCESSFULLY.
Details received for Chinatown, Grange Park, Kensington Market SUCCESSFULLY.
Details received for Cabbagetown, St. James Town SUCCESSFULLY.
Details received for Harbourfront, Regent Park SUCCESSFULLY.


In [21]:
# Peek at the structure of the crawl: first neighbourhood, first venue only.
# Displaying the whole list would print every raw venue payload of every neighbourhood.
[
    {**entry, 'Crawling_result': entry['Crawling_result'][:1]}
    for entry in downtown_foursquare_dataset[:1]
]

[{'Postal Code': 'M5E',
  'Neighborhood(s)': 'Berczy Park',
  'Latitude': 43.6447708,
  'Longitude': -79.3733064,
  'Crawling_result': [{'reasons': {'count': 0,
     'items': [{'summary': 'This spot is popular',
       'type': 'general',
       'reasonName': 'globalInteractionReason'}]},
    'venue': {'id': '4b56a44ff964a5206e1728e3',
     'name': 'The Keg Steakhouse + Bar',
     'location': {'address': '26 The Esplanade',
      'lat': 43.64667637593993,
      'lng': -79.37482154865866,
      'labeledLatLngs': [{'label': 'display',
        'lat': 43.64667637593993,
        'lng': -79.37482154865866}],
      'distance': 244,
      'cc': 'CA',
      'city': 'Toronto',
      'state': 'ON',
      'country': 'Canada',
      'formattedAddress': ['26 The Esplanade', 'Toronto ON', 'Canada']},
     'categories': [{'id': '4bf58dd8d48988d1cc941735',
       'name': 'Steakhouse',
       'pluralName': 'Steakhouses',
       'shortName': 'Steakhouse',
       'icon': {'prefix': 'https://ss3.4sqi.net/im

## The venue details come back in an unstructured (nested JSON) format, so we need to clean them. Let's explore.

In [22]:
def get_venue_dataset(downtown_foursquare_dataset):
    """Flatten the crawled Foursquare results into one row per venue.

    Parameters
    ----------
    downtown_foursquare_dataset : list of dict
        One dict per neighbourhood with the keys ``'Postal Code'``,
        ``'Neighborhood(s)'``, ``'Latitude'``, ``'Longitude'`` and
        ``'Crawling_result'`` (a list of venue dicts).

    Returns
    -------
    pandas.DataFrame
        Columns ``'Postal Code'``, ``'Neighborhood'``, ``'Neighborhood Latitude'``,
        ``'Neighborhood Longitude'``, ``'Venue'``, ``'Venue Summary'``,
        ``'Venue Category'`` and ``'Distance'``. Empty (but with these columns)
        when no venues were crawled.

    Notes
    -----
    Prints the number of venues found for every neighbourhood.
    A venue with an empty ``reasons.items`` or ``categories`` list gets a
    missing value in the corresponding column instead of raising IndexError.
    """
    columns = ['Postal Code', 'Neighborhood',
               'Neighborhood Latitude', 'Neighborhood Longitude',
               'Venue', 'Venue Summary', 'Venue Category', 'Distance']

    # Rows are collected in a plain list and turned into a DataFrame once:
    # DataFrame.append in a loop copies the frame on every call and no longer
    # exists in pandas 2.x.
    records = []

    for neigh_dict in downtown_foursquare_dataset:
        postal_code = neigh_dict['Postal Code']
        neigh = neigh_dict['Neighborhood(s)']
        lat = neigh_dict['Latitude']
        lng = neigh_dict['Longitude']
        venues = neigh_dict['Crawling_result']
        print('Number of Venuse in Coordination in {} Negihborhood(s) is {}:'.format(neigh, len(venues)))

        for venue_dict in venues:
            venue = venue_dict['venue']
            reasons = venue_dict['reasons']['items']
            categories = venue['categories']

            records.append({
                'Postal Code': postal_code,
                'Neighborhood': neigh,
                'Neighborhood Latitude': lat,
                'Neighborhood Longitude': lng,
                'Venue': venue['name'],
                'Venue Summary': reasons[0]['summary'] if reasons else None,
                'Venue Category': categories[0]['name'] if categories else None,
                'Distance': venue['location']['distance'],
            })

    return pd.DataFrame(records, columns=columns)

In [23]:
# Flatten the raw crawl into a DataFrame with one row per venue
downtown_venues = get_venue_dataset(downtown_foursquare_dataset)

Number of Venuse in Coordination in Berczy Park Negihborhood(s) is 100:
Number of Venuse in Coordination in Rosedale Negihborhood(s) is 26:
Number of Venuse in Coordination in CN Tower, King and Spadina, Railway Lands, South Niagara Negihborhood(s) is 14:
Number of Venuse in Coordination in Commerce Court Negihborhood(s) is 100:
Number of Venuse in Coordination in Ryerson Negihborhood(s) is 100:
Number of Venuse in Coordination in Toronto Islands, Union Station Negihborhood(s) is 100:
Number of Venuse in Coordination in Design Exchange, Toronto Dominion Centre Negihborhood(s) is 100:
Number of Venuse in Coordination in University of Toronto Negihborhood(s) is 100:
Number of Venuse in Coordination in Adelaide, King Negihborhood(s) is 100:
Number of Venuse in Coordination in Church and Wellesley Negihborhood(s) is 100:
Number of Venuse in Coordination in St. James Town Negihborhood(s) is 100:
Number of Venuse in Coordination in First Canadian Place, Underground city Negihborhood(s) is 10

In [24]:
# Preview the first rows of the flattened venue table
downtown_venues.head()

Unnamed: 0,Postal Code,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Summary,Venue Category,Distance
0,M5E,Berczy Park,43.644771,-79.373306,The Keg Steakhouse + Bar,This spot is popular,Steakhouse,244
1,M5E,Berczy Park,43.644771,-79.373306,LCBO,This spot is popular,Liquor Store,215
2,M5E,Berczy Park,43.644771,-79.373306,Hockey Hall Of Fame (Hockey Hall of Fame),This spot is popular,Museum,406
3,M5E,Berczy Park,43.644771,-79.373306,St. Lawrence Market (South Building),This spot is popular,Farmers Market,465
4,M5E,Berczy Park,43.644771,-79.373306,Sukhothai,This spot is popular,Thai Restaurant,425


### Neighborhoods of the Downtown Area That Have Venues

In [25]:
# Distinct neighbourhood names, in order of first appearance in the venue table
neigh_list = downtown_venues['Neighborhood'].unique().tolist()
neigh_list

['Berczy Park',
 'Rosedale',
 'CN Tower, King and Spadina, Railway Lands, South Niagara',
 'Commerce Court',
 'Ryerson',
 'Toronto Islands, Union Station',
 'Design Exchange, Toronto Dominion Centre',
 'University of Toronto',
 'Adelaide, King',
 'Church and Wellesley',
 'St. James Town',
 'First Canadian Place, Underground city',
 'Chinatown, Grange Park, Kensington Market',
 'Cabbagetown, St. James Town',
 'Harbourfront, Regent Park']

## Summarize the Key Statistics about Venue Types

In [26]:
# Venue rows per neighbourhood. count() tallies the non-null values of every
# remaining column, so in the output shown below each column repeats the same number.
neigh_summ = downtown_venues.groupby('Neighborhood').count()
neigh_summ

Unnamed: 0_level_0,Postal Code,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Summary,Venue Category,Distance
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"Adelaide, King",100,100,100,100,100,100,100
Berczy Park,100,100,100,100,100,100,100
"CN Tower, King and Spadina, Railway Lands, South Niagara",14,14,14,14,14,14,14
"Cabbagetown, St. James Town",44,44,44,44,44,44,44
"Chinatown, Grange Park, Kensington Market",100,100,100,100,100,100,100
Church and Wellesley,100,100,100,100,100,100,100
Commerce Court,100,100,100,100,100,100,100
"Design Exchange, Toronto Dominion Centre",100,100,100,100,100,100,100
"First Canadian Place, Underground city",100,100,100,100,100,100,100
"Harbourfront, Regent Park",100,100,100,100,100,100,100


In [31]:
# Report how many distinct venue categories were found, then tally rows per category
unique_categories = downtown_venues['Venue Category'].unique()
print("Total We have {} Unique Categories".format(len(unique_categories)))

neigh_category_stats = downtown_venues.groupby('Venue Category').count()
neigh_category_stats.head(10)

Total We have 195 Unique Categories


Unnamed: 0_level_0,Postal Code,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Summary,Distance
Venue Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Adult Boutique,2,2,2,2,2,2,2
Afghan Restaurant,1,1,1,1,1,1,1
Airport,1,1,1,1,1,1,1
Airport Lounge,1,1,1,1,1,1,1
American Restaurant,23,23,23,23,23,23,23
Animal Shelter,1,1,1,1,1,1,1
Aquarium,3,3,3,3,3,3,3
Art Gallery,12,12,12,12,12,12,12
Art Museum,2,2,2,2,2,2,2
Arts & Crafts Store,5,5,5,5,5,5,5


In [88]:
# List every distinct venue category present in the venue table
downtown_venues['Venue Category'].unique()

array(['Steakhouse', 'Liquor Store', 'Museum', 'Farmers Market',
       'Thai Restaurant', 'Concert Hall', 'Cocktail Bar',
       'Basketball Stadium', 'Food Truck', 'Beer Bar',
       'Japanese Restaurant', 'Seafood Restaurant', 'Park',
       'French Restaurant', 'Sporting Goods Shop', 'Tea Room',
       'Coffee Shop', 'Bistro', 'Bakery', 'Jazz Club', 'Café', 'Lounge',
       'Fish Market', 'Gastropub', 'Hotel', 'Restaurant', 'Cheese Shop',
       'Gym', 'Cosmetics Shop', 'Creperie', 'Sports Bar',
       'Belgian Restaurant', 'Beach', 'Comfort Food Restaurant',
       'Fountain', 'Clothing Store', 'Church', 'Pub',
       'Italian Restaurant', 'Diner', 'Grocery Store', 'Bagel Shop',
       'Tailor Shop', 'Deli / Bodega', 'Salad Place', 'BBQ Joint',
       'Art Gallery', 'Greek Restaurant', 'Lake', 'Office', 'Plaza',
       'Pizza Place', 'Train Station', 'Supermarket', 'Breakfast Spot',
       'Neighborhood', 'Optical Shop', 'Movie Theater',
       'Middle Eastern Restaurant', 'Americ

### This is a promising situation: Downtown Toronto has a rich business culture, with more than 100 different types of businesses established here.

## Thank You.