In [10]:
from bs4 import BeautifulSoup #Web Scrapping
import pandas as pd #library for dataframe
import numpy as np
from tabulate import tabulate
import json #library for json file
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import folium # map rendering library

In [11]:
# Import Toranto Neighbourhood wiki html file
import requests
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
response = requests.get(url)
response.text[:100]

'<!DOCTYPE html>\n<html class="client-nojs" lang="en" dir="ltr">\n<head>\n<meta charset="UTF-8"/>\n<title'

In [12]:
soup = BeautifulSoup(response.text, 'html.parser')
table = soup.find_all('table')[0] 
df = pd.read_html(str(table))[0]
df.head()


Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [13]:
print('Number of Borough Not assigned : {}'.format(df[df['Borough'] == 'Not assigned'].count()))

Number of Borough Not assigned : Postcode         77
Borough          77
Neighbourhood    77
dtype: int64


In [14]:
# Ignore cells with a borough that is Not assigned.
df = df[df['Borough'] != 'Not assigned']
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor


In [15]:
tornto = df.groupby(['Postcode','Borough']).agg(lambda x: ','.join(set(x)))
tornto = tornto.reset_index()
tornto.columns = ['Postal Code','Borough','Neighborhood']
tornto.head(10)

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Rouge Hill,Port Union,Highland Creek"
2,M1E,Scarborough,"West Hill,Guildwood,Morningside"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park,Ionview,Kennedy Park"
7,M1L,Scarborough,"Clairlea,Oakridge,Golden Mile"
8,M1M,Scarborough,"Scarborough Village West,Cliffcrest,Cliffside"
9,M1N,Scarborough,"Birch Cliff,Cliffside West"


In [16]:
# Read GeoFile
geo_data = pd.read_csv('https://cocl.us/Geospatial_data')
geo_data.head()
#geo_data.to_csv(r'Data\geodata.csv')

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [17]:
tornto = pd.merge(tornto,geo_data,how='left',on='Postal Code')
tornto.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill,Port Union,Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"West Hill,Guildwood,Morningside",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


# Use geopy library to get the latitude and longitude values of Toronto City. 

In [18]:
address = 'Toronto'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto City are 43.653963, -79.387207.


In [19]:
map_newyork = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(tornto['Latitude'], tornto['Longitude'],tornto['Borough'], tornto['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_newyork)  
    
map_newyork

## Import _Foursquare_ Credential

In [20]:
CLIENT_ID = 'UBLX5C1RQM4EQJIT20SYTFY2YAMDC51IIGFTHGEHKXR2T44B' # Foursquare ID
CLIENT_SECRET = 'EQ5U14PRWPDADRFB5KEJY3CG4VBDABSWKWZQHTHSU5YPHTWO' #Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: UBLX5C1RQM4EQJIT20SYTFY2YAMDC51IIGFTHGEHKXR2T44B
CLIENT_SECRET:EQ5U14PRWPDADRFB5KEJY3CG4VBDABSWKWZQHTHSU5YPHTWO


This _file_ hace some dhbfhdsbf cbcbc cggcg

# Foursquare
### Function that extracts the category of the venue

In [90]:
def foursquareVenue(neighborhood_name, neighborhood_latitude, neighborhood_longitude,redius=500,limit=100):
    
    """Extract Venues from FourSquare as lat and long given as input"""
    url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
     CLIENT_ID, CLIENT_SECRET, VERSION, neighborhood_latitude, neighborhood_longitude, radius, LIMIT)
    
    #Get Url resopnse from FourSquare
    from pandas.io.json import json_normalize

    response = json_normalize(requests.get(url).json()["response"]['groups'][0]['items']) #Save as json file as normalized datafarme
    # filter columns
    venues = response.loc[:, ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']]

    # filter the category for each row
    # use function get_category_type() apply row wise in category object
    venues['venue.categories'] = venues.apply(get_category_type, axis=1)
    # clean columns
    venues.columns = ['vanue','vanue_category','venue_latitude','venue_longitude']
    #Add Neighborhood Details
    venues['neighborhood_name'], venues['neighborhood_latitude'], venues['neighborhood_longitude'] = neighborhood_name, neighborhood_latitude, neighborhood_longitude
    response

    return venues


In [70]:
from pandas.io.json import json_normalize
json_normalize( foursquareVenue(neighborhood_name, neighborhood_latitude, neighborhood_longitude))




Unnamed: 0,referralId,reasons.count,reasons.items,venue.id,venue.name,venue.location.crossStreet,venue.location.lat,venue.location.lng,venue.location.labeledLatLngs,venue.location.distance,venue.location.cc,venue.location.city,venue.location.state,venue.location.country,venue.location.formattedAddress,venue.categories,venue.photos.count,venue.photos.groups,venue.location.address,venue.location.postalCode
0,e-0-4bb6b9446edc76b0d771311c-0,0,"[{'summary': 'This spot is popular', 'type': '...",4bb6b9446edc76b0d771311c,Wendy's,Morningside & Sheppard,43.807448,-79.199056,"[{'label': 'display', 'lat': 43.80744841934756...",387,CA,Toronto,ON,Canada,"[Toronto ON, Canada]","[{'id': '4bf58dd8d48988d16e941735', 'name': 'F...",0,[],,
1,e-0-5539e7d2498edaf4b02673ca-1,0,"[{'summary': 'This spot is popular', 'type': '...",5539e7d2498edaf4b02673ca,Interprovincial Group,,43.80563,-79.200378,"[{'label': 'display', 'lat': 43.8056297, 'lng'...",498,CA,Scarborough,ON,Canada,"[1315 Morningside Avenue, Scarborough ON M1B 3...","[{'id': '52f2ab2ebcbc57f1066b8b28', 'name': 'P...",0,[],1315 Morningside Avenue,M1B 3C5


In [24]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [21]:
neighborhood_latitude = tornto.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = tornto.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = tornto.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Rouge,Malvern are 43.806686299999996, -79.19435340000001.


In [72]:
#foursquareVenue(neighborhood_latitude,neighborhood_longitude)['response']['groups'][0]['items'][0]['venue']

In [79]:
address = 'Kolkata,IN'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of {} City are {}, {}.'.format(address,latitude, longitude))

The geograpical coordinate of Kolkata,IN City are 22.54541245, 88.3567751581234.


In [92]:
from pandas.io.json import json_normalize

venues = foursquareVenue(address, latitude, longitude)
venues

Unnamed: 0,vanue,vanue_category,venue_latitude,venue_longitude,neighborhood_name,neighborhood_latitude,neighborhood_longitude
0,Nocturne,Nightclub,22.545078,88.357384,"Kolkata,IN",22.545412,88.356775
1,Kookie Jar,Bakery,22.543596,88.358161,"Kolkata,IN",22.545412,88.356775
2,ATS ARUN TEA STALL,Tea Room,22.54485,88.354557,"Kolkata,IN",22.545412,88.356775
3,Jalapeños,Mexican Restaurant,22.547247,88.354486,"Kolkata,IN",22.545412,88.356775
4,Bombay Shiv Sagar,Indian Restaurant,22.546578,88.353349,"Kolkata,IN",22.545412,88.356775
5,Aqua Java Fast Food Pvt. Ltd.,Café,22.542949,88.355867,"Kolkata,IN",22.545412,88.356775
6,Mocha,Café,22.543133,88.360591,"Kolkata,IN",22.545412,88.356775
7,Vardaan Shopping Market,Shopping Mall,22.548193,88.35381,"Kolkata,IN",22.545412,88.356775
8,Kala Mandir,Performing Arts Venue,22.543973,88.359582,"Kolkata,IN",22.545412,88.356775
9,Fortune Select,Hotel,22.548076,88.35818,"Kolkata,IN",22.545412,88.356775


In [82]:
venues

[{'reasons': {'count': 0,
   'items': [{'summary': 'This spot is popular',
     'type': 'general',
     'reasonName': 'globalInteractionReason'}]},
  'venue': {'id': '4bb6b9446edc76b0d771311c',
   'name': "Wendy's",
   'location': {'crossStreet': 'Morningside & Sheppard',
    'lat': 43.80744841934756,
    'lng': -79.19905558052072,
    'labeledLatLngs': [{'label': 'display',
      'lat': 43.80744841934756,
      'lng': -79.19905558052072}],
    'distance': 387,
    'cc': 'CA',
    'city': 'Toronto',
    'state': 'ON',
    'country': 'Canada',
    'formattedAddress': ['Toronto ON', 'Canada']},
   'categories': [{'id': '4bf58dd8d48988d16e941735',
     'name': 'Fast Food Restaurant',
     'pluralName': 'Fast Food Restaurants',
     'shortName': 'Fast Food',
     'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/fastfood_',
      'suffix': '.png'},
     'primary': True}],
   'photos': {'count': 0, 'groups': []}},
  'referralId': 'e-0-4bb6b9446edc76b0d771311c-0'},
 {'reasons':