<b>Introduction/Business Problem:</b>
<br>Montreal is a city covering 431.5 km^2 (166.6 mi^2) with approx. 1.8M inhabitants residing in 21 boroughs.

The City of Saints is visited by over 2M people each year, making it Canada's most visited city. With so much to see and do, I'm going to need a cup of coffee!

With that in mind, I'm going to examine The City of 100 Steeples and see which neighborhoods will shelter and stimulate weary travelers.

<b>Data:</b> 
<br>I will be using the postal code data for metropolitan Montreal that is available on Wikipedia. I will also be using venue name, location, and category information from Foursquare to identify neighborhoods that have accommodations for travelers (hotels/hostels/inns/etc.) and coffee shops.

In [None]:
#putting tools in the toolbox
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
from bs4.element import Comment
import urllib.request
import os
from sklearn.cluster import KMeans
from geopy.geocoders import Nominatim 
import matplotlib.cm as cm
import matplotlib.colors as colors
!pip -q install folium
import folium
print('Done')

In [None]:
# Download the Wikipedia page that lists the "H" postal codes and their neighborhoods.
list_url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_H"
# The timeout keeps the cell from hanging indefinitely, and raise_for_status()
# stops an HTTP error page from being parsed as if it were the postal-code table.
page_response = requests.get(list_url, timeout=30)
page_response.raise_for_status()
source = page_response.text

In [None]:
# Parse the downloaded HTML so it can be searched by tag.
# The parser is named explicitly: when it is omitted, BeautifulSoup picks
# whichever parser is installed (and warns), so results can vary by machine.
soup = BeautifulSoup(source, 'html.parser')

In [None]:
# Separate the postal code / neighborhood table from the rest of the page.
# find() returns only the first <tbody> element in the document.
code_body = soup.find('tbody')

In [None]:
# Build the list of postal codes: the stripped text of every <b> tag
# inside the table body is treated as one postal code.
postal_codes = code_body.find_all('b')
code_list = [bold_tag.text.strip() for bold_tag in postal_codes]

In [None]:
len(code_list)

In [None]:
# Build the list of neighborhoods: the stripped text of every <span> tag
# inside the table body is treated as one neighborhood entry.
neighborhoods = code_body.find_all('span')
ngh_list = [span_tag.text.strip() for span_tag in neighborhoods]

In [None]:
len(ngh_list)

In [None]:
# Pair each postal code with the neighborhood text at the same position.
# Both lists must have the same length or the DataFrame constructor raises.
mtl_ngh = pd.DataFrame(
    {
        'Postalcode': code_list,
        'Neighborhood': ngh_list,
    }
)
mtl_ngh.head()

In [None]:
# Keep only the rows whose neighborhood is something other than 'Not assigned'.
is_assigned = mtl_ngh['Neighborhood'].ne('Not assigned')
mtl_ngh = mtl_ngh[is_assigned]
print(mtl_ngh.head())
len(mtl_ngh)

In [None]:
# Collapse rows that share a postal code into a single comma-separated
# neighborhood string, stored in a column named 'Neighborhood_joined'.
mtl_temp = (
    mtl_ngh.groupby('Postalcode')['Neighborhood']
    .apply(', '.join)
    .reset_index()
    .rename(columns={'Neighborhood': 'Neighborhood_joined'})
)
# Drop the row whose joined text is exactly the reserved "Santa Claus" entry.
not_reserved = mtl_temp['Neighborhood_joined'] != 'Reserved0H0: Santa Claus'
mtl_temp = mtl_temp[not_reserved]
print(mtl_temp.head())
len(mtl_temp)

In [None]:
# Attach the joined neighborhood text to each postal code, discard the
# per-row 'Neighborhood' column, de-duplicate, and give the joined column
# the plain 'Neighborhood' name.
mtl_merge = (
    pd.merge(mtl_ngh, mtl_temp, on='Postalcode')
    .drop(columns=['Neighborhood'])
    .drop_duplicates()
    .rename(columns={'Neighborhood_joined': 'Neighborhood'})
)
print(mtl_merge.head())
len(mtl_merge)

In [None]:
# Pass mtl_merge back through the DataFrame constructor and preview the first rows.
# NOTE(review): mtl_merge is already produced by pd.merge in this file, so this
# re-wrap looks redundant — confirm before removing.
mtl_merge=pd.DataFrame(mtl_merge)
mtl_merge.head()

def get_geocode(postal_code, max_attempts=None):
    """Return ``(latitude, longitude)`` for a Montreal postal code.

    The geocoder is queried repeatedly until it returns coordinates.

    Parameters
    ----------
    postal_code : str
        Postal code to look up; it is formatted into
        ``'<postal_code>, Montreal, Quebec'``.
    max_attempts : int or None, optional
        Maximum number of lookups before giving up. ``None`` (the default)
        retries without limit.

    Returns
    -------
    tuple
        ``(latitude, longitude)`` taken from the geocoder result's ``latlng``.

    Raises
    ------
    RuntimeError
        If ``max_attempts`` lookups complete without yielding coordinates.

    NOTE(review): ``geocoder`` is not imported in the visible part of this
    file — confirm it is imported before this function is called.
    """
    # The original loop tested `lat_lng_coord` but assigned `lat_lng_coords`,
    # so the condition never changed and the loop could not terminate.
    lat_lng_coords = None
    attempts = 0
    while lat_lng_coords is None:
        if max_attempts is not None and attempts >= max_attempts:
            raise RuntimeError(
                'No coordinates found for {} after {} attempts'.format(
                    postal_code, attempts))
        attempts += 1
        m = geocoder.google('{}, Montreal, Quebec'.format(postal_code))
        lat_lng_coords = m.latlng
    mtl_latitude, mtl_longitude = lat_lng_coords[0], lat_lng_coords[1]
    return mtl_latitude, mtl_longitude
#print('Done')
# Load the CSV at this URL into a DataFrame and preview the first rows.
# NOTE(review): mtl_geo is not referenced again in the visible part of this
# file — confirm it is still needed.
mtl_geo=pd.read_csv('http://cocl.us/Geospatial_data')
mtl_geo.head()

In [None]:
!pip install pgeocode
print('Done')

In [None]:
import pgeocode
print('Done')

In [None]:
# Look up every scraped postal code with pgeocode's 'ca' dataset.
# code_list still contains the codes that were later filtered out as
# unassigned or reserved; the merge in a later cell narrows the result.
nomi=pgeocode.Nominatim('ca')
mtl_dirty_geo=nomi.query_postal_code(code_list)
mtl_dirty_geo

In [None]:
# Tidy the lookup result: rename its key and coordinate columns so they
# line up with the naming used in mtl_merge.
mtl_dirty_geo = mtl_dirty_geo.rename(
    columns={
        'postal_code': 'Postalcode',
        'latitude': 'Latitude',
        'longitude': 'Longitude',
    }
)
# Join coordinates to neighborhoods on postal code, then keep four columns.
mtl_geo_ngh = mtl_dirty_geo.merge(mtl_merge, on='Postalcode')
wanted_columns = ['Postalcode', 'Neighborhood', 'Latitude', 'Longitude']
mtl_clean_geo = mtl_geo_ngh[wanted_columns]
mtl_clean_geo.head()

In [None]:
# Foursquare API credentials are read from the environment so the secrets are
# not stored in the notebook. Set FOURSQUARE_CLIENT_ID and
# FOURSQUARE_CLIENT_SECRET before running; a missing variable yields ''.
# The key pair previously hard-coded here should be treated as exposed and revoked.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')
# Value sent as the API's `v` (version) parameter on every request.
VERSION = '20180604'

def NearbyVenues(names, latitudes, longitudes, radius=500, limit=100, timeout=30):
    """Fetch Foursquare venues around each neighborhood and return one table.

    Parameters
    ----------
    names, latitudes, longitudes : iterable
        Neighborhood names and their coordinates, iterated in lockstep.
    radius : int, optional
        Value sent as the API's ``radius`` parameter (default 500).
    limit : int, optional
        Value sent as the API's ``limit`` parameter (default 100).
    timeout : float, optional
        Seconds to wait for each HTTP request before abandoning it.

    Returns
    -------
    pandas.DataFrame
        One row per venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'. The frame
        is empty, but still has these columns, when no venue is found.

    Notes
    -----
    A neighborhood whose request fails, or whose response lacks the expected
    fields, is reported as ``'<name> has an error'`` and skipped rather than
    aborting the whole run. A venue with no categories gets ``None`` as its
    category.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']
    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
        try:
            # Passing params lets requests do the URL encoding.
            response = requests.get(
                'https://api.foursquare.com/v2/venues/explore',
                params={
                    'client_id': CLIENT_ID,
                    'client_secret': CLIENT_SECRET,
                    'v': VERSION,
                    'll': '{},{}'.format(lat, lng),
                    'radius': radius,
                    'limit': limit,
                },
                timeout=timeout,
            )
            response.raise_for_status()
            items = response.json()['response']['groups'][0]['items']
        except (requests.RequestException, KeyError, IndexError, ValueError):
            print('{} has an error'.format(name))
            continue

        for item in items:
            venue = item['venue']
            categories = venue.get('categories') or []
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None,
            ))

    # Naming the columns in the constructor keeps the empty case valid;
    # assigning seven names to a zero-column frame afterwards raises ValueError.
    return pd.DataFrame(rows, columns=columns)
print('done')

# Collect nearby venues for every neighborhood row in mtl_clean_geo.
venues = NearbyVenues(
    names=mtl_clean_geo['Neighborhood'],
    latitudes=mtl_clean_geo['Latitude'],
    longitudes=mtl_clean_geo['Longitude'],
)
print('Done')

# Run the venue lookup for every neighborhood row, reporting a failure
# instead of stopping the notebook. The original `except` had no matching
# `try`, which is a syntax error.
try:
    names = NearbyVenues(
        names=mtl_clean_geo['Neighborhood'],
        latitudes=mtl_clean_geo['Latitude'],
        longitudes=mtl_clean_geo['Longitude'],
    )
except Exception as e:
    # The original message concatenated `names` with a string; on failure
    # `names` is unbound or a DataFrame, so the handler itself would fail.
    print('NearbyVenues has an error: {}'.format(e))

print('Done')

In [None]:
def NearbyVenues(names, latitudes, longitudes, radius=500, limit=100, timeout=30):
    """Fetch Foursquare venues around each neighborhood and return one table.

    Parameters
    ----------
    names, latitudes, longitudes : iterable
        Neighborhood names and their coordinates, iterated in lockstep.
    radius : int, optional
        Value sent as the API's ``radius`` parameter (default 500).
    limit : int, optional
        Value sent as the API's ``limit`` parameter (default 100).
    timeout : float, optional
        Seconds to wait for each HTTP request before abandoning it.

    Returns
    -------
    pandas.DataFrame
        One row per venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'. The frame
        is empty, but still has these columns, when no venue is found.

    Notes
    -----
    The original trailing ``except`` had no matching ``try`` and sat after
    the ``return``, which is a syntax error. Errors are now handled per
    neighborhood: a failed request or malformed response is reported as
    ``'<name> has an error'`` and that neighborhood is skipped. A venue with
    no categories gets ``None`` as its category.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']
    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
        try:
            # Passing params lets requests do the URL encoding.
            response = requests.get(
                'https://api.foursquare.com/v2/venues/explore',
                params={
                    'client_id': CLIENT_ID,
                    'client_secret': CLIENT_SECRET,
                    'v': VERSION,
                    'll': '{},{}'.format(lat, lng),
                    'radius': radius,
                    'limit': limit,
                },
                timeout=timeout,
            )
            response.raise_for_status()
            items = response.json()['response']['groups'][0]['items']
        except (requests.RequestException, KeyError, IndexError, ValueError):
            print('{} has an error'.format(name))
            continue

        for item in items:
            venue = item['venue']
            categories = venue.get('categories') or []
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None,
            ))

    # Naming the columns in the constructor keeps the empty case valid;
    # assigning seven names to a zero-column frame afterwards raises ValueError.
    return pd.DataFrame(rows, columns=columns)
print('done')