# Note: All three parts of the assignment are in this single notebook; please scroll down to review each part.

## <font color='blue'>Part 1:</font>

<font color='red'>Importing libraries</font>

In [None]:
import numpy as np # library to handle data in a vectorized manner
import pandas as pd # library for data analsysis

<font color='red'>Importing the wiki table as a dataframe and deleting rows that have the Borough not assigned:</font>


In [None]:
# Read the first table on the page, keep only rows whose Borough is assigned,
# and renumber the remaining rows from 0.
df = pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')[0]
df = df[~df['Borough'].str.contains("Not assigned")].reset_index(drop=True)
df

<font color='red'>Checking how many cells have not assigned neighbourhood:</font>


In [None]:
# Rows whose Neighbourhood is still the placeholder text.
df[df['Neighbourhood'].eq('Not assigned')]

<font color='red'>Since only one cell has an unassigned neighbourhood, I can change just that cell:</font>



In [None]:
# Fill every unassigned neighbourhood from the Borough of the same row instead of
# hard-coding row 5, so the cell keeps working if the source table changes.
# NOTE(review): assumes the affected row's Borough is "Queen's Park" - confirm
# against the table displayed above.
unassigned = df['Neighbourhood'] == 'Not assigned'
df.loc[unassigned, 'Neighbourhood'] = df.loc[unassigned, 'Borough']

<font color='red'>Grouping rows that share the same Postcode and joining their Neighbourhood names with commas:</font>




In [None]:
# One row per Postcode: keep the first Borough and join all Neighbourhood names.
df = (
    df.groupby('Postcode', as_index=False)
      .agg({'Borough': 'first', 'Neighbourhood': ', '.join})
)
df

In [None]:
# (rows, columns) of the grouped table
df.shape

## <font color='blue'>Part 2:</font>

<font color='red'>Importing geospatial data:</font>

In [None]:
# Load the geospatial CSV that is merged with the postcode table in the next cell.
df2 = pd.read_csv(filepath_or_buffer='http://cocl.us/Geospatial_data')
df2

<font color='red'>Merging the two dataframes:</font>

In [None]:
# Join the postcode table with the coordinates. The key column is named differently
# in each frame ('Postcode' vs 'Postal Code'), so both sides must be given explicitly.
# The duplicate key column from the right-hand frame is dropped after the merge.
df3 = (
    pd.merge(left=df, right=df2, left_on='Postcode', right_on='Postal Code')
      .drop(columns='Postal Code')
)
df3

## <font color='blue'>Part 3:</font>

<font color='red'>Importing libraries:</font>

In [None]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes 
import folium # map rendering library

print('Libraries imported.')

<font color='red'>Toronto Neighbourhood Analysis. In my analysis, I will choose "Central Toronto" as the borough of interest and then cluster the neighbourhoods around it, including those in the other boroughs with 'Toronto' in their names, e.g. East Toronto, Downtown Toronto, etc.

Therefore, in the code below, we will select the boroughs whose names contain the word 'Toronto':</font>

In [None]:
# Keep only the boroughs whose name mentions 'Toronto' (missing names count as no match).
toronto_data = (
    df3.loc[df3['Borough'].str.contains('Toronto', na=False)]
       .reset_index(drop=True)
)
toronto_data

<font color='red'>Let's get the geographical coordinates of Central Toronto. After that we will print the map</font>

In [None]:
address = 'Central Toronto, TO'

# Nominatim requires callers to identify themselves with a user agent.
geolocator = Nominatim(user_agent='toronto_neighbourhood_explorer')
location = geolocator.geocode(address)
if location is None:
    # geocode() returns None when the address cannot be resolved
    raise ValueError('Could not geocode address: {}'.format(address))

# These two names are reused by the map cells further down.
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Central Toronto are {}, {}.'.format(latitude, longitude))

In [None]:
# Base map centred on the coordinates geocoded for Central Toronto.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# Showing Central Toronto as Red on the map ....
folium.CircleMarker(
    [latitude, longitude],
    radius=10,
    popup=folium.Popup('Central Toronto', parse_html=True),
    color='Red',
    fill=True,
    fill_color='Red',
    fill_opacity=0.9).add_to(map_toronto)

# add one blue marker per neighbourhood, labelled "<neighbourhood>, <borough>"
for lat, lng, neighbourhood, borough in zip(toronto_data['Latitude'],
                                            toronto_data['Longitude'],
                                            toronto_data['Neighbourhood'],
                                            toronto_data['Borough']):
    popup = folium.Popup('{}, {}'.format(neighbourhood, borough), parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)

map_toronto

<font color='red'>Using Foursquare API. I am going to get the top 100 venues that are in Central Toronto within a radius of 500 meters.</font>

In [None]:
import os

# Credentials are read from the environment so that secrets are never stored in the
# notebook. Set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before starting the
# kernel; a missing variable raises KeyError here rather than failing later in the API call.
CLIENT_ID = os.environ['FOURSQUARE_CLIENT_ID'] # your Foursquare ID
CLIENT_SECRET = os.environ['FOURSQUARE_CLIENT_SECRET'] # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

In [None]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Query the Foursquare explore endpoint once per neighbourhood and collect the venues.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Neighbourhood names and their coordinates, iterated in lockstep.
    radius : int, default 500
        Value sent to the API as the ``radius`` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue. The key column is named 'Neighbourhood' to match
        the spelling used by the cells that consume this frame. An empty input yields
        an empty frame that still has all seven columns.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.

    Notes
    -----
    Reads the module-level CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT at call time.
    """
    columns = ['Neighbourhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # let requests build and escape the query string
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }

        # a timeout keeps one stalled request from hanging the whole notebook
        response = requests.get('https://api.foursquare.com/v2/venues/explore',
                                params=params, timeout=30)
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for v in results:
            venue = v['venue']
            categories = venue['categories']
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # a venue may come back without any category
                categories[0]['name'] if categories else None))

    # passing the column names to the constructor also works when no rows were collected
    nearby_venues = pd.DataFrame(rows, columns=columns)

    return(nearby_venues)

In [None]:
radius = 500 # search radius passed to getNearbyVenues
LIMIT = 100 # value of the API's limit parameter; read by getNearbyVenues as a global
Toronto_venues = getNearbyVenues(names = toronto_data['Neighbourhood'], latitudes = toronto_data['Latitude'], longitudes = toronto_data['Longitude'], radius = radius)

In [None]:
# Size of the venue table, followed by its first ten rows.
print(Toronto_venues.shape)
Toronto_venues.head(10)

<font color='red'>Let's check how many venues were returned for each neighborhood.</font>

In [None]:
# Non-null count of every column per 'Neighbourhood' group.
Toronto_venues.groupby('Neighbourhood').count()

<font color='red'>Let's find out how many unique categories can be curated from all the returned venues.</font>

In [None]:
# Count the distinct values of the 'Venue Category' column.
unique_categories = Toronto_venues['Venue Category'].unique()
print('There are {} uniques categories.'.format(len(unique_categories)))

<font color='red'>Analyze Each Neighborhood.</font>

In [None]:
# One indicator column per venue category (no prefix on the column names).
Toronto_onehot = pd.get_dummies(Toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# Re-attach the neighbourhood of each venue; the new column lands at the end.
Toronto_onehot['Neighbourhood'] = Toronto_venues['Neighbourhood']

# Rotate the last column to the front so the neighbourhood comes first.
fixed_columns = list(Toronto_onehot.columns)
fixed_columns.insert(0, fixed_columns.pop())
Toronto_onehot = Toronto_onehot[fixed_columns]

Toronto_onehot.head(5)

In [None]:
# (rows, columns) of the one-hot table
Toronto_onehot.shape


<font color='red'>Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category.</font>

In [None]:
# Mean of each indicator column per neighbourhood, i.e. the share of venues in each category.
Toronto_grouped = Toronto_onehot.groupby('Neighbourhood', as_index=False).mean()
Toronto_grouped.head(5)

In [None]:
# (rows, columns) of the per-neighbourhood frequency table
Toronto_grouped.shape

<font color='red'>Let's print each neighborhood along with the top 5 most common venues.</font>

In [None]:
num_top_venues = 5

for hood in Toronto_grouped['Neighbourhood']:
    print("----" + hood + "----")

    # Turn this neighbourhood's single row into a two-column (venue, freq) table.
    temp = (
        Toronto_grouped[Toronto_grouped['Neighbourhood'] == hood]
        .T
        .reset_index()
    )
    temp.columns = ['venue', 'freq']

    # The first row holds the neighbourhood name itself, not a frequency.
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})

    ranked = temp.sort_values('freq', ascending=False).reset_index(drop=True)
    print(ranked.head(num_top_venues))
    print('\n')


<font color='red'>Let's put that into a pandas dataframe.</font>

In [None]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the `num_top_venues` largest entries of `row`.

    The first element of `row` is skipped; the remaining values are sorted in
    descending order and the index labels of the leading entries are returned
    as an array.
    """
    return row.iloc[1:].sort_values(ascending=False).index.values[:num_top_venues]

In [None]:
num_top_venues = 10

# Ordinal suffixes for ranks 1-3; every later rank uses 'th'.
indicators = ['st', 'nd', 'rd']

# Labelling columns as 1st, 2nd and so on
columns = ['Neighbourhood']
for ind in range(num_top_venues):
    # explicit bounds check instead of catching every exception as control flow
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind + 1, suffix))

# create a new dataframe with those column names and one row per neighbourhood
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighbourhood'] = Toronto_grouped['Neighbourhood']

# fill each row with that neighbourhood's most frequent venue categories
for ind in np.arange(Toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

<font color='red'>Cluster Neighborhoods.</font>

In [None]:
# set number of clusters
kclusters = 5

# Feature matrix: every column except the neighbourhood name.
# `columns=` is used because pandas 2.0 no longer accepts the axis as a positional argument.
Toronto_grouped_clustering = Toronto_grouped.drop(columns='Neighbourhood')

# run k-means clustering (fixed random_state for repeatable labels)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Toronto_grouped_clustering)

# check cluster labels generated for the first ten rows
kmeans.labels_[0:10]

In [172]:
# add clustering labels; drop labels left by an earlier run of this cell first,
# because DataFrame.insert raises if the column already exists
if 'Cluster Labels' in neighborhoods_venues_sorted.columns:
    neighborhoods_venues_sorted = neighborhoods_venues_sorted.drop(columns='Cluster Labels')
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# attach the cluster label and top venues to each neighbourhood's postcode, borough and coordinates
Toronto_merged = toronto_data.join(neighborhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

Toronto_merged # check the last columns!

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4E,East Toronto,The Beaches,43.676357,-79.293031,4,Neighborhood,Pub,Coffee Shop,Health Food Store,Trail,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188,0,Greek Restaurant,Coffee Shop,Italian Restaurant,Ice Cream Shop,Bookstore,Restaurant,Furniture / Home Store,Yoga Studio,Dessert Shop,Bubble Tea Shop
2,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572,0,Pet Store,Ice Cream Shop,Pizza Place,Movie Theater,Pub,Sandwich Place,Burrito Place,Burger Joint,Brewery,Liquor Store
3,M4M,East Toronto,Studio District,43.659526,-79.340923,0,Café,Coffee Shop,Bakery,Italian Restaurant,American Restaurant,Yoga Studio,Convenience Store,Seafood Restaurant,Sandwich Place,Cheese Shop
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879,2,Photography Studio,Park,Bus Line,Swim School,Department Store,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop
5,M4P,Central Toronto,Davisville North,43.712751,-79.390197,0,Hotel,Sandwich Place,Gym,Park,Breakfast Spot,Clothing Store,Food & Drink Shop,Dance Studio,Doner Restaurant,Discount Store
6,M4R,Central Toronto,North Toronto West,43.715383,-79.405678,0,Coffee Shop,Sporting Goods Shop,Clothing Store,Yoga Studio,Spa,Dessert Shop,Bagel Shop,Ice Cream Shop,Diner,Mexican Restaurant
7,M4S,Central Toronto,Davisville,43.704324,-79.38879,0,Sandwich Place,Dessert Shop,Pizza Place,Sushi Restaurant,Gym,Coffee Shop,Italian Restaurant,Café,Costume Shop,Farmers Market
8,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316,0,Intersection,Gym,Playground,Summer Camp,Tennis Court,Doner Restaurant,Dim Sum Restaurant,Diner,Discount Store,Dog Run
9,M4V,Central Toronto,"Deer Park, Forest Hill SE, Rathnelly, South Hi...",43.686412,-79.400049,0,Coffee Shop,Pub,Spa,Fried Chicken Joint,Liquor Store,Vietnamese Restaurant,Bagel Shop,Supermarket,Sushi Restaurant,Light Rail Station


<font color='red'>Finally, let's visualize the resulting clusters.</font>

In [174]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow colour per cluster
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(Toronto_merged['Latitude'], Toronto_merged['Longitude'], Toronto_merged['Neighbourhood'], Toronto_merged['Cluster Labels']):
    # the join leaves NaN for a neighbourhood with no match; it has no cluster to colour
    if pd.isna(cluster):
        continue
    # a NaN anywhere in the column makes the labels floats, which cannot index a list
    cluster = int(cluster)
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        # colour assignment kept as before: label 0 wraps around to the last colour
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

<font color='red'>Examine Clusters.</font>

<font color='green'>Cluster 1.</font>

In [175]:
# Rows labelled 0, showing column 1 and every column from position 5 onward.
Toronto_merged[Toronto_merged['Cluster Labels'].eq(0)].iloc[:, [1] + list(range(5, Toronto_merged.shape[1]))]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,East Toronto,0,Greek Restaurant,Coffee Shop,Italian Restaurant,Ice Cream Shop,Bookstore,Restaurant,Furniture / Home Store,Yoga Studio,Dessert Shop,Bubble Tea Shop
2,East Toronto,0,Pet Store,Ice Cream Shop,Pizza Place,Movie Theater,Pub,Sandwich Place,Burrito Place,Burger Joint,Brewery,Liquor Store
3,East Toronto,0,Café,Coffee Shop,Bakery,Italian Restaurant,American Restaurant,Yoga Studio,Convenience Store,Seafood Restaurant,Sandwich Place,Cheese Shop
5,Central Toronto,0,Hotel,Sandwich Place,Gym,Park,Breakfast Spot,Clothing Store,Food & Drink Shop,Dance Studio,Doner Restaurant,Discount Store
6,Central Toronto,0,Coffee Shop,Sporting Goods Shop,Clothing Store,Yoga Studio,Spa,Dessert Shop,Bagel Shop,Ice Cream Shop,Diner,Mexican Restaurant
7,Central Toronto,0,Sandwich Place,Dessert Shop,Pizza Place,Sushi Restaurant,Gym,Coffee Shop,Italian Restaurant,Café,Costume Shop,Farmers Market
8,Central Toronto,0,Intersection,Gym,Playground,Summer Camp,Tennis Court,Doner Restaurant,Dim Sum Restaurant,Diner,Discount Store,Dog Run
9,Central Toronto,0,Coffee Shop,Pub,Spa,Fried Chicken Joint,Liquor Store,Vietnamese Restaurant,Bagel Shop,Supermarket,Sushi Restaurant,Light Rail Station
11,Downtown Toronto,0,Coffee Shop,Pub,Italian Restaurant,Bakery,Pizza Place,Restaurant,Café,Diner,Snack Place,Japanese Restaurant
12,Downtown Toronto,0,Coffee Shop,Sushi Restaurant,Gay Bar,Japanese Restaurant,Restaurant,Gym,Mediterranean Restaurant,Hotel,Gastropub,Men's Store


<font color='green'>Cluster 2.</font>

In [176]:
# Rows labelled 1, showing column 1 and every column from position 5 onward.
Toronto_merged[Toronto_merged['Cluster Labels'].eq(1)].iloc[:, [1] + list(range(5, Toronto_merged.shape[1]))]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
10,Downtown Toronto,1,Park,Playground,Trail,Yoga Studio,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant
23,Central Toronto,1,Sushi Restaurant,Park,Trail,Jewelry Store,Yoga Studio,Donut Shop,Diner,Discount Store,Dog Run,Doner Restaurant


<font color='green'>Cluster 3.</font>

In [177]:
# Rows labelled 2, showing column 1 and every column from position 5 onward.
Toronto_merged[Toronto_merged['Cluster Labels'].eq(2)].iloc[:, [1] + list(range(5, Toronto_merged.shape[1]))]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Central Toronto,2,Photography Studio,Park,Bus Line,Swim School,Department Store,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop


<font color='green'>Cluster 4.</font>

In [178]:
# Rows labelled 3, showing column 1 and every column from position 5 onward.
Toronto_merged[Toronto_merged['Cluster Labels'].eq(3)].iloc[:, [1] + list(range(5, Toronto_merged.shape[1]))]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
22,Central Toronto,3,Music Venue,Garden,Yoga Studio,Dessert Shop,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop


<font color='green'>Cluster 5.</font>

In [179]:
# Rows labelled 4, showing column 1 and every column from position 5 onward.
Toronto_merged[Toronto_merged['Cluster Labels'].eq(4)].iloc[:, [1] + list(range(5, Toronto_merged.shape[1]))]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,East Toronto,4,Neighborhood,Pub,Coffee Shop,Health Food Store,Trail,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant
