## First I download the Libraries needed

In [210]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# Libraries for obtaining geospatial coordinates
!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim 

import requests # library to handle requests

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

# map rendering library
!conda install -c conda-forge folium=0.5.0 --yes 
import folium 

print('Libraries imported!')

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.

Libraries imported!


## Now I uplaod the file with the coordinates 

In [211]:
neighborhoods = pd.read_csv('/Users/martinrivera/Desktop/IBM DS Certificate/Assignments/denver_neighborhoods.csv')

In [212]:
neighborhoods.head()

Unnamed: 0,Neighborhood_name,Latitude,Longitude
0,Auraria,39.7451,-105.0095
1,Cory - Merrill,39.6893,-104.9501
2,Belcaro,39.7038,-104.95
3,Washington Park,39.7002,-104.964
4,Washington Park West,39.702,-104.9793


In [239]:
type(neighborhoods)

pandas.core.frame.DataFrame

In [238]:
neighborhoods.dtypes

Neighborhood_name     object
Latitude             float64
Longitude            float64
dtype: object

## Next using the geopy library to get the latitude and longitude values of Denver

In [237]:
address = 'Denver, Colorado'

geolocator = Nominatim(user_agent="denver_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Denver are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Denver are 39.7392364, -104.9848623.


## Continuing with the map of Denver

In [236]:
map_denver = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, neighborhood in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['Neighborhood_name']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_denver)

In [217]:
map_denver

## Now I link up to Foursquare

In [235]:
CLIENT_ID = 'IQQVXNPOHGE1LIMOIPA1APTTVNRADRVZRCVYTYEZXS0XPYEI' # your Foursquare ID
CLIENT_SECRET = 'VX0U2Z1XMGFWX3LVBTTETKA3SY4GBYOIHBVMZAC01HPU44GO' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: IQQVXNPOHGE1LIMOIPA1APTTVNRADRVZRCVYTYEZXS0XPYEI
CLIENT_SECRET:VX0U2Z1XMGFWX3LVBTTETKA3SY4GBYOIHBVMZAC01HPU44GO


## Now I select one neighborhood

[Explanation]

In [240]:
neighborhood_latitude = neighborhoods.loc[6, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = neighborhoods.loc[6, 'Longitude'] # neighborhood longitude value

neighborhood_name = neighborhoods.loc[6, 'Neighborhood_name'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Cherry Creek are 39.7207, -104.9476.


## Now I create the url to import data into a json file format

In [248]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius

url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION,
    latitude, 
    longitude,  
    radius,
    LIMIT)
url 

'https://api.foursquare.com/v2/venues/explore?&client_id=IQQVXNPOHGE1LIMOIPA1APTTVNRADRVZRCVYTYEZXS0XPYEI&client_secret=VX0U2Z1XMGFWX3LVBTTETKA3SY4GBYOIHBVMZAC01HPU44GO&v=20180605&ll=39.7392364,-104.9848623&radius=500&limit=100'

In [249]:
results = requests.get(url).json()

## Let's see what venues are nearby

In [250]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [262]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

  nearby_venues = json_normalize(venues) # flatten JSON


Unnamed: 0,name,categories,lat,lng
0,Sassafras American Eatery,Breakfast Spot,39.739949,-104.982756
1,"City, O' City",Vegetarian / Vegan Restaurant,39.736724,-104.984669
2,Phở-natic,Noodle House,39.740081,-104.984111
3,Capitol Hill Books,Bookstore,39.739979,-104.983472
4,Shish Kabob Grill,Middle Eastern Restaurant,39.740246,-104.983633


## Let's explore the data

In [263]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

61 venues were returned by Foursquare.


In [271]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION,
            latitude, 
            longitude,  
            radius,
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [272]:
nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Sassafras American Eatery,Breakfast Spot,39.739949,-104.982756
1,"City, O' City",Vegetarian / Vegan Restaurant,39.736724,-104.984669
2,Phở-natic,Noodle House,39.740081,-104.984111
3,Capitol Hill Books,Bookstore,39.739979,-104.983472
4,Shish Kabob Grill,Middle Eastern Restaurant,39.740246,-104.983633


In [273]:
nearby_venues.groupby('categories').count()

Unnamed: 0_level_0,name,lat,lng
categories,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ATM,1,1,1
Art Gallery,3,3,3
Art Museum,1,1,1
Asian Restaurant,2,2,2
Bakery,1,1,1
Bar,1,1,1
Bookstore,1,1,1
Breakfast Spot,1,1,1
Burger Joint,1,1,1
Business Service,1,1,1


In [278]:
denver_venues = getNearbyVenues(names=neighborhoods['Neighborhood_name'],
                                   latitudes=neighborhoods['Latitude'],
                                   longitudes=neighborhoods['Longitude']
                                  )

Auraria
Cory - Merrill
Belcaro
Washington Park
Washington Park West
Speer
Cherry Creek
Country Club
Congress Park
City Park
Marston
Fort Logan
Barnum
Barnum West
West Colfax
West Highland
Sloan Lake
Berkeley
Regis
Chaffee Park
Highland
Athmar Park
Wellshire
University
Rosedale
Cheesman Park
Hilltop
Montclair
Hale
North Park Hill
South Park Hill
University Park
Platt Park
Overland
Ruby Hill
Kennedy
Hampden
Southmoor Park
Hampden South
Indian Creek
Goldsmith
University Hills
Harvey Park
Mar Lee
East Colfax
Capitol Hill
North Capitol Hill
Civic Center
Union Station
Central Park
Montbello
Lowry Field
Green Valley Ranch
Harvey Park South
City Park West
Sun Valley
Valverde
Villa Park
Five Points
Globeville
Bear Valley
Virginia Village
Windsor
Washington Virginia Vale
Jefferson Park
Northeast Park Hill
Elyria Swansea
Baker
Clayton
Skyland
Lincoln Park
Whittier
Cole
Westwood
Sunnyside


In [279]:
print(denver_venues.shape)
denver_venues.head()

(4575, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Auraria,39.7451,-105.0095,Sassafras American Eatery,39.739949,-104.982756,Breakfast Spot
1,Auraria,39.7451,-105.0095,"City, O' City",39.736724,-104.984669,Vegetarian / Vegan Restaurant
2,Auraria,39.7451,-105.0095,Phở-natic,39.740081,-104.984111,Noodle House
3,Auraria,39.7451,-105.0095,Capitol Hill Books,39.739979,-104.983472,Bookstore
4,Auraria,39.7451,-105.0095,Shish Kabob Grill,39.740246,-104.983633,Middle Eastern Restaurant


In [281]:
print('There are {} uniques categories.'.format(len(denver_venues['Venue Category'].unique())))

There are 44 uniques categories.


In [283]:
# one hot encoding
denver_onehot = pd.get_dummies(denver_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
denver_onehot['Neighborhood'] = denver_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [denver_onehot.columns[-1]] + list(denver_onehot.columns[:-1])
denver_onehot = denver_onehot[fixed_columns]

denver_onehot.head()

Unnamed: 0,Neighborhood,ATM,Art Gallery,Art Museum,Asian Restaurant,Bakery,Bar,Bookstore,Breakfast Spot,Burger Joint,Business Service,Café,Chinese Restaurant,Coffee Shop,Dance Studio,Dive Bar,Exhibit,Gastropub,Gym,Historic Site,History Museum,Hotel,Japanese Restaurant,Jewelry Store,Lounge,Marijuana Dispensary,Mexican Restaurant,Middle Eastern Restaurant,Museum,Nightclub,Noodle House,Organic Grocery,Outdoor Sculpture,Paper / Office Supplies Store,Park,Pizza Place,Poke Place,Pub,Restaurant,Salad Place,Sandwich Place,Shipping Store,Thrift / Vintage Store,Vegetarian / Vegan Restaurant,Yoga Studio
0,Auraria,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Auraria,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
2,Auraria,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Auraria,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Auraria,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [287]:
denver_grouped = denver_onehot.groupby('Neighborhood').mean().reset_index()
denver_grouped.head()

Unnamed: 0,Neighborhood,ATM,Art Gallery,Art Museum,Asian Restaurant,Bakery,Bar,Bookstore,Breakfast Spot,Burger Joint,Business Service,Café,Chinese Restaurant,Coffee Shop,Dance Studio,Dive Bar,Exhibit,Gastropub,Gym,Historic Site,History Museum,Hotel,Japanese Restaurant,Jewelry Store,Lounge,Marijuana Dispensary,Mexican Restaurant,Middle Eastern Restaurant,Museum,Nightclub,Noodle House,Organic Grocery,Outdoor Sculpture,Paper / Office Supplies Store,Park,Pizza Place,Poke Place,Pub,Restaurant,Salad Place,Sandwich Place,Shipping Store,Thrift / Vintage Store,Vegetarian / Vegan Restaurant,Yoga Studio
0,Athmar Park,0.016393,0.04918,0.016393,0.032787,0.016393,0.016393,0.016393,0.016393,0.016393,0.016393,0.016393,0.016393,0.04918,0.016393,0.016393,0.032787,0.016393,0.032787,0.016393,0.016393,0.016393,0.016393,0.016393,0.016393,0.016393,0.04918,0.016393,0.016393,0.016393,0.032787,0.016393,0.016393,0.016393,0.016393,0.032787,0.016393,0.032787,0.016393,0.016393,0.081967,0.016393,0.016393,0.016393,0.032787
1,Auraria,0.016393,0.04918,0.016393,0.032787,0.016393,0.016393,0.016393,0.016393,0.016393,0.016393,0.016393,0.016393,0.04918,0.016393,0.016393,0.032787,0.016393,0.032787,0.016393,0.016393,0.016393,0.016393,0.016393,0.016393,0.016393,0.04918,0.016393,0.016393,0.016393,0.032787,0.016393,0.016393,0.016393,0.016393,0.032787,0.016393,0.032787,0.016393,0.016393,0.081967,0.016393,0.016393,0.016393,0.032787
2,Baker,0.016393,0.04918,0.016393,0.032787,0.016393,0.016393,0.016393,0.016393,0.016393,0.016393,0.016393,0.016393,0.04918,0.016393,0.016393,0.032787,0.016393,0.032787,0.016393,0.016393,0.016393,0.016393,0.016393,0.016393,0.016393,0.04918,0.016393,0.016393,0.016393,0.032787,0.016393,0.016393,0.016393,0.016393,0.032787,0.016393,0.032787,0.016393,0.016393,0.081967,0.016393,0.016393,0.016393,0.032787
3,Barnum,0.016393,0.04918,0.016393,0.032787,0.016393,0.016393,0.016393,0.016393,0.016393,0.016393,0.016393,0.016393,0.04918,0.016393,0.016393,0.032787,0.016393,0.032787,0.016393,0.016393,0.016393,0.016393,0.016393,0.016393,0.016393,0.04918,0.016393,0.016393,0.016393,0.032787,0.016393,0.016393,0.016393,0.016393,0.032787,0.016393,0.032787,0.016393,0.016393,0.081967,0.016393,0.016393,0.016393,0.032787
4,Barnum West,0.016393,0.04918,0.016393,0.032787,0.016393,0.016393,0.016393,0.016393,0.016393,0.016393,0.016393,0.016393,0.04918,0.016393,0.016393,0.032787,0.016393,0.032787,0.016393,0.016393,0.016393,0.016393,0.016393,0.016393,0.016393,0.04918,0.016393,0.016393,0.016393,0.032787,0.016393,0.016393,0.016393,0.016393,0.032787,0.016393,0.032787,0.016393,0.016393,0.081967,0.016393,0.016393,0.016393,0.032787


In [286]:
num_top_venues = 5

for hood in denver_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = denver_grouped[denver_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Athmar Park----
                venue  freq
0      Sandwich Place  0.08
1         Coffee Shop  0.05
2         Art Gallery  0.05
3  Mexican Restaurant  0.05
4                 Pub  0.03


----Auraria----
                venue  freq
0      Sandwich Place  0.08
1         Coffee Shop  0.05
2         Art Gallery  0.05
3  Mexican Restaurant  0.05
4                 Pub  0.03


----Baker----
                venue  freq
0      Sandwich Place  0.08
1         Coffee Shop  0.05
2         Art Gallery  0.05
3  Mexican Restaurant  0.05
4                 Pub  0.03


----Barnum----
                venue  freq
0      Sandwich Place  0.08
1         Coffee Shop  0.05
2         Art Gallery  0.05
3  Mexican Restaurant  0.05
4                 Pub  0.03


----Barnum West----
                venue  freq
0      Sandwich Place  0.08
1         Coffee Shop  0.05
2         Art Gallery  0.05
3  Mexican Restaurant  0.05
4                 Pub  0.03


----Bear Valley----
                venue  freq
0      Sandwich P

In [289]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [290]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = denver_grouped['Neighborhood']

for ind in np.arange(denver_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(denver_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Athmar Park,Sandwich Place,Art Gallery,Mexican Restaurant,Coffee Shop,Yoga Studio,Pizza Place,Exhibit,Noodle House,Gym,Pub
1,Auraria,Sandwich Place,Art Gallery,Mexican Restaurant,Coffee Shop,Yoga Studio,Pizza Place,Exhibit,Noodle House,Gym,Pub
2,Baker,Sandwich Place,Art Gallery,Mexican Restaurant,Coffee Shop,Yoga Studio,Pizza Place,Exhibit,Noodle House,Gym,Pub
3,Barnum,Sandwich Place,Art Gallery,Mexican Restaurant,Coffee Shop,Yoga Studio,Pizza Place,Exhibit,Noodle House,Gym,Pub
4,Barnum West,Sandwich Place,Art Gallery,Mexican Restaurant,Coffee Shop,Yoga Studio,Pizza Place,Exhibit,Noodle House,Gym,Pub
