### **Introduction:** In this capstone, several neighborhoods in Frankfurt am Main will be compared to the neighborhood of Five Points, Denver, to determine which are most similar, and which are most different. The comparison will be based on foursquare API data from each neighborhood to compare amenities in each neighborhood.

### **Data:** The following Foursquare API data will be used to compare the selected neighborhoods. Since 5 similar neighborhoods are being compared to a single disparate neighborhood, k means clustering will not be a viable means of analysis. Instead, a pairwise distance matrix will be utilized to compare each neighborhood to the others. This will necessitate a numerical value for the data rather than string categories. Thus, the onehot dataframe will be used, indexed by the neighborhood names(hoodcomp_pdm), to perform the analysis.

In [7]:
#import pandas
import pandas as pd
import numpy as np
! pip install geopy
!pip install folium
#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors
# import k-means from clustering stage
from sklearn.cluster import KMeans
#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library
#create data frame containing names and location data for the two neighborhoods to be compared
hoods = [['Five Points','39.75472','-104.97806'],
         ['Bockenheim','50.12498','8.64538'],
         ['Innenstadt','50.11029','8.68417'],
         ['Westend','50.11651','8.66375'],
         ['Sachsenhausen','50.1008','8.68556'],
         ['Bornheim','50.13135', '8.71251'],
         ['Nordend','50.12988', '8.67946'],
         ]
hoodcomp = pd.DataFrame(hoods, index=[0, 1,2,3,4,5,6], columns=['Neighborhood', 'Latitude', 'Longitude'])
hoodcomp
CLIENT_ID = 'FTIRR55RB4P1OQXANB0D0SQFSFW1FR02DYJH52VQWWZZXXWY' 
CLIENT_SECRET = 'QUP2H2VA23RUES5EXFMTKXLSJM4IISLP15OHT23HYVBCBM3B' 
VERSION = '20180605' 

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)
hoodcomp.columns=['Neighborhood','Latitude','Longitude']
hoodcomp

#obtain and label lat/long of the first neighborhood(s)
neighborhood_latitude = hoodcomp.loc[0, 'Latitude']
neighborhood_longitude = hoodcomp.loc[0, 'Longitude']

#Get request URL
LIMIT = 100
radius = 500
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
neighborhood_name = hoodcomp.loc[0, 'Neighborhood']

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))
#Get request URL
LIMIT = 100
radius = 500
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url
#results of get request
results = requests.get(url).json()

#obtain venue categories
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']
#convert results to dataframe
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]
# formula to obtain venues in all neighborhoods
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Latitude', 
                  'Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)
# run formula for each neighborhood
hoodcomp_venues = getNearbyVenues(names=hoodcomp['Neighborhood'],
                                   latitudes=hoodcomp['Latitude'],
                                   longitudes=hoodcomp['Longitude']
                                  )
#onehot encoding
hoodcomp_onehot = pd.get_dummies(hoodcomp_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
hoodcomp_onehot['Neighborhood'] = hoodcomp_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [hoodcomp_onehot.columns[-1]] + list(hoodcomp_onehot.columns[:-1])
hoodcomp_onehot = hoodcomp_onehot[fixed_columns]

hoodcomp_onehot.head()
hoodcomp_grouped = hoodcomp_onehot.groupby('Neighborhood').mean().reset_index()
hoodcomp_grouped
#the pairwise distance matrix cannot parse the neighborhood str, so i set the neighborhood names as the df index
hoodcomp_pdm=hoodcomp_grouped.set_index("Neighborhood")
hoodcomp_pdm

Your credentails:
CLIENT_ID: FTIRR55RB4P1OQXANB0D0SQFSFW1FR02DYJH52VQWWZZXXWY
CLIENT_SECRET:QUP2H2VA23RUES5EXFMTKXLSJM4IISLP15OHT23HYVBCBM3B
Latitude and longitude values of Five Points are 39.75472, -104.97806.
Five Points
Bockenheim
Innenstadt
Westend
Sachsenhausen
Bornheim
Nordend


Unnamed: 0_level_0,Wine Bar,ATM,Apple Wine Pub,Art Museum,Arts & Crafts Store,Asian Restaurant,Austrian Restaurant,Bagel Shop,Bakery,Bank,...,Spanish Restaurant,Steakhouse,Supermarket,Sushi Restaurant,Tapas Restaurant,Thai Restaurant,Trattoria/Osteria,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Bockenheim,0.04878,0.0,0.0,0.0,0.02439,0.073171,0.0,0.0,0.02439,0.02439,...,0.04878,0.0,0.02439,0.0,0.02439,0.02439,0.0,0.04878,0.0,0.02439
Bornheim,0.034483,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,...,0.034483,0.0,0.034483,0.0,0.0,0.034483,0.0,0.0,0.0,0.0
Five Points,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.052632,0.052632,0.052632,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Innenstadt,0.012987,0.0,0.0,0.064935,0.0,0.0,0.012987,0.0,0.012987,0.0,...,0.0,0.0,0.0,0.012987,0.012987,0.0,0.0,0.012987,0.012987,0.012987
Nordend,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667
Sachsenhausen,0.02381,0.0,0.071429,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,...,0.0,0.0,0.02381,0.0,0.0,0.047619,0.047619,0.0,0.0,0.047619
Westend,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.071429,0.0,...,0.0,0.142857,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0
