### Imports 

In [1]:
import requests
import pandas as pd
from geopy.geocoders import Nominatim
import folium
import numpy as np
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors

### Getting latitude, longitude

In [3]:
addresses = ["Panthéon", "Eiffel Tower", 
            "Arc de Triomphe", "Louvre Museum", 
            "Sacré-Cœur"]
def get_lat_long(address):
    geolocator = Nominatim(user_agent="foursquare_agent")
    location = geolocator.geocode(address)
    latitude = location.latitude
    longitude = location.longitude
    print(address, latitude, longitude)
    return latitude, longitude

In [4]:
for i in addresses:
    get_lat_long(i)

Panthéon 41.89861595 12.476833414483862
Eiffel Tower 48.858260200000004 2.2944990543196795
Arc de Triomphe 48.8737791 2.295037226037673
Louvre Museum 48.8611473 2.33802768704666
Sacré-Cœur 48.88680575 2.3430153448835087


### Five points of NY

In [16]:
Panthéon = ["Panthéon","41.89861595", "12.476833414483862"]
Eiffel_Tower = ["Eiffel Tower","48.858260200000004", "2.2944990543196795"]
Arc_de_Triomphe = ["Arc de Triomphe","48.8737791", "2.295037226037673"]
Louvre_Museum = ["Louvre Museum","48.8611473", "2.33802768704666"]
Sacré_Cœur = ["Sacré-Cœur","48.88680575", "2.3430153448835087"]

In [17]:
paris_points_with_lat_long = pd.DataFrame([Panthéon, Eiffel_Tower, 
                                              Arc_de_Triomphe, Louvre_Museum, Sacré_Cœur], 
                                             columns=["point name","lat", "lng"])
paris_points_with_lat_long

Unnamed: 0,point name,lat,lng
0,Panthéon,41.89861595,12.476833414483862
1,Eiffel Tower,48.8582602,2.2944990543196795
2,Arc de Triomphe,48.8737791,2.295037226037673
3,Louvre Museum,48.8611473,2.33802768704666
4,Sacré-Cœur,48.88680575,2.3430153448835087


### Api setup

In [25]:
def saving_data(area_name, latitude, longitude):
    url = f"https://api.foursquare.com/v2/venues/explore?client_id=N40W0THAJDZYKLHLHWRDBU01LIMNXBMXZ03X5ZOGZSRVMLSR&client_secret=NAYYDDFD0TGX2G4SSK1E0W1VPZEDUN5TSK2Q02QWKKJAESSM&ll={latitude},{longitude}&v=20180604&radius=30000&limit=100"
    results = requests.get(url).json()
    dataframe = pd.json_normalize(results["response"]["groups"][0]["items"])
    filtered_columns = ['venue.name', 'venue.categories'] + [col for col in dataframe.columns if col.startswith('venue.location.')] + ['venue.id']
    dataframe_filtered = dataframe.loc[:, filtered_columns]
    dataframe_filtered['venue.categories'] = dataframe_filtered.apply(get_category_type, axis=1) # category for each row
    dataframe_filtered.columns = [col.split('.')[-1] for col in dataframe_filtered.columns] # clean columns
    
    dataframe_filtered.to_pickle(f"{area_name}.pkl") # save to pickle so that wont call the api over and over again
    return dataframe_filtered

### Get the Categories

In [26]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

### Collect the data from the API and merge 

In [29]:
df_Panthéon = saving_data("Panthéon", Panthéon[1], Panthéon[2])
df_Eiffel_Tower = saving_data(*Eiffel_Tower)
df_Arc_de_Triomphe = saving_data(*Arc_de_Triomphe)
df_Louvre_Museum = saving_data(*Louvre_Museum)
df_Sacré_Cœur = saving_data(*Sacré_Cœur)

### Add Column with the Value with the name of NY point

In [31]:
df_Panthéon["point name"] = ["Panthéon" for _ in range(len(df_Panthéon["name"]))]
df_Eiffel_Tower["point name"] =["Eiffel Tower" for _ in range(len(df_Eiffel_Tower["name"]))]
df_Arc_de_Triomphe["point name"] = ["Arc de Triomphe" for _ in range(len(df_Arc_de_Triomphe["name"]))]
df_Louvre_Museum["point name"] = ["Louvre Museum" for _ in range(len(df_Louvre_Museum["name"]))]
df_Sacré_Cœur["point name"] = ["Sacré-Cœur" for _ in range(len(df_Sacré_Cœur["name"]))]

In [32]:
full_df = pd.concat([df_Panthéon, 
               df_Eiffel_Tower, 
               df_Arc_de_Triomphe, 
               df_Louvre_Museum, 
               df_Sacré_Cœur])

In [33]:
full_df.to_csv("full_paris_data.csv")

In [34]:
full_df.head()

Unnamed: 0,name,categories,address,crossStreet,lat,lng,labeledLatLngs,distance,postalCode,cc,city,state,country,formattedAddress,neighborhood,id,point name
0,Pantheon,Monument / Landmark,Piazza della Rotonda,Via Palombella,41.899133,12.476805,"[{'label': 'display', 'lat': 41.89913348171708...",57,186,IT,Roma,Lazio,Italia,"[Piazza della Rotonda (Via Palombella), 00186 ...",,4adcdac6f964a5202f5321e3,Panthéon
1,Fontana dei Quattro Fiumi,Fountain,Piazza Navona,,41.898967,12.473132,"[{'label': 'display', 'lat': 41.89896683723093...",309,186,IT,Roma,Lazio,Italia,"[Piazza Navona, 00186 Roma Lazio, Italia]",Parione,4adcdac9f964a520055421e3,Panthéon
2,Piazza Navona,Plaza,Piazza Navona,,41.899239,12.473184,,310,186,IT,Roma,Lazio,Italia,"[Piazza Navona, 00186 Roma Lazio, Italia]",Parione,4adcdac6f964a520285321e3,Panthéon
3,Piazza della Rotonda,Plaza,Piazza della Rotonda,,41.899253,12.476779,"[{'label': 'display', 'lat': 41.89925302095499...",71,186,IT,Roma,Lazio,Italia,"[Piazza della Rotonda, 00186 Roma Lazio, Italia]",Sant'Eustachio,4be05668358fef3b6858648a,Panthéon
4,Il Panino Ingegnoso,Sandwich Place,"Piazza di Pietra, 35",,41.899982,12.479195,"[{'label': 'display', 'lat': 41.89998191151652...",247,186,IT,Roma,Lazio,Italia,"[Piazza di Pietra, 35, 00186 Roma Lazio, Italia]",,5516b0b9498efa5147ff8430,Panthéon


#### Unique Categories

In [35]:
len(full_df["categories"].unique())

75

### One hot encoding

In [40]:
one_hot_paris = pd.get_dummies(full_df[["categories"]], prefix="", prefix_sep="")
one_hot_paris["point name"] = full_df["point name"]

In [41]:
one_hot_paris.head()

Unnamed: 0,Art Gallery,Art Museum,Asian Restaurant,Bakery,Basque Restaurant,Beer Bar,Bistro,Bookstore,Botanical Garden,Boutique,...,Supermarket,Tailor Shop,Temple,Thai Restaurant,Toy / Game Store,Trattoria/Osteria,Udon Restaurant,Vegetarian / Vegan Restaurant,Wine Bar,point name
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Panthéon
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Panthéon
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Panthéon
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Panthéon
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Panthéon


### Test Frequency of occurrence of each category

In [42]:
one_hot_paris_grouped = one_hot_paris.groupby("point name").mean().reset_index()
one_hot_paris_grouped

Unnamed: 0,point name,Art Gallery,Art Museum,Asian Restaurant,Bakery,Basque Restaurant,Beer Bar,Bistro,Bookstore,Botanical Garden,...,Spa,Supermarket,Tailor Shop,Temple,Thai Restaurant,Toy / Game Store,Trattoria/Osteria,Udon Restaurant,Vegetarian / Vegan Restaurant,Wine Bar
0,Arc de Triomphe,0.0,0.08,0.0,0.01,0.0,0.01,0.01,0.02,0.01,...,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0
1,Eiffel Tower,0.0,0.08,0.0,0.0,0.01,0.0,0.01,0.03,0.0,...,0.01,0.0,0.02,0.0,0.0,0.0,0.0,0.01,0.0,0.02
2,Louvre Museum,0.01,0.06,0.0,0.02,0.0,0.01,0.0,0.05,0.0,...,0.0,0.01,0.01,0.0,0.0,0.01,0.0,0.01,0.0,0.03
3,Panthéon,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.02,0.0,0.01,0.0,0.0,0.0,0.04
4,Sacré-Cœur,0.01,0.06,0.01,0.04,0.0,0.01,0.01,0.03,0.0,...,0.0,0.01,0.0,0.0,0.01,0.01,0.01,0.01,0.01,0.01


In [43]:
one_hot_paris_grouped.shape

(5, 76)

### Top 10 venues

In [44]:
number = 10

for point_name in one_hot_paris_grouped["point name"]:
    print(f"---------{point_name}---------")
    temp = one_hot_paris_grouped[one_hot_paris_grouped["point name"] == point_name].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(number))
    print('\n')

---------Arc de Triomphe---------
                 venue  freq
0                Hotel  0.16
1                Plaza  0.09
2           Art Museum  0.08
3               Garden  0.08
4    French Restaurant  0.06
5        Historic Site  0.05
6   Italian Restaurant  0.03
7             Boutique  0.03
8       Scenic Lookout  0.02
9  Indie Movie Theater  0.02


---------Eiffel Tower---------
               venue  freq
0              Hotel  0.14
1              Plaza  0.10
2         Art Museum  0.08
3             Garden  0.08
4  French Restaurant  0.07
5      Historic Site  0.04
6     Ice Cream Shop  0.03
7          Bookstore  0.03
8           Boutique  0.03
9           Fountain  0.02


---------Louvre Museum---------
               venue  freq
0              Plaza  0.13
1              Hotel  0.10
2         Art Museum  0.06
3          Bookstore  0.05
4      Historic Site  0.04
5  French Restaurant  0.04
6     Ice Cream Shop  0.04
7             Garden  0.04
8   Pedestrian Plaza  0.03
9           W

### Sort the Venues 

In [45]:
def return_top_ten_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [46]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['point name']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
point_name_venues_sorted = pd.DataFrame(columns=columns)
point_name_venues_sorted['point name'] = one_hot_ny_grouped['point name']

for ind in np.arange(point_name_venues_sorted.shape[0]):
    point_name_venues_sorted.iloc[ind, 1:] = return_top_ten_venues(one_hot_ny_grouped.iloc[ind, :], num_top_venues)

point_name_venues_sorted.head()

Unnamed: 0,point name,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Arc de Triomphe,Hotel,Plaza,Art Museum,Garden,French Restaurant,Historic Site,Italian Restaurant,Boutique,Scenic Lookout,Indie Movie Theater
1,Eiffel Tower,Hotel,Plaza,Art Museum,Garden,French Restaurant,Historic Site,Ice Cream Shop,Bookstore,Boutique,Fountain
2,Louvre Museum,Plaza,Hotel,Art Museum,Bookstore,Historic Site,French Restaurant,Ice Cream Shop,Garden,Pedestrian Plaza,Wine Bar
3,Panthéon,Plaza,Historic Site,Ice Cream Shop,Monument / Landmark,Sandwich Place,Fountain,Church,Italian Restaurant,Hotel,Wine Bar
4,Sacré-Cœur,Hotel,Plaza,Art Museum,Historic Site,Bakery,Sandwich Place,Italian Restaurant,Park,Garden,Bookstore


### Cluster Points 

In [47]:
kclusters = 5
one_hot_paris_grouped_cluster = one_hot_paris_grouped.drop("point name", axis=1)

kmean = KMeans(n_clusters=kclusters, random_state=0).fit(one_hot_paris_grouped_cluster)
kmean.labels_

array([4, 2, 3, 1, 0], dtype=int32)

### Merge dataframe and clusters 

In [48]:
point_name_venues_sorted.insert(0, "Cluster Labels", kmean.labels_)
paris_york_merged = paris_points_with_lat_long
paris_york_merged = paris_york_merged.join(point_name_venues_sorted.set_index("point name"), on="point name")

In [49]:
paris_york_merged

Unnamed: 0,point name,lat,lng,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Panthéon,41.89861595,12.476833414483862,1,Plaza,Historic Site,Ice Cream Shop,Monument / Landmark,Sandwich Place,Fountain,Church,Italian Restaurant,Hotel,Wine Bar
1,Eiffel Tower,48.8582602,2.2944990543196795,2,Hotel,Plaza,Art Museum,Garden,French Restaurant,Historic Site,Ice Cream Shop,Bookstore,Boutique,Fountain
2,Arc de Triomphe,48.8737791,2.295037226037673,4,Hotel,Plaza,Art Museum,Garden,French Restaurant,Historic Site,Italian Restaurant,Boutique,Scenic Lookout,Indie Movie Theater
3,Louvre Museum,48.8611473,2.33802768704666,3,Plaza,Hotel,Art Museum,Bookstore,Historic Site,French Restaurant,Ice Cream Shop,Garden,Pedestrian Plaza,Wine Bar
4,Sacré-Cœur,48.88680575,2.3430153448835087,0,Hotel,Plaza,Art Museum,Historic Site,Bakery,Sandwich Place,Italian Restaurant,Park,Garden,Bookstore


### Map

In [52]:
paris_latitude = "48.8737791"
paris_longitude = "2.295037226037673"

In [54]:
map_clusters = folium.Map(location=[paris_latitude, paris_longitude], zoom_start=11)


x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]


markers_colors = []
for lat, lon, poi, cluster in zip(paris_york_merged['lat'], paris_york_merged['lng'], paris_york_merged['point name'], paris_york_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

TypeError: must be real number, not str

In [55]:
paris_york_merged['Cluster Labels']

0    1
1    2
2    4
3    3
4    0
Name: Cluster Labels, dtype: int32

#### Cluster 1

In [56]:
paris_york_merged.loc[paris_york_merged['Cluster Labels'] == 0, paris_york_merged.columns[[0] + list(range(3, paris_york_merged.shape[1]))]]

Unnamed: 0,point name,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Sacré-Cœur,0,Hotel,Plaza,Art Museum,Historic Site,Bakery,Sandwich Place,Italian Restaurant,Park,Garden,Bookstore


#### Cluster 2

In [57]:
paris_york_merged.loc[paris_york_merged['Cluster Labels'] == 1, paris_york_merged.columns[[0] + list(range(3, paris_york_merged.shape[1]))]]

Unnamed: 0,point name,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Panthéon,1,Plaza,Historic Site,Ice Cream Shop,Monument / Landmark,Sandwich Place,Fountain,Church,Italian Restaurant,Hotel,Wine Bar


#### Cluster 3

In [58]:
paris_york_merged.loc[paris_york_merged['Cluster Labels'] == 2, paris_york_merged.columns[[0] + list(range(3, paris_york_merged.shape[1]))]]

Unnamed: 0,point name,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Eiffel Tower,2,Hotel,Plaza,Art Museum,Garden,French Restaurant,Historic Site,Ice Cream Shop,Bookstore,Boutique,Fountain


#### Cluster 4

In [59]:
paris_york_merged.loc[paris_york_merged['Cluster Labels'] == 3, paris_york_merged.columns[[0] + list(range(3, paris_york_merged.shape[1]))]]

Unnamed: 0,point name,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,Louvre Museum,3,Plaza,Hotel,Art Museum,Bookstore,Historic Site,French Restaurant,Ice Cream Shop,Garden,Pedestrian Plaza,Wine Bar


#### Cluster 5

In [60]:
paris_york_merged.loc[paris_york_merged['Cluster Labels'] == 4, paris_york_merged.columns[[0] + list(range(3, paris_york_merged.shape[1]))]]

Unnamed: 0,point name,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Arc de Triomphe,4,Hotel,Plaza,Art Museum,Garden,French Restaurant,Historic Site,Italian Restaurant,Boutique,Scenic Lookout,Indie Movie Theater
