#### Import Libs

In [1]:
import requests
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
import folium

### Loading the data from the Data source. 
All data is kept in data folder. Link: https://github.com/tazV2/Applied-Data-Science-Capstone/tree/main/data
If you are interested in how I collected the data from the Foursquare API, please check:
1. For Manhattan New York area https://github.com/tazV2/Applied-Data-Science-Capstone/blob/main/week_4/new_york.ipynb
2. For Paris https://github.com/tazV2/Applied-Data-Science-Capstone/blob/main/week_4/paris.ipynb

Those notebooks contain the detailed code used to collect the data. I have changed the Foursquare client secret, so if you want to run them, please substitute your own client ID and secret.

In [2]:
# Read the New York venues, keep the first row for every distinct "name",
# and rename the "point name" column to "Point name".
nyc_full_data = (
    pd.read_csv("/home/taz/PycharmProjects/Applied-Data-Science-Capstone/data/full_nyc_data.csv")
    .drop_duplicates(subset=["name"])
    .rename(columns={"point name": "Point name"})
)

# Same preparation for the Paris venues.
paris_full_data = (
    pd.read_csv("/home/taz/PycharmProjects/Applied-Data-Science-Capstone/data/full_paris_data.csv")
    .drop_duplicates(subset=["name"])
    .rename(columns={"point name": "Point name"})
)

### Battle Begins 
#### Unique Categories

In [3]:
# Number of distinct values in the "categories" column of the New York data.
nyc_category_count = len(nyc_full_data["categories"].unique())
f"Manhattan New York has {nyc_category_count} categories"

'Manhattan New York has 86 categories'

In [4]:
# Number of distinct values in the "categories" column of the Paris data.
paris_category_count = len(paris_full_data["categories"].unique())
f"Paris center has {paris_category_count} categories"

'Paris center has 75 categories'

#### One hot encoding 

In [5]:
# One indicator column per category value. The empty prefix and separator make
# every indicator column carry the bare category name.
dummy_options = {"prefix": "", "prefix_sep": ""}

nyc_one_hot_encoding = pd.get_dummies(nyc_full_data[["categories"]], **dummy_options)
paris_one_hot_encoding = pd.get_dummies(paris_full_data[["categories"]], **dummy_options)

In [6]:
# Append the "Point name" of every venue (aligned on the row index) so the
# indicator columns can be grouped by it.
nyc_one_hot_encoding = nyc_one_hot_encoding.assign(
    **{"Point name": nyc_full_data["Point name"]}
)
nyc_one_hot_encoding.head()

Unnamed: 0,American Restaurant,Art Gallery,Art Museum,Athletics & Sports,Bakery,Bar,Beach,Beer Store,Bookstore,Botanical Garden,...,Theater,Track,Trail,Udon Restaurant,Volleyball Court,Waterfront,Wine Bar,Wine Shop,Yoga Studio,Point name
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,One World Trade Center
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,One World Trade Center
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,One World Trade Center
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,One World Trade Center
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,One World Trade Center


In [7]:
# Append the "Point name" of every venue (aligned on the row index) so the
# indicator columns can be grouped by it.
paris_one_hot_encoding = paris_one_hot_encoding.assign(
    **{"Point name": paris_full_data["Point name"]}
)
paris_one_hot_encoding.head()

Unnamed: 0,Art Gallery,Art Museum,Asian Restaurant,Bakery,Basque Restaurant,Beer Bar,Bistro,Bookstore,Botanical Garden,Boutique,...,Supermarket,Tailor Shop,Temple,Thai Restaurant,Toy / Game Store,Trattoria/Osteria,Udon Restaurant,Vegetarian / Vegan Restaurant,Wine Bar,Point name
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Panthéon
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Panthéon
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Panthéon
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Panthéon
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Panthéon


#### Test Frequency of occurrence of each category

#### Manhattan New York

In [8]:
# The mean of each indicator column per point is the share of that point's
# venues falling in the category. "Point name" stays a regular column.
nyc_one_hot_grouped = nyc_one_hot_encoding.groupby("Point name", as_index=False).mean()
nyc_one_hot_grouped.head()

Unnamed: 0,Point name,American Restaurant,Art Gallery,Art Museum,Athletics & Sports,Bakery,Bar,Beach,Beer Store,Bookstore,...,Thai Restaurant,Theater,Track,Trail,Udon Restaurant,Volleyball Court,Waterfront,Wine Bar,Wine Shop,Yoga Studio
0,Central park zoo new york,0.0,0.0,0.043478,0.0,0.043478,0.0,0.0,0.043478,0.043478,...,0.0,0.043478,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.043478
1,Empire State Building,0.0,0.0,0.025,0.0,0.05,0.0,0.0,0.0,0.025,...,0.0,0.075,0.025,0.0,0.0,0.0,0.0,0.0,0.025,0.0
2,Museum of the City of New York,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0
3,One World Trade Center,0.01,0.03,0.0,0.01,0.03,0.01,0.01,0.0,0.04,...,0.04,0.01,0.01,0.01,0.01,0.01,0.0,0.01,0.02,0.01
4,Yankee Stadium,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### Paris

In [9]:
# The mean of each indicator column per point is the share of that point's
# venues falling in the category. "Point name" stays a regular column.
paris_one_hot_grouped = paris_one_hot_encoding.groupby("Point name", as_index=False).mean()
paris_one_hot_grouped

Unnamed: 0,Point name,Art Gallery,Art Museum,Asian Restaurant,Bakery,Basque Restaurant,Beer Bar,Bistro,Bookstore,Botanical Garden,...,Spa,Supermarket,Tailor Shop,Temple,Thai Restaurant,Toy / Game Store,Trattoria/Osteria,Udon Restaurant,Vegetarian / Vegan Restaurant,Wine Bar
0,Arc de Triomphe,0.0,0.0,0.0,0.058824,0.0,0.058824,0.058824,0.0,0.058824,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Eiffel Tower,0.0,0.082474,0.0,0.0,0.010309,0.0,0.010309,0.030928,0.0,...,0.010309,0.0,0.020619,0.0,0.0,0.0,0.0,0.010309,0.0,0.020619
2,Louvre Museum,0.03125,0.03125,0.0,0.0625,0.0,0.0,0.0,0.0625,0.0,...,0.0,0.03125,0.0,0.0,0.0,0.03125,0.0,0.0,0.0,0.03125
3,Panthéon,0.0,0.020408,0.0,0.020408,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.020408,0.0,0.010204,0.0,0.0,0.0,0.040816
4,Sacré-Cœur,0.0,0.055556,0.055556,0.111111,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.055556,0.0,0.055556,0.0,0.055556,0.0


#### Let's check the top five venue categories of each place

##### New York(Manhattan area)

In [10]:
number = 5

# Print, for every point, its `number` most frequent venue categories.
for _, point_row in nyc_one_hot_grouped.iterrows():
    point_name = point_row["Point name"]
    print(f"---------{point_name}---------")
    # Everything after the leading "Point name" entry is a category frequency.
    freqs = point_row.iloc[1:].astype(float).round(2)
    temp = pd.DataFrame({"venue": freqs.index, "freq": freqs.values})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(number))
    print('\n')

---------Central park zoo new york---------
         venue  freq
0         Park  0.17
1  Pizza Place  0.09
2  Yoga Studio  0.04
3      Butcher  0.04
4        Plaza  0.04


---------Empire State Building---------
          venue  freq
0          Park  0.12
1           Gym  0.08
2       Theater  0.08
3  Concert Hall  0.05
4        Bakery  0.05


---------Museum of the City of New York---------
                venue  freq
0  Italian Restaurant   0.2
1             Butcher   0.2
2               Trail   0.2
3          Food Truck   0.2
4      Sandwich Place   0.2


---------One World Trade Center---------
             venue  freq
0             Park  0.15
1   Scenic Lookout  0.05
2   Ice Cream Shop  0.05
3        Bookstore  0.04
4  Thai Restaurant  0.04


---------Yankee Stadium---------
            venue  freq
0          Museum  0.22
1  Tennis Stadium  0.22
2          Garden  0.11
3            Park  0.11
4     Pizza Place  0.11




##### Center Of Paris

In [11]:
number = 5

# Print, for every point, its `number` most frequent venue categories.
for _, point_row in paris_one_hot_grouped.iterrows():
    point_name = point_row["Point name"]
    print(f"---------{point_name}---------")
    # Everything after the leading "Point name" entry is a category frequency.
    freqs = point_row.iloc[1:].astype(float).round(2)
    temp = pd.DataFrame({"venue": freqs.index, "freq": freqs.values})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(number))
    print('\n')

---------Arc de Triomphe---------
                 venue  freq
0                Hotel  0.18
1  Indie Movie Theater  0.12
2        Historic Site  0.06
3       Breakfast Spot  0.06
4                Plaza  0.06


---------Eiffel Tower---------
               venue  freq
0              Hotel  0.14
1              Plaza  0.10
2         Art Museum  0.08
3             Garden  0.08
4  French Restaurant  0.07


---------Louvre Museum---------
            venue  freq
0           Plaza  0.12
1     Coffee Shop  0.06
2          Bakery  0.06
3  Sandwich Place  0.06
4       Bookstore  0.06


---------Panthéon---------
                 venue  freq
0                Plaza  0.14
1        Historic Site  0.09
2       Ice Cream Shop  0.08
3  Monument / Landmark  0.07
4       Sandwich Place  0.06


---------Sacré-Cœur---------
              venue  freq
0            Bakery  0.11
1              Park  0.11
2        Art Museum  0.06
3              Café  0.06
4  Asian Restaurant  0.06




Above we can see the different venue categories that appear around each point. The two cities are clearly distinct from each other.

#### Let's get the top ten venues of each point so that we can navigate them easily

In [12]:
def return_top_ten_venues(row, num_top_venues):
    """Return the labels of the largest entries of ``row``.

    The first entry of ``row`` is skipped (the callers in this notebook pass
    rows whose first entry is "Point name"). The remaining entries are sorted
    in descending order and the index labels of the first ``num_top_venues``
    of them are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

#### Manhattan Area, New York

In [13]:
num_top_venues = 10

# Ordinal suffixes for ranks 1-3; every later rank gets "th".
indicators = ["st", "nd", "rd"]

# Column headers: "Point name" followed by one header per rank. The suffix is
# chosen with an explicit bounds check rather than a bare `except`, which
# would also have hidden unrelated errors.
columns = ["Point name"]
for ind in range(num_top_venues):
    suffix = indicators[ind] if ind < len(indicators) else "th"
    columns.append("{}{} Most Common Venue".format(ind + 1, suffix))

# One row per point, in the same order as nyc_one_hot_grouped.
nyc_name_sorted = pd.DataFrame(columns=columns)
nyc_name_sorted["Point name"] = nyc_one_hot_grouped["Point name"]

# Fill the rank columns of each row with that point's top categories.
for ind in range(nyc_name_sorted.shape[0]):
    nyc_name_sorted.iloc[ind, 1:] = return_top_ten_venues(nyc_one_hot_grouped.iloc[ind, :], num_top_venues)

nyc_name_sorted.head()

Unnamed: 0,Point name,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Central park zoo new york,Park,Pizza Place,Yoga Studio,Butcher,Plaza,Gym,Reservoir,Garden,Flower Shop,Field
1,Empire State Building,Park,Gym,Theater,Concert Hall,Bakery,Plaza,State / Provincial Park,Dance Studio,Salon / Barbershop,Climbing Gym
2,Museum of the City of New York,Italian Restaurant,Butcher,Trail,Food Truck,Sandwich Place,Restaurant,Reservoir,Plaza,Playground,Pizza Place
3,One World Trade Center,Park,Scenic Lookout,Ice Cream Shop,Bookstore,Thai Restaurant,Art Gallery,Bakery,Wine Shop,Seafood Restaurant,Music Venue
4,Yankee Stadium,Museum,Tennis Stadium,Garden,Park,Pizza Place,Gym,Botanical Garden,Restaurant,Reservoir,Plaza


#### Paris center area

In [14]:
num_top_venues = 10

# Ordinal suffixes for ranks 1-3; every later rank gets "th".
indicators = ["st", "nd", "rd"]

# Column headers: "Point name" followed by one header per rank. The suffix is
# chosen with an explicit bounds check rather than a bare `except`, which
# would also have hidden unrelated errors.
columns = ["Point name"]
for ind in range(num_top_venues):
    suffix = indicators[ind] if ind < len(indicators) else "th"
    columns.append("{}{} Most Common Venue".format(ind + 1, suffix))

# One row per point, in the same order as paris_one_hot_grouped.
paris_sorted = pd.DataFrame(columns=columns)
paris_sorted["Point name"] = paris_one_hot_grouped["Point name"]

# Iterate over the Paris table's own row count. The loop previously used
# nyc_name_sorted.shape[0], which only worked because both tables happen to
# have the same number of points.
for ind in range(paris_sorted.shape[0]):
    paris_sorted.iloc[ind, 1:] = return_top_ten_venues(paris_one_hot_grouped.iloc[ind, :], num_top_venues)

paris_sorted.head()

Unnamed: 0,Point name,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Arc de Triomphe,Hotel,Indie Movie Theater,Historic Site,Breakfast Spot,Plaza,Scenic Lookout,French Restaurant,Gym / Fitness Center,Corsican Restaurant,Botanical Garden
1,Eiffel Tower,Hotel,Plaza,Art Museum,Garden,French Restaurant,Historic Site,Bookstore,Fountain,Tailor Shop,Pedestrian Plaza
2,Louvre Museum,Plaza,Coffee Shop,Bakery,Sandwich Place,Bookstore,Ice Cream Shop,Concert Hall,Farmers Market,French Restaurant,Furniture / Home Store
3,Panthéon,Plaza,Historic Site,Ice Cream Shop,Monument / Landmark,Sandwich Place,Fountain,Church,Italian Restaurant,Hotel,Wine Bar
4,Sacré-Cœur,Bakery,Park,Art Museum,Café,Asian Restaurant,Vegetarian / Vegan Restaurant,Italian Restaurant,Trattoria/Osteria,Canal,Thai Restaurant


In [15]:
# Stack the New York and Paris top-venue tables into one frame.
# ignore_index=True gives the combined rows unique labels 0..n-1 instead of
# repeating each input frame's own 0-based labels.
df_merged_sorted = pd.concat([nyc_name_sorted, paris_sorted], ignore_index=True)

# Display only: set_index returns a new frame, so df_merged_sorted itself
# keeps "Point name" as a regular column.
df_merged_sorted.set_index("Point name")

Unnamed: 0_level_0,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
Point name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Central park zoo new york,Park,Pizza Place,Yoga Studio,Butcher,Plaza,Gym,Reservoir,Garden,Flower Shop,Field
Empire State Building,Park,Gym,Theater,Concert Hall,Bakery,Plaza,State / Provincial Park,Dance Studio,Salon / Barbershop,Climbing Gym
Museum of the City of New York,Italian Restaurant,Butcher,Trail,Food Truck,Sandwich Place,Restaurant,Reservoir,Plaza,Playground,Pizza Place
One World Trade Center,Park,Scenic Lookout,Ice Cream Shop,Bookstore,Thai Restaurant,Art Gallery,Bakery,Wine Shop,Seafood Restaurant,Music Venue
Yankee Stadium,Museum,Tennis Stadium,Garden,Park,Pizza Place,Gym,Botanical Garden,Restaurant,Reservoir,Plaza
Arc de Triomphe,Hotel,Indie Movie Theater,Historic Site,Breakfast Spot,Plaza,Scenic Lookout,French Restaurant,Gym / Fitness Center,Corsican Restaurant,Botanical Garden
Eiffel Tower,Hotel,Plaza,Art Museum,Garden,French Restaurant,Historic Site,Bookstore,Fountain,Tailor Shop,Pedestrian Plaza
Louvre Museum,Plaza,Coffee Shop,Bakery,Sandwich Place,Bookstore,Ice Cream Shop,Concert Hall,Farmers Market,French Restaurant,Furniture / Home Store
Panthéon,Plaza,Historic Site,Ice Cream Shop,Monument / Landmark,Sandwich Place,Fountain,Church,Italian Restaurant,Hotel,Wine Bar
Sacré-Cœur,Bakery,Park,Art Museum,Café,Asian Restaurant,Vegetarian / Vegan Restaurant,Italian Restaurant,Trattoria/Osteria,Canal,Thai Restaurant


#### How many Pizza shops around One World Trade Center?

In [16]:
# Read manhattan_pizza.csv into a DataFrame; it is passed to the map function
# in a later cell.
read_pizza_data_for_trade_center = pd.read_csv("/home/taz/PycharmProjects/Applied-Data-Science-Capstone/data/manhattan_pizza.csv")

In [17]:
def mapping_func_with_cat(city_lat, city_long, dataFrame, cat):
    """Build a folium map with one circle marker per venue of category ``cat``.

    Parameters
    ----------
    city_lat, city_long : float
        Coordinates the map is centred on.
    dataFrame : pandas.DataFrame
        Frame with "lat", "lng", "categories" and "name" columns.
    cat : str
        Value of the "categories" column whose rows are plotted.

    Returns
    -------
    folium.Map
        Map holding one green marker per matching row; each marker's popup
        shows the venue name.
    """
    mapping = folium.Map(location=[city_lat, city_long], zoom_start=12)
    # Filter once so coordinates and names come from the same rows. Zipping
    # the unfiltered lat/lng columns with the filtered names paired names
    # with the wrong coordinates whenever other categories were present.
    matches = dataFrame.loc[dataFrame["categories"] == cat, ["lat", "lng", "name"]]
    for lat, lon, poi in matches.itertuples(index=False, name=None):
        label = folium.Popup(str(poi), parse_html=True)
        folium.CircleMarker(
            [lat, lon],
            radius=5,
            popup=label,
            color="green",  # a single colour string, not a list
            fill=True,
            fill_color="green",
            fill_opacity=0.7).add_to(mapping)
    return mapping

In [37]:
# Map the "Pizza Place" rows of the pizza data, centred on the given
# coordinates (presumably One World Trade Center, per the heading above; confirm).
mapping_func_with_cat(40.71227903203283, -74.01331115542504, read_pizza_data_for_trade_center, "Pizza Place")

### Checking particular Venue


#### For example in New York
Here a visitor is looking for a theater.

In [19]:
# Rows of the New York data whose category is exactly "Theater".
theater = nyc_full_data.loc[nyc_full_data["categories"].eq("Theater")]

In [20]:
# Report how many rows the theater selection holds.
f"Total Theater in New York Manhattan area {len(theater)}"

'Total Theater in New York Manhattan area 5'

#### Theaters are located:

In [21]:
# Coordinates used as the centre of the theater map.
new_york_lat = 40.75098648862609
new_york_long = -73.9803052644599

# mapping_func_with_cat is already defined in an earlier cell. The identical
# copy that used to be redefined here only shadowed it, so the existing
# definition is reused.
mapping_func_with_cat(new_york_lat, new_york_long, theater, "Theater")

Above we can see the locations of the theaters in the Manhattan area. The same function can be used for other categories, and for Paris as well.

#### Let's Explore Paris
Let's see how many art galleries there are in the center of Paris.

In [22]:
# Rows of the Paris data whose category is exactly "Art Gallery".
art_gal = paris_full_data.loc[paris_full_data["categories"].eq("Art Gallery")]

In [23]:
# Report how many rows the art gallery selection holds.
f"Total {len(art_gal)} art gallery in Center of Paris"

'Total 1 art gallery in Center of Paris'

##### What? Only one Art Gallery?

Yes, because the area has many art museums instead.

In [24]:
# Rows of the Paris data whose category is exactly "Art Museum", and their count.
art_mus = paris_full_data.loc[paris_full_data["categories"].eq("Art Museum")]
f"Total {len(art_mus)} art museum in Center of Paris"

'Total 12 art museum in Center of Paris'

In [38]:
# Coordinates used as the centre of the Paris map.
paris_lat, paris_long = 48.86245165441227, 2.3281755564078788

# Plot the art museum selection on a map centred on those coordinates.
mapping_func_with_cat(
    paris_lat,
    paris_long,
    art_mus,
    "Art Museum",
)

## KMeans to Find Similarities and Dissimilarities

In [26]:
# Stack the per-point category frequencies of both cities row-wise. Categories
# present in only one city become NaN for the other city's rows.
grouped_both_one_hot = pd.concat(
    (nyc_one_hot_grouped, paris_one_hot_grouped),
    axis=0,
)

In [27]:
# Replace every missing value in the combined frame with 0.
grouped_both_one_hot = grouped_both_one_hot.fillna(0)

In [28]:
# Move "Point name" into the index so only the frequency columns remain as data.
grouped_both_one_hot = grouped_both_one_hot.set_index("Point name")

In [29]:
# Display the combined frame that is passed to KMeans in the next cell.
grouped_both_one_hot

Unnamed: 0_level_0,American Restaurant,Art Gallery,Art Museum,Athletics & Sports,Bakery,Bar,Beach,Beer Store,Bookstore,Botanical Garden,...,Pedestrian Plaza,Peruvian Restaurant,Pub,Roof Deck,Supermarket,Tailor Shop,Temple,Toy / Game Store,Trattoria/Osteria,Vegetarian / Vegan Restaurant
Point name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Central park zoo new york,0.0,0.0,0.043478,0.0,0.043478,0.0,0.0,0.043478,0.043478,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Empire State Building,0.0,0.0,0.025,0.0,0.05,0.0,0.0,0.0,0.025,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Museum of the City of New York,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
One World Trade Center,0.01,0.03,0.0,0.01,0.03,0.01,0.01,0.0,0.04,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Yankee Stadium,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Arc de Triomphe,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.058824,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Eiffel Tower,0.0,0.0,0.082474,0.0,0.0,0.0,0.0,0.0,0.030928,0.0,...,0.020619,0.010309,0.0,0.010309,0.0,0.020619,0.0,0.0,0.0,0.0
Louvre Museum,0.0,0.03125,0.03125,0.0,0.0625,0.0,0.0,0.0,0.0625,0.0,...,0.03125,0.0,0.0,0.0,0.03125,0.0,0.0,0.03125,0.0,0.0
Panthéon,0.0,0.0,0.020408,0.0,0.020408,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.010204,0.0,0.0,0.0,0.020408,0.010204,0.0,0.0
Sacré-Cœur,0.0,0.0,0.055556,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.055556


In [30]:
kclusters = 4

# Fit k-means on the per-point category frequencies. n_init is pinned to 10
# because scikit-learn changed its default (10 -> "auto" in 1.4); relying on
# the default would make the labels depend on the installed version.
kmeans = KMeans(n_clusters=kclusters, n_init=10, random_state=0).fit(grouped_both_one_hot)

# Cluster label assigned to each of the first ten rows of the fitted frame.
kmeans.labels_[0:10]

array([3, 3, 0, 3, 2, 1, 1, 1, 1, 3], dtype=int32)

In [31]:
# The labels are attached by position, so the rows of df_merged_sorted must be
# in the same order as the rows KMeans was fitted on.
assert list(df_merged_sorted["Point name"]) == list(grouped_both_one_hot.index), \
    "row order of df_merged_sorted does not match the clustered frame"

# Drop a column left over from a previous run so the cell can be re-executed;
# DataFrame.insert raises if the column already exists.
if "Cluster Labels" in df_merged_sorted.columns:
    df_merged_sorted = df_merged_sorted.drop(columns="Cluster Labels")
df_merged_sorted.insert(0, "Cluster Labels", kmeans.labels_)

In [32]:
# Display only: set_index returns a new frame indexed by cluster label and
# leaves df_merged_sorted itself unchanged.
df_merged_sorted.set_index("Cluster Labels")

Unnamed: 0_level_0,Point name,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
Cluster Labels,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
3,Central park zoo new york,Park,Pizza Place,Yoga Studio,Butcher,Plaza,Gym,Reservoir,Garden,Flower Shop,Field
3,Empire State Building,Park,Gym,Theater,Concert Hall,Bakery,Plaza,State / Provincial Park,Dance Studio,Salon / Barbershop,Climbing Gym
0,Museum of the City of New York,Italian Restaurant,Butcher,Trail,Food Truck,Sandwich Place,Restaurant,Reservoir,Plaza,Playground,Pizza Place
3,One World Trade Center,Park,Scenic Lookout,Ice Cream Shop,Bookstore,Thai Restaurant,Art Gallery,Bakery,Wine Shop,Seafood Restaurant,Music Venue
2,Yankee Stadium,Museum,Tennis Stadium,Garden,Park,Pizza Place,Gym,Botanical Garden,Restaurant,Reservoir,Plaza
1,Arc de Triomphe,Hotel,Indie Movie Theater,Historic Site,Breakfast Spot,Plaza,Scenic Lookout,French Restaurant,Gym / Fitness Center,Corsican Restaurant,Botanical Garden
1,Eiffel Tower,Hotel,Plaza,Art Museum,Garden,French Restaurant,Historic Site,Bookstore,Fountain,Tailor Shop,Pedestrian Plaza
1,Louvre Museum,Plaza,Coffee Shop,Bakery,Sandwich Place,Bookstore,Ice Cream Shop,Concert Hall,Farmers Market,French Restaurant,Furniture / Home Store
1,Panthéon,Plaza,Historic Site,Ice Cream Shop,Monument / Landmark,Sandwich Place,Fountain,Church,Italian Restaurant,Hotel,Wine Bar
3,Sacré-Cœur,Bakery,Park,Art Museum,Café,Asian Restaurant,Vegetarian / Vegan Restaurant,Italian Restaurant,Trattoria/Osteria,Canal,Thai Restaurant


In [33]:
# Points assigned to cluster 0, shown without the label column. The previous
# selector, columns[[1] + list(range(2, n))], was just "every column after
# the first".
df_merged_sorted.loc[df_merged_sorted["Cluster Labels"] == 0].drop(columns="Cluster Labels")

Unnamed: 0,Point name,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Museum of the City of New York,Italian Restaurant,Butcher,Trail,Food Truck,Sandwich Place,Restaurant,Reservoir,Plaza,Playground,Pizza Place


In [34]:
# Points assigned to cluster 1, shown without the label column. The previous
# selector, columns[[1] + list(range(2, n))], was just "every column after
# the first".
df_merged_sorted.loc[df_merged_sorted["Cluster Labels"] == 1].drop(columns="Cluster Labels")

Unnamed: 0,Point name,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Arc de Triomphe,Hotel,Indie Movie Theater,Historic Site,Breakfast Spot,Plaza,Scenic Lookout,French Restaurant,Gym / Fitness Center,Corsican Restaurant,Botanical Garden
1,Eiffel Tower,Hotel,Plaza,Art Museum,Garden,French Restaurant,Historic Site,Bookstore,Fountain,Tailor Shop,Pedestrian Plaza
2,Louvre Museum,Plaza,Coffee Shop,Bakery,Sandwich Place,Bookstore,Ice Cream Shop,Concert Hall,Farmers Market,French Restaurant,Furniture / Home Store
3,Panthéon,Plaza,Historic Site,Ice Cream Shop,Monument / Landmark,Sandwich Place,Fountain,Church,Italian Restaurant,Hotel,Wine Bar


In [35]:
# Points assigned to cluster 2, shown without the label column. The previous
# selector, columns[[1] + list(range(2, n))], was just "every column after
# the first".
df_merged_sorted.loc[df_merged_sorted["Cluster Labels"] == 2].drop(columns="Cluster Labels")

Unnamed: 0,Point name,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Yankee Stadium,Museum,Tennis Stadium,Garden,Park,Pizza Place,Gym,Botanical Garden,Restaurant,Reservoir,Plaza


In [36]:
# Points assigned to cluster 3, shown without the label column. The previous
# selector, columns[[1] + list(range(2, n))], was just "every column after
# the first".
df_merged_sorted.loc[df_merged_sorted["Cluster Labels"] == 3].drop(columns="Cluster Labels")

Unnamed: 0,Point name,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Central park zoo new york,Park,Pizza Place,Yoga Studio,Butcher,Plaza,Gym,Reservoir,Garden,Flower Shop,Field
1,Empire State Building,Park,Gym,Theater,Concert Hall,Bakery,Plaza,State / Provincial Park,Dance Studio,Salon / Barbershop,Climbing Gym
3,One World Trade Center,Park,Scenic Lookout,Ice Cream Shop,Bookstore,Thai Restaurant,Art Gallery,Bakery,Wine Shop,Seafood Restaurant,Music Venue
4,Sacré-Cœur,Bakery,Park,Art Museum,Café,Asian Restaurant,Vegetarian / Vegan Restaurant,Italian Restaurant,Trattoria/Osteria,Canal,Thai Restaurant
