### Imports 

In [None]:
import requests
import pandas as pd
from geopy.geocoders import Nominatim

import numpy as np
from sklearn.cluster import KMeans

import matplotlib.cm as cm
import matplotlib.colors as colors

### Getting latitude, longitude

In [None]:
# Landmarks to geocode. "Panthéon" is qualified with the city because the
# bare name is ambiguous for a geocoder (the coordinates previously recorded
# for it in this notebook, 41.90 N / 12.48 E, are in Rome).
addresses = [
    "Panthéon, Paris",
    "Eiffel Tower",
    "Arc de Triomphe",
    "Louvre Museum",
    "Sacré-Cœur",
]
def get_lat_long(address):
    """Geocode *address* with Nominatim, print the result and return it.

    Args:
        address: Free-text place name or address to look up.

    Returns:
        A ``(latitude, longitude)`` tuple taken from the geocoder's match.

    Raises:
        ValueError: If the geocoder returns no match for *address*.
    """
    geolocator = Nominatim(user_agent="foursquare_agent")
    location = geolocator.geocode(address)
    # geocode() yields None when nothing matches; fail with a clear message
    # instead of an AttributeError on `.latitude`.
    if location is None:
        raise ValueError(f"Could not geocode address: {address!r}")

    latitude, longitude = location.latitude, location.longitude
    print(address, latitude, longitude)
    return latitude, longitude

In [None]:
# Geocode every landmark in turn; get_lat_long prints each result.
for address in addresses:
    get_lat_long(address)

### Five points of Paris

In [None]:
# Each landmark is stored as [name, latitude, longitude], with the
# coordinates kept as strings.
# The Panthéon entry previously held 41.898... / 12.476..., which is in Rome,
# so a search around it would not cover Paris at all. The values below are
# approximate coordinates of the Panthéon in Paris; re-run
# get_lat_long("Panthéon, Paris") to refresh them from the geocoder.
Panthéon = ["Panthéon", "48.8462", "2.3464"]
Eiffel_Tower = ["Eiffel Tower","48.858260200000004", "2.2944990543196795"]
Arc_de_Triomphe = ["Arc de Triomphe","48.8737791", "2.295037226037673"]
Louvre_Museum = ["Louvre Museum","48.8611473", "2.33802768704666"]
Sacré_Cœur = ["Sacré-Cœur","48.88680575", "2.3430153448835087"]

In [None]:
# One row per landmark, built from the [name, lat, lng] lists.
landmark_rows = [
    Panthéon,
    Eiffel_Tower,
    Arc_de_Triomphe,
    Louvre_Museum,
    Sacré_Cœur,
]
paris_points_with_lat_long = pd.DataFrame(
    landmark_rows,
    columns=["Point name", "lat", "lng"],
)
paris_points_with_lat_long

### API setup

In [None]:
def saving_data(area_name, latitude, longitude):
    """Fetch Foursquare "explore" venues around a point, tidy and pickle them.

    Args:
        area_name: Label of the area; used as the stem of the pickle file.
        latitude: Latitude of the search centre (string or number).
        longitude: Longitude of the search centre (string or number).

    Returns:
        DataFrame with one row per venue holding the venue name, the name of
        its first category, every ``venue.location.*`` field and the venue
        id. Column names are reduced to their last dotted component.

    Raises:
        requests.HTTPError: If the API answers with an error status code.
    """
    # NOTE(review): the API credentials are hard-coded in the notebook.
    # They should be rotated and loaded from configuration instead.
    params = {
        "client_id": "N40W0THAJDZYKLHLHWRDBU01LIMNXBMXZ03X5ZOGZSRVMLSR",
        "client_secret": "GZNJSGG3SLAYGMN4BJCC5VIOFBT2EW10OUF1V20ZO4P3V2YT",
        "ll": f"{latitude},{longitude}",
        "v": "20180604",
        "radius": 30000,
        "limit": 100,
    }
    response = requests.get(
        "https://api.foursquare.com/v2/venues/explore",
        params=params,
        timeout=30,  # do not hang the notebook on a stalled connection
    )
    # Surface API/HTTP failures here rather than as a KeyError further down.
    response.raise_for_status()
    results = response.json()

    dataframe = pd.json_normalize(results["response"]["groups"][0]["items"])
    location_columns = [col for col in dataframe.columns if col.startswith('venue.location.')]
    filtered_columns = ['venue.name', 'venue.categories', *location_columns, 'venue.id']
    # Explicit copy so the assignments below act on an independent frame.
    dataframe_filtered = dataframe.loc[:, filtered_columns].copy()
    # Replace each category list by the name of its first category.
    dataframe_filtered['venue.categories'] = dataframe_filtered.apply(get_category_type, axis=1)
    # Keep only the last dotted component, e.g. 'venue.location.lat' -> 'lat'.
    dataframe_filtered.columns = [col.split('.')[-1] for col in dataframe_filtered.columns]

    # Cache the result on disk so the API need not be called again.
    dataframe_filtered.to_pickle(f"{area_name}.pkl")
    return dataframe_filtered

### Get the Categories

In [None]:
def get_category_type(row):
    """Return the name of the first category attached to a venue record.

    Args:
        row: Mapping-like venue record (for example a DataFrame row) holding
            a list of category dicts under ``'categories'`` or, when that key
            is absent, under ``'venue.categories'``.

    Returns:
        The ``'name'`` of the first category, or ``None`` when the record has
        no categories (empty list or ``None``).

    Raises:
        KeyError: If neither key is present in *row*.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Raw API rows still carry the un-shortened column name.
        categories_list = row['venue.categories']

    if not categories_list:
        return None
    return categories_list[0]['name']

### Collect the data from the API and merge 

In [None]:
# Fetch (and pickle) the venues around each of the five landmarks. Every
# landmark list is [name, lat, lng], which lines up with the positional
# parameters of saving_data. All five frames are needed by the cells below,
# so none of the calls may be left commented out.
df_Panthéon = saving_data(*Panthéon)
df_Eiffel_Tower = saving_data(*Eiffel_Tower)
df_Arc_de_Triomphe = saving_data(*Arc_de_Triomphe)
df_Louvre_Museum = saving_data(*Louvre_Museum)
df_Sacré_Cœur = saving_data(*Sacré_Cœur)

### Add a column with the name of the Paris point

In [None]:
# Tag every venue row with the landmark it was fetched for.
for frame, label in (
    (df_Panthéon, "Panthéon"),
    (df_Eiffel_Tower, "Eiffel Tower"),
    (df_Arc_de_Triomphe, "Arc de Triomphe"),
    (df_Louvre_Museum, "Louvre Museum"),
    (df_Sacré_Cœur, "Sacré-Cœur"),
):
    frame["Point name"] = label

In [None]:
# Stack the five per-landmark tables into one frame.
per_point_frames = [
    df_Panthéon,
    df_Eiffel_Tower,
    df_Arc_de_Triomphe,
    df_Louvre_Museum,
    df_Sacré_Cœur,
]
full_df = pd.concat(per_point_frames)

In [None]:
# Write the combined venue table to full_paris_data.csv.
full_df.to_csv("full_paris_data.csv")

In [None]:
# Preview the first rows of the combined venue table.
full_df.head()

#### Unique Categories

In [None]:
# Number of distinct values in the "categories" column across all points.
distinct_categories = full_df["categories"].unique()
len(distinct_categories)

### One hot encoding

In [None]:
# Indicator columns, one per category; the empty prefix and separator leave
# the bare category names as column labels. The landmark label is then
# re-attached so the rows can be grouped per point.
category_column = full_df[["categories"]]
one_hot_paris = pd.get_dummies(category_column, prefix="", prefix_sep="")
one_hot_paris["Point name"] = full_df["Point name"]

In [None]:
# Preview the first rows of the one-hot encoded table.
one_hot_paris.head()

### Test Frequency of occurrence of each category

In [None]:
# Mean of every indicator column per landmark, i.e. the share of that
# landmark's venues falling in each category.
per_point = one_hot_paris.groupby("Point name")
one_hot_paris_grouped = per_point.mean().reset_index()
one_hot_paris_grouped

In [None]:
# (rows, columns) of the per-landmark frequency table.
one_hot_paris_grouped.shape

### Top 10 venues

In [None]:
number = 10

# For every landmark, print its `number` most frequent venue categories.
for point_name in one_hot_paris_grouped["Point name"]:
    print(f"---------{point_name}---------")
    matches_point = one_hot_paris_grouped["Point name"] == point_name
    temp = one_hot_paris_grouped[matches_point].T.reset_index()
    temp.columns = ['venue', 'freq']
    # Row 0 is the "Point name" entry itself, so drop it before casting.
    temp = temp.iloc[1:].astype({'freq': float}).round({'freq': 2})
    ranked = temp.sort_values('freq', ascending=False).reset_index(drop=True)
    print(ranked.head(number))
    print('\n')

### Sort the Venues 

In [None]:
def return_top_ten_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of *row*.

    The first entry of *row* is skipped; the remaining entries are ordered
    by value, largest first, and the labels of the first ``num_top_venues``
    of them are returned as an array.
    """
    frequencies = row.iloc[1:]
    ranked_labels = frequencies.sort_values(ascending=False).index.values
    return ranked_labels[:num_top_venues]

In [None]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# Column headers: the landmark name followed by "1st", "2nd", "3rd", "4th",
# ... "Most Common Venue". The first header is spelled 'Point name' so that
# it is the same column that the merge step later uses as its key.
columns = ['Point name']
for ind in range(num_top_venues):
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append(f'{ind + 1}{suffix} Most Common Venue')

# One row per landmark: its name followed by its most frequent categories,
# taken from the per-landmark frequency table.
rows = [
    [row['Point name'], *return_top_ten_venues(row, num_top_venues)]
    for _, row in one_hot_paris_grouped.iterrows()
]
point_name_venues_sorted = pd.DataFrame(rows, columns=columns)

point_name_venues_sorted.head()

### Cluster Points 

In [None]:
kclusters = 5

# Cluster on the category frequencies only; the name column is dropped
# before fitting.
one_hot_paris_grouped_cluster = one_hot_paris_grouped.drop(columns="Point name")

kmean = KMeans(n_clusters=kclusters, random_state=0)
kmean.fit(one_hot_paris_grouped_cluster)
kmean.labels_

### Merge dataframe and clusters 

In [None]:
# Put the cluster label in front of the top-venue columns, then attach both
# to the coordinates table, matching rows on the landmark name.
point_name_venues_sorted.insert(0, "Cluster Labels", kmean.labels_)
venues_by_point = point_name_venues_sorted.set_index("Point name")
paris_york_merged = paris_points_with_lat_long.join(venues_by_point, on="Point name")

In [None]:
# Display the merged table: coordinates, cluster label and top venues per point.
paris_york_merged

In [None]:
# Cluster label assigned to each point.
paris_york_merged['Cluster Labels']

#### Cluster 1

In [None]:
# Points with cluster label 0: show column 0 plus every column from
# position 3 onwards (positions 1 and 2 are skipped).
cluster_mask = paris_york_merged['Cluster Labels'] == 0
shown_columns = paris_york_merged.columns[[0] + list(range(3, paris_york_merged.shape[1]))]
paris_york_merged.loc[cluster_mask, shown_columns]

#### Cluster 2

In [None]:
# Points with cluster label 1: show column 0 plus every column from
# position 3 onwards (positions 1 and 2 are skipped).
cluster_mask = paris_york_merged['Cluster Labels'] == 1
shown_columns = paris_york_merged.columns[[0] + list(range(3, paris_york_merged.shape[1]))]
paris_york_merged.loc[cluster_mask, shown_columns]

#### Cluster 3

In [None]:
# Points with cluster label 2: show column 0 plus every column from
# position 3 onwards (positions 1 and 2 are skipped).
cluster_mask = paris_york_merged['Cluster Labels'] == 2
shown_columns = paris_york_merged.columns[[0] + list(range(3, paris_york_merged.shape[1]))]
paris_york_merged.loc[cluster_mask, shown_columns]

#### Cluster 4

In [None]:
# Points with cluster label 3: show column 0 plus every column from
# position 3 onwards (positions 1 and 2 are skipped).
cluster_mask = paris_york_merged['Cluster Labels'] == 3
shown_columns = paris_york_merged.columns[[0] + list(range(3, paris_york_merged.shape[1]))]
paris_york_merged.loc[cluster_mask, shown_columns]

#### Cluster 5

In [None]:
# Points with cluster label 4: show column 0 plus every column from
# position 3 onwards (positions 1 and 2 are skipped).
cluster_mask = paris_york_merged['Cluster Labels'] == 4
shown_columns = paris_york_merged.columns[[0] + list(range(3, paris_york_merged.shape[1]))]
paris_york_merged.loc[cluster_mask, shown_columns]