### Imports 

In [5]:
import requests
import pandas as pd
from geopy.geocoders import Nominatim

import numpy as np
from sklearn.cluster import KMeans

import folium
import matplotlib.cm as cm
import matplotlib.colors as colors

### Getting latitude, longitude

In [None]:
# Free-text place names that are geocoded one by one with get_lat_long.
addresses = ["One World Trade Center", "Empire State Building", 
            "Central park zoo new york", "Museum of the City of New York", 
            "Yankee Stadium"]
def get_lat_long(address):
    """Geocode ``address`` with Nominatim, print the result and return its coordinates.

    Parameters
    ----------
    address : str
        Free-text address or place name passed to ``geolocator.geocode``.

    Returns
    -------
    tuple
        ``(latitude, longitude)`` read from the geocoder result.

    Raises
    ------
    ValueError
        If the geocoder finds no match for ``address``.
    """
    geolocator = Nominatim(user_agent="foursquare_agent")
    location = geolocator.geocode(address)
    # geocode() yields None when nothing matches; fail with a clear message
    # instead of an AttributeError on ``None.latitude``.
    if location is None:
        raise ValueError(f"Could not geocode address: {address!r}")
    latitude, longitude = location.latitude, location.longitude
    print(address, latitude, longitude)
    return latitude, longitude

In [None]:
# Geocode every place name; get_lat_long prints the coordinates as it goes.
for address in addresses:
    get_lat_long(address)

### Five points of NY

In [None]:
# Each landmark is stored as [name, latitude, longitude]; the coordinates are kept
# as strings here (presumably copied from the get_lat_long output - confirm).
one_world_trade_center = ["One World Trade Center","40.7130186", "-74.01317859995396"]
empire_state_building = ["Empire State Building","40.748428399999995", "-73.98565461987332"]
central_park_zoo = ["Central park zoo new york","40.7676005", "-73.97184547517396"]
museum_city_of_new_york = ["Museum of the City of New York","40.792516250000006", "-73.95180912958764"]
yankee_stadium = ["Yankee Stadium","40.82958275", "-73.92652118491901"]

In [None]:


# One row per landmark: display name plus its latitude/longitude.
new_york_points_with_lat_long = pd.DataFrame(
    data=[
        one_world_trade_center,
        empire_state_building,
        central_park_zoo,
        museum_city_of_new_york,
        yankee_stadium,
    ],
    columns=["Point name", "lat", "lng"],
)
new_york_points_with_lat_long

### API setup

In [None]:
def saving_data(area_name, latitude, longitude):
    """Fetch venues around a point from the Foursquare explore endpoint and cache them.

    Parameters
    ----------
    area_name : str
        Used as the pickle file name: the result is written to ``"{area_name}.pkl"``.
    latitude, longitude : str or float
        Centre of the search, sent to the API as ``ll="{latitude},{longitude}"``.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with its name, first category name, all
        ``venue.location.*`` fields and the venue id; column names are reduced to
        the part after the last dot.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.

    Notes
    -----
    ``get_category_type`` is looked up when this function is called, so it must be
    defined before the first call.
    """
    import os  # local import keeps this cell self-contained

    # NOTE(review): credentials should not live in the notebook. They can now be
    # supplied through the environment; the literals remain only as a fallback so
    # existing runs keep working - rotate them and drop the fallback.
    params = {
        "client_id": os.environ.get(
            "FOURSQUARE_CLIENT_ID", "N40W0THAJDZYKLHLHWRDBU01LIMNXBMXZ03X5ZOGZSRVMLSR"
        ),
        "client_secret": os.environ.get(
            "FOURSQUARE_CLIENT_SECRET", "PRO2XOOK3SLUG52RR0LM0AZX5RLRSAHIQ2NJWSLXMRVDBEGJ"
        ),
        "ll": f"{latitude},{longitude}",
        "v": "20180604",
        "radius": 30000,
        "limit": 100,
    }
    # Let requests build and encode the query string; the timeout stops a stalled
    # connection from hanging the notebook.
    response = requests.get(
        "https://api.foursquare.com/v2/venues/explore", params=params, timeout=30
    )
    # Surface HTTP errors directly instead of failing later with a KeyError.
    response.raise_for_status()
    results = response.json()

    dataframe = pd.json_normalize(results["response"]["groups"][0]["items"])
    location_columns = [col for col in dataframe.columns if col.startswith('venue.location.')]
    filtered_columns = ['venue.name', 'venue.categories'] + location_columns + ['venue.id']
    # Explicit copy so the column assignment below never writes into a view.
    dataframe_filtered = dataframe.loc[:, filtered_columns].copy()
    dataframe_filtered['venue.categories'] = dataframe_filtered.apply(get_category_type, axis=1)  # category for each row
    dataframe_filtered.columns = [col.split('.')[-1] for col in dataframe_filtered.columns]  # clean columns

    # Save to pickle so the API does not have to be called over and over again.
    dataframe_filtered.to_pickle(f"{area_name}.pkl")
    return dataframe_filtered

### Get the Categories

In [7]:
def get_category_type(row):
    """Return the name of the first category listed for a venue row.

    Parameters
    ----------
    row : mapping-like (e.g. a pandas Series)
        Looked up under ``'categories'`` first and, if that key is missing, under
        ``'venue.categories'``. The value is expected to be a list of dicts that
        each carry a ``'name'`` key.

    Returns
    -------
    str or None
        ``categories[0]['name']``, or ``None`` when the category list is empty or
        the value has no length at all (e.g. ``None`` / ``NaN`` for a venue
        without category data).

    Raises
    ------
    KeyError
        If neither key is present in ``row``.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key selects the fallback column; other errors propagate.
        categories_list = row['venue.categories']

    try:
        has_entries = len(categories_list) > 0
    except TypeError:
        # None / NaN placeholders have no length: treat them as "no category".
        has_entries = False

    if not has_entries:
        return None
    return categories_list[0]['name']

### Collect the data from the API and merge 

In [None]:
# Fetch the venues around each landmark. Every list unpacks into
# saving_data(area_name, latitude, longitude), which also pickles the result
# to "<area_name>.pkl".
df_one_world_trade_center = saving_data(*one_world_trade_center)
df_empire_state_building = saving_data(*empire_state_building)
df_central_park_zoo = saving_data(*central_park_zoo)
df_museum_city_of_new_york = saving_data(*museum_city_of_new_york)
df_yankee_stadium = saving_data(*yankee_stadium)

### Add a column with the name of the NY point

In [None]:
# Tag every venue row with the landmark it was fetched for; assigning a scalar
# fills the whole column.
df_one_world_trade_center["Point name"] = "One World Trade Center"
df_empire_state_building["Point name"] = "Empire State Building"
df_central_park_zoo["Point name"] = "Central park zoo new york"
df_museum_city_of_new_york["Point name"] = "Museum of the City of New York"
df_yankee_stadium["Point name"] = "Yankee Stadium"

In [None]:
# Per-landmark venue tables, in the order the landmarks were defined.
all_df_data = [
    df_one_world_trade_center,
    df_empire_state_building,
    df_central_park_zoo,
    df_museum_city_of_new_york,
    df_yankee_stadium,
]

# Stack them into one table; the original row indexes are kept (ignore_index is not set).
full_df = pd.concat(all_df_data)

In [None]:
full_df.to_csv("full_nyc_data.csv")

In [None]:
# Reload the cached CSV so the later cells can run without calling the API again.
# The result is bound to full_df (the previous name had a typo, so the reloaded
# data was never used), the path is relative like the to_csv call that wrote the
# file, and index_col=0 restores the index column that to_csv saved.
full_df = pd.read_csv("full_nyc_data.csv", index_col=0)
full_df.head()

#### Unique Categories

In [None]:
len(full_df["categories"].unique())

### One hot encoding

In [None]:
# One indicator column per venue category; the empty prefix/separator keeps the
# raw category names as column names.
one_hot_ny = pd.get_dummies(full_df[["categories"]], prefix="", prefix_sep="")
# Add the landmark name back so the rows can be grouped per point.
one_hot_ny["Point name"] = full_df["Point name"]

In [None]:
one_hot_ny.head()

### Test Frequency of occurrence of each category

In [None]:
# Mean of every indicator column per landmark, i.e. the share of that point's
# venues that fall into each category.
one_hot_ny_grouped = one_hot_ny.groupby("Point name").mean().reset_index()
one_hot_ny_grouped

In [None]:
one_hot_ny_grouped.shape

### Top 10 venues

In [None]:
number = 10

# For each landmark, print its `number` most frequent venue categories.
for point_name in one_hot_ny_grouped["Point name"]:
    print(f"---------{point_name}---------")
    is_point = one_hot_ny_grouped["Point name"] == point_name
    # Transpose the single matching row into (venue, freq) pairs.
    temp = one_hot_ny_grouped.loc[is_point].T.reset_index()
    temp.columns = ['venue', 'freq']
    # The first row holds the "Point name" value itself, not a frequency.
    temp = temp.iloc[1:].astype({'freq': float}).round({'freq': 2})
    ranked = temp.sort_values('freq', ascending=False).reset_index(drop=True)
    print(ranked.head(number))
    print('\n')

### Sort the Venues 

In [None]:
def return_top_ten_venues(row, num_top_venues):
    """Return the labels of the ``num_top_venues`` largest values in ``row``.

    The first entry of ``row`` is skipped (the callers pass rows whose first
    field is the point name); the remaining entries are sorted in descending
    order and the index labels of the leading ones are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

In [None]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Point name']
for ind in np.arange(num_top_venues):
    # Ranks 1-3 take 'st'/'nd'/'rd'; every later rank takes 'th'. An explicit
    # bounds check replaces the former bare `except:`, which would also have
    # hidden unrelated errors.
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind + 1, suffix))

# create a new dataframe with one row per point
point_name_venues_sorted = pd.DataFrame(columns=columns)
point_name_venues_sorted['Point name'] = one_hot_ny_grouped['Point name']

# fill the ranked venue columns row by row
for ind in np.arange(point_name_venues_sorted.shape[0]):
    point_name_venues_sorted.iloc[ind, 1:] = return_top_ten_venues(one_hot_ny_grouped.iloc[ind, :], num_top_venues)

point_name_venues_sorted.head()

### Cluster Points 

In [None]:
# Number of clusters requested from k-means.
# NOTE(review): only five landmarks are defined in this notebook, so kclusters = 5
# presumably leaves every point in a cluster of its own - confirm this is intended.
kclusters = 5
# Feature matrix: the per-point category frequencies without the name column.
one_hot_ny_grouped_cluster = one_hot_ny_grouped.drop("Point name", axis=1)

# random_state=0 fixes the seed passed to KMeans.
kmean = KMeans(n_clusters=kclusters, random_state=0).fit(one_hot_ny_grouped_cluster)
kmean.labels_

### Merge dataframe and clusters 

In [None]:
# Attach the k-means label of each point. insert() raises when the column already
# exists, so overwrite it on re-runs to keep this cell idempotent.
if "Cluster Labels" in point_name_venues_sorted.columns:
    point_name_venues_sorted["Cluster Labels"] = kmean.labels_
else:
    point_name_venues_sorted.insert(0, "Cluster Labels", kmean.labels_)

# Add the labels and ranked venues to the landmark coordinates. join() returns a
# new frame, so new_york_points_with_lat_long itself is left unchanged.
new_york_merged = new_york_points_with_lat_long.join(
    point_name_venues_sorted.set_index("Point name"), on="Point name"
)



In [None]:
new_york_merged

### Map

In [None]:
# Map centre (latitude, longitude) for the folium maps, stored as floats so the
# values have the same type as the later assignments to these two names.
new_york_latitude = 40.77359725490544
new_work_longitude = -73.96332140842885

In [None]:
map_clusters = folium.Map(location=[new_york_latitude, new_work_longitude], zoom_start=11)

# One colour per cluster, evenly spaced along the rainbow colormap.
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# Add one marker per landmark. The name column is 'Point name' (capital P);
# the lower-case spelling used before raised a KeyError.
for lat, lon, poi, cluster in zip(new_york_merged['lat'], new_york_merged['lng'],
                                  new_york_merged['Point name'], new_york_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # Same colour lookup as before: label 0 wraps around to the last colour.
    marker_color = rainbow[int(cluster) - 1]
    folium.CircleMarker(
        [float(lat), float(lon)],  # the landmark coordinates are stored as strings
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

In [None]:
new_york_merged['Cluster Labels']

#### Cluster 1

In [None]:
new_york_merged.loc[new_york_merged['Cluster Labels'] == 0, new_york_merged.columns[[0] + list(range(3, new_york_merged.shape[1]))]]

#### Cluster 2

In [None]:
new_york_merged.loc[new_york_merged['Cluster Labels'] == 1, new_york_merged.columns[[0] + list(range(3, new_york_merged.shape[1]))]]

#### Cluster 3

In [None]:
new_york_merged.loc[new_york_merged['Cluster Labels'] == 2, new_york_merged.columns[[0] + list(range(3, new_york_merged.shape[1]))]]

#### Cluster 4

In [None]:
new_york_merged.loc[new_york_merged['Cluster Labels'] == 3, new_york_merged.columns[[0] + list(range(3, new_york_merged.shape[1]))]]

#### Cluster 5

In [None]:
new_york_merged.loc[new_york_merged['Cluster Labels'] == 4, new_york_merged.columns[[0] + list(range(3, new_york_merged.shape[1]))]]

### Mapping

In [None]:
park = full_df[full_df["categories"] == "Park"]

In [None]:
park.head()

In [None]:
# Map centre (latitude, longitude).
new_york_latitude = 40.77359725490544
new_work_longitude = -73.96332140842885

park_map = folium.Map(location=[new_york_latitude, new_work_longitude], zoom_start=11)
# One green marker per venue in `park`, with the venue name as popup.
for lat, lon, poi in zip(park["lat"], park["lng"], park["name"]):
    label = folium.Popup(str(poi), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color="green",  # a single colour string; this was the list ["green"] before
        fill=True,
        fill_color="green",
        fill_opacity=0.7).add_to(park_map)
park_map

In [12]:
def mapping_func_with_cat(city_lat, city_long, dataFrame, cat):
    """Build a folium map with one green marker per venue of category ``cat``.

    Parameters
    ----------
    city_lat, city_long : float
        Centre of the map.
    dataFrame : pandas.DataFrame
        Venue table with ``"lat"``, ``"lng"``, ``"name"`` and ``"categories"`` columns.
    cat : str
        Category value to keep; rows whose ``"categories"`` differ are ignored.

    Returns
    -------
    folium.Map
        Map centred on ``(city_lat, city_long)`` with the matching venues marked.
        It has no markers when no row matches ``cat``.
    """
    mapping = folium.Map(location=[city_lat, city_long], zoom_start=12)
    # Filter once so coordinates and names come from the same rows. Zipping the
    # unfiltered lat/lng columns with the filtered names put each name on the
    # coordinates of an unrelated venue.
    matches = dataFrame[dataFrame["categories"] == cat]
    for lat, lon, poi in zip(matches["lat"], matches["lng"], matches["name"]):
        label = folium.Popup(str(poi), parse_html=True)
        folium.CircleMarker(
            [lat, lon],
            radius=5,
            popup=label,
            color="green",  # a single colour string; this was the list ["green"] before
            fill=True,
            fill_color="green",
            fill_opacity=0.7).add_to(mapping)
    return mapping

In [None]:
mapping_func_with_cat(40.77359725490544, -73.96332140842885, full_df, "Bookstore")

In [None]:
museume = new_york_merged.loc[new_york_merged['Cluster Labels'] == 4, new_york_merged.columns[[0] + list(range(3, new_york_merged.shape[1]))]]

In [None]:
for i in museume.iloc[0][2:]:
    print(i)

In [None]:
city_lat = new_york_latitude
city_long = new_work_longitude

# One map per entry of the first `museume` row (from its third field onward),
# each treated as a venue category.
for i in museume.iloc[0][2:]:
    # Venues fetched around the museum that belong to category i.
    is_category = df_museum_city_of_new_york["categories"] == i
    temp = df_museum_city_of_new_york.loc[is_category, ['lat', 'lng', 'categories', 'name']]
    pic = mapping_func_with_cat(city_lat, city_long, temp, i)
    # Name the file after the category. Interpolating the whole temp['categories']
    # Series produced its multi-line repr as the file name. "/" is replaced
    # because it is a path separator.
    pic.save(f"{str(i).replace('/', '-')}.html")

In [None]:
map_clusters = folium.Map(location=[new_york_latitude, new_work_longitude], zoom_start=11)

# set color scheme for the clusters: one colour per cluster along the rainbow colormap
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
# The frame used here was `manhattan_merged` with 'Latitude' / 'Longitude' /
# 'Neighborhood' columns, none of which is defined in the visible notebook; the
# clustered landmarks live in new_york_merged under 'lat' / 'lng' / 'Point name'.
for lat, lon, poi, cluster in zip(new_york_merged['lat'], new_york_merged['lng'],
                                  new_york_merged['Point name'], new_york_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # Same colour lookup as before: label 0 wraps around to the last colour.
    marker_color = rainbow[int(cluster) - 1]
    folium.CircleMarker(
        [float(lat), float(lon)],  # the landmark coordinates are stored as strings
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

In [None]:
park = full_df[full_df["categories"] == "Park"]

In [None]:
park

In [None]:
theater = full_df[full_df["categories"] == "Theater"]
theater

In [None]:
mapping_func_with_cat(40.707355539749734, -74.01073759252262, full_df, "Theater")

In [None]:
park.shape

In [8]:
def find_q(area_name, latitude, longitude, search):
    """Search the Foursquare explore endpoint around a point and cache the result.

    Parameters
    ----------
    area_name : str
        Used as the pickle file name: the result is written to ``"{area_name}.pkl"``.
    latitude, longitude : str or float
        Centre of the search, sent to the API as ``ll="{latitude},{longitude}"``.
    search : str
        Value of the ``query`` parameter (e.g. ``"pizza"``).

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with its name, first category name, all
        ``venue.location.*`` fields and the venue id; column names are reduced to
        the part after the last dot.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.

    Notes
    -----
    ``get_category_type`` is looked up when this function is called, so it must be
    defined before the first call.
    """
    import os  # local import keeps this cell self-contained

    # NOTE(review): credentials should not live in the notebook. They can now be
    # supplied through the environment; the literals remain only as a fallback so
    # existing runs keep working - rotate them and drop the fallback.
    params = {
        "client_id": os.environ.get(
            "FOURSQUARE_CLIENT_ID", "N40W0THAJDZYKLHLHWRDBU01LIMNXBMXZ03X5ZOGZSRVMLSR"
        ),
        "client_secret": os.environ.get(
            "FOURSQUARE_CLIENT_SECRET", "PRO2XOOK3SLUG52RR0LM0AZX5RLRSAHIQ2NJWSLXMRVDBEGJ"
        ),
        "ll": f"{latitude},{longitude}",
        "query": search,
        "v": "20180604",
        "radius": 30000,
        "limit": 100,
    }
    # Passing params lets requests URL-encode the search term (spaces, '&', ...),
    # which the hand-built URL did not; the timeout stops a stalled connection
    # from hanging the notebook.
    response = requests.get(
        "https://api.foursquare.com/v2/venues/explore", params=params, timeout=30
    )
    # Surface HTTP errors directly instead of failing later with a KeyError.
    response.raise_for_status()
    results = response.json()

    dataframe = pd.json_normalize(results["response"]["groups"][0]["items"])
    location_columns = [col for col in dataframe.columns if col.startswith('venue.location.')]
    filtered_columns = ['venue.name', 'venue.categories'] + location_columns + ['venue.id']
    # Explicit copy so the column assignment below never writes into a view.
    dataframe_filtered = dataframe.loc[:, filtered_columns].copy()
    dataframe_filtered['venue.categories'] = dataframe_filtered.apply(get_category_type, axis=1)  # category for each row
    dataframe_filtered.columns = [col.split('.')[-1] for col in dataframe_filtered.columns]  # clean columns

    # Save to pickle so the API does not have to be called over and over again.
    dataframe_filtered.to_pickle(f"{area_name}.pkl")
    return dataframe_filtered

In [17]:
# Two "pizza" searches around different centre points. Each gets its own cache
# file: both calls used "one_trade_pizza_data" before, so the second pickle
# overwrote the first.
pizza = find_q("one_trade_pizza_data", 40.71227903203283, -74.01331115542504, "pizza")
pizza_2 = find_q("one_trade_pizza_data_2", 40.72004928407483, -74.00111045221124, "pizza")

In [21]:
pizzas = pd.concat([pizza, pizza_2])
pizzas.shape

(200, 16)

In [23]:
# Drop rows that repeat a venue name (in place), then save the result as CSV.
# NOTE(review): de-duplicating on "name" also collapses distinct venues that share
# a name; the "id" column may be the intended key - confirm.
pizzas.drop_duplicates(subset=["name"], inplace=True)
pizzas.to_csv("pizza_nyc.csv")


In [25]:
pizza.shape

(100, 16)

In [24]:
mapping_func_with_cat(40.71227903203283, -74.01331115542504, pizzas, "Pizza Place")

In [26]:
restaurant = find_q("restaurants", 40.71227903203283, -74.01331115542504, "restaurant")
restaurant

Unnamed: 0,name,categories,address,crossStreet,lat,lng,labeledLatLngs,distance,postalCode,cc,city,state,country,formattedAddress,neighborhood,id
0,Hudson Eats,Food Court,225 Liberty St,at S End Ave,40.712666,-74.015901,"[{'label': 'display', 'lat': 40.71266597148143...",222,10281,US,New York,NY,United States,"[225 Liberty St (at S End Ave), New York, NY 1...",,5362a2ae498e3b18c22334be
1,Los Tacos No. 1,Taco Place,136 Church St,,40.714267,-74.008756,"[{'label': 'display', 'lat': 40.714267, 'lng':...",443,10007,US,New York,NY,United States,"[136 Church St, New York, NY 10007, United Sta...",,5d5f24ec09484500079aee00
2,Crown Shy,Restaurant,70 Pine St,,40.706187,-74.007490,"[{'label': 'display', 'lat': 40.70618693053086...",837,10005,US,New York,NY,United States,"[70 Pine St, New York, NY 10005, United States]",,5c883f65f4b525002c0bf2ca
3,Takahachi Bakery,Bakery,25 Murray St,at Church St,40.713653,-74.008804,"[{'label': 'display', 'lat': 40.71365284530189...",409,10007,US,New York,NY,United States,"[25 Murray St (at Church St), New York, NY 100...",,4c154c9a77cea593c401d260
4,Pisillo Italian Panini,Sandwich Place,97 Nassau St,at Ann St,40.710530,-74.007526,"[{'label': 'display', 'lat': 40.71053008446078...",525,10038,US,New York,NY,United States,"[97 Nassau St (at Ann St), New York, NY 10038,...",,528bf16711d2b7722da6b51c
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,Thai Villa,Thai Restaurant,5 E 19th St,btwn 5th Ave & Broadway,40.739118,-73.990579,"[{'label': 'display', 'lat': 40.73911832471403...",3550,10003,US,New York,NY,United States,"[5 E 19th St (btwn 5th Ave & Broadway), New Yo...",,57e83df3498eebbe238cb36f
96,Rucola,Italian Restaurant,190 Dean St,at Bond St,40.685659,-73.985769,"[{'label': 'display', 'lat': 40.68565940643840...",3766,11217,US,Brooklyn,NY,United States,"[190 Dean St (at Bond St), Brooklyn, NY 11217,...",,4d9f5a9efc4f721e7e5a9d5f
97,Court Street Grocers Hero Shop,Sandwich Place,116 Sullivan St,between Van Brunt & Conover St,40.678517,-74.013193,"[{'label': 'display', 'lat': 40.67851709176271...",3758,11231,US,Brooklyn,NY,United States,[116 Sullivan St (between Van Brunt & Conover ...,,51e5766c454ace0791feee38
98,Frankies 457 Spuntino,Italian Restaurant,457 Court St,btwn 4th Pl & Luquer St,40.677360,-73.998047,"[{'label': 'display', 'lat': 40.67735978455962...",4095,11231,US,Brooklyn,NY,United States,"[457 Court St (btwn 4th Pl & Luquer St), Brook...",,41abb800f964a520561e1fe3


In [29]:
# NOTE(review): "categories" is the column name, not a category value, so
# mapping_func_with_cat filters for rows whose category equals "categories".
# A value such as "Restaurant" was probably intended - confirm.
mapping_func_with_cat(40.71227903203283, -74.01331115542504, restaurant, "categories")