In [27]:
pip install geopy

Note: you may need to restart the kernel to use updated packages.


### Importing the necessary libraries

In [28]:
# Importing all the necessary libraries we will be needing to do the Ananlysis


import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

# for webscraping import Beautiful Soup 
from bs4 import BeautifulSoup

import xml

!conda install -c conda-forge folium=0.5.0 --yes 
import folium # map rendering library

print ("Libraries Imported.")

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.

Libraries Imported.


### Getting the data from URL

In [29]:
link = 'https://en.wikipedia.org/wiki/List_of_United_States_cities_by_population'
page = requests.get(link)
soup = BeautifulSoup(page.text)

### Finding the table that has the data that we need i.e. list of all cities with their population, Square Area, Location (coordinates)

In [30]:
table = soup.find_all('table')[4]

In [31]:
table_rows = table.find_all('tr')
res = []
for tr in table_rows:
    td = tr.find_all('td')
    row = [tr.text.strip() for tr in td if tr.text.strip()]
    if row:
        res.append(row)
df = pd.DataFrame(res, columns=["Rank", "City", "State", "del1", "del2", "del3", "Sq.Area", "del5", "population density in Sq Mi", "Population density in Km2", "Location"])
df.head()

Unnamed: 0,Rank,City,State,del1,del2,del3,Sq.Area,del5,population density in Sq Mi,Population density in Km2,Location
0,1,New York[d],New York,8398748,8175133,+2.74%,301.5 sq mi,780.9 km2,"28,317/sq mi","10,933/km2",40°39′49″N 73°56′19″W﻿ / ﻿40.6635°N 73.9387°W﻿...
1,2,Los Angeles,California,3990456,3792621,+5.22%,468.7 sq mi,"1,213.9 km2","8,484/sq mi","3,276/km2",34°01′10″N 118°24′39″W﻿ / ﻿34.0194°N 118.4108°...
2,3,Chicago,Illinois,2705994,2695598,+0.39%,227.3 sq mi,588.7 km2,"11,900/sq mi","4,600/km2",41°50′15″N 87°40′54″W﻿ / ﻿41.8376°N 87.6818°W﻿...
3,4,Houston[3],Texas,2325502,2100263,+10.72%,637.5 sq mi,"1,651.1 km2","3,613/sq mi","1,395/km2",29°47′12″N 95°23′27″W﻿ / ﻿29.7866°N 95.3909°W﻿...
4,5,Phoenix,Arizona,1660272,1445632,+14.85%,517.6 sq mi,"1,340.6 km2","3,120/sq mi","1,200/km2",33°34′20″N 112°05′24″W﻿ / ﻿33.5722°N 112.0901°...


### Finding the radius of each city with the help of Sq.Area, this step involves in preprocessing of the the column Sq.Area (changing its data type to float) then finding its square root

In [32]:
new= df["Sq.Area"].str.split("s", n=1, expand = True)
new = new[0].str.replace(u'\xa0',u'')
df["Sq.Area"] = new.str.replace(',','')
df["Sq.Area"] = df["Sq.Area"].astype(float)
df["Radius"] = np.sqrt(df["Sq.Area"])

### Dropping the unnecessary columns that we don't require

In [33]:
df.drop(columns = ["Rank", "del1", "del2", "del3", "del5", "Sq.Area", "population density in Sq Mi"], inplace = True)
df

Unnamed: 0,City,State,Population density in Km2,Location,Radius
0,New York[d],New York,"10,933/km2",40°39′49″N 73°56′19″W﻿ / ﻿40.6635°N 73.9387°W﻿...,17.363755
1,Los Angeles,California,"3,276/km2",34°01′10″N 118°24′39″W﻿ / ﻿34.0194°N 118.4108°...,21.64948
2,Chicago,Illinois,"4,600/km2",41°50′15″N 87°40′54″W﻿ / ﻿41.8376°N 87.6818°W﻿...,15.076472
3,Houston[3],Texas,"1,395/km2",29°47′12″N 95°23′27″W﻿ / ﻿29.7866°N 95.3909°W﻿...,25.248762
4,Phoenix,Arizona,"1,200/km2",33°34′20″N 112°05′24″W﻿ / ﻿33.5722°N 112.0901°...,22.750824
5,Philadelphia[e],Pennsylvania,"4,511/km2",40°00′34″N 75°08′00″W﻿ / ﻿40.0094°N 75.1333°W﻿...,11.584472
6,San Antonio,Texas,"1,250/km2",29°28′21″N 98°31′30″W﻿ / ﻿29.4724°N 98.5251°W﻿...,21.470911
7,San Diego,California,"1,670/km2",32°48′55″N 117°08′06″W﻿ / ﻿32.8153°N 117.1350°...,18.033303
8,Dallas,Texas,"1,493/km2",32°47′36″N 96°45′59″W﻿ / ﻿32.7933°N 96.7665°W﻿...,18.463477
9,San Jose,California,"2,231/km2",37°17′48″N 121°49′08″W﻿ / ﻿37.2967°N 121.8189°...,13.322913


### Splitting the cooridnates to Latitudes and Longitudes for each city

In [34]:
df["Location"]= df["Location"].str.split("/", n = 2, expand = True)[1]
df.head()

Unnamed: 0,City,State,Population density in Km2,Location,Radius
0,New York[d],New York,"10,933/km2",﻿40.6635°N 73.9387°W﻿,17.363755
1,Los Angeles,California,"3,276/km2",﻿34.0194°N 118.4108°W﻿,21.64948
2,Chicago,Illinois,"4,600/km2",﻿41.8376°N 87.6818°W﻿,15.076472
3,Houston[3],Texas,"1,395/km2",﻿29.7866°N 95.3909°W﻿,25.248762
4,Phoenix,Arizona,"1,200/km2",﻿33.5722°N 112.0901°W﻿,22.750824


In [35]:
new = df["Location"].str.split(" ", n = 0, expand = False)
k = df.copy(deep = True)

In [36]:
Latitude = []
Longitude = []
for i in range(len(new)):
    Latitude.append(new[i][1][:-2])
    Longitude.append(new[i][2][:-3]) 

k["Latitude"] = Latitude
k["Longitude"] = Longitude
k["Latitude"] = k["Latitude"].str.replace(u'\ufeff',u'')
k.drop(columns = ["Location"], inplace = True)
k.head()
df = k.copy(deep = True)

In [37]:
df['Longitude'] = -df['Longitude'].astype(float)
df['Latitude'] = df['Latitude'].astype(float)
df['Radius'] = df['Radius']* 1000
df.head()

Unnamed: 0,City,State,Population density in Km2,Radius,Latitude,Longitude
0,New York[d],New York,"10,933/km2",17363.755354,40.6635,-73.9387
1,Los Angeles,California,"3,276/km2",21649.480363,34.0194,-118.4108
2,Chicago,Illinois,"4,600/km2",15076.471736,41.8376,-87.6818
3,Houston[3],Texas,"1,395/km2",25248.762346,29.7866,-95.3909
4,Phoenix,Arizona,"1,200/km2",22750.824161,33.5722,-112.0901


### Now getting the data from per capita income state wise for USA

In [38]:
link1 = 'https://en.wikipedia.org/wiki/List_of_United_States_counties_by_per_capita_income'
page1 = requests.get(link1)
soup1 = BeautifulSoup(page1.text)

In [47]:
table = soup1.find_all('table')[2]

In [48]:
table_rows = table.find_all('tr')
res = []
for tr in table_rows:
    td = tr.find_all('td')
    row = [tr.text.strip() for tr in td if tr.text.strip()]
    if row:
        res.append(row)
df_state = pd.DataFrame(res, columns=["Rank", "Country-equivalent", "State", "Per capita income", "del2", "del3", "Population", "del5"])
df_state.head()

Unnamed: 0,Rank,Country-equivalent,State,Per capita income,del2,del3,Population,del5
0,1,New York County,New York,"$62,498","$69,659","$84,627",1605272,736192
1,2,Arlington,Virginia,"$62,018","$103,208","$139,244",214861,94454
2,3,Falls Church City,Virginia,"$59,088","$120,000","$152,857",12731,5020
3,4,Marin,California,"$56,791","$90,839","$117,357",254643,102912
4,5,Alexandria City,Virginia,"$54,608","$85,706","$107,511",143684,65369


### Dropping the unnecessary columns from the table of per capita income of US as per state wise

In [49]:
df_state.drop(columns = ['Rank', 'del2', 'del3', 'del5'], axis = 1, inplace = True)

In [51]:
df_state.head()

Unnamed: 0,Country-equivalent,State,Per capita income,Population
0,New York County,New York,"$62,498",1605272
1,Arlington,Virginia,"$62,018",214861
2,Falls Church City,Virginia,"$59,088",12731
3,Marin,California,"$56,791",254643
4,Alexandria City,Virginia,"$54,608",143684


### Now map will be created of US cities with the help of latitudes and longitudes of each cities using folium

In [142]:
map_tohood = folium.Map(location=[37.0902,-95.7129], zoom_start=3)

# add markers to map
for lat, lng, state, city in zip(df['Latitude'], df['Longitude'], df['State'], df['City']):
    label = '{}, {}'.format(city, state)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=3,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.3,
        parse_html=False).add_to(map_tohood)  
    
map_tohood

In [53]:
CLIENT_ID = 'GLOAB03UNGTF2EFBBJ2GDPJAYN3OQCHY5U01IGXZM13WRQLS' # your Foursquare ID
CLIENT_SECRET = 'MBQ4UF0LQFGQFM12M4CDZU41O5YYVLILRFVZOFTVNIYBWTNQ' # your Foursquare Secret
VERSION = '20180604'
LIMIT = 20
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: GLOAB03UNGTF2EFBBJ2GDPJAYN3OQCHY5U01IGXZM13WRQLS
CLIENT_SECRET:MBQ4UF0LQFGQFM12M4CDZU41O5YYVLILRFVZOFTVNIYBWTNQ


In [54]:
def getNearbyVenues(names, latitudes, longitudes, radius):
    
    venues_list=[]
    for name, lat, lng,radius in zip(names, latitudes, longitudes,radius):
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
       # print(results)
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['City', 
                  'Latitude', 
                  'Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [57]:
df_venues = getNearbyVenues(names = df['City'], latitudes = df['Latitude'],longitudes = df['Longitude'], radius = df['Radius'])
df_venues.head()

Unnamed: 0,City,Latitude,Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,New York[d],40.6635,-73.9387,Ix,40.660844,-73.961344,Café
1,New York[d],40.6635,-73.9387,Brooklyn Botanic Garden,40.667622,-73.963191,Botanical Garden
2,New York[d],40.6635,-73.9387,Covenhoven,40.675143,-73.960203,Beer Bar
3,New York[d],40.6635,-73.9387,Prospect Park Boathouse & Audubon Center,40.660884,-73.964949,Building
4,New York[d],40.6635,-73.9387,Brooklyn Museum,40.671521,-73.963677,Art Museum


###  Weights is to be assigned to the categories of venues that my boss is intrested in setting up of fast food outlet

In [105]:
k = df_venues.copy(deep = True)
weights_dict={'Movie Theater':3.0,'Beach':3.0,'Concert Hall':2.5,'Playground':3.0,'Coffee Shop':3.5,'Arcadia':4.0,'Nightclub':4.0,'Toy / Game Store':4.5,'Theme Park Ride / Attraction':4.0,'Pub':4.0}
data = df_venues['Venue Category']

In [106]:
weights = []
for i in data:
    if i in weights_dict.keys():
        weights.append(weights_dict[i])
    else :
        weights.append(0.0)
df_venues['weights'] = weights;
df_venues.head()

Unnamed: 0,City,Latitude,Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category,weights
0,New York[d],40.6635,-73.9387,Ix,40.660844,-73.961344,Café,0.0
1,New York[d],40.6635,-73.9387,Brooklyn Botanic Garden,40.667622,-73.963191,Botanical Garden,0.0
2,New York[d],40.6635,-73.9387,Covenhoven,40.675143,-73.960203,Beer Bar,0.0
3,New York[d],40.6635,-73.9387,Prospect Park Boathouse & Audubon Center,40.660884,-73.964949,Building,0.0
4,New York[d],40.6635,-73.9387,Brooklyn Museum,40.671521,-73.963677,Art Museum,0.0


### Dropping the rows that consist of weight less than 1 i.e. 0

In [108]:
df_venues.weights=df_venues.weights.astype(float)
df_venues.drop(df_venues[df_venues.weights < 1.0].index, inplace=True)
df_venues.head()

Unnamed: 0,City,Latitude,Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category,weights
23,Los Angeles,34.0194,-118.4108,Blue Bottle Coffee,34.027115,-118.387637,Coffee Shop,3.5
48,Chicago,41.8376,-87.6818,Cafe Jumping Bean,41.857846,-87.662747,Coffee Shop,3.5
76,Houston[3],29.7866,-95.3909,White Oak Music Hall,29.785994,-95.367173,Concert Hall,2.5
78,Houston[3],29.7866,-95.3909,Boomtown Coffee,29.802849,-95.400855,Coffee Shop,3.5
86,Phoenix,33.5722,-112.0901,Starbucks,33.581418,-112.127408,Coffee Shop,3.5


### Relevants columns like city and weights to group all the venues by city and calculating the means for each city

In [109]:
citywise_venues_weights = df_venues[['City','weights']].copy()
citywise_venues_weights_means = citywise_venues_weights.groupby(['City']).mean()
citywise_venues_weights_means = citywise_venues_weights_means.reset_index(drop=False)
citywise_venues_weights_means.head()

Unnamed: 0,City,weights
0,Abilene,3.5
1,Akron,3.5
2,Albuquerque,3.0
3,Alexandria[m],3.5
4,Allen,2.5


### Merging the table for which we calculated the means of weights city wise to the actual table that we got from the wiki page

In [117]:
city_selection = pd.merge(df, citywise_venues_weights_means, on='City')
city_selection = city_selection[['City','Population density in Km2','weights']].copy()
city_selection.head()

Unnamed: 0,City,Population density in Km2,weights
0,Los Angeles,"3,276/km2",3.5
1,Chicago,"4,600/km2",3.5
2,Houston[3],"1,395/km2",3.0
3,Phoenix,"1,200/km2",3.5
4,Philadelphia[e],"4,511/km2",3.5


In [118]:
# Preprocessing the population density in Km2 column as we have to normalize these values
k = city_selection.copy(deep = True)
k['Population density in Km2'] = k['Population density in Km2'].str.split("/", n = 0, expand = True)
k['Population density in Km2'] = k['Population density in Km2'].str.replace(',','')
k['Population density in Km2'] = k['Population density in Km2'].astype(float)
city_selection = k.copy(deep = True)
city_selection.head()

Unnamed: 0,City,Population density in Km2,weights
0,Los Angeles,3276.0,3.5
1,Chicago,4600.0,3.5
2,Houston[3],1395.0,3.0
3,Phoenix,1200.0,3.5
4,Philadelphia[e],4511.0,3.5


In [119]:
# Normalizing the data frame
from sklearn import preprocessing
column_names_to_normalize = ['Population density in Km2', 'weights']
x = city_selection[column_names_to_normalize].values #returns a numpy array
min_max_scaler = preprocessing.MinMaxScaler()
x_scaled = min_max_scaler.fit_transform(x)
city_selection[column_names_to_normalize] = pd.DataFrame(x_scaled)
city_selection.head()

Unnamed: 0,City,Population density in Km2,weights
0,Los Angeles,0.470174,0.5
1,Chicago,0.664224,0.5
2,Houston[3],0.194489,0.25
3,Phoenix,0.165909,0.5
4,Philadelphia[e],0.65118,0.5


### Calculating the sum of normalized columns to determine the city that has maximum sum and conclude that one locality in that city would be the best fit

In [120]:
city_selection['sum'] = city_selection['Population density in Km2'] + city_selection['weights']
row_num = city_selection['sum'].argmax()
city_name = city_selection['City'].iloc[row_num]
city_name

'Jersey City'

In [121]:
# Finding the state in which that city belongs
row = df.loc[df['City']== city_name].index[0]
state_name = df['State'].iloc[row]
state_name

'New Jersey'

### To calculate the per capita income of New Jersey

### In order to set up an outlet in a city, boss suggested that minimum per capita income of should be 50,000 Dollars

In [123]:
p_row = df_state.loc[df_state['State'] == state_name].index[0]
per_capital_income = df_state['Per capita income'].iloc[p_row]
print("Per capita income of New Jercy is :", per_capital_income)

Per capita income of New Jercy is : $50,349


## As per the above inputs and outputs, we can certainly conclude that best city for setting up the fast food outlet would be "Jersey City" and also its per capita income is 50,349 USD

### Now we have to check the best location in jersey city in order to set up our fast food outlet

In [124]:
# Getting coordinates of New Jersey
lat_newJercy = df['Latitude'].iloc[row]
long_newJercy = df['Longitude'].iloc[row]
print(lat_newJercy, long_newJercy)

40.7114 -74.0648


### Now extracting all venues in Jersey City

In [125]:
def getNearbyVenues1(name, latitudes, longitudes, radius):
    
    LIMIT = 150       
        # create the API request URL
    url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            latitudes, 
            longitudes, 
            radius, 
            LIMIT)
            
        # make the GET request
    results = requests.get(url).json()["response"]['groups'][0]['items']
   # print(results)
    venues_list=[]
    venues_list.append([(name,lat,lng,v['venue']['name'],v['venue']['location']['lat'],v['venue']['location']['lng'],v['venue']['categories'][0]['name'])for v in results])
    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['City', 'Latitude', 'Longitude', 'Venue', 'Venue Latitude', 'Venue Longitude','Venue Category']
    return(nearby_venues)


new_jersey_venues = getNearbyVenues1(name = 'Jersey City', latitudes = lat_newJercy ,longitudes = long_newJercy, radius = 2500)
new_jersey_venues.head()

Unnamed: 0,City,Latitude,Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Jersey City,38.3539,-121.9728,The Grind Shop,40.71167,-74.062872,Coffee Shop
1,Jersey City,38.3539,-121.9728,Harry’s Daughter,40.710904,-74.062071,Caribbean Restaurant
2,Jersey City,38.3539,-121.9728,Corgi Spirits at The Jersey City Distillery,40.708304,-74.064803,Distillery
3,Jersey City,38.3539,-121.9728,Mordis Sandwich Shop,40.710412,-74.061095,Sandwich Place
4,Jersey City,38.3539,-121.9728,Hooked JC,40.714709,-74.067009,Fish & Chips Shop


In [126]:
venues_in_newjersey = new_jersey_venues.copy(deep = True)
venues_in_newjersey.shape

(100, 7)

### As we got the 100 venues in Jersey City so we are going to assign weights to each of them

In [127]:
k = new_jersey_venues.copy(deep = True)
new_weightage_dict= {'Coffee Shop' : 1, 
'Caribbean Restaurant':1,
'Distillery':2,
'Fish & Chips Shop':2,
'Science Museum':3,
'Latin American Restaurant':1,
'Restaurant':1,
'State / Provincial Park':5,
'Diner':1,
'Supermarket':4,
'Bar':2,
'Jazz Club':3,
'Golf Course':4,
'Park':5,
'Cajun / Creole Restaurant':2,
'Bakery':2,
'Go Kart Track':4,
'Taco Place':2,
'Hot Dog Joint':1,
'Food Truck':2,
'Beer Garden':3,
'Boutique':3,
'Café':3,
'Bagel Shop':4,
'Record Shop':1,
'Bakery':1,
'Pizza Place':1,
'Ramen Restaurant':1,
'Wine Bar':3,
'Middle Eastern Restaurant':1,
'French Restaurant':1,
'Theater':5,
'Lounge':4,
'Wine Shop':3,
'Cocktail Bar':2,
'New American Restaurant':3,
'Residential Building (Apartment / Condo)':5,
'Pool':5,
'Burger Joint':2,
'Cheese Shop':1,
'Coffee Shop':1,
'Bagel Shop':1,
'Vietnamese Restaurant':1,
'Portuguese Restaurant':1,
'Ice Cream Shop':4,
'Italian Restaurant':2,
'Gym':1,
'Farmers Market':4,
'Bar':3,
'Pizza Place':1,
'Bakery':2,
'Bookstore':4,
'Bar':2,
'Farmers Market':4,
'Asian Restaurant':1,
'Tea Room':3,
'Donut Shop':3,
'Historic Site':4,
'Gym / Fitness Center':1,
'Café':1,
'Mexican Restaurant':3,
'Plaza':2,
'Gay Bar':2,
'Bar':3,
'College Administrative Building':3,
'Mexican Restaurant':2,
'Bakery':3,
'American Restaurant':1,
'American Restaurant':2,
'American Restaurant':3,
'Café':1,
'New American Restaurant':1,
'Chocolate Shop':1,
'Gym':1,
'Grocery Store':1,
'Middle Eastern Restaurant':3,
'American Restaurant':2,
'Frozen Yogurt Shop':2,
'Japanese Restaurant':2,
'Bar':3,
'Liquor Store':3,
'Ice Cream Shop':2,
'Fish Market':3,
'Indie Movie Theater':5,
'Grocery Store':4,
'Modern European Restaurant':2,
'American Restaurant':1,
'Poke Place':1,
'Ramen Restaurant':1,
'Diner':1,
'Brewery':1,
'Burger Joint':3,
'Burger Joint':2,
'Café':2,
'Fried Chicken Joint':2,
'Beer Garden':2,
'Gym / Fitness Center':2,
'Vietnamese Restaurant':2,
'Italian Restaurant':2,
'Pet Store':4}

### Plotting all the venues that we have got from the Four Square API

In [128]:
import matplotlib.cm as cm
import matplotlib.colors as colors
import folium

# create map of the venues that we have using latitude and longitudes
venues_map = folium.Map(location=[lat_newJercy, long_newJercy], zoom_start=15) # generate map centred around Jersey city


# add Jersey City as a red circle mark
folium.features.CircleMarker(
    [lat_newJercy, long_newJercy],
    radius=10,
    popup='Jersey city',
    fill=True,
    color='red',
    fill_color='red',
    fill_opacity=0.6
    ).add_to(venues_map)

<folium.features.CircleMarker at 0xdf7cb0a588>

In [141]:
for lat, lng, label in zip(venues_in_newjersey['Venue Latitude'], venues_in_newjersey['Venue Longitude'], venues_in_newjersey['Venue']):
    label=folium.Popup(label,parse_html=True)
    folium.features.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.6,
        parse_html = False).add_to(venues_map)
venues_map

### Assigning weights to each category, same as we gave for each city

In [130]:

# Calculating new weights for our data frame as we have given weights for all categories

allVenuesinCity1 = k['Venue Category']

f_weights1 = []
for i in allVenuesinCity1:
    if i in new_weightage_dict.keys():
        f_weights1.append(new_weightage_dict[i])
    else :
        f_weights1.append(0)
k['weights'] = f_weights1;
k.head()

Unnamed: 0,City,Latitude,Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category,weights
0,Jersey City,38.3539,-121.9728,The Grind Shop,40.71167,-74.062872,Coffee Shop,1
1,Jersey City,38.3539,-121.9728,Harry’s Daughter,40.710904,-74.062071,Caribbean Restaurant,1
2,Jersey City,38.3539,-121.9728,Corgi Spirits at The Jersey City Distillery,40.708304,-74.064803,Distillery,2
3,Jersey City,38.3539,-121.9728,Mordis Sandwich Shop,40.710412,-74.061095,Sandwich Place,0
4,Jersey City,38.3539,-121.9728,Hooked JC,40.714709,-74.067009,Fish & Chips Shop,2


In [131]:
# Dropping unnecessary columns 

newframe = k[['City','Venue Category','weights']].copy()
newframe = k.groupby(['Venue Category']).mean()
newframe.drop(columns = ["Latitude", "Longitude"], inplace = True)
newframe

Unnamed: 0_level_0,Venue Latitude,Venue Longitude,weights
Venue Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Afghan Restaurant,40.709155,-74.073795,0
American Restaurant,40.717295,-74.047777,1
Bagel Shop,40.72299,-74.058068,1
Bakery,40.723219,-74.058142,3
Bar,40.719338,-74.063597,3
Beer Garden,40.718166,-74.043789,2
Bookstore,40.719984,-74.043205,4
Boutique,40.717606,-74.044299,3
Brewery,40.72066,-74.040287,1
Burger Joint,40.724874,-74.048082,2


### K Means algorithm would be used to cluster the venues and calculating the weights for each cluster to decide which cluster would be the best area to set up a fast food outlet 

In [132]:
from scipy import stats
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import seaborn as sns
#Standardize
clmns = ['weights','Venue Latitude', 'Venue Longitude']
df_tr_std = stats.zscore(newframe[clmns])
#Cluster the data
kmeans = KMeans(n_clusters=3, random_state=0).fit(df_tr_std)
labels = kmeans.labels_
newframe['clusters'] = labels
#Add the column into our list
clmns.extend(['clusters'])
#Lets analyze the clusters
kframe = newframe[clmns].groupby(['Venue Category']).mean()
kframe = kframe.reset_index(drop = False)
kframe.head()

Unnamed: 0,Venue Category,weights,Venue Latitude,Venue Longitude,clusters
0,Afghan Restaurant,0,40.709155,-74.073795,2
1,American Restaurant,1,40.717295,-74.047777,1
2,Bagel Shop,1,40.72299,-74.058068,1
3,Bakery,3,40.723219,-74.058142,0
4,Bar,3,40.719338,-74.063597,0


In [136]:
#new group by clusters and add weights of each cluster 
finalWeight = kframe.groupby(['clusters']).mean()
finalWeight

Unnamed: 0_level_0,weights,Venue Latitude,Venue Longitude
clusters,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,3.736842,40.721485,-74.050876
1,1.30303,40.719943,-74.048141
2,1.8,40.709072,-74.06613


In [137]:
# Final coordinates of the place where we will be setting up a fast food outlet is the one that has maximum weight for, in the above data frame
lat1 = 40.720102
long1 = -74.048121

###  As we have a location with the maximum weight, let us plot the same in the graph with a circle of radius 50M 

In [140]:
# create map of the venues that we have using latitude and longitudes
final_map = folium.Map(location=[lat1, long1], zoom_start=15) # generate map centred around Jersey city


# add prefered location in the City as a green circle mark
folium.features.CircleMarker(
    [lat1, long1],
    radius=50,
    popup='Fast food outlet can be installed within this green cirle',
    fill=True,
    color='green',
    fill_color='green',
    fill_opacity=0.6
    ).add_to(final_map)
final_map

## Conclusion

### As per the above input we have finally got a better venue in the Jersey City. Best Venue in order to set up the fast food outlet has been highlighted in green circle in above map

### The best city would be considered as Jersey City in order to set up an fast food outlet