In [1]:
import os

import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
import requests
from geopy.geocoders import Nominatim
from pandas import json_normalize
from sklearn.cluster import KMeans

%matplotlib inline

*Import* **Maryland** *zip code data*

In [2]:
# Load the Maryland zip code table from the CSV that sits next to the notebook.
md_zip = pd.read_csv(filepath_or_buffer='MD_zip_code.csv')

*Filter data to* **Baltimore**

In [3]:
# Keep the rows whose County text contains 'Baltimore' and renumber them from 0.
# na=False makes a missing County count as "no match"; without it a NaN ends up in
# the mask and the boolean indexing raises.
baltimore_zip = md_zip[md_zip['County'].str.contains('Baltimore', na=False)].reset_index(drop=True)

*Import* **Maryland** *coordinates*

In [4]:
# Load the Maryland latitude/longitude table from the CSV that sits next to the notebook.
md_coord = pd.read_csv(filepath_or_buffer='MD_lat_long.csv')

*Rename the* `Zip` *column of the coordinate data to* `Zip Code` *so both dataframes share the same merge key*

In [5]:
# Give the coordinate table the 'Zip Code' column name used as a merge key below.
md_coord = md_coord.rename(columns={'Zip': 'Zip Code'})

*Merge zip codes and coordinates into one dataframe*

In [6]:
# Left-join the coordinates onto the Baltimore zip codes by zip code and city.
# Rows that found no coordinates are dropped: filling them with 0 would turn them
# into latitude 0 / longitude 0 and plot those cities at a bogus location.
baltimore_data = (
    pd.merge(baltimore_zip, md_coord, on=['Zip Code', 'City'], how='left')
    .dropna(subset=['Latitude', 'Longitude'])
    .fillna(0)  # any remaining (non-coordinate) gaps keep the 0 placeholder
)

*Keep only the first row for each city so that every city appears once in the dataframe*

In [7]:
# One row per City: the first occurrence is kept (the default), later ones are discarded.
baltimore_merged = baltimore_data.drop_duplicates(subset='City')

*Reduce dataframe to include only required columns and display completed dataframe*

In [8]:
# Narrow the table to the three columns the map needs, then preview the first rows.
df_baltimore = baltimore_merged.loc[:, ['City', 'Latitude', 'Longitude']]
df_baltimore.head(5)

Unnamed: 0,City,Latitude,Longitude
0,Baldwin,39.508739,-76.49191
1,Boring,39.521276,-76.804699
2,Brooklandville,39.397861,-76.671742
3,Butler,39.532955,-76.743196
4,Chase,39.438964,-76.592139


*Find coordinates of* **Baltimore**

In [9]:
address = 'Baltimore'

# Look the address up through Nominatim and keep its coordinates as the map centre.
geolocator = Nominatim(user_agent="md_explorer")
location = geolocator.geocode(address)
if location is None:
    # geocode() yields None when nothing matches; fail here with a clear message
    # instead of an AttributeError on the attribute reads below.
    raise ValueError('No geocoding result for address {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude

*Map the neighborhoods that comprise* **Baltimore**

In [10]:
# Base map centred on the geocoded Baltimore coordinates.
baltimore_map = folium.Map(location=[latitude, longitude], zoom_start=10)

# Appearance shared by every marker on this map.
marker_style = dict(
    radius=5,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False,
)

# One circle marker per row of df_baltimore; its popup shows the City value.
for lat, lng, ward in zip(df_baltimore['Latitude'], df_baltimore['Longitude'], df_baltimore['City']):
    label = folium.Popup('{}'.format(ward), parse_html=True)
    folium.CircleMarker([lat, lng], popup=label, **marker_style).add_to(baltimore_map)

baltimore_map

*Foursquare credentials entered in hidden cell*

In [11]:
# Foursquare credentials are read from the environment so that no secret is stored in
# the notebook. A missing variable raises a KeyError that names it.
# NOTE(review): the keys that were previously committed in this cell should be treated
# as leaked and rotated.
CLIENT_ID = os.environ['FOURSQUARE_CLIENT_ID']
CLIENT_SECRET = os.environ['FOURSQUARE_CLIENT_SECRET']
VERSION = '20180604'  # sent as the `v` parameter of each venue search
LIMIT = 30  # sent as the `limit` parameter of each venue search

*Search for existing Coffee Shops in the* **Baltimore** *area*

In [12]:
search_query = 'Coffee'
radius = 500  # sent as the `radius` parameter of the search

# Let requests build and URL-encode the query string instead of formatting it by hand.
url = 'https://api.foursquare.com/v2/venues/search'
coffee_params = {
    'client_id': CLIENT_ID,
    'client_secret': CLIENT_SECRET,
    'll': '{},{}'.format(latitude, longitude),
    'v': VERSION,
    'query': search_query,
    'radius': radius,
    'limit': LIMIT,
}

# The timeout stops the cell from hanging forever on a stalled connection.
coffee_response = requests.get(url, params=coffee_params, timeout=30)
# Fail on an HTTP error here rather than on a missing key when the payload is read.
coffee_response.raise_for_status()
coffee_results = coffee_response.json()

*Shape and convert* json *data into a pandas dataframe*

In [13]:
# Step into the payload to reach the list of venue records ...
coffee_payload = coffee_results['response']
coffee_venues = coffee_payload['venues']

# ... and flatten the nested records into a dataframe.
coffee_df = json_normalize(coffee_venues)

*Filter to only needed columns*

In [14]:
# Keep the venue name, its categories, every flattened 'location.*' field and the id.
location_columns = [col for col in coffee_df.columns if col.startswith('location.')]
filtered_columns = ['name', 'categories', *location_columns, 'id']
coffee_filtered = coffee_df.loc[:, filtered_columns]

def get_category_type(row):
    """Return the name of a venue's first category, or None if it has none.

    Args:
        row: Mapping-like record (for example a dataframe row) that stores the
            venue's category list under 'categories' or, failing that, under
            'venue.categories'.

    Returns:
        The 'name' entry of the first category, or None when the category
        value is missing (None/NaN) or is an empty list.

    Raises:
        KeyError: If neither 'categories' nor 'venue.categories' is present.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; other errors propagate.
        categories_list = row['venue.categories']

    # A missing value has no len(); treat None and NaN like "no categories".
    if categories_list is None:
        return None
    if isinstance(categories_list, float) and categories_list != categories_list:
        return None

    if len(categories_list) == 0:
        return None
    return categories_list[0]['name']

# Replace each venue's category list with the name of its first category.
coffee_filtered['categories'] = coffee_filtered.apply(get_category_type, axis='columns')

# Strip dotted prefixes so that only the last segment of each column name remains.
coffee_filtered.columns = [column.rsplit('.', 1)[-1] for column in coffee_filtered.columns]

*Rename columns to match existing* **Baltimore** *dataframe*

In [15]:
# Use the same column names as the Baltimore dataframe.
coffee_filtered = coffee_filtered.rename(
    columns={
        'city': 'City',
        'name': 'Name',
        'lat': 'Latitude',
        'lng': 'Longitude',
    }
)

*Merge dataframes to make one* **Baltimore** *dataframe related only to* **coffee shops**

In [16]:
# Left-join the Baltimore table onto the coffee venues, then replace every gap with 0.
# NOTE(review): the join keys include exact float coordinates, so matches are probably
# rare - confirm this merge adds what is intended.
coffee_data = coffee_filtered.merge(
    baltimore_merged,
    how='left',
    on=['City', 'Latitude', 'Longitude'],
).fillna(0)

*Show only needed columns for the* **Baltimore Coffee Shops** *and display completed dataframe*

In [17]:
# Keep the five columns used for mapping and preview the first rows.
baltimore_coffee = coffee_data.loc[:, ['Name', 'categories', 'Latitude', 'Longitude', 'City']]
baltimore_coffee.head(5)

Unnamed: 0,Name,categories,Latitude,Longitude,City
0,Zeke's Coffee @ Baltimore Farmer's Market,Coffee Shop,39.293517,-76.609854,Baltimore
1,Coffee Land,Coffee Shop,39.29183,-76.615493,Baltimore
2,The Living Stage + Coffee Bar by Hotel RL,Coffee Shop,39.288774,-76.611892,Baltimore
3,Peet's Coffee and Tea,Café,39.290279,-76.614958,Baltimore
4,Saratoga Coffee Service,Wine Bar,39.292332,-76.610062,Baltimore


*Search for existing Bakeries in the* **Baltimore** *area*

In [18]:
search_query = 'Bakery'
radius = 500  # sent as the `radius` parameter of the search

# Let requests build and URL-encode the query string instead of formatting it by hand.
url = 'https://api.foursquare.com/v2/venues/search'
bakery_params = {
    'client_id': CLIENT_ID,
    'client_secret': CLIENT_SECRET,
    'll': '{},{}'.format(latitude, longitude),
    'v': VERSION,
    'query': search_query,
    'radius': radius,
    'limit': LIMIT,
}

# The timeout stops the cell from hanging forever on a stalled connection.
bakery_response = requests.get(url, params=bakery_params, timeout=30)
# Fail on an HTTP error here rather than on a missing key when the payload is read.
bakery_response.raise_for_status()
bakery_results = bakery_response.json()

*Shape and convert* json *data into a pandas dataframe*

In [19]:
# Step into the payload to reach the list of venue records ...
bakery_payload = bakery_results['response']
bakery_venues = bakery_payload['venues']

# ... and flatten the nested records into a dataframe.
bakery_df = json_normalize(bakery_venues)

*Filter to only needed columns*

In [20]:
# Keep the venue name, its categories, every flattened 'location.*' field and the id.
location_columns = [col for col in bakery_df.columns if col.startswith('location.')]
filtered_col = ['name', 'categories', *location_columns, 'id']
bakery_filtered = bakery_df.loc[:, filtered_col]

# NOTE(review): this redefines the get_category_type already defined earlier in the
# notebook; consider deleting one of the two definitions.
def get_category_type(row):
    """Return the name of a venue's first category, or None if it has none.

    Args:
        row: Mapping-like record (for example a dataframe row) that stores the
            venue's category list under 'categories' or, failing that, under
            'venue.categories'.

    Returns:
        The 'name' entry of the first category, or None when the category
        value is missing (None/NaN) or is an empty list.

    Raises:
        KeyError: If neither 'categories' nor 'venue.categories' is present.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; other errors propagate.
        categories_list = row['venue.categories']

    # A missing value has no len(); treat None and NaN like "no categories".
    if categories_list is None:
        return None
    if isinstance(categories_list, float) and categories_list != categories_list:
        return None

    if len(categories_list) == 0:
        return None
    return categories_list[0]['name']

# Replace each venue's category list with the name of its first category.
bakery_filtered['categories'] = bakery_filtered.apply(get_category_type, axis='columns')

# Strip dotted prefixes so that only the last segment of each column name remains.
bakery_filtered.columns = [column.rsplit('.', 1)[-1] for column in bakery_filtered.columns]

*Rename columns to match existing* **Baltimore** *dataframe*

In [21]:
# Use the same column names as the Baltimore dataframe.
bakery_filtered = bakery_filtered.rename(
    columns={
        'city': 'City',
        'name': 'Name',
        'lat': 'Latitude',
        'lng': 'Longitude',
    }
)

*Merge dataframes to make one* **Baltimore** *dataframe related only to* **bakeries**

In [22]:
# Left-join the Baltimore table onto the bakery venues, then replace every gap with 0.
# NOTE(review): the join keys include exact float coordinates, so matches are probably
# rare - confirm this merge adds what is intended.
bakery_data = bakery_filtered.merge(
    baltimore_merged,
    how='left',
    on=['City', 'Latitude', 'Longitude'],
).fillna(0)

*Show only needed columns for the* **Baltimore Bakeries** *and display completed dataframe*

In [23]:
# Keep the five columns used for mapping and preview the first rows.
baltimore_bakery = bakery_data.loc[:, ['Name', 'categories', 'Latitude', 'Longitude', 'City']]
baltimore_bakery.head(5)

Unnamed: 0,Name,categories,Latitude,Longitude,City
0,Muhly's Bakery @ Lexington Market,Bakery,39.290997,-76.615985,Baltimore
1,The LB Bakery,Bakery,39.289515,-76.616263,Baltimore


*Map* **Baltimore Coffee Shops** *and* **Baltimore Bakeries** *to determine current locations*

In [24]:
# Street-level map centred on the geocoded Baltimore coordinates.
baltimore_venues_map = folium.Map(location=[latitude, longitude], zoom_start=15)


def _add_venue_markers(venues, fmap, color, fill_color):
    """Add one circle marker per row of `venues` to `fmap`.

    Args:
        venues: Dataframe with 'Latitude', 'Longitude', 'Name' and 'categories' columns.
        fmap: The folium map that receives the markers.
        color: Outline colour of the markers.
        fill_color: Fill colour of the markers.
    """
    rows = zip(venues['Latitude'], venues['Longitude'], venues['Name'], venues['categories'])
    for lat, lng, name, category in rows:
        # Label by venue name and category; the City is the same for every marker,
        # so using it as the popup text makes venues indistinguishable.
        popup = folium.Popup('{} ({})'.format(name, category), parse_html=True)
        folium.CircleMarker(
            [lat, lng],
            radius=5,
            popup=popup,
            color=color,
            fill=True,
            fill_color=fill_color,
            fill_opacity=0.7,
        ).add_to(fmap)


# Coffee shops in blue, bakeries in red (outline and fill both follow the group).
_add_venue_markers(baltimore_coffee, baltimore_venues_map, color='blue', fill_color='#3186cc')
_add_venue_markers(baltimore_bakery, baltimore_venues_map, color='red', fill_color='#cc3131')

baltimore_venues_map

*Target* **Coffee Shop** *location is in the vicinity of* **Center Plaza Park** *because there is a bakery but no adjacent coffee shop*