# **Oakland & San Fran Breweries**
##### By Peter J. Lindner, Ph.D.

In [1]:
pip install geopy

Note: you may need to restart the kernel to use updated packages.


In [2]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analysis

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


### Make a dataframe from FourSquare of Brewery, Brewpubs, etc.

In [3]:
address = 'San Francisco'

geolocator = Nominatim(user_agent="SanFrancisco_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of San Francisco are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of San Francisco are 37.7792808, -122.4192363.


Category id for:
* Brewery is 50327c8591d4c4b30a586d5d
* Beer garden is 4bf58dd8d48988d117941735

https://developer.foursquare.com/docs/resources/categories

In [4]:
CLIENT_ID = '23CV1SKWTESEYTSKVVOS5MG5RE2X5PZ5BTYVJEDRNOJONGAH' # your Foursquare ID
CLIENT_SECRET = 'SOP0UXDMPOZEFDO2TNSEWAQRTMGMYM5ZJWGU1KSDOFE0SSPR' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
CATEGORY_ID = '50327c8591d4c4b30a586d5d'

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)
print('CATEGORY_ID:' + CATEGORY_ID)

Your credentails:
CLIENT_ID: 23CV1SKWTESEYTSKVVOS5MG5RE2X5PZ5BTYVJEDRNOJONGAH
CLIENT_SECRET:SOP0UXDMPOZEFDO2TNSEWAQRTMGMYM5ZJWGU1KSDOFE0SSPR
CATEGORY_ID:50327c8591d4c4b30a586d5d


In [5]:
LIMIT = 1000 # limit of number of venues returned by Foursquare API
radius = 8000 # define radius (8000meters ~ 5 miles)
 # create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}&categoryId={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    latitude, 
    longitude, 
    radius, 
    LIMIT,
    CATEGORY_ID
)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=23CV1SKWTESEYTSKVVOS5MG5RE2X5PZ5BTYVJEDRNOJONGAH&client_secret=SOP0UXDMPOZEFDO2TNSEWAQRTMGMYM5ZJWGU1KSDOFE0SSPR&v=20180605&ll=37.7792808,-122.4192363&radius=8000&limit=1000&categoryId=50327c8591d4c4b30a586d5d'

In [6]:
results = requests.get(url).json()

In [7]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [8]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head(10)

Unnamed: 0,name,categories,lat,lng
0,Cellarmaker Brewing Company,Brewery,37.777116,-122.410714
1,Anchor Brewing Company,Brewery,37.763395,-122.401021
2,21st Amendment Brewery & Restaurant,Brewery,37.782346,-122.392588
3,Local Brewing Co.,Brewery,37.77655,-122.397171
4,Southern Pacific Brewing,Brewery,37.760077,-122.414008
5,Anchor Public Taps,Brewery,37.764007,-122.401256
6,Black Sands Brewery,Brewery,37.771612,-122.433775
7,Woods Cervecería,Brewery,37.76123,-122.42853
8,Standard Deviant Brewing,Brewery,37.768361,-122.419516
9,Harmonic Brewing,Brewery,37.751553,-122.39007


In [9]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

69 venues were returned by Foursquare.


In [10]:
unique_venues = nearby_venues.categories.unique()

In [11]:
print(unique_venues)

['Brewery' 'Beer Bar' 'Bar' 'Gastropub' 'Beer Garden' 'Dive Bar'
 'Pizza Place' 'Beer Store' 'BBQ Joint' 'American Restaurant'
 'Liquor Store' 'Italian Restaurant' 'Chocolate Shop' 'Burger Joint'
 'Café' 'General Travel' 'Winery']


In [12]:
Breweries = nearby_venues

In [13]:
Breweries.head()

Unnamed: 0,name,categories,lat,lng
0,Cellarmaker Brewing Company,Brewery,37.777116,-122.410714
1,Anchor Brewing Company,Brewery,37.763395,-122.401021
2,21st Amendment Brewery & Restaurant,Brewery,37.782346,-122.392588
3,Local Brewing Co.,Brewery,37.77655,-122.397171
4,Southern Pacific Brewing,Brewery,37.760077,-122.414008


### Cluster the data based on DBSCAN Clustering

In [14]:
X = Breweries.drop(['name','categories'], axis = 1).values

In [15]:
type(X)

numpy.ndarray

In [16]:
epsilon = .2
minimumSamples = 4

In [17]:
from sklearn.cluster import DBSCAN
import sklearn.utils
from sklearn.preprocessing import StandardScaler
sklearn.utils.check_random_state(1000)
X = StandardScaler().fit_transform(X)

# Compute DBSCAN
db = DBSCAN(eps=epsilon, min_samples=minimumSamples).fit(X)
core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
core_samples_mask[db.core_sample_indices_] = True
labels = db.labels_+2
Breweries["Clus_DB"]=labels

realClusterNum=len(set(labels)) - (1 if -1 in labels else 0)
clusterNum = len(set(labels)) 

# A sample of clusters
Breweries[["name","categories","lat","lng","Clus_DB"]].head(5)

Unnamed: 0,name,categories,lat,lng,Clus_DB
0,Cellarmaker Brewing Company,Brewery,37.777116,-122.410714,4
1,Anchor Brewing Company,Brewery,37.763395,-122.401021,5
2,21st Amendment Brewery & Restaurant,Brewery,37.782346,-122.392588,2
3,Local Brewing Co.,Brewery,37.77655,-122.397171,2
4,Southern Pacific Brewing,Brewery,37.760077,-122.414008,1


In [18]:
set(labels)

{1, 2, 3, 4, 5}

In [19]:
num_cluster= max(set(labels))

In [20]:
# create map of Portland with Breweries using latitude and longitude values
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)

# set color scheme for the clusters
x = np.arange(num_cluster)
ys = [i + x + (i*x)**2 for i in range(num_cluster)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to map  
markers_colors = []
for lat, lng, name, cat, cluster in zip(Breweries['lat'], Breweries['lng'], Breweries['name'], Breweries['categories'], Breweries['Clus_DB']):
    #label = folium.Popup(' Cluster ' + str(cluster), parse_html=True)
    label = '{}, {}'.format(name, cluster) #'{}, {}'.format(name, cat)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color=rainbow[cluster-2],
        fill=True,
        fill_color=rainbow[cluster-2],
        fill_opacity=0.8).add_to(map_clusters)   
    
map_clusters

In [21]:
Breweries.loc[Breweries['Clus_DB'] == 2, Breweries.columns[[0]]]

Unnamed: 0,name
2,21st Amendment Brewery & Restaurant
3,Local Brewing Co.
13,Black Hammer Brewing
61,Pacific Brewing Laboratory


In [22]:
Breweries.loc[Breweries['Clus_DB'] == 3, Breweries.columns[[0]]]

Unnamed: 0,name
16,Bartlett Hall
23,ThirstyBear Brewing Company
35,Mikkeller Bar SF
39,Hopwater Distribution
42,Topsy's Fun House
59,Bartlett Brewing Co.
60,Super Duper Burgers


In [23]:
Breweries.loc[Breweries['Clus_DB'] == 4, Breweries.columns[[0]]]

Unnamed: 0,name
0,Cellarmaker Brewing Company
30,The Beer Hall
37,Fermentation Lab
55,K-OZ Restaurant & Brewery


In [24]:
Breweries.loc[Breweries['Clus_DB'] == 5, Breweries.columns[[0]]]

Unnamed: 0,name
1,Anchor Brewing Company
5,Anchor Public Taps
41,Oda Restaurant & Brewery
52,Connecticut Yankee


###  Oakland

In [25]:
address = 'Oakland'

geolocator = Nominatim(user_agent="Oakland_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Oakland are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Oakland are 37.8044557, -122.2713563.


In [26]:
LIMIT = 1000 # limit of number of venues returned by Foursquare API
radius = 8000 # define radius (8000meters ~ 5 miles)
 # create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}&categoryId={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    latitude, 
    longitude, 
    radius, 
    LIMIT,
    CATEGORY_ID
)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=23CV1SKWTESEYTSKVVOS5MG5RE2X5PZ5BTYVJEDRNOJONGAH&client_secret=SOP0UXDMPOZEFDO2TNSEWAQRTMGMYM5ZJWGU1KSDOFE0SSPR&v=20180605&ll=37.8044557,-122.2713563&radius=8000&limit=1000&categoryId=50327c8591d4c4b30a586d5d'

In [27]:
results = requests.get(url).json()

In [66]:
epsilon = .21
minimumSamples = 3

In [67]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [68]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head(10)

Unnamed: 0,name,categories,lat,lng
0,Woods Bar & Brewery,Brewery,37.806889,-122.270415
1,Original Pattern Brewing Company,Brewery,37.795997,-122.271575
2,Diving Dog Brewhouse,Brewery,37.807751,-122.269858
3,Almanac Beer Co. Barrel House & Taproom,Brewery,37.783144,-122.298627
4,Drake's Dealership,Brewery,37.812825,-122.266742
5,Old Kan Beer & Co.,Brewery,37.799681,-122.287634
6,Temescal Brewing,Brewery,37.830427,-122.264394
7,Oakland United Beerworks,Brewery,37.79956,-122.28793
8,Faction Brewing,Brewery,37.786619,-122.309823
9,Ghost Town Brewing,Brewery,37.814237,-122.284287


In [69]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

31 venues were returned by Foursquare.


In [70]:
Breweries = nearby_venues
X = Breweries.drop(['name','categories'], axis = 1).values

In [71]:
from sklearn.cluster import DBSCAN
import sklearn.utils
from sklearn.preprocessing import StandardScaler
sklearn.utils.check_random_state(1000)
X = StandardScaler().fit_transform(X)

# Compute DBSCAN
db = DBSCAN(eps=epsilon, min_samples=minimumSamples).fit(X)
core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
core_samples_mask[db.core_sample_indices_] = True
labels = db.labels_+2
Breweries["Clus_DB"]=labels

realClusterNum=len(set(labels)) - (1 if -1 in labels else 0)
clusterNum = len(set(labels)) 

# A sample of clusters
Breweries[["name","categories","lat","lng","Clus_DB"]].head(5)

Unnamed: 0,name,categories,lat,lng,Clus_DB
0,Woods Bar & Brewery,Brewery,37.806889,-122.270415,2
1,Original Pattern Brewing Company,Brewery,37.795997,-122.271575,3
2,Diving Dog Brewhouse,Brewery,37.807751,-122.269858,2
3,Almanac Beer Co. Barrel House & Taproom,Brewery,37.783144,-122.298627,1
4,Drake's Dealership,Brewery,37.812825,-122.266742,2


In [72]:
# create map of Portland with Breweries using latitude and longitude values
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)

# set color scheme for the clusters
x = np.arange(num_cluster)
ys = [i + x + (i*x)**2 for i in range(num_cluster)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to map  
markers_colors = []
for lat, lng, name, cat, cluster in zip(Breweries['lat'], Breweries['lng'], Breweries['name'], Breweries['categories'], Breweries['Clus_DB']):
    #label = folium.Popup(' Cluster ' + str(cluster), parse_html=True)
    label = '{}, {}'.format(name, cluster) #'{}, {}'.format(name, cat)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color=rainbow[cluster-2],
        fill=True,
        fill_color=rainbow[cluster-2],
        fill_opacity=0.8).add_to(map_clusters)   
    
map_clusters

In [73]:
Breweries.loc[Breweries['Clus_DB'] == 2, Breweries.columns[[0]]]

Unnamed: 0,name
0,Woods Bar & Brewery
2,Diving Dog Brewhouse
4,Drake's Dealership
23,Telegraph Bar and Beer Garden
27,Modern Times
28,Floodcraft Taproom
29,Crossburgers


In [74]:
Breweries.loc[Breweries['Clus_DB'] == 3, Breweries.columns[[0]]]

Unnamed: 0,name
1,Original Pattern Brewing Company
12,Independent Brewing Company
20,Federation Brewing
24,The Trappist


In [37]:
Breweries.loc[Breweries['Clus_DB'] == 4, Breweries.columns[[0]]]

Unnamed: 0,name
