### Importing Libraries

In [1]:
import requests
import pandas as pd
import numpy as np

from geopy.geocoders import Nominatim
from IPython.display import Image 
from IPython.core.display import HTML 

import matplotlib.cm as cm
import matplotlib.colors as colors

import folium
from sklearn.cluster import KMeans

### Main Data Extracted from [public.opendatasoft.com](https://public.opendatasoft.com/explore/dataset/us-zip-code-latitude-and-longitude/table/)

In [2]:
raw = pd.read_csv('./us-zip-code-latitude-and-longitude.csv',sep=';')
raw.head()

Unnamed: 0,Zip,City,State,Latitude,Longitude,Timezone,Daylight savings time flag,geopoint
0,55795,Willow River,MN,46.317812,-92.84315,-6,1,"46.317812,-92.84315"
1,45388,Yorkshire,OH,40.328535,-84.47938,-5,1,"40.328535,-84.47938"
2,39483,Foxworth,MS,31.218509,-89.90761,-6,1,"31.218509,-89.90761"
3,31503,Waycross,GA,31.205194,-82.37534,-5,1,"31.205194,-82.37534"
4,45833,Delphos,OH,40.841409,-84.34178,-5,1,"40.841409,-84.34178"


### California Cities

In [3]:
ca = raw[raw.State=='CA'].sort_values('State')
ca.head()

Unnamed: 0,Zip,City,State,Latitude,Longitude,Timezone,Daylight savings time flag,geopoint
6,95717,Gold Run,CA,39.177026,-120.8451,-8,1,"39.177026,-120.8451"
29047,96090,Tehama,CA,40.024337,-122.12378,-8,1,"40.024337,-122.12378"
29090,96157,South Lake Tahoe,CA,38.934409,-119.976707,-8,1,"38.934409,-119.976707"
29136,93441,Los Olivos,CA,34.709973,-120.09201,-8,1,"34.709973,-120.09201"
29162,93906,Salinas,CA,36.709717,-121.63997,-8,1,"36.709717,-121.63997"


### Nevada Cities

In [4]:
nv = raw[raw.State=='NV'].sort_values('State')
nv.head()

Unnamed: 0,Zip,City,State,Latitude,Longitude,Timezone,Daylight savings time flag,geopoint
17,89409,Gabbs,NV,38.872139,-117.78194,-8,1,"38.872139,-117.78194"
27918,89833,Ruby Valley,NV,40.453542,-115.33257,-8,1,"40.453542,-115.33257"
28391,89007,Bunkerville,NV,36.766887,-114.13298,-8,1,"36.766887,-114.13298"
28448,89116,Las Vegas,NV,35.927901,-114.972061,-8,1,"35.927901,-114.972061"
28664,89721,Carson City,NV,39.167833,-119.776409,-8,1,"39.167833,-119.776409"


### Washington Cities

In [5]:
wa = raw[raw.State=='WA'].sort_values('State')
wa.head()

Unnamed: 0,Zip,City,State,Latitude,Longitude,Timezone,Daylight savings time flag,geopoint
7,98849,Riverside,WA,48.528793,-119.45276,-8,1,"48.528793,-119.45276"
28312,99210,Spokane,WA,47.653568,-117.431742,-8,1,"47.653568,-117.431742"
28405,98131,Seattle,WA,47.432251,-121.803388,-8,1,"47.432251,-121.803388"
28446,99021,Mead,WA,47.81108,-117.22306,-8,1,"47.81108,-117.22306"
28600,98857,Warden,WA,46.970666,-119.0672,-8,1,"46.970666,-119.0672"


### Florida Cities

In [6]:
fl = raw[raw.State=='FL'].sort_values('State')
fl.head()

Unnamed: 0,Zip,City,State,Latitude,Longitude,Timezone,Daylight savings time flag,geopoint
5,34611,Spring Hill,FL,28.564167,-82.416515,-5,1,"28.564167,-82.416515"
27504,34117,Naples,FL,26.1785,-81.55495,-5,1,"26.1785,-81.55495"
27466,34223,Englewood,FL,26.974858,-82.3665,-5,1,"26.974858,-82.3665"
27463,33031,Homestead,FL,25.526214,-80.49632,-5,1,"25.526214,-80.49632"
27462,32819,Orlando,FL,28.463509,-81.47514,-5,1,"28.463509,-81.47514"


### New York Cities

In [7]:
ny = raw[raw.State=='NY'].sort_values('State')
ny.head()

Unnamed: 0,Zip,City,State,Latitude,Longitude,Timezone,Daylight savings time flag,geopoint
12,13758,East Pharsalia,NY,42.583518,-75.721931,-5,1,"42.583518,-75.721931"
28908,11101,Long Island City,NY,40.745341,-73.93907,-5,1,"40.745341,-73.93907"
28907,10023,New York,NY,40.776099,-73.98285,-5,1,"40.776099,-73.98285"
28891,10292,New York,NY,40.780751,-73.977182,-5,1,"40.780751,-73.977182"
28890,14009,Arcade,NY,42.582693,-78.40069,-5,1,"42.582693,-78.40069"


### Combining Geographical Data with Foursquare Data

In [8]:
code = 'RILA0BKQSO00ZAI3R4NOBF2QY40PEFLZSDY0OTPYSJUCQIMG'
CLIENT_ID = 'IDQPORJB14MMWJSIZZ1VOF4FQOJU5TKYIDRMD3U51YU4K0HC' # your Foursquare ID
CLIENT_SECRET = '5B2GKQ3DQPO4XTA133KZX2S434ITQ4JUWZFWQDE3QTK3UWZT' # your Foursquare Secret
ACCESS_TOKEN = 'S3DM0BPCF5KZJOCH3VQT5KNN2SVLFKPUJMVD3KRJECCYX2F0' # your FourSquare Access Token
VERSION = '20180604'
LIMIT = 30
#https://foursquare.com/oauth2/access_token?client_id=IDQPORJB14MMWJSIZZ1VOF4FQOJU5TKYIDRMD3U51YU4K0HC&client_secret=5B2GKQ3DQPO4XTA133KZX2S434ITQ4JUWZFWQDE3QTK3UWZT&grant_type=authorization_code&redirect_uri=https://www.google.com&code=RILA0BKQSO00ZAI3R4NOBF2QY40PEFLZSDY0OTPYSJUCQIMG
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: IDQPORJB14MMWJSIZZ1VOF4FQOJU5TKYIDRMD3U51YU4K0HC
CLIENT_SECRET:5B2GKQ3DQPO4XTA133KZX2S434ITQ4JUWZFWQDE3QTK3UWZT


In [11]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
#         print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Zip', 
                  'Zip Latitude', 
                  'Zip Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

### Merging Zip Codes with Foursquare: California (In Progress...)

In [13]:
# ca_venues = getNearbyVenues(names=ca['Zip'],
#                                    latitudes=ca['Latitude'],
#                                    longitudes=ca['Longitude']
#                                   )
# ca_venues.head()

In [None]:
# ca_venues.to_csv('./ca_venues.csv', index=False)

In [None]:
# nv_venues = getNearbyVenues(names=nv['Zip'],
#                                    latitudes=nv['Latitude'],
#                                    longitudes=nv['Longitude']
#                                   )
# nv_venues

In [None]:
# wa_venues = getNearbyVenues(names=wa['Zip'],
#                                    latitudes=wa['Latitude'],
#                                    longitudes=wa['Longitude']
#                                   )
# wa_venues

In [None]:
# fl_venues = getNearbyVenues(names=fl['Zip'],
#                                    latitudes=fl['Latitude'],
#                                    longitudes=fl['Longitude']
#                                   )
# fl_venues

In [None]:
print('Total amount of queries per zip code :',raw[raw.State.str.contains('(?i)CA|NV|WA|FL')].City.count())