<a href="https://colab.research.google.com/github/nmack41/NickMackowskicom/blob/main/Google_Maps_API_Part_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import libraries
import json
import requests
import time
!pip install googlemaps
import googlemaps
import pandas as pd
import os
import re
from datetime import datetime
from google.colab import files
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats
import pylab 

Collecting googlemaps
  Downloading googlemaps-4.6.0.tar.gz (31 kB)
Building wheels for collected packages: googlemaps
  Building wheel for googlemaps (setup.py) ... [?25l[?25hdone
  Created wheel for googlemaps: filename=googlemaps-4.6.0-py3-none-any.whl size=38554 sha256=bcc3ed8d15d8cf0b490e9c99284e4007aca56333a211993703b87b8b23981d88
  Stored in directory: /root/.cache/pip/wheels/80/db/c0/6d958585fa97b20e250bf437acf7e6e715b4809c2dd4e55367
Successfully built googlemaps
Installing collected packages: googlemaps
Successfully installed googlemaps-4.6.0


In [None]:
# We compute the distance between two coordinates with the Haversine formula (https://en.wikipedia.org/wiki/Haversine_formula).
# It gives the great-circle ("as the crow flies") distance between two points on a sphere, not the driving distance.

!pip install haversine
from haversine import haversine, Unit

Collecting haversine
  Downloading haversine-2.5.1-py2.py3-none-any.whl (6.1 kB)
Installing collected packages: haversine
Successfully installed haversine-2.5.1


In [None]:
# googleapi.txt holds my API key.
# Read the file and assign the key to API_KEY. Surrounding whitespace (including
# the trailing newline) is stripped so it is not sent as part of the key, and
# blank lines are ignored. As before, the last non-empty line wins.

with open('googleapi.txt') as f:
  key_lines = [line.strip() for line in f if line.strip()]

if not key_lines:
  raise ValueError("googleapi.txt does not contain an API key")

API_KEY = key_lines[-1]

In [None]:
# Origin street address; it is geocoded to (latitude, longitude) before searching
ad = "1600 Pennsylvania Avenue NW, Washington, DC 20500"

In [None]:
# Search radius around the origin, in miles (converted to meters with miles_to_meters before calling the API)
rad = 10

In [None]:
# Search term passed as the `keyword` argument of the Places Nearby request
search_string = "fast food"

In [None]:
# Connect to the Google Maps API
map_client = googlemaps.Client(API_KEY)

# Geocode the origin address into (latitude, longitude)
geocode = map_client.geocode(address=ad)
if not geocode:
    # An empty result list would otherwise surface as an opaque IndexError below
    raise ValueError(f"No geocoding result for address: {ad!r}")
(lat, lng) = map(geocode[0]['geometry']['location'].get, ('lat', 'lng'))
origin = (lat, lng)

In [None]:
origin

(38.8976633, -77.0365739)

In [None]:
# Google 'Nearby Search' documentation: https://developers.google.com/maps/documentation/places/web-service/search-nearby?hl=en_US#maps_http_places_nearbysearch-py
# googlemaps library documentation: https://googlemaps.github.io/google-maps-services-python/docs/
# The Google Places Nearby API returns at most 20 results per request. next_page_token can be used to pull an additional 20 results up to two more times (60 in total)
# Foundation of this code is from https://learndataanalysis.org/source-code-search-nearby-businesses-with-google-maps-api-and-python/

#The API uses meters, so we need to convert miles to meters
def miles_to_meters(miles):
    """Convert a distance in miles to meters.

    Args:
        miles: Distance in miles (int or float).

    Returns:
        The distance in meters as a float, or 0 when ``miles`` cannot be
        multiplied by a float (for example ``None`` or a string).
    """
    try:
        return miles * 1_609.344
    except TypeError:
        # Keep the best-effort fallback for non-numeric input, but no longer
        # swallow unrelated errors (e.g. KeyboardInterrupt) with a bare except.
        return 0
        
# Connect to Google Maps API
map_client = googlemaps.Client(API_KEY)


# Convert origin address to latitude and longitude
geocode = map_client.geocode(address=ad)
if not geocode:
    # An empty result list would otherwise surface as an opaque IndexError below
    raise ValueError(f"No geocoding result for address: {ad!r}")
(lat, lng) = map(geocode[0]['geometry']['location'].get, ('lat', 'lng'))
origin = (lat, lng)


# Convert input radius into meters
distance = miles_to_meters(rad)


# Create empty list so we can add API output to it in the next step
business_list = []


# Run Places_Nearby using origin, search terms, and radius provided earlier
response = map_client.places_nearby(
    location=(lat, lng),
    keyword=search_string,
    radius=distance,
)


# Add results from the Places_Nearby API into the empty list we created, business_list.
# Default to an empty list so a response without 'results' does not break extend().
business_list.extend(response.get('results', []))


# next_page_token: token for retrieving the next page of results
# The first API pull only returns 20 results; the loop below aims to get the
# remaining 40, for the maximum of 60 allowed by the Google API
next_page_token = response.get('next_page_token')


# This 'while' loop fetches 20 more results per pass and stops once the API
# no longer returns a next_page_token
while next_page_token:
    # Need to pause the script for the token to become valid, otherwise the request may fail
    time.sleep(2)
    response = map_client.places_nearby(
        location=(lat, lng),
        keyword=search_string,
        radius=distance,
        page_token=next_page_token
    )
    business_list.extend(response.get('results', []))
    next_page_token = response.get('next_page_token')

In [None]:
# Create dataframe from list of search items
df = pd.DataFrame(business_list)

In [None]:
# Function to extract latitude and longitude
def coord(dictionary):
    """Extract (latitude, longitude) from a Places 'geometry' dictionary.

    Args:
        dictionary: Mapping with a 'location' entry that holds 'lat' and
            'lng' values (it may also hold other entries such as 'viewport').

    Returns:
        A ``(lat, lng)`` tuple, or ``None`` when there is no 'location' entry.
    """
    # Look up 'location' by key rather than relying on it being the first
    # item in the dictionary's iteration order.
    location = dictionary.get('location')
    if location is None:
        return None
    return location['lat'], location['lng']

In [None]:
# Apply the 'coord' function to the 'geometry' column to pull the nested latitude/longitude out into a new 'coord' column
df['coord'] = df['geometry'].apply(coord)

In [None]:
# Haversine distance, in miles, from the origin to each restaurant's coordinates
df['distance_origin'] = df['coord'].apply(
    lambda point: haversine(origin, point, unit=Unit.MILES)
)

In [None]:
# Let's see what types of restaurants we're looking at
df['types'].explode().unique()

array(['restaurant', 'food', 'point_of_interest', 'establishment', 'cafe',
       'store', 'meal_takeaway'], dtype=object)

In [None]:
# Let's add those types of 'fast food' and make them columns in our dataframe.
# Only columns that do not exist yet are joined, so re-running this cell does
# not fail on overlapping column names.
type_columns = ['restaurant', 'food', 'point_of_interest', 'establishment', 'cafe', 'store', 'meal_takeaway']
missing_type_columns = [col for col in type_columns if col not in df.columns]
if missing_type_columns:
    df = df.join(pd.DataFrame(columns=missing_type_columns))

In [None]:
# For every place type, store 1 when the type appears in the row's 'types' list and 0 otherwise
for place_type in ['restaurant', 'food', 'point_of_interest', 'establishment', 'cafe', 'store', 'meal_takeaway']:
    df[place_type] = df['types'].apply(lambda tags: int(place_type in tags))

In [None]:
# Take a look at the top of the dataframe
df.head()

Unnamed: 0,business_status,geometry,icon,icon_background_color,icon_mask_base_uri,name,opening_hours,photos,place_id,plus_code,...,vicinity,coord,distance_origin,restaurant,food,point_of_interest,establishment,cafe,store,meal_takeaway
0,OPERATIONAL,"{'location': {'lat': 38.8545488, 'lng': -77.04...",https://maps.gstatic.com/mapfiles/place_api/ic...,#FF9E67,https://maps.gstatic.com/mapfiles/place_api/ic...,Chick-fil-A,{'open_now': True},"[{'height': 500, 'html_attributions': ['<a hre...",ChIJA5feLi-3t4kRmoQG_66sN0Y,"{'compound_code': 'VX32+R4 Arlington, Virginia...",...,"2200 Crystal Dr Ste G, Arlington","(38.8545488, -77.04974519999999)",3.062018,1,1,1,1,0,0,0
1,OPERATIONAL,"{'location': {'lat': 38.8326619, 'lng': -77.20...",https://maps.gstatic.com/mapfiles/place_api/ic...,#FF9E67,https://maps.gstatic.com/mapfiles/place_api/ic...,Wendy's,{'open_now': True},"[{'height': 720, 'html_attributions': ['<a hre...",ChIJcT2yV8VMtokR_lC02LiSXpc,"{'compound_code': 'RQMV+3H Annandale, Virginia...",...,"7530 Little River Turnpike, Annandale","(38.8326619, -77.2060243)",10.162344,1,1,1,1,0,0,0
2,OPERATIONAL,"{'location': {'lat': 38.7735678, 'lng': -77.18...",https://maps.gstatic.com/mapfiles/place_api/ic...,#FF9E67,https://maps.gstatic.com/mapfiles/place_api/ic...,Chick-fil-A,{'open_now': True},"[{'height': 500, 'html_attributions': ['<a hre...",ChIJWfhCVVett4kRIhKFzfi88wc,"{'compound_code': 'QRF8+CQ Springfield, Virgin...",...,"6681A Backlick Rd, Springfield","(38.7735678, -77.1831675)",11.651774,1,1,1,1,0,0,0
3,OPERATIONAL,"{'location': {'lat': 38.8680938, 'lng': -77.15...",https://maps.gstatic.com/mapfiles/place_api/ic...,#FF9E67,https://maps.gstatic.com/mapfiles/place_api/ic...,Wendy's,{'open_now': True},"[{'height': 720, 'html_attributions': ['<a hre...",ChIJA2j92Ii0t4kRT4r3YbB3V34,"{'compound_code': 'VR9X+76 Falls Church, Virgi...",...,"6349 Seven Corners, Falls Church","(38.8680938, -77.15192789999999)",6.531983,1,1,1,1,0,0,0
4,OPERATIONAL,"{'location': {'lat': 38.8789934, 'lng': -77.11...",https://maps.gstatic.com/mapfiles/place_api/ic...,#FF9E67,https://maps.gstatic.com/mapfiles/place_api/ic...,Chick-Fil-A,{'open_now': False},"[{'height': 1908, 'html_attributions': ['<a hr...",ChIJMyRwx4i1t4kRLZ1oN3Kdt7U,"{'compound_code': 'VVHQ+HG Arlington, Virginia...",...,"671 N Glebe Rd Ste 1270, Arlington","(38.8789934, -77.11115459999999)",4.213303,1,1,1,1,0,0,0


# Getting a quick overview of the data

In [None]:
# Is there any missing data?
df_na = df.isnull().sum()
df_na

# No there isn't

business_status          0
geometry                 0
icon                     0
icon_background_color    0
icon_mask_base_uri       0
name                     0
opening_hours            0
photos                   0
place_id                 0
plus_code                0
price_level              0
rating                   0
reference                0
scope                    0
types                    0
user_ratings_total       0
vicinity                 0
coord                    0
distance_origin          0
restaurant               0
food                     0
point_of_interest        0
establishment            0
cafe                     0
store                    0
meal_takeaway            0
dtype: int64

In [None]:
# Let's get an idea of the descriptive stats of the numeric variables
df.describe()

Unnamed: 0,price_level,rating,user_ratings_total,distance_origin,restaurant,food,point_of_interest,establishment,cafe,store,meal_takeaway
count,60.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0
mean,1.033333,3.881667,1042.85,5.60353,1.0,1.0,1.0,1.0,0.466667,0.466667,0.066667
std,0.18102,0.287886,577.513572,3.549077,0.0,0.0,0.0,0.0,0.503098,0.503098,0.251549
min,1.0,3.2,159.0,0.208385,1.0,1.0,1.0,1.0,0.0,0.0,0.0
25%,1.0,3.7,583.0,2.631547,1.0,1.0,1.0,1.0,0.0,0.0,0.0
50%,1.0,3.8,908.0,5.539805,1.0,1.0,1.0,1.0,0.0,0.0,0.0
75%,1.0,4.125,1349.0,8.41679,1.0,1.0,1.0,1.0,1.0,1.0,0.0
max,2.0,4.4,3177.0,11.906898,1.0,1.0,1.0,1.0,1.0,1.0,1.0


- We see that the average 'Price Level' is about 1 (Google's scale runs from 0 to 4) and the max is 2, so we have plenty of cheap fast food around
- The average rating is 3.9 with a range of 3.2 to 4.4
- The average number of user ratings is about 1,043 with a range of 159 to 3,177
- I do not believe we can draw many conclusions from the other data

# Cleaning Data

In [None]:
df.head()

Unnamed: 0,business_status,geometry,icon,icon_background_color,icon_mask_base_uri,name,opening_hours,photos,place_id,plus_code,...,vicinity,coord,distance_origin,restaurant,food,point_of_interest,establishment,cafe,store,meal_takeaway
0,OPERATIONAL,"{'location': {'lat': 38.8545488, 'lng': -77.04...",https://maps.gstatic.com/mapfiles/place_api/ic...,#FF9E67,https://maps.gstatic.com/mapfiles/place_api/ic...,Chick-fil-A,{'open_now': True},"[{'height': 500, 'html_attributions': ['<a hre...",ChIJA5feLi-3t4kRmoQG_66sN0Y,"{'compound_code': 'VX32+R4 Arlington, Virginia...",...,"2200 Crystal Dr Ste G, Arlington","(38.8545488, -77.04974519999999)",3.062018,1,1,1,1,0,0,0
1,OPERATIONAL,"{'location': {'lat': 38.8326619, 'lng': -77.20...",https://maps.gstatic.com/mapfiles/place_api/ic...,#FF9E67,https://maps.gstatic.com/mapfiles/place_api/ic...,Wendy's,{'open_now': True},"[{'height': 720, 'html_attributions': ['<a hre...",ChIJcT2yV8VMtokR_lC02LiSXpc,"{'compound_code': 'RQMV+3H Annandale, Virginia...",...,"7530 Little River Turnpike, Annandale","(38.8326619, -77.2060243)",10.162344,1,1,1,1,0,0,0
2,OPERATIONAL,"{'location': {'lat': 38.7735678, 'lng': -77.18...",https://maps.gstatic.com/mapfiles/place_api/ic...,#FF9E67,https://maps.gstatic.com/mapfiles/place_api/ic...,Chick-fil-A,{'open_now': True},"[{'height': 500, 'html_attributions': ['<a hre...",ChIJWfhCVVett4kRIhKFzfi88wc,"{'compound_code': 'QRF8+CQ Springfield, Virgin...",...,"6681A Backlick Rd, Springfield","(38.7735678, -77.1831675)",11.651774,1,1,1,1,0,0,0
3,OPERATIONAL,"{'location': {'lat': 38.8680938, 'lng': -77.15...",https://maps.gstatic.com/mapfiles/place_api/ic...,#FF9E67,https://maps.gstatic.com/mapfiles/place_api/ic...,Wendy's,{'open_now': True},"[{'height': 720, 'html_attributions': ['<a hre...",ChIJA2j92Ii0t4kRT4r3YbB3V34,"{'compound_code': 'VR9X+76 Falls Church, Virgi...",...,"6349 Seven Corners, Falls Church","(38.8680938, -77.15192789999999)",6.531983,1,1,1,1,0,0,0
4,OPERATIONAL,"{'location': {'lat': 38.8789934, 'lng': -77.11...",https://maps.gstatic.com/mapfiles/place_api/ic...,#FF9E67,https://maps.gstatic.com/mapfiles/place_api/ic...,Chick-Fil-A,{'open_now': False},"[{'height': 1908, 'html_attributions': ['<a hr...",ChIJMyRwx4i1t4kRLZ1oN3Kdt7U,"{'compound_code': 'VVHQ+HG Arlington, Virginia...",...,"671 N Glebe Rd Ste 1270, Arlington","(38.8789934, -77.11115459999999)",4.213303,1,1,1,1,0,0,0


In [None]:
df.columns

Index(['business_status', 'geometry', 'icon', 'icon_background_color',
       'icon_mask_base_uri', 'name', 'opening_hours', 'photos', 'place_id',
       'plus_code', 'price_level', 'rating', 'reference', 'scope', 'types',
       'user_ratings_total', 'vicinity', 'coord', 'distance_origin',
       'restaurant', 'food', 'point_of_interest', 'establishment', 'cafe',
       'store', 'meal_takeaway'],
      dtype='object')

In [None]:
# Does geometry tell us anything unique?
df['geometry'][0]

# We only wanted latitude and longitude, which we extracted. We will drop this column.

{'location': {'lat': 38.8545488, 'lng': -77.04974519999999},
 'viewport': {'northeast': {'lat': 38.85582092989273,
   'lng': -77.04822592010727},
  'southwest': {'lat': 38.85312127010728, 'lng': -77.05092557989272}}}

In [None]:
# Are any businesses closed?
df['business_status'].unique()

# It appears all businesses are operational

array(['OPERATIONAL'], dtype=object)

In [None]:
# Columns removed here:
#   - business_status: every business is OPERATIONAL, so it carries no information
#   - geometry: latitude/longitude were already extracted into 'coord'
#   - icon, icon_background_color, icon_mask_base_uri, photos, place_id, plus_code,
#     reference, scope, types: not relevant to the analysis
#   - opening_hours: 'open_now' is only meaningful at the moment the data was pulled
# 'name' and the columns added earlier in the notebook are kept.

columns_to_drop = [
    'business_status',
    'geometry',
    'icon',
    'icon_background_color',
    'icon_mask_base_uri',
    'opening_hours',
    'photos',
    'place_id',
    'plus_code',
    'reference',
    'scope',
    'types',
]
df = df.drop(columns=columns_to_drop)

In [None]:
df.head()

Unnamed: 0,name,price_level,rating,user_ratings_total,vicinity,coord,distance_origin,restaurant,food,point_of_interest,establishment,cafe,store,meal_takeaway
0,Wendy's,1,4.1,440,"7530 Little River Turnpike, Annandale","(38.8326619, -77.2060243)",10.162344,1,1,1,1,0,0,0
1,Chick-fil-A,1,4.4,2235,"6681A Backlick Rd, Springfield","(38.7735678, -77.1831675)",11.651774,1,1,1,1,0,0,0
2,Chick-Fil-A,1,4.4,478,"671 N Glebe Rd Ste 1270, Arlington","(38.8789934, -77.11115459999999)",4.213303,1,1,1,1,0,0,0
3,Wendy's,1,3.8,743,"229 S Van Dorn St, Alexandria","(38.8106487, -77.1326067)",7.927502,1,1,1,1,0,0,0
4,Chick-fil-A,1,4.4,1287,"2200 Crystal Dr Ste G, Arlington","(38.8545488, -77.04974519999999)",3.062018,1,1,1,1,0,0,0


In [None]:
# Build the CSV file name: <MM_DD_YYYY>_<search term>_from_<radius>_PROJECT.csv,
# with spaces turned into underscores

today = datetime.now()
fileext = f"{today.strftime('%m_%d_%Y')}_{search_string}_from_{rad}_PROJECT.csv"
fileext = fileext.replace(" ", "_")

# Normalise any backslashes to forward slashes
filecsv = fileext.replace("\\", "/")

# Write the dataframe to disk, then trigger the Colab browser download

df.to_csv(filecsv)
files.download(filecsv)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>