# Goal
Code to randomly select 5 Starbucks locations from the Google Maps search results for a given city.
We'll use this selection to estimate Starbucks prices per city.

In [46]:
# Import libraries
import random
import re
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [109]:
# Parameters
## City we searched for in google maps.
## This value is written to the `city` column and used in the output file name.
## NOTE: the Google Maps search URL is hard-coded separately and does not read
## this variable, so the two have to be kept in sync by hand.
city = "Chicago"

In [110]:
# google maps url
## The search location (the `@41.85...,-87.76...,11z` part) is baked into this
## URL and is independent of `city`; replace the URL when searching another city.
url = 'https://www.google.com/maps/search/starbucks/@41.8565559,-87.7684128,11z/data=!3m1!4b1?entry=ttu&g_ep=EgoyMDI0MTIxMS4wIKXMDSoASAFQAw%3D%3D'
## `html` is the Response object; the page source itself is `html.text`.
## Without a timeout, requests can wait indefinitely on an unresponsive server.
html = requests.get(url, timeout=30)
## Fail here on an HTTP error status instead of silently parsing an error page.
html.raise_for_status()
# html.text

In [111]:
# Try to isolate Starbucks addresses from text
## Each address appears in the page source as a bracketed, backslash-escaped
## string of the form [\"Starbucks, <address>\"]
pattern = r'\[\\\"Starbucks, [^\\\"]+\\\"\]'

# Compile the expression, then collect every occurrence in the response body
address_regex = re.compile(pattern)
matches = address_regex.findall(html.text)
matches

['[\\"Starbucks, 7788 S Cicero Ave, Burbank, IL 60459\\"]',
 '[\\"Starbucks, 55 E Jackson Blvd, Chicago, IL 60604\\"]',
 '[\\"Starbucks, 8359 W Grand Ave, River Grove, IL 60171\\"]',
 '[\\"Starbucks, 6576 N Sheridan Rd, Chicago, IL 60626\\"]',
 '[\\"Starbucks, 715 S Seeley Ave, Chicago, IL 60612\\"]',
 '[\\"Starbucks, 4558 N Kedzie Ave, Chicago, IL 60625\\"]',
 '[\\"Starbucks, 200 S Michigan Ave, Chicago, IL 60604\\"]',
 '[\\"Starbucks, 8 N Michigan Ave, Chicago, IL 60602\\"]',
 '[\\"Starbucks, 7112 W North Ave, Chicago, IL 60707\\"]',
 '[\\"Starbucks, 40 W Lake St, Chicago, IL 60601\\"]',
 '[\\"Starbucks, 11016 S Cicero Ave, Oak Lawn, IL 60453\\"]',
 '[\\"Starbucks, 9564 S Western Ave, Evergreen Park, IL 60805\\"]',
 '[\\"Starbucks, 6350 N Broadway, Chicago, IL 60660\\"]',
 '[\\"Starbucks, 2655 Mannheim Rd, Des Plaines, IL 60018\\"]',
 '[\\"Starbucks, 4365 W Irving Park Rd, Chicago, IL 60641\\"]',
 '[\\"Starbucks, 10259 S Harlem Ave, Chicago Ridge, IL 60415\\"]',
 '[\\"Starbucks, 2730

In [112]:
# clean up addresses
## every match is wrapped as [\"Starbucks, <address>\"]; strip the leading
## [\"Starbucks, and the trailing \"] so only the address remains
cleaned_data = []
for raw_match in matches:
    address = raw_match.replace(r'[\"Starbucks, ', '')
    address = address.replace(r'\"]', '')
    cleaned_data.append(address)
print(cleaned_data)

['7788 S Cicero Ave, Burbank, IL 60459', '55 E Jackson Blvd, Chicago, IL 60604', '8359 W Grand Ave, River Grove, IL 60171', '6576 N Sheridan Rd, Chicago, IL 60626', '715 S Seeley Ave, Chicago, IL 60612', '4558 N Kedzie Ave, Chicago, IL 60625', '200 S Michigan Ave, Chicago, IL 60604', '8 N Michigan Ave, Chicago, IL 60602', '7112 W North Ave, Chicago, IL 60707', '40 W Lake St, Chicago, IL 60601', '11016 S Cicero Ave, Oak Lawn, IL 60453', '9564 S Western Ave, Evergreen Park, IL 60805', '6350 N Broadway, Chicago, IL 60660', '2655 Mannheim Rd, Des Plaines, IL 60018', '4365 W Irving Park Rd, Chicago, IL 60641', '10259 S Harlem Ave, Chicago Ridge, IL 60415', '2730 N Narragansett Ave, Chicago, IL 60639', '10301 S Cicero Ave, Oak Lawn, IL 60453', '555 S Dearborn St, Chicago, IL 60605', '4830 N Pulaski Rd, Chicago, IL 60630']


In [113]:
# randomly select 5 starbucks:
## Use a dedicated, seeded generator so that re-running the notebook gives the
## same picks (prices for the selected locations are collected by hand).
## The city name is part of the seed so that different cities do not all end up
## with the same list positions.
N_SELECTED = 5
SEED = 42
rng = random.Random(f"{city}-{SEED}")

## random.sample raises ValueError when asked for more items than the list
## holds, so cap the sample size at the number of addresses that were found.
if len(cleaned_data) < N_SELECTED:
    print(f"Warning: only {len(cleaned_data)} locations found; selecting all of them")
random_selection = rng.sample(cleaned_data, min(N_SELECTED, len(cleaned_data)))
print(random_selection)

['7788 S Cicero Ave, Burbank, IL 60459', '10259 S Harlem Ave, Chicago Ridge, IL 60415', '40 W Lake St, Chicago, IL 60601', '11016 S Cicero Ave, Oak Lawn, IL 60453', '715 S Seeley Ave, Chicago, IL 60612']


For each of the 5 randomly selected Starbucks locations, look up the price of a tall latte at that address (done manually; not part of this code).

In [114]:
# Build a one-column table holding every address that was found
df = pd.DataFrame({'locations': cleaned_data})

In [115]:
# Flag each row with 'yes' when its address is one of the randomly chosen
# locations and 'no' otherwise
is_selected = df['locations'].isin(random_selection)
df['selected'] = np.where(is_selected, 'yes', 'no')

In [116]:
# Record which city these locations belong to (same value on every row)
df = df.assign(city=city)

In [117]:
# Put the columns in their final order: city, then address, then the flag
column_order = ['city', 'locations', 'selected']
df = df.loc[:, column_order]

In [118]:
# Preview the first five rows of the assembled table
df.head(n=5)

Unnamed: 0,city,locations,selected
0,Chicago,"7788 S Cicero Ave, Burbank, IL 60459",yes
1,Chicago,"55 E Jackson Blvd, Chicago, IL 60604",no
2,Chicago,"8359 W Grand Ave, River Grove, IL 60171",no
3,Chicago,"6576 N Sheridan Rd, Chicago, IL 60626",no
4,Chicago,"715 S Seeley Ave, Chicago, IL 60612",yes


In [119]:
# write out dataframe
## Create the output folder first: to_csv does not create missing directories
## and would fail if ../data does not exist yet.
output_dir = Path("../data")
output_dir.mkdir(parents=True, exist_ok=True)
## as_posix() keeps `filename` a plain forward-slash string,
## e.g. "../data/<city>_google_maps_locations.csv".
filename = (output_dir / (city + "_google_maps_locations.csv")).as_posix()
df.to_csv(filename, index = False)