# Yelp Scraper - Search Results

Search for a keyword on Yelp and scrape the results.

In [332]:
import os
import re

import lxml.html
import pandas as pd
import requests
from bs4 import BeautifulSoup
from selenium import webdriver

### Search

In [333]:
# Location of the chromedriver executable. Set the CHROMEDRIVER_PATH environment
# variable to run the notebook on another machine; the original local path is
# kept as the fallback so existing setups keep working unchanged.
chrome_path = os.environ.get("CHROMEDRIVER_PATH", r"/Users/selenalu/Documents/chromedriver")
driver = webdriver.Chrome(chrome_path)

In [337]:
# Open the Yelp home page in the Selenium-controlled browser; the search box
# looked up in a later cell is expected to be on this page.
url = 'https://www.yelp.com'
driver.get(url)

In [338]:
# Keyword typed by the user; it is sent to the Yelp search box below.
search = input('Search: ')

Search: fast food


In [339]:
# Locate the search box by its id, type the keyword into it, and submit the form.
search_boxes = driver.find_elements_by_xpath('//input[@id="find_desc"]')
element = search_boxes[0]  # IndexError here means the box was not found on the page
element.send_keys(search)
element.submit()

### Scrape Results

Get names and URLs.

In [340]:
def names_urls(driver):
    """Collect result names and URLs from the current page, dropping ad results.

    Every anchor matching the result-link class is read for its text and
    ``href``. Links whose URL contains ``'adredir?'`` are treated as ads: the
    result container at the same position is removed from the page via
    JavaScript, and the entry is dropped from both lists.

    Args:
        driver: Selenium WebDriver currently showing a search-results page.

    Returns:
        ``[names, urls]`` -- two parallel lists with the ad entries removed.
    """
    element = driver.find_elements_by_xpath('//a[@class = " link__09f24__1kwXV link-color--inherit__09f24__3PYlA link-size--inherit__09f24__2Uj95"]')
    names = [link.text for link in element]
    urls = [link.get_attribute('href') for link in element]
    script = "return document.getElementsByClassName(' container__09f24__21w3G hoverable__09f24__2nTf3 margin-t3__09f24__5bM2Z margin-b3__09f24__1DQ9x padding-t3__09f24__-R_5x padding-r3__09f24__1pBFG padding-b3__09f24__1vW6j padding-l3__09f24__1yCJf border--top__09f24__1H_WE border--right__09f24__28idl border--bottom__09f24__2FjZW border--left__09f24__33iol border-color--default__09f24__R1nRO')[i].remove();"
    # Iterate over a snapshot because `urls` shrinks inside the loop.
    for url in list(urls):
        # get_attribute() yields None for an anchor without href; such a link
        # cannot be an ad redirect, so keep it instead of raising TypeError.
        if not url or 'adredir?' not in url:
            continue
        # Position in the *remaining* list: containers removed earlier have
        # already shifted the page's indices by the same amount.
        # NOTE(review): assumes one result link per container -- confirm.
        position = urls.index(url)
        driver.execute_script(script.replace("[i]", "[{}]".format(position)))
        del names[position]
        del urls[position]
    return [names, urls]

Get number of stars.

In [341]:
def STARS(driver):
    """Return the star rating of every result on the current page as floats.

    The page source is parsed with BeautifulSoup; each ``div`` whose
    ``aria-label`` contains "star rating" contributes one value, obtained by
    stripping the " star rating" suffix from the label.
    """
    label_suffix = ' star rating'
    soup = BeautifulSoup(driver.page_source, "html.parser")
    stars = []
    for rating_div in soup.select('div[aria-label*="star rating"]'):
        stars.append(float(rating_div['aria-label'].replace(label_suffix, '')))
    return stars

Get categories and price range.

In [342]:
def categories_price(driver):
    """Return the category text and price level of every result on the page.

    Each price/category element's text is read once. The category is that text
    with every ``$`` removed; the price level is the number of ``$`` characters
    at the start of the text (0 when the text does not begin with ``$``).

    Args:
        driver: Selenium WebDriver currently showing a search-results page.

    Returns:
        ``[categories, price]`` -- a list of strings and a parallel list of ints.
    """
    element = driver.find_elements_by_xpath('//div[@class=" priceCategory__09f24__2IbAM display--inline-block__09f24__FsgS4 border-color--default__09f24__R1nRO"]')
    # Read .text once per element: every access is a round-trip to the browser.
    texts = [node.text for node in element]
    categories = [text.replace('$', '') for text in texts]
    # Count the leading run of '$' without a regex (the former '\$*' pattern
    # was an invalid escape sequence in a non-raw string literal).
    price = [len(text) - len(text.lstrip('$')) for text in texts]
    return [categories, price]

Get number of reviews.

In [343]:
def REVIEWS(driver):
    """Return the review-count text of every result on the current page.

    The values are the raw ``.text`` of each review-count span, i.e. strings.
    """
    count_xpath = '//span[@class=" text__09f24__2tZKC reviewCount__09f24__EUXPN text-color--black-extra-light__09f24__38DtK text-align--left__09f24__3Drs0"]'
    reviews = []
    for count_span in driver.find_elements_by_xpath(count_xpath):
        reviews.append(count_span.text)
    return reviews

Get results for 1 to 9 pages: collect the pagination links, then scrape each requested page.

In [344]:
# Collect the href of every pagination link shown on the current results page.
element = driver.find_elements_by_xpath('//a[@class=" link__09f24__1kwXV pagination-link-component__09f24__H0ICg link-color--inherit__09f24__3PYlA link-size--inherit__09f24__2Uj95"]')
pages = list(map(lambda page_link: page_link.get_attribute("href"), element))

In [345]:
def get_results(url):
    """Load a results page and return its listings as a DataFrame.

    Navigates the notebook-level ``driver`` to ``url``, runs each scraper once,
    and assembles the lists into one frame.

    Args:
        url: Address of a Yelp search-results page.

    Returns:
        DataFrame with columns Name, Category, Stars, Reviews,
        'Price Range (number of $ signs)' and Url.

    Raises:
        ValueError: raised by pandas if the scraped lists differ in length.
    """
    driver.get(url)
    # Each scraper returns both of its lists from a single pass over the page,
    # so unpack one call instead of querying the browser twice per pair.
    names, urls = names_urls(driver)
    stars = STARS(driver)
    category, price = categories_price(driver)
    reviews = REVIEWS(driver)
    df = pd.DataFrame(data={'Name':names,'Category':category,'Stars':stars,'Reviews':reviews,'Price Range (number of $ signs)':price,'Url':urls})
    return df

In [346]:
# Number of result pages to scrape, read as text; it is converted with int()
# where it is used.
num_pages = input("Number of pages: ")

Number of pages: 2


In [347]:
# Scrape the results page the driver is currently showing (the first page).
# Each scraper is called once and its pair of lists unpacked.
names, urls = names_urls(driver)
stars = STARS(driver)
category, price = categories_price(driver)
reviews = REVIEWS(driver)
df = pd.DataFrame(data={'Name':names,'Category':category,'Stars':stars,'Reviews':reviews,'Price Range (number of $ signs)':price,'Url':urls})

# The first page is already in `df`, so only num_pages - 1 further pages are
# loaded. Slicing (rather than indexing) stops quietly when fewer pagination
# links exist than pages requested.
# NOTE(review): assumes `pages` lists the links to the pages after the current
# one, in order -- confirm against the pagination markup.
extra_pages = pages[:max(int(num_pages) - 1, 0)]
# Build all frames first and concatenate once instead of growing `df` in a loop.
df = pd.concat([df] + [get_results(page) for page in extra_pages]).reset_index(drop=True)

Sort results by Name, Stars, Reviews, or Price Range (number of $ signs).

In [348]:
# Column to sort by; it is passed to DataFrame.sort_values, so it must match a
# column name of `df` exactly.
sort = input("Sort by: ")

Sort by: Price Range (number of $ signs)


In [349]:
# How many rows of the sorted table to display, read as text; converted with
# int() when the table is sliced.
number = input("Number of results shown: ")

Number of results shown: 10


In [350]:
# Names sort A-Z; every other column sorts from highest to lowest.
if sort == 'Name':
    df = df.sort_values(sort).reset_index(drop=True)
elif sort == 'Reviews':
    # REVIEWS() returns the counts as strings, and sorting strings is
    # lexicographic ('76' would rank above '587'). Order by numeric value
    # instead; text that is not a number becomes NaN and is placed last.
    review_counts = pd.to_numeric(df[sort], errors='coerce')
    df = df.loc[review_counts.sort_values(ascending=False).index].reset_index(drop=True)
else:
    df = df.sort_values(sort, ascending=False).reset_index(drop=True)
df[:int(number)]

Unnamed: 0,Name,Category,Stars,Reviews,Price Range (number of $ signs),Url
0,Wingstop,Chicken Wings,2.0,254,2,https://www.yelp.com/biz/wingstop-la-puente?os...
1,Boca Del Rio,"Mexican, Fast Food",4.0,587,1,https://www.yelp.com/biz/boca-del-rio-la-puent...
2,In-N-Out Burger,"Fast Food, Burgers",4.5,387,1,https://www.yelp.com/biz/in-n-out-burger-baldw...
3,Rally’s,"American (Traditional), Fast Food, Burgers",2.0,76,1,https://www.yelp.com/biz/rallys-baldwin-park?o...
4,G&D Burgers,Burgers,4.0,57,1,https://www.yelp.com/biz/g-and-d-burgers-la-pu...
5,El Pollo Loco,"Fast Food, Mexican",2.0,109,1,https://www.yelp.com/biz/el-pollo-loco-la-puen...
6,Burger King,"Fast Food, Burgers",1.5,46,1,https://www.yelp.com/biz/burger-king-la-puente...
7,Wendy’s,"Fast Food, Burgers",2.0,105,1,https://www.yelp.com/biz/wendys-w-covina?osq=f...
8,In-N-Out Burger,"Burgers, Fast Food",3.5,172,1,https://www.yelp.com/biz/in-n-out-burger-hacie...
9,Pepe’s,Mexican,4.0,332,1,https://www.yelp.com/biz/pepes-hacienda-height...
