In [63]:
# Import libraries
import pandas as pd
import requests
from bs4 import BeautifulSoup

### Step 1: Create a soup object from the home page

In [64]:
# Request the site's home page; `url` is reused further down as the base for each restaurant page URL.
url = 'http://chubbygrub.com/'
res = requests.get(url)
# Bare expression: shows the HTTP status code as the cell output.
res.status_code

200

In [65]:
# Parse the raw response bytes into a BeautifulSoup tree using the lxml parser.
soup = BeautifulSoup(res.content, 'lxml')

In [66]:
# Collect every <a> tag in the parsed page.
a = soup.find_all('a')

In [67]:
# Inspect all of the anchors that were found.
a

[<a href="http://chubbygrub.com"><img alt="" border="0" height="50" src="http://chubbygrub.com/wp-content/themes/chubbygrub-2/images/logo_chubbygrub.png" width="210"/></a>,
 <a href="http://chubbygrub.com/">Home</a>,
 <a href="http://chubbygrub.com/categories/">Categories</a>,
 <a href="http://chubbygrub.com/restaurants/">Restaurants</a>,
 <a class="btn btn-lg btn-primary" href="/restaurants/aw-restaurants">A&amp;W Restaurants</a>,
 <a class="btn btn-lg btn-primary" href="/restaurants/applebees">Applebee's</a>,
 <a class="btn btn-lg btn-primary" href="/restaurants/arbys">Arby's</a>,
 <a class="btn btn-lg btn-primary" href="/restaurants/atlanta-bread-company">Atlanta Bread Company</a>,
 <a class="btn btn-lg btn-primary" href="/restaurants/bojangles-famous-chicken-n-biscuits">Bojangle's Famous Chicken 'n Biscuits</a>,
 <a class="btn btn-lg btn-primary" href="/restaurants/buffalo-wild-wings">Buffalo Wild Wings</a>,
 <a class="btn btn-lg btn-primary" href="/restaurants/burger-king">Burger 

In [68]:
# Keep only the restaurant buttons: they carry the class attribute
# "btn btn-lg btn-primary", which the logo and navigation links do not have.
a = soup.find_all('a', class_='btn btn-lg btn-primary')
a

[<a class="btn btn-lg btn-primary" href="/restaurants/aw-restaurants">A&amp;W Restaurants</a>,
 <a class="btn btn-lg btn-primary" href="/restaurants/applebees">Applebee's</a>,
 <a class="btn btn-lg btn-primary" href="/restaurants/arbys">Arby's</a>,
 <a class="btn btn-lg btn-primary" href="/restaurants/atlanta-bread-company">Atlanta Bread Company</a>,
 <a class="btn btn-lg btn-primary" href="/restaurants/bojangles-famous-chicken-n-biscuits">Bojangle's Famous Chicken 'n Biscuits</a>,
 <a class="btn btn-lg btn-primary" href="/restaurants/buffalo-wild-wings">Buffalo Wild Wings</a>,
 <a class="btn btn-lg btn-primary" href="/restaurants/burger-king">Burger King</a>,
 <a class="btn btn-lg btn-primary" href="/restaurants/captain-ds">Captain D's</a>,
 <a class="btn btn-lg btn-primary" href="/restaurants/carls-jr">Carl's Jr.</a>,
 <a class="btn btn-lg btn-primary" href="/restaurants/charleys-grilled-subs">Charley's Grilled Subs</a>,
 <a class="btn btn-lg btn-primary" href="/restaurants/chick-fil

In [69]:
# For each restaurant link, show its display text and then its slug
# (the href with the '/restaurants/' part removed), followed by a blank line.
for link in a:
    slug = link['href'].replace('/restaurants/', '')
    print(link.text, slug, '', sep='\n')


A&W Restaurants
aw-restaurants

Applebee's
applebees

Arby's
arbys

Atlanta Bread Company
atlanta-bread-company

Bojangle's Famous Chicken 'n Biscuits
bojangles-famous-chicken-n-biscuits

Buffalo Wild Wings
buffalo-wild-wings

Burger King
burger-king

Captain D's
captain-ds

Carl's Jr.
carls-jr

Charley's Grilled Subs
charleys-grilled-subs

Chick-fil-A
chick-fil-a

Chili's
chilis

Chipotle Mexican Grill
chipotle-mexican-grill

Church's
churchs

Corner Bakery Cafe
corner-bakery-cafe

Dairy Queen
dairy-queen

Denny's
dennys

El Pollo Loco
el-pollo-loco

FATZ
fatz

Fazoli's
fazolis

Five Guys Burgers and Fries
five-guys-burgers-and-fries

Golden Chick
golden-chick

Hardee's
hardees

IHOP
ihop

In-N-Out Burger
in-n-out-burger

Jack in the Box
jack-in-the-box

Jimmy Johns
jimmy-johns

Joe's Crab Shack
joes-crab-shack

KFC
kfc

McDonald's
mcdonalds

O'Charley's
ocharleys

Olive Garden
olive-garden

Outback Steakhouse
outback-steakhouse

Panda Express
panda-express

Panera Bread
panera-bread


### Step 2: Scrape the home page soup for every restaurant

Note: Your best bet is to create a list of dictionaries, one for each restaurant. Each dictionary contains the restaurant's name and slug. The result of your scrape should look something like this:

```python
restaurants = [
    {'name': 'A&W Restaurants', 'slug': 'aw-restaurants'}, 
    {'name': "Applebee's", 'slug': 'applebees'},
    ...
]
```

In [70]:
# Look at the attribute dictionary of one anchor to see where the slug comes from.
a[2].attrs

{'href': '/restaurants/arbys', 'class': ['btn', 'btn-lg', 'btn-primary']}

In [71]:
# Print only the slug for each restaurant link.
for anchor in a:
    href = anchor['href']
    print(href.replace('/restaurants/', ''))
    

aw-restaurants
applebees
arbys
atlanta-bread-company
bojangles-famous-chicken-n-biscuits
buffalo-wild-wings
burger-king
captain-ds
carls-jr
charleys-grilled-subs
chick-fil-a
chilis
chipotle-mexican-grill
churchs
corner-bakery-cafe
dairy-queen
dennys
el-pollo-loco
fatz
fazolis
five-guys-burgers-and-fries
golden-chick
hardees
ihop
in-n-out-burger
jack-in-the-box
jimmy-johns
joes-crab-shack
kfc
mcdonalds
ocharleys
olive-garden
outback-steakhouse
panda-express
panera-bread
popeyes
quiznos
red-robin-gourmet-burgers
romanos-macaroni-grill
ruby-tuesday
subway
taco-bell
taco-bueno
wendys


In [72]:
# Build one {'name', 'slug'} dictionary per restaurant link.
restaurants = []

for link in a:
    restaurant = {
        'name': link.text,  # the link's visible text, e.g. "Arby's"
        'slug': link['href'].replace('/restaurants/', ''),  # e.g. 'arbys'
    }
    restaurants.append(restaurant)

restaurants

[{'name': 'A&W Restaurants', 'slug': 'aw-restaurants'},
 {'name': "Applebee's", 'slug': 'applebees'},
 {'name': "Arby's", 'slug': 'arbys'},
 {'name': 'Atlanta Bread Company', 'slug': 'atlanta-bread-company'},
 {'name': "Bojangle's Famous Chicken 'n Biscuits",
  'slug': 'bojangles-famous-chicken-n-biscuits'},
 {'name': 'Buffalo Wild Wings', 'slug': 'buffalo-wild-wings'},
 {'name': 'Burger King', 'slug': 'burger-king'},
 {'name': "Captain D's", 'slug': 'captain-ds'},
 {'name': "Carl's Jr.", 'slug': 'carls-jr'},
 {'name': "Charley's Grilled Subs", 'slug': 'charleys-grilled-subs'},
 {'name': 'Chick-fil-A', 'slug': 'chick-fil-a'},
 {'name': "Chili's", 'slug': 'chilis'},
 {'name': 'Chipotle Mexican Grill', 'slug': 'chipotle-mexican-grill'},
 {'name': "Church's", 'slug': 'churchs'},
 {'name': 'Corner Bakery Cafe', 'slug': 'corner-bakery-cafe'},
 {'name': 'Dairy Queen', 'slug': 'dairy-queen'},
 {'name': "Denny's", 'slug': 'dennys'},
 {'name': 'El Pollo Loco', 'slug': 'el-pollo-loco'},
 {'name'

In [90]:
# `restaurant` is the loop variable left over from the last loop iteration,
# so this prints the name of the final restaurant in the list.
print(restaurant['name'])

Wendy's


### Step 3: Using the slug, scrape each restaurant's page and create a single list of food dictionaries.

Your list of foods should look something like this:
```python
foods = [
    {
        'calories': '0',
        'carbs': '0',
        'category': 'Drinks',
        'fat': '0',
        'name': 'A&W® Diet Root Beer',
        'restaurant': 'A&W Restaurants'
    },
    {
        'calories': '0',
        'carbs': '0',
        'category': 'Drinks',
        'fat': '0',
        'name': 'A&W® Diet Root Beer',
        'restaurant': 'A&W Restaurants'
    },
    ...
]
```

**Note**: Remove extra white space from each category

In [94]:
# Scrape every restaurant page and flatten all of the menu rows into a single
# list of food dictionaries (one dictionary per table row).
foods = []
for restaurant in restaurants:
    url_2 = url + 'restaurants/' + restaurant['slug'] + '/'
    # A timeout keeps one unresponsive page from hanging the whole scrape.
    res = requests.get(url_2, timeout=30)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    res.raise_for_status()
    soup = BeautifulSoup(res.content, 'lxml')
    # The food rows are read from the table whose id attribute is "items".
    table = soup.find('table', {'id': 'items'})
    if table is None:
        raise ValueError('No "items" table found at ' + url_2)
    # Skip the first row (presumably the header row).
    for row in table('tr')[1:]:
        food = {
            'calories': row.find('td', {'itemprop': 'calories'}).text,
            'carbs': row.find('td', {'itemprop': 'carbohydrateContent'}).text,
            'fat': row.find('td', {'itemprop': 'fatContent'}).text,
            # The first cell of the row holds the food's name.
            'name': row.find('td').text,
            # Remove extra whitespace from the category, as the exercise requires.
            'category': row.find('a').text.strip(),
            'restaurant': restaurant['name'],
        }
        foods.append(food)

In [78]:
# Spot-check the category link text of every row after the first one.
for tr in table.find_all('tr')[1:]:
    category_link = tr.find('a')
    print(category_link.text)

Chicken
Chicken
Chicken
Chicken
Chicken
Chicken
Salads
Salads
Desserts
Sandwiches
Salads
Salads
Sides
Burgers
Salads
Salads
Sides
Salads
French Fries
Shakes
Wraps
Sandwiches
Burgers
Burgers
Burgers
Burgers
Salads
Wraps
Sandwiches
Burgers
Burgers
Burgers
Burgers
Shakes
Shakes
Shakes
Burgers
French Fries
Burgers
Shakes
Entrees
Shakes
French Fries
Shakes
Shakes
Shakes
French Fries
Shakes
Shakes
Sandwiches
Shakes
Entrees
Shakes
French Fries
Shakes
Shakes
Shakes
Burgers
Sides
Salads
Salads
Wraps
Sandwiches
Sandwiches
French Fries
French Fries
Shakes


In [79]:
# Spot-check the food name (the first cell) of every row after the first one.
for tr in table.find_all('tr')[1:]:
    first_cell = tr.find('td')
    print(first_cell.text)


10-piece Chicken Nuggets
10-piece Spicy Chicken Nuggets
4-piece Chicken Nuggets
4-piece Spicy Chicken Nuggets
6-piece Chicken Nuggets
6-piece Spicy Chicken Nuggets
Apple Pecan Chicken Salad
Apple Pecan Chicken Salad Half-Size
Apple Slices
Asiago Ranch Chicken Club
Asian Cashew Chicken Salad
Asian Cashew Chicken Salad, Half-Size
Bacon Cheese Potato
Baconator®
BBQ Ranch Chicken Salad
BBQ Ranch Chicken Salad, Half Size
Broccoli Cheese Potato
Caesar Side Salad
Chili Cheese Fries
Chocolate Frosty Waffle Cone
Crispy Chicken Caesar Wrap
Crispy Chicken Sandwich
Dave’s Hot ‘N Juicy™ 1/2 lb.* Double
Dave’s Hot ‘N Juicy™ 1/4 lb.* Single
Dave’s Hot ‘N Juicy™ 3/4 lb.* Triple
Double Stack™
Garden Side Salad
Grilled Chicken Go Wrap
Homestyle Chicken Fillet
JBC (Jr. Bacon Cheeseburger)
Jr. Cheeseburger
Jr. Cheeseburger Deluxe
Jr. Hamburger
Jr. Original Chocolate Frosty™
Jr. Original Chocolate Frosty™
Jr. Vanilla Frosty™
Kids’ Cheeseburger
Kids’ French Fries
Kids’ Hamburger
Large Caramel Frosty™ Shake


In [80]:
# Spot-check the calories cell of every row after the first one.
for tr in table.find_all('tr')[1:]:
    calories_cell = tr.find('td', {'itemprop': 'calories'})
    print(calories_cell.text)

450
430
180
170
270
260
570
340
40
670
380
240
540
940
580
310
440
250
530
300
410
350
820
580
1090
460
200
260
510
390
290
350
260
200
200
190
290
220
250
990
270
870
500
590
780
560
410
410
390
480
650
180
580
310
290
540
280
670
320
780
440
330
510
370
220
220
290


In [81]:
# Print each restaurant dictionary on its own line.
for entry in restaurants:
    print(entry)

{'name': 'A&W Restaurants', 'slug': 'aw-restaurants'}
{'name': "Applebee's", 'slug': 'applebees'}
{'name': "Arby's", 'slug': 'arbys'}
{'name': 'Atlanta Bread Company', 'slug': 'atlanta-bread-company'}
{'name': "Bojangle's Famous Chicken 'n Biscuits", 'slug': 'bojangles-famous-chicken-n-biscuits'}
{'name': 'Buffalo Wild Wings', 'slug': 'buffalo-wild-wings'}
{'name': 'Burger King', 'slug': 'burger-king'}
{'name': "Captain D's", 'slug': 'captain-ds'}
{'name': "Carl's Jr.", 'slug': 'carls-jr'}
{'name': "Charley's Grilled Subs", 'slug': 'charleys-grilled-subs'}
{'name': 'Chick-fil-A', 'slug': 'chick-fil-a'}
{'name': "Chili's", 'slug': 'chilis'}
{'name': 'Chipotle Mexican Grill', 'slug': 'chipotle-mexican-grill'}
{'name': "Church's", 'slug': 'churchs'}
{'name': 'Corner Bakery Cafe', 'slug': 'corner-bakery-cafe'}
{'name': 'Dairy Queen', 'slug': 'dairy-queen'}
{'name': "Denny's", 'slug': 'dennys'}
{'name': 'El Pollo Loco', 'slug': 'el-pollo-loco'}
{'name': 'FATZ', 'slug': 'fatz'}
{'name': "Faz

In [82]:
# Dry run of the row-parsing logic against the single `table` left over from
# the scraping loop above. The result is stored in its own variable so the
# complete `foods` list is not overwritten, and the table is parsed once
# rather than once per restaurant.
foods_preview = []

for row in table('tr')[1:]:
    food = {
        'calories': row.find('td', {'itemprop': 'calories'}).text,
        'carbs': row.find('td', {'itemprop': 'carbohydrateContent'}).text,
        'fat': row.find('td', {'itemprop': 'fatContent'}).text,
        'name': row.find('td').text,
        'category': row.find('a').text.strip(),
        # `restaurant` is the leftover loop variable that belongs to `table`.
        'restaurant': restaurant['name'],
    }
    foods_preview.append(food)

foods_preview[:3]


In [98]:
# Sanity check: total number of food rows collected.
len(foods)

4977

### Step 4: Create a pandas DataFrame from your list of foods

**Note**: Your DataFrame should have 4,977 rows

In [99]:
# Turn the list of food dictionaries into a DataFrame (one row per food).
# NOTE(review): this rebinds `foods` from a list to a DataFrame; the cells below rely on that.
foods = pd.DataFrame(foods)

In [100]:
# Preview the first rows only rather than displaying the whole frame.
foods.head(10)

Unnamed: 0,calories,carbs,category,fat,name,restaurant
0,0,0,Drinks,0,A&W® Diet Root Beer,A&W Restaurants
1,0,0,Drinks,0,A&W® Diet Root Beer,A&W Restaurants
2,0,0,Drinks,0,A&W® Diet Root Beer,A&W Restaurants
3,0,0,Drinks,0,A&W® Diet Root Beer,A&W Restaurants
4,0,0,Drinks,0,A&W® Diet Root Beer,A&W Restaurants
5,0,0,Drinks,0,A&W® Diet Root Beer,A&W Restaurants
6,350,60,Shakes,10,A&W® Diet Root Beer Float (large),A&W Restaurants
7,170,30,Shakes,5,A&W® Diet Root Beer Float (medium),A&W Restaurants
8,170,30,Shakes,5,A&W® Diet Root Beer Float (small),A&W Restaurants
9,600,92,Shakes,18,A&W® Diet Root Beer Freeze (large),A&W Restaurants


### Step 5: Export to csv

**Note:** Don't export the index column from your DataFrame

In [101]:
# Export the foods table to a CSV file, leaving out the index column.
pd.DataFrame(foods).to_csv('foods.csv', index=False)