In [1]:
# Import libraries
import pandas as pd
import requests
import time
from bs4 import BeautifulSoup

### Step 1: Create a soup object from the home page

In [2]:
# home page that lists every restaurant
url = 'https://pages.git.generalassemb.ly/rldaggie/for-scraping/'

# fetch the page; the timeout keeps this cell from hanging forever on a stalled connection
resto = requests.get(url, timeout=30)

# fail fast on a 4xx/5xx response instead of parsing an error page as if it were the listing
resto.raise_for_status()

# parse the raw response bytes with the lxml parser
soup = BeautifulSoup(resto.content, 'lxml')

### Step 2: Scrape the home page soup for every restaurant

Note: Your best bet is to create a list of dictionaries, one for each restaurant. Each dictionary contains the restaurant's name and path from the `href`. The result of your scrape should look something like this:

```python
restaurants = [
    {'name': 'A&W Restaurants', 'href': 'restaurants/1.html'}, 
    {'name': "Applebee's", 'href': 'restaurants/2.html'},
    ...
]
```

In [3]:
# pull out the <table> whose id is "restaurants"; it holds one link per restaurant
restaurants_table = soup.find('table', id='restaurants')
restaurants_table

<table class="table" id="restaurants">
<thead>
<tr>
<th>Name</th>
<th></th>
</tr>
</thead>
<tbody>
<tr>
<td>
<a href="restaurants/1.html">A&amp;W Restaurants</a> </td>
</tr>
<tr>
<td>
<a href="restaurants/2.html">Applebee's</a> </td>
</tr>
<tr>
<td>
<a href="restaurants/3.html">Arby's</a> </td>
</tr>
<tr>
<td>
<a href="restaurants/4.html">Atlanta Bread Company</a> </td>
</tr>
<tr>
<td>
<a href="restaurants/5.html">Bojangle's Famous Chicken 'n Biscuits</a> </td>
</tr>
<tr>
<td>
<a href="restaurants/6.html">Buffalo Wild Wings</a> </td>
</tr>
<tr>
<td>
<a href="restaurants/7.html">Burger King</a> </td>
</tr>
<tr>
<td>
<a href="restaurants/8.html">Captain D's</a> </td>
</tr>
<tr>
<td>
<a href="restaurants/9.html">Carl's Jr.</a> </td>
</tr>
<tr>
<td>
<a href="restaurants/10.html">Charley's Grilled Subs</a> </td>
</tr>
<tr>
<td>
<a href="restaurants/11.html">Chick-fil-A</a> </td>
</tr>
<tr>
<td>
<a href="restaurants/12.html">Chili's</a> </td>
</tr>
<tr>
<td>
<a href="restaurants/13.html">Chi

In [4]:
# one dict per <a> tag in restaurants_table:
#   'name' -> the link's visible text
#   'href' -> the link's relative path to the restaurant page
resto_list = [
    {'name': anchor.text, 'href': anchor['href']}
    for anchor in restaurants_table.find_all('a')
]

In [5]:
# display the scraped list of restaurant dicts to eyeball the name/href pairs
resto_list

[{'name': 'A&W Restaurants', 'href': 'restaurants/1.html'},
 {'name': "Applebee's", 'href': 'restaurants/2.html'},
 {'name': "Arby's", 'href': 'restaurants/3.html'},
 {'name': 'Atlanta Bread Company', 'href': 'restaurants/4.html'},
 {'name': "Bojangle's Famous Chicken 'n Biscuits",
  'href': 'restaurants/5.html'},
 {'name': 'Buffalo Wild Wings', 'href': 'restaurants/6.html'},
 {'name': 'Burger King', 'href': 'restaurants/7.html'},
 {'name': "Captain D's", 'href': 'restaurants/8.html'},
 {'name': "Carl's Jr.", 'href': 'restaurants/9.html'},
 {'name': "Charley's Grilled Subs", 'href': 'restaurants/10.html'},
 {'name': 'Chick-fil-A', 'href': 'restaurants/11.html'},
 {'name': "Chili's", 'href': 'restaurants/12.html'},
 {'name': 'Chipotle Mexican Grill', 'href': 'restaurants/13.html'},
 {'name': "Church's", 'href': 'restaurants/14.html'},
 {'name': 'Corner Bakery Cafe', 'href': 'restaurants/15.html'},
 {'name': 'Dairy Queen', 'href': 'restaurants/16.html'},
 {'name': "Denny's", 'href': 'res

### Step 3: Using the `href`, scrape each restaurant's page and create a single list of food dictionaries.

Your list of foods should look something like this:
```python
foods = [
    {
        'calories': '0',
        'carbs': '0',
        'category': 'Drinks',
        'fat': '0',
        'name': 'A&W® Diet Root Beer',
        'restaurant': 'A&W Restaurants'
    },
    {
        'calories': '0',
        'carbs': '0',
        'category': 'Drinks',
        'fat': '0',
        'name': 'A&W® Diet Root Beer',
        'restaurant': 'A&W Restaurants'
    },
    ...
]
```

**Note**: Remove extra whitespace from each category.

In [8]:
food_list = []

# root of the scraped site; each restaurant's relative href is appended to it
base_url = 'https://pages.git.generalassemb.ly/rldaggie/for-scraping/'

# a single Session is shared by every restaurant request instead of
# setting up a fresh one-off request each time
with requests.Session() as session:

    # iterate thru each restaurant in resto_list
    for restaurant in resto_list:

        href = restaurant['href']
        restaurant_url = f'{base_url}{href}'

        # get content from each restaurant page; the timeout stops one stalled
        # request from hanging the whole scrape
        restaurant_res = session.get(restaurant_url, timeout=30)

        # stop immediately on a 4xx/5xx response rather than parsing an error page
        restaurant_res.raise_for_status()

        # create soup object from restaurant html
        restaurant_soup = BeautifulSoup(restaurant_res.content, 'lxml')

        # isolate the foods table from restaurant page
        table = restaurant_soup.find('table')
        table_body = table.find('tbody') if table is not None else None
        if table_body is None:
            # name the offending page instead of failing with an opaque
            # AttributeError on None
            raise ValueError(f'No foods table found at {restaurant_url}')

        # iterate thru each row in the tbody of the foods table
        for row in table_body.find_all('tr'):
            cells = row.find_all('td')

            # one dict per food; key order here determines the DataFrame column order.
            # only the category is stripped of surrounding whitespace
            food_list.append({
                'restaurant': restaurant['name'],
                'name': cells[0].text,
                'category': cells[1].text.strip(),
                'calories': cells[2].text,
                'fat': cells[3].text,
                'carbs': cells[4].text,
            })

### Step 4: Create a pandas DataFrame from your list of foods

**Note**: Your DataFrame should have 5,131 rows

In [9]:
# build one DataFrame row per scraped food dict, then preview the first five rows
food_df = pd.DataFrame(data=food_list)
food_df.head(5)

Unnamed: 0,restaurant,name,category,calories,fat,carbs
0,A&W Restaurants,Original Bacon Double Cheeseburger,Burgers,760,45,45
1,A&W Restaurants,Coney (Chili) Dog,Entrees,340,20,26
2,A&W Restaurants,Chili Fries,French Fries,370,15,49
3,A&W Restaurants,Strawberry Milkshake (small),Shakes,670,29,90
4,A&W Restaurants,A&W® Root Beer Freeze (large),Shakes,820,18,150


In [10]:
# sanity check: the exercise expects 5,131 rows
food_df.shape

(5131, 6)

### Step 5: Export to csv

**Note:** Don't export the index column from your DataFrame

In [12]:
# write the scraped foods to disk; index=False leaves the DataFrame index out of the file
food_df.to_csv('food_list.csv', index=False)