In [1]:
# Import libaries
import pandas as pd
import requests
from bs4 import BeautifulSoup

### Step 1: Create a soup object from the home page

In [2]:
url = 'https://pages.git.generalassemb.ly/rldaggie/for-scraping/'
res = requests.get(url)

In [3]:
res.status_code

200

In [4]:
soup = BeautifulSoup(res.content, 'lxml')

### Step 2: Scrape the home page soup for every restaurant

Note: Your best bet is to create a list of dictionaries, one for each restaurant. Each dictionary contains the restaurant's name and path from the `href`. The result of your scrape should look something like this:

```python
restaurants = [
    {'name': 'A&W Restaurants', 'href': 'restaurants/1.html'}, 
    {'name': "Applebee's", 'href': 'restaurants/2.html'},
    ...
]
```

In [5]:
soup.a.text

'Nutrition Information'

In [6]:
table = soup.find('table', {'id':'restaurants'})

In [7]:
restaurants = []
for row in table.find_all('tr')[1:]:
    restaurant = {}
    restaurant['name']=row.find('a').text
    restaurant['href']=row.find('a').attrs['href']
    
    restaurants.append(restaurant)
restaurants

[{'name': 'A&W Restaurants', 'href': 'restaurants/1.html'},
 {'name': "Applebee's", 'href': 'restaurants/2.html'},
 {'name': "Arby's", 'href': 'restaurants/3.html'},
 {'name': 'Atlanta Bread Company', 'href': 'restaurants/4.html'},
 {'name': "Bojangle's Famous Chicken 'n Biscuits",
  'href': 'restaurants/5.html'},
 {'name': 'Buffalo Wild Wings', 'href': 'restaurants/6.html'},
 {'name': 'Burger King', 'href': 'restaurants/7.html'},
 {'name': "Captain D's", 'href': 'restaurants/8.html'},
 {'name': "Carl's Jr.", 'href': 'restaurants/9.html'},
 {'name': "Charley's Grilled Subs", 'href': 'restaurants/10.html'},
 {'name': 'Chick-fil-A', 'href': 'restaurants/11.html'},
 {'name': "Chili's", 'href': 'restaurants/12.html'},
 {'name': 'Chipotle Mexican Grill', 'href': 'restaurants/13.html'},
 {'name': "Church's", 'href': 'restaurants/14.html'},
 {'name': 'Corner Bakery Cafe', 'href': 'restaurants/15.html'},
 {'name': 'Dairy Queen', 'href': 'restaurants/16.html'},
 {'name': "Denny's", 'href': 'res

### Step 3: Using the slug, scrape each restaurant's page and create a single list of food dictionaries.

Your list of foods should look something like this:
```python
foods = [
    {
        'calories': '0',
        'carbs': '0',
        'category': 'Drinks',
        'fat': '0',
        'name': 'A&W® Diet Root Beer',
        'restaurant': 'A&W Restaurants'
    },
    {
        'calories': '0',
        'carbs': '0',
        'category': 'Drinks',
        'fat': '0',
        'name': 'A&W® Diet Root Beer',
        'restaurant': 'A&W Restaurants'
    },
    ...
]
```

**Note**: Remove extra white space from each category

In [8]:
restaurant1='https://pages.git.generalassemb.ly/rldaggie/for-scraping/restaurants/1.html'

req = requests.get(restaurant1)

req.status_code

200

In [9]:
req.text

'<!DOCTYPE html>\n<html lang="en">\n  <head>\n    <meta charset="utf-8"/>\n    <meta http-equiv="X-UA-Compatible" content="IE=edge"/>\n    <meta name="viewport" content="width=device-width, initial-scale=1.0"/>\n    <title>Nutrition Information</title>\n    <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.4.1/css/bootstrap.min.css" integrity="sha384-Vkoo8x4CGsO3+Hhxv8T/Q5PaXtkKtu6ug5TOeNV6gBiFeWPGFN9MuhOf23Q9Ifjh" crossorigin="anonymous">\n\n  </head>\n  <body>\n    <header>\n      <section class="container">\n        <nav role="navigation" class="navbar navbar-expand-lg navbar-light bg-light">\n<a class="navbar-brand" href="/">Nutrition Information</a>        </nav>\n      </section>\n    </header>\n    <main role="main" class="container">\n      <br>\n      <div class="alert alert-danger">\n        NOTE: This data is super old and rife with errors. It\'s meant for scraping practice only.\n      </div>\n<h3 class=display-3>\nA&amp;W Restaurants</h3>\n\n<p cl

In [10]:
soup = BeautifulSoup(req.content, 'lxml')
foodtable = soup.find('tbody')

allfoods= foodtable.find_all('tr')
foodattr= allfoods[0].find_all('td')

food_rest = soup.find('h3').text.strip() #grab restaurant name and rid of extra symbols

In [11]:
allfoods[0].find_all('td')

[<td>Original Bacon Double Cheeseburger</td>,
 <td>Burgers</td>,
 <td>760</td>,
 <td>45</td>,
 <td>45</td>]

In [12]:
allfoods[0].find_all('td')

[<td>Original Bacon Double Cheeseburger</td>,
 <td>Burgers</td>,
 <td>760</td>,
 <td>45</td>,
 <td>45</td>]

In [13]:
foods = []
for row in table.find_all('a'): #loop through restaurants
    req = requests.get(f"https://pages.git.generalassemb.ly/rldaggie/for-scraping/{row['href']}")
    soup = BeautifulSoup(req.content, 'lxml')
    foodtable = soup.find('tbody')
    for food in foodtable.find_all('tr'):
        d_food = {
            'calories': food.find_all('td')[2].text,
            'carbs': food.find_all('td')[4].text,
            'category': food.find_all('td')[0].text,
            'fat': food.find_all('td')[3].text,
            'name':food.find_all('td')[0].text,
            'restaurant': soup.find('h3').text.strip() #grab restaurant name from header
        }
        foods.append(d_food)
foods

[{'calories': '760',
  'carbs': '45',
  'category': 'Original Bacon Double Cheeseburger',
  'fat': '45',
  'name': 'Original Bacon Double Cheeseburger',
  'restaurant': 'A&W Restaurants'},
 {'calories': '340',
  'carbs': '26',
  'category': 'Coney (Chili) Dog',
  'fat': '20',
  'name': 'Coney (Chili) Dog',
  'restaurant': 'A&W Restaurants'},
 {'calories': '370',
  'carbs': '49',
  'category': 'Chili Fries',
  'fat': '15',
  'name': 'Chili Fries',
  'restaurant': 'A&W Restaurants'},
 {'calories': '670',
  'carbs': '90',
  'category': 'Strawberry Milkshake (small)',
  'fat': '29',
  'name': 'Strawberry Milkshake (small)',
  'restaurant': 'A&W Restaurants'},
 {'calories': '820',
  'carbs': '150',
  'category': 'A&W® Root Beer Freeze (large)',
  'fat': '18',
  'name': 'A&W® Root Beer Freeze (large)',
  'restaurant': 'A&W Restaurants'},
 {'calories': '340',
  'carbs': '57',
  'category': 'Caramel Sundae',
  'fat': '9',
  'name': 'Caramel Sundae',
  'restaurant': 'A&W Restaurants'},
 {'calor

### Step 4: Create a pandas DataFrame from your list of foods

**Note**: Your DataFrame should have 5,131 rows

In [14]:
foods = pd.DataFrame(foods,index=None)

In [15]:
foods.head()

Unnamed: 0,calories,carbs,category,fat,name,restaurant
0,760,45,Original Bacon Double Cheeseburger,45,Original Bacon Double Cheeseburger,A&W Restaurants
1,340,26,Coney (Chili) Dog,20,Coney (Chili) Dog,A&W Restaurants
2,370,49,Chili Fries,15,Chili Fries,A&W Restaurants
3,670,90,Strawberry Milkshake (small),29,Strawberry Milkshake (small),A&W Restaurants
4,820,150,A&W® Root Beer Freeze (large),18,A&W® Root Beer Freeze (large),A&W Restaurants


In [16]:
foods.shape

(5131, 6)

### Step 5: Export to csv

**Note:** Don't export the index column from your DataFrame

In [17]:
foods.to_csv('foods.csv',index=False)