In [2]:
# Import libaries
import pandas as pd 
import numpy as np
import praw 
import requests 
from bs4 import BeautifulSoup

### Step 1: Create a soup object from the home page

In [3]:
url = 'https://pages.git.generalassemb.ly/rldaggie/for-scraping/'

In [4]:
response = requests.get(url)

In [5]:
response.status_code

200

In [6]:
soup = BeautifulSoup(response.content, 'html.parser')

### Step 2: Scrape the home page soup for every restaurant

Note: Your best bet is to create a list of dictionaries, one for each restaurant. Each dictionary contains the restaurant's name and path from the `href`. The result of your scrape should look something like this:

```python
restaurants = [
    {'name': 'A&W Restaurants', 'href': 'restaurants/1.html'}, 
    {'name': "Applebee's", 'href': 'restaurants/2.html'},
    ...
]
```

In [7]:
restaurant = soup.find('td')

In [8]:
print(restaurant)

<td>
<a href="restaurants/1.html">A&amp;W Restaurants</a> </td>


In [10]:
restaurant_names = []
for a_tag in soup.find_all('a'):
    restaurant_names.append(a_tag.text)
print(restaurant_names)

['Nutrition Information', 'A&W Restaurants', "Applebee's", "Arby's", 'Atlanta Bread Company', "Bojangle's Famous Chicken 'n Biscuits", 'Buffalo Wild Wings', 'Burger King', "Captain D's", "Carl's Jr.", "Charley's Grilled Subs", 'Chick-fil-A', "Chili's", 'Chipotle Mexican Grill', "Church's", 'Corner Bakery Cafe', 'Dairy Queen', "Denny's", 'El Pollo Loco', 'FATZ', "Fazoli's", 'Five Guys Burgers and Fries', 'Golden Chick', "Hardee's", 'IHOP', 'In-N-Out Burger', 'Jack in the Box', 'Jimmy Johns', "Joe's Crab Shack", 'KFC', "McDonald's", "O'Charley's", 'Olive Garden', 'Outback Steakhouse', 'Panda Express', 'Panera Bread', "Popeye's", 'Quiznos', 'Red Robin Gourmet Burgers', "Romano's Macaroni Grill", 'Ruby Tuesday', 'Subway', 'Taco Bell', 'Taco Bueno', "Wendy's"]


In [11]:
restaurant_dict = {}

# Find all <a> tags within <td> tags
for a_tag in soup.find_all('td'):
    # Extract the restaurant name (text) and href attribute value
    restaurant_name = a_tag.get_text()
    href_value = a_tag.find('a').get('href')
    # Store the restaurant name and href in the dictionary
    restaurant_dict[restaurant_name] = href_value

print(restaurant_dict)

{'\nA&W Restaurants ': 'restaurants/1.html', "\nApplebee's ": 'restaurants/2.html', "\nArby's ": 'restaurants/3.html', '\nAtlanta Bread Company ': 'restaurants/4.html', "\nBojangle's Famous Chicken 'n Biscuits ": 'restaurants/5.html', '\nBuffalo Wild Wings ': 'restaurants/6.html', '\nBurger King ': 'restaurants/7.html', "\nCaptain D's ": 'restaurants/8.html', "\nCarl's Jr. ": 'restaurants/9.html', "\nCharley's Grilled Subs ": 'restaurants/10.html', '\nChick-fil-A ': 'restaurants/11.html', "\nChili's ": 'restaurants/12.html', '\nChipotle Mexican Grill ': 'restaurants/13.html', "\nChurch's ": 'restaurants/14.html', '\nCorner Bakery Cafe ': 'restaurants/15.html', '\nDairy Queen ': 'restaurants/16.html', "\nDenny's ": 'restaurants/17.html', '\nEl Pollo Loco ': 'restaurants/18.html', '\nFATZ ': 'restaurants/19.html', "\nFazoli's ": 'restaurants/20.html', '\nFive Guys Burgers and Fries ': 'restaurants/21.html', '\nGolden Chick ': 'restaurants/22.html', "\nHardee's ": 'restaurants/23.html', '

### Step 3: Using the `href`, scrape each restaurant's page and create a single list of food dictionaries.

Your list of foods should look something like this:
```python
foods = [
    {
        'calories': '0',
        'carbs': '0',
        'category': 'Drinks',
        'fat': '0',
        'name': 'A&W® Diet Root Beer',
        'restaurant': 'A&W Restaurants'
    },
    {
        'calories': '0',
        'carbs': '0',
        'category': 'Drinks',
        'fat': '0',
        'name': 'A&W® Diet Root Beer',
        'restaurant': 'A&W Restaurants'
    },
    ...
]
```

**Note**: Remove extra white space from each category

In [17]:
foods = []
for restaurant, href in restaurant_dict.items():
    # Construct the full URL of the restaurant page
    fullurl = url + href  
    
    # Send a GET request to the restaurant page
    response = requests.get(fullurl)
    
    # Check if the request was successful
    if response.status_code == 200:
        # Parse the HTML content of the restaurant page
        soup = BeautifulSoup(response.content, 'html.parser')
        
        # Find all food items on the page
        for food_item in soup.find_all('div', class_='food-item'):
            # Extract food information from each food item
            name = food_item.find('h3').get_text().strip()
            category = food_item.find('span', class_='category').get_text().strip()
            calories = food_item.find('span', class_='calories').get_text().strip()
            fat = food_item.find('span', class_='fat').get_text().strip()
            carbs = food_item.find('span', class_='carbs').get_text().strip()
            
            # Create a food dictionary
            food_dict = {
                'name': name,
                'category': category,
                'calories': calories,
                'fat': fat,
                'carbs': carbs,
                'restaurant': restaurant
            }
            
            # Append the food dictionary to the list of foods
            foods.append(food_dict)
    else:
        print(f"Failed to fetch data from {restaurant} ({url})")

# Print the list of food dictionaries
print(foods)

KeyboardInterrupt: 

### Step 4: Create a pandas DataFrame from your list of foods

**Note**: Your DataFrame should have 5,131 rows

In [None]:
df = pd.DataFrame(foods)

### Step 5: Export to csv

**Note:** Don't export the index column from your DataFrame

In [None]:
df.to_csv('foods.csv')