# Web Scraping with Beautiful Soup Lab

In [2]:
# Import libaries here

import pandas as pd
import requests
import html
from bs4 import BeautifulSoup

### Step 1: Create a soup object from the home page

In [3]:
url = 'https://pages.git.generalassemb.ly/rldaggie/for-scraping/'
res = requests.get(url)

In [4]:
res.status_code

200

In [5]:

soup = BeautifulSoup(res.content, 'lxml')

### Step 2: Scrape the home page soup for every restaurant

Note: Your best bet is to create a list of dictionaries, one for each restaurant. Each dictionary contains the restaurant's name and path from the `href`. The result of your scrape should look something like this:

```python
restaurants = [
    {'name': 'A&W Restaurants', 'href': 'restaurants/1.html'}, 
    {'name': "Applebee's", 'href': 'restaurants/2.html'},
    ...
]
```

In [6]:
table = soup.find(class_='table')

In [7]:
restaurants = []
for row in table.find_all('td'):
    restaurant = {}
    restaurant['name'] = row.find('a').text.strip()
    restaurant['href'] = row.find('a').attrs['href']
    restaurants.append(restaurant)

In [8]:
restaurants

[{'name': 'A&W Restaurants', 'href': 'restaurants/1.html'},
 {'name': "Applebee's", 'href': 'restaurants/2.html'},
 {'name': "Arby's", 'href': 'restaurants/3.html'},
 {'name': 'Atlanta Bread Company', 'href': 'restaurants/4.html'},
 {'name': "Bojangle's Famous Chicken 'n Biscuits",
  'href': 'restaurants/5.html'},
 {'name': 'Buffalo Wild Wings', 'href': 'restaurants/6.html'},
 {'name': 'Burger King', 'href': 'restaurants/7.html'},
 {'name': "Captain D's", 'href': 'restaurants/8.html'},
 {'name': "Carl's Jr.", 'href': 'restaurants/9.html'},
 {'name': "Charley's Grilled Subs", 'href': 'restaurants/10.html'},
 {'name': 'Chick-fil-A', 'href': 'restaurants/11.html'},
 {'name': "Chili's", 'href': 'restaurants/12.html'},
 {'name': 'Chipotle Mexican Grill', 'href': 'restaurants/13.html'},
 {'name': "Church's", 'href': 'restaurants/14.html'},
 {'name': 'Corner Bakery Cafe', 'href': 'restaurants/15.html'},
 {'name': 'Dairy Queen', 'href': 'restaurants/16.html'},
 {'name': "Denny's", 'href': 'res

### Step 3: Using the `href`, scrape each restaurant's page and create a single list of food dictionaries.

Your list of foods should look something like this:
```python
foods = [
    {
        'calories': '0',
        'carbs': '0',
        'category': 'Drinks',
        'fat': '0',
        'name': 'A&W® Diet Root Beer',
        'restaurant': 'A&W Restaurants'
    },
    {
        'calories': '0',
        'carbs': '0',
        'category': 'Drinks',
        'fat': '0',
        'name': 'A&W® Diet Root Beer',
        'restaurant': 'A&W Restaurants'
    },
    ...
]
```

**Note**: Remove extra white space from each category

#### Trying manually for the first restaurant:

In [9]:
href = []
for i in range(len(restaurants)):
    href.append(restaurants[i]['href'])


In [10]:
name_r = []
for i in range(len(restaurants)):
    name_r.append(restaurants[i]['name'])


In [11]:
url = 'https://pages.git.generalassemb.ly/rldaggie/for-scraping/restaurants/1.html'
res = requests.get(url)
soup = BeautifulSoup(res.content, 'lxml')
table = soup.find_all('td') 
foods = []

for i in table:
    foods.append(i.text.strip())
    
# Breaking list into chunks:    
chunks = [foods[x:x+5] for x in range(0, len(foods), 5)]
list_keys = ['name', 'category', 'calories', 'fat', 'carbs', 'restaurant']


for chunk in chunks:
    chunk.append('A&W Restaurants')

In [12]:
def as_dict_list(data: list, columns: list):
    return [dict((zip(columns, row))) for row in data]

In [13]:
menu1 = as_dict_list(chunks, list_keys)

#### For other restaurants:

In [14]:
url_test = 'https://pages.git.generalassemb.ly/rldaggie/for-scraping/restaurants/{}.html'
urls = []
for i in range(1, len(href)):
    urls.append(url_test.format(i))

In [15]:
for url in urls:
    res = requests.get( url)
    soup = BeautifulSoup(res.content, 'lxml')
    table = soup.find_all('td')
    foods = []

    for i in table:
        foods.append(i.text.strip())

    # Breaking list into chunks:    
    chunks = [foods[x:x+5] for x in range(0, len(foods), 5)]
    list_keys = ['name', 'category', 'calories', 'fat', 'carbs', 'restaurant']

    for name in name_r:
        for chunk in chunks:
            chunk.append(name)
    
menu_n = as_dict_list(chunks, list_keys)

#### General function

In [16]:
def menu(url, name):
    res = requests.get(url)
    soup = BeautifulSoup(res.content, 'lxml')
    table = soup.find_all('td') 
    foods = []

    for i in table:
        foods.append(i.text.strip())

    # Breaking list into chunks:    
    chunks = [foods[x:x+5] for x in range(0, len(foods), 5)]
    list_keys = ['name', 'category', 'calories', 'fat', 'carbs', 'restaurant']


    for chunk in chunks:
        chunk.append(name)
        
    menu = as_dict_list(chunks, list_keys)
    return menu


In [17]:
#result = []

#for url in urls:
    #for name in name_r:
        #result.append(menu(url, name))
        
my_list = list(zip(urls, name_r))       
a = []
for url, name in my_list:
    a.append(menu(url, name))
    


In [18]:
def flatten(t):
    return [item for sublist in t for item in sublist]

In [19]:
b = flatten(a)

### Step 4: Create a pandas DataFrame from your list of foods

**Note**: Your DataFrame should have 5,131 rows. Please output the number of rows in your DataFrame!

In [24]:
df = pd.DataFrame(b)
df = df.set_index('name')
df.head(2)

Unnamed: 0_level_0,category,calories,fat,carbs,restaurant
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Original Bacon Double Cheeseburger,Burgers,760,45,45,A&W Restaurants
Coney (Chili) Dog,Entrees,340,20,26,A&W Restaurants


In [25]:
# How many rows does your dataframe have?
df.shape

(5067, 5)

### Step 5: Export to csv

**Note:** Don't export the index column from your DataFrame

In [26]:
df.to_csv(r'./result.csv')