# Web Scraping with  Beautiful Soup
![The words Beautiful Soup stylized](https://imgur.com/Zf7atCZ.jpg)
<a href="https://bestprogrammer.ru/izuchenie/veb-skraping-s-pomoshhyu-beautiful-soup">Image Source</a>

In [2]:
# Import libaries
import requests
from bs4 import BeautifulSoup

In [10]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

### Step 1: Creating a soup object from the home page

In [3]:
url = "https://pages.git.generalassemb.ly/rldaggie/for-scraping/"

req = requests.get(url)

req.status_code

200

### Step 2: Scraping the home page soup for every restaurant

For this, we're going to create a list of dictionaries, one for each restaurant. 

Each dictionary will contain the restaurant's name and path from the `href`.

In [5]:
soup = BeautifulSoup(req.text, "lxml")

In [8]:
results_list = []

all_td = soup.find_all('td')

for element in all_td:
    
    # start a dictionary to store this item's data
    result = {}
    
    # get the title and full link/url
    a_href = element.find('a')
    
    if a_href is not None:
        result['name'] = a_href.text
        result['href'] = a_href['href']
    #saving these in the results dictionary, key = link, result = title    
        
    if len(result) == 2:
        results_list.append(result)

In [98]:
results_list

[{'name': 'A&W Restaurants', 'href': 'restaurants/1.html'},
 {'name': "Applebee's",
  'href': 'restaurants/2.html',
  'foods': [{'name': 'Original Bacon Double Cheeseburger',
    'category': 'Original Bacon Double Cheeseburger',
    'calories': 'Original Bacon Double Cheeseburger',
    'fat': 'Original Bacon Double Cheeseburger',
    'carbs': 'Original Bacon Double Cheeseburger',
    'restaurant': "Applebee's"},
   {'name': 'Coney (Chili) Dog',
    'category': 'Coney (Chili) Dog',
    'calories': 'Coney (Chili) Dog',
    'fat': 'Coney (Chili) Dog',
    'carbs': 'Coney (Chili) Dog',
    'restaurant': "Arby's"},
   {'name': 'Chili Fries',
    'category': 'Chili Fries',
    'calories': 'Chili Fries',
    'fat': 'Chili Fries',
    'carbs': 'Chili Fries',
    'restaurant': 'Atlanta Bread Company'},
   {'name': 'Strawberry Milkshake (small)',
    'category': 'Strawberry Milkshake (small)',
    'calories': 'Strawberry Milkshake (small)',
    'fat': 'Strawberry Milkshake (small)',
    'carbs':

### Step 3: Scraping each restaurant's page and create a single list of food dictionaries using the `href`

In [105]:
href_list = [results_list[i]['href'] for i in range (len(results_list))]

In [106]:
home_url = "https://pages.git.generalassemb.ly/rldaggie/for-scraping/"

In [None]:
calories': '0',
        'carbs': '0',
        'category': 'Drinks',
        'fat': '0',
        'name': 'A&W® Diet Root Beer',
        'restaurant': 'A&W Restaurants'
    },

In [137]:
for x in range(len(href_list)):
    req = requests.get(home_url + href_list[x])
    soup = BeautifulSoup(req.content, 'lxml')
    table_soup = soup.find_all('tr')
    foods = []
    
    for i in range(1, (len(table_soup))):
        food = {}
        food['calories'] = str(table_soup[i]).split('<td>')[3].split('</td>')[0]
        food['carbs'] = str(table_soup[i]).split('<td>')[5].split('</td>')[0]
        food['category'] = str(table_soup[i]).split('<td>')[2].split('</td>')[0]
        food['fat'] = str(table_soup[i]).split('<td>')[4].split('</td>')[0]
        food['name'] = str(table_soup[i]).split('<td>')[1].split('</td>')[0]
        food['restaurant'] = results_list[x]['name']

        foods.append(food)
        
    results_list[x]['foods'] = foods

In [138]:
foods

[{'calories': '670',
  'carbs': '41',
  'category': 'Burgers',
  'fat': '37',
  'name': 'Son of Baconator®',
  'restaurant': "Wendy's"},
 {'calories': '270',
  'carbs': '14',
  'category': 'Chicken',
  'fat': '18',
  'name': '6-piece Chicken Nuggets',
  'restaurant': "Wendy's"},
 {'calories': '310',
  'carbs': '40',
  'category': 'French Fries',
  'fat': '15',
  'name': 'Small French Fries',
  'restaurant': "Wendy's"},
 {'calories': '780',
  'carbs': '40',
  'category': 'Salads',
  'fat': '51',
  'name': 'Spicy Chicken Caesar Salad',
  'restaurant': "Wendy's"},
 {'calories': '290',
  'carbs': '48',
  'category': 'Shakes',
  'fat': '7',
  'name': 'Small Original Chocolate Frosty™',
  'restaurant': "Wendy's"},
 {'calories': '560',
  'carbs': '93',
  'category': 'Shakes',
  'fat': '14',
  'name': 'Large Vanilla Frosty™',
  'restaurant': "Wendy's"},
 {'calories': '290',
  'carbs': '53',
  'category': 'Shakes',
  'fat': '6',
  'name': 'Vanilla Frosty Waffle Cone',
  'restaurant': "Wendy's"}

### Step 4: Creating a pandas DataFrame from our list of foods

In [139]:
import pandas as pd

In [140]:
list_of_food_dicts = []
for i in range(len(results_list)):
    for x in range(len(results_list[i]['foods'])):
        dict = {}
        dict = (results_list[i]['foods'][x])
        list_of_food_dicts.append(dict)

In [141]:
df = pd.DataFrame(list_of_food_dicts)

In [144]:
df

Unnamed: 0,calories,carbs,category,fat,name,restaurant
0,760,45,Burgers,45,Original Bacon Double Cheeseburger,A&W Restaurants
1,340,26,Entrees,20,Coney (Chili) Dog,A&W Restaurants
2,370,49,French Fries,15,Chili Fries,A&W Restaurants
3,670,90,Shakes,29,Strawberry Milkshake (small),A&W Restaurants
4,820,150,Shakes,18,A&amp;W® Root Beer Freeze (large),A&W Restaurants
...,...,...,...,...,...,...
5126,200,32,Shakes,5,Jr. Original Chocolate Frosty™,Wendy's
5127,260,25,Wraps,10,Grilled Chicken Go Wrap,Wendy's
5128,670,57,Sandwiches,32,Asiago Ranch Chicken Club,Wendy's
5129,330,30,Wraps,16,Spicy Chicken Go Wrap,Wendy's


In [147]:
df = df[['name', 'calories', 'fat', 'carbs', 'restaurant']]

In [148]:
df

Unnamed: 0,name,calories,fat,carbs,restaurant
0,Original Bacon Double Cheeseburger,760,45,45,A&W Restaurants
1,Coney (Chili) Dog,340,20,26,A&W Restaurants
2,Chili Fries,370,15,49,A&W Restaurants
3,Strawberry Milkshake (small),670,29,90,A&W Restaurants
4,A&amp;W® Root Beer Freeze (large),820,18,150,A&W Restaurants
...,...,...,...,...,...
5126,Jr. Original Chocolate Frosty™,200,5,32,Wendy's
5127,Grilled Chicken Go Wrap,260,10,25,Wendy's
5128,Asiago Ranch Chicken Club,670,32,57,Wendy's
5129,Spicy Chicken Go Wrap,330,16,30,Wendy's


### Step 5: Exporting to csv

In [149]:
df.to_csv('nutrition_information.csv', index=False, sep=",")