In [190]:
from requests import get
from requests.exceptions import RequestException
from contextlib import closing
from bs4 import BeautifulSoup
import pandas as pd
import urllib.request
import re

In [2]:
# example scraping functions found online
def simple_get(url, timeout=10):
    """
    Attempts to get the content at `url` by making an HTTP GET request.

    Parameters
    ----------
    url : str
        Address to fetch.
    timeout : float or tuple, optional
        Seconds to wait for the server before giving up (passed straight to
        `requests.get`). Without a timeout a stalled server would block the
        notebook indefinitely. Pass None to wait forever.

    Returns
    -------
    The raw response body (`resp.content`) when `is_good_response` accepts
    the response, otherwise None. None is also returned, after the error is
    passed to `log_error`, when the request raises a `RequestException`
    (timeouts included).
    """
    try:
        # closing() guarantees the streamed connection is released even if
        # the response is rejected or reading the body fails.
        with closing(get(url, stream=True, timeout=timeout)) as resp:
            return resp.content if is_good_response(resp) else None

    except RequestException as e:
        log_error('Error during requests to {0} : {1}'.format(url, str(e)))
        return None
    
def is_good_response(resp):
    """
    Returns True if the response seems to be HTML, False otherwise.

    A response qualifies when its status code is 200 and its Content-Type
    header contains 'html' (compared case-insensitively). A response that
    carries no Content-Type header is reported as not HTML rather than
    raising KeyError.
    """
    # .get() instead of [] because a response may lack a Content-Type header.
    content_type = resp.headers.get('Content-Type')
    if resp.status_code != 200 or content_type is None:
        return False
    return 'html' in content_type.lower()

def log_error(e):
    """
    Report an error.

    The report is simply printed to standard output; replace the body
    with any other reporting mechanism if something richer is wanted.
    """
    message = str(e)
    print(message)

In [209]:
# function used to create nutrition table

def create_nutrition_table(url):
    """
    Scrape the nutrition facts of the recipe page at `url` into a DataFrame.

    The page is fetched with `simple_get`. Within the first `div` of class
    'wprm-recipe-nutrition-container', the first `<li>` is parsed: nutrient
    names come from its `<strong>` tags, and amounts/units come from the
    whitespace-separated tokens of its text that start with a digit.

    Parameters
    ----------
    url : str
        Address of the recipe page.

    Returns
    -------
    pandas.DataFrame
        Columns 'Nutrient', 'Amount' and 'Unit', one row per nutrient.
        'Amount' holds the numeric part of each token as text (e.g. '44.2')
        and 'Unit' the remainder (e.g. 'g'). The unit of the first row is
        always set to 'cals'. The table is empty (and a message is passed to
        `log_error`) when the page cannot be fetched or contains no
        nutrition list.
    """
    columns = ['Nutrient', 'Amount', 'Unit']

    raw_html = simple_get(url)
    if raw_html is None:
        # simple_get returns None for failed requests and non-HTML responses.
        log_error('No HTML content retrieved from {0}'.format(url))
        return pd.DataFrame(columns=columns)

    html = BeautifulSoup(raw_html, 'html.parser')
    container = html.find('div', {'class': 'wprm-recipe-nutrition-container'})
    nutrition_item = container.find('li') if container is not None else None
    if nutrition_item is None:
        log_error('No nutrition information found at {0}'.format(url))
        return pd.DataFrame(columns=columns)

    # Drop the last character of each label (presumably the ':' separator
    # that follows the nutrient name on the page).
    names = [label.text[:-1] for label in nutrition_item.find_all('strong')]

    # split() without arguments never yields empty tokens, so indexing the
    # first character is safe even when the text has repeated whitespace.
    totals = []
    units = []
    for token in nutrition_item.text.split():
        if not token[0].isdigit():
            continue
        totals.append(re.sub(r'[^0-9.]', '', token))
        units.append(token.strip('0123456789.'))

    # Build the frame in one go; wrapping each list in a Series lets pandas
    # pad with NaN if the number of names and amounts ever differ.
    nutrition_table = pd.DataFrame(
        {
            'Nutrient': pd.Series(names, dtype=object),
            'Amount': pd.Series(totals, dtype=object),
            'Unit': pd.Series(units, dtype=object),
        },
        columns=columns,
    )

    # manually adding calories label (skipped for an empty table so that no
    # bogus row is created)
    if len(nutrition_table) > 0:
        nutrition_table.loc[0, 'Unit'] = 'cals'

    return nutrition_table

In [210]:
# Example run: scrape the nutrition table of a single recipe page.
# This performs a live HTTP request, so the result depends on the page's current content.
create_nutrition_table('https://minimalistbaker.com/garlicky-kale-salad-with-crispy-chickpeas/')

Unnamed: 0,Nutrient,Amount,Unit
0,Calories,669.0,cals
1,Fat,44.2,g
2,Saturated fat,5.9,g
3,Sodium,138.0,mg
4,Potassium,1123.0,mg
5,Carbohydrates,59.8,g
6,Fiber,16.5,g
7,Sugar,15.5,g
8,Protein,20.2,g
9,Vitamin A,14200.0,IU
