In [45]:
'''Version 0.1'''
from bs4 import BeautifulSoup
from requests import get
import re
import json
global results
from pprint import pprint
import pdb

resdict = {}
#https://www.allrecipes.com/recipe/80827/easy-garlic-broiled-chicken/

def autograder(url):
    '''Accepts the URL for a recipe, and returns a dictionary of the
    parsed results in the correct format. See project sheet for
    details on correct format.'''
    # your code here    
    print('loading...')
    get_ingredients(url)
    get_tools(url)
    get_methods(url)
    get_structuredsteps(url)
    
    print(get_title(url)+': \n')
    for ingredient_dict in resdict['ingredients']:
        print(ingredient_dict['full_string'])
        print('Ingredient: ',ingredient_dict['name'][0])
        print('Quantity: ',ingredient_dict['quantity'][0])
        print('Measurement: ',ingredient_dict['measurement'][0])
        print('')
    return

####################################################################################################################################
# GENERAL STUFF
####################################################################################################################################

def get_raw_html(url):
    try:
        raw_html = get(url,stream=True)
        if raw_html.status_code == 200:
            html = BeautifulSoup(raw_html.content, 'html.parser')
            return html
        else:
            print(raw_html.status_code)
            sys.exit()
    except:
        print("URL ", url, " not recognized!")

def get_title(url):
    url_list = url.split('/')
    if len(url_list[-1])==0:
        title = url_list[-2]
    else:
        title = url_list[-1]
    title_list = title.split('-')
    title = ' '.join(title_list)
    return title.title()


####################################################################################################################################
# INGREDIENT STUFF
####################################################################################################################################

def ingredients_from_url(url):
    html = get_raw_html(url)
    #pdb.set_trace()
    #soup = BeautifulSoup(html, "html.parser")
    items = []
    for line in html.select('label'):
        line = str(line)
        if "{true: 'checkList__item'}" in line:
            segments = line.split('"')
            items.append(segments[3])
    return items[:-1]

def ingredient_parser(string):
    words = string.split(' ')

    # Quantity
    if words[0][0].isnumeric() and words[1][0].isnumeric():    
        quantity = [words[:2]]
        words = words[2:]
    elif words[0][0].isnumeric():
        quantity = [words[0]]
        words = words[1:]
    else:
        quantity = ['unspecified']

    # Measurement
    measurement_units = [line.split('\n')[0] for line in open('measurement_units.txt', 'r').readlines()]
    if '(' in words[0]:
        measurement = ' '.join(words[:3])
        measurement = measurement.replace("(","").replace(")","")
        measurement = [measurement]
        words = words[3:]
    else:
        measurement = [word for word in words if word in measurement_units]
        if len(measurement)==0:
            measurement=['unspecified']
        words = [word for word in words if word not in measurement_units]

    descriptor_terms = [line.split('\n')[0] for line in open('descriptor_terms.txt', 'r').readlines()]
    descriptors = [word for word in words if word in descriptor_terms]
    if len(descriptors)==0:
        descriptors=['unspecified']
    words = [word for word in words if word not in descriptor_terms]
    
    preparation = []
    prep_description = []
    Max = []
    name = [' '.join(words)]

    return {"name":name,
            "quantity":quantity,
            "measurement":measurement,
            "descriptor":descriptors,
            "preparation":preparation,
            "prep_description":prep_description,
            "max":Max,
            "full_string":string
            }

def get_ingredients(url):
    ingredient_list = ingredients_from_url(url)
    ingredient_data = []
    for ingredient in ingredient_list:
        ingredient_data.append(ingredient_parser(ingredient))
    resdict["ingredients"] = ingredient_data
    return ingredient_data


####################################################################################################################################
# PREP STUFF
####################################################################################################################################

def grab_steps(url):
    html = get_raw_html(url)
    steps = []
    for line in html.select('span'):
        #         print(line)
        line = str(line)
        if "recipe-directions__list--item" in line:
            #pdb.set_trace()
            segments = line.split('>')
            steps.append(segments[1].split('\n')[0])
    return steps[:-1]

def get_structuredsteps(url):
    steps = grab_steps(url)
    joined = " ".join(steps)
    structured_steps = joined.split(".")

    resdict["structured steps"] = []

    time_units = ['sec', 'sec.', 'seconds', 'second' 'min', 'min.', 'minutes', 'minute', 'hour', 'hours', 'hr', 'hrs', 'hr.', 'hrs.']

    stop_words = ["and", "with", "the", "to"]

    ingredient_names = [ingredient["name"][0] for ingredient in resdict["ingredients"]]
    cooking_tools = [y for y in resdict["cooking tools"]]
    cooking_tools.extend([z for z in resdict["implied cooking tools"]])
    #some line for extracted methods
    #print(ingredient_names)
    #print(cooking_tools)

    for s in structured_steps:
        if s != "":
            ingredient_list = []
            for i in ingredient_names:
                for y in i.split():
                    if y not in stop_words and y in s:
                        ingredient_list.append(i)

            tools_list = [t for t in cooking_tools if t in s]

            #some line for methods

            cooking_time = ""
            tokens = s.split(" ")
            for x in range(len(tokens) - 2):
                if tokens[x].isdigit() and tokens[x + 1] in time_units:
                    cooking_time = tokens[x] + ' ' + tokens[x + 1]

            step = {
                "step": s,
                "ingredients": list(set(ingredient_list)),
                "tools": list(set(tools_list)),
                "cooking time": cooking_time
            }

            resdict["structured steps"].append(step)

def get_tools(url):
    steps = grab_steps(url)
    cooking_tools = []
    implied_tools = []
    official_tools = {}
    with open('tools.json') as f:
        official_tools = json.load(f)

    for s in steps:
        line = s.lower().strip()
        line = re.sub(r'[^\w\s]','',line)
        for t in official_tools:
            if t in line:
                cooking_tools.append(t)
            elif official_tools[t]:
                for w in official_tools[t]:
                    if w in line:
                        implied_tools.append(t)

    resdict["cooking tools"] = list(set(cooking_tools))
    resdict["implied cooking tools"] = list(set(implied_tools))
    return list(set(cooking_tools)), list(set(implied_tools))

def get_methods(url):
    steps = grab_steps(url)
    cooking_methods = []
    official_methods = {}
    with open('methods.json') as f:
        official_methods = json.load(f)

    for s in steps:
        line = s.lower().strip()
        line = re.sub(r'[^\w\s]','',line)
        for m in official_methods:
            if m in line:
                cooking_methods.append(m)
            elif official_methods[m]:
                for w in official_methods[m]:
                    if w in line:
                        cooking_methods.append(m)

    resdict["cooking methods"] = list(set(cooking_methods))
    return list(set(cooking_methods))

def main():
#     url = "http://allrecipes.com/recipe/easy-meatloaf/"
#     url = 'https://www.allrecipes.com/recipe/7453/chocolate-caramel-nut-cake/?internalSource=rotd&referringId=22935&referringContentType=Recipe%20Hub'
#     url = 'https://thewoksoflife.com/2018/08/peach-daiquiris-frozen/'
    url = str(input("What recipe would you like to read?: \n")).strip()
    autograder(url)
    
if __name__ == '__main__':
    main()

What recipe would you like to read?: 
http://allrecipes.com/recipe/easy-meatloaf/
loading...
Easy Meatloaf: 

1 1/2 pounds ground beef
Ingredient:  beef
Quantity:  ['1', '1/2']
Measurement:  pounds

1 egg
Ingredient:  egg
Quantity:  1
Measurement:  unspecified

1 onion, chopped
Ingredient:  onion,
Quantity:  1
Measurement:  unspecified

1 cup milk
Ingredient:  milk
Quantity:  1
Measurement:  cup

1 cup dried bread crumbs
Ingredient:  bread crumbs
Quantity:  1
Measurement:  cup

salt and pepper to taste
Ingredient:  salt and pepper to taste
Quantity:  unspecified
Measurement:  unspecified

2 tablespoons brown sugar
Ingredient:  brown sugar
Quantity:  2
Measurement:  tablespoons

2 tablespoons prepared mustard
Ingredient:  mustard
Quantity:  2
Measurement:  tablespoons

1/3 cup ketchup
Ingredient:  ketchup
Quantity:  1/3
Measurement:  cup



In [25]:
url = "http://allrecipes.com/recipe/easy-meatloaf/"
ingredients_from_url(url)

['1 1/2 pounds ground beef',
 '1 egg',
 '1 onion, chopped',
 '1 cup milk',
 '1 cup dried bread crumbs',
 'salt and pepper to taste',
 '2 tablespoons brown sugar',
 '2 tablespoons prepared mustard',
 '1/3 cup ketchup']

In [35]:
[line.split('\n')[0] for line in open('measurement_units.txt', 'r').readlines()]

['lbs',
 'lb',
 'oz',
 'cup',
 'cups',
 'tablespoon',
 'tbsp',
 'teaspoon',
 'tsp',
 'pinch',
 'fl oz',
 'jar']

In [27]:
'1'.isnumeric()

True

In [2]:
sample_dict = {
    "url": "http://allrecipes.com/Recipe/Easy-Garlic-Broiled-Chicken/",
    "ingredients": [{
            "name": ["butter"],
            "quantity": [0.5],
            "measurement": ["cup", "cups"],
            "descriptor": [],
            "preparation": [],
            "prep-description": [],
            "max": 3
        },
        {
            "name": ["garlic", "minced garlic"],
            "quantity": [3],
            "measurement": ["tablespoons","tablespoon"],
            "descriptor": [],
            "preparation": ["minced"],
            "prep-description": [],
            "max": 4
        },
        {
            "name": ["soy sauce"],
            "quantity": [3],
            "measurement": ["tablespoons", "tablespoon"],
            "descriptor": [],
            "preparation": [],
            "prep-description": [],
            "max": 3
        },
        {
            "name": ["pepper", "black pepper"],
            "quantity": [0.25],
            "measurement": ["teaspoon", "teaspoons"],
            "descriptor": ["black"],
            "preparation": [],
            "prep-description": [],
            "max": 4
        },
        {
            "name": ["parsley", "dried parsley"],
            "quantity": [1],
            "measurement": ["tablespoon", "tablespoons"],
            "descriptor": ["dried"],
            "preparation": ["dried"],
            "prep-description": [],
            "max": 4
        },
        {
            "name": ["chicken","chicken thighs","boneless chicken thighs","boneless chicken","boneless chicken thighs, with skin"],
            "quantity": [6],
            "measurement": ["thighs", "unit", "units","discrete"],
            "descriptor": ["boneless", "with skin", "thighs","boneless thighs, with skin"],
            "preparation": ["boneless","with skin"],
            "prep-description": [],
            "max": 5
        },
        {
            "name": ["parsley", "dried parsley","dried parsley, to taste"],
            "quantity": [0,1,"none"],
            "measurement": ["to taste", "taste"],
            "descriptor": ["dried","dried, to taste"],
            "preparation": ["dried"],
            "prep-description": [],
            "max": 4
        }
    ],
    "max": {
        "ingredients": 27,
        "primary cooking method": 1,
        "cooking tools": 6,
        "cooking methods": 11,
        "implied cooking tools": 0,
        "implied cooking methods": 0
    },
    "primary cooking method": "broil",
    "cooking methods":["grease","greasing","preheat","preheating","mix","mixing","melted","melting","arrange","arranging","microwave","microwaving","coat","coating","basting","broil","broiling","turning","sprinkle","sprinkling","turn","melt"],
    "cooking tools": ["oven","knife", "baking pan", "microwave safe bowl", "microwave", "baster"],
    "implied cooking methods": [],
    "implied cooking tools": []
}

In [4]:
def american_style(recipe_dict):
    sample_dict

{'url': 'http://allrecipes.com/Recipe/Easy-Garlic-Broiled-Chicken/',
 'ingredients': [{'name': ['butter'],
   'quantity': [0.5],
   'measurement': ['cup', 'cups'],
   'descriptor': [],
   'preparation': [],
   'prep-description': [],
   'max': 3},
  {'name': ['garlic', 'minced garlic'],
   'quantity': [3],
   'measurement': ['tablespoons', 'tablespoon'],
   'descriptor': [],
   'preparation': ['minced'],
   'prep-description': [],
   'max': 4},
  {'name': ['soy sauce'],
   'quantity': [3],
   'measurement': ['tablespoons', 'tablespoon'],
   'descriptor': [],
   'preparation': [],
   'prep-description': [],
   'max': 3},
  {'name': ['pepper', 'black pepper'],
   'quantity': [0.25],
   'measurement': ['teaspoon', 'teaspoons'],
   'descriptor': ['black'],
   'preparation': [],
   'prep-description': [],
   'max': 4},
  {'name': ['parsley', 'dried parsley'],
   'quantity': [1],
   'measurement': ['tablespoon', 'tablespoons'],
   'descriptor': ['dried'],
   'preparation': ['dried'],
   'pre

In [3]:
sample_dict['ingredients']

[{'name': ['butter'],
  'quantity': [0.5],
  'measurement': ['cup', 'cups'],
  'descriptor': [],
  'preparation': [],
  'prep-description': [],
  'max': 3},
 {'name': ['garlic', 'minced garlic'],
  'quantity': [3],
  'measurement': ['tablespoons', 'tablespoon'],
  'descriptor': [],
  'preparation': ['minced'],
  'prep-description': [],
  'max': 4},
 {'name': ['soy sauce'],
  'quantity': [3],
  'measurement': ['tablespoons', 'tablespoon'],
  'descriptor': [],
  'preparation': [],
  'prep-description': [],
  'max': 3},
 {'name': ['pepper', 'black pepper'],
  'quantity': [0.25],
  'measurement': ['teaspoon', 'teaspoons'],
  'descriptor': ['black'],
  'preparation': [],
  'prep-description': [],
  'max': 4},
 {'name': ['parsley', 'dried parsley'],
  'quantity': [1],
  'measurement': ['tablespoon', 'tablespoons'],
  'descriptor': ['dried'],
  'preparation': ['dried'],
  'prep-description': [],
  'max': 4},
 {'name': ['chicken',
   'chicken thighs',
   'boneless chicken thighs',
   'boneless