In [9]:
import sys
sys.path.insert(0, "/Users/artur/workspace/recipedb")
from recipedb.db import get_db
import functools
import pandas as pd
import numpy as np
from fractions import Fraction
from fuzzywuzzy import fuzz
db = get_db()

In [10]:
exs = db.allrecipes.find({"error":{"$eq":None}})

In [11]:
NUM_EXAMPLES = 100
exs_recipes = list(map(lambda x: next(exs), range(NUM_EXAMPLES))) # grab the next NUM_EXAMPLES from cursor above

In [60]:
food_base = list(db.FoodBase.find())

In [14]:
unit_terms = list(db.UnitTerms.find())

In [54]:
THRESHOLD = 75

def split_text_into_words(raw_text):
    return list(map(lambda x: x.strip().replace('(','').replace(')', ''), raw_text.split()))

def parse_and_separate_units(raw_ingred_text):
    terms = split_text_into_words(raw_ingred_text)
    last_term = None
    unit = None
    i = 0
    found = False
    for term in terms:
        for other_term in unit_terms:
            ratio = fuzz.ratio(term, other_term['text'])
            if ratio > THRESHOLD:
                unit = other_term
                found = True 
        if found:
            break
        last_term = term
        i += 1
    if found:
        terms.pop(i)
        terms.pop(i-1)
        units = {'qty': float(Fraction(last_term)), "unit": unit }
    else:
        units = None
    return units, ' '.join(terms)


In [56]:
parse_units('1/2 cup chopped fresh mint leaves')

(0.5,
 {'_id': ObjectId('5ec214ed6464fda41be88c7e'),
  'text': 'cup',
  'unit_id': 'cup'},
 'chopped fresh mint leaves')

In [62]:
def match_ingredient(ingred):
    matches = list(map(lambda x: fuzz.ratio(x['text'], ingred), food_base))
    return food_base[np.argmax(matches)]

In [64]:
def match_ingredients(ingredients):
    output = []
    for ingred in ingreds:
        unit, separated_text = parse_and_separate_units(ingred)
        match = match_ingredient(separated_text)
        output.append({"unit": unit, "match": match})
    return output

for i in range(15, 30):
    ingreds = exs_recipes[i]['data']['ingredients']
    print("INDEX %d" % i)
    print(ingreds)
    matches = match_ingredients(ingreds)
    print(matches)
    print('\n\n')
    

INDEX 15
['1 (1.25 ounce) package taco seasoning mix', '1/4 cup all-purpose flour', '3 pounds venison roast', '2 teaspoons cayenne pepper, or to taste', '2 tablespoons vegetable oil', '1 1/2 cups water']
[{'unit': {'qty': 1.25, 'unit': {'_id': ObjectId('5ec2163a6464fda41be88c92'), 'text': 'ounce', 'unit_id': 'ounce'}}, 'match': {'_id': ObjectId('5ebcd38f1ec24005c9535322'), 'text': 'taco seasoning mix', 'class': ['AG.01.m', 'AG.01.n.12', 'AG.01.l', 'AG.01.t.05']}}, {'unit': {'qty': 0.25, 'unit': {'_id': ObjectId('5ec214ed6464fda41be88c7e'), 'text': 'cup', 'unit_id': 'cup'}}, 'match': {'_id': ObjectId('5ebcd38f1ec24005c95359fa'), 'text': 'all-purpose flour', 'class': ['AG.01.k', 'AG.01.j']}}, {'unit': {'qty': 3.0, 'unit': {'_id': ObjectId('5ec2160e6464fda41be88c8f'), 'text': 'pound', 'unit_id': 'pound'}}, 'match': {'_id': ObjectId('5ebcd38f1ec24005c9535297'), 'text': 'onion salt', 'class': ['AG.01.h.02.e', 'AG.01.l.01', 'AG.01.w']}}, {'unit': {'qty': 2.0, 'unit': {'_id': ObjectId('5ec213

[{'unit': {'qty': 8.0, 'unit': {'_id': ObjectId('5ec214ed6464fda41be88c7e'), 'text': 'cup', 'unit_id': 'cup'}}, 'match': {'_id': ObjectId('5ebcd38f1ec24005c95359fa'), 'text': 'all-purpose flour', 'class': ['AG.01.k', 'AG.01.j']}}, {'unit': {'qty': 4.0, 'unit': {'_id': ObjectId('5ec214ed6464fda41be88c7e'), 'text': 'cup', 'unit_id': 'cup'}}, 'match': {'_id': ObjectId('5ebcd38f1ec24005c95353be'), 'text': 'shredded cheddar cheese', 'class': ['AG.01.e.02', 'AG.01.n.18']}}, {'unit': {'qty': 0.75, 'unit': {'_id': ObjectId('5ec214ed6464fda41be88c7e'), 'text': 'cup', 'unit_id': 'cup'}}, 'match': {'_id': ObjectId('5ebcd38f1ec24005c953551b'), 'text': 'diced jalapeno pepper', 'class': ['AG.01.l.03', 'AG.01.t.05']}}, {'unit': {'qty': 0.5, 'unit': {'_id': ObjectId('5ec214ed6464fda41be88c7e'), 'text': 'cup', 'unit_id': 'cup'}}, 'match': {'_id': ObjectId('5ebcd38f1ec24005c9534fb3'), 'text': 'white sugar', 'class': ['AG.01.l.02']}}, {'unit': {'qty': 0.5, 'unit': {'_id': ObjectId('5ec213a86464fda41be88c

[{'unit': {'qty': 2.0, 'unit': {'_id': ObjectId('5ec2140d6464fda41be88c75'), 'text': 'tablespoon', 'unit_id': 'tablespoon'}}, 'match': {'_id': ObjectId('5ebcd38f1ec24005c9535177'), 'text': 'corn syrup', 'class': ['AG.01.i', 'AG.01.l.02']}}, {'unit': {'qty': 0.5, 'unit': {'_id': ObjectId('5ec214ed6464fda41be88c7e'), 'text': 'cup', 'unit_id': 'cup'}}, 'match': {'_id': ObjectId('5ebcd38f1ec24005c953501f'), 'text': 'butter', 'class': ['AG.01.e.01', 'AG.01.n']}}, {'unit': {'qty': 1.0, 'unit': {'_id': ObjectId('5ec214ed6464fda41be88c7e'), 'text': 'cup', 'unit_id': 'cup'}}, 'match': {'_id': ObjectId('5ebcd38f1ec24005c9534fa1'), 'text': 'brown sugar', 'class': ['AG.01']}}, {'unit': {'qty': 1.0, 'unit': {'_id': ObjectId('5ec2160e6464fda41be88c8f'), 'text': 'pound', 'unit_id': 'pound'}}, 'match': {'_id': ObjectId('5ebcd38f1ec24005c95351d6'), 'text': 'sliced french bread', 'class': ['AG.01.n.11', 'AG.01.t.05']}}, {'unit': None, 'match': {'_id': ObjectId('5ebcd38f1ec24005c9534fae'), 'text': 'eggs'

[{'unit': {'qty': 0.5, 'unit': {'_id': ObjectId('5ec214ed6464fda41be88c7e'), 'text': 'cup', 'unit_id': 'cup'}}, 'match': {'_id': ObjectId('5ebcd38f1ec24005c95359fa'), 'text': 'all-purpose flour', 'class': ['AG.01.k', 'AG.01.j']}}, {'unit': {'qty': 1.0, 'unit': {'_id': ObjectId('5ec2140d6464fda41be88c75'), 'text': 'tablespoon', 'unit_id': 'tablespoon'}}, 'match': {'_id': ObjectId('5ebcd38f1ec24005c953517c'), 'text': 'baking soda', 'class': ['AG.01.aa', 'AG.01.m']}}, {'unit': {'qty': 1.0, 'unit': {'_id': ObjectId('5ec2140d6464fda41be88c75'), 'text': 'tablespoon', 'unit_id': 'tablespoon'}}, 'match': {'_id': ObjectId('5ebcd38f1ec24005c9536cc5'), 'text': 'ground cinnamon', 'class': ['AG.01.l.03']}}, {'unit': {'qty': 1.0, 'unit': {'_id': ObjectId('5ec2140d6464fda41be88c75'), 'text': 'tablespoon', 'unit_id': 'tablespoon'}}, 'match': {'_id': ObjectId('5ebcd38f1ec24005c9534f62'), 'text': 'salt', 'class': ['AG.01.l.01', 'AG.01.w']}}, {'unit': {'qty': 0.5, 'unit': {'_id': ObjectId('5ec214ed6464fd

In [65]:
exs_recipes

[{'_id': ObjectId('5e98387436a5c33e2595923d'),
  'error': None,
  'src': 'https://www.allrecipes.com/recipe/14636/southwestern-haystacks/',
  'data': {'title': 'Southwestern Haystacks',
   'image': 'https://images.media-allrecipes.com/userphotos/560x315/1529583.jpg',
   'categories': ['World Cuisine', 'Latin American', 'Mexican'],
   'rating': 3.97272729873657,
   'review_count': '80',
   'ingredients': ['1 pound ground beef',
    '1 (16 ounce) can diced tomatoes',
    '1 (15 ounce) can whole kernel corn',
    '1 (15 ounce) can kidney beans, drained',
    '1 teaspoon garlic powder',
    'salt to taste',
    'ground black pepper to taste',
    '1 (16 ounce) package macaroni',
    '1 (8 ounce) container sour cream',
    '2 cups shredded Cheddar cheese'],
   'directions': ['In a large skillet over medium-high heat, brown hamburger. Drain fat, add tomatoes, corn, and beans and heat through. Season with garlic powder, salt and pepper.',
    'Cook elbow macaroni in a large pot of boiling wat