In [10]:
import csv
import os

import networkx as nx
from bs4 import BeautifulSoup
from nltk.stem import WordNetLemmatizer

In [2]:
ingredients = []
with open('data/ingredients/all_ingredients.txt') as f:
    for line in f:
        ingredient = line.strip()
        ingredients.append(ingredient)
ingredients = sorted(ingredients, key=len, reverse=True)

In [3]:
ingredients[:10]

['high altitude adjustment for deep fat frying',
 "mrs dash's fiesta lime salt-free seasoning",
 'high altitude adjustment for cookie',
 'trappist or monastery-style cheese',
 'high altitude baking quick bread',
 "i can't believe it's not butter",
 'meat: safe cooking temperature',
 'african birdseye chile pepper',
 'heirloom weight & measurement',
 'trompette de la mort mushroom']

In [4]:
techniques = []
with open('data/techniques/all_techniques.txt') as f:
    for line in f:
        technique = line.strip()
        techniques.append(technique)
techniques = sorted(techniques, key=len, reverse=True)

In [5]:
techniques[:10]

['thermal immersion circulator',
 'extreme-heat stir-fry',
 'flash pasteurization',
 'amylolytic process',
 'biomass briquettes',
 'coagulated protein',
 'high heat stir fry',
 'mongolian barbecue',
 'culinary triangle',
 'food preservation']

In [6]:
def singularize(word):
    if word == 'cookies':
        result = 'cookie'
    elif word == 'mrs':
        result = 'mrs'
    elif word == 'ras':
        result = 'ras'
    elif word == 'somen':
        result = 'somen'
    elif word == 'cos':
        result = 'cos'
    elif word == 'monks':
        result = "monk's"
    elif word == 'webbs':
        result = "webb's"
    else:
        result = WordNetLemmatizer().lemmatize(word.lower())
    return result

In [7]:
def inf_ing(v_ing):
    inf = v_ing
    if v_ing == 'de-seeding':
        inf = 'de-seed'
    elif v_ing == 'de-boning':
        inf = 'de-bone'
    elif v_ing == 'de-bearding':
        inf = 'de-beard'
    elif v_ing == 'gutting':
        inf = 'gut'
    elif v_ing == 'de-glazing':
        inf = 'de-glaze'
    elif v_ing == 'sautéing':
        inf = 'sauté'
    elif v_ing == 'degorging':
        inf = 'degorge'
    elif v_ing == 'stir-frying':
        inf = 'stir-fry'
    elif v_ing == 'broasting':
        inf = 'broast'
    elif v_ing == 'juicing':
        inf = 'juice'
    elif v_ing == 'parbaking':
        inf = 'parbake'
    elif v_ing == 'charbroiling':
        inf = 'charbroil'
    elif v_ing == 'crinkle-cutting':
        inf = 'crinkle-cut'
    elif v_ing == 'cheesemaking':
        inf = 'cheesemake'
    elif v_ing == 'swissing':
        inf = 'swiss'
    elif v_ing == 'flash-frying':
        inf = 'flash-fry'
    elif v_ing == 'flashbaking':
        inf = 'flashbake'
    else:
        inf = WordNetLemmatizer().lemmatize(v_ing.lower(), 'v')
    return inf

def infinitive(words):
    aux = []
    word_list = words.split()
    for word in word_list:
        if word != 'cook' and word != 'cooking':
            if word.endswith('ing'):
                inf = inf_ing(word.lower())
                aux.append(inf)
                if inf == word:
                    print('NO INFINITIVE FOR', word)
            else:
                aux.append(word.lower())
    result = ' '.join(aux)
    return result

In [8]:
ingr_id_graph = nx.Graph()

In [75]:
with open('data/ingredients/allrecipes_ingr_ids.csv', 'w') as f1,\
     open('data/techniques/allrecipes_techniques.csv', 'w') as f2:
    writer1 = csv.writer(
        f1,
        delimiter=',',
        quotechar='"',
        quoting=csv.QUOTE_MINIMAL
    )
    writer2 = csv.writer(
        f2,
        delimiter=',',
        quotechar='"',
        quoting=csv.QUOTE_MINIMAL
    )
    for folder_name in os.listdir('data/allrecipes/'):
        folder_path = 'data/allrecipes/' + folder_name
        for file_name in os.listdir(folder_path):
            file_path = folder_path + '/' + file_name
            with open(file_path) as f:
                soup = BeautifulSoup(f.read(), 'html.parser')
                recipe_info = soup.find('recipe-signup')
                recipe_id = recipe_info['data-id']
                recipe_title = recipe_info['data-title'].strip('"').encode('utf_8').decode('unicode_escape')
                info_row = [recipe_id, recipe_title]
                
                ingredient_ids = set()
                ingredient_amounts = soup.select('section.recipe-ingredients span[itemprop=ingredients]')
                for ingredient_amount in ingredient_amounts:
                    ingredient_id = ingredient_amount['data-id']
                    ingredient_ids.add(ingredient_id)
                    singularized = ' '.join(map(singularize, ingredient_amount.string.strip().split()))
                    for ingredient in ingredients:
                        if ingredient in singularized:
                            if ingr_id_graph.add_edge(ingredient_id, ingredient):
                                ingr_id_graph[ingredient_id][ingredient]['weight'] += 1
                            else:
                                ingr_id_graph.add_edge(ingredient_id, ingredient, weight=1)
                ingredient_row = info_row + list(ingredient_ids)
                writer1.writerow(ingredient_row)
                
                technique_names = set()
                instructions = soup.select('section.recipe-directions span.recipe-directions__list--item')
                for instruction in instructions:
                    infinitived = ' '.join(map(infinitive, instruction.string.strip().split()))
                    for technique in techniques:
                        if technique in infinitived:
                            technique_names.add(technique)
                technique_row = info_row + list(technique_names)
                writer2.writerow(technique_row)

In [30]:
type(recipe_info['data-title'])

str

In [17]:
s='Grandma\u0027s English Muffin Bread'

In [21]:
b=s.encode('utf-8')

In [24]:
b.decode('utf-8')

"Grandma's English Muffin Bread"

In [55]:
with open('data/allrecipes/6000/6903.html') as f:
    soup = BeautifulSoup(f.read(), 'html.parser')
    info = soup.find('recipe-signup')
    print(info['data-id'])
    print(info['data-title'])
    qq=info['data-title']
    info_row = [info['data-id'], info['data-title']]
    technique_names = []
    instructions = soup.select('section.recipe-directions span.recipe-directions__list--item')
    for instruction in instructions:
        infinitived = ' '.join(map(infinitive, instruction.string.strip().split()))
        for technique in techniques:
            if technique in infinitived:
                technique_names.append(technique)
    technique_row = info_row + technique_names
    print(technique_row)
    print()

6903
"Grandma\u0027s English Muffin Bread"
['6903', '"Grandma\\u0027s English Muffin Bread"', 'bread', 'red', 'grease', 'cover', 'shape', 'ice', 'red', 'brown', 'bake']



In [68]:
qq.encode('ascii', 'backslashreplace')

b'"Grandma\\u0027s English Muffin Bread"'

In [54]:
import unicodedata
unicodedata.normalize(info['data-title'], 'NFD')

ValueError: invalid normalization form

In [40]:
str(info['data-title']).encode('utf-8').decode('utf-8')

'"Grandma\\u0027s English Muffin Bread"'

In [39]:
las tecnicas no tienen pq ser solo verbos - pueden ser sustantivos, por ejemplo, bbq, fondue...

[('salt', 'VB'), ('the', 'DT'), ('fish', 'NN')]

In [33]:
for folder_name in os.listdir('data/allrecipes/'):
    folder_path = 'data/allrecipes/' + folder_name
    for file_name in os.listdir(folder_path):
        file_path = folder_path + '/' + file_name
        with open(file_path) as f:
            soup = BeautifulSoup(f.read(), 'html.parser')
            info = soup.find('recipe-signup')
            print(info['data-id'])
            print(info['data-title'])
            ingredients = soup.select('span[itemprop=ingredients]')
            for ingredient in ingredients:
                print(ingredient.string.strip())
            print()

6902
"Indian Naan I"
1 1/2 teaspoons active dry yeast
2 cups bread flour
1 teaspoon salt
9 tablespoons water
2 tablespoons clarified butter

246279
"White Chocolate Fondue"
1 (12 ounce) package white chocolate chips
1/2 cup heavy whipping cream, or more as needed
1/4 cup light corn syrup
2 tablespoons butter
1/4 teaspoon salt
1 teaspoon vanilla extract

6903
"Grandma\u0027s English Muffin Bread"
3 cups all-purpose flour
2 1/4 teaspoons active dry yeast
1/2 tablespoon white sugar
1 teaspoon salt
1/8 teaspoon baking powder
1 cup warm milk
1/4 cup water

6905
"Apple Scones"
2 cups all-purpose flour
1/4 cup white sugar
2 teaspoons baking powder
1/2 teaspoon baking soda
1/2 teaspoon salt
1/4 cup butter, chilled
1 apple - peeled, cored and shredded
1/2 cup milk
2 tablespoons milk
2 tablespoons white sugar
1/2 teaspoon ground cinnamon

6904
"Chocolate Filled Muffins"
2 cups all-purpose flour
3/4 cup white sugar
1/4 cup unsweetened cocoa powder
3 teaspoons baking powder
1/2 teaspoon salt
1/2 t