In [1]:
from model.Corpus import Corpus
from model.Model import Model
from model.Embeddings import Embeddings
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

from pprint import pprint
import csv
from scrapers.ScraperResult import ScraperResult
from page_processors import *

global_hyperparameters = {
    "embedding_dim": 100, # Dimensions in the GloVe embedding
    "max_length": 60, # Enforced max and min character length of an example
    "trunc_type": "post",
    "padding_type": "post",
    "oov_tok": "<OOV>",
    "test_portion": 0.1
}


In [2]:
# should tokenize the labels; there aren't 1191 unique ones

ingredients_hyperparameters = {
    "num_epochs": 6,
    "training_size": 89500
}

ingredient_corpus = Corpus(
    "training_data/augmented_classified_ingredients.csv",
    global_hyperparameters,
    ingredients_hyperparameters
).initialize()

ingredient_embeddings = Embeddings(
    "glove.6B.100d.txt",
    ingredient_corpus,
    global_hyperparameters
).activate()

ingredient_classifier = Model(
    ingredient_corpus,
    ingredient_embeddings,
    global_hyperparameters,
    ingredients_hyperparameters,
    1191
).run()


Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


In [3]:
recipes_hyperparameters = {
    "num_epochs": 4,
    "training_size": 76000
}

recipe_corpus = Corpus(
    "training_data/augmented_training_labels.csv",
    global_hyperparameters,
    recipes_hyperparameters
).initialize()

recipe_embeddings = Embeddings(
    "glove.6B.100d.txt",
    recipe_corpus,
    global_hyperparameters
).activate()

recipe_classifier = Model(
    recipe_corpus,
    recipe_embeddings,
    global_hyperparameters,
    recipes_hyperparameters,
    5
).run()


Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


In [82]:
from scrapers.PageProcessor import PageProcessor
from model.Cache import Cache
instance_cache = Cache()

ValueError: I/O operation on closed file.

In [89]:
total_payload = []

new_links = [
    "https://www.bbcgoodfood.com/recipes/espresso-martini",
    "https://www.seriouseats.com/the-martini-recipe"
]

with open('newlabels.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile, delimiter=',')
    
    for link in new_links:
        print(link)
        
        #try:
        data = PageProcessor(
            link,
            recipe_classifier,
            recipe_corpus,
            global_hyperparameters,
            writer
        )
        data.soupify()
        this_recipe = ScraperResult(
            link.split("/")[-1],
            list(map(lambda x: x[0], lasso_inward(data.classifications,4,1,1,2)[0])),
            list(map(lambda x: x[0], lasso_inward(data.classifications,4,1,2,2)[0])),
            list(map(lambda x: x[0], lasso_inward(data.classifications,4,1,3,2)[0])),
            list(map(lambda x: x[0], lasso_inward(data.classifications,4,1,4,2)[0]))
        )
        this_recipe.map_payload_ingredients(
            instance_cache.get_ingredients(),
            ingredient_corpus,
            ingredient_classifier,
            global_hyperparameters
        )
        total_payload.append(this_recipe.get_payload())

        #except:
        #    print("link " + link + " could not be opened")


pprint(total_payload)


https://www.bbcgoodfood.com/recipes/espresso-martini
https://www.seriouseats.com/the-martini-recipe
[{'ingredients': ['For the sugar syrup',
                  '100g golden caster sugar',
                  'ice',
                  '100ml vodka',
                  '50ml freshly brewed espresso coffee',
                  '50ml coffee liqueur (we used Kahlua)',
                  '4 coffee beans (optional)'],
  'mapped_ingredients': [['For the sugar syrup', 'simple%20syrup', 102],
                         ['100g golden caster sugar', 'caster%20sugar', 1181],
                         ['ice', 'ice%20cube', 115],
                         ['100ml vodka', 'vodka', 69],
                         ['50ml freshly brewed espresso coffee', 'coffee', 101],
                         ['50ml coffee liqueur (we used Kahlua)',
                          'kahlua',
                          981],
                         ['4 coffee beans (optional)', 'coffee%20bean', 381]],
  'steps': ['Start by making the sugar

In [87]:
print(data.classifications)

[['Classic Martini Cocktail Recipe', 0, 0, 0.99998844, 3.9928636e-06, 7.612833e-06, 1.681149e-08, 2.3457147e-08, 'https://www.seriouseats.com/the-martini-recipe'], ['var Mntl = window.Mntl || {};\nMntl.RTB = Mntl.RTB || {};\nMntl.RTB.setTaxonomyStampValues({"tax1":"se_all-recipes","tax2":"se_recipes-by-course","tax0":"se_root","tax5":"se_gin-recipes","tax3":"se_drink-recipes","tax4":"se_cocktail-recipes"});\nMntl.RTB.indexFirstPartyData = \'tax1:se_all-recipes\';\nMntl.RTB.setTimeoutLength([500,800]);\nMntl.RTB.Plugins.amazon.amazonConfigs = {"mapTaxValues":{"tax1":"se_all-recipes","tax2":"se_recipes-by-course","si_section":"se_recipes-by-course","tax0":"se_root","tax3":"se_drink-recipes"},"mapFBValues":{},"amazonSlotName":false,"amazonSection":"Drink Recipes"};\nMntl.RTB.Plugins.s2s.s2sConfigs = {"partners":"","timeout":500};\nMntl.RTB.initBidders([{ type: "amazon", id: \'3222\'},{ type: "lotameLightning", id: \'true\'},{ type: "ias", id: \'926268\'},{ type: "ixid", id: \'true\'},{ ty

In [24]:
data = process_page(
    "https://www.seriouseats.com/the-martini-recipe",
    #writer,
    recipe_classifier,
    recipe_corpus,
    global_hyperparameters["max_length"],
    global_hyperparameters["padding_type"],
    global_hyperparameters["trunc_type"]
)
this_recipe = ScraperResult(
    link.split("/")[-1],
    list(map(lambda x: x[0], lasso_inward(data,4,6,1,7)[0])),
    list(map(lambda x: x[0], lasso_inward(data,4,6,2,7)[0])),
    list(map(lambda x: x[0], lasso_inward(data,4,6,3,7)[0])),
    list(map(lambda x: x[0], lasso_inward(data,4,6,4,7)[0]))
)
this_payload = this_recipe.get_payload()
this_payload_mapped = this_recipe.map_payload_ingredients(
    instance_cache.get_ingredients(),
    ingredient_corpus,
    ingredient_classifier,
    global_hyperparameters
)
total_payload.append(this_recipe.get_payload())

In [25]:
pprint(this_payload_mapped)

{'ingredients': ['2 ounces dry gin',
                 '1 ounce dry vermouth',
                 '1 dash orange bitters'],
 'mapped_ingredients': [['2 ounces dry gin', 'gin', 33],
                        ['1 ounce dry vermouth', 'dry%20vermouth', 78],
                        ['1 dash orange bitters', 'bitters', 498]],
 'steps': ['Combine ingredients in a mixing glass and fill with ice. Stir well '
           'to chill and strain into a chilled cocktail glass. Twist a piece '
           'of lemon peel over the drink and use as garnish, or, if you must, '
           'toss in an olive.'],
 'timing': ['Total: 5 mins'],
 'title': 'the-martini-recipe',
 'yield': ['']}


In [18]:
tst_snt = total_payload[0]["ingredients"]
tst_snt = ingredient_corpus.tokenizer.texts_to_sequences(tst_snt)
tst_snt = pad_sequences(
    tst_snt, 
    maxlen=global_hyperparameters["max_length"], 
    padding=global_hyperparameters["padding_type"], 
    truncating=global_hyperparameters["trunc_type"]
)
pred = ingredient_classifier.model.predict(tst_snt)

for j in range(len(total_payload)):
    for i in range(len(total_payload[j]["ingredients"])):
        pred_id = np.argmax(pred[i])
        matched_ing = list(filter(lambda x: x[1] == pred_id,instance_cache.get_ingredients()))[0]
        total_payload[j]["ingredients"][i] = [total_payload[j]["ingredients"][i],matched_ing[0],pred_id]

    pprint(total_payload[j]["ingredients"])



[['2 ounces dry gin', 'gin', 33],
 ['1 ounce dry vermouth', 'dry%20vermouth', 78],
 ['1 dash orange bitters', 'bitters', 498]]


In [29]:
from scrapers.CocktailLinkScraper import CocktailLinkScraper
cs = CocktailLinkScraper()

ab = cs.alton_brown("scraper_working_data/ab-cocktails.html")
print(ab)


['https://altonbrown.com/recipes/allspice-dram/', 'https://altonbrown.com/recipes/dairy-free-chocolate-date-shake/', 'https://altonbrown.com/recipes/clarified-milk-punch/', 'https://altonbrown.com/recipes/easy-cooked-eggnog/', 'https://altonbrown.com/recipes/cranberry-apple-shrub/', 'https://altonbrown.com/recipes/cocktail-ice-cubes/', 'https://altonbrown.com/recipes/how-to-make-dalgona-coffee/', 'https://altonbrown.com/recipes/hot-cocoa-mix/', 'https://altonbrown.com/recipes/immersion-circulator-fennel-cordial/', 'https://altonbrown.com/recipes/mint-syrup/', 'https://altonbrown.com/recipes/smoky-tequila-sour/', 'https://altonbrown.com/recipes/the-alton-brown-martini/', 'https://altonbrown.com/recipes/perfect-pour-over-coffee/', 'https://altonbrown.com/recipes/alton-browns-favorite-martini/', 'https://altonbrown.com/recipes/aged-eggnog/', 'https://altonbrown.com/recipes/barley-water/', 'https://altonbrown.com/recipes/bitter-grapefruit-martini/', 'https://altonbrown.com/recipes/pickle-b