In [1]:
import pickle as pkl
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from tqdm import tqdm_notebook as tqdm
from torchvision import datasets, transforms
import torch.utils.data as data_utils
import scipy
import scipy.sparse
import os

from nn import *

In [2]:
def merge(source, destination):
    """
    run me with nosetests --with-doctest file.py

    >>> a = { 'first' : { 'all_rows' : { 'pass' : 'dog', 'number' : '1' } } }
    >>> b = { 'first' : { 'all_rows' : { 'fail' : 'cat', 'number' : '5' } } }
    >>> merge(b, a) == { 'first' : { 'all_rows' : { 'pass' : 'dog', 'fail' : 'cat', 'number' : '5' } } }
    True
    """
    for key, value in source.items():
        if isinstance(value, dict):
            # get node or create one
            node = destination.setdefault(key, {})
            merge(value, node)
        else:
            destination[key] = value

    return destination

In [3]:
results = {}
for fname in os.listdir("results"):
    if fname == '.DS_Store':
        continue
    print(fname)
    with open(f"results/{fname}", "rb") as f:
        res = pkl.load(f)
        results = merge(res, results)

nn_train_valid_results_2_0.001.pkl
nn_train_valid_results_3_0.01.pkl
nn_train_valid_results_2_0.01.pkl
nn_train_valid_results_2_0.0001_larger.pkl
nn_train_valid_results_3_0.0001.pkl
nn_train_valid_results_3_0.001.pkl
nn_train_valid_results_2_0.001_smaller.pkl
nn_train_valid_results_2_0.0001.pkl
nn_train_valid_results_2_0.001_larger.pkl
nn_train_valid_results_2_0.01_larger.pkl


In [4]:
results

{2: {0.001: {(200,
    200): (TwoHiddenNN(
      (fc0): Linear(in_features=3500, out_features=200, bias=True)
      (fc1): Linear(in_features=200, out_features=200, bias=True)
      (fc2): Linear(in_features=200, out_features=3500, bias=True)
    ), [6.3486355074678364,
     5.543690551965001,
     4.861971136099234,
     4.277568328113983,
     3.796006494436782,
     3.374413350138801,
     2.9942352208085716,
     2.655894259675242,
     2.3548491420075535,
     2.093960295469997], [4.345547124600639,
     11.455421325878595,
     20.67566892971246,
     27.668230830670925,
     33.01218051118211,
     37.687200479233226,
     41.939147364217256,
     46.43570287539936,
     51.322883386581466,
     56.026607428115014], 0.019503415489196776, 29.61),
   (500,
    500): (TwoHiddenNN(
      (fc0): Linear(in_features=3500, out_features=500, bias=True)
      (fc1): Linear(in_features=500, out_features=500, bias=True)
      (fc2): Linear(in_features=500, out_features=3500, bias=True)
    

In [6]:
# we are going to try the 200x200 0.001 lr nn and the 3500x3500 0.0001 lr nn
# nn200_model, nn200_losses, nn200_accs, nn200_valid_loss, nn200_valid_acc = results[2][0.001][(200, 200)]
# nn3500_model, nn3500_losses, nn3500_accs, nn3500_valid_loss, nn3500_valid_acc = results[2][0.0001][(3500, 3500)]

models = [
    (200, 0.0001, results[2][0.001][(200, 200)]), 
    (3500, 0.0001, results[2][0.0001][(3500, 3500)]), 
    (7000, 0.0001, results[2][0.0001][(7000, 7000)]),
    (200, 0.001, results[2][0.001][(200, 200)]),
    (3500, 0.001, results[2][0.001][(3500, 3500)]), 
    (7000, 0.001, results[2][0.001][(7000, 7000)]),
]

_, _, test_tensor, _ = get_data(dataset_size=100000)

In [7]:
# test our models!!
# nn200_model, nn200_losses, nn200_accs, nn200_valid_loss, nn200_valid_acc = nn200
# nn3500_model, nn3500_losses, nn3500_accs, nn3500_valid_loss, nn3500_valid_acc = nn3500

use_cuda = not ARGS["no_cuda"] and torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
test_loader = torch.utils.data.DataLoader(
    test_tensor,
    batch_size=ARGS["batch_size"], shuffle=True, **kwargs)

for nodes, lr, model_res in models:
    model, train_losses, train_accs, valid_loss, valid_acc = model_res
    test_loss, test_acc = test(model, device, test_loader)
    print(f"{nodes} {lr} results - loss: {test_loss}, acc: {test_acc}")


Test set: Average loss: 1.96e-02, Accuracy: 2949/10000 (29%)

200 0.0001 results - loss: 0.01964267535209656, acc: 29.49

Test set: Average loss: 1.77e-02, Accuracy: 3002/10000 (30%)

3500 0.0001 results - loss: 0.01768766601085663, acc: 30.02

Test set: Average loss: 1.81e-02, Accuracy: 3127/10000 (31%)

7000 0.0001 results - loss: 0.018091839146614073, acc: 31.27

Test set: Average loss: 1.93e-02, Accuracy: 2949/10000 (29%)

200 0.001 results - loss: 0.01933665277957916, acc: 29.49

Test set: Average loss: 3.24e-02, Accuracy: 2844/10000 (28%)

3500 0.001 results - loss: 0.03244561290740967, acc: 28.44

Test set: Average loss: 3.12e-02, Accuracy: 2633/10000 (26%)

7000 0.001 results - loss: 0.031222165632247924, acc: 26.33


In [9]:
data = np.load('dataset.npz', allow_pickle=True)

raw_ingredients = data["ingredients"]

ingredients = {ing: i for i, ing in enumerate(raw_ingredients)}

In [10]:
def convert_one_hot(array):
    # here i'm getting an array of zeros
    # num rows is the size of the input array (ie how many recipes)
    # num cols is num of ingredients total (so we can 1-hot them)
    one_hot = np.zeros((len(array), NUM_INGREDIENTS))
    
    for i in range(len(array)):
        if len(array[i]) > 0:
            # this is just indexing into the ith row of the array (ith recipe)
            # and saying all the values in the recipe we're gonna set to 1
            one_hot[i][array[i]] = 1
            
        else:
            print("shouldn't get here ever")

    return one_hot

In [11]:
def find_possible_matches(ingredient_identifier, print_details=True):
    if print_details:
        print("Matches for ingredient identifier:")
    indices = []
    for ingredient, index in ingredients.items():
        if ingredient_identifier in ingredient:
            if print_details:
                print("{} : {}".format(ingredient, index))
            indices.append(index)
    return indices

In [61]:
def recommend(input_ingredients, model, num_recs=10):
    if isinstance(input_ingredients[0], str):
        idxs = []
        for ing in input_ingredients:
            matches = find_possible_matches(ing, print_details=False)
            ing_idx = matches[0] if matches else print(ing)
            idxs.append(ing_idx)
    elif isinstance(input_ingredients[0], int):
        idxs = input_ingredients
        
    inp = torch.tensor(convert_one_hot([idxs]).astype(np.float32))
    # use the model to make a recommendation
    out = model(inp)
    pred = torch.topk(out, num_recs)
    
    nlls = pred[0][0]
    indices = pred[1][0]
    
    recommendations = []
    for i in range(num_recs):
        print(f"{raw_ingredients[indices[i]]} - {nlls[i]}")
        recommendations.append(raw_ingredients[indices[i]])
        
    return recommendations

In [18]:
model200 = results[2][0.001][(200, 200)][0]
model3500 = results[2][0.001][(3500, 3500)][0]
model7000 = results[2][0.0001][(7000, 7000)][0]

In [54]:
for model in [model200, model7000]:
    print(model)
    recommendations = recommend(
        ["cinnamon", "cream", "egg", "egg yolks", "milk", "nutmeg", "salt", "sugar", "vanilla", "whipping cream"],
        model
    )
    
    for rec in recommendations:
        print(rec)

TwoHiddenNN(
  (fc0): Linear(in_features=3500, out_features=200, bias=True)
  (fc1): Linear(in_features=200, out_features=200, bias=True)
  (fc2): Linear(in_features=200, out_features=3500, bias=True)
)
egg - -1.3839261531829834
flour - -1.6081905364990234
butter - -2.7672359943389893
water - -2.9232966899871826
whipped cream - -3.8057525157928467
halfandhalf - -3.8670670986175537
cornstarch - -3.882234811782837
temperature - -4.085807800292969
apples - -4.200525283813477
granulated sugar - -4.359938621520996
egg
flour
butter
water
whipped cream
halfandhalf
cornstarch
temperature
apples
granulated sugar
TwoHiddenNN(
  (fc0): Linear(in_features=3500, out_features=7000, bias=True)
  (fc1): Linear(in_features=7000, out_features=7000, bias=True)
  (fc2): Linear(in_features=7000, out_features=3500, bias=True)
)
egg - -1.853110432624817
butter - -2.5027875900268555
ground cinnamon - -2.6338367462158203
cloves - -2.9393372535705566
pumpkin - -3.0989222526550293
flour - -3.455662250518799
plus

In [39]:
for model in [model200, model7000]:
    print(model)
    recommend(
        ["cinnamon", "cream", "egg", "egg yolks", "milk", "nutmeg", "salt", "sugar", "vanilla", "whipping cream"],
        model
    )

TwoHiddenNN(
  (fc0): Linear(in_features=3500, out_features=200, bias=True)
  (fc1): Linear(in_features=200, out_features=200, bias=True)
  (fc2): Linear(in_features=200, out_features=3500, bias=True)
)
egg - -1.3839261531829834
flour - -1.6081905364990234
butter - -2.7672359943389893
water - -2.9232966899871826
whipped cream - -3.8057525157928467
halfandhalf - -3.8670670986175537
cornstarch - -3.882234811782837
temperature - -4.085807800292969
apples - -4.200525283813477
granulated sugar - -4.359938621520996
TwoHiddenNN(
  (fc0): Linear(in_features=3500, out_features=7000, bias=True)
  (fc1): Linear(in_features=7000, out_features=7000, bias=True)
  (fc2): Linear(in_features=7000, out_features=3500, bias=True)
)
egg - -1.853110432624817
butter - -2.5027875900268555
ground cinnamon - -2.6338367462158203
cloves - -2.9393372535705566
pumpkin - -3.0989222526550293
flour - -3.455662250518799
plus - -3.6107754707336426
cornstarch - -3.8256678581237793
granulated sugar - -3.9107208251953125
v

In [43]:
for model in [model200, model7000]:
    print(model)
    recommend(
        ["warm water", "dry yeast", "all-purpose flour", "olive oil" ,"salt", "sugar", "tomato paste", "crushed tomatoes",\
        "mushrooms", "mozzarella", "mozzarella cheese", "water"],
        model
    )

TwoHiddenNN(
  (fc0): Linear(in_features=3500, out_features=200, bias=True)
  (fc1): Linear(in_features=200, out_features=200, bias=True)
  (fc2): Linear(in_features=200, out_features=3500, bias=True)
)
olive - -0.0002826052950695157
flour - -8.589816093444824
kosher - -9.627706527709961
sesame - -12.190649032592773
vinegar - -12.473869323730469
yogurt - -13.197586059570312
yukon gold - -13.199728965759277
cream - -13.223943710327148
potatoes - -13.492240905761719
wine - -13.554697036743164
TwoHiddenNN(
  (fc0): Linear(in_features=3500, out_features=7000, bias=True)
  (fc1): Linear(in_features=7000, out_features=7000, bias=True)
  (fc2): Linear(in_features=7000, out_features=3500, bias=True)
)
flour - -0.5503931641578674
tomato - -1.3365709781646729
tomatoes - -3.154029369354248
garlic - -3.4966578483581543
pepper - -3.851656913757324
onions - -5.374743938446045
shiitake mushrooms - -5.5815911293029785
dried porcini mushrooms - -5.677877902984619
olive - -5.7976765632629395
pitted prun

In [49]:
for model in [model200, model7000]:
    print(model)
    recommend(
        ["ground beef", "chili powder", "onion", "salt", "dried oregano", "garlic powder",
        "ground black pepper", "water", "taco shell", "tomatoes", "sour cream", "guacamole"],
        model
    )

TwoHiddenNN(
  (fc0): Linear(in_features=3500, out_features=200, bias=True)
  (fc1): Linear(in_features=200, out_features=200, bias=True)
  (fc2): Linear(in_features=200, out_features=3500, bias=True)
)
ground - -0.003887597005814314
powder - -5.692481994628906
garlic - -7.881733417510986
canola - -9.743964195251465
cornmeal - -11.104440689086914
in water - -11.772629737854004
instant - -12.292189598083496
egg - -12.390426635742188
elbow macaroni - -12.854533195495605
hot pepper sauce - -13.215974807739258
TwoHiddenNN(
  (fc0): Linear(in_features=3500, out_features=7000, bias=True)
  (fc1): Linear(in_features=7000, out_features=7000, bias=True)
  (fc2): Linear(in_features=7000, out_features=3500, bias=True)
)
ground - -9.202533692587167e-05
garlic - -9.364952087402344
powder - -12.039314270019531
seasoning - -15.397104263305664
oregano - -17.270061492919922
lima beans - -17.36379623413086
plum tomatoes - -17.399856567382812
basil - -17.423099517822266
cumin - -17.616281509399414
masala

In [81]:
for model in [model200, model7000]:
    print(model)
    recommend(
        ["white wine", "salt", "pepper", "garlic", "onion", "butter"],
        model
    )

TwoHiddenNN(
  (fc0): Linear(in_features=3500, out_features=200, bias=True)
  (fc1): Linear(in_features=200, out_features=200, bias=True)
  (fc2): Linear(in_features=200, out_features=3500, bias=True)
)
white - -0.013811847195029259
potatoes - -5.325450897216797
cream - -7.07416296005249
tomatoes - -7.537132263183594
paprika - -7.601494312286377
water - -7.685240268707275
garlic salt - -7.835998058319092
flour - -7.891382217407227
cloves - -7.92772102355957
pork - -7.934650897979736
TwoHiddenNN(
  (fc0): Linear(in_features=3500, out_features=7000, bias=True)
  (fc1): Linear(in_features=7000, out_features=7000, bias=True)
  (fc2): Linear(in_features=7000, out_features=3500, bias=True)
)
white - -0.0048474413342773914
cloves - -6.797843933105469
clove - -7.86116361618042
minced garlic - -8.116683006286621
potato chips - -8.186177253723145
garlic minced - -8.526825904846191
garlic cloves - -8.859143257141113
thyme - -9.068663597106934
cod - -9.131184577941895
flour - -9.214014053344727


In [82]:
for model in [model200, model7000]:
    print(model)
    recommend(
        ["saffron", "spinach", "rice", "salt", "water", "egg"],
        model
    )

TwoHiddenNN(
  (fc0): Linear(in_features=3500, out_features=200, bias=True)
  (fc1): Linear(in_features=200, out_features=200, bias=True)
  (fc2): Linear(in_features=200, out_features=3500, bias=True)
)
pepper - -0.7018657922744751
sugar - -1.7028275728225708
onion - -3.3197107315063477
oil - -3.4592084884643555
sweet potato - -3.667262077331543
butter - -4.338338375091553
mayonnaise - -4.473094940185547
beef - -4.645192623138428
green - -4.727263927459717
peppercorns - -4.9044508934021
TwoHiddenNN(
  (fc0): Linear(in_features=3500, out_features=7000, bias=True)
  (fc1): Linear(in_features=7000, out_features=7000, bias=True)
  (fc2): Linear(in_features=7000, out_features=3500, bias=True)
)
onion - -2.227447271347046
ham - -2.541785478591919
pepper - -2.8139150142669678
boiling water - -2.964434862136841
green - -3.0418283939361572
sugar - -3.2087419033050537
milk - -3.418802499771118
sausage - -3.8056561946868896
grain - -3.8178908824920654
long grain white rice - -3.899893045425415


In [None]:
for model in [model200, model7000]:
    print(model)
    recommend(
        ["saffron", "spinach", "rice", "salt", "water", "egg"],
        model
    )

In [65]:
alinea_truffle = [306,3284,48,2,2160,5,174,13,1326,7,254,2667,418,8,58]
for i in alinea_truffle:
    print(raw_ingredients[i])
    
for model in [model200, model7000]:
    print(model)
    recommend(alinea_truffle, model)

gelatin
black truffle oil
kosher salt
butter
white truffle oil
flour
egg yolks
eggs
whole milk
olive oil
cornmeal
truffles
romaine lettuce
water
parmesan cheese
TwoHiddenNN(
  (fc0): Linear(in_features=3500, out_features=200, bias=True)
  (fc1): Linear(in_features=200, out_features=200, bias=True)
  (fc2): Linear(in_features=200, out_features=3500, bias=True)
)
olive - -1.5735502529423684e-05
kosher - -11.094480514526367
cream - -15.571755409240723
prosciutto - -15.859622955322266
unsalted butter - -16.680116653442383
yolk - -18.424663543701172
sugar - -18.522300720214844
pork and beans - -19.16857147216797
heavy whipping cream - -19.200815200805664
sesame - -19.752620697021484
TwoHiddenNN(
  (fc0): Linear(in_features=3500, out_features=7000, bias=True)
  (fc1): Linear(in_features=7000, out_features=7000, bias=True)
  (fc2): Linear(in_features=7000, out_features=3500, bias=True)
)
parmesan - -0.16338910162448883
olive - -2.023392677307129
milk - -4.258756637573242
grated lemon peel - -

In [68]:
icefish = [664,147,847,196,46,435,212,1493,88,1858,4,48,174,111,1145,2383,33,433,6,2576,346,642,1169,991,8,274,185,29]
for i in icefish:
    print(raw_ingredients[i])
    
for model in [model200, model7000]:
    print(model)
    recommend(icefish[1:], model)

fish
canola oil
fennel bulb
shallots
celery
black peppercorns
bay leaf
dry vermouth
heavy cream
grated horseradish
sugar
kosher salt
egg yolks
cornstarch
xanthan gum
meyer lemon
garlic cloves
skim milk
onion
cornichons
capers
yukon gold potatoes
clarified butter
asparagus spears
water
parsley leaves
chives
lemon juice
TwoHiddenNN(
  (fc0): Linear(in_features=3500, out_features=200, bias=True)
  (fc1): Linear(in_features=200, out_features=200, bias=True)
  (fc2): Linear(in_features=200, out_features=3500, bias=True)
)
lemon - -0.005278460215777159
garlic - -5.891328811645508
mustard - -7.344183444976807
leaves - -8.213150978088379
fava beans - -8.510231018066406
turkey stock - -8.565352439880371
cayenne - -8.853535652160645
vegetable - -8.87251091003418
egg - -9.03984546661377
mozzarella - -9.320830345153809
TwoHiddenNN(
  (fc0): Linear(in_features=3500, out_features=7000, bias=True)
  (fc1): Linear(in_features=7000, out_features=7000, bias=True)
  (fc2): Linear(in_features=7000, out_fe