In [1]:
import pickle as pkl
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from tqdm import tqdm_notebook as tqdm
from torchvision import datasets, transforms
import torch.utils.data as data_utils
import scipy
import scipy.sparse
import os

from nn import *

In [2]:
def merge(source, destination):
    """
    Recursively merge ``source`` into ``destination`` in place and return it.

    For every key in ``source``:

    * a dict value is merged, key by key, into the sub-dict stored under the
      same key in ``destination`` (a fresh dict is created when the key is
      missing, so nested dicts of ``source`` are never aliased);
    * any other value overwrites whatever ``destination`` holds for that key.

    If ``destination`` holds a non-dict value under a key whose ``source``
    value is a dict, the old value is replaced by a fresh dict before merging,
    i.e. ``source`` wins on conflicts just as it does for leaf values.

    run me with nosetests --with-doctest file.py

    >>> a = { 'first' : { 'all_rows' : { 'pass' : 'dog', 'number' : '1' } } }
    >>> b = { 'first' : { 'all_rows' : { 'fail' : 'cat', 'number' : '5' } } }
    >>> merge(b, a) == { 'first' : { 'all_rows' : { 'pass' : 'dog', 'fail' : 'cat', 'number' : '5' } } }
    True
    >>> merge({'k': {'x': 1}}, {'k': 'leaf'}) == {'k': {'x': 1}}
    True
    """
    for key, value in source.items():
        if isinstance(value, dict):
            # get node or create one; a non-dict occupant cannot be merged
            # into, so it is replaced rather than recursed on
            node = destination.get(key)
            if not isinstance(node, dict):
                node = destination[key] = {}
            merge(value, node)
        else:
            destination[key] = value

    return destination

In [3]:
# Merge every pickled result file under results/ into one nested dict.
# Files are visited in sorted order so that, if two files define the same
# key, which one wins is reproducible (os.listdir order is arbitrary).
# NOTE: pickle.load can execute arbitrary code -- only load result files
# that were produced by this project.
results = {}
for fname in sorted(os.listdir("results")):
    # skip anything that is not a result pickle (.DS_Store, checkpoints, ...)
    if not fname.endswith(".pkl"):
        continue
    print(fname)
    with open(os.path.join("results", fname), "rb") as f:
        res = pkl.load(f)
    results = merge(res, results)

nn_train_valid_results_2_0.001.pkl
nn_train_valid_results_3_0.01.pkl
nn_train_valid_results_2_0.01.pkl
nn_train_valid_results_2_0.0001_larger.pkl
nn_train_valid_results_3_0.0001.pkl
nn_train_valid_results_3_0.001.pkl
nn_train_valid_results_2_0.001_smaller.pkl
nn_train_valid_results_2_0.0001.pkl
nn_train_valid_results_2_0.001_larger.pkl
nn_train_valid_results_2_0.01_larger.pkl


In [4]:
# Display the merged results. Each leaf is the tuple
# (model, train_losses, train_accs, valid_loss, valid_acc); the keys leading
# to it are, in order, presumably the number of hidden layers (TODO confirm),
# the learning rate, and the hidden-layer sizes.
results

{2: {0.001: {(200,
    200): (TwoHiddenNN(
      (fc0): Linear(in_features=3500, out_features=200, bias=True)
      (fc1): Linear(in_features=200, out_features=200, bias=True)
      (fc2): Linear(in_features=200, out_features=3500, bias=True)
    ), [6.3486355074678364,
     5.543690551965001,
     4.861971136099234,
     4.277568328113983,
     3.796006494436782,
     3.374413350138801,
     2.9942352208085716,
     2.655894259675242,
     2.3548491420075535,
     2.093960295469997], [4.345547124600639,
     11.455421325878595,
     20.67566892971246,
     27.668230830670925,
     33.01218051118211,
     37.687200479233226,
     41.939147364217256,
     46.43570287539936,
     51.322883386581466,
     56.026607428115014], 0.019503415489196776, 29.61),
   (500,
    500): (TwoHiddenNN(
      (fc0): Linear(in_features=3500, out_features=500, bias=True)
      (fc1): Linear(in_features=500, out_features=500, bias=True)
      (fc2): Linear(in_features=500, out_features=3500, bias=True)
    

In [10]:
# Models picked for the final test evaluation, labelled by hidden-layer width:
# 200x200 (lr 0.001), 3500x3500 (lr 0.0001) and 7000x7000 (lr 0.0001).
# Each entry is (width, results tuple for that model).
models = [
    (width, results[2][lr][(width, width)])
    for width, lr in ((200, 0.001), (3500, 0.0001), (7000, 0.0001))
]

# Only the third value returned by get_data is used here (the test data).
_, _, test_tensor, _ = get_data(dataset_size=100000)

(80000,)
(10000,)
(10000,)
(10,)


In [13]:
# Evaluate every selected model on the test data and print its loss/accuracy.
# ARGS and test() are not defined in this notebook; presumably they come from
# `from nn import *` -- confirm.

use_cuda = not ARGS["no_cuda"] and torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
# shuffle=False: evaluation does not benefit from shuffling, and a fixed
# batch order keeps the reported numbers reproducible from run to run.
test_loader = torch.utils.data.DataLoader(
    test_tensor,
    batch_size=ARGS["batch_size"], shuffle=False, **kwargs)

for nodes, model_res in models:
    # each results entry is (model, train losses, train accs, valid loss, valid acc)
    model, train_losses, train_accs, valid_loss, valid_acc = model_res
    test_loss, test_acc = test(model, device, test_loader)
    print(f"{nodes} results - loss: {test_loss}, acc: {test_acc}")


Test set: Average loss: 1.93e-02, Accuracy: 2925/10000 (29%)

200 results - loss: 0.01926054422855377, acc: 29.25

Test set: Average loss: 1.79e-02, Accuracy: 3004/10000 (30%)

3500 results - loss: 0.01785650086402893, acc: 30.04

Test set: Average loss: 1.86e-02, Accuracy: 3120/10000 (31%)

7000 results - loss: 0.018648069286346436, acc: 31.2


In [75]:
# Load the dataset archive and pull out the ingredient vocabulary.
# allow_pickle=True lets NumPy unpickle object arrays, which can execute
# arbitrary code -- only use it on a trusted dataset.npz.
data = np.load('dataset.npz', allow_pickle=True)
raw_ingredients = data["ingredients"]

# Reverse lookup: ingredient name -> its position in raw_ingredients.
ingredients = dict(zip(raw_ingredients, range(len(raw_ingredients))))

In [76]:
def convert_one_hot(array, num_ingredients=None):
    """
    Turn a list of recipes (each a list of ingredient indices) into a
    multi-hot matrix.

    Parameters
    ----------
    array : sequence of sequences of int
        One inner sequence of ingredient indices per recipe.
    num_ingredients : int, optional
        Number of columns (vocabulary size). Defaults to the module-level
        ``NUM_INGREDIENTS`` when omitted, which keeps existing callers working.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(array), num_ingredients)`` where entry
        ``[i, j]`` is 1 if recipe ``i`` contains ingredient ``j``, else 0.
        A recipe with no ingredients yields an all-zero row and a printed
        warning.
    """
    if num_ingredients is None:
        num_ingredients = NUM_INGREDIENTS

    # one row per recipe, one column per ingredient in the vocabulary
    one_hot = np.zeros((len(array), num_ingredients))

    for i, recipe in enumerate(array):
        if len(recipe) > 0:
            # set every ingredient listed in the i-th recipe to 1
            one_hot[i][recipe] = 1
        else:
            print("shouldn't get here ever")

    return one_hot

In [77]:
def find_possible_matches(ingredient_identifier, print_details=True):
    """
    Return the indices of all known ingredients whose name contains
    ``ingredient_identifier`` as a substring.

    Matches are reported in the iteration order of the module-level
    ``ingredients`` mapping (name -> index). When ``print_details`` is true,
    a header line and one ``name : index`` line per match are printed.
    """
    if print_details:
        print("Matches for ingredient identifier:")

    matched = []
    for name, position in ingredients.items():
        if ingredient_identifier not in name:
            continue
        matched.append(position)
        if print_details:
            print("{} : {}".format(name, position))
    return matched

In [78]:
def recommend(input_ingredients, model, num_recs=10):
    """
    Print and return the model's top-scoring ingredient suggestions for a
    partial recipe.

    Parameters
    ----------
    input_ingredients : iterable of str
        Ingredient names (or name fragments). Each is resolved to a vocabulary
        index with ``find_possible_matches``; an ingredient whose name equals
        the query exactly is preferred, otherwise the first substring match
        is used.
    model : callable
        Called with a float32 tensor holding a single multi-hot row built by
        ``convert_one_hot``; must return one row of scores per input row.
    num_recs : int, default 10
        Number of suggestions wanted. Capped at the number of scores the
        model returns.

    Returns
    -------
    list
        The suggested entries of ``raw_ingredients``, best first. Each one is
        also printed as ``name - score``. Higher scores are better; they look
        like log-probabilities in recorded runs -- TODO confirm against the
        model definition.

    Raises
    ------
    IndexError
        If an input ingredient matches nothing in the vocabulary.
    """
    idxs = []
    for ing in input_ingredients:
        matches = find_possible_matches(ing, print_details=False)
        if not matches:
            # same exception type the bare ``[0]`` lookup used to raise,
            # but naming the ingredient that could not be resolved
            raise IndexError(f"no known ingredient matches {ing!r}")
        # "lime" should resolve to "lime" itself, not to whichever longer
        # name containing it happens to come first in the vocabulary
        exact = [m for m in matches if raw_ingredients[m] == ing]
        idxs.append(exact[0] if exact else matches[0])

    inp = torch.tensor(convert_one_hot([idxs]).astype(np.float32))

    # run on whatever device the model's weights live on
    first_param = None
    if hasattr(model, "parameters"):
        first_param = next(iter(model.parameters()), None)
    if first_param is not None:
        inp = inp.to(first_param.device)

    # use the model to make a recommendation; no gradients needed for inference
    with torch.no_grad():
        out = model(inp)

    k = min(num_recs, out.shape[-1])
    top_scores, top_indices = torch.topk(out, k)

    # single input row -> take row 0 and convert to plain Python numbers
    scores = top_scores[0].tolist()
    indices = top_indices[0].tolist()

    recommendations = []
    for score, index in zip(scores, indices):
        print(f"{raw_ingredients[index]} - {score}")
        recommendations.append(raw_ingredients[index])

    return recommendations

In [98]:
# Example query: sushi-style ingredients, scored by the 3500x3500 model stored
# under learning rate 0.001 (element [0] of a results entry is the model).
# NOTE(review): the evaluation cells use the 3500x3500 model trained with
# lr 0.0001 -- confirm which of the two is intended here.
recommend(["sushi rice", "avocado", "salmon", "seaweed"], results[2][0.001][(3500, 3500)][0])

sauce - -0.40252721309661865
dressing - -2.5837607383728027
salt - -3.021697521209717
tomatoes - -3.077658176422119
seasoning - -3.248444080352783
shrimp - -3.8051066398620605
cucumber - -4.307392597198486
lettuce - -4.320250034332275
tuna - -4.368200778961182
water - -4.398378849029541


['sauce',
 'dressing',
 'salt',
 'tomatoes',
 'seasoning',
 'shrimp',
 'cucumber',
 'lettuce',
 'tuna',
 'water']

In [100]:
# Example query: rice / cilantro / lime / tortilla / chicken, scored by the
# 3500x3500 model stored under learning rate 0.001.
recommend(["white rice", "cilantro", "lime", "tortilla", "chicken"], results[2][0.001][(3500, 3500)][0])

juice - -0.06068161502480507
white - -2.832331418991089
cheese - -13.770707130432129
leaves - -15.83867359161377
salt - -15.906718254089355
mint - -15.945918083190918
avocado - -16.71072006225586
water - -16.95568084716797
lime - -17.384998321533203
toasted - -17.550128936767578


['juice',
 'white',
 'cheese',
 'leaves',
 'salt',
 'mint',
 'avocado',
 'water',
 'lime',
 'toasted']

In [101]:
# Example query: pizza-style ingredients, scored by the 3500x3500 model stored
# under learning rate 0.001.
recommend(["flour", "mushrooms", "onion","tomato sauce", "tomato paste", "basil", "mozzarella"], results[2][0.001][(3500, 3500)][0])

tomato - 0.0
tomatoes - -19.006031036376953
mozzarella cheese - -19.41239356994629
bay leaf - -25.311656951904297
frozen peas - -25.92708969116211
olive oil - -26.248531341552734
vegetable - -26.769067764282227
sauce - -26.793087005615234
chicken - -27.181304931640625
garlic - -28.019676208496094


['tomato',
 'tomatoes',
 'mozzarella cheese',
 'bay leaf',
 'frozen peas',
 'olive oil',
 'vegetable',
 'sauce',
 'chicken',
 'garlic']

In [97]:
# NOTE(review): this cell repeats the pizza-style query of the cell directly
# above it with the same model, and its execution count is out of order;
# consider deleting it.
recommend(["flour", "mushrooms", "onion","tomato sauce", "tomato paste", "basil", "mozzarella"], results[2][0.001][(3500, 3500)][0])

tomato - 0.0
tomatoes - -19.006031036376953
mozzarella cheese - -19.41239356994629
bay leaf - -25.311656951904297
frozen peas - -25.92708969116211
olive oil - -26.248531341552734
vegetable - -26.769067764282227
sauce - -26.793087005615234
chicken - -27.181304931640625
garlic - -28.019676208496094


['tomato',
 'tomatoes',
 'mozzarella cheese',
 'bay leaf',
 'frozen peas',
 'olive oil',
 'vegetable',
 'sauce',
 'chicken',
 'garlic']

In [102]:
# ALINEA recipe
# The full ingredient list is defined here so the cell runs on a fresh kernel;
# it is reconstructed from this cell's recorded output. The last four
# ingredients are held out and the rest are fed to the model, to see whether
# the held-out ones come back as recommendations.
surf = [
    'clams', 'canola oil', 'kosher salt', 'ground black pepper', 'water',
    'yukon gold potatoes', 'half & half', 'leaves', 'ice cubes', 'shallots',
    'butter', 'white wine vinegar', 'sugar', 'cucumber',
]
print(f"missing: {surf[-4:]}")
print(f"input: {surf[:-4]}")

recommend(surf[:-4], results[2][0.001][(3500, 3500)][0])

missing: ['butter', 'white wine vinegar', 'sugar', 'cucumber']
input: ['clams', 'canola oil', 'kosher salt', 'ground black pepper', 'water', 'yukon gold potatoes', 'half & half', 'leaves', 'ice cubes', 'shallots']
ground - -0.07821056246757507
kosher - -2.5874927043914795
potatoes - -11.015131950378418
flour - -12.2555570602417
garnish - -15.456151008605957
plus - -15.49994945526123
chicken breast - -15.896588325500488
cream - -16.78713035583496
thyme - -16.997835159301758
tortillas - -17.208341598510742


['ground',
 'kosher',
 'potatoes',
 'flour',
 'garnish',
 'plus',
 'chicken breast',
 'cream',
 'thyme',
 'tortillas']