In [1]:
import pickle as pkl
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from tqdm import tqdm_notebook as tqdm
from torchvision import datasets, transforms
import torch.utils.data as data_utils

## Getting the train/validation/test data

In [46]:
# Read the dataset archive. allow_pickle=True lets numpy unpickle stored
# Python objects, which can run arbitrary code -- only load trusted files.
data = np.load('dataset.npz', allow_pickle=True)
ingredients, recipes = data['ingredients'], data['recipes']

# Keep only the recipes that contain at least one entry.
vectorized_len = np.vectorize(len)
recipes = recipes[vectorized_len(recipes) >= 1]

def split_train_val_test(recipes, train=0.8, val=0.1, seed=0):
    """Shuffle ``recipes`` and split them into train/validation/test subsets.

    The three subsets are disjoint and together contain every input element:
    the test subset is whatever remains once the train and validation subsets
    have been taken.

    Args:
        recipes: Array-like of samples; it is shuffled along its first axis.
        train: Fraction of the samples assigned to the training subset.
        val: Fraction of the samples assigned to the validation subset.
        seed: Seed of the ``RandomState`` used for shuffling. The default of
            0 matches the value that used to be hard-coded here.

    Returns:
        Tuple ``(train_split, val_split, test_split)``.

    Raises:
        ValueError: If a fraction is negative or ``train + val`` exceeds 1.
    """
    # Small tolerance so float sums that land a hair above 1.0 still pass.
    if train < 0 or val < 0 or train + val > 1 + 1e-9:
        raise ValueError(
            "train and val must be non-negative fractions with train + val <= 1"
        )

    shuffled = np.random.RandomState(seed).permutation(recipes)
    n_train = int(len(shuffled) * train)
    n_val = int(len(shuffled) * val)
    val_end = n_train + n_val

    # The test split is the remainder. Slicing with ``[-n_val:]`` instead
    # would return the *whole* array when n_val == 0 and would drop the
    # samples left over from the int() truncation above.
    return shuffled[:n_train], shuffled[n_train:val_end], shuffled[val_end:]

# Split the filtered recipes using the function's default fractions
# (train=0.8, val=0.1).
train_recipes, val_recipes, test_recipes = split_train_val_test(recipes)

In [47]:
# Preview the first five training recipes.
train_recipes[:5]

array([array([ 219,  212,   46, 1133,  222,  657, 1343, 1014,   73,  140,   26,
          8,  286]),
       array([  77,  967, 3002,  199,    2,    3,   98,   18, 1477,   25,  895,
          1,  157,    0,    8]),
       array([  14,  134,  147,    3,   33,    9,   30, 3275,    1,   73,  511,
       1597]),
       array([198, 233,  14,   3,  33,  42, 120, 151,  10,   7,   1,   0,  21,
        26]),
       array([167,  52,  32,  13,   5, 224,  71,   0,  43,   4,  36])],
      dtype=object)

## Neural network

In [50]:
def convert_one_hot(array, num_ingredients=None):
    """Encode recipes as rows of a dense 0/1 matrix.

    Each recipe is a sequence of integer column indices. Row ``i`` of the
    result has a 1 in every column listed by ``array[i]`` and 0 elsewhere
    (so a row generally contains several ones).

    Args:
        array: Sequence of recipes, each a sequence/array of integer indices.
        num_ingredients: Number of columns in the result. When omitted, the
            length of the notebook-level ``ingredients`` array is used.

    Returns:
        ``numpy.ndarray`` of shape ``(len(array), num_ingredients)`` with
        numpy's default float dtype.
    """
    if num_ingredients is None:
        # Fall back to the vocabulary loaded at notebook level.
        num_ingredients = len(ingredients)

    # One row per recipe, one column per possible ingredient index.
    one_hot = np.zeros((len(array), num_ingredients))

    for row, recipe in enumerate(array):
        if len(recipe) > 0:
            # Switch on every column named by this recipe in one assignment.
            one_hot[row, recipe] = 1
        else:
            # Empty recipes leave an all-zero row; the message is kept so the
            # unexpected case stays visible in the cell output.
            print("shouldn't get here ever")

    return one_hot

In [51]:
# Encode each split with the same helper, in train / validation / test order.
train_one_hot, val_one_hot, test_one_hot = map(
    convert_one_hot, (train_recipes, val_recipes, test_recipes)
)