In [2]:
import pickle as pkl
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from tqdm import tqdm_notebook as tqdm
from torchvision import datasets, transforms
import torch.utils.data as data_utils

## Getting the train/validation/test data

In [3]:
# Load the dataset archive.
# NOTE: allow_pickle=True lets NumPy unpickle object arrays, which can run
# arbitrary code -- only load a dataset.npz that comes from a trusted source.
data = np.load('dataset.npz', allow_pickle=True)
ingredients, recipes = data['ingredients'], data['recipes']

# Drop recipes that contain no ingredients at all.
vectorized_len = np.vectorize(len)
recipes = recipes[vectorized_len(recipes) > 0]

def split_train_val_test(recipes, train=0.8, val=0.1):
    """Shuffle ``recipes`` with a fixed seed and split into train/val/test.

    Args:
        recipes: Array-like of recipes; it is permuted along its first axis
            with ``np.random.RandomState(0)`` so the split is reproducible.
        train: Fraction of the items assigned to the training split.
        val: Fraction of the items assigned to the validation split.

    Returns:
        A ``(train, val, test)`` tuple of arrays. The three splits are
        disjoint and together contain every item; the test split is whatever
        remains after the training and validation items are taken.
    """
    shuffled = np.random.RandomState(0).permutation(recipes)
    n_train = int(len(shuffled) * train)
    n_val = int(len(shuffled) * val)
    val_end = n_train + n_val
    # Take the test split from where validation ends rather than as the last
    # n_val items: ``shuffled[-0:]`` is the WHOLE array when n_val == 0, and a
    # tail slice can overlap the validation split for large ``val`` fractions.
    return shuffled[:n_train], shuffled[n_train:val_end], shuffled[val_end:]

# Split the recipes into train/validation/test using the default fractions.
train_recipes, val_recipes, test_recipes = split_train_val_test(recipes)

In [4]:
# Peek at the first five training recipes (each is an array of integer ingredient indices).
train_recipes[:5]

array([array([ 219,  212,   46, 1133,  222,  657, 1343, 1014,   73,  140,   26,
          8,  286]),
       array([  77,  967, 3002,  199,    2,    3,   98,   18, 1477,   25,  895,
          1,  157,    0,    8]),
       array([  14,  134,  147,    3,   33,    9,   30, 3275,    1,   73,  511,
       1597]),
       array([198, 233,  14,   3,  33,  42, 120, 151,  10,   7,   1,   0,  21,
        26]),
       array([167,  52,  32,  13,   5, 224,  71,   0,  43,   4,  36])],
      dtype=object)

In [5]:
def convert_one_hot(array, num_ingredients=None, rng=None):
    """Encode recipes as multi-hot rows and build leave-one-out training pairs.

    For every recipe one ingredient is chosen at random and held out: the
    input row marks all remaining ingredients and the target is the index of
    the held-out ingredient.

    Args:
        array: Sequence of recipes, each a sequence/array of integer
            ingredient indices.
        num_ingredients: Width of the encoding. Defaults to
            ``len(ingredients)`` (the notebook-level ingredient list).
        rng: Optional random source (``np.random.RandomState`` or
            ``np.random.Generator``) used to pick the held-out ingredient.
            Defaults to NumPy's global random state.

    Returns:
        ``(one_hot, inputs, targets)`` where
        * ``one_hot`` is ``(len(array), num_ingredients)`` with a 1 for every
          ingredient in the recipe,
        * ``inputs`` is the same with the held-out ingredient's entry left at 0,
        * ``targets`` is a float array of held-out ingredient indices; a recipe
          with no ingredients gets the sentinel ``-1`` and all-zero rows.
    """
    if num_ingredients is None:
        # Fall back to the ingredient vocabulary loaded at notebook level.
        num_ingredients = len(ingredients)
    if rng is None:
        rng = np.random
    # RandomState / the np.random module expose randint(); Generator exposes integers().
    draw = rng.randint if hasattr(rng, "randint") else rng.integers

    num_recipes = len(array)
    one_hot = np.zeros((num_recipes, num_ingredients))
    inputs = np.zeros((num_recipes, num_ingredients))
    # Use an explicit sentinel instead of np.empty so a skipped recipe can
    # never leak uninitialised memory into the labels.
    targets = np.full(num_recipes, -1.0)

    for i in range(num_recipes):
        recipe = array[i]
        if len(recipe) == 0:
            # Empty recipes are expected to have been filtered out upstream.
            print("shouldn't get here ever")
            continue

        # Mark every ingredient of the i-th recipe.
        one_hot[i][recipe] = 1

        # Randomly choose one ingredient to hold out as the prediction target.
        leave_out_idx = draw(len(recipe))
        targets[i] = recipe[leave_out_idx]
        inputs[i][np.delete(recipe, leave_out_idx)] = 1

    return one_hot, inputs, targets

In [6]:
# For each split, build the full multi-hot encoding, the input with one
# ingredient removed, and the removed ingredient as the target.
# convert_one_hot picks the held-out ingredient with np.random.randint, so the
# order of these calls affects which ingredients are held out in each split.
train_one_hot, train_x, train_y = convert_one_hot(train_recipes)
val_one_hot, val_x, val_y = convert_one_hot(val_recipes)
test_one_hot, test_x, test_y = convert_one_hot(test_recipes)

In [51]:
# Small slice of the training data for quick experiments.
mini_size = 100
x_mini = torch.tensor(train_x[:mini_size].astype(np.float32))
# Class-index targets for cross-entropy must be int64 (torch.long). np.int_ is
# platform dependent (32-bit on some platforms), so request int64 explicitly.
y_mini = torch.tensor(train_y[:mini_size].astype(np.int64))
mini_train_tensor = data_utils.TensorDataset(x_mini, y_mini)

In [None]:
# Full training set as a TensorDataset of (float32 inputs, int64 class targets).
x_train = torch.tensor(train_x.astype(np.float32))
# Class-index targets for cross-entropy must be int64 (torch.long). np.int_ is
# platform dependent (32-bit on some platforms), so request int64 explicitly.
y_train = torch.tensor(train_y.astype(np.int64))
train_tensor = data_utils.TensorDataset(x_train, y_train)

In [None]:
# Validation and test sets as TensorDatasets of (float32 inputs, int64 class targets).
# Class-index targets for cross-entropy must be int64 (torch.long). np.int_ is
# platform dependent (32-bit on some platforms), so request int64 explicitly.
x_valid = torch.tensor(val_x.astype(np.float32))
y_valid = torch.tensor(val_y.astype(np.int64))
valid_tensor = data_utils.TensorDataset(x_valid, y_valid)

x_test = torch.tensor(test_x.astype(np.float32))
y_test = torch.tensor(test_y.astype(np.int64))
test_tensor = data_utils.TensorDataset(x_test, y_test)

## Neural Net

In [8]:
class Net(nn.Module):
    """Fully connected network with two hidden ReLU layers.

    The input and output widths are both ``num_ingredients``; the hidden
    widths are read from ``params["hidden_1"]`` and ``params["hidden_2"]``.
    ``forward`` returns log-probabilities over the output dimension.
    """

    def __init__(self, params, num_ingredients):
        super().__init__()
        self.params = params
        width_1, width_2 = params["hidden_1"], params["hidden_2"]
        # Layers are created in order fc0 -> fc1 -> fc2.
        self.fc0 = nn.Linear(num_ingredients, width_1)
        self.fc1 = nn.Linear(width_1, width_2)
        self.fc2 = nn.Linear(width_2, num_ingredients)

    def forward(self, x):
        """Map a ``(batch, num_ingredients)`` input to per-class log-probabilities."""
        hidden = self.fc0(x).relu()
        hidden = self.fc1(hidden).relu()
        logits = self.fc2(hidden)
        # Normalise across dim 1 (the class dimension).
        return logits.log_softmax(dim=1)


In [52]:
def train(model, device, train_loader, optimizer, epoch):
    """Train ``model`` for one epoch and return the mean per-batch loss.

    Args:
        model: The ``nn.Module`` to optimise; its output is scored with
            cross-entropy against integer class targets.
        device: Device that each batch is moved to before the forward pass.
        train_loader: Iterable of ``(data, target)`` batches that supports ``len()``.
        optimizer: Optimizer that updates ``model``'s parameters.
        epoch: Epoch number, used only in the progress-bar text.

    Returns:
        The average of the per-batch losses as a float, or ``nan`` when the
        loader yields no batches.
    """
    model.train()
    # Build the criterion once instead of on every iteration. CrossEntropyLoss
    # applies log-softmax itself; feeding it log-probabilities is still valid
    # because log-softmax of log-probabilities leaves them unchanged.
    loss_fn = nn.CrossEntropyLoss()
    num_correct = 0
    num_seen = 0
    losses = []

    batches = tqdm(enumerate(train_loader), total=len(train_loader))
    batches.set_description("Epoch NA: Loss (NA) Accuracy (NA %)")
    for _batch_idx, (data, target) in batches:
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = loss_fn(output, target)

        # Predicted class = index of the largest score in each row.
        pred = output.argmax(dim=1)
        num_correct += (pred == target.view_as(pred)).sum().item()
        # Count the samples actually seen so a short final batch (or a loader
        # without a fixed batch_size) does not skew the running accuracy.
        num_seen += target.size(0)

        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        losses.append(batch_loss)

        batches.set_description(
            "Epoch {:d}: Loss ({:.2e}) Accuracy ({:02.0f}%)".format(
                epoch, batch_loss, 100. * num_correct / max(num_seen, 1))
        )

    if not losses:
        # Nothing was trained on; avoid a ZeroDivisionError.
        return float("nan")
    return sum(losses) / len(losses)

def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            # We use reduction = 'sum' here to ignore the impact of batch size and make 
            # this value comparable with the loss reported in the train loop above. Note,
            # though, that we divide by the len of the dataset below (so this is truly a per-element loss value)
#             test_loss += F.mse_loss(torch.clamp(output.view(target.shape), 0., 5.), target.float(), reduction='sum') # sum up the mean square loss
#             pred = torch.clamp(output, 0., 5.)
#             correct += pred.eq(target.float().view_as(pred)).sum().item()
            loss_fn = nn.CrossEntropyLoss()
            loss = torch.sum(loss_fn(output, target, reduction='sum')).item() # sum up batch loss
            test_loss += loss
            pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()
            
    test_loss /= len(test_loader.dataset)
#     print('\nTest set: Average loss: {:.2e}\n'.format(test_loss))
    print('\nTest set: Average loss: {:.2e}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    return test_loss

In [54]:
# Run-level settings (seed, device preference, batch sizes).
args = {
    "seed": 1,
    "no_cuda": False,
    "log_interval": 50,
    "batch_size": 8,
    "test-batch-size": 1000,
}

# Model / optimisation hyperparameters; both hidden layers are twice as wide
# as the ingredient vocabulary.
params = {
    "epochs": 10,
    "lr": 5e-2,
    "hidden_1": len(ingredients)*2,
    "hidden_2": len(ingredients)*2,
}

# Use the GPU only when it is both allowed and available.
use_cuda = not args["no_cuda"] and torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda")
    kwargs = {'num_workers': 1, 'pin_memory': True}
else:
    device = torch.device("cpu")
    kwargs = {}

# Shuffled loader over the small training subset.
mini_train_loader = torch.utils.data.DataLoader(
    dataset=mini_train_tensor,
    batch_size=args["batch_size"],
    shuffle=True,
    **kwargs,
)

In [55]:
# Seed torch's RNG *before* building the model so the randomly initialised
# layer weights are the same on every run.
torch.manual_seed(args["seed"])
model = Net(params, len(ingredients)).to(device)
optimizer = optim.Adam(model.parameters(), lr=params["lr"])

In [None]:
# Seed torch's global RNG, then train on the mini loader for the configured
# number of epochs (epochs are numbered from 1 for the progress display).
# NOTE(review): train() returns the epoch's mean batch loss, which is discarded here.
torch.manual_seed(args["seed"])
for epoch in range(1, params["epochs"] + 1):
    train(model, device, mini_train_loader, optimizer, epoch)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  batches = tqdm(enumerate(train_loader), total=len(train_loader))


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=13.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=13.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=13.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=13.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=13.0), HTML(value='')))