In [1]:
import time
import sys
sys.path.append('../')

import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from helpers.pytorch_helper import *

In [2]:
train = InstacartDataset('training.npy','training_labels.npy', transform=ToTensor())
validation = InstacartDataset('validation.npy','validation_labels.npy', transform=ToTensor())

In [3]:
BATCH = 32
train_loader = DataLoader(train, batch_size=BATCH, 
                          shuffle=True, num_workers=10)
val_loader = DataLoader(validation, batch_size=BATCH,
                        shuffle=True, num_workers=10)

In [4]:
class TesterLSTM(nn.Module):
    def __init__(self):
        super(TesterLSTM, self).__init__()
        self.lstm = nn.LSTM(input_size=148, 
                            hidden_size=256,
                            num_layers=3,
                            batch_first=False,
                            bidirectional=False)
        self.fc = nn.Linear(256*67,145)
        self.hidden = self.init_hidden()
        
    def init_hidden(self):
        return (Variable(torch.zeros(3, 67, 256)).cuda(), # shape is (num_layers,sequence_length,hidden_dim)
                Variable(torch.zeros(3, 67, 256)).cuda())
    
    def forward(self, x):
        hidden = self.init_hidden()
        output, hidden = self.lstm(x, hidden)
        output = self.fc(output.view(x.size()[0],-1))
        return torch.clamp(output,0,49688)

In [5]:
tester = TesterLSTM().cuda()

In [6]:
LR = .01
EPOCHS = 100
loss_function = nn.MSELoss()
optimizer = torch.optim.RMSprop(tester.parameters(), lr=LR)

In [7]:
best_val_loss = float('inf')
for epoch in range(EPOCHS):  # loop over the dataset multiple times
    epoch_loss = 0.
    running_loss = 0.
    time0 = time.time()
    # Training
    j=0
    tester.train()
    for i, batch in enumerate(train_loader):
        # get the inputs
        inputs, labels = batch['features'], batch['target']
        # wrap them in Variable
        inputs, labels = Variable(inputs).cuda(), Variable(labels).cuda()
        # zero the parameter gradients
        optimizer.zero_grad()
        
        # forward + backward + optimize
        outputs = tester(inputs)
        loss = loss_function(outputs, labels)
        loss.backward()
        optimizer.step()
        
        # print statistics
        running_loss += loss.data[0]
        epoch_loss += loss.data[0]
        if i % 1000 == 0:    # print every 1000 mini-batches
            print('[%d, %5d] loss: %.5f' %
                  (epoch + 1, i + 1, running_loss / (j+1)))
            running_loss = 0.0
        j+=1
    print('Epoch %d finished. Loss: %.5f' % (epoch+1,epoch_loss/(j+1)))
    print('Epoch %d took %.3f seconds.' % (epoch+1, time.time()-time0))
    
    # Validation
    val_loss = 0.
    j=0
    tester.eval()
    for i, batch in enumerate(val_loader):
        inputs, labels = batch['features'], batch['target']
        inputs, labels = Variable(inputs, volatile=True).cuda(), Variable(labels, volatile=True).cuda()
        outputs = tester(inputs)
        val_loss += loss_function(outputs, labels).data[0]
        j+=1
    # Save checkpoint
    is_best = val_loss < best_val_loss
    best_val_loss = min(val_loss, best_val_loss)
    save_checkpoint({
        'epoch': epoch + 1,
        'state_dict': tester.state_dict(),
        'best_val_loss': best_val_loss,
        'optimizer' : optimizer.state_dict(),
    }, is_best)

    print("Validation loss for epoch %d: %.5f" % (epoch+1, val_loss/(j+1)))
print('Finished Training')

[1,     1] loss: 99989296.00000
[1,  1001] loss: 68861793.00699
[1,  2001] loss: 34087516.26987
[1,  3001] loss: 22604013.24492
[1,  4001] loss: 16513123.78705
[1,  5001] loss: 12614644.93421
Epoch 1 finished. Loss: 66717976.45040
Epoch 1 took 68.022 seconds.
Validation loss for epoch 1: 63116328.70988
[2,     1] loss: 64113016.00000
[2,  1001] loss: 59126837.64635
[2,  2001] loss: 28247112.94753
[2,  3001] loss: 18794426.29124
[2,  4001] loss: 14180053.84854
[2,  5001] loss: 10902704.32474
Epoch 2 finished. Loss: 56454893.95457
Epoch 2 took 65.746 seconds.
Validation loss for epoch 2: 49708814.48765
[3,     1] loss: 51595244.00000
[3,  1001] loss: 48776697.71628
[3,  2001] loss: 23682020.30785
[3,  3001] loss: 15481099.15362
[3,  4001] loss: 11445941.05474
[3,  5001] loss: 9114542.73985
Epoch 3 finished. Loss: 46747779.04950
Epoch 3 took 65.932 seconds.
Validation loss for epoch 3: 45398313.02469
[4,     1] loss: 45960292.00000
[4,  1001] loss: 45045643.72827
[4,  2001] loss: 22331326

Process Process-307:
Process Process-305:
Process Process-309:
Process Process-303:
Process Process-310:
Traceback (most recent call last):
Process Process-301:
Traceback (most recent call last):
Process Process-306:
Process Process-308:
Process Process-304:
Traceback (most recent call last):
Process Process-302:
  File "/home/jason/anaconda3/envs/torch/lib/python3.6/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
Traceback (most recent call last):
  File "/home/jason/anaconda3/envs/torch/lib/python3.6/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
Traceback (most recent call last):
  File "/home/jason/anaconda3/envs/torch/lib/python3.6/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
Traceback (most recent call last):
  File "/home/jason/anaconda3/envs/torch/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
Traceback (most recent call last):
  File "/home/jason/anaconda3/

KeyboardInterrupt: 