In [1]:
from dpp_nets.layers.layers import *
import torch
import torch.nn as nn
from collections import OrderedDict
import shutil
import time
import gzip
import os
import json
import numpy as np
from dpp_nets.utils.io import make_embd, make_tensor_dataset, load_tensor_dataset
from dpp_nets.utils.io import data_iterator, load_embd
from torch.autograd import Variable
from torch.utils.data.dataloader import DataLoader
import time
from dpp_nets.my_torch.utilities import pad_tensor

In [2]:
## Data Sets
# NOTE(review): absolute, machine-specific paths -- this cell only runs on the
# original author's machine; consider a configurable data directory.
# `train_set` exposes `data_tensor` / `target_tensor` (both are read further down).
train_set = torch.load('/Users/Max/data/full_beer/pytorch/annotated_common.pt')
# `rat_set` is loaded here but not referenced by any other visible cell.
rat_set = torch.load('/Users/Max/data/full_beer/pytorch/annotated.pt')
# `embd` is used below as the first stage of `nn.Sequential` and exposes `.weight`.
embd = load_embd('/Users/Max/data/full_beer/pytorch/embeddings.pt')

In [3]:
# Parameters
# Seed first: every network below is constructed after this call
# (presumably so that weight initialisation is reproducible -- confirm).
torch.manual_seed(12)
batch_size = 25
# Both tensors are unpacked into exactly two sizes, i.e. they are 2-D here.
_, max_set_size = train_set.data_tensor.size()
_, embd_dim = embd.weight.size()

hidden_dim = 500
enc_dim = 200
target_dim = 3 # let's choose the first three aspects to learn!

# Baseline
# Pipeline: embedding lookup -> DeepSetBaseline -> sigmoid.
baseline_nets = DeepSetBaseline(embd_dim, hidden_dim, enc_dim, target_dim)
baseline = nn.Sequential(embd, baseline_nets, nn.Sigmoid())

# Model
# The trainer receives the embedding itself, so it is fed raw reviews
# (see the training loop cell), unlike Rtrainer which is fed embedded words.
kernel_dim = 200
trainer = MarginalTrainer(embd, hidden_dim, kernel_dim, enc_dim, target_dim)

# Trainer hyper-parameters are plain attributes set after construction.
trainer.reg = 0.1
trainer.reg_mean = 10
trainer.activation = nn.Sigmoid()

# Shuffled mini-batches over the training set; shared by all training loops below.
train_loader = DataLoader(train_set, batch_size, shuffle=True)

In [None]:
# Actual training loop for model
# Kernel net and prediction net are optimised by one Adam instance, each with
# its own learning rate (parameter groups).
torch.manual_seed(12)
trainer.reg = 0.1

params = [
    {'params': trainer.kernel_net.parameters(), 'lr': 1e-3},
    {'params': trainer.pred_net.parameters(), 'lr': 1e-4},
]
optimizer = torch.optim.Adam(params)

for epoch in range(10):
    # Count iterations from 1 so the progress check needs no "+1".
    for step, (review, target) in enumerate(train_loader, 1):
        review, target = Variable(review), Variable(target[:,:3])
        loss = trainer(review, target)

        # Backpropagate + parameter updates
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report the loss every tenth iteration.
        if step % 10 == 0:
            print('Loss at it :', step, 'is', loss.data[0])
            
            


In [None]:
# Need also a training script for RTrainer!!
# incorporate embedding into trainer

# Components are built in the same order as before (kernel net, sampler,
# prediction net) and then wired into the REINFORCE trainer.
kernel_net = KernelVar(embd_dim, hidden_dim, kernel_dim)
sampler = ReinforceSampler(3)
pred_net = PredNet(embd_dim, hidden_dim, enc_dim, target_dim)

Rtrainer = ReinforceTrainer(kernel_net, sampler, pred_net)
Rtrainer.reg, Rtrainer.reg_mean = 0.1, 10
Rtrainer.activation = nn.Sigmoid()

# One Adam optimiser, separate learning rates for kernel and prediction nets.
params = [
    {'params': Rtrainer.kernel_net.parameters(), 'lr': 1e-3},
    {'params': Rtrainer.pred_net.parameters(), 'lr': 1e-4},
]
optimizer = torch.optim.Adam(params)

# The trainer runs in double precision; inputs and targets are cast to match.
Rtrainer.double()

for epoch in range(20):
    for step, (review, target) in enumerate(train_loader, 1):
        # The embedding is not part of Rtrainer, so look the words up here.
        words = embd(Variable(review)).double()
        target = Variable(target[:,:3]).double()
        loss = Rtrainer(words, target)

        # Backpropagate + parameter updates
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report the loss every tenth iteration.
        if step % 10 == 0:
            print('Loss at it :', step, 'is', loss.data[0])



In [None]:
# Actual training loop for baseline
# Only the DeepSetBaseline parameters are optimised; the embedding inside
# `baseline` is not handed to the optimiser.
criterion = nn.MSELoss()
lr = 1e-4
optimizer = torch.optim.Adam(baseline_nets.parameters(), lr=lr)

for epoch in range(10):
    for step, (review, target) in enumerate(train_loader, 1):
        words, target = Variable(review), Variable(target[:,:3])
        pred = baseline(words)
        loss = criterion(pred, target)

        # Backpropagate + parameter updates
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report the loss every tenth iteration.
        if step % 10 == 0:
            print('Loss at it :', step, 'is', loss.data[0])

In [None]:
def validate_baseline(val_set, model, criterion):
    """Print the loss of `model` on the whole of `val_set` in one pass.

    Args:
        val_set: object exposing `data_tensor` (model inputs) and
            `target_tensor`; only the first three target columns are used.
        model: callable mapping the inputs to predictions.
        criterion: callable `(pred, target) -> loss`.

    Returns:
        None. The loss value is printed, not returned.
    """
    # volatile=True: legacy flag marking the inputs as inference-only.
    inputs = Variable(val_set.data_tensor, volatile=True)
    targets = Variable(val_set.target_tensor[:,:3], volatile=True)
    print(criterion(model(inputs), targets).data[0])

In [None]:
def validate_model(val_set, model, embedding=None):
    """Print the unregularised loss of a trainer on the whole of `val_set`.

    The trainer's `reg` attribute is set to 0 for the duration of the
    evaluation and restored afterwards, so validating no longer silently
    switches regularisation off for any training that follows.

    Args:
        val_set: object exposing `data_tensor` (word indices) and
            `target_tensor`; only the first three target columns are used.
        model: trainer called as `model(embedded_words, targets)` that returns
            the loss and carries a `reg` attribute.
        embedding: callable mapping word indices to embeddings. Defaults to
            the notebook-level `embd` (the previous, hard-wired behaviour).

    Returns:
        None. The loss value is printed, not returned.
    """
    if embedding is None:
        embedding = embd  # notebook-level embedding layer

    # Remember the current regularisation weight so it can be put back.
    saved_reg = getattr(model, 'reg', 0)
    model.reg = 0
    try:
        # volatile=True: legacy flag marking the inputs as inference-only.
        x = embedding(Variable(val_set.data_tensor, volatile=True))
        y = Variable(val_set.target_tensor[:,:3], volatile=True)
        loss = model(x, y)
        print(loss.data[0])
    finally:
        model.reg = saved_reg

In [None]:
Rtrainer.float()
validate_model(train_set, Rtrainer)

In [None]:
x = Variable(train_set.data_tensor, volatile=True)
x = embd(x)
y = Variable(train_set.target_tensor[:,:3], volatile=True)

In [None]:
sampler = ReinforceSampler(1)
Rtrainer.sampler = sampler
Rtrainer.alpha_iter = 1

In [None]:
validate_baseline(train_set, baseline, nn.MSELoss())

In [None]:
x.size()

In [None]:
y.size()

In [None]:
import random
def sample(model, sampler, embd, dataset):
    """Draw a subset sample for two consecutive, randomly chosen examples.

    Args:
        model: trainer exposing `kernel_net`; the kernel net is called on the
            embedded words and its `s_ix` / `e_ix` attributes are read
            afterwards.
        sampler: callable `(kernel, words)` that records its draws in
            `saved_subsets`; it is given the kernel net's `s_ix` / `e_ix`.
        embd: callable mapping word indices to embeddings.
        dataset: sized object exposing `data_tensor`.

    Returns:
        None. `sampler.saved_subsets` is printed.
    """
    # random.randint includes BOTH endpoints, so the start index is capped at
    # len - 2 to guarantee a full two-row slice (the old bound, len(dataset),
    # could yield an empty or one-row batch).
    last_start = max(len(dataset) - 2, 0)
    rand = random.randint(0, last_start)
    x = embd(Variable(dataset.data_tensor[rand:rand+2]))

    # Use the trainer that was passed in rather than the notebook-level `trainer`.
    kernel = model.kernel_net(x)
    # s_ix / e_ix are read only after the forward call above, as before.
    sampler.s_ix = model.kernel_net.s_ix
    sampler.e_ix = model.kernel_net.e_ix
    sampler(kernel, x)
    print(sampler.saved_subsets)

In [None]:
# Push a random contiguous batch of 10 training examples through Rtrainer.
# random.randint includes both endpoints, so the start index is capped at
# len - 10; the previous bound (len(train_set)) could give a short or empty batch.
rand = random.randint(0, max(len(train_set) - 10, 0))
x = train_set.data_tensor[rand:rand+10]
x = embd(Variable(x))
y = Variable(train_set.target_tensor[rand:rand+10,:3])
Rtrainer(x, y)

In [None]:
[i.data.sum() for l in Rtrainer.sampler.saved_subsets for i in l]

In [None]:
import argparse

In [None]:
help(argparse)

In [None]:
help(argparse.ArgumentParser.add_argument)

In [None]:
parser = argparse.ArgumentParser()
parser.add_argument('--foo')
parser.parse_args('--foo 1'.split())

parser = argparse.ArgumentParser()
parser.add_argument('--foo', action='store_const', const=42)
parser.parse_args('--foo'.split())

parser = argparse.ArgumentParser()
parser.add_argument('--foo', action='store_true')
parser.add_argument('--bar', action='store_false')
args = parser.parse_args('--foo --bar'.split())

In [None]:
args.bar

In [None]:
int('aspect1'[-1])

In [1]:
import argparse
import os
import shutil

import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.utils.data.dataloader import DataLoader

from dpp_nets.utils.io import make_embd, make_tensor_dataset
from dpp_nets.layers.layers import DeepSetBaseline

# Command-line interface of the Deep Sets baseline trainer.
parser = argparse.ArgumentParser(description='Baseline (Deep Sets) Trainer')

# What to predict and where the run takes place (both required).
parser.add_argument('-a', '--aspect', type=str, choices=['aspect1', 'aspect2', 'aspect3', 'all'],
                    help='what is the target?', required=True)
parser.add_argument('--remote', type=int,
                    help='training locally or on cluster?', required=True)

# Data locations.
parser.add_argument('--data_path_local', type=str, default='/Users/Max/data/beer_reviews',
                    help='where is the data folder locally?')
parser.add_argument('--data_path_remote', type=str, default='/cluster/home/paulusm/data/beer_reviews',
                    help='where is the data folder?')

# Checkpoint / log locations. The help texts used to be copies of the
# data-folder ones and described the wrong directory.
parser.add_argument('--ckp_path_local', type=str, default='/Users/Max/checkpoints/beer_reviews',
                    help='where is the checkpoint folder locally?')

parser.add_argument('--ckp_path_remote', type=str, default='/cluster/home/paulusm/checkpoints/beer_reviews',
                    help='where is the checkpoint folder?')

# Optimisation settings.
parser.add_argument('-b', '--batch-size', default=50, type=int,
                    metavar='N', help='mini-batch size (default: 50)')
parser.add_argument('--epochs', default=30, type=int, metavar='N',
                    help='number of total epochs to run')
#parser.add_argument('--lr-k', '--learning-rate-k', default=0.1, type=float,
#                    metavar='LRk', help='initial learning rate for kernel net')
#parser.add_argument('--lr-p', '--learning-rate-p', default=0.1, type=float,
#                    metavar='LRp', help='initial learning rate for pred net')
parser.add_argument('--lr', '--learning-rate', default=0.1, type=float,
                    metavar='LR', help='initial learning rate for baseline')
#parser.add_argument('--reg', type=float, required=True,
#                    metavar='reg', help='regularization constant')
#parser.add_argument('--reg-mean', type=float, required=True,
#                    metavar='reg_mean', help='regularization_mean')


def train(loader, model, criterion, optimizer, aspect):
    """Run one training epoch over `loader`, printing per-iteration diagnostics.

    Args:
        loader: iterable of `(review, target)` batches.
        model: indexable model called as `model(review)`; the debug print
            reads `model[1].pred_net[2].weight.grad`, so that path must exist.
        criterion: callable `(pred, target) -> loss`.
        optimizer: optimiser exposing `zero_grad()` and `step()`.
        aspect: `'all'` for the first three target columns, otherwise a name
            whose last character is the target column index (e.g. `'aspect1'`).

    Returns:
        None.
    """
    for t, (review, target) in enumerate(loader):
        review = Variable(review)

        # Honour the `aspect` argument instead of the notebook-level `args`.
        # NOTE(review): targets are cast to double -- the model is expected to
        # produce double predictions; confirm against the model set-up cell.
        if aspect == 'all':
            target = Variable(target[:,:3]).double()
        else:
            # Slice (col:col+1) keeps the column dimension, so the target is
            # (batch, 1) like the single-aspect prediction and the loss never
            # depends on implicit broadcasting.
            col = int(aspect[-1])
            target = Variable(target[:,col:col+1]).double()

        pred = model(review)
        loss = criterion(pred, target)
        optimizer.zero_grad()
        loss.backward()
        # Debug output: summed gradient of one prediction-net layer.
        print("Gradient in pred_net is:", model[1].pred_net[2].weight.grad.data.sum())
        optimizer.step()
        print('it %d' %t, 'loss is', loss.data[0])

def validate(loader, model, criterion, aspect):
    """Return the mean per-batch loss of `model` over `loader`.

    Every batch contributes equally to the mean, regardless of its size.

    Args:
        loader: iterable of `(review, target)` batches.
        model: callable mapping a review batch to predictions.
        criterion: callable `(pred, target) -> loss`.
        aspect: `'all'` for the first three target columns, otherwise a name
            whose last character is the target column index (e.g. `'aspect1'`).

    Returns:
        The running mean of the batch losses (0.0 for an empty loader).
    """
    total_loss = 0.0

    for i, (review, target) in enumerate(loader, 1):

        # volatile=True: legacy flag marking the inputs as inference-only.
        review = Variable(review, volatile=True)

        # Honour the `aspect` argument instead of the notebook-level `args`.
        if aspect == 'all':
            target = Variable(target[:,:3], volatile=True).double()
        else:
            # Slice (col:col+1) keeps the column dimension, so the target is
            # (batch, 1) like the single-aspect prediction and the loss never
            # depends on implicit broadcasting.
            col = int(aspect[-1])
            target = Variable(target[:,col:col+1], volatile=True).double()

        pred = model(review)
        loss = criterion(pred, target)

        # Incremental mean: mean_i = mean_{i-1} + (loss_i - mean_{i-1}) / i
        delta = loss.data[0] - total_loss
        total_loss += (delta / i)

        print("validated one batch")

    return total_loss

def log(epoch, loss):
    """Append one "Epoch | Validation Loss" line to the baseline log file.

    The file lives in the remote or local checkpoint folder (chosen by
    `args.remote`) and its name is prefixed with `args.aspect`.
    """
    fields = ['Epoch: %d' % (epoch), 'Validation Loss: %.5f' % (loss)]
    ckp_dir = args.ckp_path_remote if args.remote else args.ckp_path_local
    destination = os.path.join(ckp_dir, args.aspect + 'DeepSetBaseline_log.txt')

    # Append mode: earlier epochs (and earlier runs) are kept.
    with open(destination, 'a') as handle:
        handle.write(" | ".join(fields) + '\n')

def adjust_learning_rate(optimizer, epoch):
    """Set every parameter group's learning rate to `args.lr` divided by 10 once per 5 completed epochs."""
    completed_decays = epoch // 5
    lr = args.lr * (0.1 ** completed_decays)
    for group in optimizer.param_groups:
        group['lr'] = lr

def save_checkpoint(state, is_best, filename='baseline_checkpoint.pth.tar'):
    """
    Save `state` (a dictionary of information worth keeping) as the latest
    checkpoint and, when `is_best` is true, copy it to the best-model file.

    Both files go to the remote or local checkpoint folder (chosen by
    `args.remote`) and their names are prefixed with `args.aspect`.
    """
    ckp_dir = args.ckp_path_remote if args.remote else args.ckp_path_local
    destination = os.path.join(ckp_dir, args.aspect + filename)
    torch.save(state, destination)

    # Nothing more to do unless this checkpoint is the best one so far.
    if not is_best:
        return

    best_destination = os.path.join(ckp_dir, args.aspect + 'baseline_model_best.pth.tar')
    shutil.copyfile(destination, best_destination)

In [2]:
# Parse a fixed command line (notebook stand-in for real CLI arguments).
args = parser.parse_args('-a aspect1 --remote 0'.split())

lowest_loss = 100 # arbitrary high number as upper bound for loss

### Load data
# Remote and local runs differ only in the data root directory.
data_root = args.data_path_remote if args.remote else args.data_path_local
train_path = os.path.join(data_root, ".".join(['reviews', args.aspect, 'train.txt.gz']))
val_path = os.path.join(data_root, ".".join(['reviews', args.aspect, 'heldout.txt.gz']))
embd_path = os.path.join(data_root, 'review+wiki.filtered.200.txt.gz')

# The vocabulary returned with the embedding is used to build both datasets.
embd, word_to_ix = make_embd(embd_path)
train_set = make_tensor_dataset(train_path, word_to_ix)
val_set = make_tensor_dataset(val_path, word_to_ix)
print("loaded data")

# Seed before creating the shuffling loader.
torch.manual_seed(0)
train_loader = DataLoader(train_set, args.batch_size, shuffle=True)
val_loader = DataLoader(val_set, args.batch_size)
print("loader defined")

loaded data
loader defined


In [6]:
### Build model
# Network parameters
embd_dim = embd.weight.size(1)
hidden_dim = 500
enc_dim = 200
# Three outputs when predicting all aspects at once, otherwise one.
target_dim = 3 if args.aspect == 'all' else 1

# Model: embedding lookup -> DeepSetBaseline -> sigmoid
torch.manual_seed(0)
net = DeepSetBaseline(embd_dim, hidden_dim, enc_dim, target_dim)
activation = nn.Sigmoid()
model = nn.Sequential(embd, net, activation)
#model.double()
print("created model")

### Set-up training
# Only `net` is optimised; the embedding's parameters are not passed to Adam.
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(net.parameters(), lr=args.lr)
print("set up optimizer")

created model
set up optimizer


In [14]:
### Loop
# One iteration per epoch: decay the learning rate, train, validate, log the
# validation loss and write a checkpoint (plus a "best" copy on improvement).
torch.manual_seed(0)
print("started loop")
for epoch in range(args.epochs):

    adjust_learning_rate(optimizer, epoch)

    train(train_loader, model, criterion, optimizer, args.aspect)
    loss = validate(val_loader, model, criterion, args.aspect)

    log(epoch, loss)
    print("logged")

    # Track the best validation loss seen so far.
    is_best = loss < lowest_loss
    lowest_loss = min(loss, lowest_loss)
    # The key used to be 'epoch:' (stray colon), so checkpoint['epoch'] would
    # have raised KeyError when resuming.
    save = {'epoch': epoch + 1,
            'model': 'Deep Set Baseline',
            'state_dict': model.state_dict(),
            'lowest_loss': lowest_loss,
            'optimizer': optimizer.state_dict()}

    save_checkpoint(save, is_best)
    print("saved a checkpoint")

print('*'*20, 'SUCCESS','*'*20)

started loop
Gradient in pred_net is: 0.0
it 0 loss is 0.18479999394416835
Gradient in pred_net is: 0.0
it 1 loss is 0.15859999790191665
Gradient in pred_net is: 0.0
it 2 loss is 0.15119999508857745
Gradient in pred_net is: 0.0
it 3 loss is 0.14079999823570274
Gradient in pred_net is: 0.0
it 4 loss is 0.1901999958038332
Gradient in pred_net is: 0.0
it 5 loss is 0.1645999945640566
Gradient in pred_net is: 0.0
it 6 loss is 0.17959999456405662


KeyboardInterrupt: 

In [13]:
# A manual loop
loader = train_loader
net = DeepSetBaseline(embd_dim, hidden_dim, enc_dim, target_dim)
optimizer = torch.optim.Adam(net.parameters(), lr=1e-4)
#optimizer = torch.optim.SGD(net.parameters(), lr=1e-1, momentum=0.9)
embd = embd.float()
for t, (review, target) in enumerate(loader):
    review = Variable(review)
    words = embd(review)
    output = net(words)
    
    pred = nn.Sigmoid()(output)
    print("prediction is :", pred.data)

    if args.aspect == 'all':
        target = Variable(target[:,:3])
    else:
        target = Variable(target[:,int(args.aspect[-1])])

    loss = criterion(pred, target)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    print('loss is: ', loss.data[0])
    print('weight grad is: ',net.enc_layer1.weight.grad.data.sum())


prediction is : 
 0.5172
 0.5269
 0.5146
 0.5167
 0.5268
 0.5240
 0.5447
 0.5597
 0.5388
 0.5239
 0.5368
 0.5403
 0.5190
 0.5227
 0.5545
 0.5395
 0.5251
 0.5317
 0.5403
 0.5269
 0.5220
 0.5335
 0.5372
 0.5404
 0.5165
 0.5143
 0.5450
 0.5260
 0.5126
 0.5535
 0.5305
 0.5317
 0.5267
 0.5156
 0.5205
 0.5207
 0.5343
 0.5372
 0.5422
 0.5179
 0.5264
 0.5125
 0.5168
 0.5182
 0.5213
 0.5174
 0.5299
 0.5250
 0.5203
 0.5579
[torch.FloatTensor of size 50x1]

loss is:  0.07014837116003036
weight grad is:  0.01934915469658871
prediction is : 
 0.6669
 0.7977
 0.7743
 0.6152
 0.6556
 0.6171
 0.6603
 0.6605
 0.7936
 0.6457
 0.6201
 0.6429
 0.6651
 0.6441
 0.7832
 0.8832
 0.5803
 0.6624
 0.6808
 0.6893
 0.7007
 0.8820
 0.6278
 0.6435
 0.6583
 0.6121
 0.5920
 0.6283
 0.7124
 0.7844
 0.6065
 0.6695
 0.6448
 0.6322
 0.7636
 0.6886
 0.7784
 0.6430
 0.6169
 0.7256
 0.7379
 0.8642
 0.6288
 0.8155
 0.6472
 0.5788
 0.6433
 0.6254
 0.6049
 0.6841
[torch.FloatTensor of size 50x1]

loss is:  0.0598050020635128
we

KeyboardInterrupt: 

In [9]:
net.enc_layer1.weight


Parameter containing:
-4.0917e-02  6.2794e-02  5.8092e-03  ...  -6.3790e-02 -1.8017e-02 -2.8331e-02
 4.1147e-02  3.6015e-02 -5.7746e-02  ...   3.8079e-02  2.4789e-02  3.2314e-02
-5.6748e-02  6.3613e-02  2.3695e-02  ...  -6.8372e-02 -9.2158e-03  4.3017e-02
                ...                   ⋱                   ...                
-3.5129e-02  4.6058e-03  5.5488e-02  ...  -5.1774e-02 -2.6169e-02 -3.1497e-02
-5.9451e-02  5.8607e-02  8.8602e-04  ...  -4.3175e-03  3.9091e-02 -5.3066e-02
 2.5660e-02  4.8838e-02 -4.3077e-02  ...  -6.5688e-02 -4.9512e-02 -5.4340e-02
[torch.FloatTensor of size 500x200]

In [10]:
embd.weight

Parameter containing:
 0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
-0.0425 -0.0521  0.0683  ...   0.1130  0.0135  0.0482
-0.0534 -0.0038 -0.0476  ...  -0.0365  0.0941 -0.0478
          ...             ⋱             ...          
 0.0093 -0.0598  0.0637  ...  -0.0051  0.0202 -0.0329
 0.0317 -0.0415 -0.0221  ...   0.0125 -0.0892 -0.0764
-0.0223 -0.0166  0.0155  ...   0.0024 -0.0372  0.0276
[torch.DoubleTensor of size 147760x200]