In [1]:
from dpp_nets.layers.layers import *
import torch
import torch.nn as nn
from collections import OrderedDict
import shutil
import time
import gzip
import os
import json
import numpy as np
from dpp_nets.utils.io import make_embd, make_tensor_dataset, load_tensor_dataset
from dpp_nets.utils.io import data_iterator, load_embd
from torch.autograd import Variable
from torch.utils.data.dataloader import DataLoader
import time
from dpp_nets.my_torch.utilities import pad_tensor

In [2]:
## Data Sets
# Load the pre-built PyTorch artefacts from disk.
# NOTE(review): the paths are absolute and machine-specific, and torch.load
# unpickles the files, so only point this at data you trust.
# `train_set` exposes `.data_tensor` / `.target_tensor` and is the dataset the
# training loader below iterates over.
train_set = torch.load('/Users/Max/data/full_beer/pytorch/annotated_common.pt')
# `rat_set` is not referenced by any other visible cell — presumably the
# annotated (rationale) set kept for later evaluation; TODO confirm.
rat_set = torch.load('/Users/Max/data/full_beer/pytorch/annotated.pt')
# `embd` is the embedding module: it is the first stage of the baseline and is
# handed to MarginalTrainer in the next cell.
embd = load_embd('/Users/Max/data/full_beer/pytorch/embeddings.pt')

In [3]:
# Parameters
torch.manual_seed(12)

# Sizes read off the loaded objects (second dimension of each tensor).
# assumes both tensors are 2-D — TODO confirm
max_set_size = train_set.data_tensor.size(1)
embd_dim = embd.weight.size(1)

# Hyper-parameters
batch_size = 25
hidden_dim = 500
enc_dim = 200
kernel_dim = 200
target_dim = 3  # let's choose the first three aspects to learn!

# Baseline: embedding -> deep-set network -> sigmoid.
# `baseline_nets` keeps its own name because the baseline optimizer is built
# from its parameters only.
baseline_nets = DeepSetBaseline(embd_dim, hidden_dim, enc_dim, target_dim)
baseline = nn.Sequential(embd, baseline_nets, nn.Sigmoid())

# Model: the trainer is configured through plain attributes after construction.
trainer = MarginalTrainer(embd, hidden_dim, kernel_dim, enc_dim, target_dim)
for attr_name, attr_value in (('reg', 0.1),
                              ('reg_mean', 10),
                              ('activation', nn.Sigmoid())):
    setattr(trainer, attr_name, attr_value)

train_loader = DataLoader(train_set, batch_size, shuffle=True)

In [4]:
# Actual training loop for model
torch.manual_seed(12)

# Two parameter groups so the kernel net and the prediction net can use
# different learning rates.
params = [
    {'params': trainer.kernel_net.parameters(), 'lr': 1e-3},
    {'params': trainer.pred_net.parameters(), 'lr': 1e-4},
]
optimizer = torch.optim.Adam(params)

for epoch in range(10):
    for t, (review, target) in enumerate(train_loader):
        # Only the first three target columns are learned.
        review, target = Variable(review), Variable(target[:, :3])
        loss = trainer(review, target)

        # Backpropagate + parameter updates
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report every 10th iteration (1-based count).
        if (t + 1) % 10 == 0:
            print('Loss at it :', t+1, 'is', loss.data[0])

Loss at it : 10 is 0.019715605303645134
Loss at it : 20 is 0.015935303643345833
Loss at it : 30 is 0.01891719177365303
Loss at it : 40 is 0.01880471594631672
Loss at it : 10 is 0.03427441790699959
Loss at it : 20 is 0.01509181223809719
Loss at it : 30 is 0.02585383877158165


KeyboardInterrupt: 

In [None]:
# Need also a training script for RTrainer!!
# incorporate embedding into trainer

# Components are built in this order (kernel net, sampler, prediction net) and
# kept as notebook globals; `sampler` is swapped out again in a later cell.
kernel_net = KernelVar(embd_dim, hidden_dim, kernel_dim)
sampler = ReinforceSampler(3)
pred_net = PredNet(embd_dim, hidden_dim, enc_dim, target_dim)

Rtrainer = ReinforceTrainer(kernel_net, sampler, pred_net)
for attr_name, attr_value in (('reg', 0.1),
                              ('reg_mean', 10),
                              ('activation', nn.Sigmoid())):
    setattr(Rtrainer, attr_name, attr_value)

params = [
    {'params': Rtrainer.kernel_net.parameters(), 'lr': 1e-3},
    {'params': Rtrainer.pred_net.parameters(), 'lr': 1e-4},
]
optimizer = torch.optim.Adam(params)

# The trainer runs in double precision here, so inputs and targets are cast to
# double inside the loop as well.
Rtrainer.double()

for epoch in range(20):
    for t, (review, target) in enumerate(train_loader):
        # Unlike MarginalTrainer, this trainer is fed embedded words, so the
        # embedding lookup happens out here.
        words = embd(Variable(review))
        words = words.double()
        target = Variable(target[:, :3]).double()
        loss = Rtrainer(words, target)

        # Backpropagate + parameter updates
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report every 10th iteration (1-based count).
        if (t + 1) % 10 == 0:
            print('Loss at it :', t+1, 'is', loss.data[0])



In [None]:
# Actual training loop for baseline
# Training
criterion = nn.MSELoss()
lr = 1e-4
# Only `baseline_nets` is optimised; the embedding stage of `baseline` is not
# handed to the optimizer.
optimizer = torch.optim.Adam(baseline_nets.parameters(), lr=lr)

for epoch in range(10):
    for t, (review, target) in enumerate(train_loader):
        # Only the first three target columns are learned.
        target = Variable(target[:, :3])
        words = Variable(review)
        loss = criterion(baseline(words), target)

        # Backpropagate + parameter updates
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report every 10th iteration (1-based count).
        if (t + 1) % 10 == 0:
            print('Loss at it :', t+1, 'is', loss.data[0])

In [None]:
def validate_baseline(val_set, model, criterion):
    """Print the loss of `model` on all of `val_set`, evaluated as one batch.

    Args:
        val_set: object exposing `data_tensor` (model inputs) and
            `target_tensor`; only the first three target columns are used.
        model: callable mapping the wrapped inputs to predictions.
        criterion: callable `(pred, target) -> loss`; the first element of
            `loss.data` is what gets printed.

    Returns:
        None. The loss is printed, not returned.
    """
    # volatile=True: inference only, no graph is needed for backprop.
    inputs = Variable(val_set.data_tensor, volatile=True)
    targets = Variable(val_set.target_tensor[:, :3], volatile=True)
    print(criterion(model(inputs), targets).data[0])

In [None]:
def validate_model(val_set, model):
    """Print the unregularised loss of `model` on all of `val_set`.

    The model's `reg` attribute is set to 0 for the duration of the
    evaluation and restored afterwards, so validating no longer silently
    switches regularisation off for any training that follows.

    Args:
        val_set: object exposing `data_tensor` (word indices fed to the
            notebook-global `embd`) and `target_tensor`; only the first three
            target columns are used.
        model: trainer-like callable `(embedded_words, target) -> loss` with a
            writable `reg` attribute.

    Returns:
        None. The loss is printed, not returned.
    """
    # Remember the caller's setting (or its absence) so it can be put back.
    had_reg = hasattr(model, 'reg')
    saved_reg = getattr(model, 'reg', None)
    model.reg = 0
    try:
        # volatile=True: inference only, no graph is needed for backprop.
        x = Variable(val_set.data_tensor, volatile=True)
        x = embd(x)  # `embd` is the notebook-global embedding module
        y = Variable(val_set.target_tensor[:, :3], volatile=True)
        loss = model(x, y)
        print(loss.data[0])
    finally:
        if had_reg:
            model.reg = saved_reg
        else:
            del model.reg

In [None]:
# The REINFORCE trainer was switched to double precision for training, but
# validate_model feeds it the output of `embd` without a `.double()` cast, so
# cast the trainer back to single precision first.
# NOTE(review): this evaluates on `train_set`, not on held-out data.
Rtrainer.float()
validate_model(train_set, Rtrainer)

In [None]:
# Scratch cell: builds the same inputs validate_model builds internally
# (embedded full training set and the first three target columns) and leaves
# them in the notebook globals `x` and `y` for interactive inspection.
x = Variable(train_set.data_tensor, volatile=True)
x = embd(x)
y = Variable(train_set.target_tensor[:,:3], volatile=True)

In [None]:
# Replace the trainer's sampler (built with argument 3 during training) by one
# built with argument 1, and set `alpha_iter` to 1 as well.
# NOTE(review): the meaning of both numbers is defined in dpp_nets and is not
# visible here — presumably the number of samples per example; confirm.
sampler = ReinforceSampler(1)
Rtrainer.sampler = sampler
Rtrainer.alpha_iter = 1

In [None]:
# Print the baseline's MSE on the full training set.
# NOTE(review): this evaluates on `train_set`, not on held-out data.
validate_baseline(train_set, baseline, nn.MSELoss())

In [48]:
# Rebuild the two-group Adam optimizer (kernel net at 1e-3, prediction net at
# 1e-4). This rebinds the notebook-global `optimizer`, which the schedule demo
# in the next cell then mutates.
params = [{'params': trainer.kernel_net.parameters(), 'lr': 1e-3},
          {'params': trainer.pred_net.parameters(),   'lr': 1e-4}]
optimizer = torch.optim.Adam(params)
def adjust_learning_rate(optimizer, epoch, factor=0.1, every=10):
    """Decay the learning rate of every parameter group on a fixed schedule.

    When ``epoch + 1`` is a multiple of ``every``, each group's *current*
    ``'lr'`` is multiplied by ``factor`` in place. The decay therefore
    compounds on the current value rather than being recomputed from the
    initial learning rate, and each group keeps its own relative scale.

    Args:
        optimizer: object with a ``param_groups`` list of dicts holding ``'lr'``.
        epoch: zero-based epoch index.
        factor: multiplicative decay applied at each step (default 0.1).
        every: number of epochs between decays (default 10).

    Raises:
        ValueError: if ``every`` is not a positive integer.
    """
    if every <= 0:
        raise ValueError('every must be a positive number of epochs')
    # Nothing to do unless this is the last epoch of a decay period.
    if (epoch + 1) % every:
        return
    for param_group in optimizer.param_groups:
        param_group['lr'] = param_group['lr'] * factor

In [49]:
# Dry-run of the schedule: show both group learning rates, then apply the decay.
# NOTE(review): this mutates the real `optimizer`, leaving it with tiny learning
# rates afterwards; rebuild the optimizer before training with it.
for epoch in range(46):
    groups = optimizer.param_groups
    print(epoch, groups[0]['lr'], groups[1]['lr'])
    adjust_learning_rate(optimizer, epoch)

0 0.001 0.0001
1 0.001 0.0001
2 0.001 0.0001
3 0.001 0.0001
4 0.001 0.0001
5 0.001 0.0001
6 0.001 0.0001
7 0.001 0.0001
8 0.001 0.0001
9 0.001 0.0001
10 0.0001 1e-05
11 0.0001 1e-05
12 0.0001 1e-05
13 0.0001 1e-05
14 0.0001 1e-05
15 0.0001 1e-05
16 0.0001 1e-05
17 0.0001 1e-05
18 0.0001 1e-05
19 0.0001 1e-05
20 1e-05 1.0000000000000002e-06
21 1e-05 1.0000000000000002e-06
22 1e-05 1.0000000000000002e-06
23 1e-05 1.0000000000000002e-06
24 1e-05 1.0000000000000002e-06
25 1e-05 1.0000000000000002e-06
26 1e-05 1.0000000000000002e-06
27 1e-05 1.0000000000000002e-06
28 1e-05 1.0000000000000002e-06
29 1e-05 1.0000000000000002e-06
30 1.0000000000000002e-06 1.0000000000000002e-07
31 1.0000000000000002e-06 1.0000000000000002e-07
32 1.0000000000000002e-06 1.0000000000000002e-07
33 1.0000000000000002e-06 1.0000000000000002e-07
34 1.0000000000000002e-06 1.0000000000000002e-07
35 1.0000000000000002e-06 1.0000000000000002e-07
36 1.0000000000000002e-06 1.0000000000000002e-07
37 1.0000000000000002e-06 1

In [36]:
# Scratch check of the modulo operator used by the learning-rate schedule.
6 % 3

0

In [81]:
import argparse
import os
import shutil

import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.utils.data.dataloader import DataLoader

from dpp_nets.utils.io import make_embd, make_tensor_dataset
from dpp_nets.layers.layers import MarginalTrainer


# Command-line interface of the marginal trainer script.
parser = argparse.ArgumentParser(description='Marginal Krause Trainer')

# Which target column(s) to learn; also selects the data files that get loaded.
parser.add_argument('-a', '--aspect', type=str, choices=['aspect1', 'aspect2', 'aspect3', 'all'],
                help='what is the target?', required=True)

# Optimisation settings (separate learning rates for the two sub-networks).
parser.add_argument('-b', '--batch-size', default=50, type=int,
                metavar='N', help='mini-batch size (default: 50)')
parser.add_argument('--epochs', default=30, type=int, metavar='N',
                help='number of total epochs to run')
parser.add_argument('--lr_k', '--learning_rate_k', default=0.1e-3, type=float,
                metavar='LRk', help='initial learning rate for kernel net')
parser.add_argument('--lr_p', '--learning_rate_p', default=0.1e-4, type=float,
                metavar='LRp', help='initial learning rate for pred net')
parser.add_argument('--reg', type=float, required=True,
                metavar='reg', help='regularization constant')
parser.add_argument('--reg_mean', type=float, required=True,
                metavar='reg_mean', help='regularization_mean')

# Train locally or remotely?
# The value is an int used as a truth value: 0 selects the *_local paths,
# anything else the *_remote paths.
parser.add_argument('--remote', type=int,
                help='training locally or on cluster?', required=True)
# Burnt in Paths..
parser.add_argument('--data_path_local', type=str, default='/Users/Max/data/beer_reviews',
                help='where is the data folder locally?')
parser.add_argument('--data_path_remote', type=str, default='/cluster/home/paulusm/data/beer_reviews',
                help='where is the data folder?')
# The help texts of the two checkpoint options used to be copies of the
# data-folder ones; they now describe the checkpoint folder.
parser.add_argument('--ckp_path_local', type=str, default='/Users/Max/checkpoints/beer_reviews',
                help='where is the checkpoint folder locally?')
parser.add_argument('--ckp_path_remote', type=str, default='/cluster/home/paulusm/checkpoints/beer_reviews',
                help='where is the checkpoint folder?')



# At notebook top level this `global` statement has no effect; it is kept so
# the cell still works unchanged if it is moved back into a function, because
# the helper functions read `args` as a module-level name.
global args, lowest_loss

# Arguments are hard-wired for interactive use instead of being read from sys.argv.
args = parser.parse_args('-a aspect1 --remote 0 --reg 0.1 --reg_mean 10'.split())
# Start from +inf so the first finite validation loss always counts as the best
# so far, whatever its magnitude.
lowest_loss = float('inf')

### Load data
# The remote and local layouts differ only in their root folder, so pick the
# root once and derive every path from it.
data_root = args.data_path_remote if args.remote else args.data_path_local
train_path = os.path.join(data_root, ".".join(['reviews', args.aspect, 'train.txt.gz']))
val_path = os.path.join(data_root, ".".join(['reviews', args.aspect, 'heldout.txt.gz']))
embd_path = os.path.join(data_root, 'review+wiki.filtered.200.txt.gz')

# The vocabulary index returned with the embedding is reused to build both datasets.
embd, word_to_ix = make_embd(embd_path)
train_set, val_set = (make_tensor_dataset(path, word_to_ix)
                      for path in (train_path, val_path))
print("loaded data")

# Seed before building the loaders; only the training loader shuffles.
torch.manual_seed(0)
train_loader = DataLoader(train_set, args.batch_size, shuffle=True)
val_loader = DataLoader(val_set, args.batch_size)
print("loader defined")

### Build model
# Network parameters
embd_dim = embd.weight.size(1)
hidden_dim = 500
enc_dim = 200
# Defined here so this cell does not depend on a value left behind in the
# kernel by an earlier cell (it was previously undefined in this script).
kernel_dim = 200
# One output per learned aspect: three for 'all', otherwise a single one.
if args.aspect == 'all':
    target_dim = 3
else:
    target_dim = 1

# Model
# Re-seed right before construction so the initial weights are reproducible.
torch.manual_seed(0)
trainer = MarginalTrainer(embd, hidden_dim, kernel_dim, enc_dim, target_dim)
trainer.activation = nn.Sigmoid()
trainer.reg = args.reg
trainer.reg_mean = args.reg_mean
print("created trainer")

# Set-up Training
# Two parameter groups so the kernel net and the prediction net can use
# different learning rates.
params = [{'params': trainer.kernel_net.parameters(), 'lr': args.lr_k},
          {'params': trainer.pred_net.parameters(),   'lr': args.lr_p}]
optimizer = torch.optim.Adam(params)
print('set-up optimizer')

### Loop
# NOTE(review): adjust_learning_rate, validate, log and save_checkpoint are
# defined in a cell further down the notebook; that cell must be run first.
torch.manual_seed(0)
print("started loop")
for epoch in range(args.epochs):

    adjust_learning_rate(optimizer, epoch)

    # NOTE(review): training is commented out, so every epoch only re-evaluates
    # the same weights; the disabled call is also handed `val_loader`.
    #train(val_loader, trainer, optimizer)        
    loss, pred_loss, reg_loss = validate(val_loader, trainer)

    log(epoch, loss, pred_loss, reg_loss)
    print("logged")

    # Track the best prediction loss seen so far. Updating only on a strict
    # improvement keeps a NaN loss from replacing the running minimum
    # (min(nan, x) returns nan, after which nothing could ever be "best").
    is_best = pred_loss < lowest_loss
    if is_best:
        lowest_loss = pred_loss
    # NOTE(review): the 'epoch:' key carries a trailing colon; it is kept as is
    # because existing checkpoints and loaders may depend on it.
    save = {'epoch:': epoch + 1, 
            'model': 'Marginal Trainer',
            'state_dict': trainer.state_dict(),
            'lowest_loss': lowest_loss,
            'optimizer': optimizer.state_dict()} 

    save_checkpoint(save, is_best)
    print("saved a checkpoint")

print('*'*20, 'SUCCESS','*'*20)


loaded data
loader defined
created trainer
set-up optimizer
started loop
logged
saved a checkpoint


KeyboardInterrupt: 

In [79]:
# Inspect the loss the trainer stored on itself during its most recent call.
trainer.loss

Variable containing:
 5.3530
[torch.FloatTensor of size 1]

In [80]:

def train(loader, trainer, optimizer):
    """Run one optimisation pass over `loader`.

    For every batch the trainer is called on the reviews and the selected
    target column(s); the returned loss is back-propagated and the optimizer
    takes one step.

    Args:
        loader: iterable of `(review, target)` batches.
        trainer: callable `(review, target) -> loss` supporting `.backward()`.
        optimizer: optimizer exposing `zero_grad()` and `step()`.

    Reads the module-level `args.aspect` to decide which targets to use.
    """
    # 'all' keeps the first three columns; otherwise the trailing digit of the
    # aspect name is used directly as a zero-based column index.
    # NOTE(review): 'aspect1' therefore selects column 1, the second column —
    # confirm this is intended.
    if args.aspect == 'all':
        columns = slice(None, 3)
    else:
        columns = int(args.aspect[-1])

    for review, target in loader:
        loss = trainer(Variable(review), Variable(target[:, columns]))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

def validate(loader, trainer):
    """Average the trainer's losses over all batches of `loader`.

    After each forward call the trainer's `loss`, `pred_loss` and `reg_loss`
    attributes are read and folded into running means. Every batch has equal
    weight, whatever its size.

    Args:
        loader: iterable of `(review, target)` batches.
        trainer: callable `(review, target)` that stores `loss`, `pred_loss`
            and `reg_loss` on itself.

    Returns:
        Tuple `(mean_loss, mean_pred_loss, mean_reg_loss)`; all zeros when
        `loader` yields no batches.

    Reads the module-level `args.aspect` to decide which targets to use.
    """
    # 'all' keeps the first three columns; otherwise the trailing digit of the
    # aspect name is used directly as a zero-based column index.
    # NOTE(review): 'aspect1' therefore selects column 1, the second column —
    # confirm this is intended.
    if args.aspect == 'all':
        columns = slice(None, 3)
    else:
        columns = int(args.aspect[-1])

    # Running means in the order: total loss, prediction loss, regularisation loss.
    running = [0.0, 0.0, 0.0]

    for i, (review, target) in enumerate(loader, 1):
        # volatile=True: inference only, no graph is needed for backprop.
        review = Variable(review, volatile=True)
        target = Variable(target[:, columns], volatile=True)

        trainer(review, target)
        batch_values = (trainer.loss.data[0],
                        trainer.pred_loss.data[0],
                        trainer.reg_loss.data[0])

        # Incremental mean update: mean += (value - mean) / count.
        for slot, value in enumerate(batch_values):
            delta = value - running[slot]
            running[slot] += (delta / i)

    return tuple(running)
def adjust_learning_rate(optimizer, epoch, factor=0.1, every=10):
    """Decay the learning rate of every parameter group on a fixed schedule.

    When ``epoch + 1`` is a multiple of ``every``, each group's *current*
    ``'lr'`` is multiplied by ``factor`` in place. The decay therefore
    compounds on the current value rather than being recomputed from the
    initial learning rate, and each group keeps its own relative scale.

    Args:
        optimizer: object with a ``param_groups`` list of dicts holding ``'lr'``.
        epoch: zero-based epoch index.
        factor: multiplicative decay applied at each step (default 0.1).
        every: number of epochs between decays (default 10).

    Raises:
        ValueError: if ``every`` is not a positive integer.
    """
    if every <= 0:
        raise ValueError('every must be a positive number of epochs')
    # Nothing to do unless this is the last epoch of a decay period.
    if (epoch + 1) % every:
        return
    for param_group in optimizer.param_groups:
        param_group['lr'] = param_group['lr'] * factor

def log(epoch, loss, pred_loss, reg_loss):
    """Append one line of validation metrics for `epoch` to the run's log file.

    The file sits in the checkpoint folder selected by the module-level
    `args.remote` flag, and its name is `args.aspect` followed directly by
    'marginal_log.txt'. The file is opened in append mode.

    Args:
        epoch: epoch number, formatted as an integer.
        loss: total validation loss.
        pred_loss: prediction part of the validation loss.
        reg_loss: regularisation part of the validation loss.
    """
    fields = ('Epoch: %d' % (epoch),
              'V Loss: %.5f' % (loss),
              'V Pred Loss: %.5f' % (pred_loss),
              'V Reg Loss: %.5f' % (reg_loss))
    line = " | ".join(fields)

    ckp_root = args.ckp_path_remote if args.remote else args.ckp_path_local
    destination = os.path.join(ckp_root, args.aspect + 'marginal_log.txt')

    with open(destination, 'a') as handle:
        handle.write(line + '\n')

def save_checkpoint(state, is_best, filename='marginal_checkpoint.pth.tar'):
    """Persist `state` and, when it is the best so far, keep a second copy.

    The checkpoint is written with `torch.save` into the checkpoint folder
    selected by the module-level `args.remote` flag, under the name
    `args.aspect + filename`. If `is_best` is true, the file just written is
    copied to `args.aspect + 'marginal_best.pth.tar'` in the same folder.

    Args:
        state: dictionary that contains the information to be saved.
        is_best: whether this checkpoint is the best one seen so far.
        filename: suffix of the checkpoint file name.
    """
    ckp_root = args.ckp_path_remote if args.remote else args.ckp_path_local
    destination = os.path.join(ckp_root, args.aspect + filename)

    torch.save(state, destination)

    if not is_best:
        return

    best_destination = os.path.join(ckp_root, args.aspect + 'marginal_best.pth.tar')
    shutil.copyfile(destination, best_destination)


In [64]:
# NOTE(review): `main` is not defined in any visible cell — presumably left
# over from an earlier version that wrapped the script in a function. The
# recorded run fails inside argparse because the required arguments were missing.
main()

usage: ipykernel_launcher.py [-h] -a {aspect1,aspect2,aspect3,all} [-b N]
                             [--epochs N] [--lr_k LRk] [--lr_p LRp] --reg reg
                             --reg_mean reg_mean --remote REMOTE
                             [--data_path_local DATA_PATH_LOCAL]
                             [--data_path_remote DATA_PATH_REMOTE]
                             [--ckp_path_local CKP_PATH_LOCAL]
                             [--ckp_path_remote CKP_PATH_REMOTE]
ipykernel_launcher.py: error: the following arguments are required: -a/--aspect, --reg, --reg_mean, --remote


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
