In [3]:
import torch
import torch.nn as nn
from collections import OrderedDict
import shutil
import time
import gzip
import os
import json
import numpy as np
from dpp_nets.utils.io import make_embd, make_tensor_dataset, load_tensor_dataset
from dpp_nets.utils.io import data_iterator, load_embd
from torch.autograd import Variable
from torch.utils.data.dataloader import DataLoader
import time
from dpp_nets.my_torch.utilities import pad_tensor


root = '/Users/Max/data/beer_reviews'
data_file = 'reviews.aspect3.train.txt.gz'
embd_file = 'review+wiki.filtered.200.txt.gz'
save_path = os.path.join(root,'pytorch/aspect3_train.pt')
data_path = os.path.join(root, data_file)
embd_path = os.path.join(root, embd_file)


def read_rationales(path):
    """
    This reads the json.annotations file. 
    Creates a list of dictionaries, which holds the 994 reviews for which
    sentence-level annotations are available. 
    """
    data = []
    fopen = gzip.open if path.endswith(".gz") else open
    with fopen(path) as fin:
        for line in fin:
            item = json.loads(line)
            data.append(item)
    return data

In [4]:
from collections import defaultdict
import torch
import torch.nn as nn
from dpp_nets.my_torch.linalg import custom_decomp
from dpp_nets.my_torch.DPP import DPP
from dpp_nets.my_torch.DPP import AllInOne
from dpp_nets.my_torch.utilities import compute_baseline

class DPP_Classifier(nn.Module):
    
    def __init__(self, dtype):
        
        super(DPP_Classifier, self).__init__()
        # Float vs Double
        self.dtype = dtype

        # Network parameters
        self.kernel_in = kernel_in = 400
        self.kernel_h = kernel_h = 1000
        self.kernel_out = kernel_out = 400

        self.pred_in = pred_in = 200 # kernel_in / 2
        self.pred_h = pred_h = 500
        self.pred_h2 = pred_h2 = 200
        self.pred_out = pred_out = 3
        
        # 2-Hidden-Layer Networks 
        self.kernel_net = torch.nn.Sequential(nn.Linear(kernel_in, kernel_h), nn.ELU(),
                                              nn.Linear(kernel_h, kernel_h), nn.ELU(), 
                                              nn.Linear(kernel_h, kernel_out))
        # 3-Hidden-Layer-Networks
        self.pred_net = torch.nn.Sequential(nn.Linear(pred_in, pred_h), nn.ReLU(),
                                             nn.Linear(pred_h, pred_h), nn.ReLU(),
                                             nn.Linear(pred_h, pred_h2), nn.ReLU(),
                                             nn.Linear(pred_h2, pred_out), nn.Sigmoid())
        
        self.kernel_net.type(self.dtype)
        self.pred_net.type(self.dtype)
        
        # Sampling Parameter
        self.alpha_iter = 5

        # Convenience
        self.kernels = []
        self.subsets = None
        self.picks = None
        self.preds = None
        
        self.saved_subsets = None
        self.saved_losses = None # not really necesary
        self.saved_baselines = None # not really necessary
        
    def forward(self, reviews):
        """
        reviews: batch_size x max_set_size x embd_dim = 200
        Output: batch_size x pred_out (the prediction)
        Challenges: Need to resize tensor appropriately and 
        measure length etc. 
        """
        batch_size, max_set_size, embd_dim = reviews.size()
        alpha_iter = self.alpha_iter
        self.saved_subsets = actions = [[] for i in range(batch_size)]
        picks = [[] for i in range(batch_size)]
        
        # Create context
        lengths = reviews.sum(2).abs().sign().sum(1)
        context = (reviews.sum(1) / lengths.expand_as(reviews.sum(1))).expand_as(reviews)
        mask = reviews.sum(2).abs().sign().expand_as(reviews).byte()

        # Mask out zero words
        reviews = reviews.masked_select(mask).view(-1, embd_dim)
        context = context.masked_select(mask).view(-1, embd_dim)

        # Compute batched_kernel
        kernel_input = torch.cat([reviews, context], dim=1)
        kernel_output = self.kernel_net(kernel_input)
        
        # Extract the kernel for each review from batched_kernel
        s = list(lengths.squeeze().cumsum(0).long().data - lengths.squeeze().long().data)
        e = list(lengths.squeeze().cumsum(0).long().data)

        for i, (s, e) in enumerate(zip(s, e)):
            review = reviews[s:e] # original review, without zero words
            kernel = kernel_output[s:e] # corresponding kernel 
            self.kernels.append(kernel.data)
            #vals, vecs = custom_decomp()(kernel)
            for j in range(alpha_iter):
                subset = AllInOne()(kernel)
                #subset = DPP()(vals, vecs)
                actions[i].append(subset)
                pick = subset.diag().mm(review).sum(0)
                picks[i].append(pick)

        # Predictions
        picks = torch.stack([torch.stack(pick) for pick in picks]).view(-1, embd_dim)
        preds = self.pred_net(picks).view(batch_size, alpha_iter, -1)
        
        return preds

def register_rewards(preds, targets, criterion, net):
    
    #targets = targets.unsqueeze(1).unsqueeze(1).expand_as(preds)
    targets = targets.unsqueeze(1).expand_as(preds)
    loss = criterion(preds, targets)
    
    actions = net.saved_subsets
    
    losses = ((preds - targets)**2).mean(2)
    losses = [[i.data[0] for i in row] for row in losses]
    net.saved_losses = losses # not really necessary
    baselines = [compute_baseline(i) for i in losses]
    net.saved_baselines = baselines # not really necessary
    
    for actions, rewards in zip(actions, baselines):
        for action, reward in zip(actions, rewards):
            action.reinforce(reward)

    return loss

In [5]:
# Useful Support

class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count
        
def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    """
    This is good!
    """
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, 'model_best.pth.tar')
        
def adjust_learning_rate(optimizer, epoch):
    """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
    lr = optimizer.state_dict()['param_groups'][0]['lr']
    lr = lr * (0.1 ** (epoch // 5))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

In [144]:
def train(train_loader, embd, model, criterion, optimizer, epoch, dtype):
    
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    subset_size = AverageMeter()
    
    target_dim = 3

    end = time.time()
    for i, (review, target) in enumerate(train_loader):
        
        # measure data loading time
        data_time.update(time.time() - end)
        
        targets = Variable(target[:,:target_dim].type(dtype))
        reviews = embd(Variable(review)).type(dtype)

        # compute output
        model.alpha_iter = 2
        pred = model(reviews)
        loss = register_rewards(pred, targets, criterion, model)

        ##measure accuracy and record loss ????????????????????????
        # prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.data[0], reviews.size(0))
        for l in model.saved_subsets:
            for s in l:
                subset_size.update(s.data.sum())
        # top1.update(prec1[0], input.size(0))
        # top5.update(prec5[0], input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        #if i % args.print_freq == 0:
        if i % print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'SSize {subset_size.val:.2f} ({subset_size.avg: .2f})'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(
                   epoch, i, len(train_loader), batch_time=batch_time,
                   data_time=data_time, subset_size = subset_size, loss=losses))

def validate(val_loader, model, criterion):
    
    batch_time = AverageMeter()
    losses = AverageMeter()
    t_prec = AverageMeter()
    t_recall = AverageMeter()
    t_tp = AverageMeter()
    t_fp = AverageMeter()
    t_fn = AverageMeter()
    
    target_dim = 3

    # switch to evaluate mode
    # model.eval()

    end = time.time()
    for i, (review, target) in enumerate(val_loader):
        
        target = target.sum(1).sign().type(dtype).squeeze().byte()
        # targets = target[:,:target_dim,:].type(dtype)
        reviews = embd(Variable(review, volatile=True)).type(dtype)

        # compute output
        model.alpha_iter = 1
        preds = model(reviews)

        subset = model.saved_subsets[0][0]
        subset = pad_tensor(subset.data,0,0,412).byte()
        # target = targets

        # targets = target[:,:target_dim,:].type(dtype)
        reviews = embd(Variable(review, volatile=True)).type(dtype)

        # compute output
        model.alpha_iter = 1
        preds = model(reviews)
        
        subset = model.saved_subsets[0][0]
        subset = pad_tensor(subset.data,0,0,412).byte()
        # target = target[:,:target_dim,:].squeeze()

        retriev = subset.sum()
        relev = target.sum()

        tp = target.masked_select(subset).sum()
        fp = (1 - target.masked_select(subset)).sum()
        fn = (1 - subset.masked_select(target)).sum()
        t_tp.update(tp)
        t_fp.update(fp)
        t_fn.update(fn)

        if retriev: 
            prec = tp / retriev
            t_prec.update(prec)

        if relev: 
            recall = tp / relev
            t_recall.update(recall)

        # measure accuracy and record loss
        #prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        #losses.update(loss.data[0], input.size(0))
        #top1.update(prec1[0], input.size(0))
        #top5.update(prec5[0], input.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % 100 == 0:
            print('Test: [{0}/{1}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Precision {t_prec.val:.4f} ({t_prec.avg:.4f})\t'
                  'Recall {t_recall.val:.4f} ({t_recall.avg:.4f})\t'.format(
                   i, len(val_loader), batch_time=batch_time, t_prec=t_prec, t_recall=t_recall))
            
    return t_prec.avg

In [None]:
### MAIN PROGRAMME


global best_prec1
best_prec1 = 0

# set parameters
lr = 1e-1
momentum = 0.9
weight_decay = 0.
start_epoch = 0
epochs = 1
batch_size = 20
print_freq = 10

data = '/Users/Max/data/beer_reviews/pytorch'
dtype = torch.DoubleTensor

# create model
embd = load_embd('/Users/Max/data/beer_reviews/pytorch/embeddings.pt')
model = DPP_Classifier(torch.DoubleTensor)

# define loss function (criterion) and optimizer
criterion = nn.L1Loss()

optimizer = torch.optim.SGD(model.parameters(), lr,
                            momentum=momentum,
                            weight_decay=weight_decay)

# Data loading code
trainpath = os.path.join(data, 'aspect1_train.pt')
valpath = os.path.join(data, 'aspect1_heldout.pt')
ratpath = os.path.join(data, 'annotated.pt')

train_set = torch.load(trainpath)
val_set = torch.load(valpath)
rat_set = torch.load(ratpath)

rat_train_set = torch.load(os.path.join(data, 'annotated_common.pt'))
#train_loader = DataLoader(train_set, batch_size, shuffle=True)
#val_loader = DataLoader(val_set)
rat_train_loader = DataLoader(rat_train_set, batch_size, shuffle=True)
rat_loader = DataLoader(rat_set)

In [None]:
epochs = 20
criterion = nn.L1Loss()

for epoch in range(start_epoch, epochs):
    adjust_learning_rate(optimizer, epoch)

    # train for one epoch
    train(rat_train_loader, embd, model, criterion, optimizer, epoch, dtype)

    # evaluate on validation set
    prec1 = validate(rat_loader, model, criterion)

# remember best prec@1 and save checkpoint
is_best = prec1 > best_prec1
best_prec1 = max(prec1, best_prec1)
save_checkpoint({
    'epoch': epoch + 1,
    'state_dict': model.state_dict(),
    'best_prec1': best_prec1,
    'optimizer' : optimizer.state_dict(),
}, is_best)

Epoch: [0][0/50]	Time 2.490 (2.490)	Data 0.003 (0.003)	SSize 16.00 ( 20.30)Loss 0.0993 (0.0993)	
Epoch: [0][10/50]	Time 3.409 (2.320)	Data 0.001 (0.001)	SSize 14.00 ( 19.35)Loss 0.0844 (0.0835)	
Epoch: [0][20/50]	Time 3.378 (2.552)	Data 0.000 (0.006)	SSize 18.00 ( 19.47)Loss 0.0839 (0.0839)	
Epoch: [0][30/50]	Time 3.880 (2.677)	Data 0.001 (0.004)	SSize 19.00 ( 19.42)Loss 0.0829 (0.0850)	
Epoch: [0][40/50]	Time 3.254 (2.706)	Data 0.000 (0.003)	SSize 18.00 ( 19.44)Loss 0.0813 (0.0845)	
Test: [0/994]	Time 0.207 (0.207)	Precision 0.3636 (0.3636)	Recall 0.1127 (0.1127)	
Test: [100/994]	Time 0.061 (0.095)	Precision 0.0526 (0.4306)	Recall 0.0526 (0.1626)	
Test: [200/994]	Time 0.112 (0.098)	Precision 0.5652 (0.4626)	Recall 0.2500 (0.1689)	
Test: [300/994]	Time 0.054 (0.097)	Precision 0.2500 (0.4488)	Recall 0.1136 (0.1706)	
Test: [400/994]	Time 0.048 (0.094)	Precision 0.5625 (0.4495)	Recall 0.2571 (0.1749)	
Test: [500/994]	Time 0.077 (0.091)	Precision 0.6000 (0.4525)	Recall 0.1519 (0.1769)	
Tes

Epoch: [6][20/50]	Time 2.776 (2.395)	Data 0.000 (0.000)	SSize 19.00 ( 19.44)Loss 0.0705 (0.0838)	
Epoch: [6][30/50]	Time 3.299 (2.404)	Data 0.000 (0.000)	SSize 18.00 ( 19.47)Loss 0.0667 (0.0836)	
Epoch: [6][40/50]	Time 2.417 (2.342)	Data 0.000 (0.000)	SSize 18.00 ( 19.42)Loss 0.0657 (0.0837)	
Test: [0/994]	Time 0.144 (0.144)	Precision 0.4545 (0.4545)	Recall 0.1408 (0.1408)	
Test: [100/994]	Time 0.042 (0.062)	Precision 0.2500 (0.4491)	Recall 0.2632 (0.1697)	
Test: [200/994]	Time 0.139 (0.071)	Precision 0.5238 (0.4712)	Recall 0.2115 (0.1715)	
Test: [300/994]	Time 0.078 (0.074)	Precision 0.2857 (0.4588)	Recall 0.1364 (0.1744)	
Test: [400/994]	Time 0.075 (0.077)	Precision 0.3529 (0.4553)	Recall 0.1714 (0.1764)	
Test: [500/994]	Time 0.078 (0.083)	Precision 0.7391 (0.4532)	Recall 0.2152 (0.1751)	
Test: [600/994]	Time 0.063 (0.079)	Precision 0.2857 (0.4495)	Recall 0.1091 (0.1739)	
Test: [700/994]	Time 0.039 (0.077)	Precision 0.6667 (0.4521)	Recall 0.2857 (0.1752)	
Test: [800/994]	Time 0.105 (

In [None]:
import random
#word_to_ix = make_embd(embd_path, only_index_dict=True)
#ix_to_word = {ix: word for word, ix in word_to_ix.items()}

rat_set, ix_to_word
def sample_words(rat_set, model, ix_to_word):
    
    # Sample a review
    ix = random.randint(0,len(rat_set))

    # Make a prediction
    x = rat_set.data_tensor[ix].unsqueeze(0)
    review = embd(Variable(x, volatile=True)).type(dtype)
    model.alpha_iter = 1
    model(review)

    # What words were selected
    subset = model.saved_subsets[0][0]
    subset = pad_tensor(subset.data,0,0,412).byte()

    # Convert to words
    all_words = [ix_to_word[ix -1] for ix in x.squeeze() if ix > 0]
    filtered_words = [ix_to_word[ix -1] for ix in x.masked_select(subset)]
    print(" ".join(all_words) )
    print("DPP Selection: ", filtered_words)


    ix = random.randint(0,len(rat_set))
    rat_set.data_tensor[ix].unsqueeze(0)

In [150]:
   
def sample_prediction(rat_set, model):
    # Sample a review
    ix = random.randint(0,len(rat_train_set))

    # Make a prediction
    x = rat_train_set.data_tensor[ix].unsqueeze(0)
    target = rat_train_set.target_tensor[ix][:3]
    review = embd(Variable(x, volatile=True)).type(dtype)
    model.alpha_iter = 1
    pred = model(review).data.squeeze()
    print(pred, target)
    return pred, target

In [151]:
pred, target = sample_prediction(rat_set, model)


 0.7798
 0.7547
 0.7600
[torch.DoubleTensor of size 3]
 
 0.8000
 0.9000
 0.8000
[torch.FloatTensor of size 3]



In [153]:
criterion(Variable(pred), Variable(target))

TypeError: DoubleMSECriterion_updateOutput received an invalid combination of arguments - got ([32;1mint[0m, [32;1mtorch.DoubleTensor[0m, [31;1mtorch.FloatTensor[0m, [32;1mtorch.DoubleTensor[0m, [32;1mbool[0m), but expected (int state, torch.DoubleTensor input, torch.DoubleTensor target, torch.DoubleTensor output, bool sizeAverage)

In [155]:
torch.save(model.pred_net.state_dict(), 'pred_dict25.pt')