In [None]:
import torch
import torch.nn as nn
from collections import OrderedDict
import shutil
import time
import gzip
import os
import json
import numpy as np
from dpp_nets.utils.io import make_embd, make_tensor_dataset, load_tensor_dataset
from dpp_nets.utils.io import data_iterator, load_embd
from torch.autograd import Variable
from torch.utils.data.dataloader import DataLoader
import time
from dpp_nets.my_torch.utilities import pad_tensor


# Locations of the raw beer-review data and of the word embeddings.
# NOTE(review): absolute, machine-specific path; adjust `root` when running elsewhere.
root = '/Users/Max/data/beer_reviews'
data_file = 'reviews.aspect3.train.txt.gz'
embd_file = 'review+wiki.filtered.200.txt.gz'

# Both input files live directly under `root`.
data_path, embd_path = (os.path.join(root, name) for name in (data_file, embd_file))
# Destination of the serialised training set.
save_path = os.path.join(root, 'pytorch/aspect3_train.pt')


def read_rationales(path):
    """
    Read a JSON-lines annotation file into a list of decoded objects.

    The file holds one JSON document per line (per the original note, the
    annotations file contains the 994 reviews for which sentence-level
    annotations are available). A path ending in ".gz" is decompressed
    transparently.

    Arguments:
    path: location of the (optionally gzip-compressed) annotations file.

    Returns:
    A list with one decoded JSON object per non-blank line, in file order.
    """
    fopen = gzip.open if path.endswith(".gz") else open
    # Text mode with an explicit encoding: gzip.open defaults to binary and
    # plain open() defaults to the platform encoding, so make both branches
    # behave the same regardless of the machine the notebook runs on.
    with fopen(path, "rt", encoding="utf-8") as fin:
        # Blank lines (e.g. a trailing newline) are not JSON documents; skip
        # them instead of letting json.loads raise.
        return [json.loads(line) for line in fin if line.strip()]

In [None]:
from collections import defaultdict
import torch
import torch.nn as nn
from dpp_nets.my_torch.linalg import custom_decomp
from dpp_nets.my_torch.DPP import DPP
from dpp_nets.my_torch.DPP import AllInOne
from dpp_nets.my_torch.utilities import compute_baseline

class DPP_Classifier(nn.Module):
    """
    Two-stage model: `kernel_net` maps every (word, context) pair of a review
    to a kernel row, `AllInOne` turns each review's kernel into word subsets,
    and `pred_net` maps the summed embeddings of every subset to `pred_out`
    sigmoid outputs.

    NOTE(review): `AllInOne` is imported from dpp_nets.my_torch.DPP; it is
    presumably a stochastic DPP sampler returning a 0/1 indicator per word
    (its result is used as `subset.diag()` below) -- confirm in that module.
    """
    
    def __init__(self, dtype):
        """
        Arguments:
        dtype: tensor type (e.g. torch.DoubleTensor) both sub-networks are cast to.
        """
        
        super(DPP_Classifier, self).__init__()
        # Float vs Double
        self.dtype = dtype

        # Network parameters
        self.kernel_in = kernel_in = 400  # word embedding concatenated with context (2 x 200)
        self.kernel_h = kernel_h = 1000
        self.kernel_out = kernel_out = 400

        self.pred_in = pred_in = 200 # kernel_in / 2
        self.pred_h = pred_h = 500
        self.pred_h2 = pred_h2 = 200
        self.pred_out = pred_out = 3
        
        # 2-Hidden-Layer Networks 
        self.kernel_net = torch.nn.Sequential(nn.Linear(kernel_in, kernel_h), nn.ELU(),
                                              nn.Linear(kernel_h, kernel_h), nn.ELU(), 
                                              nn.Linear(kernel_h, kernel_out))
        # 3-Hidden-Layer-Networks
        self.pred_net = torch.nn.Sequential(nn.Linear(pred_in, pred_h), nn.ReLU(),
                                             nn.Linear(pred_h, pred_h), nn.ReLU(),
                                             nn.Linear(pred_h, pred_h2), nn.ReLU(),
                                             nn.Linear(pred_h2, pred_out), nn.Sigmoid())
        
        self.kernel_net.type(self.dtype)
        self.pred_net.type(self.dtype)
        
        # Sampling Parameter: number of subsets drawn per review in forward()
        self.alpha_iter = 5

        # Convenience
        self.kernels = []  # kernels of the most recent forward pass
        self.subsets = None
        self.picks = None
        self.preds = None
        
        self.saved_subsets = None  # subsets of the most recent forward pass
        self.saved_losses = None # not really necessary
        self.saved_baselines = None # not really necessary
        
    def forward(self, reviews):
        """
        Sample `alpha_iter` word subsets per review and predict from each.

        Arguments:
        reviews: batch_size x max_set_size x embd_dim (= 200). Words whose
        embedding sums to exactly zero are treated as padding and dropped.

        Returns:
        batch_size x alpha_iter x pred_out predictions.

        Side effects:
        `self.saved_subsets[i][j]` holds the j-th subset drawn for review i and
        `self.kernels[i]` the kernel data of review i, both for this call only.
        """
        batch_size, max_set_size, embd_dim = reviews.size()
        alpha_iter = self.alpha_iter
        self.saved_subsets = actions = [[] for i in range(batch_size)]
        picks = [[] for i in range(batch_size)]
        # Reset per call, like saved_subsets: appending forever would keep the
        # kernels of every batch ever seen alive and grow memory without bound.
        self.kernels = []
        
        # Create context
        # lengths: number of non-padding words per review
        lengths = reviews.sum(2).abs().sign().sum(1)
        # context: mean embedding of the review, repeated for every word slot
        context = (reviews.sum(1) / lengths.expand_as(reviews.sum(1))).expand_as(reviews)
        mask = reviews.sum(2).abs().sign().expand_as(reviews).byte()

        # Mask out zero words
        reviews = reviews.masked_select(mask).view(-1, embd_dim)
        context = context.masked_select(mask).view(-1, embd_dim)

        # Compute batched_kernel
        kernel_input = torch.cat([reviews, context], dim=1)
        kernel_output = self.kernel_net(kernel_input)
        
        # Extract the kernel for each review from batched_kernel:
        # review i occupies rows starts[i]:ends[i] of the flattened batch.
        starts = list(lengths.squeeze().cumsum(0).long().data - lengths.squeeze().long().data)
        ends = list(lengths.squeeze().cumsum(0).long().data)

        for i, (start, end) in enumerate(zip(starts, ends)):
            review = reviews[start:end] # original review, without zero words
            kernel = kernel_output[start:end] # corresponding kernel 
            self.kernels.append(kernel.data)
            for j in range(alpha_iter):
                subset = AllInOne()(kernel)
                actions[i].append(subset)
                # Sum of the embeddings of the selected words
                pick = subset.diag().mm(review).sum(0)
                picks[i].append(pick)

        # Predictions
        picks = torch.stack([torch.stack(pick) for pick in picks]).view(-1, embd_dim)
        preds = self.pred_net(picks).view(batch_size, alpha_iter, -1)
        
        return preds

def register_rewards(preds, targets, criterion, net):
    """
    Compute the training loss and attach a reward to every sampled subset.

    Arguments:
    preds: predictions returned by `net` (batch_size x alpha_iter x pred_out
    according to DPP_Classifier.forward).
    targets: one target row per review; broadcast over the alpha_iter axis.
    criterion: loss applied to (preds, expanded targets); its value is returned.
    net: model whose `saved_subsets` receive the rewards; `saved_losses` and
    `saved_baselines` are overwritten on it.

    Returns:
    criterion(preds, expanded targets).

    The rewards themselves come from the mean squared error of each sample,
    passed through `compute_baseline` (project helper; NOTE(review): exact
    baseline semantics not visible here).
    """
    expanded_targets = targets.unsqueeze(1).expand_as(preds)
    loss = criterion(preds, expanded_targets)

    # Per-sample squared error, converted to plain Python numbers.
    squared_error = ((preds - expanded_targets) ** 2).mean(2)
    per_review_losses = []
    for row in squared_error:
        per_review_losses.append([entry.data[0] for entry in row])
    net.saved_losses = per_review_losses  # not really necessary

    per_review_rewards = [compute_baseline(row) for row in per_review_losses]
    net.saved_baselines = per_review_rewards  # not really necessary

    # Hand every stochastic subset the reward of its own sample.
    for review_subsets, review_rewards in zip(net.saved_subsets, per_review_rewards):
        for sampled_subset, reward in zip(review_subsets, review_rewards):
            sampled_subset.reinforce(reward)

    return loss

In [None]:
# Useful Support

class AverageMeter(object):
    """Keeps the most recent value and a running weighted average."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear the last value, the weighted sum, the count and the average."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record `val` as observed `n` times and refresh the running average."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count
        
def save_checkpoint(state, is_best, filename='checkpoint.pth.tar',
                    best_filename='model_best.pth.tar'):
    """
    Serialise `state` to disk and optionally keep a copy as the best model.

    Arguments:
    state: object handed to torch.save (the notebook passes a dict with the
    epoch, model/optimizer state dicts and best precision).
    is_best: when true, the freshly written checkpoint is also copied to
    `best_filename`.
    filename: where the checkpoint is written.
    best_filename: destination of the "best so far" copy; defaults to the
    previously hard-coded 'model_best.pth.tar'.
    """
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, best_filename)
        
def adjust_learning_rate(optimizer, epoch, decay=0.1, step=5):
    """
    Set the learning rate to the initial LR decayed by `decay` every `step` epochs.

    Arguments:
    optimizer: object exposing `param_groups` (list of dicts with an 'lr' key).
    epoch: current epoch index (0-based).
    decay: multiplicative factor applied once per completed `step` epochs.
    step: number of epochs between two decays.

    The schedule is computed from the *initial* learning rate, which is
    remembered in each param group under 'initial_lr' on the first call.
    Reading the current 'lr' back instead would re-apply the decay on every
    call, shrinking the rate by an extra factor each epoch.
    """
    factor = decay ** (epoch // step)
    for param_group in optimizer.param_groups:
        base_lr = param_group.setdefault('initial_lr', param_group['lr'])
        param_group['lr'] = base_lr * factor

In [None]:
def train(train_loader, embd, model, criterion, optimizer, epoch, dtype):
    """
    Run one pass over `train_loader`, updating `model` after every batch.

    Arguments:
    train_loader: iterable yielding (review, target) batches.
    embd: callable mapping the review index tensor to embeddings.
    model: network to train; its `alpha_iter` is set to 2 for training and
    its `saved_subsets` are read to track subset sizes.
    criterion: loss handed to register_rewards.
    optimizer: optimizer stepping the model parameters.
    epoch: epoch index, used only in the progress line.
    dtype: tensor type the targets and embedded reviews are cast to.

    NOTE(review): the logging interval `print_freq` is read from the notebook
    globals, so the main set-up cell must have been run first.
    """
    
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    subset_size = AverageMeter()
    
    # Only the first `target_dim` target columns are used.
    target_dim = 3

    end = time.time()
    for i, (review, target) in enumerate(train_loader):
        
        # measure data loading time
        data_time.update(time.time() - end)
        
        targets = Variable(target[:,:target_dim].type(dtype))
        reviews = embd(Variable(review)).type(dtype)

        # compute output
        model.alpha_iter = 2
        pred = model(reviews)
        loss = register_rewards(pred, targets, criterion, model)

        # record loss and the size of every sampled subset
        losses.update(loss.data[0], reviews.size(0))
        for l in model.saved_subsets:
            for s in l:
                subset_size.update(s.data.sum())

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % print_freq == 0:
            # The SSize field now ends with a tab like the others; previously
            # it ran straight into "Loss".
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'SSize {subset_size.val:.2f} ({subset_size.avg: .2f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(
                   epoch, i, len(train_loader), batch_time=batch_time,
                   data_time=data_time, subset_size = subset_size, loss=losses))

def validate(val_loader, model, criterion):
    """
    Compare one sampled word subset per review against the annotated rationale.

    Arguments:
    val_loader: iterable yielding (review, target) pairs; the code reads only
    the first review of each batch (`saved_subsets[0][0]`), so it is meant to
    be used with batch size 1.
    model: network whose sampled subset is evaluated (`alpha_iter` is set to 1).
    criterion: unused; kept so existing calls keep working.

    Returns:
    The running average precision over all reviews with a non-empty subset.

    NOTE(review): `embd` and `dtype` are read from the notebook globals, and
    412 is presumably the padded review length expected by `pad_tensor` --
    confirm against the data set.
    """
    
    batch_time = AverageMeter()
    t_prec = AverageMeter()
    t_recall = AverageMeter()
    t_tp = AverageMeter()
    t_fp = AverageMeter()
    t_fn = AverageMeter()

    # switch to evaluate mode
    # model.eval()

    end = time.time()
    for i, (review, target) in enumerate(val_loader):
        
        # Collapse dimension 1 of the annotation tensor into a 0/1 mask.
        target = target.sum(1).sign().type(dtype).squeeze().byte()
        reviews = embd(Variable(review, volatile=True)).type(dtype)

        # compute output (a single forward pass; the result of interest is
        # the subset the model stores, not the prediction itself)
        model.alpha_iter = 1
        model(reviews)

        subset = model.saved_subsets[0][0]
        subset = pad_tensor(subset.data,0,0,412).byte()

        retriev = subset.sum()
        relev = target.sum()

        tp = target.masked_select(subset).sum()
        fp = (1 - target.masked_select(subset)).sum()
        fn = (1 - subset.masked_select(target)).sum()
        t_tp.update(tp)
        t_fp.update(fp)
        t_fn.update(fn)

        # Precision/recall are undefined when nothing was selected/annotated.
        if retriev: 
            prec = tp / retriev
            t_prec.update(prec)

        if relev: 
            recall = tp / relev
            t_recall.update(recall)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % 100 == 0:
            print('Test: [{0}/{1}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Precision {t_prec.val:.4f} ({t_prec.avg:.4f})\t'
                  'Recall {t_recall.val:.4f} ({t_recall.avg:.4f})\t'.format(
                   i, len(val_loader), batch_time=batch_time, t_prec=t_prec, t_recall=t_recall))
            
    return t_prec.avg

In [None]:
### MAIN PROGRAMME


# Best validation precision seen so far (updated by the training cell).
best_prec1 = 0

# Optimisation hyper-parameters
lr, momentum, weight_decay = 1e-1, 0.9, 0.
start_epoch, epochs = 0, 1
batch_size = 20
print_freq = 10  # read by train() as the logging interval

# NOTE(review): absolute, machine-specific data location.
data = '/Users/Max/data/beer_reviews/pytorch'
dtype = torch.DoubleTensor

# Embedding layer and model
embd = load_embd('/Users/Max/data/beer_reviews/pytorch/embeddings.pt')
model = DPP_Classifier(dtype)

# Loss function (criterion) and optimizer
criterion = nn.L1Loss()
optimizer = torch.optim.SGD(model.parameters(), lr,
                            weight_decay=weight_decay,
                            momentum=momentum)

# Serialised data sets
trainpath, valpath, ratpath = (
    os.path.join(data, name)
    for name in ('aspect1_train.pt', 'aspect1_heldout.pt', 'annotated.pt')
)
train_set, val_set, rat_set = (
    torch.load(path) for path in (trainpath, valpath, ratpath)
)
rat_train_set = torch.load(os.path.join(data, 'annotated_common.pt'))

# Only the annotated sets are iterated over in this notebook.
#train_loader = DataLoader(train_set, batch_size, shuffle=True)
#val_loader = DataLoader(val_set)
rat_train_loader = DataLoader(rat_train_set, batch_size, shuffle=True)
rat_loader = DataLoader(rat_set)

In [None]:
epochs = 20
criterion = nn.L1Loss()

for epoch in range(start_epoch, epochs):
    adjust_learning_rate(optimizer, epoch)

    # train for one epoch
    train(rat_train_loader, embd, model, criterion, optimizer, epoch, dtype)

    # evaluate on validation set
    prec1 = validate(rat_loader, model, criterion)

    # remember best prec@1 and save checkpoint -- done once per epoch so that
    # `is_best` really compares epochs and the best model is the one copied
    # (outside the loop only the final epoch was ever checkpointed, and an
    # empty epoch range left `prec1` undefined).
    is_best = prec1 > best_prec1
    best_prec1 = max(prec1, best_prec1)
    save_checkpoint({
        'epoch': epoch + 1,
        'state_dict': model.state_dict(),
        'best_prec1': best_prec1,
        'optimizer' : optimizer.state_dict(),
    }, is_best)

In [None]:
import random
#word_to_ix = make_embd(embd_path, only_index_dict=True)
#ix_to_word = {ix: word for word, ix in word_to_ix.items()}

# NOTE(review): `ix_to_word` is only defined by the commented-out lines above
# in this view; on a fresh kernel the expression below raises NameError unless
# it has been defined elsewhere.
rat_set, ix_to_word
def sample_words(rat_set, model, ix_to_word):
    """
    Print a randomly chosen review together with the words the model selects.

    Arguments:
    rat_set: data set exposing `data_tensor` (one row of word indices per review).
    model: network whose sampled subset is inspected (`alpha_iter` is set to 1).
    ix_to_word: mapping from (word index - 1) to the word string; index 0 is
    treated as padding.

    NOTE(review): `embd` and `dtype` are read from the notebook globals, and
    412 is presumably the padded review length -- confirm against the data set.
    """
    
    # Sample a review. randrange excludes the upper bound, whereas
    # randint(0, len(...)) could return len(...) and index out of range.
    ix = random.randrange(len(rat_set))

    # Make a prediction
    x = rat_set.data_tensor[ix].unsqueeze(0)
    review = embd(Variable(x, volatile=True)).type(dtype)
    model.alpha_iter = 1
    model(review)

    # What words were selected
    subset = model.saved_subsets[0][0]
    subset = pad_tensor(subset.data,0,0,412).byte()

    # Convert to words
    all_words = [ix_to_word[word_ix - 1] for word_ix in x.squeeze() if word_ix > 0]
    filtered_words = [ix_to_word[word_ix - 1] for word_ix in x.masked_select(subset)]
    print(" ".join(all_words) )
    print("DPP Selection: ", filtered_words)

In [None]:
   
def sample_prediction(rat_set, model):
    """
    Predict the targets of one randomly chosen annotated training review.

    Arguments:
    rat_set: unused. NOTE(review): the body samples from the notebook global
    `rat_train_set` instead -- confirm which data set is intended.
    model: network used for the prediction (`alpha_iter` is set to 1).

    Returns:
    (pred, target): the squeezed prediction data and the first three target
    values of the sampled review. Both are also printed.
    """
    # Sample a review. randrange excludes the upper bound, whereas
    # randint(0, len(...)) could return len(...) and index out of range.
    ix = random.randrange(len(rat_train_set))

    # Make a prediction
    x = rat_train_set.data_tensor[ix].unsqueeze(0)
    target = rat_train_set.target_tensor[ix][:3]
    review = embd(Variable(x, volatile=True)).type(dtype)
    model.alpha_iter = 1
    pred = model(review).data.squeeze()
    print(pred, target)
    return pred, target

In [None]:
# Draw one (prediction, target) pair from the current model.
pred, target = sample_prediction(rat_set, model)

In [None]:
# Evaluate the current criterion on the pair sampled in the previous cell.
criterion(Variable(pred), Variable(target))

In [None]:
# Persist only the prediction network's weights (written to the working directory).
torch.save(model.pred_net.state_dict(), 'pred_dict25.pt')

In [None]:
import torch

# Scratch check: average 100 draws of torch.normal around the given means.
# torch.normal is called with the means only (no explicit std), and no seed is
# set, so the displayed value changes from run to run.
e = 0
for i in range(100):
    v = torch.normal(torch.FloatTensor([1,2,3,4,5]))
    e += v
e / 100

In [None]:
# Scratch: keep a reference to the non-linearity and apply torch.sin to `v`
# (`v` comes from an earlier cell).
non_lin = torch.sin
torch.sin(v)

In [None]:
# Same computation as torch.sin(v), through the alias defined above.
non_lin(v)

In [None]:
# Display the object the alias is bound to.
non_lin

In [None]:
# Scratch tensor of random "word embeddings": batch_size x set_size x embd_dim.
# NOTE(review): this rebinds the notebook-level `batch_size` set in the main cell.
batch_size = 2
set_size = 3
embd_dim = 4
words = torch.randn(batch_size, set_size, embd_dim)

In [None]:
# The original single line fused two statements and was a SyntaxError; they
# are separated here. NOTE(review): split point inferred from the neighbouring
# cells -- confirm this is what was intended.
v = torch.normal(torch.FloatTensor([1,2,3,4,5]))
# Candidate target function: cosine of the per-review mean of sin(words).
torch.cos(torch.sin(words).mean(1)).squeeze()

In [None]:
# Scratch: element-wise log of a fresh normal draw around the given means.
v = torch.normal(torch.FloatTensor([1,2,3,4,5]))
torch.log(v)

In [None]:
import numpy as np
# Settings of the synthetic clustered-words experiment.
# NOTE(review): rebinds the notebook-level `batch_size` and `dtype`.
batch_size, n_clusters, set_size = 100, 10, 40
pred_in = 50
embd_dim = pred_in
dtype = torch.DoubleTensor

# Fixed seed so that the cluster centres are the same on every run.
np.random.seed(0)
# One integer-valued centre per cluster, drawn from [-50, 50).
means = dtype(
    np.random.randint(-50, 50, [n_clusters, int(pred_in)]).astype("float")
)

def generate(batch_size):
    """
    Draw a synthetic batch of clustered words with context and target.

    Arguments:
    batch_size: number of sets to generate.

    Returns:
    (words, context, target) where `words` is
    batch_size x set_size x embd_dim, `context` is the sum of the words over
    dimension 1 expanded to the shape of `words`, and `target` is the sine of
    the mean squared word over dimension 1.

    Reads the notebook globals `n_clusters`, `set_size`, `embd_dim`, `dtype`
    and `means`. (Probably best to make `means` an attribute of a class, so
    that repeated training works with the same data distribution.)
    """
    # Generate index: every cluster occurs once per set, the remaining slots
    # are filled with uniformly drawn cluster ids.
    one_of_each = torch.arange(0, float(n_clusters)).expand(batch_size, n_clusters).long()
    fill_up = torch.multinomial(torch.ones(batch_size, n_clusters),
                                set_size - n_clusters, replacement=True)
    index = torch.cat([one_of_each, fill_up], dim=1)
    # Shuffle the slot order (the same permutation is applied to every set).
    index = index.t()[torch.randperm(set_size)].t().contiguous()

    # Generate words, context, target. torch.normal is called with the
    # selected cluster means only (no explicit std).
    flat_index = index.view(index.numel())
    noisy_means = torch.normal(means.index_select(0, flat_index))
    words = dtype(noisy_means.view(batch_size, set_size, embd_dim))
    context = dtype(words.sum(1).expand_as(words))

    mean_square = torch.pow(words.abs(), 2).mean(1)
    target = torch.sin(mean_square).squeeze()

    return words, context, target

In [None]:
# Generate a small batch of five sets and inspect the targets.
words, context, target = generate(5)
print(target)

In [None]:
# Average over target dimensions of std / mean taken across the batch.
(torch.std(target, dim=0) / torch.mean(target, dim=0)).mean()

In [None]:
# Display the generated targets.
target

In [None]:
# Scratch: six independent 2x2 standard-normal matrices (unseeded).
v1 = torch.randn(2,2)
v2 = torch.randn(2,2)
v3 = torch.randn(2,2)
v4 = torch.randn(2,2)
v5 = torch.randn(2,2)
v6 = torch.randn(2,2)


In [None]:
import torch.nn as nn
# Scratch: instantiate the MSE criterion and display it.
nn.MSELoss()

In [None]:
from dpp_nets.my_torch.simulator import SimKDPPDeepSet
import torch
# Build the project simulator with the same set size / cluster count as the
# synthetic-data cell. NOTE(review): other accepted keys of `network_params`
# are defined in dpp_nets.my_torch.simulator and not visible here.
network_params = {'set_size': 40, 'n_clusters': 10}
dtype = torch.DoubleTensor
sim = SimKDPPDeepSet(network_params, dtype)

In [None]:
# Small 10 -> 20 -> 10 network used to inspect nn.Module.modules().
mod = torch.nn.Sequential(nn.Linear(10,20), nn.ReLU(), nn.Linear(20,10))

In [None]:
# Print the container and every sub-module.
# NOTE(review): the loop variable reuses the name `mod`, so after this cell
# `mod` is bound to the last module yielded by modules(), not to the
# Sequential built in the previous cell.
for mod in mod.modules():
    print(mod)

In [None]:
# Random input with 10 rows and 20 features.
A = Variable(torch.randn(10,20))

In [None]:
# Apply `mod` to A. Which module `mod` names here depends on whether the
# modules() loop cell above has been run (it rebinds `mod`).
mod(A)

In [None]:
# Scratch: select whole word rows of one review with a 0/1 subset vector.
batch_size = 3
set_size = 4
embd_dim = 5
words = Variable(torch.randn(batch_size, set_size, embd_dim))
print(words)
# One indicator per word slot of a review (set_size entries).
subset = Variable(torch.ByteTensor([1,0,0,1]),requires_grad=True)
# Expand the indicator across the embedding dimension so that every element of
# a selected row is picked, then restore the (n_selected, embd_dim) layout.
words[1].masked_select(Variable(subset.data.expand_as(words[1].t())).t()).view(-1,embd_dim)

In [3]:
from dpp_nets.layers.layers import *

In [5]:
# Smoke test of DeepSetBaseline (brought in by the star import above): one
# forward/backward pass on random data, then inspect a weight gradient.
embd_dim, hidden_dim, enc_dim, target_dim = 10, 20, 10, 2
baseline = DeepSetBaseline(embd_dim, hidden_dim, enc_dim, target_dim)
batch_size = 4
max_set_size = 7
# Random batch: batch_size x max_set_size x embd_dim.
x = Variable(torch.randn(batch_size, max_set_size, embd_dim))
pred = baseline(x)
targets = Variable(torch.randn(batch_size, target_dim))
criterion = nn.MSELoss()  # NOTE(review): rebinds the notebook-level `criterion`
loss = criterion(pred, targets)
loss.backward()
# Gradients are populated by the backward() call above.
print(baseline.enc_layer2.weight.grad)

torch.Size([4, 10])
Variable containing:

Columns 0 to 9 
1.00000e-02 *
 -0.5300 -0.7917 -0.6452 -1.4612 -0.5058 -1.2006 -2.1787 -2.1236 -0.2686 -1.8842
  0.0000  0.0000 -0.1811  0.0000  0.0000  0.0000 -0.1205 -0.0070  0.0000 -0.0515
 -0.0227 -0.0506  0.0000 -0.2353 -0.1385 -0.0243  0.0491  0.0257  0.0000 -0.1554
  0.0347  0.0884  0.0000  0.3013  0.1799  0.5259  0.7815  0.5997  0.0063  0.5795
  0.0925  0.1635  0.0052  0.0660  0.0037  0.0513  0.0426  0.2012  0.0783  0.0565
 -0.5808 -1.4151 -0.0097 -1.9906 -0.6326 -1.1522 -1.3508 -2.0787 -0.6067 -1.2313
  0.5403  0.9329  0.5399  1.7154  0.4312  1.2229  2.0423  2.1862  0.1797  1.9186
 -0.0347 -0.0023 -0.0535 -0.0018  0.0255 -0.0330 -0.0577  0.0477  0.0363  0.0564
  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000
  0.0716 -0.0007 -0.0414 -0.0732  0.0192  0.0011 -0.0176 -0.0481  0.0712 -0.2228
 -0.2158 -0.4081 -0.0718 -0.2687 -0.1963 -0.1647 -0.5586 -1.0627 -0.2558 -0.7300
 -0.4459 -0.5748 -0.0900 -0.6765 -0.2