In [40]:
from dpp_nets.layers.layers import *
import torch
import torch.nn as nn
from collections import OrderedDict
import shutil
import time
import gzip
import os
import json
import numpy as np
from dpp_nets.utils.io import make_embd, make_tensor_dataset, load_tensor_dataset
from dpp_nets.utils.io import data_iterator, load_embd
from torch.autograd import Variable
from torch.utils.data.dataloader import DataLoader
import time
from dpp_nets.my_torch.utilities import pad_tensor

In [41]:
## Data Sets
# Directory holding the serialized data sets and embeddings. The default is the
# location used when this notebook was written; set BEER_REVIEWS_DIR to run the
# notebook on another machine without editing this cell.
data_dir = os.environ.get('BEER_REVIEWS_DIR', '/Users/Max/data/beer_reviews/pytorch')

# NOTE: torch.load unpickles the file it reads, so only load trusted files.
train_set = torch.load(os.path.join(data_dir, 'annotated_common.pt'))
rat_set = torch.load(os.path.join(data_dir, 'annotated.pt'))
# Embedding module; later cells call it on index tensors and read embd.weight.
embd = load_embd(os.path.join(data_dir, 'embeddings.pt'))

In [64]:
# Assemble the relative path of the gzipped training file for one aspect:
# the directory name is the prefix 'reviews.' followed by the aspect name.
train_path = os.path.join('data/', 'reviews.' + 'aspect1', 'train.txt.gz')
train_path

'data/reviews.aspect1/train.txt.gz'

In [85]:
# Prototype of a one-line progress message: fields are rendered separately
# and then glued together with ' | '.
epoch = 2
loss = 1.38582030
" | ".join(['Epoch: %d' % epoch, 'Validation Loss: %.5f' % loss])

'Epoch: 2 | Validation Loss: 1.38582'

In [97]:
# Inspect the contents of the saved checkpoint (path is relative to the
# notebook's working directory).
# NOTE: torch.load unpickles the file, so only open checkpoints you trust.
torch.load('checkpoint.pth.tar')

{'epoch:': 9,
 'lowest_loss': 15.40999049693346,
 'model': 'Deep Set Baseline',
 'optimizer': {'param_groups': [{'betas': (0.9, 0.999),
    'eps': 1e-08,
    'lr': 0.010000000000000002,
    'params': [4543810992,
     4543810288,
     4543810200,
     4543810024,
     4543809760,
     4543808968,
     4543811080,
     4543811168,
     4543811256,
     4543811344,
     4543811432,
     4543811520],
    'weight_decay': 0}],
  'state': {}},
 'state_dict': OrderedDict([('0.weight', 
                0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
               -0.0425 -0.0521  0.0683  ...   0.1130  0.0135  0.0482
               -0.0534 -0.0038 -0.0476  ...  -0.0365  0.0941 -0.0478
                         ...             ⋱             ...          
                0.0093 -0.0598  0.0637  ...  -0.0051  0.0202 -0.0329
                0.0317 -0.0415 -0.0221  ...   0.0125 -0.0892 -0.0764
               -0.0223 -0.0166  0.0155  ...   0.0024 -0.0372  0.0276
               [torch.FloatTensor

In [83]:
import math

# Formatting with a run-time precision: the nested {digits} field supplies
# the number of decimals used when rendering {number}.
n = 4
p = math.pi
'{number:.{digits}f}'.format(digits=n, number=p)

'3.1416'

In [91]:
# Parameters
batch_size = 25
# Both size() calls are unpacked into two values, so the data tensor and the
# embedding weight are 2-D here; only the second dimension of each is kept.
_, max_set_size = train_set.data_tensor.size()
_, embd_dim = embd.weight.size()

hidden_dim = 500
enc_dim = 200
target_dim = 3 # let's choose the first three aspects to learn!

# Baseline
# The full baseline pipeline is: embedding -> DeepSetBaseline -> sigmoid.
baseline_nets = DeepSetBaseline(embd_dim, hidden_dim, enc_dim, target_dim)
baseline = nn.Sequential(embd, baseline_nets, nn.Sigmoid())

# Model
# The trainer is assembled from a kernel network, a sampler and a prediction
# network (all defined in dpp_nets.layers.layers, not visible in this notebook).
kernel_dim = 200
kernel_net = KernelVar(embd_dim, hidden_dim, kernel_dim)
sampler = MarginalSampler()
pred_net = PredNet(embd_dim, hidden_dim, enc_dim, target_dim)
trainer = MarginalTrainer(kernel_net, sampler, pred_net)

# Trainer settings are plain attributes; how reg / reg_mean enter the loss is
# defined inside MarginalTrainer -- TODO confirm their meaning there.
trainer.reg = 0.1
trainer.reg_mean = 10
trainer.activation = nn.Sigmoid()

# Shuffled mini-batches over the training set.
train_loader = DataLoader(train_set, batch_size, shuffle=True)

In [None]:
# Training loop for the marginal model (kernel network + prediction network).

# One Adam parameter group per sub-network so each gets its own learning rate.
params = [
    {'params': trainer.kernel_net.parameters(), 'lr': 1e-3},
    {'params': trainer.pred_net.parameters(), 'lr': 1e-4},
]
optimizer = torch.optim.Adam(params)
trainer.reg = 0.1

for epoch in range(10):
    # Iterations are counted from 1 so the counter can be reported directly.
    for step, (review, target) in enumerate(train_loader, 1):
        # Embed the batch and keep only the first three target columns.
        words = embd(Variable(review))
        target = Variable(target[:, :3])
        loss = trainer(words, target)

        # Backpropagate + parameter updates
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report the current loss on every tenth iteration.
        if step % 10 == 0:
            print('Loss at it :', step, 'is', loss.data[0])
            
            


In [None]:
# Training script for the REINFORCE trainer (Rtrainer).
# TODO: incorporate the embedding into the trainer.

# Fresh networks; the sampler is a ReinforceSampler constructed with 3.
kernel_net = KernelVar(embd_dim, hidden_dim, kernel_dim)
sampler = ReinforceSampler(3)
pred_net = PredNet(embd_dim, hidden_dim, enc_dim, target_dim)

Rtrainer = ReinforceTrainer(kernel_net, sampler, pred_net)
Rtrainer.reg = 0.1
Rtrainer.reg_mean = 10
Rtrainer.activation = nn.Sigmoid()

# One Adam parameter group per sub-network so each gets its own learning rate.
params = [
    {'params': Rtrainer.kernel_net.parameters(), 'lr': 1e-3},
    {'params': Rtrainer.pred_net.parameters(), 'lr': 1e-4},
]
optimizer = torch.optim.Adam(params)

# The trainer runs in double precision, so inputs and targets are cast to match.
Rtrainer.double()

for epoch in range(20):
    # Iterations are counted from 1 so the counter can be reported directly.
    for step, (review, target) in enumerate(train_loader, 1):
        words = embd(Variable(review)).double()
        target = Variable(target[:, :3]).double()
        loss = Rtrainer(words, target)

        # Backpropagate + parameter updates
        optimizer.zero_grad()
        loss.backward()
        # Debug aid: print(Rtrainer.kernel_net.layer1.weight.grad)
        optimizer.step()

        # Report the current loss on every tenth iteration.
        if step % 10 == 0:
            print('Loss at it :', step, 'is', loss.data[0])



In [None]:
# Training loop for the baseline.
# Only baseline_nets' parameters are handed to the optimizer; the forward pass
# goes through the full `baseline` pipeline.
criterion = nn.MSELoss()
lr = 1e-4
optimizer = torch.optim.Adam(baseline_nets.parameters(), lr=lr)

for epoch in range(10):
    # Iterations are counted from 1 so the counter can be reported directly.
    for step, (review, target) in enumerate(train_loader, 1):
        # Keep only the first three target columns.
        target = Variable(target[:, :3])
        words = Variable(review)
        loss = criterion(baseline(words), target)

        # Backpropagate + parameter updates
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report the current loss on every tenth iteration.
        if step % 10 == 0:
            print('Loss at it :', step, 'is', loss.data[0])

In [None]:
def validate_baseline(val_set, model, criterion):
    """Print the criterion value of ``model`` over the whole of ``val_set``.

    Args:
        val_set: object exposing ``data_tensor`` (model inputs) and
            ``target_tensor`` (targets; only the first three columns are used).
        model: callable mapping the input Variable to predictions.
        criterion: callable ``criterion(pred, target)`` returning the loss.

    Returns:
        None. The first element of the loss data is printed.
    """
    # volatile=True marks the Variables as inference-only (legacy PyTorch API).
    inputs = Variable(val_set.data_tensor, volatile=True)
    targets = Variable(val_set.target_tensor[:, :3], volatile=True)
    print(criterion(model(inputs), targets).data[0])

In [None]:
def validate_model(val_set, model):
    """Print the loss of ``model`` on the whole of ``val_set`` with ``reg`` set to 0.

    The model is evaluated with ``model.reg = 0``. The previous value of
    ``model.reg`` is restored afterwards (even if evaluation raises), so that
    validating a model does not silently switch off its regularisation for
    later training or evaluation.

    Args:
        val_set: object exposing ``data_tensor`` (inputs passed through the
            notebook-level ``embd``) and ``target_tensor`` (targets; only the
            first three columns are used).
        model: callable ``model(x, y)`` returning the loss; its ``reg``
            attribute is temporarily overridden.

    Returns:
        None. The first element of the loss data is printed.
    """
    missing = object()
    saved_reg = getattr(model, 'reg', missing)
    model.reg = 0
    try:
        # volatile=True marks the Variables as inference-only (legacy PyTorch API).
        x = embd(Variable(val_set.data_tensor, volatile=True))
        y = Variable(val_set.target_tensor[:, :3], volatile=True)
        loss = model(x, y)
        print(loss.data[0])
    finally:
        # Put the model back exactly as it was handed in.
        if saved_reg is missing:
            del model.reg
        else:
            model.reg = saved_reg

In [None]:
# The REINFORCE trainer was switched to double precision for training; call
# .float() before validating, since validate_model feeds it the un-cast output
# of embd. validate_model prints the resulting loss.
Rtrainer.float()
validate_model(train_set, Rtrainer)

In [None]:
# Whole training set as inference-only (volatile) Variables:
# x holds the embedded inputs, y the first three target columns.
x = Variable(train_set.data_tensor, volatile=True)
x = embd(x)
y = Variable(train_set.target_tensor[:,:3], volatile=True)

In [None]:
# Swap the trainer's sampler for a ReinforceSampler constructed with 1 (the
# training cell used 3) and set alpha_iter to 1. The meaning of both values is
# defined in dpp_nets and is not visible here -- TODO confirm.
sampler = ReinforceSampler(1)
Rtrainer.sampler = sampler
Rtrainer.alpha_iter = 1

In [None]:
# Print the baseline's mean-squared error over the full training set.
validate_baseline(train_set, baseline, nn.MSELoss())

In [None]:
# Display the dimensions of the embedded inputs x.
x.size()

In [None]:
# Display the dimensions of the targets y.
y.size()

In [None]:
import random
def sample(model, sampler, embd, dataset):
    """Draw subsets for two consecutive, randomly chosen examples and print them.

    Args:
        model: trainer-like object exposing ``kernel_net``; the kernel network
            is called on the embedded inputs and must expose ``s_ix`` and
            ``e_ix`` afterwards.
        sampler: callable ``sampler(kernel, x)`` that stores its result in
            ``saved_subsets``; its ``s_ix`` / ``e_ix`` attributes are
            overwritten with the kernel network's values.
        embd: callable mapping a Variable of inputs to embeddings.
        dataset: object supporting ``len()`` and exposing ``data_tensor``.

    Returns:
        None. ``sampler.saved_subsets`` is printed.
    """
    # random.randint includes its upper bound, so cap the start index such
    # that the two-row window always lies inside the data set.
    start = random.randint(0, max(len(dataset) - 2, 0))
    x = embd(Variable(dataset.data_tensor[start:start + 2]))

    # Use the model that was passed in rather than a notebook-level global.
    kernel_net = model.kernel_net
    kernel = kernel_net(x)

    # Copy the index bookkeeping from the kernel network to the sampler;
    # it is read after the forward call, as in the surrounding notebook code.
    sampler.s_ix = kernel_net.s_ix
    sampler.e_ix = kernel_net.e_ix
    sampler(kernel, x)
    print(sampler.saved_subsets)

In [None]:
# Evaluate the REINFORCE trainer on ten consecutive, randomly chosen examples.
# random.randint includes its upper bound, so cap the start index such that
# the ten-row window always lies inside the data set (never empty or short).
rand = random.randint(0, max(len(train_set) - 10, 0))
x = train_set.data_tensor[rand:rand+10]
x = embd(Variable(x))
# Only the first three target columns are used.
y = Variable(train_set.target_tensor[rand:rand+10,:3])
Rtrainer(x, y)

In [None]:
# Sum the entries of every subset saved by the sampler during the last call,
# flattening the nested lists in saved_subsets into one list of sums.
[subset.data.sum() for group in Rtrainer.sampler.saved_subsets for subset in group]

In [1]:
import argparse

In [2]:
# Scratch: show the module-level documentation of argparse.
help(argparse)

Help on module argparse:

NAME
    argparse - Command-line parsing library

MODULE REFERENCE
    https://docs.python.org/3.6/library/argparse
    
    The following documentation is automatically generated from the Python
    source files.  It may be incomplete, incorrect or include features that
    are considered implementation detail and may vary between Python
    implementations.  When in doubt, consult the module reference at the
    location listed above.

DESCRIPTION
    This module is an optparse-inspired command-line parsing library that:
    
        - handles both optional and positional arguments
        - produces highly informative usage messages
        - supports parsers that dispatch to sub-parsers
    
    The following is a simple usage example that sums integers from the
    command-line and writes the result to a file::
    
        parser = argparse.ArgumentParser(
            description='sum the integers at the command line')
        parser.add_argument(
      

In [4]:
# Scratch: show the documentation of ArgumentParser.add_argument.
help(argparse.ArgumentParser.add_argument)

Help on function add_argument in module argparse:

add_argument(self, *args, **kwargs)
    add_argument(dest, ..., name=value, ...)
    add_argument(option_string, option_string, ..., name=value, ...)



In [14]:
# Scratch: three argparse actions side by side. Only the last parse result is
# kept (in `args`); the first two are evaluated and discarded.

# Default action ('store'): the option consumes the next token as its value.
parser = argparse.ArgumentParser()
parser.add_argument('--foo')
parser.parse_args('--foo 1'.split())

# 'store_const': the option takes no value; giving it stores const (42).
parser = argparse.ArgumentParser()
parser.add_argument('--foo', action='store_const', const=42)
parser.parse_args('--foo'.split())

# 'store_true' / 'store_false': flags that store True / False when given.
parser = argparse.ArgumentParser()
parser.add_argument('--foo', action='store_true')
parser.add_argument('--bar', action='store_false')
args = parser.parse_args('--foo --bar'.split())

In [15]:
# --bar uses action='store_false', so passing the flag stores False.
args.bar

False

In [66]:
# Read the aspect number off the last character of the aspect name.
# Only the final character is converted, so this handles single-digit suffixes.
int('aspect1'[-1])

1