In [2]:
import os
import sys
import time
import math
import random
import argparse
import json
import pickle as pkl
import numpy as np
import torch.utils.data
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.autograd import Variable
from search import search, search_fast
from utils import to_gpu, Corpus, batchify, SNLIDataset, train_ngram_lm, get_ppl, load_ngram_lm, get_delta, collate_snli
from models import Seq2Seq, MLP_D, MLP_G, MLP_I, MLP_I_AE, JSDistance, Seq2SeqCAE, Baseline_Embeddings, Baseline_LSTM

In [3]:
print(torch.__version__)

1.0.1.post2


In [4]:
parser = argparse.ArgumentParser(description='Generating Natural Adversaries for Text')

# Path Arguments
parser.add_argument('--data_path', type=str, default='./data',
                    help='path to data corpus ./data')
parser.add_argument('--classifier_path', type=str, default='./models',
                    help='path to classifier files ./models')
parser.add_argument('--kenlm_path', type=str, default='./models/kenlm',
                    help='path to kenlm directory')
parser.add_argument('--outf', type=str, default='',
                    help='output directory name')

# Data Processing Arguments
parser.add_argument('--vocab_size', type=int, default=11000,
                    help='cut vocabulary down to this size (most frequently seen in training)')
parser.add_argument('--maxlen', type=int, default=10,
                    help='maximum sentence length')
parser.add_argument('--lowercase', type=bool, default=True,
                    help='lowercase all text')
parser.add_argument('--packed_rep', type=bool, default=False,
                    help='pad all sentences to fixed maxlen')

# Model Arguments
parser.add_argument('--emsize', type=int, default=300,
                    help='size of word embeddings')
parser.add_argument('--nhidden', type=int, default=300,
                    help='number of hidden units per layer in LSTM')
parser.add_argument('--nlayers', type=int, default=1,
                    help='number of layers')
parser.add_argument('--noise_radius', type=float, default=0.2,
                    help='stdev of noise for autoencoder (regularizer)')
parser.add_argument('--noise_anneal', type=float, default=0.995,
                    help='anneal noise_radius exponentially by this every 100 iterations')
parser.add_argument('--hidden_init', action='store_true',
                    help="initialize decoder hidden state with encoder's")
parser.add_argument('--arch_i', type=str, default='300-300',
                    help='inverter architecture (MLP)')
parser.add_argument('--arch_g', type=str, default='300-300',
                    help='generator architecture (MLP)')
parser.add_argument('--arch_d', type=str, default='300-300',
                    help='critic/discriminator architecture (MLP)')
parser.add_argument('--arch_conv_filters', type=str, default='500-700-1000',
                    help='encoder filter sizes for different convolutional layers')
parser.add_argument('--arch_conv_strides', type=str, default='1-2-2',
                    help='encoder strides for different convolutional layers')
parser.add_argument('--arch_conv_windows', type=str, default='3-3-3',
                    help='encoder window sizes for different convolutional layers')
parser.add_argument('--z_size', type=int, default=100,
                    help='dimension of random noise z to feed into generator')
parser.add_argument('--temp', type=float, default=1,
                    help='softmax temperature (lower --> more discrete)')
parser.add_argument('--enc_grad_norm', type=bool, default=True,
                    help='norm code gradient from critic->encoder')
parser.add_argument('--gan_toenc', type=float, default=-0.01,
                    help='weight factor passing gradient from gan to encoder')
parser.add_argument('--dropout', type=float, default=0.0,
                    help='dropout applied to layers (0 = no dropout)')
parser.add_argument('--useJS', type=bool, default=True,
                    help='use Jenson Shannon distance')
parser.add_argument('--perturb_z', type=bool, default=True,
                    help='perturb noise space z instead of hidden c')

# Training Arguments
parser.add_argument('--epochs', type=int, default=15,
                    help='maximum number of epochs')
parser.add_argument('--min_epochs', type=int, default=20,
                    help="minimum number of epochs to train for")
parser.add_argument('--no_earlystopping', action='store_true',
                    help="won't use KenLM for early stopping")
parser.add_argument('--patience', type=int, default=5,
                    help="language model evaluations w/o ppl improvement before early stopping")
parser.add_argument('--batch_size', type=int, default=1, metavar='N',
                    help='batch size')
parser.add_argument('--niters_ae', type=int, default=1,
                    help='number of autoencoder iterations in training')
parser.add_argument('--niters_gan_d', type=int, default=5,
                    help='number of discriminator iterations in training')
parser.add_argument('--niters_gan_g', type=int, default=1,
                    help='number of generator iterations in training')
parser.add_argument('--niters_inv', type=int, default=5,
                    help='number of inverter iterations in training')
parser.add_argument('--niters_gan_schedule', type=str, default='2-4-6',
                    help='epochs to increase GAN training iterations (increase by 1 each time)')
parser.add_argument('--lr_ae', type=float, default=1,
                    help='autoencoder learning rate')
parser.add_argument('--lr_inv', type=float, default=1e-05,
                    help='inverter learning rate')
parser.add_argument('--lr_gan_g', type=float, default=5e-05,
                    help='generator learning rate')
parser.add_argument('--lr_gan_d', type=float, default=1e-05,
                    help='critic/discriminator learning rate')
parser.add_argument('--beta1', type=float, default=0.9,
                    help='beta1 for adam. default=0.9')
parser.add_argument('--clip', type=float, default=1,
                    help='gradient clipping, max norm')
parser.add_argument('--gan_clamp', type=float, default=0.01,
                    help='WGAN clamp')
parser.add_argument('--convolution_enc', action='store_true', default=True,
                    help='use convolutions in encoder')
parser.add_argument('--use_inv_ae', action='store_true', default=False,
                    help='use encoder->inv->gen->dec')
parser.add_argument('--update_base', action='store_true', default=True,
                    help='updating base models')
parser.add_argument('--load_pretrained', type=str, default=None,
                    help='load a pre-trained encoder and decoder to train the inverter')
parser.add_argument('--reload_exp', type=str, default=None,
                    help='resume a previous experiment')

# Evaluation Arguments
parser.add_argument('--sample', action='store_true',
                    help='sample when decoding for generation')
parser.add_argument('--N', type=int, default=5,
                    help='N-gram order for training n-gram language model')
parser.add_argument('--log_interval', type=int, default=200,
                    help='interval to log autoencoder training results')

# Other
parser.add_argument('--seed', type=int, default=1111,
                    help='random seed')
parser.add_argument('--cuda', action='store_true', default=True,
                    help='use CUDA')
parser.add_argument('--debug_mode', action='store_true', default=False,
                    help='debug mode to not create a new dir')
parser.add_argument('--hybrid', type=bool, default=False,
                    help='performs hybrid search')

args = parser.parse_args(args=[])

In [5]:
random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.device(0)
    print("using cuda device gpu:" + format(torch.cuda.current_device()))
    torch.cuda.manual_seed(args.seed)

using cuda device gpu:0


In [6]:
if args.debug_mode:
    args.outf = "debug"
elif args.reload_exp:
    args.outf = args.reload_exp
else:
    args.outf = str(int(time.time()))

In [7]:
args.load_pretrained = '1556583694'

In [8]:
# make output directory if it doesn't already exist
if not os.path.isdir('./output'):
    os.makedirs('./output')
if not os.path.isdir('./output/{}'.format(args.outf)):
    os.makedirs('./output/{}'.format(args.outf))
    os.makedirs('./output/{}'.format(args.outf + "/models"))
print("Saving into directory ./output/{0}".format(args.outf))

if args.reload_exp:
    cur_dir = './output/{}'.format(args.reload_exp)
    print("Loading previous experiment from " + cur_dir)
elif args.load_pretrained:
    cur_dir = './output/{}'.format(args.load_pretrained)
    print("Loading pretrained models from " + cur_dir)
else:
    cur_dir = './output/{}'.format(args.outf)
    print("Creating new experiment at " + cur_dir)

Saving into directory ./output/1556975304
Loading pretrained models from ./output/1556583694


In [9]:
###############################################################################
# Load data and target classifiers
###############################################################################

# create corpus
if args.reload_exp or args.load_pretrained:
    corpus = Corpus(args.data_path,
                    maxlen=args.maxlen,
                    vocab_size=args.vocab_size,
                    lowercase=args.lowercase,
                    load_vocab=cur_dir + '/vocab.json')
else:
    corpus = Corpus(args.data_path,
                    maxlen=args.maxlen,
                    vocab_size=args.vocab_size,
                    lowercase=args.lowercase)

if not args.convolution_enc:
    args.packed_rep = True

Finished loading vocabulary from ./output/1556583694/vocab.json

Number of sentences dropped from ./data/train.txt: 270949 out of 714667 total
Number of sentences dropped from ./data/test.txt: 5481 out of 13323 total


In [10]:
train_data = batchify(corpus.train, args.batch_size, args.maxlen,
                      packed_rep=args.packed_rep, shuffle=True)
valid_data = batchify(corpus.test, args.batch_size, args.maxlen,
                      packed_rep=args.packed_rep, shuffle=False)

corpus_test = SNLIDataset(train=False, vocab_size=args.vocab_size+4,
                          reset_vocab=corpus.dictionary.word2idx)
testloader = torch.utils.data.DataLoader(corpus_test, batch_size=10,
                                         collate_fn=collate_snli, shuffle=False)
test_data = iter(testloader)        # different format from train_data and valid_data

classifier1 = Baseline_Embeddings(100, vocab_size=args.vocab_size+4)
classifier1.load_state_dict(torch.load(args.classifier_path + "/baseline/model_emb.pt"))
vocab_classifier1 = pkl.load(open(args.classifier_path + "/vocab.pkl", 'rb'))

classifier2 = Baseline_LSTM(100, 300, maxlen=10, gpu=args.cuda)
classifier2.load_state_dict(torch.load(args.classifier_path + "/baseline/model_lstm.pt"))
vocab_classifier2 = pkl.load(open(args.classifier_path + "/vocab.pkl", 'rb'))

print("Loaded data and target classifiers!")

Number of sentences dropped from ./data/classifier/test.txt: 8288 out of 9824 total
LSTM(100, 300, batch_first=True)
Loaded data and target classifiers!


In [11]:
cur_dir

'./output/1556583694'

In [12]:
###############################################################################
# Build the models
###############################################################################
ntokens = len(corpus.dictionary.word2idx)
args.ntokens = ntokens
print("Vocabulary Size: {}".format(ntokens))

Vocabulary Size: 11004


In [13]:
inverter = torch.load(open('output/1556583694/' + '/models/inverter_model.pt','rb'))

In [14]:
if args.reload_exp or args.load_pretrained:
    autoencoder = torch.load(open(cur_dir + '/models/autoencoder_model.pt', 'rb'))
    gan_gen = torch.load(open(cur_dir + '/models/gan_gen_model.pt', 'rb'))
    gan_disc = torch.load(open(cur_dir + '/models/gan_disc_model.pt', 'rb'))
    with open(cur_dir + '/vocab.json', 'r') as f:
        corpus.dictionary.word2idx = json.load(f)

    if args.load_pretrained:
        inverter = MLP_I(args.nhidden, args.z_size, args.arch_i, gpu=args.cuda)
    else:
        inverter = torch.load(open(cur_dir + '/models/inverter_model.pt','rb'))
else:
    if args.convolution_enc:
        autoencoder = Seq2SeqCAE(emsize=args.emsize,
                                 nhidden=args.nhidden,
                                 ntokens=ntokens,
                                 nlayers=args.nlayers,
                                 noise_radius=args.noise_radius,
                                 hidden_init=args.hidden_init,
                                 dropout=args.dropout,
                                 conv_layer=args.arch_conv_filters,
                                 conv_windows=args.arch_conv_windows,
                                 conv_strides=args.arch_conv_strides,
                                 gpu=args.cuda)
    else:
        autoencoder = Seq2Seq(emsize=args.emsize,
                              nhidden=args.nhidden,
                              ntokens=ntokens,
                              nlayers=args.nlayers,
                              noise_radius=args.noise_radius,
                              hidden_init=args.hidden_init,
                              dropout=args.dropout,
                              gpu=args.cuda)
    inverter = MLP_I_AE(ninput=args.nhidden, noutput=args.z_size, layers=args.arch_i)
    gan_gen = MLP_G(ninput=args.z_size, noutput=args.nhidden, layers=args.arch_g)
    gan_disc = MLP_D(ninput=args.nhidden, noutput=1, layers=args.arch_d)
    # dumping vocabulary
    with open('./output/{}/vocab.json'.format(args.outf), 'w') as f:
        # json.dump(corpus.dictionary.word2idx, f, encoding='utf-8')
        json.dump(corpus.dictionary.word2idx, f)



In [20]:
print(autoencoder)
print(inverter)
print(gan_gen)
print(gan_disc)

Seq2SeqCAE(
  (embedding): Embedding(11004, 300)
  (embedding_decoder): Embedding(11004, 300)
  (encoder): Sequential(
    (layer-1): Conv1d(300, 500, kernel_size=(3,), stride=(1,))
    (bn-1): BatchNorm1d(500, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (activation-1): LeakyReLU(negative_slope=0.2, inplace)
    (layer-2): Conv1d(500, 700, kernel_size=(3,), stride=(2,))
    (bn-2): BatchNorm1d(700, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (activation-2): LeakyReLU(negative_slope=0.2, inplace)
    (layer-3): Conv1d(700, 1000, kernel_size=(3,), stride=(2,))
    (bn-3): BatchNorm1d(1000, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (activation-3): LeakyReLU(negative_slope=0.2, inplace)
  )
  (linear): Linear(in_features=1000, out_features=300, bias=True)
  (decoder): LSTM(600, 300, batch_first=True)
  (linear_dec): Linear(in_features=300, out_features=11004, bias=True)
)
MLP_I(
  (layer1): Linear(in_features=300, ou

In [15]:
optimizer_ae = optim.SGD(autoencoder.parameters(),
                         lr=args.lr_ae)
optimizer_inv = optim.Adam(inverter.parameters(),
                           lr=args.lr_inv, betas=(args.beta1, 0.999))
optimizer_gan_g = optim.Adam(gan_gen.parameters(),
                             lr=args.lr_gan_g, betas=(args.beta1, 0.999))
optimizer_gan_d = optim.Adam(gan_disc.parameters(),
                             lr=args.lr_gan_d, betas=(args.beta1, 0.999))

criterion_ce = nn.CrossEntropyLoss()
criterion_mse = nn.MSELoss()
criterion_js = JSDistance()

if args.cuda:
    autoencoder = autoencoder.cuda()
    inverter = inverter.cuda()
    gan_gen = gan_gen.cuda()
    gan_disc = gan_disc.cuda()
    criterion_ce = criterion_ce.cuda()
    classifier1 = classifier1.cuda()
    classifier2 = classifier2.cuda()
else:
    autoencoder.gpu = False
    autoencoder = autoencoder.cpu()
    inverter = inverter.cpu()
    gan_gen = gan_gen.cpu()
    gan_disc = gan_disc.cpu()
    classifier1.cpu()
    classifier2.cpu()

In [16]:
# schedule of increasing GAN training loops
if args.niters_gan_schedule != "":
    gan_schedule = [int(x) for x in args.niters_gan_schedule.split("-")]
else:
    gan_schedule = []
niter_gan = 1

fixed_noise = to_gpu(args.cuda, Variable(torch.ones(args.batch_size, args.z_size)))
fixed_noise.data.normal_(0, 1)
one = to_gpu(args.cuda, torch.FloatTensor([1]))
mone = one * -1

impatience = 0
all_ppl = []
best_ppl = None

In [17]:
fixed_noise.shape

torch.Size([1, 100])

In [18]:
total_loss_ae = 0
epoch_start_time = time.time()
start_time = time.time()
niter = 0
niter_global = 1

In [18]:
def train_ae(batch, total_loss_ae, start_time, i,
             args, autoencoder, optimizer_ae, criterion_ce, n_train_data, epoch):
    autoencoder.train()
    autoencoder.zero_grad()

    source, target, lengths = batch
    source = to_gpu(args.cuda, Variable(source))
    target = to_gpu(args.cuda, Variable(target))

    # Create sentence length mask over padding
    mask = target.gt(0)
    masked_target = target.masked_select(mask)
    # examples x ntokens
    output_mask = mask.unsqueeze(1).expand(mask.size(0), ntokens)

    # output: batch x seq_len x ntokens
    output = autoencoder(source, lengths, noise=True)

    # output_size: batch_size, maxlen, self.ntokens
    flattened_output = output.view(-1, ntokens)

    masked_output = flattened_output.masked_select(output_mask).view(-1, ntokens)
    loss = criterion_ce(masked_output/args.temp, masked_target)
    loss.backward()

    # `clip_grad_norm` to prevent exploding gradient in RNNs / LSTMs
    torch.nn.utils.clip_grad_norm(autoencoder.parameters(), args.clip)
    optimizer_ae.step()

    total_loss_ae += loss.data

    accuracy = None
    if i % args.log_interval == 0 and i > 0:
        # accuracy
        probs = F.softmax(masked_output)
        max_vals, max_indices = torch.max(probs, 1)
        # accuracy = torch.mean(max_indices.eq(masked_target).float()).data[0]
        accuracy = torch.mean(max_indices.eq(masked_target).float()).data.item()

        # cur_loss = total_loss_ae[0] / args.log_interval
        cur_loss = total_loss_ae.item() / args.log_interval
        elapsed = time.time() - start_time
        print('| epoch {:3d} | {:5d}/{:5d} batches | ms/batch {:5.2f} | '
              'loss {:5.2f} | ppl {:8.2f} | acc {:8.2f}'
              .format(epoch, i, n_train_data,
                      elapsed * 1000 / args.log_interval,
                      cur_loss, math.exp(cur_loss), accuracy))

        with open("./output/{}/logs.txt".format(args.outf), 'a') as f:
            f.write('| epoch {:3d} | {:5d}/{:5d} batches | ms/batch {:5.2f} | '
                    'loss {:5.2f} | ppl {:8.2f} | acc {:8.2f}\n'.
                    format(epoch, i, n_train_data,
                           elapsed * 1000 / args.log_interval,
                           cur_loss, math.exp(cur_loss), accuracy))

        total_loss_ae = 0
        start_time = time.time()

    return total_loss_ae, start_time

In [None]:
train_ae(train_data[niter], total_loss_ae, start_time, niter,
                             args, autoencoder, optimizer_ae, criterion_ce,
                             len(train_data), epoch)

In [19]:
batch = train_data[0]
autoencoder.eval()

source, target, lengths = batch
source = to_gpu(args.cuda, Variable(source))
target = to_gpu(args.cuda, Variable(target))

# Create sentence length mask over padding
mask = target.gt(0)
masked_target = target.masked_select(mask)
# examples x ntokens
output_mask = mask.unsqueeze(1).expand(mask.size(0), ntokens)

output = autoencoder(source, lengths, noise=True)

# output_size: batch_size, maxlen, self.ntokens
flattened_output = output.view(-1, ntokens)

masked_output = flattened_output.masked_select(output_mask).view(-1, ntokens)

#loss = criterion_ce(masked_output/args.temp, masked_target)

In [20]:
masked_output.shape

torch.Size([8, 11004])

In [21]:
masked_output[0]

tensor([ 0.1716, -0.3986, -3.7944,  ..., -0.7063, -0.8574,  0.8694],
       device='cuda:0', grad_fn=<SelectBackward>)

In [22]:
masked_target

tensor([ 9754,  5586,  4885, 10527,  3733,  8851,    22,     2],
       device='cuda:0')

In [23]:
probs = F.softmax(masked_output)
max_vals, max_indices = torch.max(probs, 1)

  """Entry point for launching an IPython kernel.


In [24]:
probs.shape

torch.Size([8, 11004])

In [25]:
max_vals

tensor([1.0000, 1.0000, 1.0000, 0.9999, 0.9989, 1.0000, 1.0000, 0.9986],
       device='cuda:0', grad_fn=<MaxBackward0>)

In [26]:
#直接用autoencoder enc -> dec 得到的句子， 中间的hidden是noise=True (用了hidden+noise)
#对应的概率矩阵是probs, 对应的max概率是max_vals
max_indices
print(' '.join([corpus.dictionary.idx2word[w.item()] for w in max_indices]))

the man is waiting for someone . <eos>


In [27]:
#原句
print(' '.join([corpus.dictionary.idx2word[w.item()] for w in masked_target]))

the man is waiting for someone . <eos>


In [28]:
def forward(self, indices, lengths, noise, encode_only=False, generator=None, inverter=None):
    if not generator:   # only enc -> dec
        batch_size, maxlen = indices.size()
        self.embedding.weight.data[0].fill_(0)
        self.embedding_decoder.weight.data[0].fill_(0)
        hidden = self.encode(indices, lengths, noise)
        if encode_only:
            return hidden

        if hidden.requires_grad:
            hidden.register_hook(self.store_grad_norm)

        decoded = self.decode(hidden, batch_size, maxlen,
                          indices=indices, lengths=lengths)
    else:               # enc -> inv -> gen -> dec
        batch_size, maxlen = indices.size()
        self.embedding.weight.data[0].fill_(0)
        self.embedding_decoder.weight.data[0].fill_(0)
        hidden = self.encode(indices, lengths, noise)
        if encode_only:
            return hidden

        if hidden.requires_grad:
            hidden.register_hook(self.store_grad_norm)

        z_hat = inverter(hidden)
        c_hat = generator(z_hat)

        decoded = self.decode(c_hat, batch_size, maxlen,
                          indices=indices, lengths=lengths)

    return decoded

In [None]:
def encode(self, indices, lengths, noise):
    embeddings = self.embedding(indices)
    embeddings = embeddings.transpose(1,2)
    c_pre_lin = self.encoder(embeddings)
    c_pre_lin = c_pre_lin.squeeze(2)
    hidden = self.linear(c_pre_lin)
    # normalize to unit ball (l2 norm of 1) - p=2, dim=1
    norms = torch.norm(hidden, 2, 1)
    if norms.ndimension()==1:
        norms=norms.unsqueeze(1)
    hidden = torch.div(hidden, norms.expand_as(hidden))

    if noise and self.noise_radius > 0:
        gauss_noise = torch.normal(mean=torch.zeros(hidden.size()),
                                   std=self.noise_radius)
        if self.gpu:
            gauss_noise = gauss_noise.cuda()

        hidden = hidden + to_gpu(self.gpu, Variable(gauss_noise))

    return hidden

In [None]:
def generate(self, hidden, maxlen, sample=True, temp=1.0):
    """Generate through decoder; no backprop"""
    if hidden.ndimension() == 1:
        hidden = hidden.unsqueeze(0)
    batch_size = hidden.size(0)

    if self.hidden_init:
        # initialize decoder hidden state to encoder output
        state = (hidden.unsqueeze(0), self.init_state(batch_size))
    else:
        state = self.init_hidden(batch_size)

    if not self.gpu:
        self.start_symbols = self.start_symbols.cpu()
    else:
        self.start_symbols = self.start_symbols.cuda()
    # <sos>
    self.start_symbols.data.resize_(batch_size, 1)
    self.start_symbols.data.fill_(1)

    embedding = self.embedding_decoder(self.start_symbols)
    inputs = torch.cat([embedding, hidden.unsqueeze(1)], 2)

    # unroll
    all_indices = []
    for i in range(maxlen):
        output, state = self.decoder(inputs, state)
        overvocab = self.linear_dec(output.squeeze(1))

        if not sample:
            vals, indices = torch.max(overvocab, 1)
        else:
            # sampling
            probs = F.softmax(overvocab/temp)
            indices = torch.multinomial(probs, 1)

        if indices.ndimension()==1:
            indices = indices.unsqueeze(1)
        all_indices.append(indices)

        embedding = self.embedding_decoder(indices)
        inputs = torch.cat([embedding, hidden.unsqueeze(1)], 2)

    max_indices = torch.cat(all_indices, 1)

    return max_indices

In [29]:
indices = source
encode_only=False
generator=None
inverter=None

In [30]:
indices.shape

torch.Size([1, 10])

In [31]:
self = autoencoder
# enc -> dec
batch_size, maxlen = indices.size()
self.embedding.weight.data[0].fill_(0)
self.embedding_decoder.weight.data[0].fill_(0)
hidden_withnoise = self.encode(indices, lengths, noise=True)
hidden_nonoise = self.encode(indices,lengths,noise=False)

if hidden_withnoise.requires_grad:
    hidden_withnoise.register_hook(self.store_grad_norm)
if hidden_nonoise.requires_grad:
    hidden_nonoise.register_hook(self.store_grad_norm)

In [32]:
hidden_withnoise.shape

torch.Size([1, 300])

In [33]:
output.shape

torch.Size([1, 10, 11004])

In [34]:
#enc-> generate
max_indices_generated_withnoise_temp1 = autoencoder.generate(hidden_withnoise, args.maxlen, sample=True, temp=1.0)
max_indices_generated_withnoise_temp15 = autoencoder.generate(hidden_withnoise, args.maxlen, sample=True, temp=1.5)
max_indices_generated_withnoise_temp05 = autoencoder.generate(hidden_withnoise, args.maxlen, sample=True, temp=0.5)
max_indices_generated_nonoise_temp1 = autoencoder.generate(hidden_nonoise, args.maxlen, sample=True, temp=1.0)
max_indices_generated_nonoise_temp15 = autoencoder.generate(hidden_nonoise, args.maxlen, sample=True, temp=1.5)
max_indices_generated_nonoise_temp05 = autoencoder.generate(hidden_nonoise, args.maxlen, sample=True, temp=0.5)
max_indices_generated_withnoise = autoencoder.generate(hidden_withnoise, args.maxlen, sample=False, temp=1)
max_indices_generated_nonoise = autoencoder.generate(hidden_nonoise, args.maxlen, sample=False, temp=1)

  probs = F.softmax(overvocab/temp)


In [35]:
inverter = torch.load(open('output/1556583694/' + '/models/inverter_model.pt','rb'))

In [36]:
#enc->inv->gen->dec
inverter.eval()
gan_gen.eval()
inv_noise_withnoise = inverter(hidden_withnoise)
inv_noise_nonoise = inverter(hidden_nonoise)
inv_hidden_withnoise = gan_gen(inv_noise_withnoise)
inv_hidden_nonoise = gan_gen(inv_noise_nonoise)

In [37]:
#inv hidden
autoencoder.eval()
max_indices_invhidden_generated_withnoise_temp1 = autoencoder.generate(inv_hidden_withnoise, args.maxlen, sample=True, temp=1.0)
max_indices_invhidden_generated_withnoise_temp15 = autoencoder.generate(inv_hidden_withnoise, args.maxlen, sample=True, temp=1.5)
max_indices_invhidden_generated_withnoise_temp05 = autoencoder.generate(inv_hidden_withnoise, args.maxlen, sample=True, temp=0.5)
max_indices_invhidden_generated_nonoise_temp1 = autoencoder.generate(inv_hidden_nonoise, args.maxlen, sample=True, temp=1.0)
max_indices_invhidden_generated_nonoise_temp15 = autoencoder.generate(inv_hidden_nonoise, args.maxlen, sample=True, temp=1.5)
max_indices_invhidden_generated_nonoise_temp05 = autoencoder.generate(inv_hidden_nonoise, args.maxlen, sample=True, temp=0.5)
max_indices_invhidden_generated_withnoise = autoencoder.generate(inv_hidden_withnoise, args.maxlen, sample=False, temp=1)
max_indices_invhidden_generated_nonoise = autoencoder.generate(inv_hidden_nonoise, args.maxlen, sample=False, temp=1)

In [38]:
#原始的句子
' '.join([corpus.dictionary.idx2word[x.item()] for x in indices[0]])

'<sos> the man is waiting for someone . <pad> <pad>'

In [39]:
#enc->dec
#直接用autoencoder加原句得到的句子(取argmax)， 中间的hidden是noise=True (用了hidden+noise)
#对应的概率矩阵是probs, 对应的max概率是max_vals
print(' '.join([corpus.dictionary.idx2word[w.item()] for w in max_indices]))

the man is waiting for someone . <eos>


In [40]:
# enc->gen
#用hidden generate出的句子
print(' '.join([corpus.dictionary.idx2word[x.item()] for x in max_indices_generated_withnoise_temp1[0]]))
print(' '.join([corpus.dictionary.idx2word[x.item()] for x in max_indices_generated_withnoise_temp15[0]]))
print(' '.join([corpus.dictionary.idx2word[x.item()] for x in max_indices_generated_withnoise_temp05[0]]))
print(' '.join([corpus.dictionary.idx2word[x.item()] for x in max_indices_generated_nonoise_temp1[0]]))
print(' '.join([corpus.dictionary.idx2word[x.item()] for x in max_indices_generated_nonoise_temp15[0]]))
print(' '.join([corpus.dictionary.idx2word[x.item()] for x in max_indices_generated_nonoise_temp05[0]]))
print(' '.join([corpus.dictionary.idx2word[x.item()] for x in max_indices_generated_withnoise[0]]))
print(' '.join([corpus.dictionary.idx2word[x.item()] for x in max_indices_generated_nonoise[0]]))

the man is waiting for someone . <eos> <eos> man
the man is waiting wore peoples . <eos> <eos> the
the man is waiting for someone . <eos> <eos> man
the man is waiting for someone . <eos> <eos> <eos>
the man is waiting for someone . <eos> <eos> <eos>
the man is waiting for someone . <eos> <eos> man
the man is waiting for someone . <eos> <eos> man
the man is waiting for someone . <eos> <eos> man


In [41]:
#enc->inv->gan_gen->gen
#用inv generate出的句子
print(' '.join([corpus.dictionary.idx2word[x.item()] for x in max_indices_invhidden_generated_withnoise_temp1[0]]))
print(' '.join([corpus.dictionary.idx2word[x.item()] for x in max_indices_invhidden_generated_withnoise_temp15[0]]))
print(' '.join([corpus.dictionary.idx2word[x.item()] for x in max_indices_invhidden_generated_withnoise_temp05[0]]))
print(' '.join([corpus.dictionary.idx2word[x.item()] for x in max_indices_invhidden_generated_nonoise_temp1[0]]))
print(' '.join([corpus.dictionary.idx2word[x.item()] for x in max_indices_invhidden_generated_nonoise_temp15[0]]))
print(' '.join([corpus.dictionary.idx2word[x.item()] for x in max_indices_invhidden_generated_nonoise_temp05[0]]))
print(' '.join([corpus.dictionary.idx2word[x.item()] for x in max_indices_invhidden_generated_withnoise[0]]))
print(' '.join([corpus.dictionary.idx2word[x.item()] for x in max_indices_invhidden_generated_nonoise[0]]))

very large large moving street moving about new moving new
large orange moving serving new outdoor moving about new very
orange orange moving new moving about moving moving about moving
large moving street moving about about moving large moving outdoor
drumstick large orange red new moving moving out moving orange
very large moving street moving about moving about moving moving
very large moving street moving about moving about moving moving
very large moving street moving about moving about moving moving


In [None]:
#perturb(data_source, epoch, corpus_test, hybrid=False)

In [108]:
#perturb(test_data, epoch, corpus_test, hybrid=args.hybrid)

<utils.SNLIDataset at 0x7ff696968278>

In [42]:
data_source = test_data

In [43]:
batch = next(iter(data_source))

In [44]:
gan_gen = gan_gen.cpu()
inverter = inverter.cpu()
autoencoder.eval()
autoencoder = autoencoder.cpu()
autoencoder.gpu = False

In [45]:
premise, hypothesis, target, premise_words, hypothesise_words, lengths = batch

c = autoencoder.encode(hypothesis, lengths, noise=False)
z = inverter(c).data.cpu()

batch_size = premise.size(0)

In [46]:
premise.shape

torch.Size([10, 10])

In [119]:
premise

tensor([[    1,    94,  5586,  6924,   331,  3110,  4207,  6319,  9074,    22],
        [    1,    94,  5586,  6924,   331,  3110,  4207,  6319,  9074,    22],
        [    1,    94,  5586,  6924,   331,  3110,  4207,  6319,  9074,    22],
        [    1,  9806,  3560,  2038,  6404,  6282,  9350,  9131,    22,     2],
        [    1,  9806,  3560,  2038,  6404,  6282,  9350,  9131,    22,     2],
        [    1,    94,  8768,  6319,    94, 10755,  6888,  6282,  8762,     2],
        [    1,    94,  8768,  6319,    94, 10755,  6888,  6282,  8762,     2],
        [    1,    94,  8768,  6319,    94, 10755,  6888,  6282,  8762,     2],
        [    1,   331,  6310, 10844,  9710,  9883,    94,  3895,    22,     2],
        [    1,   331,  6310, 10844,  9710,  9883,    94,  3895,    22,     2]])

In [120]:
hypothesis.shape

torch.Size([10, 10])

In [121]:
hypothesis

tensor([[    1,    94,  5586,  6924,   734,  6319,  9754,  3657,    22,     0],
        [    1,    94,  5586,  6924,  4207,  6319,  9074,    22,     0,     0],
        [    1,    94,  5586,  4885,  6747,  3733,  1598,    22,     0,     0],
        [    1,  9806,  3561,  2054, 10309,  3828,    94,  9350,  9131,    22],
        [    1,  9806,  3561,  6924,  1548,  4799,    94,  3556,  9131,    22],
        [    1,    94,  8762,  3503, 10827,    94,  8768,  6319,  4895,     0],
        [    1,    94,  8768,  3999,  6427,    94,  3503,  6282,  8762,     0],
        [    1,    94,  8780,  4734,    94,  1031,     0,     0,     0,     0],
        [    1,  9754,  5172,  4885,  2221,  2730,     0,     0,     0,     0],
        [    1,  9754,  5172,  4885, 10667,  4435,  3895,     0,     0,     0]])

In [122]:
target

tensor([2, 0, 1, 0, 2, 0, 1, 1, 2, 1])

In [124]:
lengths

[9, 8, 8, 10, 10, 9, 9, 6, 6, 7]

In [125]:
c.shape

torch.Size([10, 300])

In [126]:
z.shape

torch.Size([10, 100])

In [48]:
n=0
print(' '.join(premise_words[n]))
print(' '.join(hypothesise_words[n]))

<sos> a man playing an electric guitar on stage .
<sos> a man playing banjo on the floor . <pad>


In [None]:
'entailment':0, 'neutral':1, 'contradiction':2

In [46]:
def pred_fn(data):
    # query baseline classifiers with sentence pairs
    gpu = args.cuda
    premise, hyp_indices, hypothesis_c, dist = data
    edit_dist = []
    premise_words = " ".join(
        [corpus_test.dictionary.idx2word[x] for x in premise.data.cpu().numpy()[0]])
    premise_words_indices1 = [vocab_classifier1[w] if w in vocab_classifier1 else 3 for w in
                              premise_words.strip().split()]
    premise_words_indices1 = Variable(torch.LongTensor(premise_words_indices1)).unsqueeze(0)

    premise_words_indices2 = [vocab_classifier2[w] if w in vocab_classifier2 else 3 for w in
                              premise_words.strip().split()]
    premise_words_indices2 = Variable(torch.LongTensor(premise_words_indices2)).unsqueeze(0)

    hyp_sample_idx = autoencoder.generate(hypothesis_c, 10, True).data.cpu().numpy()
    words_all = []
    premise_word_inds1 = []
    premise_word_inds2 = []
    hypothesis_word_inds1 = []
    hypothesis_word_inds2 = []
    for i in range(hyp_sample_idx.shape[0]):
        words = [corpus_test.dictionary.idx2word[x] for x in hyp_sample_idx[i]]
        words_all.append(" ".join(words) + "\t" + str(dist[i]))

        edit_dist.append(
            len(set(hyp_indices[0].data.cpu().numpy()).intersection(set(hyp_sample_idx[0]))))
        hypothesis_word_indx1 = [vocab_classifier1[w] if w in vocab_classifier1 else 3 for w in
                                 words]
        hypothesis_word_indx1 = Variable(torch.LongTensor(hypothesis_word_indx1)).unsqueeze(0)
        hypothesis_word_indx2 = [vocab_classifier2[w] if w in vocab_classifier2 else 3 for w in
                                 words]
        hypothesis_word_indx2 = Variable(torch.LongTensor(hypothesis_word_indx2)).unsqueeze(0)
        if gpu:
            premise_words_indices1 = premise_words_indices1.cuda()
            premise_words_indices2 = premise_words_indices2.cuda()
            hypothesis_word_indx1 = hypothesis_word_indx1.cuda()
            hypothesis_word_indx2 = hypothesis_word_indx2.cuda()

        premise_word_inds1.append(premise_words_indices1)
        premise_word_inds2.append(premise_words_indices2)
        hypothesis_word_inds1.append(hypothesis_word_indx1)
        hypothesis_word_inds2.append(hypothesis_word_indx2)

    premise_word_inds1 = torch.cat(premise_word_inds1, 0)
    premise_word_inds2 = torch.cat(premise_word_inds2, 0)
    hypothesis_word_inds1 = torch.cat(hypothesis_word_inds1, 0)
    hypothesis_word_inds2 = torch.cat(hypothesis_word_inds2, 0)

    prob_distrib1 = classifier1((premise_word_inds1, hypothesis_word_inds1))
    prob_distrib2 = classifier2((premise_word_inds2, hypothesis_word_inds2))

    _, predictions1 = torch.max(prob_distrib1, 1)
    _, predictions2 = torch.max(prob_distrib2, 1)

    return predictions1, predictions2, words_all

In [49]:
x_adv1, x_adv2, d_adv1, d_adv2, all_adv = search_fast(
    gan_gen, pred_fn, (premise[n].unsqueeze(0), hypothesis[n].unsqueeze(0)),
    target[n], z[n].view(1, 100))

hyp_sample_idx1 = autoencoder.generate(x_adv1, 10, True).data.cpu().numpy()[0]
hyp_sample_idx2 = autoencoder.generate(x_adv2, 10, True).data.cpu().numpy()[0]
words1 = [corpus_test.dictionary.idx2word[x] for x in hyp_sample_idx1]
words2 = [corpus_test.dictionary.idx2word[x] for x in hyp_sample_idx2]
if "<eos>" in words1:
    words1 = words1[:words1.index("<eos>")]
if "<eos>" in words2:
    words2 = words2[:words2.index("<eos>")]

  perturb_z = Variable(mus + delta, volatile=True)
  input = module(input)


In [50]:
' '.join(words1)

'street waiting on huge very knitting on music waiting outside'

In [51]:
' '.join(words2)

'foot very very band waiting on very waiting new on'

In [52]:
print(' '.join(premise_words[n]))
print(' '.join(hypothesise_words[n]))

<sos> a man playing an electric guitar on stage .
<sos> a man playing banjo on the floor . <pad>


In [54]:
corpus_test.sentence_ids

{'287144': 'A kid is doing a jump at the basketball court .',
 '287145': 'A kid is riding a bike at the courts .',
 '287146': 'Little boy playing with his toy truck .',
 '287147': 'A boy plays in a sandbox .',
 '287140': 'Cyclists are talking to each other',
 '287141': 'A group of men cycling on the moon .',
 '287142': 'A youth riding a skateboard is airborne amongst the basketball courts .',
 '287143': 'A kid on a skateboard is outside .',
 '611459': 'Cyclists trying to blast past the leader who is wearing an orange and white shirt',
 '287148': 'A boy plays with a toy .',
 '287149': 'A guy in blue jeans with a black shirt has his hand in his pocket .',
 '378468': 'Messi practices his dribbling .',
 '378469': 'A waffle learns to speak .',
 '89370': 'The boy is playing in the grass',
 '89371': 'The boy is sleeping',
 '89372': 'A child is wearing glasses',
 '89373': 'A man in a neon shirt , khakis , and an orange hard hat walks by a mulch machine .',
 '89374': 'A woman is playing Mario K

In [58]:
corpus_test.test_data[0]

[[1, 94, 5586, 6924, 331, 3110, 4207, 6319, 9074, 22],
 [1, 94, 5586, 6924, 734, 6319, 9754, 3657, 22, 0],
 2,
 ['<sos>',
  'a',
  'man',
  'playing',
  'an',
  'electric',
  'guitar',
  'on',
  'stage',
  '.'],
 ['<sos>', 'a', 'man', 'playing', 'banjo', 'on', 'the', 'floor', '.', '<pad>'],
 9]

In [None]:
#train_baseline.py

In [53]:
import argparse
from models import Baseline_Embeddings, Baseline_LSTM
from utils import to_gpu, Corpus, batchify, SNLIDataset, collate_snli
import random
import torch
import numpy as np
import torch.optim as optim
import torch.nn as nn
import torch.utils.data
from torch.autograd import Variable