In [3]:
import argparse
import copy
import os
import torch

from torch import nn, optim
from torch.autograd import Variable
from tensorboardX import SummaryWriter
from time import gmtime, strftime

from model.BIMPM import BIMPM
from model.utils import SNLI, Quora
from test import test


def train(args, data):
    model = BIMPM(args, data)
    if args.gpu > -1:
        model.cuda(args.gpu)

    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.Adam(parameters, lr=args.learning_rate)
    criterion = nn.CrossEntropyLoss()

    writer = SummaryWriter(log_dir='runs/' + args.model_time)

    model.train()
    loss, last_epoch = 0, -1
    max_dev_acc, max_test_acc = 0, 0

    iterator = data.train_iter
    for i, batch in enumerate(iterator):
        present_epoch = int(iterator.epoch)
        if present_epoch == args.epoch:
            break
        if present_epoch > last_epoch:
            print('epoch:', str(present_epoch + 1))
        last_epoch = present_epoch

        if args.data_type == 'SNLI':
            s1, s2 = 'premise', 'hypothesis'
        else:
            s1, s2 = 'q1', 'q2'

        s1, s2 = getattr(batch, s1), getattr(batch, s2)

        # limit the lengths of input sentences up to max_sent_len
        if args.max_sent_len >= 0:
            if s1.size()[1] > args.max_sent_len:
                s1 = s1[:, :args.max_sent_len]
            if s2.size()[1] > args.max_sent_len:
                s2 = s2[:, :args.max_sent_len]

        kwargs = {'p': s1, 'h': s2}

        if args.use_char_emb:
            char_p = Variable(torch.LongTensor(data.characterize(s1)))
            char_h = Variable(torch.LongTensor(data.characterize(s2)))

            if args.gpu > -1:
                char_p = char_p.cuda(args.gpu)
                char_h = char_h.cuda(args.gpu)

            kwargs['char_p'] = char_p
            kwargs['char_h'] = char_h

        pred = nn.DataParallel(model(**kwargs))
        
        optimizer.zero_grad()
        batch_loss = criterion(pred, batch.label)
        loss += batch_loss.data[0]
        batch_loss.backward()
        optimizer.step()
        del pred
        del batch_loss
        if (i + 1) % args.print_freq == 0:
            dev_loss, dev_acc = test(model, args, data, mode='dev')
            test_loss, test_acc = test(model, args, data)
            c = (i + 1) // args.print_freq

            writer.add_scalar('loss/train', loss, c)
            writer.add_scalar('loss/dev', dev_loss, c)
            writer.add_scalar('acc/dev', dev_acc, c)
            writer.add_scalar('loss/test', test_loss, c)
            writer.add_scalar('acc/test', test_acc, c)

            print('train loss: '+ str(loss) +' / dev loss: '+ str(dev_loss) + '/ test loss:' + str(test_loss) +
                  ' / dev acc:' + str(dev_acc) + 'test acc:' + str(test_acc))

            if dev_acc > max_dev_acc:
                max_dev_acc = dev_acc
                max_test_acc = test_acc
                best_model = copy.deepcopy(model)

            loss = 0
            model.train()

    writer.close()
    print('max dev acc:'+ str(max_dev_acc) + '/ max test acc: ' + str(max_test_acc))

    return best_model


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--batch-size', default=32, type=int)
    parser.add_argument('--char-dim', default=20, type=int)
    parser.add_argument('--char-hidden-size', default=50, type=int)
    parser.add_argument('--data-type', default='Quora', help='available: SNLI or Quora')
    parser.add_argument('--dropout', default=0.1, type=float)
    parser.add_argument('--epoch', default=10, type=int)
    parser.add_argument('--gpu', default=0, type=int)
    parser.add_argument('--hidden-size', default=100, type=int)
    parser.add_argument('--learning-rate', default=0.001, type=float)
    parser.add_argument('--max-sent-len', default=-1, type=int,
                        help='max length of input sentences model can accept, if -1, it accepts any length')
    parser.add_argument('--num-perspective', default=20, type=int)
    parser.add_argument('--print-freq', default=500, type=int)
    parser.add_argument('--use-char-emb', default=False, action='store_true')
    parser.add_argument('--word-dim', default=300, type=int)
    parser.add_argument('--training', default=0, type=int)
    args = parser.parse_args()

    if args.data_type == 'SNLI':
        print('loading SNLI data...')
        data = SNLI(args)
    elif args.data_type == 'Quora':
        print('loading Quora data...')
        data = Quora(args)
    else:
        raise NotImplementedError('only SNLI or Quora data is possible')

    setattr(args, 'char_vocab_size', len(data.char_vocab))
    setattr(args, 'word_vocab_size', len(data.TEXT.vocab))
    setattr(args, 'class_size', len(data.LABEL.vocab))
    setattr(args, 'max_word_len', data.max_word_len)
    setattr(args, 'model_time', strftime('%H:%M:%S', gmtime()))

    print('training start!')
    best_model = train(args, data)

    if not os.path.exists('saved_models'):
        os.makedirs('saved_models')
    torch.save(best_model.state_dict(), 'saved_models/BIBPM_'+args.data_type+'_'+args.model_time+'.pt')
    print('training finished!')


if __name__ == '__main__':
    main()


usage: ipykernel_launcher.py [-h] [--batch-size BATCH_SIZE]
                             [--char-dim CHAR_DIM]
                             [--char-hidden-size CHAR_HIDDEN_SIZE]
                             [--data-type DATA_TYPE] [--dropout DROPOUT]
                             [--epoch EPOCH] [--gpu GPU]
                             [--hidden-size HIDDEN_SIZE]
                             [--learning-rate LEARNING_RATE]
                             [--max-sent-len MAX_SENT_LEN]
                             [--num-perspective NUM_PERSPECTIVE]
                             [--print-freq PRINT_FREQ] [--use-char-emb]
                             [--word-dim WORD_DIM]
ipykernel_launcher.py: error: unrecognized arguments: -f /run/user/1192/jupyter/kernel-b9cd9221-4379-4717-9ef0-c798df2499da.json


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [6]:
import argparse
import copy
import os
import torch

from torch import nn, optim
from torch.autograd import Variable
from tensorboardX import SummaryWriter
from time import gmtime, strftime

from model.BIMPM import BIMPM
from model.utils import SNLI, Quora
from test import test


In [7]:
import sys
sys.argv = ['foo']
parser = argparse.ArgumentParser()
parser.add_argument('--batch-size', default=32, type=int)
parser.add_argument('--char-dim', default=20, type=int)
parser.add_argument('--char-hidden-size', default=50, type=int)
parser.add_argument('--data-type', default='Quora', help='available: SNLI or Quora')
parser.add_argument('--dropout', default=0.1, type=float)
parser.add_argument('--epoch', default=10, type=int)
parser.add_argument('--gpu', default=0, type=int)
parser.add_argument('--hidden-size', default=100, type=int)
parser.add_argument('--learning-rate', default=0.001, type=float)
parser.add_argument('--max-sent-len', default=-1, type=int,
                    help='max length of input sentences model can accept, if -1, it accepts any length')
parser.add_argument('--num-perspective', default=20, type=int)
parser.add_argument('--print-freq', default=500, type=int)
parser.add_argument('--use-char-emb', default=False, action='store_true')
parser.add_argument('--word-dim', default=300, type=int)
args = parser.parse_args()

if args.data_type == 'SNLI':
    print('loading SNLI data...')
    data = SNLI(args)
elif args.data_type == 'Quora':
    print('loading Quora data...')
    data = Quora(args)
else:
    raise NotImplementedError('only SNLI or Quora data is possible')

setattr(args, 'char_vocab_size', len(data.char_vocab))
setattr(args, 'word_vocab_size', len(data.TEXT.vocab))
setattr(args, 'class_size', len(data.LABEL.vocab))
setattr(args, 'max_word_len', data.max_word_len)
setattr(args, 'model_time', strftime('%H:%M:%S', gmtime()))


loading Quora data...


In [9]:
args.training = 0
model = BIMPM(args, data)
model.load_state_dict(torch.load('saved_models/BIBPM_Quora_19:04:33.pt'))

In [4]:
weight1 = model.state_dict()['mp_w1']

NameError: name 'model' is not defined

In [5]:
import torch.nn.functional as F
F.cosine_similarity(torch.cat([weight1]*20,0), weight1.view(20,100,1).repeat(1,20,1).view(400,100),0).mean()

NameError: name 'torch' is not defined

In [17]:
weight1 = model.state_dict()['mp_w2']
import torch.nn.functional as F
F.cosine_similarity(torch.cat([weight1]*20,0), weight1.view(20,100,1).repeat(1,20,1).view(400,100),0).mean()

0.04072304128824953

In [11]:
import torch.nn.functional as F
for i in range(1,9):
    weight1 = model.state_dict()['mp_w'+str(i)]
    print((F.cosine_similarity(torch.cat([weight1]*20,0),\
                               weight1.view(20,100,1).repeat(1,20,1).view(400,100),0).mean()*400\
           -F.cosine_similarity(weight1,weight1).mean()*20)/380)

0.005462830512916346
-0.009765220167142425
0.007928040630897732
-0.003508737254866757
-0.005286185414220917
-0.0020706250197882153
0.0004789412987711809
-0.005853073707227228


In [47]:
import numpy as np
for i in range(1,9):
    weight1 = model.state_dict()['mp_w'+str(i)].numpy()
    #np.matrix.trace(np.matmul(weight1,np.transpose(weight1))-np.identity(20))
    norm = np.abs(weight1).sum(axis=1)
    w2=weight1 / norm.reshape(20,1)
    #w2=np.ones((20,20))
    print(np.linalg.norm(np.matmul(w2,np.transpose(w2))-np.identity(20),ord=2))

#F.cosine_similarity(weight1,weight1).mean()

0.9932163740259714
0.9944117102145474
0.9960664214771039
0.9958546397736677
0.99487538417775
0.9941656890852634
0.9943433740303175
0.9942018194943735
