In [3]:
from data import get_nli, build_vocab

# Locations of the SNLI data and the GloVe vectors, relative to the working directory.
glove_path = './data/glove/glove.840B.300d.txt'
nli_path = './data/snli'

train, dev, test = get_nli(nli_path)

# Concatenate the 's1' and 's2' entries of every split (train, then test,
# then dev) so the vocabulary is built over all of them in one pass.
all_sentences = (
    train['s1'] + train['s2']
    + test['s1'] + test['s2']
    + dev['s1'] + dev['s2']
)
vocab, embeddings = build_vocab(all_sentences, glove_path)

# print(type(FLAGS.prem))

def get_batch_from_idx(sent, word_emb, config):
    """Look up an embedding for every token of a single sentence.

    Args:
        sent: sequence of tokens; each token is used as a key into ``word_emb``.
        word_emb: mapping from token to its embedding vector.
        config: dict providing ``'emb_dim'`` (width of each row) and
            ``'model_name'``.

    Returns:
        A ``(embeddings, length)`` pair. ``embeddings`` is a float tensor of
        shape ``(len(sent), config['emb_dim'])`` and ``length`` is
        ``len(sent)``. Rows of tokens missing from ``word_emb`` are all zeros.
        When ``config['model_name'] == 'base'`` every found vector is divided
        by ``len(sent)``, so summing the rows gives the sentence average.
    """
    n_tokens = len(sent)
    # Every row starts as zeros, which is also the value used for unknown tokens.
    rows = np.zeros((n_tokens, config['emb_dim']))

    for position, token in enumerate(sent):
        average_rows = config['model_name'] == 'base'

        if token not in word_emb:
            continue

        vector = word_emb[token]
        rows[position, :] = vector / n_tokens if average_rows else vector

    return torch.from_numpy(rows).float(), n_tokens


---------Preprocessing NLI data---------
549367 instances extracted of train
9842 instances extracted of dev
9824 instances extracted of test
Example: train['s1'][0] =  A person on a horse jumps over a broken down airplane .

 -----------Building Vocab from the SNLI dataset-----------
----Getting Glove word embedding for each word in vocab (non vocab words ignored, ie, <unk> not used)----
Found 38957 words with Glove embeddings out of 43479 total words in corpus.


In [4]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function



import argparse


import os
import sys
import time
import argparse

import numpy as np

import torch
from torch.autograd import Variable
import torch.nn as nn

from data import get_nli, build_vocab
from models import Classifier, LSTM, biLSTM, LSTM_main
from dev_test_evals import model_eval

# Values used when the corresponding command-line flag is not supplied.
MODEL_NAME_DEFAULT = 'bilstm_pool'
s1_DEFAULT = 'Bob is in his room, but because of the thunder and lightning outside, he cannot sleep '
s2_DEFAULT = 'It is sunny outside'

# Checkpoint file for each supported model.
base_path = './checkout/base_final.pickle'
lstm_path = './checkout/lstm_final.pickle'
pool_path = './checkout/pool_final.pickle'
bi_path = './checkout/bilstm_final.pickle'

# Use the GPU when torch reports one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

parser = argparse.ArgumentParser()
parser.add_argument(
    '--model_name',
    type=str,
    default=MODEL_NAME_DEFAULT,
    help='model name: base / lstm / bilstm / bilstm_pool',
)
parser.add_argument(
    '--prem',
    type=str,
    default=s1_DEFAULT,
    help='premise',
)
parser.add_argument(
    '--hyp',
    type=str,
    default=s2_DEFAULT,
    help='hypothesis',
)

# parse_known_args collects unrecognised arguments in `unparsed` instead of
# aborting on them.
FLAGS, unparsed = parser.parse_known_args()


def main():
    """Print every parsed flag in ``FLAGS`` as ``name : value``, one per line."""
    parsed_flags = vars(FLAGS)
    for flag_name in parsed_flags:
        print(flag_name + ' : ' + str(parsed_flags[flag_name]))

# main() 


# Settings handed to the embedding helper and to the model constructors.
# Only 'model_name' comes from the command line; the rest are fixed here.
config = dict(
    model_name=FLAGS.model_name,
    emb_dim=300,
    b_size=1,
    fc_dim=512,
    lstm_dim=2048,
    n_classes=3,
)

In [5]:
# Run one premise/hypothesis pair through the selected pretrained model and
# print the predicted NLI label.

# Whitespace-tokenise both sentences and look up their word embeddings.
s1_embed, s1_len = get_batch_from_idx(FLAGS.prem.split(), embeddings, config)
s2_embed, s2_len = get_batch_from_idx(FLAGS.hyp.split(), embeddings, config)


print("\n\n Hi I am the   " + str(FLAGS.model_name) + "    model...!!")
print("\n Hhhhmmmmm....lemme think...\n")


if config['model_name'] == 'base':
    model = Classifier(config).to(device)
    PATH = base_path
    # NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
    model.load_state_dict(torch.load(PATH, map_location=device))
    # Switch dropout / batch-norm layers (if any) to inference behaviour.
    model.eval()

    # For the 'base' model get_batch_from_idx already divides each word vector
    # by the sentence length, so summing over tokens yields the average.
    u = torch.sum(s1_embed, 0).to(device)
    # Fixed: the hypothesis vector was previously computed from s1_embed, so
    # the model compared the premise with itself and ignored the hypothesis.
    v = torch.sum(s2_embed, 0).to(device)

    # Feature vector: both sentence vectors, their absolute difference and
    # their element-wise product, concatenated along dimension 0.
    feats = torch.cat((u, v, torch.abs(u - v), u * v), 0).to(device)

    with torch.no_grad():
        out = model(feats).to(device)
        pred = torch.max(out, 0)[1]

else:
    # Checkpoint for each LSTM-based variant.
    checkpoint_paths = {
        'lstm': lstm_path,
        'bilstm': bi_path,
        'bilstm_pool': pool_path,
    }
    if config['model_name'] not in checkpoint_paths:
        # Previously an unknown name fell through and failed later with a
        # NameError on PATH; fail early with an explicit message instead.
        raise ValueError(
            "Unknown model_name " + repr(config['model_name'])
            + "; expected one of: base, " + ", ".join(checkpoint_paths)
        )
    PATH = checkpoint_paths[config['model_name']]

    model = LSTM_main(config).to(device)
    # NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
    model.load_state_dict(torch.load(PATH, map_location=device))
    # Switch dropout / batch-norm layers (if any) to inference behaviour.
    model.eval()

    # Reshape (len, emb_dim) -> (len, 1, emb_dim): a batch holding one sentence.
    s1_embed = s1_embed.expand(1, s1_len, -1).transpose(0, 1)
    s2_embed = s2_embed.expand(1, s2_len, -1).transpose(0, 1)

    # Sentence lengths as 1-element int64 tensors, one entry per batch item.
    s1_len = torch.as_tensor(s1_len, dtype=torch.int64).expand(1)
    s2_len = torch.as_tensor(s2_len, dtype=torch.int64).expand(1)

    with torch.no_grad():
        out = model(((s1_embed, s1_len), (s2_embed, s2_len))).to(device)
        # out holds one row per batch item; take the argmax of the only row.
        pred = torch.max(out[0], 0)[1]


print("==========================================================================")

print("(encrypted) Model output:    ", str(out))

print("==========================================================================")
print("\nPremise: " + FLAGS.prem)
print("Hypothesis: " + FLAGS.hyp)

# Class index -> label, in the order used by the original if/elif chain.
LABELS = ('entailment', 'neutral', 'contradiction')
pred_idx = int(pred)
if 0 <= pred_idx < len(LABELS):
    print("\nPrediction is: " + LABELS[pred_idx])
    



 Hi I am the   bilstm_pool    model...!!

 Hhhhmmmmm....lemme think...

(encrypted) Model output:     tensor([[-3.5458,  1.3864,  2.1545]])

Premise: Bob is in his room, but because of the thunder and lightning outside, he cannot sleep 
Hypothesis: It is sunny outside

Prediction is: contradiction
