# Demo and Analysis of InferSent Model

In [7]:
import os
import requests
from tqdm import tqdm

import torch
from train import NLINet
from data import SNLIData

## Download pretrained model

Use the following file IDs to download the corresponding models.


1. MeanEmbedding: `1q4ZRin0tKohQ504fi5HVkjDiolLZjuxg`
2. LSTM: `1lwClDt1cNaOtOo5h-bTx-rWr7ePeIyIO`
3. BiLSTM: `1zPZzm1EECkLdcbQ_SShhYPOrBXNu_zvz`
4. BiLSTM-maxpool: `12BzrDODCYjMZLhld1SFcyckwAa4Vj4fL`

In [2]:
def download_file_from_google_drive(id, destination):
  """Download a file from Google Drive by file ID and write it to disk.

  Args:
    id: Google Drive file ID. (The name shadows the builtin ``id``; it is
      kept so existing keyword callers keep working.)
    destination: Local path the downloaded bytes are written to. The file
      is opened in binary write mode, so an existing file is overwritten.

  Raises:
    requests.HTTPError: If the final response has a 4xx/5xx status. The
      check happens before ``destination`` is opened, so an error page is
      never saved in place of the requested file.
  """

  def get_confirm_token(response):
    """Return the value of the first ``download_warning*`` cookie, else None."""
    for key, value in response.cookies.items():
      if key.startswith('download_warning'):
        return value
    return None

  def save_response_content(response, destination):
    """Stream the response body to ``destination`` in fixed-size chunks."""
    CHUNK_SIZE = 32768

    with open(destination, "wb") as f:
      for chunk in tqdm(response.iter_content(CHUNK_SIZE)):
        if chunk:  # filter out keep-alive new chunks
          f.write(chunk)

  URL = "https://docs.google.com/uc?export=download"

  # One session is shared by both requests so cookies set by the first
  # response are sent with the second; the context manager releases the
  # pooled connections even if the download fails.
  with requests.Session() as session:
    response = session.get(URL, params={'id': id}, stream=True)
    token = get_confirm_token(response)

    if token:
      # The first (streamed) response is not read; close it before retrying
      # with the confirmation token attached.
      response.close()
      params = {'id': id, 'confirm': token}
      response = session.get(URL, params=params, stream=True)

    response.raise_for_status()
    save_response_content(response, destination)

Let's download the BiLSTM-maxpool checkpoint using its file ID from the list above.

In [8]:
# Skip the (slow) download when the checkpoint is already on disk, so that
# "Restart & Run All" does not fetch it again on every run.
# Delete the file to force a fresh download, e.g. after an interrupted run
# left a partial checkpoint behind.
if not os.path.exists('bilstm-maxpool.ckpt'):
    download_file_from_google_drive('12BzrDODCYjMZLhld1SFcyckwAa4Vj4fL','bilstm-maxpool.ckpt')

9193it [05:13, 29.34it/s]


## Load the model and vocabulary

Let's load the model with the downloaded checkpoint file.

In [56]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Restore the network from the downloaded checkpoint and move it to `device`.
model = NLINet.load_from_checkpoint('bilstm-maxpool.ckpt')
model = model.to(device)
# Last expression of the cell: switches to eval mode and displays the module.
model.eval()

NLINet(
  (model): InferSent(
    (encoder): BiLSTMEncoder(
      (embedding): Embedding(33893, 300)
      (linear): Linear(in_features=300, out_features=1028, bias=True)
      (relu): ReLU()
      (projection): Sequential(
        (0): Embedding(33893, 300)
        (1): Linear(in_features=300, out_features=1028, bias=True)
        (2): ReLU()
      )
      (lstm): LSTM(1028, 1028, batch_first=True, bidirectional=True)
    )
    (classifier): Classifier(
      (lin1): Linear(in_features=8224, out_features=512, bias=True)
      (lin2): Linear(in_features=512, out_features=512, bias=True)
      (lin3): Linear(in_features=512, out_features=3, bias=True)
      (relu): ReLU()
      (net): Sequential(
        (0): Linear(in_features=8224, out_features=512, bias=True)
        (1): ReLU()
        (2): Linear(in_features=512, out_features=512, bias=True)
        (3): ReLU()
        (4): Linear(in_features=512, out_features=3, bias=True)
      )
    )
  )
  (criterion): CrossEntropyLoss()
)

In [None]:
# Build the SNLI data wrapper (imported from the project's `data` module) and
# take its vocabulary; `vocab` is what get_encoded_text indexes with tokens.
# NOTE(review): only the vocabulary is used in this notebook, so batch_size is
# presumably irrelevant here — confirm whether SNLIData requires it.
data = SNLIData(batch_size=128)
vocab = data.get_vocab()

## Making predictions

In [47]:
def get_encoded_text(text, max_len=7):
    """Convert one sentence to vocabulary indices and run the sentence encoder.

    Args:
        text: A raw sentence (``str``), which is split on whitespace, or an
            already tokenized sequence of token strings.
        max_len: Minimum number of indices; shorter inputs are right-padded
            with the ``<pad>`` index. Longer inputs are NOT truncated.

    Returns:
        The result of ``model.model.encode`` on a batch containing this one
        sentence, cast to ``long``.
    """
    # Iterating directly over a str yields single characters, so a raw
    # sentence would be looked up character by character instead of word by
    # word. Split strings into words; leave pre-tokenized input untouched.
    # NOTE(review): plain whitespace splitting (no lowercasing, punctuation
    # left attached) is an assumption — confirm against the tokenizer used
    # to build `vocab`.
    tokens = text.split() if isinstance(text, str) else list(text)

    sent_idxs = [vocab[token] for token in tokens]

    # padding
    n_missing = max_len - len(sent_idxs)
    if n_missing > 0:
        sent_idxs.extend([vocab["<pad>"]] * n_missing)

    # Extra list level adds the batch dimension (batch of one sentence).
    embed = torch.LongTensor([sent_idxs]).to(device)
    # NOTE(review): `.long()` truncates a floating-point encoder output to
    # integers; kept unchanged because the caller's expectations cannot be
    # checked from this notebook — confirm this cast is intended.
    return model.model.encode(embed).long()

def get_label(idx):
    """Map a predicted class index to its NLI label name.

    Args:
        idx: Integer class index (0, 1 or 2).

    Returns:
        'Entailment', 'Contradiction' or 'Neutral'.

    Raises:
        KeyError: If ``idx`` is not one of the three known indices.
    """
    # NOTE(review): the index order is assumed to match the label encoding
    # used during training — confirm against the data pipeline.
    label_map = {0:'Entailment',1:'Contradiction',2:'Neutral'}
    return label_map[idx]

Now let's provide our own premise and hypothesis and see what the model predicts.

In [52]:
# Example sentence pair; both strings are passed to get_encoded_text in the
# prediction cell below. Edit them to try other inputs.
premise = 'A soccer game with multiple males playing.'
hypothesis = 'Some men are playing a sport.'

In [55]:
premise_enc = get_encoded_text(premise)
hypothesis_enc = get_encoded_text(hypothesis)

# The model is called with a ((premise, hypothesis), label) structure. Pass an
# explicit None in the label slot instead of IPython's `_` (the last cell
# output), whose value depends on which cells happened to run before this one.
# NOTE(review): assumes the forward pass does not use the label — confirm.
with torch.no_grad():  # inference only; no gradients needed
    out = model.model(((premise_enc, hypothesis_enc), None))

# softmax is monotonic, so taking argmax over the raw scores selects the same
# class as argmax over the softmax probabilities.
pred = get_label(torch.argmax(out, 1).item())

print('Prediction:', pred)

Prediction: Entailment
