<center><h1> Predicting the Next Sentence </h1></center>
<center> Using BERT </center>

# Import

In [2]:
!pip install pytorch-pretrained-bert --quiet

You are using pip version 19.0.2, however version 19.0.3 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.


In [65]:
import torch
import numpy as np
from pytorch_pretrained_bert import BertTokenizer, BertModel, BertForMaskedLM,BertForNextSentencePrediction
from nltk import wordpunct_tokenize, WordNetLemmatizer, sent_tokenize, pos_tag

In [22]:
def softmax(x, axis=-1):
    """Compute softmax probabilities for each set of scores in x.

    Parameters
    ----------
    x : array_like
        Raw scores (logits). For a 2-D input of shape (batch, classes) every
        row is treated as an independent set of scores.
    axis : int or None, optional
        Axis along which the scores are normalized. Defaults to the last
        axis, so each row of a batch sums to 1 on its own. Pass ``None`` to
        normalize over the whole array at once.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``x`` whose values sum to 1 along
        ``axis``.
    """
    x = np.asarray(x)
    # Subtracting the maximum does not change the result but keeps np.exp
    # from overflowing on large scores.
    e_x = np.exp(x - np.max(x, axis=axis, keepdims=True))
    # keepdims=True keeps the reduced axis so the division broadcasts per row.
    return e_x / e_x.sum(axis=axis, keepdims=True)

# Load the model

`BertForNextSentencePrediction` includes the BertModel Transformer followed by the next sentence classification head.

- Inputs comprise the inputs of the `BertModel` class plus an optional label:
    - `next_sentence_label`: labels for the next sentence classification loss: torch.LongTensor of shape [batch_size] with indices selected in [0, 1]. 0 => next sentence is the continuation, 1 => next sentence is a random sentence.
- Outputs:
    - if `next_sentence_label` is not None: Outputs the next sentence classification loss.
    - if `next_sentence_label` is None: Outputs the next sentence classification logits.

In [4]:
# Load pre-trained model (weights)
model = BertForNextSentencePrediction.from_pretrained('bert-base-uncased')
# Switch to evaluation mode for inference (the model contains Dropout layers,
# see the printed architecture below); the call is last so its repr is displayed
model.eval()

100%|██████████| 407873900/407873900 [02:37<00:00, 2597743.18B/s]


BertForNextSentencePrediction(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): BertLayerNorm()
      (dropout): Dropout(p=0.1)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): BertLayerNorm()
              (dropout): Dropout(p=0.1)
            )
          )
          (intermediate): BertInterme

# Tokenize the text

In [5]:
# Load pre-trained model tokenizer (vocabulary)
# Uses the same 'bert-base-uncased' name as the model weights loaded above
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

100%|██████████| 231508/231508 [00:00<00:00, 886843.71B/s]


In [42]:
# Tokenized input: two sentences in one string, split with the BERT tokenizer
# NOTE(review): no "[CLS]" / "[SEP]" markers are placed around the two sentences;
# BERT sentence-pair inputs are usually formatted as "[CLS] A [SEP] B [SEP]"
# (see source [2]) -- confirm whether omitting them here is intended
text = "How was your day ? Apply soft max function"
tokenized_text = tokenizer.tokenize(text)

In [75]:
# Convert tokens to vocabulary indices (one integer id per token)
indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
print(indexed_tokens)

[2129, 2001, 2115, 2154, 1029, 6611, 3730, 4098, 3853]


In [66]:
def preprocess(document):
    """Split a document into a flat list of word and punctuation tokens.

    The document is first broken into sentences with ``sent_tokenize`` and
    each sentence is then split with ``wordpunct_tokenize``. Tokens are
    returned exactly as produced by the tokenizer: no lemmatization or other
    normalization is applied.

    Parameters
    ----------
    document : str
        Raw text, possibly containing several sentences.

    Returns
    -------
    list of str
        All tokens of the document, in reading order.
    """
    tokens = []

    # Break the document into sentences, then each sentence into tokens.
    # Part-of-speech tagging is not needed: only the tokens are returned.
    for sent in sent_tokenize(document):
        tokens.extend(wordpunct_tokenize(sent))

    return tokens

In [71]:
def sent_idx(text, tokenize=None):
    """Build the per-token segment ids of a text.

    The text is split into sentences with ``sent_tokenize``; every token of
    the first sentence gets id 0, every token of the second gets id 1, and
    so on.

    Parameters
    ----------
    text : str
        Raw text containing the sentences to label.
    tokenize : callable, optional
        Function mapping a sentence string to its list of tokens; only the
        number of tokens is used. Defaults to ``preprocess``. Pass the
        model tokenizer's own ``tokenize`` method when the ids must line up
        with that tokenizer's output.
        NOTE(review): this assumes tokenizing sentence by sentence yields the
        same tokens as tokenizing the whole text -- verify for the tokenizer
        in use.

    Returns
    -------
    list of int
        One segment id per token, in reading order. Ids keep growing with
        the number of sentences, while the BERT model loaded in this
        notebook has only two token types (0 and 1), so the text should
        hold at most two sentences.
    """
    if tokenize is None:
        tokenize = preprocess

    segments_ids = []
    for i, sent in enumerate(sent_tokenize(text)):
        # Repeat the sentence index once per token of that sentence.
        segments_ids.extend([i] * len(tokenize(sent)))
    return segments_ids

In [76]:
# Define sentence A and B indices associated to 1st and 2nd sentences
segments_ids = sent_idx(text)

# The segment ids come from a different tokenizer than the token ids, so check
# that they line up one-to-one before they are fed to the model together.
assert len(segments_ids) == len(indexed_tokens), (
    "segment ids and token ids have different lengths: "
    f"{len(segments_ids)} != {len(indexed_tokens)}"
)
# The model only has two token types (sentence A = 0, sentence B = 1).
assert set(segments_ids) <= {0, 1}, "text must contain at most two sentences"

# Make the prediction

In [77]:
# Convert inputs to PyTorch tensors
# Each list is wrapped in an outer list, so both tensors hold a batch of one sequence
tokens_tensor = torch.tensor([indexed_tokens])
segments_tensors = torch.tensor([segments_ids])

In [78]:
# Predict whether the second sentence follows the first.
# No label is passed, so the model returns the classification logits.
# Inference only: disable gradient tracking so no autograd graph is built.
with torch.no_grad():
    predictions = model(tokens_tensor, segments_tensors)

In [79]:
# Output softmax probabilities
# 1. Value 0 is the probability that the second sentence is the continuation of the first
# 2. Value 1 is the probability that the second sentence is a random sentence

# The inputs were a batch of one sequence, so [0] selects that single example
pred = softmax(predictions.detach().numpy())[0]
pred

array([0.20250973, 0.7974903 ], dtype=float32)

In [80]:
# Report both class probabilities: index 0 = is next sentence, index 1 = is not
print("Probability is Next Sentence : ", pred[0])
print("Probability is not Next Sentence : ", pred[1])

Probability is Next Sentence :  0.20250973
Probability is not Next Sentence :  0.7974903


**Sources :**

[1] https://github.com/huggingface/pytorch-pretrained-BERT#doc 

[2] https://github.com/huggingface/pytorch-pretrained-BERT/issues/48