In [16]:
!pip install flask-ngrok
!pip install transformers -q
!pip install -U sentence-transformers



In [17]:
"""
Uncomment the code below if you want to load the files from drive
"""
# from google.colab import drive
# drive.mount('/content/drive')

'\nUncomment the code below if you want to load the files from drive\n'

In [18]:
from pickle import load
from sentence_transformers import SentenceTransformer
import numpy as np
import torch

In [19]:
if torch.cuda.is_available():      
    device = torch.device("cuda")
else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")

In [20]:
# model_names = ['bert-large-cased','albert-xxlarge-v2','roberta-large','xlnet-large-cased']

In [21]:
"""
Change the path in the following cells to their appropriate trained models shared.
"""

'\nChange the path in the following cells to their appropriate trained models shared.\n'

In [22]:
single_models_directory = {"bert":"/content/drive/MyDrive/IIT_Bombay/CS 626/Single_Models/BERT",
                    "roberta":"/content/drive/MyDrive/IIT_Bombay/CS 626/Single_Models/RoBERTa",
                    "xlnet":"/content/drive/MyDrive/IIT_Bombay/CS 626/Single_Models/XLNet",
                    "albert":"/content/drive/MyDrive/IIT_Bombay/CS 626/Single_Models/ALBERT"}

In [23]:
two_sentence_models_directory = {"bert":"/content/drive/MyDrive/IIT_Bombay/CS 626/Two_Sentences_Models/BERT",
                    "roberta":"/content/drive/MyDrive/IIT_Bombay/CS 626/Two_Sentences_Models/Roberta",
                    "xlnet":"/content/drive/MyDrive/IIT_Bombay/CS 626/Two_Sentences_Models/XL-Net",
                    "albert":"/content/drive/MyDrive/IIT_Bombay/CS 626/Two_Sentences_Models/ALBERT"}

In [24]:
sbert_sent_transformer = '/content/drive/MyDrive/IIT_Bombay/CS 626/trained_model_with_reasons_epoch10'
sbert_single_sentence = '/content/drive/MyDrive/IIT_Bombay/CS 626/svm_models/trained_svm.sav'
sbert_two_sentences = '/content/drive/MyDrive/IIT_Bombay/CS 626/svm_models/trained_svm_on_pairs.sav'

In [25]:
def load_model(m,c):
  if c=="s":
    models_directory = single_models_directory
  else:
    models_directory = two_sentence_models_directory
  import torch
  from transformers import AutoModelForSequenceClassification,AutoTokenizer,AdamW

  # Load a trained model and vocabulary that you have fine-tuned
  model = AutoModelForSequenceClassification.from_pretrained(models_directory[m])
  tokenizer = AutoTokenizer.from_pretrained(models_directory[m])

  # Copy the model to the GPU.
  model.to(device)
  return model, tokenizer

In [26]:
def predict_for_a_sentence(sentence0,model,tokenizer):
  import numpy as np
  from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
  from keras.preprocessing.sequence import pad_sequences
  from scipy.special import softmax

  # sentences = np.array([sentence0])
  labels = np.array([0])

  input_ids = []

  encoded_sent = tokenizer.encode(
                        sentence0,                      # Sentence to encode.
                        add_special_tokens = True, # Add '[CLS]' and '[SEP]'
                   )
    
  input_ids.append(encoded_sent)
  input_ids = pad_sequences(input_ids, maxlen=32, 
                          dtype="long", truncating="post", padding="post")
  # Create attention masks
  attention_masks = []

  # Create a mask of 1s for each token followed by 0s for padding
  for seq in input_ids:
    seq_mask = [float(i>0) for i in seq]
    attention_masks.append(seq_mask) 

  # Convert to tensors.
  prediction_inputs = torch.tensor(input_ids)
  prediction_masks = torch.tensor(attention_masks)
  prediction_labels = torch.tensor(labels)

  # Set the batch size.  
  batch_size = 32  

  # Create the DataLoader.
  prediction_data = TensorDataset(prediction_inputs, prediction_masks, prediction_labels)
  prediction_sampler = SequentialSampler(prediction_data)
  prediction_dataloader = DataLoader(prediction_data, sampler=prediction_sampler, batch_size=batch_size)

  # Put model in evaluation mode
  model.eval()

  # Tracking variables 
  predictions , true_labels = [], []

  # Predict 
  for batch in prediction_dataloader:
    # Add batch to GPU
    batch = tuple(t.to(device) for t in batch)
    
    # Unpack the inputs from our dataloader
    b_input_ids, b_input_mask, b_labels = batch
    
    # Telling the model not to compute or store gradients, saving memory and 
    # speeding up prediction
    with torch.no_grad():
        # Forward pass, calculate logit predictions
        outputs = model(b_input_ids, token_type_ids=None, 
                        attention_mask=b_input_mask)

    logits = outputs[0]

    # Move logits and labels to CPU
    logits = logits.detach().cpu().numpy()
    label_ids = b_labels.to('cpu').numpy()
    
    # Store predictions and true labels
    predictions.append(logits)
    true_labels.append(label_ids)

  return np.argmax(predictions[0], axis=1).flatten(),np.max(softmax(np.array(logits))[0])


In [27]:
def predict_for_two_sentences(sentence0,sentence1,model,tokenizer):
  from scipy.special import softmax
  import numpy as np
  from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
  MAX_LEN = 64
  # Tokenize all of the sentences and map the tokens to thier word IDs.
  labels = np.array([0])
  input_ids = []
  input_type_ids = []
  attention_masks = []

  # For every sentence...
  tokenizer.padding_side = 'right'
  encoded_sent = tokenizer.encode_plus(
                          sentence0,            # Sentence to encode.
                          sentence1,
                          add_special_tokens = True, 
                          max_length = MAX_LEN,
                          pad_to_max_length = True
                          )

  #Add the encoded sentence to the list.
  input_ids.append(encoded_sent['input_ids'])
  # input_type_ids.append(encoded_sent['token_type_ids'])
  attention_masks.append(encoded_sent['attention_mask'])

  # Convert to tensors.
  prediction_inputs = torch.tensor(input_ids)
  prediction_masks = torch.tensor(attention_masks)
  prediction_labels = torch.tensor(labels)

  # Set the batch size.  
  batch_size = 32  

  # Create the DataLoader.
  prediction_data = TensorDataset(prediction_inputs, prediction_masks, prediction_labels)
  prediction_sampler = SequentialSampler(prediction_data)
  prediction_dataloader = DataLoader(prediction_data, sampler=prediction_sampler, batch_size=batch_size)

  model.eval()

  # Tracking variables 
  predictions , true_labels = [], []

  # Predict 
  for batch in prediction_dataloader:
    # Add batch to GPU
    batch = tuple(t.to(device) for t in batch)
    
    # Unpack the inputs from our dataloader
    b_input_ids, b_input_mask, b_labels = batch
    
    # Telling the model not to compute or store gradients, saving memory and 
    # speeding up prediction
    with torch.no_grad():
        # Forward pass, calculate logit predictions
        outputs = model(b_input_ids, token_type_ids=None, 
                        attention_mask=b_input_mask)

    logits = outputs[0]

    # Move logits and labels to CPU
    logits = logits.detach().cpu().numpy()
    label_ids = b_labels.to('cpu').numpy()
    
    # Store predictions and true labels
    predictions.append(logits)
    true_labels.append(label_ids)

    return np.argmax(predictions[0], axis=1).flatten(), np.max(softmax(np.array(logits))[0])
    # return predictions

In [28]:
bert_model,bert_tokenizer = load_model("bert","s")
albert_model,albert_tokenizer = load_model("albert","s")
roberta_model,roberta_tokenizer = load_model("roberta","s")
xlnet_model,xlnet_tokenizer = load_model("xlnet","s")

In [29]:
bert_model_2,bert_tokenizer_2 = load_model("bert","t")
albert_model_2,albert_tokenizer_2 = load_model("albert","t")
roberta_model_2,roberta_tokenizer_2 = load_model("roberta","t")
xlnet_model_2,xlnet_tokenizer_2 = load_model("xlnet","t")

In [30]:
def predict_single_sent_sbert(input_sentence):
    sbert_model = SentenceTransformer(sbert_sent_transformer)
    svm_model = load(open(sbert_single_sentence, 'rb'))
    sentence = [input_sentence]
    sentence_embedding = sbert_model.encode(sentence)
    prediction = svm_model.predict(sentence_embedding)
    pred_prob = svm_model.predict_proba(sentence_embedding)
    print(prediction)
    print(pred_prob)
    return prediction[0], pred_prob[0][1]
    

def predict_pairs_sbert(input1, input2):
    sbert_model = SentenceTransformer(sbert_sent_transformer)
    svm_model = load(open(sbert_two_sentences, 'rb'))
    sentence = [input1, input2]
    sentence_embedding = sbert_model.encode(sentence)
    input_embedding = [np.concatenate((sentence_embedding[0], sentence_embedding[1]))]
    prediction = svm_model.predict(input_embedding)
    pred_prob = svm_model.predict_proba(input_embedding)
    print(prediction)
    print(pred_prob)
    return prediction[0], pred_prob[0][prediction[0]]

In [31]:
label, score = predict_single_sent_sbert('An apple is in the moon')
print("Label: ", label, " Score: ", score)
label, score = predict_pairs_sbert('An apple is in the refrigerator', 'An elephant is in the refrigerator')
print("Label: ", label, " Score: ", score)

[0]
[[0.97882327 0.02117673]]
Label:  0  Score:  0.021176733891169686
[0]
[[0.99751757 0.00248243]]
Label:  0  Score:  0.9975175661909539


In [32]:
predict_for_a_sentence("he eats spoon",bert_model_2,bert_tokenizer_2)

(array([1]), 0.85771805)

In [33]:
predict_for_two_sentences("he drinks milk","he drinks spoon",bert_model_2,bert_tokenizer_2)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


(array([1]), 0.757974)

In [None]:
"""
Add the templates folder containing the index.html to the folder. 
Change the form action url in html to the ngrok url after running this cell.
"""

from flask import Flask, request, jsonify, render_template
from flask_ngrok import run_with_ngrok
import json

app = Flask(__name__)
run_with_ngrok(app)


@app.route('/')
def hello_world():
    """
    Demo function to check the api calls.

    Returns : string
    """
    return render_template("index.html")

@app.route('/getprediction',methods=['POST'])
def getprediction():   
  inputs = request.form.values()
  sentences = []
  for i in inputs:
    sentences.append(i)
  
  if len(sentences[1])==0:
    bert_output = predict_for_a_sentence(sentences[0],bert_model,bert_tokenizer)
    albert_output = predict_for_a_sentence(sentences[0],albert_model,albert_tokenizer)
    roberta_output = predict_for_a_sentence(sentences[0],roberta_model,roberta_tokenizer)
    xlnet_output = predict_for_a_sentence(sentences[0],xlnet_model,xlnet_tokenizer)
    sbert_label, sbert_score = predict_single_sent_sbert(sentences[0])

    input_type = "Single Sentence"
    input_text = sentences[0]
    if bert_output[0][0]==1:
      bert_sense = "no sense"
      bert_score = str(1 - bert_output[1])
    else:
      bert_sense = "makes sense"
      bert_score = str(bert_output[1])
    if albert_output[0][0]==1:
      albert_sense = "no sense"
      albert_score = str(1-albert_output[1])
    else:
      albert_sense = "makes sense"
      albert_score = str(albert_output[1])
    if roberta_output[0][0]==1:
      roberta_sense = "no sense"
      roberta_score = str(1-roberta_output[1])
    else:
      roberta_sense = "makes sense"
      roberta_score = str(roberta_output[1])
    if xlnet_output[0][0]==1:
      xlnet_sense = "no sense"
      xlnet_score = str(1-xlnet_output[1])
    else:
      xlnet_sense = "makes sense"
      xlnet_score = str(xlnet_output[1])
    sbert_sense = "no sense" if (sbert_label == 0) else "makes sense"
    sbert_score = str(sbert_score)
    

        
        
    output_sentence = sentences[0]
    # bert = "BERT :    " + bert_sense + "   ---> " + str(bert_output[1])
    # albert = "ALBERT :  " + albert_sense + "   ---> " + str(albert_output[1])
    # roberta = "ROBERTA : " + roberta_sense + "   ---> " + str(roberta_output[1])
    # xlnet = "XLNET :   " + xlnet_sense + "   ---> " + str(xlnet_output[1])
    # bert_score = str(bert_output[1])
    # albert_score = str(albert_output[1])
    # roberta_score = str(roberta_output[1])
    # xlnet_score = str(xlnet_output[1])
    # sbert_score = str(sbert_score)
  else:
    bert_output, bert_score = predict_for_two_sentences(sentences[0],sentences[1],bert_model_2,bert_tokenizer_2)
    albert_output, albert_score = predict_for_two_sentences(sentences[0],sentences[1],albert_model_2,albert_tokenizer_2)
    roberta_output, roberta_score = predict_for_two_sentences(sentences[0],sentences[1],roberta_model_2,roberta_tokenizer_2)
    xlnet_output, xlnet_score = predict_for_two_sentences(sentences[0],sentences[1],xlnet_model_2,xlnet_tokenizer_2)
    sbert_label, sbert_score = predict_pairs_sbert(sentences[0], sentences[1])

    input_type = "Sentence Pair"
    input_text = sentences[0] + " , " + sentences[1]

    output_header = "Sentence that makes sense: "
    if bert_output[0]==1:
      bert_sense = output_header + sentences[0]
      # bert_sense = sentences[0] + " : makes sense, " + sentences[1] + " : no sense" 
    else:
      # bert_sense = sentences[0] + " : no sense,    " + sentences[1] + " : makes sense" 
      bert_sense = output_header + sentences[1]
    if albert_output[0]==1:
      # albert_sense = sentences[0] + " : makes sense, " + sentences[1] + " : no sense" 
      albert_sense = output_header + sentences[0]
    else:
      # albert_sense = sentences[0] + " : no sense,    " + sentences[1] + " : makes sense" 
      albert_sense = output_header + sentences[1]
    if roberta_output[0]==1:
      # roberta_sense = sentences[0] + " : makes sense, " + sentences[1] + " : no sense" 
      roberta_sense = output_header + sentences[0]
    else:
      # roberta_sense = sentences[0] + " : no sense,    " + sentences[1] + " : makes sense" 
      roberta_sense = output_header + sentences[1]
    if xlnet_output[0]==1:
      # xlnet_sense = sentences[0] + " : makes sense, " + sentences[1] + " : no sense" 
      xlnet_sense = output_header + sentences[0]
    else:
      # xlnet_sense = sentences[0] + " : no sense,    " + sentences[1] + " : makes sense" 
      xlnet_sense = output_header + sentences[1]


    bert_score = str(bert_score)
    albert_score = str(albert_score)
    roberta_score = str(roberta_score)
    xlnet_score = str(xlnet_score)
    sbert_score = str(sbert_score)
    
    sbert_sense = "Sentence that makes sense: "
    sbert_sense += sentences[0] if (sbert_label == 0) else sentences[1]
        
  # return render_template("index.html", bert="BERT    ----> "+ bert_sense, albert="ALBERT  ----> "+albert_sense, roberta="ROBERTA t----> "+roberta_sense, xlnet="XLNET   ----> "+xlnet_sense, sbert="SBERT   ---> "+ sbert_sense)
  return render_template("index.html", bert=bert_sense, albert=albert_sense, roberta=roberta_sense, xlnet=xlnet_sense, sbert=sbert_sense, bert_sc = bert_score, albert_sc = albert_score, roberta_sc = roberta_score, xlnet_sc = xlnet_score, sbert_sc = sbert_score, selection = input_type, input_txt = input_text)


if __name__ == "__main__":
  app.run()

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)


 * Running on http://bfc9-34-80-255-190.ngrok.io
 * Traffic stats available on http://127.0.0.1:4040


127.0.0.1 - - [28/Nov/2021 06:39:27] "[37mGET / HTTP/1.1[0m" 200 -
127.0.0.1 - - [28/Nov/2021 06:39:29] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -
127.0.0.1 - - [28/Nov/2021 06:39:29] "[37mGET / HTTP/1.1[0m" 200 -
127.0.0.1 - - [28/Nov/2021 06:39:34] "[31m[1mGET /getprediction HTTP/1.1[0m" 405 -
127.0.0.1 - - [28/Nov/2021 06:39:34] "[37mPOST /getprediction HTTP/1.1[0m" 200 -


[1]
[[0.0128885 0.9871115]]


Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-s

[0]
[[0.99657874 0.00342126]]
