In [None]:
import torch
from transformers import AutoTokenizer, AutoModel
# Checkpoint shared by the tokenizer below and the sentence encoder defined later.
sentence_model = "sentence-transformers/paraphrase-MiniLM-L3-v2"
# `from_pretrained` is the Hugging Face loader classmethod; the previous spelling
# (`from_Pre-trained`) parsed as a subtraction and failed at runtime.
tokenizer = AutoTokenizer.from_pretrained(sentence_model)

# Mask-aware mean pooling: collapse token embeddings into one vector per sequence
def mean_pooling(model_input, attention_mask_input):
    """Average the token embeddings of each sequence, skipping masked positions.

    Args:
        model_input: Indexable model output whose first element holds the token
            embeddings (presumably shaped (batch, seq_len, hidden) -- TODO confirm).
        attention_mask_input: Mask tensor with one entry per token. It gets a
            trailing axis, is expanded to the embeddings' size and cast to float.

    Returns:
        The mask-weighted sum of the embeddings over dim 1 divided by the mask
        total over dim 1. The divisor is clamped to at least 1e-9, so a fully
        masked sequence yields zeros instead of a division by zero.
    """
    hidden_states = model_input[0]
    weights = attention_mask_input.unsqueeze(-1).expand(hidden_states.size()).float()
    masked_total = (hidden_states * weights).sum(1)
    token_counts = weights.sum(1).clamp(min=1e-9)
    return masked_total / token_counts

class SentenceBertModelClass(torch.nn.Module):
    """Scores a (sentence, document) pair with one shared transformer encoder.

    Both inputs are encoded by the same encoder (`self.l1`) and mean-pooled.
    The two pooled vectors and their elementwise product are concatenated and
    passed through Linear -> ReLU -> Dropout -> Linear -> Sigmoid.

    Args:
        model_name: Hugging Face checkpoint name loaded into the encoder.
        in_features: Width of one pooled embedding. The first linear layer
            takes ``in_features * 3`` inputs, so this must match the width of
            the vectors produced by `mean_pooling` for the chosen checkpoint.
    """

    def __init__(self, model_name="sentence-transformers/paraphrase-MiniLM-L3-v2", in_features=384):
        super().__init__()
        # `from_pretrained` is the Hugging Face loader classmethod.
        self.l1 = AutoModel.from_pretrained(model_name)
        self.pre_classifier = torch.nn.Linear(in_features * 3, 768)
        self.dropout = torch.nn.Dropout(0.3)
        self.classifier = torch.nn.Linear(768, 1)
        self.classifierSigmoid = torch.nn.Sigmoid()

    def _embed(self, input_ids, attention_mask):
        """Encode one batch of token ids and mean-pool it into one vector per row."""
        # The encoder's keyword is `attention_mask`; any other keyword name is
        # rejected by the model's forward signature.
        encoder_output = self.l1(input_ids=input_ids, attention_mask=attention_mask)
        return mean_pooling(encoder_output, attention_mask)

    def forward(self, sent_ids_var, doc_ids_var, sent_mask_var, doc_mask):
        """Return one sigmoid score per (sentence, document) row.

        Args:
            sent_ids_var: Token ids of the sentences.
            doc_ids_var: Token ids of the documents, row-aligned with the sentences.
            sent_mask_var: Attention mask matching `sent_ids_var`.
            doc_mask: Attention mask matching `doc_ids_var`.

        Returns:
            Output of the final sigmoid; the last linear layer has one output
            unit, so there is one score per input row.
        """
        sentence_embeddings_var = self._embed(sent_ids_var, sent_mask_var)
        doc_embeddings = self._embed(doc_ids_var, doc_mask)
        # Elementwise product of sentence and document embeddings
        combined_features = sentence_embeddings_var * doc_embeddings
        # Concatenate both embeddings with their elementwise product
        concat_features = torch.cat((sentence_embeddings_var, doc_embeddings, combined_features), dim=1)
        # Functional ReLU avoids building a new module object on every call.
        pooler_var = torch.relu(self.pre_classifier(concat_features))
        pooler_var = self.dropout(pooler_var)
        output_var = self.classifier(pooler_var)
        return self.classifierSigmoid(output_var)


In [None]:
from torch import cuda
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = 'cuda' if cuda.is_available() else 'cpu'
model_var = SentenceBertModelClass(model_name=sentence_model)
model_var.to(device)
# Binary cross-entropy on probabilities; the model's forward pass ends in a sigmoid.
loss_function = torch.nn.BCELoss()
# Optimise the parameters of the network built above (`model_var`). The name
# `model` is not defined by this cell, so referencing it here would either fail
# or silently optimise some unrelated model left in the kernel.
optimizer = torch.optim.Adam(params=model_var.parameters(), lr=LEARNING_RATE)


In [None]:
def train(epoch):
    """Run one training epoch of `model_var` over `training_loader`.

    Relies on notebook globals: `model_var`, `training_loader`, `device`,
    `loss_function`, `optimizer`, `tqdm`, `print_n_steps`, `train_params`,
    `train_df`, `acc_step_holder` and `loss_step_holder`.

    Each batch is a mapping with the keys 'sent_ids', 'doc_ids', 'sent_mask',
    'doc_mask' and 'targets'. A prediction counts as correct when the model
    output thresholded at 0.5 equals the target.

    Args:
        epoch: Epoch index; used only in the printed summary.

    Returns:
        None. Running metrics are printed every `print_n_steps` steps and
        appended to `acc_step_holder` / `loss_step_holder`; epoch totals are
        printed at the end. If the loader yields no batches a notice is
        printed and no metrics are computed.
    """
    tr_loss = 0
    n_correct = 0
    nb_tr_steps = 0
    nb_tr_examples = 0
    model_var.train()
    # Wrap the loader itself (not the enumerate object) so tqdm can read its length.
    for step, data in enumerate(tqdm(training_loader)):
        sent_ids_var = data['sent_ids'].to(device, dtype=torch.long)
        doc_ids_var = data['doc_ids'].to(device, dtype=torch.long)
        sent_mask_var = data['sent_mask'].to(device, dtype=torch.long)
        doc_mask_var = data['doc_mask'].to(device, dtype=torch.long)
        targets = data['targets'].to(device, dtype=torch.float)

        outputs_var = model_var(sent_ids_var, doc_ids_var, sent_mask_var, doc_mask_var)
        loss_var = loss_function(outputs_var, targets)

        tr_loss += loss_var.item()
        n_correct += torch.count_nonzero(targets == (outputs_var > 0.5)).item()
        nb_tr_steps += 1
        nb_tr_examples += targets.size(0)

        if step % print_n_steps == 0:
            loss_step = tr_loss / nb_tr_steps
            accu_step = (n_correct * 100) / nb_tr_examples
            print(str(step * train_params["batch_size"]) + "/" +
                  str(len(train_df)) + " - Steps. Acc ->", accu_step, "Loss ->",
                  loss_step)
            acc_step_holder.append(accu_step)
            loss_step_holder.append(loss_step)

        # Clear stale gradients, backpropagate this batch, then update weights.
        optimizer.zero_grad()
        loss_var.backward()
        optimizer.step()

    if nb_tr_steps == 0:
        # Nothing was processed, so averages are undefined; avoid dividing by zero.
        print(f"Epoch {epoch}: training_loader yielded no batches; nothing to report.")
        return

    epoch_loss = tr_loss / nb_tr_steps
    epoch_accu = (n_correct * 100) / nb_tr_examples
    print(f'The Total Accuracy for Epoch {epoch}: {epoch_accu}')
    print(f"Training Loss Epoch: {epoch_loss}")
    print(f"Training Accuracy Epoch: {epoch_accu}")

# Run the training routine once per epoch, EPOCHS times in total.
# Note: `epoch` remains defined as a notebook-level name after the loop finishes.
for epoch in range(EPOCHS):
   train(epoch)


In [None]:
def validate_model(model, testing_loader):
    """Evaluate `model` on `testing_loader` and return its accuracy in percent.

    Relies on notebook globals: `device`, `loss_function`, `print_n_steps`
    and `test_params`.

    Each batch is a mapping with the keys 'sent_ids', 'doc_ids', 'sent_mask',
    'doc_mask' and 'targets'. A prediction counts as correct when the model
    output thresholded at 0.5 equals the target.

    Args:
        model: Module called as ``model(sent_ids, doc_ids, sent_mask, doc_mask)``.
        testing_loader: Iterable of batches. When it exposes a sized `dataset`
            attribute, that size is shown as the progress denominator.

    Returns:
        Accuracy over all evaluated examples as a percentage, or NaN when the
        loader yields no batches.
    """
    model.eval()
    n_correct = 0
    tr_loss = 0
    nb_tr_steps = 0
    nb_tr_examples = 0

    # Progress is reported against the evaluated set, not the training frame.
    try:
        total_examples = len(testing_loader.dataset)
    except (AttributeError, TypeError):
        total_examples = "?"

    with torch.no_grad():
        for step, data in enumerate(testing_loader):
            sent_ids_var = data['sent_ids'].to(device, dtype=torch.long)
            doc_ids_var = data['doc_ids'].to(device, dtype=torch.long)
            sent_mask_var = data['sent_mask'].to(device, dtype=torch.long)
            doc_mask_var = data['doc_mask'].to(device, dtype=torch.long)
            targets = data['targets'].to(device, dtype=torch.float)

            outputs = model(sent_ids_var, doc_ids_var, sent_mask_var, doc_mask_var)
            loss_var = loss_function(outputs, targets)

            tr_loss += loss_var.item()
            n_correct += torch.count_nonzero(targets == (outputs > 0.5)).item()
            nb_tr_steps += 1
            nb_tr_examples += targets.size(0)

            if step % print_n_steps == 0:
                loss_step = tr_loss / nb_tr_steps
                accu_step = (n_correct * 100) / nb_tr_examples
                print(str(step * test_params["batch_size"]) + "/" +
                      str(total_examples) + " - Steps. Acc ->", accu_step, "Loss ->", loss_step)

    if nb_tr_steps == 0:
        # No batches were evaluated, so the averages are undefined.
        print("Validation skipped: testing_loader yielded no batches.")
        return float("nan")

    epoch_loss = tr_loss / nb_tr_steps
    epoch_accu = (n_correct * 100) / nb_tr_examples
    print(f"Validation Loss Epoch: {epoch_loss}")
    print(f"Validation Accuracy Epoch: {epoch_accu}")
    return epoch_accu


In [None]:
# Load the spaCy `en_core_web_lg` pipeline; `summarize` below iterates over
# `nlp(doc).sents` to split a document into sentences.
nlp = spacy.load('en_core_web_lg')
# Tokenize text as required by BERT-based models
def get_tokens(text, tokenizer, max_length=512):
    """Tokenize a batch of strings into fixed-length ids and attention masks.

    Args:
        text: Batch of strings handed to ``tokenizer.batch_encode_plus``.
        tokenizer: Object exposing ``batch_encode_plus`` (e.g. a Hugging Face
            tokenizer).
        max_length: Length every sequence is padded or truncated to.
            Defaults to 512.

    Returns:
        Tuple ``(ids, mask)`` taken from the tokenizer output's 'input_ids'
        and 'attention_mask' entries.
    """
    inputs = tokenizer.batch_encode_plus(
        text,
        add_special_tokens=True,
        max_length=max_length,
        padding="max_length",
        return_token_type_ids=True,
        truncation=True,
    )
    # The tokenizer's output key is 'attention_mask'.
    return inputs['input_ids'], inputs['attention_mask']

# Get predictions for an array of sentences and their corresponding document
def predict(sents, doc, classifier=None):
    """Score every sentence in `sents` against the document `doc`.

    Relies on the notebook globals `get_tokens` and `tokenizer`, and on the
    global `model` when `classifier` is not given.

    Args:
        sents: Batch of sentence strings.
        doc: Document text; its tokens are repeated once per sentence so the
            two model inputs are row-aligned.
        classifier: Module called as ``classifier(sent_ids, doc_ids,
            sent_mask, doc_mask)``. Defaults to the notebook-level `model`.

    Returns:
        The model output for the batch, or an empty (0, 1) tensor when
        `sents` is empty.
    """
    net = model if classifier is None else classifier
    if len(sents) == 0:
        # Nothing to score; skip the tokenizer and the model entirely.
        return torch.empty((0, 1))

    # Build the inputs on the device holding the model's weights.
    first_param = next(iter(net.parameters()), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    sent_id_var, sent_mask_var = get_tokens(sents, tokenizer)
    doc_id_var, doc_mask = get_tokens([doc], tokenizer)
    # List repetition: one copy of the document row per sentence.
    doc_id_var, doc_mask = doc_id_var * len(sents), doc_mask * len(sents)

    sent_id_var = torch.tensor(sent_id_var, dtype=torch.long, device=target_device)
    sent_mask_var = torch.tensor(sent_mask_var, dtype=torch.long, device=target_device)
    doc_id_var = torch.tensor(doc_id_var, dtype=torch.long, device=target_device)
    doc_mask = torch.tensor(doc_mask, dtype=torch.long, device=target_device)

    # Score in eval mode without autograd, then restore the caller's mode.
    was_training = net.training
    net.eval()
    try:
        with torch.no_grad():
            preds = net(sent_id_var, doc_id_var, sent_mask_var, doc_mask)
    finally:
        net.train(was_training)
    return preds


def summarize(doc, model, min_sentence_length=14, top_k=4, batch_size=3):
    """Build an extractive summary from the `top_k` highest-scoring sentences.

    Relies on the notebook globals `nlp` (sentence splitting) and `tqdm`.

    Args:
        doc: Document text.
        model: Scoring module, forwarded to `predict` for every batch.
        min_sentence_length: Sentences are kept only when ``len(sent)`` is
            strictly greater than this value.
        top_k: Number of sentences kept in the summary.
        batch_size: Number of sentences scored per model call.

    Returns:
        Tuple ``(summary, scores, sentences)``: the selected sentences joined
        by spaces in document order, the per-sentence score rows, and the list
        of candidate sentences.
    """
    # Replace newlines with spaces so words on adjacent lines are not fused.
    doc = doc.replace("\n", " ")
    doc_sentences = [str(sent) for sent in nlp(doc).sents if len(sent) > min_sentence_length]

    scores_var = []
    # Step through the sentences in chunks; this never yields an empty batch.
    for start in tqdm(range(0, len(doc_sentences), batch_size)):
        preds_var = predict(doc_sentences[start:start + batch_size], doc, classifier=model)
        scores_var.extend(preds_var.tolist())

    sent_pred_list = [
        {"sentence": sentence, "score": scores_var[i][0], "index": i}
        for i, sentence in enumerate(doc_sentences)
    ]
    # Keep the best-scoring sentences, then restore their document order.
    sorted_result = sorted(sent_pred_list, key=lambda k: k['score'], reverse=True)[:top_k]
    sorted_result.sort(key=lambda k: k['index'])
    summary = " ".join(x["sentence"] for x in sorted_result)
    return summary, scores_var, doc_sentences


In [None]:
import torch
from transformers import BertTokenizer, BertForQuestionAnswering
# Load a pre-trained BERT tokenizer and question-answering model.
# NOTE: this rebinds the notebook-level names `tokenizer` and `model`. Code that
# expects the sentence-model tokenizer created at the top of the notebook must
# run before this cell, or be given its objects explicitly.
# NOTE(review): "bert-base-uncased" looks like a base checkpoint without a
# fine-tuned question-answering head -- confirm that this is the intended
# checkpoint before trusting the predicted answers.
model_name = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForQuestionAnswering.from_pretrained(model_name)


In [None]:
def predict_answer(context, question):
    """Extract the answer span for `question` from `context`.

    Relies on the notebook globals `tokenizer` and `model`.

    Args:
        context: Passage the answer is extracted from.
        question: Question text; encoded first, followed by the context.

    Returns:
        The decoded tokens between the highest-scoring start position and the
        highest-scoring end position (inclusive).
    """
    inputs = tokenizer.encode_plus(question, context, return_tensors="pt", padding=True, truncation=True)
    # Run the model once, without autograd bookkeeping, and read both logit
    # sets from the same output object.
    with torch.no_grad():
        outputs = model(**inputs)
    start_idx = outputs.start_logits.argmax().item()
    end_idx = outputs.end_logits.argmax().item()
    # When the predicted end precedes the start, this slice is empty.
    answer_tokens = inputs["input_ids"][0][start_idx : end_idx + 1]
    return tokenizer.decode(answer_tokens)

# Example usage
# NOTE: the context sentence below does not state a height, so the question
# cannot be answered from it; this call only demonstrates how to invoke
# `predict_answer` and print its result.
context = "Mount Everest is the highest peak in the world."
question = "What is the height of Mount Everest?"
predicted_answer = predict_answer(context, question)
print(f"Predicted Answer: {predicted_answer}")
