In [None]:

import torch
from torch.nn import functional as F

from transformers import (BertTokenizer, BertForMaskedLM, BertForNextSentencePrediction,
                        AutoModelForQuestionAnswering,AutoTokenizer, AutoModelForSeq2SeqLM, 
                        LEDTokenizer, LEDForConditionalGeneration)




In [None]:
# Shared BERT tokenizer; the encoding, masked-LM and next-sentence cells below reuse it.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

## Encoding

In [None]:
text = '08:59: waiting for my team to join the call'

# Call the tokenizer directly (as the question-answering and seq2seq cells do)
# instead of going through the deprecated `encode_plus` alias.
encoding = tokenizer(
    text,
    add_special_tokens=True,      # add the model's special tokens around the text
    truncation=True,
    padding="max_length",         # no max_length given, so pad to the model's maximum input length
    return_attention_mask=True,   # mask that separates real tokens from padding
    return_tensors="pt",          # return PyTorch tensors
)
encoding

## Masked language modeling

In [None]:
# BERT with the masked-language-modelling head; return_dict=True makes the
# forward pass return an output object whose `.logits` the later cell reads.
masked_model = BertForMaskedLM.from_pretrained(
    "bert-base-uncased",
    return_dict=True,
)

In [None]:
# Prompt containing one mask placeholder for the model to fill in.
text = "The Opera House in Australia is in , " + tokenizer.mask_token + " city"

# Call the tokenizer directly instead of the deprecated `encode_plus` alias.
# NOTE: `input` shadows the Python builtin; the name is kept because the
# following cell unpacks it with `masked_model(**input)`.
input = tokenizer(text, return_tensors = "pt")
# torch.where returns a tuple of index tensors: the position(s) of the mask token
# within the first (and only) encoded sequence.
mask_index = torch.where(input["input_ids"][0] == tokenizer.mask_token_id)

In [None]:
# Inference only: run the forward pass without autograd tracking,
# the same way the question-answering cell does.
with torch.no_grad():
    output = masked_model(**input)

# Convert the vocabulary logits to probabilities along the last dimension.
softmax = F.softmax(output.logits, dim = -1)
# Probability distribution at the masked position of the first sequence.
mask_word = softmax[0, mask_index, :]
# torch.topk returns (values, indices); keep the indices of the 3 most probable tokens.
# NOTE: despite its name, `top_10` holds 3 entries; the name is kept because
# the following cell iterates over it.
top_10 = torch.topk(mask_word, 3, dim = 1)[1][0]

In [None]:
# Substitute every candidate token for the mask placeholder and show the result.
for token in top_10:
    word = tokenizer.decode([token])
    new_sentence = text.replace(tokenizer.mask_token, word)
    print(new_sentence)

## Next sentence prediction

In [None]:
# BERT with the next-sentence-prediction head, used by the two cells below.
nsp_model = BertForNextSentencePrediction.from_pretrained("bert-base-uncased")

In [None]:
prompt = "Incredible journey, Isha! Your dedication and teamwork shine through this experience."

next_sentence = "It's inspiring to see how you're using technology for such a meaningful cause."

# Passing two texts to the tokenizer encodes them together as one sentence pair
# (direct call instead of the deprecated `encode_plus` alias).
encoding = tokenizer(prompt, next_sentence, return_tensors='pt')

# Inference only: no autograd tracking, so the displayed tensor carries no grad_fn.
with torch.no_grad():
    outputs = nsp_model(**encoding).logits

# Probabilities over the two NSP classes. Per the Transformers documentation for
# BertForNextSentencePrediction, index 0 means "sentence B continues sentence A"
# and index 1 means "sentence B is a random sentence".
F.softmax(outputs, dim = 1)

In [None]:
prompt = "Incredible journey, Isha! Your dedication and teamwork shine through this experience."

next_sentence = "80% of chronic diseases are preventable. "

# Passing two texts to the tokenizer encodes them together as one sentence pair
# (direct call instead of the deprecated `encode_plus` alias).
encoding = tokenizer(prompt, next_sentence, return_tensors='pt')

# Inference only: no autograd tracking, so the displayed tensor carries no grad_fn.
with torch.no_grad():
    outputs = nsp_model(**encoding).logits

# Probabilities over the two NSP classes. Per the Transformers documentation for
# BertForNextSentencePrediction, index 0 means "sentence B continues sentence A"
# and index 1 means "sentence B is a random sentence".
F.softmax(outputs, dim = 1)

## Question answering
### Barely acceptable results

In [None]:
# Load the question-answering tokenizer and model from the same checkpoint name.
model_name = "deepset/bert-base-cased-squad2"
qa_tokeniser = AutoTokenizer.from_pretrained(model_name)
qa_model = AutoModelForQuestionAnswering.from_pretrained(model_name)

In [None]:
context = "My name is Clara and I live in Berkeley."

question = "Where do I live?"

# Passing the question and the context as two separate arguments makes the
# tokenizer encode them together as a single sequence pair.
tokenized_inputs = qa_tokeniser(question, context, return_tensors="pt")

# Inference only: no autograd tracking needed.
with torch.no_grad():
    outputs = qa_model(**tokenized_inputs)

# start_logits / end_logits: torch.FloatTensor of shape (batch_size, sequence_length)
# holding the span-start / span-end scores (before SoftMax).
# argmax picks the highest-scoring start and end positions.
answer_start_index = outputs.start_logits.argmax()
answer_end_index = outputs.end_logits.argmax()

# Slice the predicted span (end index inclusive) out of the input ids and decode it.
predict_answer_tokens = tokenized_inputs.input_ids[0, answer_start_index : answer_end_index + 1]
qa_tokeniser.decode(predict_answer_tokens)
     
     

## BERT Text Generation (Dual BERT Architecture)
### Don't use it


In [None]:
# Tokenizer and seq2seq model are loaded from the same checkpoint so their vocabularies match.
b2b_tokenizer = AutoTokenizer.from_pretrained(
    "google/roberta2roberta_L-24_discofuse"
)
model = AutoModelForSeq2SeqLM.from_pretrained(
    "google/roberta2roberta_L-24_discofuse"
)

In [None]:
# Two sentences for the model to fuse into one.
discofuse = """As a run-blocker, Zeitler moves relatively well. Zeitler often struggles at the point of contact in space."""

input_ids = b2b_tokenizer(discofuse, return_tensors="pt").input_ids
# generate returns a batch of sequences; keep the first (only) one.
output_ids = model.generate(input_ids)[0]
# Decode with the tokenizer that produced `input_ids` (`b2b_tokenizer`), not the
# BERT `tokenizer` from the earlier cells: the generated ids index this
# checkpoint's vocabulary, so the BERT tokenizer would map them to the wrong tokens.
print(b2b_tokenizer.decode(output_ids, skip_special_tokens=True))