<a href="https://colab.research.google.com/github/sumeyyedemir5/nlp-preprocessing_and_textRepresentation/blob/main/Question_Answering_with_BERT_And_GPT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# BERT
from transformers import BertTokenizer, BertForQuestionAnswering
import torch

import warnings
warnings.filterwarnings('ignore')

# BERT language model fine-tuned on the SQuAD dataset

# Checkpoint identifier passed to both from_pretrained calls below.
model_name  = "bert-large-uncased-whole-word-masking-finetuned-squad"
# Tokenizer and question-answering model are loaded from the same checkpoint
# name; both are read as module-level globals by predict_answer.
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForQuestionAnswering.from_pretrained(model_name)

def predict_answer(context, question):
  """Extract the answer to `question` from `context` with the loaded BERT QA model.

  Uses the module-level `tokenizer` and `model` objects.

  Args:
    context: Passage of text that is searched for the answer.
    question: Question to answer from the passage.

  Returns:
    The decoded answer span as a string. An empty string is returned when the
    predicted end position precedes the predicted start position.
  """
  # Tokenize the (question, context) pair as one sequence, truncated to at
  # most 512 tokens.
  encoding = tokenizer(
      question,
      context,
      return_tensors="pt",
      max_length=512,
      truncation=True,
  )
  input_ids = encoding["input_ids"]

  # Inference only, so gradient tracking is switched off.
  with torch.no_grad():
    # Unpacking the whole encoding forwards token_type_ids along with
    # input_ids and attention_mask, so the model can tell the question
    # segment apart from the context segment.
    start_scores, end_scores = model(**encoding, return_dict=False)

  # Highest-scoring start and end token positions.
  start_index = torch.argmax(start_scores, dim=1).item()
  end_index = torch.argmax(end_scores, dim=1).item()

  # Start and end are chosen independently, so the span can be inverted;
  # in that case there is no usable answer.
  if end_index < start_index:
    return ""

  # Map the selected token ids back to tokens and join them into text.
  span_ids = input_ids[0, start_index:end_index + 1].tolist()
  answer_tokens = tokenizer.convert_ids_to_tokens(span_ids)
  answer = tokenizer.convert_tokens_to_string(answer_tokens)

  return answer

In [None]:
# Demo: run the BERT extractive QA helper on one question/context pair.
context = "France, officially the French Republic, is a country whose capital is Paris"
question = "What is the capital of France"

# predict_answer takes the context first and the question second.
answer = predict_answer(context, question)

# Show the question and the extracted answer on separate lines.
print(question, answer, sep="\n")
# The string below appears to be the output recorded from an earlier run.
"""
What is the capital of France
paris

"""

In [None]:
# GPT
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import torch

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Checkpoint identifier passed to both from_pretrained calls below.
model_name = 'gpt2'

# NOTE(review): these assignments rebind the module-level names `tokenizer`
# and `model` that predict_answer also reads, so after this cell has run
# predict_answer would use the GPT-2 objects instead of the BERT ones.
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)

def generate_answer(context, question):
  """Generate a free-text answer to `question` given `context` with GPT-2.

  Uses the module-level `tokenizer` and `model` objects.

  Args:
    context: Passage of text the answer should be based on.
    question: Question to answer.

  Returns:
    The decoded model output, stripped of surrounding whitespace. If the text
    contains the marker "answer:", only the part after its last occurrence is
    returned; otherwise the whole decoded text (prompt included) is returned.
  """
  input_text = f"question: {question} context: {context}. Please answer the question according to context"
  inputs = tokenizer.encode(input_text, return_tensors="pt")

  # Inference only: no training or back-propagation should happen here.
  with torch.no_grad():
    # Run the model to produce the output token ids. Per the Transformers
    # generation docs, max_length counts the prompt tokens as well.
    outputs = model.generate(inputs, max_length=350, no_repeat_ngram_size=1)

  # Decode the generated token ids back into text.
  answer = tokenizer.decode(outputs[0], skip_special_tokens=True)

  # Post-processing: keep the text after the last "answer:" marker. The result
  # must be assigned, because str.split/strip return new strings.
  answer = answer.split("answer:")[-1].strip()
  return answer

In [None]:
# Demo: run the GPT-2 generative QA helper on one question/context pair.
context = "France, officially the French Republic, is a country whose capital is Paris"
question = "What is the capital of France"

# generate_answer takes the context first and the question second.
answer = generate_answer(context, question)

# Show the question and the generated text on separate lines.
print(question, answer, sep="\n")