## Model Demo

- Run the following cell to download the dependencies and the model.

In [None]:
#@title just run this (it takes about 5 minutes)
!pip install -q transformers
!pip install -q sentencepiece

from tqdm import tqdm
from IPython.display import clear_output
import torch
from transformers import AutoModelForQuestionAnswering, AutoTokenizer

# Checkpoint name handed to both `from_pretrained` calls below.
model_name = 'SajjadAyoubi/xlm-roberta-large-fa-qa'
# Load the question-answering model and the tokenizer that belongs to it.
model = AutoModelForQuestionAnswering.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Wipe the output this cell has produced so far (install/loading messages).
clear_output()

# https://github.com/sajjjadayobi/PersianQA/blob/main/src/utils.py
from tqdm import tqdm
import torch

class AnswerPredictor:
    """Extractive question answering on top of a fine-tuned span-prediction model.

    The predictor tokenizes (question, context) pairs, runs the model in
    batches, and picks the highest-scoring answer span inside each context.
    """

    def __init__(self, model, tokenizer, device='cuda', n_best=10, max_length=512, stride=256, no_answer=False):
        """Initializes PyTorch Question Answering Prediction.

        It is best to leave the default values unchanged.

        Args:
            model: Fine-tuned torch model
            tokenizer: Transformers tokenizer
            device (torch.device): Running device
            n_best (int): Number of best start/end positions to combine
            max_length (int): Tokenizer max length
            stride (int): Tokenizer stride
            no_answer (bool): If True, model can return "no answer"
        """
        self.model = model.eval().to(device)
        self.tokenizer = tokenizer
        self.device = device
        self.max_length = max_length
        self.stride = stride
        self.no_answer = no_answer
        self.n_best = n_best

    def model_pred(self, questions, contexts, batch_size=1):
        """Tokenizes the samples and runs the model over them in batches.

        The number of samples does not have to be a multiple of
        ``batch_size``; the last batch is simply smaller.

        Args:
            questions (list): Question strings
            contexts (list): Context strings, parallel to ``questions``
            batch_size (int): Number of samples per forward pass

        Returns:
            tuple: ``(tokens, start_logits, end_logits)`` where ``tokens`` is
                the tokenizer output and both logit tensors have one row per
                sample.

        Raises:
            ValueError: If ``questions`` and ``contexts`` differ in length or
                ``batch_size`` is smaller than 1.
        """
        n = len(contexts)
        if len(questions) != n:
            raise ValueError("questions and contexts must have the same length")
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")

        # NOTE(review): `stride` is forwarded, but overflowing windows are not
        # requested, so a context longer than `max_length` appears to be
        # truncated rather than split into windows -- confirm.
        tokens = self.tokenizer(questions, contexts, add_special_tokens=True,
                                return_token_type_ids=True, return_tensors="pt", padding=True,
                                return_offsets_mapping=True, truncation="only_second",
                                max_length=self.max_length, stride=self.stride)

        start_logits, end_logits = [], []
        with torch.no_grad():
            for i in tqdm(range(0, n, batch_size)):
                batch = slice(i, i + batch_size)
                # Keyword arguments keep each tensor bound to the right
                # parameter regardless of the model's positional order.
                out = self.model(input_ids=tokens['input_ids'][batch].to(self.device),
                                 attention_mask=tokens['attention_mask'][batch].to(self.device),
                                 token_type_ids=tokens['token_type_ids'][batch].to(self.device))
                start_logits.append(out.start_logits)
                end_logits.append(out.end_logits)

        # Concatenating along the batch axis also works for a short last batch.
        return tokens, torch.cat(start_logits, dim=0), torch.cat(end_logits, dim=0)

    def __call__(self, questions, contexts, batch_size=1, answer_max_len=100):
        """Creates model prediction.

        Args:
            questions (list): Question strings
            contexts (list): Context strings
            batch_size (int): Batch size
            answer_max_len (int): Longest allowed answer, counted in tokens

        Returns:
            dict: The best prediction of the model for every sample index
                (e.g {0: {"text": str, "score": float}}). An empty input
                yields an empty dict.
        """
        if len(contexts) == 0 and len(questions) == 0:
            return {}

        tokens, starts, ends = self.model_pred(questions, contexts, batch_size=batch_size)
        # Work on CPU copies so indices become plain Python ints below.
        starts, ends = starts.cpu(), ends.cpu()
        start_indexes = starts.argsort(dim=-1, descending=True)[:, :self.n_best].tolist()
        end_indexes = ends.argsort(dim=-1, descending=True)[:, :self.n_best].tolist()
        sep_token_id = self.tokenizer.sep_token_id

        preds = {}
        for i, context in enumerate(contexts):
            # Position 0 is the CLS token; its score stands for "no answer".
            min_null_score = (starts[i][0] + ends[i][0]).item()
            # Everything before the first separator belongs to the question.
            start_context = tokens['input_ids'][i].tolist().index(sep_token_id)
            offset = tokens['offset_mapping'][i].tolist()

            best_answer = None
            for start_index in start_indexes[i]:
                # Don't consider answers that start inside the question.
                if start_index < start_context:
                    continue
                for end_index in end_indexes[i]:
                    # Don't consider answers with a length that is either < 0 or > answer_max_len.
                    if end_index < start_index or (end_index - start_index + 1) > answer_max_len:
                        continue
                    score = (starts[i][start_index] + ends[i][end_index]).item()
                    # Strict comparison keeps the first of several equally good spans.
                    if best_answer is None or score > best_answer["score"]:
                        best_answer = {"score": score,
                                       "text": context[offset[start_index][0]: offset[end_index][1]]}

            if best_answer is None or (self.no_answer and best_answer["score"] < min_null_score):
                preds[i] = {"text": "", "score": min_null_score}
            else:
                preds[i] = best_answer

        return preds

# Use the GPU when one is present; otherwise fall back to the CPU so the
# demo still runs on a runtime without CUDA (only slower).
predictor = AnswerPredictor(model, tokenizer,
                            device='cuda' if torch.cuda.is_available() else 'cpu',
                            n_best=10, no_answer=True)

In [None]:
#@title Paste your Context and ask a Question
context = "من سجاد ایوبی هستم. به پردازش زبان طبیعی علاقه دارم" #@param {type:"string"}
question = "به چی علاقه دارم؟" #@param {type:"string"}

# The predictor takes parallel lists, so the single pair is wrapped in lists;
# the result is a dict keyed by sample index.
preds = predictor([question], [context], batch_size=1)

# Show the answer text of the only sample (index 0) without surrounding whitespace.
print('\n\n Model Prediction: ', preds[0]['text'].strip())

100%|██████████| 1/1 [00:00<00:00, 50.70it/s]



 Model Prediction:  پردازش زبان طبیعی





## Dataset Demo

- An example from the dataset
  - taken from this [Wikipedia article](https://fa.wikipedia.org/wiki/%D8%B9%D8%A8%D8%A7%D8%B3_%D8%A8%D8%A7%D8%A8%D8%A7%DB%8C%DB%8C)
  - 7 questions with answers
  - 3 questions without any answer

```json
{
      "title": "عباس بابایی",
      "paragraphs": [
        {
          "qas": [
            {
              "question": "عباس بابایی دقیقا چه شخصی هست؟",
              "sign": "",
              "is_impossible": false,
              "answers": [
                {
                  "answer_start": 12,
                  "answer_end": 123,
                  "text": "سرتیپ خلبان نورثروپ اف-۵ و اف - ۱۴ تام‌کت نیروی هوایی ارتش جمهوری اسلامی ایران و معاون عملیات فرماندهی این نیرو"
                }
              ]
            },
            {
              "question": "عباس بابایی معاون عملیات چه سازمانی بود؟",
              "sign": "",
              "is_impossible": false,
              "answers": [
                {
                  "answer_start": 54,
                  "answer_end": 90,
                  "text": "نیروی هوایی ارتش جمهوری اسلامی ایران"
                }
              ]
            },
            {
              "question": "عباس بابایی در چه رشته ای تحصیلات خود را تکمیل کرد؟",
              "sign": "",
              "is_impossible": false,
              "answers": [
                {
                  "answer_start": 322,
                  "answer_end": 340,
                  "text": "خلبانی نیروی هوایی"
                }
              ]
            },
            {
              "question": "عباس بابایی چرا رشته پزشکی را ادامه نداد؟",
              "sign": "",
              "is_impossible": false,
              "answers": [
                {
                  "answer_start": 276,
                  "answer_end": 299,
                  "text": "به دلیل علاقه به خلبانی"
                }
              ]
            },
            {
              "question": "چرا عباس بابایی یه دوره ای رفته بود آمریکا",
              "sign": "",
              "is_impossible": false,
              "answers": [
                {
                  "answer_start": 373,
                  "answer_end": 392,
                  "text": "برای تکمیل تحصیلاتش"
                }
              ]
            },
            {
              "question": "جنازه عباس بابایی کجا دفن شده؟",
              "sign": "",
              "is_impossible": false,
              "answers": [
                {
                  "answer_start": 989,
                  "answer_end": 1029,
                  "text": "در گلزار شهدا در جنوب شاهزاده حسین قزوین"
                }
              ]
            },
            {
              "question": "عباس بابایی برای چه مأموریتی رفته بود که کشته شد؟",
              "sign": "",
              "is_impossible": false,
              "answers": [
                {
                  "answer_start": 559,
                  "answer_end": 606,
                  "text": "شناسایی منطقه و تعیین راه کار صحیح اجرای عملیات"
                }
              ]
            },
            {
              "question": "چرا سپاه خودی عباس بابایی را کشت؟",
              "sign": "",
              "is_impossible": true,
              "answers": []
            },
            {
              "question": "وقتی عباس بابایی ۳۷ سال داشت در چه عملیاتی پیروز شد؟",
              "sign": "",
              "is_impossible": true,
              "answers": []
            },
            {
              "question": "سرهنگ علی محمد نادری چه نسبتی با عباس بابایی داشت",
              "sign": "",
              "is_impossible": true,
              "answers": []
            }
          ],
```