In [1]:
import os
# Environment configuration. These assignments are placed ahead of the
# `transformers` / `torch` imports below.
# Restrict the GPUs visible to this process to device index 4 (machine-specific value).
os.environ['CUDA_VISIBLE_DEVICES'] = '4'
# NOTE(review): presumably disables the tokenizers library's internal parallelism — confirm.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# NOTE(review): presumably silences transformers' advisory warnings — confirm.
os.environ['TRANSFORMERS_NO_ADVISORY_WARNINGS'] = 'true'

import transformers
import evaluate
import torch
import operator
import re
import sys
import collections
import string
import contextlib

import numpy as np
import pandas as pd
import torch.nn as nn

from multiprocessing import cpu_count
from nusacrowd import NusantaraConfigHelper
from datetime import datetime
from tqdm import tqdm
from deep_translator import GoogleTranslator
from huggingface_hub import HfApi, create_repo

from datasets import (
    load_dataset, 
    Dataset,
    DatasetDict
)
from transformers import (
    DataCollatorWithPadding,
    TrainingArguments,
    Trainer,
    BertForQuestionAnswering,
    AutoTokenizer,
    EarlyStoppingCallback,
    AutoModelForQuestionAnswering,
    pipeline
)

In [2]:
# Keyword arguments forwarded to `tokenizer_qa` by the manual forward helpers:
# inputs are truncated to at most 512 tokens.
tokenizer_kwargs = {'truncation': True, 'max_length': 512}

# Hub identifier of the fine-tuned question-answering checkpoint used for both
# the pipeline and the manually driven model below.
MODEL_QA_NAME = "muhammadravi251001/fine-tuned-DatasetQAS-IDK-MRC-with-xlm-roberta-large-without-ITTL-without-freeze-LR-1e-05"

# High-level QA pipeline. `torch.cuda.current_device()` raises when no CUDA
# device is visible, so fall back to -1 (the pipeline's CPU device id) in that
# case instead of crashing the cell.
pipeline_qa = pipeline(
    task="question-answering",
    model=MODEL_QA_NAME,
    tokenizer=MODEL_QA_NAME,
    device=torch.cuda.current_device() if torch.cuda.is_available() else -1,
)

# Separate tokenizer/model instances for the manual forward passes. Note that
# `model_qa` is not moved to any device in this cell, unlike the pipeline above.
tokenizer_qa = AutoTokenizer.from_pretrained(MODEL_QA_NAME)
model_qa = AutoModelForQuestionAnswering.from_pretrained(MODEL_QA_NAME)

In [3]:
def forward_qa_uncomment(question, context):
    """Answer `question` from `context` with a manual forward pass of `model_qa`.

    The question/context pair is tokenized with `tokenizer_qa` (using the
    module-level `tokenizer_kwargs`), run through `model_qa`, and the span from
    the argmax of the start logits to the argmax of the end logits (inclusive)
    is decoded back to text.

    Args:
        question: Question text passed as the first tokenizer segment.
        context: Context text passed as the second tokenizer segment.

    Returns:
        The decoded text of the selected token span. When the predicted end
        index lies before the start index the slice is empty.

    Note:
        The forward pass is not wrapped in `torch.no_grad()`. A min-max style
        confidence score (and a dict return carrying score/start/end/answer)
        was sketched here earlier but is disabled; only the answer string is
        returned.
    """
    model_qa.eval()

    encoded = tokenizer_qa(question, context, return_tensors="pt", **tokenizer_kwargs)
    prediction = model_qa(**encoded)

    # argmax over the whole logits tensor; the sequence is indexed as batch row 0 below.
    span_first = prediction.start_logits.argmax()
    span_last = prediction.end_logits.argmax()

    token_ids = encoded["input_ids"][0]
    return tokenizer_qa.decode(token_ids[span_first : span_last + 1])

In [4]:
def forward_qa(question, context):
    """Answer `question` from `context` via a gradient-free forward pass.

    Tokenizes the pair with `tokenizer_qa` (applying the module-level
    `tokenizer_kwargs`), evaluates `model_qa` under `torch.no_grad()`, and
    decodes the tokens between the highest-scoring start position and the
    highest-scoring end position (inclusive).

    Args:
        question: Question text passed as the first tokenizer segment.
        context: Context text passed as the second tokenizer segment.

    Returns:
        The decoded text of the selected token span. When the predicted end
        index lies before the start index the slice is empty.
    """
    model_qa.eval()

    encoded = tokenizer_qa(question, context, return_tensors="pt", **tokenizer_kwargs)

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        prediction = model_qa(**encoded)

    span_first = torch.argmax(prediction.start_logits)
    span_stop = torch.argmax(prediction.end_logits) + 1  # exclusive slice bound

    token_ids = encoded["input_ids"][0]
    return tokenizer_qa.decode(token_ids[span_first:span_stop])

In [5]:
def forward_qa_grad(question, context):
    """Answer `question` from `context` with autograd left enabled.

    Mirrors `forward_qa` step for step, except that the call to `model_qa` is
    NOT wrapped in `torch.no_grad()`; `show_answer` prints both results side
    by side.

    Args:
        question: Question text passed as the first tokenizer segment.
        context: Context text passed as the second tokenizer segment.

    Returns:
        The decoded text of the tokens from the argmax start position to the
        argmax end position (inclusive). When the end index lies before the
        start index the slice is empty.
    """
    model_qa.eval()

    encoded = tokenizer_qa(question, context, return_tensors="pt", **tokenizer_kwargs)
    # Deliberately no torch.no_grad() here.
    prediction = model_qa(**encoded)

    best_start = torch.argmax(prediction.start_logits)
    best_end = torch.argmax(prediction.end_logits)

    selected_ids = encoded["input_ids"][0][best_start : best_end + 1]
    decoded_answer = tokenizer_qa.decode(selected_ids)
    return decoded_answer

In [6]:
# Example 1 (Indonesian): the context describes the angel "Mikail", while the
# question asks about "Mikhael ... di luar Islam" (outside Islam).
# NOTE(review): this looks like an intentionally unanswerable pair — confirm.
context1 = "Mikail (Arab: ميكائيل) adalah malaikat yang mengatur air, menurunkan hujan/petir, membagikan rezeki pada manusia, tumbuh-tumbuhan juga hewan-hewan dan lain-lain di muka bumi ini. Dikatakan setiap satu makhluk yang memerlukan rezeki untuk hidup di dunia ini akan diselia rezekinya oleh satu malaikat Karubiyyuun."
question1 = "Apa tugas Malaikat Mikhael di luar Islam?"

# Example 2 (Indonesian): asks what is meant by "gagasan" (idea); the context
# gives a definition of the term.
context2 = "Gagasan atau peluang adalah istilah yang dipakai baik secara populer maupun dalam bidang filsafat dengan pengertian umum \"citra mental\" atau \"pengertian\". Terutama Plato adalah eksponen pemikiran seperti ini."
question2 = "apakah yang dimaksud dengan gagasan?"

# Example 3 (Indonesian): asks what is meant by "pornoaksi"; the context
# contains a quoted definition of that term.
context3 = "Pornografi dalam rancangan pertama didefinisikan sebagai \"substansi dalam media atau alat komunikasi yang dibuat untuk menyampaikan gagasan-gagasan yang mengeksploitasi seksual, kecabulan, dan/atau erotika\" sementara pornoaksi adalah \"perbuatan mengeksploitasi seksual, kecabulan, dan/atau erotika di muka umum\"."
question3 = "Apa yang dimaksud dengan pornoaksi ?"

In [8]:
def show_answer(context, question, handle_impossible_answer=False):
    """Print the answers of the pipeline and of both manual forward passes.

    Runs the same question/context pair through `pipeline_qa`, `forward_qa`
    (no grad) and `forward_qa_grad` (with grad), then prints the context, the
    question and the three answers, each followed by a blank line.

    Args:
        context: Context passage to search for the answer.
        question: Question to ask about the context.
        handle_impossible_answer: Forwarded to the question-answering
            pipeline. With the default `False` (the pipeline's own default)
            the call behaves exactly as before. Per the transformers
            documentation, `True` lets the pipeline accept "impossible" (no
            answer) as a result, which makes its output comparable to the raw
            argmax decoding of the manual forward passes, which can yield an
            empty string.

    Returns:
        None. All results are written to stdout.
    """
    # Compute every answer up front so the printed report is not interleaved
    # with any output the model calls might emit.
    answer_pipeline = pipeline_qa(
        question, context, handle_impossible_answer=handle_impossible_answer
    )['answer']
    answer_forward_no_grad = forward_qa(question, context)
    answer_forward_grad = forward_qa_grad(question, context)

    report = (
        ("CONTEXT:", context),
        ("QUESTION:", question),
        ("This answer is from PIPELINE:", answer_pipeline),
        ("This answer is from MODEL FORWARD NO grad:", answer_forward_no_grad),
        ("This answer is from MODEL FORWARD WITH grad:", answer_forward_grad),
    )
    for heading, value in report:
        print(heading)
        print(value)
        print()

In [13]:
# Example 1: in the recorded output below, the pipeline answers "Mikail" while
# both manual forward passes print an empty string.
# NOTE(review): the gap is likely due to the pipeline's default
# handle_impossible_answer=False, which makes it always return some span — confirm.
show_answer(context1, question1)

CONTEXT:
Mikail (Arab: ميكائيل) adalah malaikat yang mengatur air, menurunkan hujan/petir, membagikan rezeki pada manusia, tumbuh-tumbuhan juga hewan-hewan dan lain-lain di muka bumi ini. Dikatakan setiap satu makhluk yang memerlukan rezeki untuk hidup di dunia ini akan diselia rezekinya oleh satu malaikat Karubiyyuun.

QUESTION:
Apa tugas Malaikat Mikhael di luar Islam?

This answer is from PIPELINE:
Mikail

This answer is from MODEL FORWARD NO grad:


This answer is from MODEL FORWARD WITH grad:


