In [1]:
import transformers
import datasets
import torch
from nltk import sent_tokenize
from helper_functions import _prepare_inputs_for_e2e_qg_from_answers_hl

In [2]:
e2e_qg_model = transformers.AutoModelForSeq2SeqLM.from_pretrained('output_models/model_e2e_qg_hl_t5')
tokenizer = transformers.AutoTokenizer.from_pretrained('tokenizers/t5_qg_tokenizer')

In [3]:
# texts = ["Architecturally, the school has a Catholic character. Atop the Main Building\'s gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend \"Venite Ad Me Omnes\". Next to the Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary."]

def generate_e2e_question(texts):
    texts = _prepare_inputs_for_e2e_qg_from_answers_hl([texts])
    print(texts)

    e2e_encodings = tokenizer.batch_encode_plus(texts, padding=True)

    e2e_output_ids = e2e_qg_model.generate(
        torch.tensor(e2e_encodings['input_ids']),
        attention_mask = torch.tensor(e2e_encodings['attention_mask']),
        max_length = 256,
        num_beams = 4,
        length_penalty = 1.4,
        no_repeat_ngram_size=3,
        early_stopping = True
    )

    e2e_question_texts = tokenizer.batch_decode(
        e2e_output_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=True)
    
    e2e_question_texts=[q.replace('<sep>','') for q in e2e_question_texts]

    output_text=''
    for text in e2e_question_texts:
        output_text = output_text + text + '\n'

    return output_text


In [4]:
import gradio as gr

context_in = gr.inputs.Textbox(lines=20, label='Context')
question_out = gr.outputs.Textbox()

demo = gr.Interface(
    generate_e2e_question,
    context_in,
    question_out,
    theme='huggingface',
    layout='horizontal',
    title="E2E question generation based on T5")

demo.launch(share=True)

Running locally at: http://127.0.0.1:7861/
This share link will expire in 24 hours. If you need a permanent link, visit: https://gradio.app/introducing-hosted (NEW!)
Running on External URL: https://21910.gradio.app
Interface loading below...


(<Flask 'gradio.networking'>,
 'http://127.0.0.1:7861/',
 'https://21910.gradio.app')

["generate questions: This is a list of seasons completed by the Buffalo Bulls football team of the National Collegiate Athletic Association (NCAA) Division I Football Bowl Subdivision (FBS). Buffalo's first football team was fielded in 1894.\nBuffalo originally competed as a football independent. Following the 1970 season, Buffalo's football team was discontinued for six seasons, before being reinstated as a Division III team in 1977. Buffalo competed as a I-AA team for six seasons before joining the I-A's Mid-American Conference in 1999, of which it has been a member since."]


To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at  /pytorch/aten/src/ATen/native/BinaryOps.cpp:467.)
  return torch.floor_divide(self, other)
