# Question Generation
Code taken from https://haystack.deepset.ai/tutorials/13_question_generation

In [1]:
from pprint import pprint
from tqdm.auto import tqdm
from haystack.nodes import QuestionGenerator, BM25Retriever, FARMReader
from haystack.document_stores import ElasticsearchDocumentStore
from haystack.pipelines import RetrieverQuestionGenerationPipeline, QuestionAnswerGenerationPipeline
from haystack.utils import print_questions

2023-07-28 18:27:53.162872: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [None]:
# Sample passage on Mendelian inheritance; adjacent literals are joined into
# a single string at compile time.
text1 = (
    "Mendel postulated that genes (characteristics) are inherited as pairs of alleles (traits) that behave in a dominant and recessive pattern. "
    "Alleles segregate into gametes such that each gamete is equally likely to receive either one of the two alleles present in a diploid individual. "
    "In addition, genes are assorted into gametes independently of one another. "
    "That is, alleles are generally not more likely to segregate into a gamete with a particular allele of another gene. "
    "A dihybrid cross demonstrates independent assortment when the genes in question are on different chromosomes or distant from each other on the same chromosome. "
    "For crosses involving more than two genes, use the forked line or probability methods to predict offspring genotypes and phenotypes rather than a Punnett square."
)
docs = [dict(content=text1)]

# Connect to the document store with its default settings.
document_store = ElasticsearchDocumentStore()

# Start from a clean index so documents left over from earlier runs are not
# mixed in, then index the passage for this run.
document_store.delete_documents()
document_store.write_documents(docs)

# The question generator splits each document into 100-unit chunks that
# overlap by 20 before generating questions.
question_generator = QuestionGenerator(
    split_length=100,
    split_overlap=20,
    use_gpu=True,
)

# Question Answer Generation
Input: Documents<br>
Output: Question, Answers<br>
Procedure:
- Questions are generated using `valhalla/t5-base-e2e-qg`.
- Answers to the generated questions are produced using `deepset/roberta-base-squad2`.
- Both pretrained models (T5 and RoBERTa base) are fine-tuned on SQuADv2.

In [2]:
# Reader model that produces answers for the generated questions.
reader = FARMReader(model_name_or_path="deepset/roberta-base-squad2", use_gpu=True)

# Chain the question generator and the reader into a single pipeline.
qag_pipeline = QuestionAnswerGenerationPipeline(
    question_generator=question_generator,
    reader=reader,
)

# Run the pipeline on one stored document at a time, with a progress bar.
for idx, document in enumerate(tqdm(document_store)):
    # Only the first 50 characters of the document are shown in the banner.
    preview = document.content[:50]
    print(f"\n * Generating questions and answers for document {idx}: {preview}...\n")

    result = qag_pipeline.run(documents=[document])
    print_questions(result)

NameError: name 'question_generator' is not defined

# Retriever Question Generation Pipeline
Input: Document, Query<br>
Output: QA Pairs from documents belonging to the query<br>
Procedure:
- Filter documents using a retriever (BM25, TF-IDF, or embedding-based).
- Generate QA pairs on the filtered documents.


In [None]:
# Keyword (BM25) retriever over the documents indexed in the document store.
retriever = BM25Retriever(document_store=document_store)

# Pipeline that first retrieves documents matching a query and then
# generates questions for the retrieved documents only.
rqg_pipeline = RetrieverQuestionGenerationPipeline(retriever, question_generator)

# The query must share terms with the indexed content: the only document in
# the store is the Mendelian-genetics passage, so a query such as
# "Arya Stark" (left over from the original tutorial corpus) retrieves
# nothing and no questions are generated.
query = "independent assortment"

print(f"\n * Generating questions for documents matching the query '{query}'\n")
result = rqg_pipeline.run(query=query)
print_questions(result)