In [95]:
from langchain.llms import Ollama
import re

In [96]:
qustion_types = {
    "confirmation": "Focus only on confirmation questions, i.e. questions that can be answered with a yes or no.",
    "factoid": "Focus only on factoid questions, that usually begin with a who, what, where, when, why, or how.",
    "list": "Focus only on list questions, i.e. questions that are answered with a list of items.",
    "causal": "Focus only on causal questions, i.e. questions that begin with why or how.",
    "hypothetical": "Focus only on hypothetical questions, i.e. questions that ask what if.",
    "complex": "Focus only on complex questions, i.e. questions that require multi-step reasoning and comparisons.",
    "default": ""
}

In [97]:
def generate_question_answer_pairs(context, prompt=None, question_type=None, n=1, verbose=False):
    if prompt is None:
        
        question_type_prompt = qustion_types.get(question_type, qustion_types["default"])

        prompt = f'You are Mistral-Jeopardy, an intelligent AI agent that guesses questions that would answered by a particular exerpt of text. \
            You are given the following exerpt of text:\n\n```\n{context}```\n\nGenerate {n} questions that would be answered by the exerpt of text. {question_type_prompt} \
            Format the pairs as follows: `{{QUESTION i}}: <question i> {{ANSWER i}}: <answer i>`. Do not deviate from this format, since it will be used to extract the questions with a regex.\n\n'

    # for token in ollama.stream(prompt):
    #     print(token, end="")
        
    # Clear the message history
    ollama = Ollama(
        base_url="http://localhost:11434",
        model="llama2",
        verbose=True,
        stop=["<|im_end|>"]
    )

    # Generate the question-answer pairs
    qa_pairs = ollama(prompt)

    # Filter out the question-answer pairs
    qa_pairs = re.findall(r'{QUESTION \d+}: .+ {ANSWER \d+}: .+', qa_pairs)

    questions = []
    answers = []

    for qa_pair in qa_pairs:
        question, answer = qa_pair.split("{ANSWER")[0].split("}: ")[1], qa_pair.split("{ANSWER")[1].split("}: ")[1]
        questions.append(question)
        answers.append(answer)

    if len(questions) == 0 or len(answers) == 0:
        raise Exception(f"No question-answer pairs were generated: {qa_pairs}")

    return questions, answers

In [101]:
example_document = """

EUR-Lex - 32001Y0123(02) - EN
Avis juridique important
|
32001Y0123(02)
Commission opinion of 20 December 2000 concerning the plan for the disposal of radioactive waste from the commissioning of the liquid metal disposal plant (LMDP) and the waste receipt, assay, characterisation and supercompaction facility (WRACS) located on the Dounreay nuclear site in Scotland (United Kingdom), in accordance with Article 37 of the Euratom Treaty  
Official Journal C 020 , 23/01/2001 P. 0004 - 0004 
Commission opinionof 20 December 2000concerning the plan for the disposal of radioactive waste from the commissioning of the liquid metal disposal plant (LMDP) and the waste receipt, assay, characterisation and supercompaction facility (WRACS) located on the Dounreay nuclear site in Scotland (United Kingdom), in accordance with Article 37 of the Euratom Treaty(2001/C 20/03)(Only the English text is authentic)On 8 June 2000 the European Commission received from the United Kingdom Government, in accordance with Article 37 of the Euratom Treaty, general data relating to the plan for the disposal of radioactive waste resulting from the commissioning of the liquid metal disposal plant (LMDP) and the waste receipt, assay, characterisation and supercompaction facility (WRACS).On the basis of these data and clarifications subsequently provided by the United Kingdom Government, and following consultation with the group of experts, the Commission has drawn up the following opinion:(a) The distance between the plant and the nearest point of another Member State, in this case Denmark (Faeroe Islands), is approximately 370 km;(b) Under normal operating conditions, the discharges of liquid and gaseous effluents will not cause an exposure of the population in other Member States that is significant from the point of view of health;(c) Solid low and intermediate level radioactive waste arising from the operations of the LMDP and WRACS will be stored on-site. Off-site movement of waste is not currently envisaged;(d) In the event of unplanned discharges of radioactive waste, which may follow an accident on the scale considered in the General Data, the doses likely to be received by the population in other Member States would not be significant from the point of view of health.In conclusion, the Commission is of the opinion that the implementation of the plan for the disposal of radioactive waste, resulting from the commissioning of the liquid metal disposal plant (LMDP) and the waste receipt, assay, characterisation and supercompaction facility (WRACS), both in normal operation and in the event of an accident of the type and magnitude considered in the General Data, is not liable to result in the radioactive contamination, significant from the point of view of health, of the water, soil or airspace of another Member State. 
"""
len(example_document)

2841

In [99]:
qa_pairs = generate_question_answer_pairs(example_document, n=5)

for i, (question, answer) in enumerate(zip(*qa_pairs)):
    print(f"Question {i+1}: {question}\nAnswer {i+1}: {answer}\n")

Question 1: What is the distance between the plant and the nearest point of another Member State? 
Answer 1: Approximately 370 km.

Question 2: Under normal operating conditions, will the discharges of liquid and gaseous effluents cause an exposure of the population in other Member States that is significant from the point of view of health? 
Answer 2: No, the doses likely to be received by the population in other Member States would not be significant from the point of view of health.

Question 3: In the event of unplanned discharges of radioactive waste, what are the likely doses that would be received by the population in other Member States? 
Answer 3: The doses likely to be received by the population in other Member States would not be significant from the point of view of health.

Question 4: What is the storage location for solid low and intermediate level radioactive waste arising from the operations of the LMDP and WRACS? 
Answer 4: Solid low and intermediate level radioactive

In [100]:
for question_type in qustion_types:
    qa_pairs = generate_question_answer_pairs(example_document, question_type=question_type, n=5)

    print(f"Question type: {question_type}\n")
    for i, (question, answer) in enumerate(zip(*qa_pairs)):
        print(f"Question {i+1}: {question}\nAnswer {i+1}: {answer}\n")

Question type: confirmation

Question 1: Will the discharges of liquid and gaseous effluents from the plant cause an exposure of the population in other Member States that is significant from the point of view of health? 
Answer 1: No.

Question 2: Is solid low and intermediate level radioactive waste arising from the operations of the LMDP and WRACS stored on-site? 
Answer 2: Yes.

Question 3: In the event of unplanned discharges of radioactive waste, would the doses likely to be received by the population in other Member States be significant from the point of view of health? 
Answer 3: No.

Question 4: Is the implementation of the plan for the disposal of radioactive waste resulting from the commissioning of the liquid metal disposal plant (LMDP) and the waste receipt, assay, characterisation and supercompaction facility (WRACS), both in normal operation and in the event of an accident of the type and magnitude considered in the General Data, liable to result in the radioactive cont