In [1]:
from datetime import datetime
import logging
from typing import List, Optional

from firebase_admin import initialize_app, firestore
from google.cloud.firestore_v1.base_query import FieldFilter
from google.cloud import firestore
import vertexai

from langchain.prompts import PromptTemplate
from langchain.llms import VertexAI
from langchain.chat_models import ChatVertexAI
from langchain.output_parsers import PydanticOutputParser
from langchain.pydantic_v1 import BaseModel, Field
from langchain.schema import HumanMessage, SystemMessage, AIMessage

initialize_app(name="schoolscan-4c8d8")
firestore_client = firestore.Client(project="schoolscan-4c8d8")

In [2]:
user_id = "I6KPdRipfkPJMD5jsY8nBRHCjcI3" # "edUQCUBsomU2qLlEhtqs8EDivP73"

In [3]:
topics = firestore_client.collection("topics").where(filter=FieldFilter(f"roles.{user_id}", "==", "owner")).stream()
for topic_ref in topics:
    print(topic_ref.id)
    print(topic_ref.to_dict())

E21gEE4MtLDXqz2oXEew
{'language': 'nl', 'description': 'Hoe organiseerden de mensen zich in de eerste steden in het oude nabije oosten?', 'tags': ['Mesopotamië', 'Sumer', 'Akkadië', 'Babylon', 'handel'], 'quizStatus': 'done', 'summaryStatus': 'done', 'summary': 'The inhabitants of Mesopotamia made art to serve different purposes. The gate of Ishtar was a propaganda tool that showed the power of the king and the city-state of Babylon. The praying figurines and the amulet of Lamasjtoe had a religious function and helped people in their daily lives.', 'roles': {'I6KPdRipfkPJMD5jsY8nBRHCjcI3': 'owner', '0li2Yf3Cf7dTI1Z3IsZbG22n8Y83': 'owner', 'EZguMyHn1HXQoWLB7pyHhyLbkdj1': 'owner', 'KNKcHeL2j6eOtUx6u5bQZjc2pr43': 'owner'}, 'outlineStatus': 'done', 'extractStatus': 'done', 'outline': ' I. Introduction\n   A. Mesopotamia: A Fertile Crescent Civilization\n   B. The Development of Cities in Mesopotamia\nII. The City-State of Ur\n   A. The City of Ur\n   B. The Rulers of Ur\n   C. The Economy 

In [4]:
topic_id = "XBOZI2pkqIAWhP7LLqcH" # "E21gEE4MtLDXqz2oXEew"

In [5]:
files = firestore_client.collection(f"topics/{topic_id}/files").stream()

fulltext = ""
for document in files:
    fulltext += document.get("text")

print(fulltext)

verhaal
ANISSA HEEFT ARFID, EEN AANGEBOREN AFKEER VAN ETEN
Anissa (38): "Als kind at ik alles, tot ik vast voedsel moest leren
eten. Waar ik gepureerde papjes en melk zonder problemen at,
blokkeerde ik zodra ik brokjes proefde. Mijn moeder begreep er
niets van en zat met de handen in het haar. Mijn broer en zus aten
alles, ik niets. Keer op keer bleef ze me hetzelfde voorschotelen
als hen, keer op keer weigerde ik te eten. Ze ging ervoor naar de
dokter, dacht dat ze iets verkeerd deed in haar opvoeding. Maar
niemand kon haar, en mij, helpen. Op de duur gaf ze de strijd op.
Als ik maar iets at. Andere ouders zeiden dat ze me verwende, dat
ze me de dag erna gewoon hetzelfde bord moest voorschotelen
als ik niets at. Maar mijn ouders hebben alles geprobeerd: of het
nu ongezonde McDonalds was of gezonde groenten, ik moest het
allemaal niet hebben.
"IK WIL OP VOORHAND
WETEN HOE HET ETEN
IN MIJN MOND ZAL
VOELEN. VOORAL VAN
ZACHTE TEXTUREN
GRUWEL IK”
18 Ubelle
mijn mond zal voelen. Dat een tom

In [6]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [7]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=16000, chunk_overlap=50)
texts = text_splitter.split_text(fulltext)
print(texts)
print(texts.__len__())

['verhaal\nANISSA HEEFT ARFID, EEN AANGEBOREN AFKEER VAN ETEN\nAnissa (38): "Als kind at ik alles, tot ik vast voedsel moest leren\neten. Waar ik gepureerde papjes en melk zonder problemen at,\nblokkeerde ik zodra ik brokjes proefde. Mijn moeder begreep er\nniets van en zat met de handen in het haar. Mijn broer en zus aten\nalles, ik niets. Keer op keer bleef ze me hetzelfde voorschotelen\nals hen, keer op keer weigerde ik te eten. Ze ging ervoor naar de\ndokter, dacht dat ze iets verkeerd deed in haar opvoeding. Maar\nniemand kon haar, en mij, helpen. Op de duur gaf ze de strijd op.\nAls ik maar iets at. Andere ouders zeiden dat ze me verwende, dat\nze me de dag erna gewoon hetzelfde bord moest voorschotelen\nals ik niets at. Maar mijn ouders hebben alles geprobeerd: of het\nnu ongezonde McDonalds was of gezonde groenten, ik moest het\nallemaal niet hebben.\n"IK WIL OP VOORHAND\nWETEN HOE HET ETEN\nIN MIJN MOND ZAL\nVOELEN. VOORAL VAN\nZACHTE TEXTUREN\nGRUWEL IK”\n18 Ubelle\nmijn mond

In [8]:
vertexai.init(project="schoolscan-4c8d8", location="us-central1")

In [9]:
llm = VertexAI(
            model_name="text-bison",
            max_output_tokens=2048,
            temperature=0.2,
            top_p=0.7,
            top_k=40,
            n=1,
        )

In [10]:
class Question(BaseModel):
    """
    `questions` children
    """

    type: str = Field(
        description="the type of question (multiple_choice, multiple_choice_multi, connect_terms or free_text)"
    )
    question: Optional[str] = Field(description="the question")
    choices: Optional[List[str]] = Field(
        description="the choices for a multiple_choice question or a multiple_choice_multi question (only include field for multiple_choice questions), should have at least 3 values (choices should have meaning and be short and concise)"
    )
    left_column: Optional[List[str]] = Field(
        description="the left column for a connect_terms question (only include field for connect_terms questions), should have at least 3 values (choices should have meaning and be short and concise)"
    )
    right_column: Optional[List[str]] = Field(
        description="the right column for a connect_terms question (only include field for connect_terms questions), should have at least 3 values (same amount as left column) (choices should have meaning and be short and concise)"
    )
    answer: str = Field(
        description="the exact correct answer in case of multiple_choice and free_text. in case of connect_terms, it's the combination of the index of the left and write column with a hyphen, separated by a comma eg. '1-3,2-2,3-1'. in case of multiple_choice_multi, it's all correct answers separated by a comma eg. 'first correct answer,second correct answer'. this field is always required!"
    )

class TopicQuestions(BaseModel):
    questions: List[Question] = Field(default=[], description="the list of questions")

In [11]:
template = """
Generate a quiz with 5 questions that test a reader's comprehension of the following text.
The quiz you generate will have 5 question items and you can have 4 types of question items: 
    1.Multiple choice (multiple_choice): provide at least 3 choices per question and provide the correct answer (exact).
    2.Multiple choice with multiple answers (multiple_choice_multi): provide at least 3 choices per question and provide the correct answers, separated by commas (a potential correct value for answer could be 'foo,bar,test').
    3.Connect relevant terms (connect_terms): at least 3 terms in a random order in 1 column and at least 3 terms in a random order in the other column. The person that takes the test must select a matching term in each column.
    4.A free text question (free_text). Make sure to ask a question of which the answer can be found in the provided text, and make sure to provide the correct answer in the answer field. 'What do you think of ...?' is not a good question! There should be maximum 1 question of this type.
For each question, you also need to provide the correct answer. Make sure that the correct answer is exactly the same as the value of the choice (for connect_terms it should format a string with the indexes of the answers for each column '1-3,2-2,3-1').
The question should be concise and clear. The question itself should not list possible choices. The quiz should be sufficiently difficult and should contain at least 1 of each question type.
Questions, choices and answers should always be short and concise: answers should never be more than 3 words. The questions and answers should only take the text into account, nothing else.

The values of the name, description, questions, choices, answers should all be in the same language as the text.
Make sure that all output is in the same language as the text (all field values).

{format_instructions}

Text:
{input}

Json:"""

In [12]:
output_parser = PydanticOutputParser(pydantic_object=TopicQuestions)
format_instructions = output_parser.get_format_instructions()
prompt = PromptTemplate(
    input_variables=["input"],
    partial_variables={"format_instructions": format_instructions},
    template=template,
)
final_prompt = prompt.format(input=fulltext)
print(final_prompt)


Generate a quiz with 5 questions that test a reader's comprehension of the following text.
The quiz you generate will have 5 question items and you can have 4 types of question items: 
    1.Multiple choice (multiple_choice): provide at least 3 choices per question and provide the correct answer (exact).
    2.Multiple choice with multiple answers (multiple_choice_multi): provide at least 3 choices per question and provide the correct answers, separated by commas (a potential correct value for answer could be 'foo,bar,test').
    3.Connect relevant terms (connect_terms): at least 3 terms in a random order in 1 column and at least 3 terms in a random order in the other column. The person that takes the test must select a matching term in each column.
    4.A free text question (free_text). Make sure to ask a question of which the answer can be found in the provided text, and make sure to provide the correct answer in the answer field. 'What do you think of ...?' is not a good question!

In [13]:
res_text = llm(final_prompt)
print(res_text)
res = output_parser.parse(res_text)

 {"questions": [{"type": "multiple_choice", "question": "Wat is de naam van de eetstoornis waar Anissa aan lijdt?", "choices": ["Arfid", "Anorexia nervosa", "Boulimia nervosa", "Pica"], "answer": "Arfid"}, {"type": "multiple_choice_multi", "question": "Welke van de volgende voedingsmiddelen lust Anissa wel?", "choices": ["Appels", "Tomaten", "Pizza", "Soep", "Vis", "Vlees"], "answer": "Appels,Pizza,Vis,Vlees"}, {"type": "connect_terms", "question": "Verbind de eetmomenten met de bijbehorende maaltijden", "left_column": ["ontbijt", "lunch", "avondeten"], "right_column": ["boterhammen met choco", "pizza", "vol-au-vent"], "answer": "1-1,2-3,3-2"}, {"type": "multiple_choice", "question": "Wat is de reden dat Anissa moeite heeft met eten?", "choices": ["Ze heeft een trauma met eten gehad", "Ze heeft autisme", "Ze heeft een aangeboren afkeer van eten", "Ze is verwend"], "answer": "Ze heeft een aangeboren afkeer van eten"}, {"type": "free_text", "question": "Wat is de naam van de Facebookgroe

In [14]:
more_template = """
Generate a quiz with 5 questions that test a reader's comprehension of the following text.
The quiz you generate will have 5 question items and you can have 4 types of question items: 
    1.Multiple choice (multiple_choice): provide at least 3 choices per question and provide the correct answer (exact).
    2.Multiple choice with multiple answers (multiple_choice_multi): provide at least 3 choices per question and provide the correct answers, separated by commas (a potential correct value for answer could be 'foo,bar,test').
    3.Connect relevant terms (connect_terms): at least 3 terms in a random order in 1 column and at least 3 terms in a random order in the other column. The person that takes the test must select a matching term in each column.
    4.A free text question (free_text). Make sure to ask a question of which the answer can be found in the provided text, and make sure to provide the correct answer in the answer field. 'What do you think of ...?' is not a good question! There should be maximum 1 question of this type.
For each question, you also need to provide the correct answer. Make sure that the correct answer is exactly the same as the value of the choice (for connect_terms it should format a string with the indexes of the answers for each column '1-3,2-2,3-1').
The question should be concise and clear. The question itself should not list possible choices. The quiz should be sufficiently difficult and should contain at least 1 of each question type.
Questions, choices and answers should always be short and concise: answers should never be more than 3 words. The questions and answers should only take the text into account, nothing else.

The values of the name, description, questions, choices, answers should all be in the same language as the text.
Make sure that all output is in the same language as the text (all field values).

{format_instructions}

Text:
{input}

Json:"""

In [15]:
output_parser = PydanticOutputParser(pydantic_object=TopicQuestions)
new_prompt = PromptTemplate(
    input_variables=["input", "existing_questions"],
    partial_variables={"format_instructions": format_instructions},
    template=more_template,
)
final_new_prompt = new_prompt.format(input=fulltext, existing_questions=res_text)
print(final_new_prompt)

ValidationError: 1 validation error for PromptTemplate
__root__
  Invalid prompt schema; check for mismatched or missing input parameters. {'existing_questions'} (type=value_error)

In [None]:
new_res_text = llm(final_new_prompt)
print(new_res_text)
res = output_parser.parse(new_res_text)