<a href="https://colab.research.google.com/github/mambabhi/indic-learn/blob/main/indic_story_english_quiz_generator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
# Shell escape: create the backend directory tree used by the %%writefile cell below.
!mkdir -p indic-quiz/backend

In [5]:
%%writefile indic-quiz/backend/haystack_pipeline.py

Writing indic-quiz/backend/haystack_pipeline.py


In [6]:
# Shell escape: install the notebook's third-party dependencies into the kernel environment.
# NOTE(review): the visible cells import only haystack and json_repair from this list;
# confirm that langfuse-haystack, langfuse, groq, surf and datasets are actually needed.
!pip install haystack-ai langfuse-haystack langfuse groq surf json-repair datasets



In [7]:
import json
import json_repair
import os
from pathlib import Path
from typing import List, Dict
from getpass import getpass

In [9]:
from haystack import Pipeline, component
from haystack.components.generators import OpenAIGenerator
from haystack.components.builders import PromptBuilder
from haystack.components.websearch.serper_dev import SerperDevWebSearch
from haystack.utils import Secret

import requests
import pandas as pd
from pprint import pprint

Read the Groq and Serper Dev API keys from environment variables, prompting the user for any key that is not already set.

In [12]:
# Ask for the Groq key interactively only when the environment does not
# already define it; getpass keeps the typed value off the screen.
if os.environ.get("GROQ_API_KEY") is None:
    os.environ["GROQ_API_KEY"] = getpass("Enter Groq API key:")

Enter Groq API key:··········


In [13]:
# Ask for the Serper Dev key interactively only when the environment does not
# already define it; getpass keeps the typed value off the screen.
if os.environ.get("SERPERDEV_API_KEY") is None:
    os.environ["SERPERDEV_API_KEY"] = getpass("Enter Serper Dev key:")

Enter Serper Dev key:··········


In [10]:
@component
class QuizParser:
    """Parses the quiz JSON out of the LLM's response.

    The reply may surround the JSON with extra text, so only the span from the
    first opening brace/bracket to the last closing brace/bracket is parsed.
    """

    @component.output_types(quiz=Dict)
    def run(self, replies: List[str]):
        """Parse the first reply into a quiz structure.

        Args:
            replies: Generator replies; only ``replies[0]`` is inspected.

        Returns:
            The parsed JSON value. When the parsed value is a list, its first
            element is returned instead.
            NOTE(review): the value is returned as-is rather than wrapped as
            ``{"quiz": ...}``; this relies on the model emitting a top-level
            "quiz" key as the prompt requests -- confirm this is intended.
        """
        reply = replies[0]

        # str.find() returns -1 for a delimiter that is absent. Taking min()
        # over the raw results would pick that -1 whenever only one kind of
        # delimiter occurs (e.g. an object without arrays) and the slice would
        # start at the last character. Only consider delimiters actually found.
        opening_positions = [
            position
            for position in (reply.find("{"), reply.find("["))
            if position != -1
        ]
        first_index = min(opening_positions) if opening_positions else 0

        # rfind()'s -1 is harmless here: max() prefers any delimiter that was
        # found, and when none exists the +1 yields 0, i.e. an empty slice.
        last_index = max(reply.rfind("}"), reply.rfind("]")) + 1
        json_portion = reply[first_index:last_index]

        try:
            quiz = json.loads(json_portion)
        except json.JSONDecodeError:
            # Strict parsing failed; let json_repair attempt a lenient parse.
            quiz = json_repair.loads(json_portion)
        if isinstance(quiz, list):
            quiz = quiz[0]
        return quiz

In [33]:
def build_english_quiz_pipeline():
    """Assemble the pipeline that turns a story into an English quiz.

    Three components are registered: ``websearch`` (SerperDevWebSearch,
    top 5 results), ``prompt_builder`` (the quiz prompt, filled with the story
    text and the search snippets) and ``generator`` (an OpenAIGenerator pointed
    at Groq's OpenAI-compatible endpoint). Search documents feed the prompt
    builder, whose output feeds the generator.

    Returns:
        The configured, not-yet-run Pipeline.
    """
    # Jinja template: {{text}} is supplied by the caller at run time and
    # ``documents`` comes from the websearch component.
    quiz_template = """
            Given the following - {{text}} - in English language, create 5 multiple choice quizzes in JSON format in English language.
            Each question should have 4 different options, and only one of them should be correct.
            The options should be unambiguous.
            Each option should begin with a letter followed by a period and a space (e.g., "a. king").
            The question should also briefly mention the general topic of the text so that it can be understood in isolation.
            Each question should not give hints to answer the other questions.
            Include challenging questions, which require reasoning.
            Generate simple english sentences so that they can be easily converted to Sanskrit.

            Respond with JSON only, no markdown or descriptions.
            The JSON should be entirely in English.

            Note that you are able to provide more accurate english sentences because you can understand additional context from web sources.
            You are able to use the snippets extracted from the web to excel in your translation.
            Only, if you aren't able to find sources on the web that matches the text, then use your knowledge of English grammar.
            Provide the link to the source from the web that you might have used.

            Example JSON format you should absolutely follow, including the reasoning:

            {
              "quiz": {
                "topic": "a sentence explaining the topic of the text",
                "questions": [
                  {
                    "question": "text of the question",
                    "options": ["a. 1st option", "b. 2nd option", "c. 3rd option", "d. 4th option"],
                    "right_option": "c",
                    "source": "I found a source: <paste_actual_link_here> which provided the context for me to properly generate english quiz"
                  }
                ]
              }
            }

            Snippets:
            {% for doc in documents %}
            - snippet: "{{doc.content}}"
            {% endfor %}
            """
    sampling_settings = {
        "max_tokens": 5000,
        "temperature": 0.5,
        "top_p": 1,
    }

    quiz_pipeline = Pipeline()

    # Components are created in the same order as they are registered.
    quiz_pipeline.add_component("websearch", SerperDevWebSearch(top_k=5))
    quiz_pipeline.add_component(
        "prompt_builder", PromptBuilder(template=quiz_template)
    )
    quiz_pipeline.add_component(
        "generator",
        OpenAIGenerator(
            api_key=Secret.from_env_var("GROQ_API_KEY"),
            api_base_url="https://api.groq.com/openai/v1",
            model="llama3-70b-8192",
            generation_kwargs=sampling_settings,
        ),
    )

    # Wiring: search results -> prompt, rendered prompt -> LLM.
    wiring = (
        ("websearch.documents", "prompt_builder.documents"),
        ("prompt_builder", "generator"),
    )
    for sender, receiver in wiring:
        quiz_pipeline.connect(sender, receiver)

    return quiz_pipeline

In [24]:
# Build the pipeline once; later cells run it through this name.
english__quiz_generation = build_english_quiz_pipeline()



In [25]:
# Story title; interpolated into the web-search query in a later cell.
indic_topic = "The King’s Monkey Servant"

In [26]:
# Full story (title, moral and body) passed to the prompt builder as `text`.
indic_text = """Title: The King’s Monkey Servant
Moral: A king wishing long life should never keep foolish servants.”
Story: A king had a monkey as his body-guard. He was very fond of the king, and as he was very much trusted by the king, he could go into the kings’ bed room without being stopped by anyone.
Once when the king was sleeping the monkey started breezing the king with a fan. While doing this a fly came and sat on the king’s chest. The monkey tried to ward off the fly with the fan. But the fly would come again and sit on the same place.
The monkey due to its foolish nature became angry, got a sharp sword and hit the fly to kill it. The fly flew away but, the king’s chest was divided into two, and the king died."""

In [30]:
# Run the pipeline: the websearch component receives a query built from the
# story title, and the prompt builder receives the full story text.
english_quiz = english__quiz_generation.run(
    data=dict(
        websearch=dict(
            query=f"""
          Find some knowledgeable web sources to understand the context of the topic of the indic story: "{indic_topic}".
        """
        ),
        prompt_builder=dict(text=indic_text),
    )
)

In [31]:
# Standalone parser instance; it is called directly rather than added to the pipeline.
parser = QuizParser()

In [32]:
# Extract the quiz structure from the generator's replies and pretty-print it.
english_quiz_text = parser.run(
    replies=english_quiz["generator"]["replies"],
)
pprint(english_quiz_text)

{'quiz': {'questions': [{'options': ['a. Cook',
                                     'b. Bodyguard',
                                     'c. Advisor',
                                     'd. Messenger'],
                         'question': "What was the monkey's role in the king's "
                                     'palace?',
                         'right_option': 'b',
                         'sources': ''},
                        {'options': ['a. To wake the king up',
                                     'b. To kill a fly',
                                     'c. To test the sword',
                                     'd. To play a prank'],
                         'question': "Why did the monkey hit the king's chest "
                                     'with a sword?',
                         'right_option': 'b',
                         'sources': ''},
                        {'options': ['a. A mosquito',
                                     'b. A fly',
             