In [None]:
#!pip install pypdf
#!pip install faiss-cpu

In [59]:
import os
import getpass
import json
import PyPDF2
from dotenv import load_dotenv
from langchain.chains import LLMChain
from langchain_openai import ChatOpenAI
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_core.prompts import ChatPromptTemplate,PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_community.document_loaders import PyPDFLoader
import streamlit as st
from utils import get_table_data
import pandas as pd

In [2]:
load_dotenv()

True

In [3]:
# This is an LLMChain to create 10-20 multiple choice questions from a given piece of text.
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

In [4]:
embeddings = OpenAIEmbeddings()

# RAG Module

### Loading of document

In [70]:
file_path = os.path.join("001_ed_tech_quiz", "Neural_Network_Excellent_Explanation.pdf")
loader = PyPDFLoader(file_path)
docs = loader.load_and_split()

In [14]:
loader = PyPDFLoader("Neural_Network_Excellent_Explanation.pdf")
docs = loader.load_and_split()

In [24]:
len(docs)

238

In [28]:
print(docs[234].page_content[:700])

214Is there a simple algorithm for intelligence?
behind the brain’s architecture.
In the last few paragraphs I’ve ignored the fact that 125 million bits merely quantiﬁes
the genetic difference between human and chimp brains. Not all our brain function is
due to those 125 million bits. Chimps are remarkable thinkers in their own right. Maybe
the key to intelligence lies mostly in the mental abilities (and genetic information) that
chimps and humans have in common. If this is correct, then human brains might be just a
minor upgrade to chimpanzee brains, at least in terms of the complexity of the underlying
principles. Despite the conventional human chauvinism about our unique capabilities, 


### Indexing: Split 

In [16]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200, add_start_index=True
)
all_splits = text_splitter.split_documents(docs)

In [17]:
len(all_splits)

780

In [29]:
len(all_splits[234].page_content)

969

In [30]:
all_splits[234].metadata

{'source': 'Neural_Network_Excellent_Explanation.pdf',
 'page': 72,
 'start_index': 794}

### Indexing: Store

Now we need to index our text chunks so that we can search over them at runtime. The most common way to do this is to embed the contents of each document split and insert these embeddings into a vector database (or vector store). When we want to search over our splits, we take a text search query, embed it, and perform some sort of “similarity” search to identify the stored splits with the most similar embeddings to our query embedding. The simplest similarity measure is cosine similarity — we measure the cosine of the angle between each pair of embeddings (which are high dimensional vectors).

In [32]:
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings

vectorstore = Chroma.from_documents(documents=all_splits, embedding=OpenAIEmbeddings())

#### Retrieval: Retrieve


In [33]:
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 3})

In [35]:
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

In [34]:
retrieved_docs = retriever.invoke("What is back propagation?")
print(retrieved_docs[0].page_content)

58How the backpropagation algorithm works
2


# Quiz Prompt Template

In [36]:
quiz_template = """
Context: {context}
You are an expert MCQ maker on the given {topic}. Given the above text, it is your job to\
create a quiz of {number} multiple choice questions for professionals in {difficulty} difficulty level
Make sure that questions are not repeated and check all the questions to be conforming to the text as well.
Make sure to format your response like the RESPONSE_JSON below and use it as a guide.\
Ensure to make the {number} MCQs.
### RESPONSE_JSON
{response_json}
"""

In [37]:
quiz_generation_prompt = PromptTemplate(
    input_variables=["context", "topic", "difficulty", "number", "response_json"],
    template=quiz_template,
)

In [39]:
RESPONSE_JSON = {
    "1": {
        "no": "1",
        "mcq": "multiple choice question",
        "options": {
            "a": "choice here",
            "b": "choice here",
            "c": "choice here",
            "d": "choice here",
        },
        "correct": "correct answer",
    },
    "2": {
        "no": "2",
        "mcq": "multiple choice question",
        "options": {
            "a": "choice here",
            "b": "choice here",
            "c": "choice here",
            "d": "choice here",
        },
        "correct": "correct answer",
    },
    "3": {
        "no": "3",
        "mcq": "multiple choice question",
        "options": {
            "a": "choice here",
            "b": "choice here",
            "c": "choice here",
            "d": "choice here",
        },
        "correct": "correct answer",
    },
}


### Testing RAG Output

In [49]:
from langchain.chains import LLMChain
from langchain.chains import SequentialChain
from langchain.callbacks import get_openai_callback

In [50]:
quiz_chain = LLMChain(
    llm=llm, prompt=quiz_generation_prompt, output_key="quiz", verbose=True
)
# This is the overall chain where we run these two chains in sequence.
generate_evaluate_chain = SequentialChain(
    chains=[quiz_chain],
    input_variables=["context", "topic", "difficulty", "number", "response_json"],
    # Here we return multiple variables
    output_variables=["quiz"],
    verbose=True,
)

In [54]:
mcq_count = 3
mcq_topic = "Back propagation"
difficulty = 'simple'

In [55]:
# count tokens and cost of api call
with get_openai_callback() as cb:
        response = generate_evaluate_chain(
        {
            "context": retriever | format_docs,
            "topic" : mcq_topic,
            "number": mcq_count,
            "difficulty": difficulty,
            "response_json": json.dumps(RESPONSE_JSON),
                    }
                )

  warn_deprecated(




[1m> Entering new SequentialChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
Context: first=VectorStoreRetriever(tags=['Chroma', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x0000028CFDB201D0>, search_kwargs={'k': 3}) last=RunnableLambda(format_docs)
You are an expert MCQ maker on the given Back propagation. Given the above text, it is your job tocreate a quiz of 3 multiple choice questions for professionals in simple difficulty level
Make sure that questions are not repeated and check all the questions to be conforming to the text as well.
Make sure to format your response like the RESPONSE_JSON below and use it as a guide.Ensure to make the 3 MCQs.
### RESPONSE_JSON
{"1": {"no": "1", "mcq": "multiple choice question", "options": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer"}, "2": {"no": "2", "mcq": "multiple choice ques

In [56]:
print(f"Total Tokens: {cb.total_tokens}")
print(f"Prompt Tokens: {cb.prompt_tokens}")
print(f"Completion Tokens: {cb.completion_tokens}")
print(f"Total Cost (USD): ${cb.total_cost}")


Total Tokens: 611
Prompt Tokens: 330
Completion Tokens: 281
Total Cost (USD): $0.0010570000000000002


In [57]:
 # Extract quiz data from the response
quiz = response.get("quiz", None)

In [58]:
quiz

'{"1": {"no": "1", "mcq": "What is Back propagation?", "options": {"a": "A machine learning algorithm used to train neural networks", "b": "A technique used to calculate the gradient of a loss function with respect to the weights of a neural network", "c": "A method for updating the weights of a neural network based on the error between the predicted and actual outputs", "d": "All of the above"}, "correct": "d"}, "2": {"no": "2", "mcq": "What is the purpose of Back propagation?", "options": {"a": "To minimize the error between the predicted and actual outputs of a neural network", "b": "To maximize the accuracy of a neural network", "c": "To calculate the gradient of a loss function with respect to the weights of a neural network", "d": "To update the weights of a neural network"}, "correct": "a"}, "3": {"no": "3", "mcq": "Which of the following is true about Back propagation?", "options": {"a": "It is only used in supervised learning", "b": "It is an unsupervised learning algorithm", 

In [60]:
if quiz is not None:
    table_data = get_table_data(quiz)
    if table_data is not None:
        df = pd.DataFrame(table_data)
        df.index = df.index + 1

# Streamlit Application

In [None]:
st.title("🦜⛓️ Quiz Generation for Educational Content")

# Create a form using st.form
with st.form("user_inputs"):
    # File upload
    #uploaded_file = st.file_uploader("Upload a pdf or text file")

    # Input fields
    mcq_count = st.number_input("No of MCQs", min_value=3, max_value=20,placeholder=3)
    topic = st.text_input("Provide a topic", max_chars=100,placeholder="backpropagation algorithm")
    difficulty = st.text_input("Provide Quiz difficulty", max_chars=100, placeholder="simple or complex")

    button = st.form_submit_button("Create quiz")


In [71]:
len(df)

3

In [None]:
def display_question(index, row):
    st.subheader(f"Q{index + 1}: {row['Questions']}")
    selected_option = st.radio("Choose your answer:", row['Choices'], index=0)
    return selected_option

def main(df):
    st.title("MCQ Quiz App")

    # Load data
    df = df.copy()

    # Display questions
    user_answers = []
    for index, row in df.iterrows():
        user_answer = display_question(index, row)
        user_answers.append(user_answer)

    # Submit button
    if st.button("Submit"):
        # Calculate score
        correct_answers = df['Correct_Answer'].tolist()
        score = sum(user_answer == correct_answer for user_answer, correct_answer in zip(user_answers, correct_answers))

        # Display score
        st.success(f"Your Score: {score}/{len(df)}")

In [None]:
# Check if the button is clicked and all fields have inputs
if button and uploaded_file is not None and topic and mcq_count and difficulty:
    with st.spinner("Loading..."):
        try:
            text = parse_file(uploaded_file)

            # count tokens and cost of api call
            with get_openai_callback() as cb:
                response = generate_evaluate_chain(
                    {
                        "text": text,
                        "number": mcq_count,
                        "grade": grade,
                        "tone": tone,
                        "response_json": json.dumps(RESPONSE_JSON),
                    }
                )
        except Exception as e:
            traceback.print_exception(type(e), e, e.__traceback__)
            st.error("Error")