# **MCQ Creator App**
**Quiz Creator App**

In [None]:
!pip install unstructured
!pip install tiktoken
!pip install pinecone-client
!pip install pypdf
!pip install OpenAI
!pip install langchain
!pip install sentence-transformers

Collecting sentence-transformers
  Downloading sentence-transformers-2.2.2.tar.gz (85 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.0/86.0 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting sentencepiece (from sentence-transformers)
  Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m
Building wheels for collected packages: sentence-transformers
  Building wheel for sentence-transformers (setup.py) ... [?25l[?25hdone
  Created wheel for sentence-transformers: filename=sentence_transformers-2.2.2-py3-none-any.whl size=125923 sha256=cdca5a4f0c2104881e00dcb1f0ff565a30804d3884563c8ac2465f65aacab89a
  Stored in directory: /root/.cache/pip/wheels/62/f2/10/1e606fd5f02395388f74e7462910fe851042f97238cbbd902f
Successfully built sentence-tr

In [None]:
from getpass import getpass

import openai
import pinecone
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Pinecone
from langchain.llms import OpenAI
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings

In [None]:
import os

# Credentials must never be typed into the notebook source, because the
# notebook (and its outputs) is routinely shared or committed.
# Values already present in the environment are kept; anything missing is
# requested interactively without echoing it to the screen.
for _secret_name in ("OPENAI_API_KEY", "HUGGINGFACEHUB_API_TOKEN"):
  if not os.environ.get(_secret_name):
    os.environ[_secret_name] = getpass(f"Enter {_secret_name}: ")

# **Load Documents**
Load the PDF files from the source directory using pypdf

In [None]:
def load_docs(directory):
  """Read the PDF files under `directory` via PyPDFDirectoryLoader.

  Args:
    directory: Path of the folder handed to the loader.

  Returns:
    Whatever the loader's `load()` call produces (the loaded documents).
  """
  pdf_loader = PyPDFDirectoryLoader(directory)
  return pdf_loader.load()

In [None]:
# Folder that is scanned for PDF files.
# NOTE(review): '/content/' looks like the Colab working directory — confirm
# or adjust before running this notebook elsewhere.
directory ='/content/'
documents = load_docs(directory)
# Display how many documents were loaded (34 in the saved run).
len(documents)

34

# **Transform Documents**
Split document into smaller chunks

In [None]:
def split_docs(documents, chunk_size=1000, chunk_overlap=20):
  """Break `documents` into smaller chunks with RecursiveCharacterTextSplitter.

  Args:
    documents: The documents to split.
    chunk_size: Forwarded to the splitter as `chunk_size`.
    chunk_overlap: Forwarded to the splitter as `chunk_overlap`.

  Returns:
    The result of the splitter's `split_documents` call.
  """
  splitter_options = {"chunk_size": chunk_size, "chunk_overlap": chunk_overlap}
  return RecursiveCharacterTextSplitter(**splitter_options).split_documents(documents)

In [None]:
# Chunk the loaded documents using split_docs' default chunk_size/chunk_overlap.
docs = split_docs(documents)
# Number of chunks produced (104 in the saved run).
print(len(docs))

104


# **Generate Text Embeddings**


In [None]:
# Disabled alternative: use OpenAI embeddings instead of the
# sentence-transformers model configured in the next cell.
#embeddings = OpenAIEmbeddings(model_name='ada')

In [None]:
# Embedding model that is later passed to the Pinecone vector store; the check
# in the next cell shows it yields 384-dimensional vectors.
embeddings = SentenceTransformerEmbeddings(model_name='all-MiniLM-L6-v2')

In [None]:
# Sanity check: embed a sample string and display the length of the resulting
# vector (384 in the saved run).
query_result = embeddings.embed_query('Tanzania')
len(query_result)

384

# **Vector Store - PINECONE**

Vector store, search database

In [None]:
# Connect to Pinecone. The API key is taken from the PINECONE_API_KEY
# environment variable, or requested interactively (without echo) when that
# variable is unset, so that no credential is stored in the notebook source.
pinecone.init(
    api_key=os.environ.get("PINECONE_API_KEY") or getpass("Enter PINECONE_API_KEY: "),
    environment='gcp-starter'
)

index_name='mcqcreator'

# Embed the chunks with `embeddings` and store them in the named index; the
# returned vector store is what the retrieval helper below queries.
# NOTE(review): presumably the 'mcqcreator' index must already exist with a
# dimension matching the embedding size (384) — confirm. Re-running this cell
# likely uploads the same chunks again.
index = Pinecone.from_documents(docs, embeddings, index_name=index_name)

## **Retrieve Answers**


In [None]:
def get_similar_docs(query, k=2):
  """Fetch the top relevant documents for `query` from the vector store.

  Args:
    query: Text to search the vector store with.
    k: Forwarded to `index.similarity_search` as its second positional
      argument (the number of documents requested).

  Returns:
    The result of `index.similarity_search(query, k)`.
  """
  return index.similarity_search(query, k)

# **Question Answer Chain**

In [None]:
from langchain.chains.question_answering import load_qa_chain
from langchain import HuggingFaceHub

In [None]:
# LLM accessed through the Hugging Face Hub wrapper, using the
# 'bigscience/bloom' repository.
# NOTE(review): the near-zero temperature is presumably meant to make the
# generated answers as deterministic as possible — confirm.
llm = HuggingFaceHub(
    repo_id='bigscience/bloom',
    model_kwargs={"temperature": 1e-10},
)
# Display the configured wrapper.
llm



HuggingFaceHub(client=InferenceAPI(api_url='https://api-inference.huggingface.co/pipeline/text-generation/bigscience/bloom', task='text-generation', options={'wait_for_model': True, 'use_gpu': False}), repo_id='bigscience/bloom', model_kwargs={'temperature': 1e-10})

In [None]:
# Question-answering chain built on `llm`, used by get_answer below.
# NOTE(review): per the LangChain docs the "stuff" chain type places all
# supplied documents into a single prompt — confirm this suits the chunk sizes.
chain = load_qa_chain(llm, chain_type="stuff")

In [None]:
def get_answer(query, k=2, verbose=True):
  """Answer `query` using the most relevant documents from the vector store.

  Args:
    query: The question to answer.
    k: Number of similar documents to retrieve and pass to the chain
      (defaults to 2, the value previously always used).
    verbose: When True (the default, matching the earlier behaviour), print
      the retrieved documents before running the chain. Pass False to avoid
      dumping whole documents into the cell output.

  Returns:
    The result of `chain.run` for the retrieved documents and the query.
  """
  relevant_docs = get_similar_docs(query, k)
  if verbose:
    print(relevant_docs)
  return chain.run(input_documents=relevant_docs, question=query)


In [None]:
# ask a query
our_query = "When is Kenya's rainy season"
# `answer` is reused further down as the input text for MCQ generation.
answer = get_answer(our_query)
print(answer)

[Document(page_content="is February and March, leading into the season of the long rains, and the coldest is in July, until mid-August.[116]Climate change is posing an increasing threat to global socio-[117]economic development and environmental sustainability. Developing countries with low adaptive capacity and high vulnerability to the phenomenon are disproportionately affected. Climate change in Kenya is increasingly impacting the lives of Kenya's citizens and the environment.[117] Climate Change has led to more frequent extreme weather events like droughts which last longer than usual, irregular and unpredictable rainfall, flooding and increasing temperatures.", metadata={'page': 11.0, 'source': '/content/sample.pdf'}), Document(page_content='killed and another 600,000 internally displaced, making it the worst post-election violence in Kenya. To stop the death and displacement of people, Kibaki and Odinga agreed to work together, with the latter taking the position of a prime minis

# **Structure The Output**


In [None]:
import re
import json

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage
from langchain.prompts import PromptTemplate, ChatPromptTemplate, HumanMessagePromptTemplate
from langchain.output_parsers import StructuredOutputParser, ResponseSchema

In [None]:
# Fields the model is asked to return for each generated question. No explicit
# type is given, so every field uses ResponseSchema's default type.
response_schemas = [
    ResponseSchema(name=field_name, description=field_description)
    for field_name, field_description in (
        ('question', 'Question generated from provided input text data'),
        ('choices', 'Available options for a multiple choice question in a dictionary format'),
        ('answer', 'Correct answer for the asked question.'),
    )
]

# Parser built from the schemas above; it supplies the format instructions
# that are injected into the prompt later on.
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
output_parser

StructuredOutputParser(response_schemas=[ResponseSchema(name='question', description='Question generated from provided input text data', type='string'), ResponseSchema(name='choices', description='Available options for a multiple choice question in a dictionary format', type='string'), ResponseSchema(name='answer', description='Correct answer for the asked question.', type='string')])

In [None]:
# Fetch the instructions that LangChain generates so that the model's response
# comes back in the format described by `response_schemas`. They are injected
# into the prompt below as a partial variable.
format_instructions = output_parser.get_format_instructions()
print(format_instructions)

The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":

```json
{
	"question": string  // Question generated from provided input text data
	"choices": string  // Available options for a multiple choice question in a dictionary format
	"answer": string  // Correct answer for the asked question.
}
```


In [None]:
# Chat model created with default settings.
# NOTE(review): presumably the OpenAI key is picked up from the OPENAI_API_KEY
# environment variable set earlier — confirm.
chat_model = ChatOpenAI()

In [None]:
# Display only the model name. The full repr of this object includes the
# OpenAI API key in plain text, which would then be saved in the notebook
# output and leaked whenever the notebook is shared.
chat_model.model_name

ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x7a8e7ffbb3d0>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x7a8e7fdda8f0>, openai_api_key='sk-...REDACTED...', openai_proxy='')

In [None]:
# Single-message chat prompt: an instruction to generate multiple choice
# questions, followed by the parser's format instructions and the user's text.
prompt = ChatPromptTemplate(
    messages=[
        HumanMessagePromptTemplate.from_template("""
          When a text input is given by the user, please generate multiple choice questions from it along with the correct answer.
          \n{format_instructions}\n{user_prompt}""")
    ],
    # The keyword is `input_variables` (plural); the previous spelling
    # `input_variable` is not a ChatPromptTemplate field, so the declared
    # variable list never reached the template.
    input_variables=["user_prompt"],
    # Pre-filled once here so that callers only have to supply `user_prompt`.
    partial_variables={"format_instructions": format_instructions}
)

In [None]:
# Fill the {user_prompt} placeholder with the answer text obtained from the
# question-answering step above, then show the resulting prompt value.
final_query = prompt.format_prompt(user_prompt = answer)
print(final_query)

messages=[HumanMessage(content=' \n          When a text input is given by the user, please generate multiple choice questions from it along with the correct answer.\n          \nThe output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":\n\n```json\n{\n\t"question": string  // Question generated from provided input text data\n\t"choices": string  // Available options for a multiple choice question in a dictionary format\n\t"answer": string  // Correct answer for the asked question.\n}\n```\n\nThe rainy season in Kenya is from March to May and from October to December.')]


In [None]:
# Display the prompt as the list of chat messages that is sent to the model
# in the next cell.
final_query.to_messages()

[HumanMessage(content=' \n          When a text input is given by the user, please generate multiple choice questions from it along with the correct answer.\n          \nThe output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":\n\n```json\n{\n\t"question": string  // Question generated from provided input text data\n\t"choices": string  // Available options for a multiple choice question in a dictionary format\n\t"answer": string  // Correct answer for the asked question.\n}\n```\n\nThe rainy season in Kenya is from March to May and from October to December.')]

In [None]:
# Send the messages to the chat model and print the text of its reply, which
# in the saved run is a ```json fenced block containing the generated MCQ.
final_query_output = chat_model(final_query.to_messages())
print(final_query_output.content)

```json
{
	"question": "When is the rainy season in Kenya?",
	"choices": {
		"a": "January to February",
		"b": "March to May",
		"c": "June to September",
		"d": "October to December"
	},
	"answer": "b"
}
```


In [None]:
# Extract the JSON object from the model's markdown reply.
markdown_text = final_query_output.content
# The pattern matches a brace-delimited object that may contain one level of
# nested braces (enough for the nested "choices" dictionary). re.DOTALL has no
# effect here because the pattern contains no '.', but is kept unchanged.
json_match = re.search(r'\{(?:[^{}]*\{[^{}]*\}[^{}]*)*\}', markdown_text, re.DOTALL)
if json_match is None:
    # Fail with a descriptive error instead of an AttributeError on None when
    # the model did not return any JSON object.
    raise ValueError("No JSON object found in the model response:\n" + markdown_text)
json_string = json_match.group(0)
print(json_string)

{
	"question": "When is the rainy season in Kenya?",
	"choices": {
		"a": "January to February",
		"b": "March to May",
		"c": "June to September",
		"d": "October to December"
	},
	"answer": "b"
}
