In [7]:
import os
import json
import pandas as pd
import traceback

In [8]:
from langchain.chat_models import ChatOpenAI

In [9]:
from dotenv import load_dotenv

# Load key/value pairs from a local .env file into the process environment
# so that os.getenv("OPENAI_API_KEY") can find the credential.
load_dotenv()

True

In [10]:
# OpenAI credential taken from the environment; None when the variable is unset.
KEY = os.environ.get("OPENAI_API_KEY")

In [11]:
# Chat model instance that the chains in this notebook are built on.
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0.5,  # sampling temperature passed to the model
    openai_api_key=KEY,
)

In [12]:
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain, SequentialChain
from langchain.callbacks import get_openai_callback
import PyPDF2

In [13]:
# Skeleton of the expected quiz output, shown to the model as a formatting guide.
# Three identical placeholder questions keyed "1".."3", each with options "a".."d".
RESPONSE_JSON = {
    str(question_no): {
        "mcq": "multiple choice question",
        "options": {label: "choice here" for label in "abcd"},
        "correct": "correct answer",
    }
    for question_no in range(1, 4)
}

In [14]:
# Prompt for the quiz-generation step.
# Placeholders: {text}, {number}, {subject}, {tone}, {response_json}.
# A trailing backslash inside the string joins the next line onto the current
# one, so the space before it is what separates the two sentences.
TEMPLATE = """
Text:{text}
You are an expert MCQ maker. Given the above text, it is your job to \
create a quiz of {number} multiple choice questions for {subject} students in {tone} tone.
Make sure the questions are not repeated and check all the questions to be conforming to the text as well.
Make sure to format your response like RESPONSE_JSON below and use it as a guide. \
Ensure to make {number} MCQs
### RESPONSE_JSON
{response_json}
"""

In [15]:
# Prompt object for the generation step; the list names the placeholders used in TEMPLATE.
quiz_generation_prompt = PromptTemplate(
    template=TEMPLATE,
    input_variables=["text", "number", "subject", "tone", "response_json"],
)

In [16]:
# First stage: fills quiz_generation_prompt, calls the model and publishes the
# completion under the "quiz" output key. verbose=True echoes the formatted prompt.
quiz_chain = LLMChain(
    prompt=quiz_generation_prompt,
    llm=llm,
    verbose=True,
    output_key="quiz",
)

In [17]:
# Prompt for the quiz-review step. Placeholders: {subject} and {quiz}.
# A trailing backslash joins the next line onto the current one, so a space is
# kept before each backslash; without it the sentences run together
# ("students.You", "students,update").
TEMPLATE2 = """
You are an expert english grammarian and writer. Given a Multiple Choice Quiz for {subject} students. \
You need to evaluate the complexity of the question and give a complete analysis of the quiz. Only use at max 50 words for complexity analysis.
if the quiz is not at par with the cognitive and analytical abilities of the students, \
update the quiz questions which needs to be changed and change the tone such that it perfectly fits the student abilities
Quiz_MCQs:
{quiz}

Check from an expert English Writer of the above quiz:
"""

In [18]:
quiz_evaluation_prompt = PromptTemplate(
    input_variables=["subject, quiz"],
    template=TEMPLATE
)

In [19]:
review_chain = LLMChain(
    llm=llm,
    prompt=quiz_generation_prompt,
    output_key="review",
    verbose=True,
)

In [20]:
# Pipeline that runs quiz_chain and then review_chain, returning both the
# "quiz" and the "review" outputs for the five caller-supplied inputs.
generate_evaluate_chain = SequentialChain(
    input_variables=["text", "number", "subject", "tone", "response_json"],
    chains=[quiz_chain, review_chain],
    output_variables=["quiz", "review"],
    verbose=True,
)

In [21]:
# Source document for the quiz; a relative path, resolved against the current working directory.
file_path = "../data.txt"

In [22]:
with open(file_path, 'r') as f:
    TEXT = f.read()

In [23]:
# Preview the JSON-formatted string produced from the RESPONSE_JSON dict;
# the same json.dumps(RESPONSE_JSON) value is passed to the chain as "response_json".

json.dumps(RESPONSE_JSON)

'{"1": {"mcq": "multiple choice question", "options": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer"}, "2": {"mcq": "multiple choice question", "options": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer"}, "3": {"mcq": "multiple choice question", "options": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer"}}'

In [24]:
# Run parameters, passed to the chain as its "number", "subject" and "tone" inputs.
NUMBER = 5  # how many MCQs to request
SUBJECT = "RAG using Llama Index"  # fills {subject} in the prompts
TONE = "simple"  # fills {tone} in the generation prompt

In [26]:
# Token-usage tracking: the chain is invoked inside the OpenAI callback
# context, and `cb` is read afterwards for token counts and cost.
with get_openai_callback() as cb:
    response = generate_evaluate_chain(
        dict(
            text=TEXT,
            number=NUMBER,
            subject=SUBJECT,
            tone=TONE,
            response_json=json.dumps(RESPONSE_JSON),
        )
    )



[1m> Entering new SequentialChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
Text:Retrieval Augmented Generation (RAG)
LLMs are trained on enormous bodies of data but they aren’t trained on your data. Retrieval-Augmented Generation (RAG) solves this problem by adding your data to the data LLMs already have access to. You will see references to RAG frequently in this documentation.

In RAG, your data is loaded and prepared for queries or “indexed”. User queries act on the index, which filters your data down to the most relevant context. This context and your query then go to the LLM along with a prompt, and the LLM provides a response.

Even if what you’re building is a chatbot or an agent, you’ll want to know RAG techniques for getting data into your application.



Stages within RAG
There are five key stages within RAG, which in turn will be a part of any larger application you build. These are:

Loading: this refers to getting yo

In [44]:
# Usage report from the callback handler, one metric per line.
print(
    f"Total Tokens:{cb.total_tokens}",
    f"Prompt Tokens:{cb.prompt_tokens}",
    f"Completion Tokens:{cb.completion_tokens}",
    f"Total Cost:{cb.total_cost}",
    sep="\n",
)

Total Tokens:3318
Prompt Tokens:2460
Completion Tokens:858
Total Cost:0.005406


In [28]:
# Inspect the raw chain result: a dict that echoes the inputs (e.g. "text")
# alongside the chain outputs. The full source text makes this output long.
response

{'text': 'Retrieval Augmented Generation (RAG)\nLLMs are trained on enormous bodies of data but they aren’t trained on your data. Retrieval-Augmented Generation (RAG) solves this problem by adding your data to the data LLMs already have access to. You will see references to RAG frequently in this documentation.\n\nIn RAG, your data is loaded and prepared for queries or “indexed”. User queries act on the index, which filters your data down to the most relevant context. This context and your query then go to the LLM along with a prompt, and the LLM provides a response.\n\nEven if what you’re building is a chatbot or an agent, you’ll want to know RAG techniques for getting data into your application.\n\n\n\nStages within RAG\nThere are five key stages within RAG, which in turn will be a part of any larger application you build. These are:\n\nLoading: this refers to getting your data from where it lives – whether it’s text files, PDFs, another website, a database, or an API – into your pip

In [30]:
# Raw output of the quiz-generation stage, stored under the "quiz" key (None if the key is absent).
quiz = response.get("quiz")

In [33]:
# Parse from the chain output rather than from `quiz` itself so this cell can be
# re-run: once `quiz` holds the parsed dict, json.loads(quiz) raises TypeError.
quiz = json.loads(response.get("quiz"))

In [38]:
# Flatten the parsed quiz into one row per question. The options of a question
# are collapsed into a single "label : text | label : text" string.
# NOTE(review): "Choces" looks like a typo for "Choices"; it is kept as-is
# because it is the column name that ends up in the DataFrame and the CSV.
quiz_table_data = [
    {
        "MCQ": entry["mcq"],
        "Choces": " | ".join(
            f"{label} : {choice_text}"
            for label, choice_text in entry["options"].items()
        ),
        "Correct": entry["correct"],
    }
    for entry in quiz.values()
]

In [39]:
# Show the flattened rows (one dict per question) before they are turned into a DataFrame.
quiz_table_data

[{'MCQ': 'What is the purpose of Retrieval-Augmented Generation (RAG)?',
  'Choces': 'a : To train LLMs on enormous bodies of data | b : To add user data to the data LLMs already have access to | c : To create vector embeddings for indexing data | d : To store and retrieve data efficiently',
  'Correct': 'b'},
 {'MCQ': 'What is the indexing stage in RAG?',
  'Choces': 'a : Loading data from different sources | b : Creating a data structure for querying the data | c : Storing the indexed data | d : Evaluating the effectiveness of the pipeline',
  'Correct': 'b'},
 {'MCQ': 'What are retrievers used for in the querying stage of RAG?',
  'Choces': 'a : To generate vector embeddings for data | b : To determine the best retrieval strategy | c : To apply transformations to retrieved nodes | d : To generate responses from an LLM',
  'Correct': 'b'},
 {'MCQ': 'What is the purpose of a chat engine in RAG?',
  'Choces': 'a : To ask questions over the data | b : To have a conversation with the dat

In [41]:
# Build the table in one step from the list of row dicts; the dict keys become the columns.
quiz_df = pd.DataFrame(quiz_table_data)

In [42]:
# Display the resulting quiz table.
quiz_df

Unnamed: 0,MCQ,Choces,Correct
0,What is the purpose of Retrieval-Augmented Gen...,a : To train LLMs on enormous bodies of data |...,b
1,What is the indexing stage in RAG?,a : Loading data from different sources | b : ...,b
2,What are retrievers used for in the querying s...,a : To generate vector embeddings for data | b...,b
3,What is the purpose of a chat engine in RAG?,a : To ask questions over the data | b : To ha...,b
4,What is the main advantage of using an agent i...,a : It can train LLMs on enormous bodies of da...,d


In [43]:
# Persist the quiz table next to the notebook.
# NOTE(review): to_csv writes the row index as an unnamed first column by
# default; pass index=False if that column is not wanted — confirm with consumers.
quiz_df.to_csv("RAG.csv")

In [1]:
import logging
import os
from datetime import datetime

In [2]:
# Timestamped log file name in month_day_year_hour_minute_second form, e.g. '12_30_2023_17_46_50.log'.
f"{datetime.now().strftime('%m_%d_%Y_%H_%M_%S')}.log"

'12_30_2023_17_46_50.log'