In [None]:
# General stuff

In [1]:
from parser import parser
from llama_index.core import Document,Settings
import nest_asyncio
from markdown_parser import markdown_parser
from vector_storing import vector_storing
from vector_load import vector_load
from llama_index.llms.openai import OpenAI
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.retrievers import QueryFusionRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.tools import QueryEngineTool
from llama_index.core.query_engine import RouterQueryEngine
from llama_index.core.selectors import (
    PydanticMultiSelector,
    PydanticSingleSelector,
    LLMSingleSelector, 
    LLMMultiSelector
)
from llama_index.core.node_parser import CodeSplitter
from llama_index.core import SimpleDirectoryReader
from llama_index.core.tools import QueryEngineTool


nest_asyncio.apply()
Settings.chunk_size = 1024

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# This function will get executed when upload button i clicked

parser("./2305.05176.pdf")

Started parsing the file under job_id ac46afc0-f485-4f39-88dc-ebca40edecd6


In [2]:
# Extracting the title of the research paper

current_pfile = './parsed_pdf.md'

with open(current_pfile, 'r', encoding="UTF-8") as f:
    parsed_text = f.read()

title = parsed_text[parsed_text.find(' '):parsed_text.find('\n')]

title = title.strip()

In [3]:
# Creating llama index document with paper name as metadata

parsed_doc = Document(text=parsed_text,
                      metadata = {"paper_name": title, "file_name": title})

parsed_doc.metadata # prints metadata of the document

{'paper_name': 'FrugalGPT: How to Use Large Language Models While Reducing Cost and Improving Performance',
 'file_name': 'FrugalGPT: How to Use Large Language Models While Reducing Cost and Improving Performance'}

# Top K Selector

In [5]:
# Generating Index nodes and Text nodes by using  llama index MarkdownElementNodeParser 

base_nodes, objects = markdown_parser([parsed_doc])

13it [00:00, ?it/s]
100%|██████████| 13/13 [00:08<00:00,  1.51it/s]


In [6]:
nodes = base_nodes + objects

In [7]:
# Storing nodes in Chroma DB

vector_index = vector_storing("./embeddings/top_k","text-embedding-3-small","vector_store",nodes,"VectorStoreIndex")

<llama_index.core.indices.vector_store.base.VectorStoreIndex at 0x20f57532a10>

In [8]:
# Loading the stored nodes

vector_index = vector_load("./embeddings/top_k","text-embedding-3-small","vector_store","VectorStoreIndex")

In [9]:
vector_query_engine = vector_index.as_query_engine(similarity_top_k=15)

In [10]:
query = "Can you explain what table 1 is describing in the paper named Frugal GPT"

response_1 = vector_query_engine.query(query)
print(str(response_1))

Table 1 in the paper named FrugalGPT describes a comparison of different language models such as GPT-C, GPT-J, ChatGPT, and GPT-4 based on certain parameters. The table includes values for Approximation, GPT-J, ChatGPT, and GPT-4 across different columns, providing a comparison of these models in the context of the discussed parameters.


# Summariser

In [4]:
# Generating the summary documents list which stores each section of the research paper as a sepearte llama index document

summary_documents = []

for para in parsed_doc.text.split('##'):
    section = para[:para.find('\n')].strip()
    if section.lower() != 'references':
        print(section)
        doc = Document(text=para, metadata={"file_name": title, "paper_name": title, "section": section})
        summary_documents.append(doc)


FrugalGPT: How to Use Large Language Models While Reducing Cost and Improving Performance
# Introduction
Query
Query
Budget
Figure 1: Our vision for reducing LLM cost while improving accuracy.
Related Works
LLM APIs do not release their models.
Scope and Problem Statement
How to Use LLMs Affordably and Accurately
Strategy 1: Prompt adaptation
Strategy 2: LLM approximation
Strategy 3: LLM cascade

# Discussions, Limitations and Future Prospects


In [5]:
# Metadata that is storing by each llama index document inside summary_documents

summary_documents[1].metadata

{'file_name': 'FrugalGPT: How to Use Large Language Models While Reducing Cost and Improving Performance',
 'paper_name': 'FrugalGPT: How to Use Large Language Models While Reducing Cost and Improving Performance',
 'section': 'FrugalGPT: How to Use Large Language Models While Reducing Cost and Improving Performance'}

In [6]:
# Generating nodes from summary_documents

splitter = SentenceSplitter(chunk_size=1024)
nodes = splitter.get_nodes_from_documents(summary_documents)

In [7]:
vector_storing("./embeddings/summaries","text-embedding-3-small","summary",nodes,"SummaryIndex")

<llama_index.core.indices.list.base.SummaryIndex at 0x18e0575a260>

In [8]:
summary_index = vector_load("./embeddings/summaries","text-embedding-3-small","summary","SummaryIndex")

In [11]:
# Query Data from the persisted index
summary_query_engine = summary_index.as_query_engine(
    response_mode="tree_summarize", use_async=True
)
response = summary_query_engine.query("Can you summarise the paper that is taking about Frugal GPT")
str(response)

In [7]:
# query_engine.retrieve("can you summarise this")

# Key word search indexer

In [None]:
vector_storing("./embeddings/key_words","text-embedding-3-small","key_words_store",nodes,"SimpleKeywordTableIndex")

In [None]:
key_words_index = vector_load("./embeddings/key_words","text-embedding-3-small","key_words_store","SimpleKeywordTableIndex")

In [None]:
key_words_query_engine = key_words_index.as_query_engine(similarity_top_k=15)

In [None]:
response = key_words_query_engine.query("can you compare Frugal GPT with cohere")

str(response)

# Query Fusion

In [60]:
query_fusion_retriever = QueryFusionRetriever(
    [vector_index.as_retriever()],
    similarity_top_k=5,
    num_queries=3,  # set this to 1 to disable query generation
    mode="reciprocal_rerank",
    use_async=True,
    verbose=True,
    # query_gen_prompt="...",  # we could override the query generation prompt here
)

In [62]:
nodes_with_scores = query_fusion_retriever.retrieve("what is frugal GPT and it's score on different datasets compared to ChatGPT 3 and 4")

Generated queries:
1. Comparison of frugal GPT and ChatGPT 3/4 performance on various datasets
2. Frugal GPT vs ChatGPT 3/4: accuracy and efficiency analysis on different datasets


In [63]:

query_fusion_query_engine = RetrieverQueryEngine.from_args(query_fusion_retriever)

In [66]:
response = query_fusion_query_engine.query(
    "what is frugal GPT and it's score on different datasets compared to ChatGPT 3 and 4 name all the datasets that are used for comparison"
)

Generated queries:
1. Comparison of frugal GPT and ChatGPT 3/4 scores on various datasets
2. Datasets used to compare frugal GPT and ChatGPT 3/4 performance


In [67]:
str(response)

'FrugalGPT is a model that aims to reduce costs while maintaining accuracy by identifying queries that can be accurately answered by smaller language models. In comparison to ChatGPT, GPT-3 J1, and GPT-4, the datasets used for comparison include zero-shot, few-shot learning, and other scenarios like CoT.'

# Pipeline for code

In [None]:
splitter = CodeSplitter('python')

documents = SimpleDirectoryReader("./code").load_data()
nodes = splitter.get_nodes_from_documents(documents)

In [None]:
# Storing nodes in Chroma DB

code_index = vector_storing("./embeddings/code","text-embedding-3-small","vector_store",nodes,"VectorStoreIndex")

In [None]:
code_index = vector_load("./embeddings/code","text-embedding-3-small","vector_store","VectorStoreIndex")

In [None]:
code_query_engine = code_index.as_query_engine(similarity_top_k=15)

In [None]:
query = "Can you explain what table 1 is describing in the paper named Frugal GPT"

response_1 = code_query_engine.query(query)
print(str(response_1))

# Router

In [None]:
vector_tool = QueryEngineTool.from_defaults(
    query_engine=vector_query_engine,
    description=(
        "Useful for retrieving specific context"
    ),
)


summary_tool = QueryEngineTool.from_defaults(
    query_engine=summary_query_engine,
    description=(
        "Useful for summarization"
    ),
)

key_words_tool = QueryEngineTool.from_defaults(
    query_engine=key_words_query_engine,
    description=(
        "Useful for searching based on key words"
    ),
)


code_tool = QueryEngineTool.from_defaults(
    query_engine=code_query_engine,
    description=(
        "Useful for extacting code"
    ),
)

query_fusion_tool = QueryEngineTool.from_defaults(
    query_engine=query_fusion_query_engine,
    description=(
        "Split complex queries into sub queries"
    ),
)


In [None]:



query_engine = RouterQueryEngine(
    selector=PydanticMultiSelector.from_defaults(),
    query_engine_tools=[
        vector_tool,
        summary_tool,
        key_words_tool,
        code_tool,
        query_fusion_tool
    ],
)

In [None]:
response = query_engine.query("What is the summary of the document?")
print(str(response))