In [1]:
import langchain

# Load & process
from langchain.document_loaders import DirectoryLoader, PyPDFLoader
from langchain.text_splitter import TokenTextSplitter

# Vector store & embeddings
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings

# Conversations
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.chains import LLMChain
from langchain.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder,
    SystemMessagePromptTemplate,
)
from langchain.memory import ConversationBufferMemory
from langchain.output_parsers.json import SimpleJsonOutputParser
from langchain.llms import OpenAI

# Post-processing
import fitz

# Token counter
import tiktoken
encoder = tiktoken.encoding_for_model("text-embedding-ada-002")
from langchain.callbacks.manager import get_openai_callback

In [3]:
# SECURITY: an OpenAI API key was hardcoded on this line. Secrets must never be stored in
# notebook source; the exposed key should be revoked and replaced.
DELETE_THIS = None

In [4]:
import os
from getpass import getpass

# Take the key from the environment and prompt only when it is missing, so the secret
# never appears in the notebook source or its saved outputs.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

In [19]:
# gpt-3.5-turbo-16k chat model, configured with temperature 0.
llm = ChatOpenAI(temperature=0, model_name='gpt-3.5-turbo-16k')

In [5]:
# Load the paper PDF; `docs` is split into chunks and indexed in the next cell.
loader = PyPDFLoader('pdfs/2309.13963.pdf')
docs = loader.load()

In [6]:
# Split the loaded documents with a token-based splitter (chunk_size=1000, chunk_overlap=200).
text_splitter = TokenTextSplitter(
    chunk_size = 1000,
    chunk_overlap  = 200
)

chunks = text_splitter.split_documents(docs)

# Embed the chunks with OpenAI embeddings and index them in a Chroma vector store.
# `docsearch` is the store every retriever below is built on.
embeddings = OpenAIEmbeddings()
docsearch = Chroma.from_documents(chunks, embeddings)

In [7]:
# LLMs: `chat_model` is the model piped after the prompt in `chain`; `llm` is handed to the
# retriever helpers and the citation chain in later cells.
# NOTE(review): `llm` is also assigned in an earlier cell (ChatOpenAI, gpt-3.5-turbo-16k,
# temperature 0); whichever cell runs last decides which model the later cells use — confirm
# which one is intended.
chat_model = ChatOpenAI()
llm = OpenAI()

# Question-answering template; `{context}` and `{question}` are filled in per request.
template = """Based ONLY on the context below, answer the following question!
Context:
{context}

Question:
{question}

Remember to answer it ONLY from the given context!
"""

# Chat prompt: system message, then the prior conversation ("chat_history"), then the
# human message rendered from `template`.
prompt = ChatPromptTemplate(
    messages=[
        SystemMessagePromptTemplate.from_template(
            "You are a nice chatbot having a conversation with a human."
        ),
        MessagesPlaceholder(variable_name="chat_history"),
        HumanMessagePromptTemplate.from_template(template),
    ]
)

# Conversation memory; its messages are passed to the prompt as "chat_history".
# NOTE(review): no visible cell stores turns into `memory`, so the history looks empty on
# every call — confirm whether saving the exchange was intended.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

In [8]:
def contexts_formatter(contexts):
    """Render retrieved documents as a numbered list for the prompt.

    Each document becomes ``"<n>. <page_content>"`` (numbering starts at 1)
    followed by three newlines. An empty input yields an empty string.
    """
    numbered_entries = (
        f"{position}. {document.page_content}\n\n\n"
        for position, document in enumerate(contexts, start=1)
    )
    return "".join(numbered_entries)

In [9]:
# Answering pipeline: the chat prompt is piped into `chat_model`.
chain = prompt | chat_model

In [84]:
# Query used by the retrieval, generation and evaluation cells below.
question = "what is q-former"



# Retrievers

In [85]:
# Vanilla: plain similarity search on the vector store, asking for a single result (k=1).

contexts_vanilla = docsearch.similarity_search(question, k=1)
contexts_vanilla

[Document(page_content='Specifically, Q-Former, denoted as QF (Q,X), in this work consists\nof two Transformer decoder blocks [33] with the causal attention\nmasks removed. Here Qis used as the decoder inputs and Xas the\nencoder outputs in the standard Transformer.\n4. SEGMENT-LEVEL Q-FORMER\nTransformer-based speech encoders can have limitations on the in-\nput sequence duration [18]. To enable LLMs to process with longer', metadata={'doc_id': 'da9c286e-7d50-4809-91d5-0d8ed5a22bac', 'page': 1, 'source': 'pdfs/2309.13963.pdf'})]

In [92]:
# Multiquery Retriever: wraps the vector-store retriever together with `llm`.
# NOTE(review): the recorded output of this cell lists more than one document, so the `k=1`
# passed to get_relevant_documents() does not appear to limit the result — confirm whether
# the limit belongs on the underlying retriever instead.

from langchain.retrievers.multi_query import MultiQueryRetriever

retriever_from_llm = MultiQueryRetriever.from_llm(
    retriever=docsearch.as_retriever(), llm=llm
)
contexts_multiquery = retriever_from_llm.get_relevant_documents(query=question, k=1)
contexts_multiquery

[Document(page_content='6.3. Trainable queries of Q-Former\nRecapping Section 3.3, the number of trainable queries used in the\nQ-Former determines the number of its output tokens. This sec-\ntion compares Q-Formers with different numbers of queries. Table\n3 shows that increasing the number of queries to 80 can consider-\nably reduce WERs, which implies that using ∼80 tokens can retain', metadata={'doc_id': '94811155-4fb0-4ca7-9cef-52a551d1e25c', 'page': 2, 'source': 'pdfs/2309.13963.pdf'}),
 Document(page_content='Specifically, Q-Former, denoted as QF (Q,X), in this work consists\nof two Transformer decoder blocks [33] with the causal attention\nmasks removed. Here Qis used as the decoder inputs and Xas the\nencoder outputs in the standard Transformer.\n4. SEGMENT-LEVEL Q-FORMER\nTransformer-based speech encoders can have limitations on the in-\nput sequence duration [18]. To enable LLMs to process with longer', metadata={'doc_id': 'da9c286e-7d50-4809-91d5-0d8ed5a22bac', 'page': 1, '

In [94]:
# Contextual Compression: documents from the base retriever are passed through an
# LLM-backed compressor (LLMChainExtractor) before being returned.
# NOTE(review): the recorded output of this cell lists several documents, so the `k=1`
# passed to get_relevant_documents() does not appear to limit the result — confirm.

from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor

compressor = LLMChainExtractor.from_llm(llm)
compression_retriever = ContextualCompressionRetriever(base_compressor=compressor, base_retriever=docsearch.as_retriever())
contexts_compression = compression_retriever.get_relevant_documents(question, k=1)
contexts_compression



[Document(page_content='Q-Former, denoted as QF (Q,X), in this work consists\nof two Transformer decoder blocks [33] with the causal attention\nmasks removed. Here Qis used as the decoder inputs and Xas the\nencoder outputs in the standard Transformer.', metadata={'doc_id': 'da9c286e-7d50-4809-91d5-0d8ed5a22bac', 'page': 1, 'source': 'pdfs/2309.13963.pdf'}),
 Document(page_content='Q-Former, denoted as QF (Q,X), in this work consists\nof two Transformer decoder blocks [33] with the causal attention\nmasks removed.', metadata={'doc_id': 'da9c286e-7d50-4809-91d5-0d8ed5a22bac', 'page': 1, 'source': 'pdfs/2309.13963.pdf'}),
 Document(page_content='Q-Former results in the lowest WERs on both test sets by producing only 80 output tokens. Although fully connected layers with 300 output tokens (with m= 5 in Sec. 3.1) can achieve similar WERs to Q-Former, it requires much more calculations and memory usage.', metadata={'doc_id': '94811155-4fb0-4ca7-9cef-52a551d1e25c', 'page': 2, 'source': 'pdfs

In [95]:
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.retrievers.self_query.base import SelfQueryRetriever
import lark

# Self-query retriever: the LLM is given a description of the document contents and of the
# metadata fields ("page", "source") available on each chunk.
# The description string is Indonesian for "a research paper".
document_content_description = "Sebuah researh paper"
metadata_field_info = [
    AttributeInfo(
        name="page",
        description="page number",
        type="integer"
    ),
    AttributeInfo(
        name="source",
        description="file path",
        type="string"
    )
]


retriever = SelfQueryRetriever.from_llm(
    llm,
    docsearch,
    document_content_description,
    metadata_field_info,
    enable_limit=True,

)

# The query is Indonesian for "3 documents about Q-Former"; it differs from `question`.
# NOTE(review): presumably `enable_limit=True` lets the "3" in the query set the number of
# results, which would make `k=1` irrelevant here — confirm.
contexts_selfquery = retriever.get_relevant_documents("3 dokumen tentang Q-Former", k=1)
contexts_selfquery

[Document(page_content='Tspeech=MultiHead (H,E,E). (3)3.3. Q-Former\nQ-Former [20] is a Transformer-based module converting variable-\nlength input sequences into fixed-length output query representa-\ntions. It was initially proposed for visual-text modality alignment,\nand here is applied to audio-text alignment. In each Q-Former block,\ntrainable query embeddings Q∈Rnq×dqinteract with the input fea-', metadata={'doc_id': 'da9c286e-7d50-4809-91d5-0d8ed5a22bac', 'page': 1, 'source': 'pdfs/2309.13963.pdf'}),
 Document(page_content='and here is applied to audio-text alignment. In each Q-Former block,\ntrainable query embeddings Q∈Rnq×dqinteract with the input fea-\nturesXthrough multi-head self-attention and cross-attention layers,\nSpecifically, Q-Former, denoted as QF (Q,X), in this work consists\nof two Transformer decoder blocks [33] with the causal attention\nmasks removed. Here Qis used as the decoder inputs and Xas the', metadata={'doc_id': 'da9c286e-7d50-4809-91d5-0d8ed5a22bac',

In [96]:
# MMR (maximal marginal relevance) retriever.
# The search strategy is selected with `search_type`. The earlier `type="mmr"` keyword is not
# the strategy option, and the output recorded for it matched the plain similarity search.
retriever = docsearch.as_retriever(search_type="mmr")
# NOTE(review): the recorded output lists more than one document, so `k=1` does not appear to
# limit the result here; a limit would normally go in `search_kwargs` — confirm.
retriever.get_relevant_documents(question, k=1)

[Document(page_content='Specifically, Q-Former, denoted as QF (Q,X), in this work consists\nof two Transformer decoder blocks [33] with the causal attention\nmasks removed. Here Qis used as the decoder inputs and Xas the\nencoder outputs in the standard Transformer.\n4. SEGMENT-LEVEL Q-FORMER\nTransformer-based speech encoders can have limitations on the in-\nput sequence duration [18]. To enable LLMs to process with longer', metadata={'doc_id': 'da9c286e-7d50-4809-91d5-0d8ed5a22bac', 'page': 1, 'source': 'pdfs/2309.13963.pdf'}),
 Document(page_content='and here is applied to audio-text alignment. In each Q-Former block,\ntrainable query embeddings Q∈Rnq×dqinteract with the input fea-\nturesXthrough multi-head self-attention and cross-attention layers,\nSpecifically, Q-Former, denoted as QF (Q,X), in this work consists\nof two Transformer decoder blocks [33] with the causal attention\nmasks removed. Here Qis used as the decoder inputs and Xas the', metadata={'doc_id': 'da9c286e-7d50-48

In [97]:
from langchain.retrievers import ParentDocumentRetriever
from langchain.storage import InMemoryStore
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Parent-document retriever: `docs` are split by `child_splitter` (chunk_size=400) for the
# vector store, with an in-memory docstore passed alongside it.
# NOTE(review): `vectorstore=docsearch` reuses the store that already holds the token chunks,
# so add_documents() appears to add the child chunks to that same index (the similarity
# search recorded in the next cell shows one passage twice with different doc_ids, and
# re-running this cell would presumably add them again) — confirm whether a dedicated
# vector store was intended.
child_splitter = RecursiveCharacterTextSplitter(chunk_size=400)
store = InMemoryStore()
retriever = ParentDocumentRetriever(
    vectorstore=docsearch,
    docstore=store,
    child_splitter=child_splitter,
)
retriever.add_documents(docs)

In [98]:
# Raw similarity search on the shared vector store, for comparison with the retriever query below.
docsearch.similarity_search(question)

[Document(page_content='Specifically, Q-Former, denoted as QF (Q,X), in this work consists\nof two Transformer decoder blocks [33] with the causal attention\nmasks removed. Here Qis used as the decoder inputs and Xas the\nencoder outputs in the standard Transformer.\n4. SEGMENT-LEVEL Q-FORMER\nTransformer-based speech encoders can have limitations on the in-\nput sequence duration [18]. To enable LLMs to process with longer', metadata={'doc_id': '11526dca-9631-4c4c-98a6-42a8ecd2e2f3', 'page': 1, 'source': 'pdfs/2309.13963.pdf'}),
 Document(page_content='Specifically, Q-Former, denoted as QF (Q,X), in this work consists\nof two Transformer decoder blocks [33] with the causal attention\nmasks removed. Here Qis used as the decoder inputs and Xas the\nencoder outputs in the standard Transformer.\n4. SEGMENT-LEVEL Q-FORMER\nTransformer-based speech encoders can have limitations on the in-\nput sequence duration [18]. To enable LLMs to process with longer', metadata={'doc_id': 'da9c286e-7d50

In [99]:
# Query through the most recently assigned `retriever` (the ParentDocumentRetriever when the
# cells are run top to bottom).
retriever.get_relevant_documents(question)

[Document(page_content='3. MODULE CONNECTOR\nAs shown in Fig. 1, the proposed ASR model consists of three mod-\nules: a frozen speech encoder, a trainable module connector and\na frozen LLM. This section introduces three connectors, including\nfully connected layers, multi-head cross-attention and Q-Former.\n❄ Speech EncoderLinearMHCA\n❄ Large Language Model(a)(b)(c)Transcription\nQ-FormerQ-Former queryVicuna embeddingConv kernel\nLinear\nFig. 1 . Illustration of integrating a speech encoder and an LLM into\nan ASR system with a module connector of: (a) fully connected\nlayers, (b) multi-head cross-attention, and (c) Q-Former.\nFor clarity, some basic notations are defined as follows: X∈\nRnx×dxdenotes the speech features obtained from the speech en-\ncoder, and the module connector compresses XintoTspeech∈\nRnt×dtwhich are input to the LLM to produce ASR transcriptions.\nH∈Rnh×dhdenotes the hidden states in connectors while nandd\nare the numbers of vectors and hidden dimensions respe

In [None]:
# answer = ""
# for res in chain.stream({"context": contexts_formatter(contexts), "question": question, "chat_history": memory.buffer_as_messages}):
#     if res:
#         print(res.content, end="", flush=True)
#         answer += res.content

# # with get_openai_callback() as cb:
# #     res_invoke = chain.invoke({"context": contexts_formatter(contexts), "question": question, "chat_history": memory.buffer_as_messages})

## Experimental Eval

In [100]:
# Generate one answer per retrieval strategy from the contexts collected above:
# contexts_vanilla, contexts_multiquery, contexts_compression, contexts_selfquery


def stream_answer(contexts):
    """Stream the chain's answer to `question` for the given contexts, echoing it live.

    Args:
        contexts: retrieved documents; rendered into the prompt with `contexts_formatter`.

    Returns:
        The full answer text, i.e. the concatenation of every streamed chunk's content.
    """
    collected = []
    chain_inputs = {
        "context": contexts_formatter(contexts),
        "question": question,
        "chat_history": memory.buffer_as_messages,
    }
    for chunk in chain.stream(chain_inputs):
        if chunk:
            print(chunk.content, end="", flush=True)
            collected.append(chunk.content)
    # End the line so that consecutive answers no longer run together in the output.
    print()
    return "".join(collected)


# VANILLA
answer_vanilla = stream_answer(contexts_vanilla)

# MULTIQUERY
answer_multiquery = stream_answer(contexts_multiquery)

# COMPRESSION
answer_compression = stream_answer(contexts_compression)

# SELFQUERY
answer_selfquery = stream_answer(contexts_selfquery)

Q-Former, denoted as QF (Q,X), in this context refers to a specific model consisting of two Transformer decoder blocks with the causal attention masks removed. It uses Q as the decoder inputs and X as the encoder outputs in the standard Transformer.Q-Former is a Transformer-based module that converts variable-length input sequences into fixed-length output query representations. It was initially proposed for visual-text modality alignment and is applied to audio-text alignment in this context. Q-Former consists of two Transformer decoder blocks with the causal attention masks removed. It uses trainable query embeddings Q to interact with the input features X through multi-head self-attention and cross-attention layers. The number of trainable queries used in the Q-Former determines the number of its output tokens. Increasing the number of queries to 80 can considerably reduce word error rates (WERs).Based on the given context, Q-Former refers to a model or system that consists of two T

In [101]:
# Print the four generated answers, each under the heading of its retrieval strategy.
labelled_answers = (
    ("--Vanilla--\n", answer_vanilla),
    ("\n--Multiquery--\n", answer_multiquery),
    ("\n--Compression--\n", answer_compression),
    ("\n--SelfQuery--\n", answer_selfquery),
)
for heading, answer_text in labelled_answers:
    print(heading, answer_text)

--Vanilla--
 Q-Former, denoted as QF (Q,X), in this context refers to a specific model consisting of two Transformer decoder blocks with the causal attention masks removed. It uses Q as the decoder inputs and X as the encoder outputs in the standard Transformer.

--Multiquery--
 Q-Former is a Transformer-based module that converts variable-length input sequences into fixed-length output query representations. It was initially proposed for visual-text modality alignment and is applied to audio-text alignment in this context. Q-Former consists of two Transformer decoder blocks with the causal attention masks removed. It uses trainable query embeddings Q to interact with the input features X through multi-head self-attention and cross-attention layers. The number of trainable queries used in the Q-Former determines the number of its output tokens. Increasing the number of queries to 80 can considerably reduce word error rates (WERs).

--Compression--
 Based on the given context, Q-Former 

In [107]:
from ragas.metrics import faithfulness, answer_relevancy, context_relevancy, context_recall
from ragas.langchain import RagasEvaluatorChain


# Build one Ragas evaluator chain per metric, keyed by the metric's name.
# Retrieval quality is scored with context_relevancy only.
eval_retrieve = {context_relevancy.name: RagasEvaluatorChain(metric=context_relevancy)}

# Generation quality is scored with faithfulness and answer_relevancy.
eval_generate = {
    metric.name: RagasEvaluatorChain(metric=metric)
    for metric in (faithfulness, answer_relevancy)
}

### Retrieval Eval

In [108]:
# One evaluator input per retrieval strategy: the retrieved documents, the query and the
# generated answer. These dicts are what the evaluator chains in the next cells are called with.
# NOTE(review): the key names are presumably the ones RagasEvaluatorChain expects — confirm.
vanilla = {'source_documents': contexts_vanilla, 'query': question, 'result': answer_vanilla}
multiquery = {'source_documents': contexts_multiquery, 'query': question, 'result': answer_multiquery}
compression = {'source_documents': contexts_compression, 'query': question, 'result': answer_compression}
selfquery = {'source_documents': contexts_selfquery, 'query': question, 'result': answer_selfquery}

In [110]:
# Score the retrieved contexts of every strategy with the retrieval metric(s).
# The evaluator dict is built once. The loops use their own variable names, so the dict is
# no longer overwritten by a single chain and does not have to be rebuilt per strategy.
eval_retrieve = {
    m.name: RagasEvaluatorChain(metric=m)
    for m in [context_relevancy]
}

retrieval_runs = {
    "VANILLA": vanilla,
    "MULTIQUERY": multiquery,
    "COMPRESSION": compression,
    "SELFQUERY": selfquery,
}

for run_label, run_inputs in retrieval_runs.items():
    for name, evaluator in eval_retrieve.items():
        score_name = f"{name}_score"
        # Every label is followed by a space, so all four strategies print in the same format.
        print(f"{run_label} {score_name}: {evaluator(run_inputs)[score_name]}")



VANILLA context_relevancy_score: 0.0
MULTIQUERY context_relevancy_score: 0.023255813953488372
COMPRESSIONcontext_relevancy_score: 0.4166666666666667
SELFQUERYcontext_relevancy_score: 0.11538461538461539


### Generation Eval

In [112]:
# Score the generated answer of every strategy with the generation metrics.
# The evaluator dict is built once. The loops use their own variable names, so the dict is
# no longer overwritten by a single chain and does not have to be rebuilt per strategy.
eval_generate = {
    n.name: RagasEvaluatorChain(metric=n)
    for n in [faithfulness, answer_relevancy]
}

generation_runs = {
    "VANILLA": vanilla,
    "MULTIQUERY": multiquery,
    "COMPRESSION": compression,
    "SELFQUERY": selfquery,
}

for run_label, run_inputs in generation_runs.items():
    for name, evaluator in eval_generate.items():
        score_name = f"{name}_score"
        print(f"{run_label} {score_name}: {evaluator(run_inputs)[score_name]}")



VANILLA faithfulness_score: 1.0
VANILLA answer_relevancy_score: 0.8682710544670492
MULTIQUERY faithfulness_score: 1.0
MULTIQUERY answer_relevancy_score: 0.8624036284465717
COMPRESSIONfaithfulness_score: 1.0
COMPRESSIONanswer_relevancy_score: 0.8320173329152141
SELFQUERYfaithfulness_score: 1.0
SELFQUERYanswer_relevancy_score: 0.8169073597617537


---

# Citation Finder

In [9]:
# Template prompt for the citation finder: given the context, the question and the generated
# answer, the model is asked for a JSON object with the keys `page`, `source` and
# `in-text citation`. Later cells read `source` and `in-text citation` from the result.
template_reference = """Context:
{context}

Question:
{question}

Answer:
{answer}

Based on the question and answer pair above, find where the answer originates from within the given context.

Return a JSON object with the following keys:
    `page`: (the value is in the form of a list of page numbers, can be more than one page)
    `source`: (the value is in the form of a list of sentences used as a reference for the answer, write exactly as it appears in the context including the in-text citation, using the language used in the context)
    `in-text citation` : (list of in-text citation appears in contexts used as reference for the answer)
"""

In [10]:
# Prompt built from the citation-finder template.
prompt_reference = PromptTemplate.from_template(template_reference)

# Output parser applied to the model's reply; later cells index the result by key.
json_parser = SimpleJsonOutputParser()

# LCEL: prompt -> llm -> JSON parser.
# NOTE(review): `llm` is assigned in more than one cell above; which model runs here depends
# on the order the cells were executed — confirm.
reference_chain = prompt_reference | llm | json_parser

In [11]:
# Ask the citation-finder chain where the answer comes from in the retrieved contexts.
# NOTE(review): no active cell visible in this notebook assigns `contexts` or `answer`
# (only a commented-out cell mentions them), so this presumably relies on leftover kernel
# state — confirm which contexts/answer pair is meant.
reference_inputs = {
    "context": contexts_formatter(contexts),
    "question": question,
    "answer": answer,
}
reference = reference_chain.invoke(reference_inputs)

In [32]:
from difflib import SequenceMatcher
import pandas as pd

# A common substring must be longer than this many characters to count as a match.
MIN_MATCH_SIZE = 15
# At most this many leading contexts are compared against the cited sentence.
MAX_CONTEXTS = 3

matches = list()
pages = list()

# Only the first cited sentence is compared, as before. An empty `source` list now simply
# yields no matches instead of raising IndexError.
cited_sentences = reference["source"]
if cited_sentences:
    # Slicing (rather than indexing 0..2) tolerates fewer than MAX_CONTEXTS contexts.
    for context in contexts[:MAX_CONTEXTS]:
        text = context.page_content
        matcher = SequenceMatcher(None, text, cited_sentences[0])
        for block in matcher.get_matching_blocks():
            if block.size > MIN_MATCH_SIZE:
                # `block.a` is the offset into the context text (the first sequence).
                matches.append(text[block.a : block.a + block.size])
                pages.append(context.metadata["page"])

# One row per matching block: the matched text and the page of the context it came from.
df_sources = pd.DataFrame({"match": matches, "page": pages})

# Post-processing

In [13]:
import json

def dict_to_string(input_dict):
    """Serialize `input_dict` to JSON text, pretty-printed with a two-space indent."""
    return json.dumps(input_dict, indent=2)

In [None]:
from forex_python.converter import CurrencyRates

def get_cost(
        model="gpt-3.5-turbo",
        prompt_formatted=None,
        output_from_llm=None
):
    """Estimate token usage and cost of one prompt/completion pair.

    Args:
        model: model name used to pick the tiktoken encoding.
        prompt_formatted: the fully formatted prompt text. When omitted it is built at call
            time from the notebook's `prompt`, `contexts`, `question` and `memory`.
        output_from_llm: the completion text. When omitted it is `dict_to_string(res)`,
            evaluated at call time.

    Returns:
        A multi-line report with total/prompt/completion token counts and the total cost
        in USD and IDR.

    The defaults used to be computed once, when the function was defined, so they were
    stale on later calls and the definition failed if the globals did not exist yet.
    """
    if prompt_formatted is None:
        prompt_formatted = prompt.format(
            context=contexts_formatter(contexts),
            question=question,
            chat_history=memory.buffer_as_messages,
        )
    if output_from_llm is None:
        output_from_llm = dict_to_string(res)

    curr = CurrencyRates()
    encoder = tiktoken.encoding_for_model(model)
    # Flat 7-token safety margin on the prompt side (original note: "just in case").
    input_tokens_used = len(encoder.encode(prompt_formatted)) + 7
    output_tokens_used = len(encoder.encode(output_from_llm))
    total_token = input_tokens_used + output_tokens_used

    # Per-1000-token USD prices are fixed here and do not vary with `model`.
    # NOTE(review): presumably the gpt-3.5-turbo prices at the time of writing — confirm.
    input_price = round((0.0015/1000) * input_tokens_used, 8)
    output_price = round((0.002/1000) * output_tokens_used, 8)
    total_price_usd = round(input_price + output_price, 8)
    total_price_idr = curr.convert('USD', 'IDR', total_price_usd)


    return f"""Tokens Used: {total_token}
        Prompt Tokens: {input_tokens_used}
        Completion Tokens: {output_tokens_used}
    Total Cost (USD): ${total_price_usd}
    Total Cost (IDR): Rp{total_price_idr}
    """

In [15]:
# Highlighting Sources
def get_matches(source, contexts=None, k=3):
    """Find the sentence that starts with `source` in the first `k` contexts.

    Args:
        source: text to look for; the match starts where this text is found.
        contexts: documents exposing `page_content` and `metadata["page"]`. When omitted,
            the notebook-level `contexts` is looked up at call time (it used to be captured
            once, when the function was defined).
        k: maximum number of leading contexts to inspect.

    Returns:
        `(match, page_number)` for the first context whose match is longer than 10
        characters, or `None` when nothing qualifies. The match runs from `source` up to,
        but not including, the next "."; when no "." follows, it runs to the end of the text.

    Raises:
        NameError: if `contexts` is omitted and no notebook-level `contexts` exists.
    """
    if contexts is None:
        contexts = globals().get("contexts")
        if contexts is None:
            raise NameError("get_matches: no `contexts` argument given and no global `contexts` defined")

    # Slicing tolerates fewer than `k` contexts (indexing 0..k-1 raised IndexError).
    for context in contexts[:k]:
        text = context.page_content
        start = text.find(source)
        if start == -1:
            continue

        end = text.find(".", start)
        if end == -1:
            # No sentence terminator after the match: keep the rest of the text.
            end = len(text)

        match_ = text[start:end]
        if len(match_) > 10:
            return match_, context.metadata["page"]
    return None
    
def highlight_pdf(path, match_, page_num, output_path):
    """Highlight every occurrence of `match_` on one page and save a copy of the PDF.

    Args:
        path: PDF file to open.
        match_: text to search for on the page.
        page_num: index of the page to search.
        output_path: where the highlighted PDF is written.
    """
    pdf = fitz.open(path)
    try:
        page = pdf[page_num]

        # Drop the "-\n" line-break hyphenation present in the extracted text before searching.
        hits = page.search_for(match_.replace("-\n", ""))

        for hit in hits:
            page.add_highlight_annot(hit)

        pdf.save(output_path)
    finally:
        # Close the document even if the search or the save fails.
        pdf.close()

def highlight_pdf_v2(path, df_sources, output_path):
    """Highlight every row of `df_sources` in the PDF and save the result.

    Args:
        path: PDF file to open.
        df_sources: DataFrame with a "match" column (text to highlight) and a "page"
            column (index of the page the text is on).
        output_path: where the highlighted PDF is written.

    The previous version reopened the PDF for every row and never saved or closed it, so
    no highlight was kept and `output_path` was unused. The document is now opened once,
    annotated for all rows, saved and closed.
    """
    pdf = fitz.open(path)
    try:
        for match_text, page_num in zip(df_sources["match"], df_sources["page"]):
            # pandas yields NumPy integers; use a plain int for page indexing.
            page = pdf[int(page_num)]

            # Drop the "-\n" line-break hyphenation present in the extracted text before searching.
            for hit in page.search_for(match_text.replace("-\n", "")):
                page.add_highlight_annot(hit)

        pdf.save(output_path)
    finally:
        pdf.close()

In [16]:
# Get references
def get_reference(docs, in_text_citation):
    """Ask the LLM to write out the bibliography entries for the given citation markers.

    Args:
        docs: loaded documents exposing `page_content`; they are scanned for a
            "References" or "REFERENCES" heading.
        in_text_citation: the citation markers whose entries should be rewritten.

    Returns:
        The output of the prompt | OpenAI chain.

    Raises:
        ValueError: if no document contains a references heading (this used to surface as
            an UnboundLocalError).
    """
    references_text = None
    for page in docs:
        text = page.page_content

        # Same precedence as before: "References" is checked before "REFERENCES", and a
        # later page with a heading overrides an earlier one.
        if "References" in text:
            references_text = text.split("References")[1]
        elif "REFERENCES" in text:
            references_text = text.split("REFERENCES")[1]

    if references_text is None:
        raise ValueError("No 'References' or 'REFERENCES' heading found in the given documents")

    model = OpenAI()

    get_citation_template = """From the reference list below, rewrite the specified references!
    References:
    {references}

    Please rewrite the references related to the following numbers:
    {in_text_citation}
    """

    GET_CITATION_PROMPT = PromptTemplate.from_template(get_citation_template)

    get_reference_chain = GET_CITATION_PROMPT | model

    result = get_reference_chain.invoke({"references": references_text, "in_text_citation":in_text_citation})
    return result

In [None]:
# Highlight the cited source in the PDF when it can be located in the contexts.
if res["source"]:
    # get_matches() returns None when nothing is found, so check before unpacking.
    # NOTE(review): `[:15]` slices `res["source"]` itself; if `source` is a list of sentences
    # (as the citation prompt requests) the first sentence's prefix was presumably meant — confirm.
    found = get_matches(res["source"][:15], contexts)

    if found is not None:
        match_, page_num = found
        highlight_pdf("pdfs/2309.13963.pdf", match_, page_num, "pdfs/highlighted/high_pdf.pdf")

In [24]:
# Look up and print the bibliography entries only when the model returned in-text citations.
# Printing inside the branch avoids a NameError (or a stale value) when there are none.
if reference["in-text citation"]:
    ref_result = get_reference(docs, reference["in-text citation"]).strip()
    print(ref_result)
else:
    print("No in-text citation was returned, so there are no references to look up.")

[20] J. Li, D. Li, S. Savarese, and S. Hoi, “BLIP-2: Bootstrapping Language-Image Pre-Training with Frozen Image Encoders and Large Language Models,” in Proc. ICML , Vienna, 2023.
