In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

from llama_index.llms.openai import OpenAI
from llama_index.core.query_engine import CitationQueryEngine
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    StorageContext,
    load_index_from_storage,
    PromptTemplate,
)

from llama_index.core import Settings
import torch

from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.huggingface import HuggingFaceLLM
from transformers import AutoTokenizer




In [2]:
tokenizer = AutoTokenizer.from_pretrained(
"meta-llama/Meta-Llama-3-8B-Instruct"
)

stopping_ids = [
tokenizer.eos_token_id,
tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [3]:
stopping_ids

[128009, 128009]

In [4]:
llm = HuggingFaceLLM(
    model_name="meta-llama/Meta-Llama-3-8B-Instruct",
    tokenizer_name="meta-llama/Meta-Llama-3-8B-Instruct",
    context_window=4096,
    max_new_tokens=512,
    model_kwargs={'trust_remote_code':True},
    generate_kwargs={"do_sample": False},
    device_map="auto",
    stopping_ids=stopping_ids,
)

embed_model= HuggingFaceEmbedding(model_name="sentence-transformers/all-mpnet-base-v2")

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [5]:
Settings.embed_model = embed_model
Settings.llm = None

LLM is explicitly disabled. Using MockLLM.


In [6]:
from llama_index.core import ServiceContext, VectorStoreIndex

documents = SimpleDirectoryReader("./book/").load_data()

In [7]:
len(documents)

533

In [7]:
#create senetence window node parser with default settings
from llama_index.core.node_parser import SentenceWindowNodeParser,SimpleNodeParser
from llama_index.core.postprocessor import MetadataReplacementPostProcessor

# Create sentence window node parser with default settings
sentence_node_parser = SentenceWindowNodeParser.from_defaults(
    window_size=3,
    window_metadata_key="window",
    original_text_metadata_key="original_text"
)

# Parse documents into nodes
sentence_nodes = sentence_node_parser.get_nodes_from_documents(documents)
sentence_index = VectorStoreIndex(sentence_nodes)

In [8]:
query_engine = CitationQueryEngine.from_args(
    sentence_index,
    citation_chunk_size=512,
    similarity_top_k=2,
    node_postprocessors=[
        MetadataReplacementPostProcessor(target_metadata_key="window")
    ],
    llm=llm,
)

In [9]:
response = query_engine.query("What is taming intuitive predictions ?")

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


In [10]:
print(response.response.split("\n")[0])

 Taming intuitive predictions requires a significant effort of self-monitoring and self-control [2]. It involves accepting the overall forecast of economists and not making unwarranted causal inferences [1]. It also involves recognizing the limitations of intuitive predictions and the importance of discipline in constraining our beliefs by the logic of probability [2]. 


In [14]:
response.source_nodes[0].metadata.keys()

dict_keys(['window', 'original_text', 'page_label', 'file_name', 'file_path', 'file_type', 'file_size', 'creation_date', 'last_modified_date'])

In [22]:
metakeys = ['page_label', 'file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date']
meta_text = ""
for key in metakeys:
    meta_text+=f"{key}: {response.source_nodes[0].metadata[key]}\n"

In [23]:
print(meta_text)

page_label: 180
file_name: Daniel Kahneman-Thinking, Fast and Slow  .pdf
file_type: application/pdf
file_size: 3675247
creation_date: 2024-07-22
last_modified_date: 2024-07-22



In [None]:
What is taming intuitive predictions ?

In [26]:
import gradio as gr
import re

# Global variable to store citation_text temporarily
temporary_citation_text = ""

# Function to handle query and return response and citations
def search_query(query):
    global temporary_citation_text
    
    response = query_engine.query(query)
    response_text = response.response.split("\n")[0]

    pattern = r'\[(\d+)\]'
    # Find all matches
    matches = re.findall(pattern, response_text)

    citation_idx = set()
    for match in matches:
        citation_idx.add(int(match)-1)
        
    citation_idx = list(citation_idx)
    print(citation_idx)
    
    citations = []
    metakeys = ['page_label', 'file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date']
    for cit in citation_idx:
        meta_text = ""
        for key in metakeys:
            meta_text+=f"{key}: {response.source_nodes[cit].metadata[key]}\n"
        citations.append(response.source_nodes[cit].get_text()+"\n"+meta_text)
        
    citation_text = "\n===========================================\n".join([f"{citation}" for i, citation in enumerate(citations)])
    
    # Store the citation text temporarily
    temporary_citation_text = citation_text
    
    # Return response text and make the button interactive
    return response_text, gr.update(interactive=True)

# Function to display citations when the button is clicked
def show_citations():
    global temporary_citation_text
    return temporary_citation_text

# Gradio Interface
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            query_input = gr.Textbox(label="Query")
            response_text = gr.Textbox(label="Response", interactive=False)
        with gr.Column():
            citation_button = gr.Button("Show Citations", interactive=False)
            citation_text = gr.Textbox(label="Citations", interactive=False)
    
    # Set the query input to trigger the search_query function
    query_input.submit(search_query, inputs=query_input, outputs=[response_text, citation_button])
    
    # Set the button to show the citations when clicked
    citation_button.click(show_citations, outputs=citation_text)

# Launch the interface
demo.launch(server_name="192.168.0.44", server_port=8895)

Running on local URL:  http://192.168.0.44:8895

To create a public link, set `share=True` in `launch()`.




Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[0, 1]


In [None]:
What is taming intuitive predictions ?