In [1]:
import os
from dotenv import load_dotenv
load_dotenv(dotenv_path=".env")
from llama_index.llms.openai import OpenAI

openai_api_key = os.getenv("OPENAI_API_KEY")
llm = OpenAI(model="gpt-3.5-turbo", openai_api_key=openai_api_key)


In [2]:
input_dir_path = "/Users/ramin/Documents/RAG/data/"

In [4]:
from llama_index.core import SimpleDirectoryReader
# load data
loader = SimpleDirectoryReader (
    input_dir = input_dir_path,
    required_exts=[".pdf"],
    recursive=True
)

docs = loader.load_data()


In [4]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
# embed_model = HuggingFaceEmbedding( model_name="Snowflake/snowflake-arctic-embed-m", trust_remote_code=True)

You try to use a model that was created with version 2.7.0.dev0, however, your version is 2.7.0. This might cause unexpected behavior or errors. In that case, try to update to the latest version.





In [5]:
from llama_index.core import Settings, VectorStoreIndex
# Settings.embed_model = embed_model
index = VectorStoreIndex.from_documents(docs)

In [52]:
index.storage_context.persist(persist_dir='index_storage')

In [53]:
from llama_index.core import StorageContext
from llama_index.core import load_index_from_storage

storage_context = StorageContext.from_defaults(persist_dir="index_storage")
loaded_index = load_index_from_storage(storage_context)

In [35]:
from langchain.prompts import PromptTemplate
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.llms.openai import OpenAI

query_str = "list highlights of this reservation"

retriever = index.as_retriever()
query_engine = RetrieverQueryEngine.from_args(retriever=retriever)

qa_prompt_tmpl_str = (
    "Context information is below. \n"
    "----\n"
    "{context_str}\n"
    "----\n"
    "Given the context information above I want you In"
    "to think step by step to answer the query in a crisp In"
    " manner, incase case you don't know the answer say \n"
    "'I don't know!'. \n"
    "Query: {query_str}\n"
    "Answer: "
)

# qa_prompt_tmpl = PromptTemplate.from_template(qa_prompt_tmpl_str)
# query_engine.update_prompts ({"response_synthesizer: text_qa_template": qa_prompt_tmpl})
# query_engine.llm = OpenAI(model="gpt-3.5-turbo-instruct")
# response = query_engine.query ( query_str)

# retrieved_nodes = retriever.retrieve(query_str)
# context_str = "\n\n".join([r.get_content() for r in retrieved_nodes])

# qa_prompt = PromptTemplate.from_template(
#     """\
# Context information is below.
# ---------------------
# {context_str}
# ---------------------
# Given the context information and not prior knowledge, answer the query.
# Query: {query_str}
# Answer: \
# """
# )

# fmt_qa_prompt = qa_prompt.format(
#     context_str=context_str, query_str=query_str
# )

# llm = OpenAI(model="gpt-3.5-turbo")
# response = llm.complete(fmt_qa_prompt)


# print (response)

# Settings.llm = llm
# query_engine = index.as_query_engine(similarity_top_k=5)
# response_2 = query_engine.query(query_str)
# print(response_2)

chat_engine = index.as_chat_engine(similarity_top_k=20, chunk_size=100)
response_3 = chat_engine.query(query_str)
print(response_3)

# print(f'chat resp = {resp1}')
# print('-------Context----------')
# print(context_str)


Here are some highlights of the reservation:
1. Vehicles with seating capacity for 10 people or more will have a range of $7.99 to $15.67 per day.
2. Roadside Assistance Protection (RAP) can be purchased for an additional fee, providing 24/7 roadside assistance including key replacement, tire service, lockout service, jumpstarts, and fuel delivery.
3. RAP services are available in the United States and Canada, but not in Mexico. Standard charges apply for roadside assistance if RAP is not purchased or invalidated. Keys are not covered by RAP in CA, KS, MO, NV, and NY.


In [35]:
from llama_index.core import Settings
from llama_index.llms.ollama import Ollama

# llm = Ollama(model="llama3", request_timeout=120)
llm = OpenAI(model="gpt-3.5-turbo-instruct")

Settings.llm = llm
query_engine = index.as_query_engine(streaming=False, similarity_top_k=4)
response = query_engine.query ( query_str)

print (response)



If you want to keep the car for 2 more days, you will need to make a new reservation with the updated information and then cancel your original reservation. Payment information is not required to confirm this reservation, and the reservation can be cancelled at any time without penalty before pickup of the vehicle. However, please note that the original rental rate is subject to change for any changes made to the reservation. Additionally, early or late pickups, drop-offs, and returns to alternate locations may result in additional charges or penalties paid by the renter directly through the rental car company. It is recommended to check your insurance policy and/or credit card agreement for rental vehicle coverage.


In [66]:
import streamlit as st
from PyPDF2 import PdfReader

st.title('Question Answering App')

# File upload
uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")

if uploaded_file is not None:
    pdf_reader = PdfReader(uploaded_file)
    
    text = ""
    for page in pdf_reader.pages:
        text += page.extract_text()

    # Create an index from the PDF text
    index = GPTSimpleVectorIndex(text)

    # Get query from user
    query_str = st.text_input('Enter your question')

    if query_str:
        retrieved_nodes = retriever.retrieve(query_str)
        context_str = "\n\n".join([r.get_content() for r in retrieved_nodes])

        fmt_qa_prompt = qa_prompt.format(
            context_str=context_str, query_str=query_str
        )

        response = llm.complete(fmt_qa_prompt)

        st.write('Response:')
        st.write(response)

        # Also query using the query engine
        response2 = query_engine.query(query_str)
        
        st.write('Query Engine Response:') 
        st.write(response2)


2024-04-21 15:11:23.108 
  command:

    streamlit run /Users/ramin/opt/anaconda3/envs/RAG/lib/python3.12/site-packages/ipykernel_launcher.py [ARGUMENTS]


In [38]:
from PyPDF2 import PdfReader
pdf_reader = PdfReader("./data/car.pdf")

docs = ""
for page in pdf_reader.pages:
    docs += page.extract_text()

In [40]:
from llama_index.core.schema import Document
doc_obj = Document(docs)


TypeError: BaseModel.__init__() takes 1 positional argument but 2 were given

In [17]:
def get_doc(fname):
    import PyPDF2

    # Open the PDF file in read-binary mode
    with open(fname, 'rb') as file:
        # Create a PDF reader object
        reader = PyPDF2.PdfReader(file)

        # Iterate through the pages of the PDF
        for page in reader.pages:
            # Extract the text from the page
            text = page.extract_text()


In [18]:
text = get_doc('data/car.pdf')


9/14/23, 1 1:26 AM Booking Conﬁrmation | Costco Travel
https://www .costcotravel.com/itineraryActivity/C4281 19660?printItinerary=true&_uid=1694715956065_561.8593210687219 1/11
Booking Date:  September 14, 2023
Costco Membership #:  111831007899
Member Name:   POP AK ROSH AN
Pick-up Date:  September 16, 2023
Drop-off Date:  September 21, 2023
 
Costco Travel Con rmation Number: C428119660
 
Enterprise Con rmation Number: 2063152596
Need help?
Befor e your trip: 1-866-328-1385 M-F 7:00am - 7:00pm PT  | Sat & Sun 8:00am - 5:00pm PT
During and after y our trip: 1-866-317-4711 M-F 7:00am - 7:00pm PT  | Sat & Sun 8:00am - 5:00pm PT
Enterprise -   Full-siz e Pickup
Driver Name
Ramin Rezaiifar
One additional driv er fee will be waiv ed for Costco members at locations in the US, UK, Spain, Puer to
Rico, Ir eland, Germany , France and Canada. Age r estrictions and r enter r equir ements ma y apply .Driver Information
Itiner ary
Full-siz e Pickup
Reser ve Now , Pay Later
9/14/23, 1 1:26 AM Booki

In [37]:
from llama_index.core import VectorStoreIndex, Document
import PyPDF2


# Open the PDF file and extract the text
with open('data/irs.pdf', 'rb') as file:
    reader = PyPDF2.PdfReader(file)
    pages = [page.extract_text() for page in reader.pages]

# Create a list of documents from the extracted text
documents = [Document(text=page) for page in pages]
# Create a vector store index
index = VectorStoreIndex.from_documents(documents)

# Query the index
query_engine = index.as_query_engine()
response = query_engine.query("What's the payment amount")
print(response)

The payment amount is $11,000.00.
