In [None]:
!pip install llama-index
!pip install llama-index-llms-openai
!pip install llama-index-vector-stores-qdrant
!pip install qdrant-client
!pip install sentence-transformers

In [None]:
!pip install llama-index-embeddings-huggingface

In [None]:
import os
from getpass import getpass

# Never store a real key in the notebook. Reuse a key that is already exported
# in the environment and only prompt (without echoing) when none is set.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("Enter your OpenAI API key: ")

Optional: use Ollama instead of OpenAI

In [None]:
!pip install ollama notebook


In [None]:
import nest_asyncio

# Jupyter already runs an asyncio event loop; per the nest_asyncio docs this
# patch allows nested use of that loop, so libraries that start their own
# event loop can be called from notebook cells.
nest_asyncio.apply()

SQL Database

In [None]:
from llama_index.core import SQLDatabase, Settings 
from llama_index.llms.openai import OpenAI
from sqlalchemy import (
    create_engine,
    MetaData,
    Table,
    Column,
    String,
    Integer,
)

# Default LLM for LlamaIndex components that are not given one explicitly.
Settings.llm = OpenAI(model="gpt-3.5-turbo")

# In-memory SQLite database plus the metadata registry that will hold the schema.
engine = create_engine("sqlite:///:memory:", future=True)
metadata_obj = MetaData()

# create city SQL table
table_name = "city_stats"
city_stats_table = Table(
    table_name,
    metadata_obj,
    Column(name="city_name", type_=String(16), primary_key=True),
    Column(name="population", type_=Integer),
    Column(name="state", type_=String(16), nullable=False),
)

# Emit CREATE TABLE for every table registered on metadata_obj.
metadata_obj.create_all(bind=engine)

In [None]:
from sqlalchemy import delete, insert

rows = [
    {"city_name": "New York City", "population": 8336000, "state": "New York"},
    {"city_name": "Los Angeles", "population": 3822000, "state": "California"},
    {"city_name": "Chicago", "population": 2665000, "state": "Illinois"},
    {"city_name": "Houston", "population": 2303000, "state": "Texas"},
    {"city_name": "Miami", "population": 449514, "state": "Florida"},
    {"city_name": "Seattle", "population": 749256, "state": "Washington"},
]

# Load everything in ONE transaction so the table is either fully populated or
# untouched. Clearing the table first makes the cell safe to re-run: without
# it a second execution fails on the city_name primary key.
with engine.begin() as connection:
    connection.execute(delete(city_stats_table))
    # Passing the list of dicts performs a single executemany-style insert.
    connection.execute(insert(city_stats_table), rows)

# Read the rows back to confirm what was stored.
with engine.connect() as connection:
    cursor = connection.exec_driver_sql("SELECT * FROM city_stats")
    print(cursor.fetchall())

Creating Query Engine Based on SQL Database

In [None]:
from llama_index.core.query_engine import NLSQLTableQueryEngine

# Expose only the city_stats table of the engine to LlamaIndex.
sql_database = SQLDatabase(
    engine,
    include_tables=["city_stats"],
)

# Natural-language -> SQL query engine limited to that same table.
sql_query_engine = NLSQLTableQueryEngine(tables=["city_stats"], sql_database=sql_database)

Creating a local vector index using Qdrant

In [None]:
# LlamaIndex imports (updated for newer versions)
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Document, StorageContext, Settings
from llama_index.core.tools import BaseTool, FunctionTool
from llama_index.core.llms import ChatMessage
from llama_index.core.llms.llm import ToolSelection, LLM
from llama_index.core.workflow import (
    Workflow,
    Event,
    StartEvent,
    StopEvent,
    step,
)
from llama_index.core.workflow.context import Context
from llama_index.core.base.response.schema import Response

# Import Qdrant vector store from the correct package
from llama_index.vector_stores.qdrant import QdrantVectorStore

# Import OpenAI LLM
from llama_index.llms.openai import OpenAI

# Qdrant imports
from qdrant_client import QdrantClient
from qdrant_client.http import models as qdrant_models
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from qdrant_client import QdrantClient
from IPython.display import Markdown, display

In [None]:
pip install llama-index qdrant-client torch transformers

In [None]:
# Embedding model used for both indexing and querying.
# NOTE(review): trust_remote_code=True lets the model repository run its own
# code at load time - confirm this model actually needs it.
embed_model = HuggingFaceEmbedding(model_name="Snowflake/snowflake-arctic-embed-m", trust_remote_code=True)

# Make it the default embedding model for all LlamaIndex components.
Settings.embed_model = embed_model

In [None]:
import os

# Load PDF documents from a directory.
# Point the PDF_DIR environment variable at your own folder instead of editing
# the notebook; the fallback below is the original author's local path.
pdf_dir = os.environ.get("PDF_DIR", "C:/Users/prita/Documents/DailyDose")

# Walk the directory tree recursively and keep only *.pdf files.
loader = SimpleDirectoryReader(
    input_dir=pdf_dir,
    required_exts=[".pdf"],
    recursive=True
)

In [None]:
# Read every matching file into LlamaIndex Document objects and report the count.
documents = loader.load_data()
print("Loaded {} PDF documents".format(len(documents)))

In [None]:
# In-memory Qdrant instance (kept simple: no server, no files on disk).
client = QdrantClient(location=":memory:")

# LlamaIndex adapter that stores embeddings in the "pdf_collection" collection.
vector_store = QdrantVectorStore(collection_name="pdf_collection", client=client)

In [None]:
# Create vector index
# from_documents() has no `vector_store` parameter: an external store must be
# handed in through a StorageContext, otherwise the keyword is ignored and the
# embeddings never reach the Qdrant collection.
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
)
print("Documents indexed successfully")

In [None]:
# Query engine over the PDF index; similarity_top_k=3 asks the retriever for
# the three best-matching chunks per question.
rag_query_engine = index.as_query_engine(similarity_top_k=3)

In [None]:
from llama_index.core.tools import QueryEngineTool

# Wrap the text-to-SQL engine as a tool; the description is what the agent
# reads when deciding whether to call it.
sql_tool = QueryEngineTool.from_defaults(
    name="sql_tool",
    query_engine=sql_query_engine,
    description=(
        "Useful for translating a natural language query into a SQL query "
        "over a table containing: city_stats, containing the "
        "population/state of each city located in the USA."
    ),
)

# NOTE(review): `cities` is not referenced by the tool definition below.
cities = ["New York City", "Los Angeles", "Chicago", "Houston", "Miami", "Seattle"]

# Wrap the PDF retrieval engine as a tool. The tool is named after what it
# wraps (the local RAG index), matching the `sql_tool` naming, and the
# description is a plain string because it has no placeholders.
rag_tool = QueryEngineTool.from_defaults(
    query_engine=rag_query_engine,
    description=(
        "Useful for answering semantic questions about certain cities in the US."
    ),
    name="rag_tool"
)

In [None]:
from typing import List
import os
from llama_index.core.tools import QueryEngineTool
from llama_index.core.agent import ReActAgent
from llama_index.llms.openai import OpenAI

# Assuming you already have the tools defined as in your example:
# sql_tool and rag_tool

def create_agent(tools: List[QueryEngineTool], model_name: str = "gpt-3.5-turbo"):
    """Build a ReAct agent that can call the given query-engine tools.

    Args:
        tools: Query-engine tools the agent may choose between.
        model_name: Name of the OpenAI chat model that drives the agent.

    Returns:
        The agent produced by ``ReActAgent.from_tools``.
    """
    return ReActAgent.from_tools(
        tools=tools,
        llm=OpenAI(model=model_name),
        verbose=True,  # print the agent's intermediate reasoning steps
    )

In [None]:
# Build the agent with both tools so it can pick between the SQL tool and the PDF retrieval tool.
agent = create_agent(tools=[sql_tool, rag_tool])

In [None]:
def process_query(query: str):
    """Run ``query`` through the module-level ``agent``.

    Args:
        query: Natural-language question for the agent.

    Returns:
        Whatever ``agent.query`` returns, or the string ``"Error: <message>"``
        when any exception is raised while answering.
    """
    try:
        return agent.query(query)
    except Exception as exc:
        # Failures are reported as text rather than raised.
        return f"Error: {exc}"

In [None]:
# Smoke test: a question the SQL table can answer.
sql_query = "What is the population of Miami?"
print("Query: " + sql_query)
print("Response: {}".format(process_query(sql_query)))

In [None]:
# Interactive prompt: this cell waits for a typed question, so it pauses an
# unattended "Run All" until input is provided.
query = input("Enter your question: ")
print(f"Response: {process_query(query)}")

Designing a Streamlit UI

In [None]:
%%writefile rag_sql_app_upload.py
import streamlit as st
import os

import tempfile
from llama_index.core import VectorStoreIndex, ServiceContext
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.readers import SimpleDirectoryReader
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.node_parser import SentenceSplitter

# Page configuration
st.set_page_config(page_title="Magic of RAG and Text to SQL", layout="centered")

# Header shown at the top of the app
st.title("Magic of RAG and Text to SQL")
st.write("Upload PDFs and ask questions using RAG or SQL")

# Seed the session state with defaults for any key that is not set yet.
for state_key, initial_value in (("index", None), ("processed", False)):
    if state_key not in st.session_state:
        st.session_state[state_key] = initial_value

def process_documents(directory):
    """Build a vector index over the documents found in ``directory``.

    Args:
        directory: Path of the folder whose files are loaded and indexed.

    Returns:
        The ``VectorStoreIndex`` built from the loaded documents, or ``None``
        when loading or indexing raises (the error is shown with ``st.error``).
    """
    # Imported here so the function is self-contained and does not depend on
    # a change to the module-level import block.
    from llama_index.core import Settings

    try:
        # Set up embedding model
        embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")

        # Register the LLM globally. from_documents() has no `llm` parameter,
        # so passing it there never reached the query engine that is later
        # created with index.as_query_engine(); Settings.llm is what it uses.
        Settings.llm = OpenAI(model="gpt-3.5-turbo", temperature=0)

        # Create node parser
        node_parser = SentenceSplitter(chunk_size=1024, chunk_overlap=20)

        # Load documents
        documents = SimpleDirectoryReader(directory).load_data()

        # Create index with the embedding model and chunking passed directly
        return VectorStoreIndex.from_documents(
            documents,
            embed_model=embed_model,
            transformations=[node_parser],
        )
    except Exception as e:
        # Surface the failure in the UI and let the caller see "no index".
        st.error(f"Error processing documents: {str(e)}")
        return None


def _index_and_store(directory):
    """Index ``directory`` and record the outcome in the session state."""
    with st.spinner("Processing PDFs..."):
        st.session_state.index = process_documents(directory)
        if st.session_state.index is not None:
            st.session_state.processed = True
            st.success("PDFs processed successfully!")


# Option to select an existing directory or upload files
option = st.radio("Choose an option:", ["Select directory", "Upload PDF files"])

if option == "Select directory":
    directory = st.text_input("Enter the directory path containing PDFs:")

    if directory and os.path.isdir(directory):
        st.success(f"Directory selected: {directory}")

        # Process PDFs button
        if st.button("Process PDFs"):
            _index_and_store(directory)
    elif directory:
        # Tell the user why nothing happens instead of staying silent.
        st.warning(f"Not a directory: {directory}")
else:
    uploaded_files = st.file_uploader("Upload PDF files", accept_multiple_files=True, type=["pdf"])

    # The button is only rendered once at least one file has been uploaded.
    if uploaded_files and st.button("Process Uploaded PDFs"):
        # The temporary directory (and the copies in it) is removed when the
        # block exits, so indexing must happen inside it.
        with tempfile.TemporaryDirectory() as temp_dir:
            for uploaded_file in uploaded_files:
                # The file name is supplied by the client: keep only its last
                # path component so it cannot point outside temp_dir.
                safe_name = os.path.basename(uploaded_file.name)
                file_path = os.path.join(temp_dir, safe_name)
                with open(file_path, "wb") as f:
                    f.write(uploaded_file.getbuffer())

            _index_and_store(temp_dir)

# The question form is only shown once an index has been built successfully.
chat_ready = st.session_state.processed and st.session_state.index is not None

if chat_ready:
    # Which backend should answer, and the question itself
    query_type = st.radio("Select query type:", ["RAG", "SQL"], horizontal=True)
    query = st.text_input("Your question:")

    if query:
        with st.spinner("Generating answer..."):
            try:
                if query_type != "RAG":
                    # SQL branch: placeholder until a SQL implementation is added
                    st.write("### Answer:")
                    st.write("SQL query execution feature is coming soon.")
                else:
                    # RAG branch: answer from the indexed documents
                    answer = st.session_state.index.as_query_engine().query(query)
                    st.write("### Answer:")
                    st.write(answer.response)
            except Exception as exc:
                st.error(f"Error generating answer: {str(exc)}")


In [None]:
# Launch the Streamlit app written by the previous cell.
# NOTE(review): presumably this cell keeps running until the server is stopped
# (interrupt the kernel to end it) - confirm in your environment.
!streamlit run rag_sql_app_upload.py