In [1]:
from llama_index.core import SimpleDirectoryReader

# Load the contents of the local data folder into `documents`.
# The path is a raw string so the Windows backslashes stay literal instead of
# relying on unrecognised escape sequences such as "\D" or "\L", which newer
# Python versions report as a SyntaxWarning.
documents = SimpleDirectoryReader(r"D:\Data\LLM\llamaindex").load_data()

In [2]:
# Echo the loaded list so its entries (text and metadata) show up as the cell output.
documents

[Document(id_='5b1cdd41-548d-44e3-a684-6bb325b81138', embedding=None, metadata={'page_label': '1', 'file_name': 'Indian_Economy_Overview.pdf', 'file_path': 'D:\\Data\\LLM\\llamaindex\\Indian_Economy_Overview.pdf', 'file_type': 'application/pdf', 'file_size': 7918, 'creation_date': '2024-06-09', 'last_modified_date': '2024-06-09'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, text='Indian Economy: An Overview\nIndian Economy: An Overview\nPage 1', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'),
 Document(id_='e4e17029-7649-475d-a97a-869310ed40e5', embedding=None, metadata={'page_label': '2', 'file_name': 'Indian_Economy_Overview.pdf', 'file_path': 'D:\\Data\\LL

In [3]:
from llama_index.core import download_loader

In [6]:
#pip install llama-index-readers-database

In [7]:
from llama_index.readers.database import DatabaseReader

In [8]:
#!pip install llama-index pymysql -q

In [9]:
import os

# Connection settings for the MySQL database used in the next cell.
# Values are read from environment variables so that no secret is stored in
# the notebook; the non-secret settings fall back to the previous defaults.
db_user = os.environ.get("DB_USER", "root")
# Deliberately no real default: export DB_PASSWORD before starting the kernel.
# When the variable is unset the password is the empty string.
db_password = os.environ.get("DB_PASSWORD", "")
db_host = os.environ.get("DB_HOST", "localhost")
db_name = os.environ.get("DB_NAME", "classicmodels")

In [12]:
from urllib.parse import quote

from sqlalchemy import create_engine, text

# Construct the connection string for the PyMySQL driver.
# User name and password are percent-encoded (safe="" encodes every reserved
# character) so that characters with a meaning inside a URL, such as "@", ":"
# or "/", cannot corrupt the connection string.
connection_string = (
    f"mysql+pymysql://{quote(db_user, safe='')}:{quote(db_password, safe='')}"
    f"@{db_host}/{db_name}"
)

# Create an engine instance
engine = create_engine(connection_string)

# Test the connection using raw SQL: fetch three rows and print each of them.
with engine.connect() as connection:
    result = connection.execute(text("select * from payments limit 3"))
    for row in result:
        print(row)

(103, 'HQ336336', datetime.date(2004, 10, 19), Decimal('6066.78'))
(103, 'JM555205', datetime.date(2003, 6, 5), Decimal('14571.44'))
(103, 'OM314933', datetime.date(2004, 12, 18), Decimal('1676.14'))


## High-Level Transformation

In [22]:
from llama_index.core import Settings

In [23]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

In [24]:
# Register the HuggingFace embedding model on the shared Settings object.
Settings.embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-base-en-v1.5",
)



In [26]:
from llama_index.llms.ollama import Ollama

In [27]:
# Register the Ollama-served "llama3" model as the LLM on the shared Settings
# object, with an explicit request timeout.
Settings.llm = Ollama(
    model="llama3",
    request_timeout=360.0,
)

In [28]:
from llama_index.core import VectorStoreIndex
# Build a vector index directly from the loaded documents.
index = VectorStoreIndex.from_documents(
    documents,
)

In [30]:
# Turn the index into a query engine, ask one question and show the answer.
query_engine = index.as_query_engine()
response = query_engine.query(
    "What are the Key Sectors of the Indian Economy?"
)
print(str(response))

The three main sectors of the Indian economy are Agriculture, Industry, and Services.


In [31]:
# Ask a second question against the same query engine and show the answer.
response = query_engine.query(
    "What is the Capital of India?"
)
print(str(response))

The capital of India is not mentioned in this specific document or page. However, it can be inferred that the capital city is an important aspect of a country's economy, as it often serves as the hub for many economic activities and industries.


In [32]:
# Repeat the key-sectors question against the same query engine.
response = query_engine.query(
    "What are the Key Sectors of the Indian Economy?"
)
print(str(response))

The key sectors of the Indian economy include Agriculture, Industry, and Services.


In [33]:
# Another way: specifying the chunk size explicitly

In [34]:
from llama_index.core import Settings
from llama_index.core.node_parser import SentenceSplitter

# Splitter configured with chunk_size=512 and chunk_overlap=10.
text_splitter = SentenceSplitter(chunk_size=512, chunk_overlap=10)

# Option 1 (global): register the splitter on the shared Settings object.
Settings.text_splitter = text_splitter

# Option 2 (per-index): pass the splitter to one index as its transformation.
index1 = VectorStoreIndex.from_documents(documents, transformations=[text_splitter])

In [35]:
# Query the index that was built with the explicit sentence splitter.
query_engine1 = index1.as_query_engine()
response = query_engine1.query(
    "What are the Key Sectors of the Indian Economy?"
)
print(str(response))

The three key sectors of the Indian economy are Agriculture, Industry, and Services.


## Lower-Level Transformation 

In [37]:
from llama_index.core import SimpleDirectoryReader
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core.node_parser import TokenTextSplitter

# Reuses the `documents` loaded at the top of the notebook.
# The pipeline has a single transformation, a default-configured
# TokenTextSplitter; its output is kept as `nodes`.
pipeline = IngestionPipeline(
    transformations=[TokenTextSplitter()],
)
nodes = pipeline.run(documents=documents)

In [40]:
# Build an index from the nodes produced by the ingestion pipeline.
index2 = VectorStoreIndex(
    nodes,
)

In [41]:
# Query the index that was built from the pipeline's nodes.
query_engine2 = index2.as_query_engine()
response = query_engine2.query(
    "What are the Key Sectors of the Indian Economy?"
)
print(str(response))

Agriculture, Industry, and Services.


In [42]:
# Ask the same question again through the node-based query engine.
response = query_engine2.query(
    "What are the Key Sectors of the Indian Economy?"
)
print(str(response))

Agriculture, Industry, and Services are the key sectors of the Indian economy.


## Persisting to disk

In [43]:
# Write the first index's storage context to disk.
# Raw string: keeps the Windows backslashes literal instead of relying on
# unrecognised escape sequences ("\D", "\L", "\V"), which newer Python versions
# report as a SyntaxWarning.
index.storage_context.persist(persist_dir=r"D:\Data\LLM\VectorStoreIndex")

In [44]:
from llama_index.core import StorageContext, load_index_from_storage

# Rebuild the storage context from the directory written by the persist cell.
# Raw string: keeps the Windows backslashes literal instead of relying on
# unrecognised escape sequences ("\D", "\L", "\V"), which newer Python versions
# report as a SyntaxWarning.
storage_context = StorageContext.from_defaults(
    persist_dir=r"D:\Data\LLM\VectorStoreIndex"
)

# Load the index back from that storage context.
index5 = load_index_from_storage(storage_context)

In [45]:
# Query the index that was reloaded from disk.
query_engine5 = index5.as_query_engine()
response = query_engine5.query(
    "What are the Key Sectors of the Indian Economy?"
)
print(str(response))

Agriculture, Industry, and Services are the key sectors of the Indian economy.


## ChromaDB

In [52]:
#pip install chromadb
#!pip install llama-index-vector-stores-chroma

In [53]:
import chromadb
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import StorageContext

In [54]:
# Open the on-disk Chroma database.
# Raw string: keeps the Windows backslashes literal instead of relying on
# unrecognised escape sequences ("\D", "\L", "\c"), which newer Python versions
# report as a SyntaxWarning.
db = chromadb.PersistentClient(path=r"D:\Data\LLM\chroma_db")

In [55]:
# Fetch the "quickstart" collection, creating it if it does not exist yet.
chroma_collection = db.get_or_create_collection(
    "quickstart",
)

In [56]:
# Wrap the Chroma collection as a vector store and hand that store to a new
# storage context.
vector_store = ChromaVectorStore(
    chroma_collection=chroma_collection,
)
storage_context = StorageContext.from_defaults(
    vector_store=vector_store,
)

In [57]:
# Build the Chroma-backed index.
# The collection is persisted on disk, so ingesting the documents on every run
# of this cell would add another copy of each chunk. Ingest only when the
# collection is still empty; otherwise attach to the vectors already stored.
# (Delete the collection first to force a fresh ingest after the data changes.)
if chroma_collection.count() == 0:
    index_chromadb = VectorStoreIndex.from_documents(
        documents, storage_context=storage_context
    )
else:
    index_chromadb = VectorStoreIndex.from_vector_store(vector_store)

In [59]:
# Query the Chroma-backed index.
query_engine6 = index_chromadb.as_query_engine()
response = query_engine6.query(
    "What are the Key Sectors of the Indian Economy?"
)
print(str(response))

Agriculture, Industry, and Services.


## Querying

In [60]:
from llama_index.core import VectorStoreIndex, get_response_synthesizer
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.postprocessor import SimilarityPostprocessor

In [61]:
# Separate vector index used by the hand-assembled query engine below.
index_query = VectorStoreIndex.from_documents(
    documents,
)

In [62]:
# Retriever over index_query configured with similarity_top_k=3.
retriever = VectorIndexRetriever(index=index_query, similarity_top_k=3)

In [63]:
# Create a response synthesizer with the library defaults; it is passed to
# RetrieverQueryEngine in the next cell.
response_synthesizer = get_response_synthesizer()

In [64]:
# Assemble the query engine from its parts: the retriever, a similarity
# post-processor with a 0.8 cutoff, and the response synthesizer.
query_engine_res = RetrieverQueryEngine(
    node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.8)],
    response_synthesizer=response_synthesizer,
    retriever=retriever,
)

In [65]:
# Run a question through the hand-assembled query engine and show the answer.
response = query_engine_res.query(
    "What are the Challenges Facing the Indian Economy?"
)
print(str(response))

Unemployment, income inequality, agricultural distress, infrastructure deficit, and regulatory hurdles are the challenges facing the Indian economy.
