In [1]:
import os
import textwrap
from dotenv import load_dotenv
import re

# Load environment variables via python-dotenv before reading any of them.
load_dotenv()

# Read the credentials and dataset location used by this notebook.
# Each name is None when the corresponding variable is not set.
openai_api_key = os.environ.get("OPENAI_API_KEY")
active_loop_token = os.environ.get("ACTIVELOOP_TOKEN")
dataset_path = os.environ.get("DATASET_PATH")



In [3]:
from llama_index import download_loader

# Wikipedia pages that make up the corpus for this notebook.
wikipedia_pages = ['Delhi', 'Mumbai']

# Obtain the reader class by name, instantiate it, and load the pages.
WikipediaReader = download_loader("WikipediaReader")
loader = WikipediaReader()
documents = loader.load_data(pages=wikipedia_pages)

# Report how many documents were loaded.
print(len(documents))

2


In [4]:
from llama_index.node_parser import SimpleNodeParser

# Build a node parser with an explicit chunk size and chunk overlap.
parser = SimpleNodeParser.from_defaults(
    chunk_size=512,
    chunk_overlap=20,
)

# Split the loaded documents into nodes and report how many were produced.
nodes = parser.get_nodes_from_documents(documents)
node_count = len(nodes)
print(node_count)

146


In [5]:
from llama_index.vector_stores import DeepLakeVectorStore

# Fill in your Activeloop organization id to build the dataset path from it.
# When it is left empty, the DATASET_PATH environment variable is used instead,
# so the value configured in the environment is no longer silently discarded.
my_activeloop_org_id = ""
my_activeloop_dataset_name = "LlamaIndex-101"

if my_activeloop_org_id:
    dataset_path = f"hub://{my_activeloop_org_id}/{my_activeloop_dataset_name}"
else:
    # An empty org id would produce "hub:///LlamaIndex-101", a path with no
    # organization segment, so fall back to the environment-provided path.
    dataset_path = os.getenv("DATASET_PATH")
    if not dataset_path:
        raise ValueError(
            "Set my_activeloop_org_id or the DATASET_PATH environment variable "
            "before creating the Deep Lake vector store."
        )

# Create the Deep Lake vector store that will hold the indexed data.
# NOTE(review): with overwrite=False, re-running the indexing cell presumably
# adds to the existing dataset rather than replacing it -- confirm.
vector_store = DeepLakeVectorStore(dataset_path=dataset_path, overwrite=False)

Your Deep Lake dataset has been successfully created!


-

In [9]:
from llama_index.storage.storage_context import StorageContext
from llama_index import VectorStoreIndex

# Route the index's vector storage to the `vector_store` object.
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Build the index from `nodes` (the output of the node parser) rather than
# from the raw `documents`: passing the documents would leave `nodes` unused
# and drop the chunk_size / chunk_overlap settings chosen when parsing.
index = VectorStoreIndex(nodes=nodes, storage_context=storage_context)

In [8]:
# Question to ask against the indexed corpus.
question = "What is historical significance of Delhi?"

# Query the index through a query engine created with default settings.
query_engine = index.as_query_engine()
response = query_engine.query(question)

# Print only the answer text carried by the response object.
print(response.response)

Delhi has been historically significant as it has served as the capital of various empires and kingdoms throughout history. It has been a prominent political, cultural, and commercial center in India for centuries. Delhi's historical significance is rooted in its role as a seat of power, witnessing the rise and fall of different dynasties and playing a crucial part in shaping the country's history.
