# Query Web Pages

## Step-1: Configuration

In [1]:
# Location of the Milvus database (a local file path, i.e. an embedded instance).
DB_URI = "./rag_web.db"

# Name of the Milvus collection the notebook connects to.
COLLECTION_NAME = "pages"

# Hugging Face embedding model and the vector length handed to the
# vector store as `dim`.
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
EMBEDDING_LENGTH = 384

# Model identifier handed to the Replicate LLM client.
LLM_MODEL = "meta/meta-llama-3-8b-instruct"

In [2]:
import os,sys
## Load Settings from .env file
from dotenv import find_dotenv, load_dotenv

_ = load_dotenv(find_dotenv()) # read local .env file


# Read the Replicate API token from the process environment (populated from
# the local .env file by load_dotenv earlier in this cell).
REPLICATE_API_TOKEN = os.environ.get("REPLICATE_API_TOKEN")

# Fail fast: an unset or empty token stops the notebook here with an
# actionable message instead of failing later at the first LLM call.
if REPLICATE_API_TOKEN:
    print ("✅ config REPLICATE_API_TOKEN found")
else:
    # RuntimeError is a subclass of Exception, so any `except Exception`
    # handler keeps working.
    raise RuntimeError(
        "❌ 'REPLICATE_API_TOKEN' is not set. "
        "Please add it to your .env file (or the environment) to continue..."
    )


✅ config REPLICATE_API_TOKEN found


In [3]:
# Route Hugging Face downloads through the hf-mirror.com mirror by setting HF_ENDPOINT.
# The override below is ACTIVE: comment out the os.environ line to use the
# default https://huggingface.co/ endpoint instead.
# NOTE(review): presumably this must stay ahead of the Hugging Face imports below
# so the override is picked up — confirm against the huggingface_hub docs.
import os
os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'

from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings

# Set the embedding model on llama-index's Settings object, using the model
# named by EMBEDDING_MODEL from the configuration cell.
Settings.embed_model = HuggingFaceEmbedding(
    model_name = EMBEDDING_MODEL
)

  from .autonotebook import tqdm as notebook_tqdm


## Step-2: Connect to Vector DB

In [4]:
# connect to vector db
from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.vector_stores.milvus import MilvusVectorStore

# Open the Milvus collection through llama-index's vector-store wrapper.
# `dim` is the embedding vector length from the configuration cell.
# NOTE(review): overwrite=False presumably keeps the existing collection's
# data — confirm against the MilvusVectorStore docs.
vector_store = MilvusVectorStore(
    uri=DB_URI,
    dim=EMBEDDING_LENGTH,
    collection_name=COLLECTION_NAME,
    overwrite=False,
)

# Wrap the vector store in a storage context for the index-loading step.
storage_context = StorageContext.from_defaults(vector_store=vector_store)

print("✅ Connected Llama-index to Milvus instance: ", DB_URI, ", collection: ", COLLECTION_NAME)

✅ Connected Llama-index to Milvus instance:  ./rag_web.db , collection:  pages


## Step-3: Load Document Index from DB

In [5]:
%%time

from llama_index.core import VectorStoreIndex

# Build the index object on top of the already-connected vector store,
# rather than from a fresh set of documents.
index = VectorStoreIndex.from_vector_store(
    vector_store=vector_store,
    storage_context=storage_context,
)

print("✅ Loaded index from vector db:", DB_URI, ",  collection: ", COLLECTION_NAME)

✅ Loaded index from vector db: ./rag_web.db ,  collection:  pages
CPU times: user 102 ms, sys: 9.34 ms, total: 112 ms
Wall time: 111 ms


## Step-4: Setup LLM

In [6]:
from llama_index.llms.replicate import Replicate
from llama_index.core import Settings

# Create the Replicate LLM client for the model named in the configuration
# cell; temperature=0.1 is passed straight through to the client.
llm = Replicate(model=LLM_MODEL, temperature=0.1)

# Set it as the LLM on llama-index's Settings object.
Settings.llm = llm

## Step-5: Query

In [None]:
# Create a query engine from the loaded index with as_query_engine() defaults
# (no arguments passed). The query cells below use `query_engine`, so this
# cell must be run before them.
query_engine = index.as_query_engine()

In [11]:
# Ask the query engine who teaches a specific class; printing the response
# object shows the generated answer text.
res = query_engine.query("Who is teaching the 'Networking in the Cloud' class?")
print(res)



According to the context information, the instructor for the 'Networking in the Cloud' class is Scott Taylor, Internet2 Network Services.


In [12]:
# Ask about a specific instructor's schedule. In the recorded run the model
# answered that the retrieved context did not mention this instructor.
res = query_engine.query("When is Sujee Maniyam teaching the class?")
print(res)



I'm happy to help! However, I don't see any information about Sujee Maniyam teaching a class in the provided context. The text only mentions a workshop called "Developing Intelligent Systems with LLMs and RAG" on October 9, but it doesn't mention Sujee Maniyam as the instructor. If you could provide more context or clarify the question, I'd be happy to try and assist you further!


In [14]:
# Ask a topic-level question (which class covers generative AI) and print
# the generated answer.
res = query_engine.query("What class teaches generative AI")
print(res)



Based on the provided context information, it appears that the class that teaches generative AI is the "Cloud Learning and Skills Sessions (CLASS)" offered by Internet2. The CLASS program focuses on Large Language Models (LLMs) and Retrieval-Augmented Generation (RAG) applications, which are related to generative AI.
