In [1]:
from dotenv import load_dotenv
from azure.identity import DefaultAzureCredential
from azure.core.credentials import AzureKeyCredential
import os

In [2]:
!pip install -r azure-search-vector-python-llamaindex-sample-requirements.txt --quiet


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m24.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [3]:
!pip install -q llama-index-readers-web


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m24.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [4]:
load_dotenv(override=True) # take environment variables from .env.

# Make sure your .env file has values for the following environment variables
endpoint = os.environ["AZURE_SEARCH_SERVICE_ENDPOINT"]
credential = AzureKeyCredential(os.environ["AZURE_SEARCH_ADMIN_KEY"]) if len(os.environ["AZURE_SEARCH_ADMIN_KEY"]) > 0 else DefaultAzureCredential()
# index_name = os.environ["AZURE_SEARCH_INDEX"]
azure_openai_endpoint = os.environ["AZURE_OPENAI_ENDPOINT"]
# Llama Index does not support RBAC authentication, an API key is required
azure_openai_key = os.environ["AZURE_OPENAI_KEY"]
if len(azure_openai_key) == 0:
    raise Exception("API key required")
azure_openai_embedding_model = os.environ["AZURE_OPENAI_EMBEDDING_MODEL"]
azure_openai_embedding_deployment = os.environ["AZURE_OPENAI_EMBEDDING_DEPLOYMENT"]
azure_openai_chatgpt_deployment = os.environ["AZURE_OPENAI_CHATGPT_DEPLOYMENT"]
azure_openai_api_version = os.environ["AZURE_OPENAI_API_VERSION"]
# embedding_dimensions = int(os.getenv("AZURE_OPENAI_EMBEDDING_DIMENSIONS", 1536))

In [5]:
from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
embeddings = AzureOpenAIEmbedding(
    model_name=azure_openai_embedding_model,
    deployment_name=azure_openai_embedding_deployment,
    api_version=azure_openai_api_version,
    azure_endpoint=azure_openai_endpoint,
    api_key=azure_openai_key
)

In [6]:
from llama_index.llms.azure_openai import AzureOpenAI
llm = AzureOpenAI(
    deployment_name=azure_openai_chatgpt_deployment,
    api_version=azure_openai_api_version,
    azure_endpoint=azure_openai_endpoint,
    api_key=azure_openai_key
)

In [7]:
from llama_index.readers.remote_depth import RemoteDepthReader

loader = RemoteDepthReader()
web_documents = loader.load_data(
    url="https://www.jainsocietyhouston.org/"
)

# Extract the content from the website data document
html_content = web_documents[0].text

  from .autonotebook import tqdm as notebook_tqdm


Reading links at depth 0...


  3%|▎         | 1/31 [00:00<00:03,  8.94it/s]

Loading link: https://www.jainsocietyhouston.org
Loading link: https://www.jainsocietyhouston.org/members-area


  6%|▋         | 2/31 [00:00<00:07,  3.97it/s]

Loading link: https://www.jainsocietyhouston.org/minutesofmeetings


 10%|▉         | 3/31 [00:00<00:07,  3.77it/s]

Loading link: https://www.jainsocietyhouston.org/donate


 13%|█▎        | 4/31 [00:01<00:08,  3.31it/s]

Loading link: https://www.jainsocietyhouston.org/sponsorlunch
Loading link: http://www.jainsocietyhouston.org/


 23%|██▎       | 7/31 [00:01<00:04,  5.39it/s]

Loading link: https://www.jainsocietyhouston.org/about_us
Loading link: https://www.jainsocietyhouston.org/management


 29%|██▉       | 9/31 [00:01<00:03,  6.53it/s]

Loading link: https://www.jainsocietyhouston.org/constitution
Loading link: https://www.jainsocietyhouston.org/activity-committees


 39%|███▊      | 12/31 [00:02<00:02,  7.94it/s]

Loading link: https://www.jainsocietyhouston.org/jshemailarchive
Loading link: https://www.jainsocietyhouston.org/jaina
Loading link: https://www.jainsocietyhouston.org/jsheventcalendar


 42%|████▏     | 13/31 [00:02<00:03,  5.54it/s]

Loading link: https://www.jainsocietyhouston.org/pathshala
Loading link: https://jainsocietyhouston.app.neoncrm.com/login


 52%|█████▏    | 16/31 [00:03<00:03,  4.49it/s]

Loading link: https://www.jainsocietyhouston.org/pathshalacalendar
Loading link: https://forms.gle/MgEz5fFLBjv9yWndA


 61%|██████▏   | 19/31 [00:04<00:03,  3.01it/s]

Loading link: https://www.jainsocietyhouston.org/sponsorship
Loading link: https://www.jainsocietyhouston.org/jab
Loading link: https://rtsp.me/embed/FR8NYFzs/


 65%|██████▍   | 20/31 [00:04<00:03,  3.50it/s]

Loading link: https://jainsocietyhouston.app.neoncrm.com/np/clients/jainsocietyhouston/eventRegistration.jsp


 68%|██████▊   | 21/31 [00:05<00:03,  2.97it/s]

Loading link: https://www.instagram.com/wixhttps://www.instagram.com/jainsocietyofhouston/


 71%|███████   | 22/31 [00:05<00:02,  3.20it/s]

Loading link: https://www.facebook.com/JainsInHouston/


 74%|███████▍  | 23/31 [00:06<00:02,  2.79it/s]

Loading link: https://www.linkedin.com/company/jainsocietyofhouston/


 77%|███████▋  | 24/31 [00:06<00:02,  3.16it/s]

Loading link: https://www.youtube.com/channel/UC97GEZVx3L2R1kD4K5MYOKw


 81%|████████  | 25/31 [00:07<00:03,  1.69it/s]

Loading link: https://www.jainsocietyhouston.org/new-to-jsh
Loading link: http://bit.ly/2022JSHVotingGuideVideo1


 87%|████████▋ | 27/31 [00:08<00:01,  2.11it/s]

Loading link: https://jainelibrary.org/


 90%|█████████ | 28/31 [00:08<00:01,  2.14it/s]

Loading link: http://www.jainpanchang.org/


 94%|█████████▎| 29/31 [00:09<00:01,  1.71it/s]

Loading link: https://www.youtube.com/c/JSHHouston/videos


 97%|█████████▋| 30/31 [00:10<00:00,  1.33it/s]

Loading link: https://www.youtube.com/watch


100%|██████████| 31/31 [00:11<00:00,  2.61it/s]


Reading links at depth 1...


  0%|          | 0/91 [00:00<?, ?it/s]

Loading link: https://www.jainsocietyhouston.org/management
Loading link: https://www.youtube.com/about/copyright/
Loading link: https://jainsocietyhouston.app.neoncrm.com/login
Loading link: https://drive.google.com/drive/folders/1pyc1toC0IIvn7vOwVzssYQ3ovn6hp3Vd


  5%|▌         | 5/91 [00:00<00:13,  6.27it/s]

Loading link: https://rtsp.me/
Loading link: https://jainsocietyhouston.app.neoncrm.com/requestPassword


  7%|▋         | 6/91 [00:01<00:22,  3.85it/s]

Loading link: https://drive.google.com/file/d/19JoTmar94ZZkqYMbnu2_fPLptlPCOAbu/view


  8%|▊         | 7/91 [00:02<00:31,  2.66it/s]

Loading link: http://www.jainpanchang.org/
Loading link: https://www.youtube.com/about/press/


 10%|▉         | 9/91 [00:02<00:24,  3.34it/s]

Loading link: https://jainelibrary.org/
Loading link: https://www.jainsocietyhouston.org/constitution
Loading link: https://forms.gle/MgEz5fFLBjv9yWndA
Loading link: https://www.google.com/forms/about/
Loading link: https://docs.google.com/forms/d/e/1FAIpQLSeBBQx7lAVpq3NicCCt4nRuNCKiEcORujjrhDyzFunC2S_N5Q/viewform


 15%|█▌        | 14/91 [00:03<00:14,  5.22it/s]

Loading link: http://www.jainpanchang.org/khat_muhurats.html


 16%|█▋        | 15/91 [00:03<00:18,  4.12it/s]

Loading link: http://bit.ly/2022JSHVotingGuideVideo1
Loading link: https://accounts.google.com/ServiceLogin


 19%|█▊        | 17/91 [00:04<00:17,  4.20it/s]

Loading link: https://www.jainsocietyhouston.org/pathshalacalendar
Loading link: https://www.youtube.com/


 21%|██        | 19/91 [00:05<00:24,  2.96it/s]

Loading link: https://www.youtube.com/ads/
Loading link: https://drive.google.com/drive/folders/15egBFIjKFH1BCevB2EghyC8KSxbktJzu


 23%|██▎       | 21/91 [00:05<00:24,  2.92it/s]

Loading link: https://www.jainsocietyhouston.org/minutesofmeetings
Loading link: http://www.jainpanchang.org/index.php


 25%|██▌       | 23/91 [00:06<00:22,  3.00it/s]

Loading link: http://www.jainpanchang.org/panchangganit_author.html


 26%|██▋       | 24/91 [00:06<00:22,  2.97it/s]

Loading link: https://www.facebook.com/JainsInHouston/
Loading link: https://www.youtube.com/t/contact_us/


 29%|██▊       | 26/91 [00:07<00:17,  3.76it/s]

Loading link: https://drive.google.com/file/d/1grEQzxHZm1gXZ6bLEbDSYgAaRlQVDHvX/view


 30%|██▉       | 27/91 [00:07<00:22,  2.90it/s]

Loading link: https://drive.google.com/file/d/1POffNqwCUN367yVy-AZHlW2nYd3-eyS3/view


 31%|███       | 28/91 [00:08<00:31,  2.03it/s]

Loading link: http://www.jainpanchang.org/shilanyas_muhurats.html


 32%|███▏      | 29/91 [00:09<00:28,  2.18it/s]

Loading link: https://www.jainsocietyhouston.org/sponsorlunch
Loading link: https://www.jainsocietyhouston.org/pathshala
Loading link: https://www.youtube.com/creators/
Loading link: https://www.jainsocietyhouston.org
Loading link: https://www.jainsocietyhouston.org/about_us
Loading link: http://www.jainpanchang.org/about.html


 38%|███▊      | 35/91 [00:09<00:12,  4.48it/s]

Loading link: https://www.jainsocietyhouston.org/pathshaladayone2015


 40%|███▉      | 36/91 [00:11<00:19,  2.76it/s]

Loading link: https://www.jainsocietyhouston.org/bhavna
Loading link: https://jainsocietyhouston.app.neoncrm.com/np/clients/jainsocietyhouston/eventRegistration.jsp
Loading link: https://www.instagram.com/wixhttps://www.instagram.com/jainsocietyofhouston/
Loading link: https://forms.gle/reportabuse


 44%|████▍     | 40/91 [00:11<00:11,  4.42it/s]

Loading link: https://www.youtube.com/t/terms


 45%|████▌     | 41/91 [00:11<00:11,  4.37it/s]

Loading link: http://www.jainpanchang.org/panchak_vinchudo_pushya.html


 46%|████▌     | 42/91 [00:11<00:12,  4.06it/s]

Loading link: https://www.youtube.com/t/privacy


 47%|████▋     | 43/91 [00:12<00:15,  3.02it/s]

Loading link: https://drive.google.com/file/d/1IRqh3yoqudpvqTHMPJZo3gh4OJDnuxKC/view


 48%|████▊     | 44/91 [00:13<00:23,  1.98it/s]

Loading link: http://jain-houston.org/jsgh/EmailList.asp


 49%|████▉     | 45/91 [00:14<00:23,  1.95it/s]

Loading link: http://www.jainpanchang.org/sitemap.html


 51%|█████     | 46/91 [00:14<00:21,  2.12it/s]

Loading link: http://www.jainpanchang.org/samyadosh_grahvakri.html


 52%|█████▏    | 47/91 [00:15<00:22,  2.00it/s]

Loading link: https://www.jainsocietyhouston.org/members-area
Loading link: https://www.jainsocietyhouston.org/jshemailarchive
Loading link: http://www.myriadsolutionz.com


 56%|█████▌    | 51/91 [00:17<00:21,  1.84it/s]

Loading link: https://drive.google.com/open
Loading link: https://jainsocietyhouston.app.neoncrm.com/np/clients/jainsocietyhouston/neonPage.jsp


 57%|█████▋    | 52/91 [00:18<00:20,  1.90it/s]

Loading link: https://sites.google.com/jain-houston.org/pathshala-register


 58%|█████▊    | 53/91 [00:18<00:17,  2.15it/s]

Loading link: https://www.youtube.com/howyoutubeworks
Loading link: http://www.jainpanchang.org/contact.html


 60%|██████    | 55/91 [00:18<00:14,  2.44it/s]

Loading link: https://docs.google.com/forms/d/e/1FAIpQLSe3uaAkcXneGJRoSmLLqnlRTTVDv6weQsaK_d4w6BPD9-oUhQ/viewform


 62%|██████▏   | 56/91 [00:19<00:16,  2.15it/s]

Loading link: https://neonone.com/productterms/
Loading link: https://developers.google.com/youtube


 64%|██████▎   | 58/91 [00:20<00:12,  2.71it/s]

Loading link: http://www.jainpanchang.org/chaturmas_loch_muhurats.html


 65%|██████▍   | 59/91 [00:20<00:11,  2.76it/s]

Loading link: https://www.mozilla.org/en-US/firefox/


 67%|██████▋   | 61/91 [00:21<00:10,  2.83it/s]

Loading link: https://www.apple.com/safari/
Loading link: https://www.microsoft.com/en-us/edge


 68%|██████▊   | 62/91 [00:24<00:30,  1.05s/it]

Loading link: https://www.jainsocietyhouston.org/activity-committees
Loading link: https://neonone.com/
Loading link: https://www.youtube.com/watch
Loading link: https://jainsocietyhouston.app.neoncrm.com/forms/createaccount


 73%|███████▎  | 66/91 [00:25<00:13,  1.81it/s]

Loading link: https://www.google.com/chrome/
Loading link: http://www.jshconnect.org


 75%|███████▍  | 68/91 [00:26<00:12,  1.83it/s]

Loading link: https://neonone.com/privacypolicy/
Loading link: https://policies.google.com/privacy


 77%|███████▋  | 70/91 [00:26<00:10,  2.07it/s]

Loading link: https://www.youtube.com/c/JSHHouston/videos
Loading link: https://www.youtube.com/new


 79%|███████▉  | 72/91 [00:27<00:08,  2.14it/s]

Loading link: https://www.linkedin.com/company/jainsocietyofhouston/
Loading link: https://policies.google.com/terms


 81%|████████▏ | 74/91 [00:28<00:06,  2.48it/s]

Loading link: https://www.jainsocietyhouston.org/jaina
Loading link: http://p1.vresp.com/zRTHOi)


 82%|████████▏ | 75/91 [02:37<00:33,  2.11s/it]


ConnectTimeout: HTTPConnectionPool(host='p1.vresp.com', port=80): Max retries exceeded with url: /zRTHOi) (Caused by ConnectTimeoutError(<urllib3.connection.HTTPConnection object at 0x7f81c31471c0>, 'Connection to p1.vresp.com timed out. (connect timeout=None)'))

In [8]:
from bs4 import BeautifulSoup
from llama_index.core import Document
# Parse the data.
soup = BeautifulSoup(html_content, 'html.parser')
p_tags = soup.findAll('p')
text_content = ""
for each in p_tags:
    text_content += each.text + "\n"

# Convert back to Document format
documents = [Document(text=text_content)]

In [9]:
from azure.search.documents.indexes import SearchIndexClient

# Index name to use
index_name = "llamaindex-vector-jsh-website"

# Use index client to demonstrate creating an index
index_client = SearchIndexClient(
    endpoint=endpoint,
    credential=credential,
)

In [10]:
from llama_index.vector_stores.azureaisearch import AzureAISearchVectorStore
from llama_index.vector_stores.azureaisearch import (
    IndexManagement,
    MetadataIndexFieldType,
)

metadata_fields = {

}

vector_store = AzureAISearchVectorStore(
    search_or_index_client=index_client,
    filterable_metadata_field_keys=metadata_fields,
    index_name=index_name,
    index_management=IndexManagement.CREATE_IF_NOT_EXISTS,
    id_field_key="id",
    chunk_field_key="chunk",
    embedding_field_key="embedding",
    embedding_dimensionality=1536,
    metadata_string_field_key="metadata",
    doc_id_field_key="doc_id",
    language_analyzer="en.lucene",
    vector_algorithm_type="exhaustiveKnn")

In [11]:
from llama_index.core import (
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
)
from llama_index.core.settings import Settings

storage_context = StorageContext.from_defaults(vector_store=vector_store)

Settings.llm = llm
Settings.embed_model = embeddings
index = VectorStoreIndex.from_documents(
    documents, storage_context=storage_context
)

In [12]:
from llama_index.core.vector_stores.types import VectorStoreQueryMode

default_retriever = index.as_retriever(
    vector_store_query_mode=VectorStoreQueryMode.DEFAULT
)
response = default_retriever.retrieve("Tell me about Jain Society of Houston")

# Loop through each NodeWithScore in the response
for node_with_score in response:
    node = node_with_score.node  # The TextNode object
    score = node_with_score.score  # The similarity score
    chunk_id = node.id_  # The chunk ID

    # Extract the relevant metadata from the node
    file_name = node.metadata.get("file_name", "Unknown")
    file_path = node.metadata.get("file_path", "Unknown")

    # Extract the text content from the node
    text_content = node.text if node.text else "No content available"

    # Print the results in a user-friendly format
    print(f"Score: {score}")
    print(f"File Name: {file_name}")
    print(f"Id: {chunk_id}")
    print("\nExtracted Content:")
    print(text_content)
    print("\n" + "=" * 40 + " End of Result " + "=" * 40 + "\n")

Score: 0.89288765
File Name: Unknown
Id: 66609556-c4f6-4823-9171-5ec651e275b1

Extracted Content:
Home
About Us
Events
Pathshala
More
 
Motto of Jain Center: To promote the philosophy and teachings of Jain religion and to establish a platform for worship, discussion and teaching of Jain rituals, ideals, and principles of Jain religion, to celebrate auspicious Jain events and festivals.
 
Jainism is one of the oldest religious traditions of India, and has existed side by side with Hinduism throughout its long history. The basic philosophy of Jainism is non Violence and Anekantvad (Multiplicity of viewpoint). Jain Samaj is active in Houston since early 70s. Jain Society of Houston was formed in 1982.
​Jain Center Houston has over 800 families as members, consisting of all sects of Jains like Deravasi, Sthanakvasi, Shwentamber, Digamber etc. JHS works under the guidance of  JAINA (The Federation of Jain Associations in North America).
 
We have Mahavir Swami (Mul Nayak), Shankeshwar Parsh

In [None]:
hybrid_retriever = index.as_retriever(
    vector_store_query_mode=VectorStoreQueryMode.SEMANTIC_HYBRID
)
hybrid_response = hybrid_retriever.retrieve("Tell me about pathshala")

# Loop through each NodeWithScore in the response
for node_with_score in hybrid_response:
    node = node_with_score.node  # The TextNode object
    score = node_with_score.score  # The similarity score
    chunk_id = node.id_  # The chunk ID

    # Extract the relevant metadata from the node
    file_name = node.metadata.get("file_name", "Unknown")
    file_path = node.metadata.get("file_path", "Unknown")

    # Extract the text content from the node
    text_content = node.text if node.text else "No content available"

    # Print the results in a user-friendly format
    print(f"Score: {score}")
    print(f"File Name: {file_name}")
    print(f"Id: {chunk_id}")
    print("\nExtracted Content:")
    print(text_content)
    print("\n" + "=" * 40 + " End of Result " + "=" * 40 + "\n")

In [None]:
from llama_index.core.query_engine import SubQuestionQueryEngine
from llama_index.core.tools import QueryEngineTool, ToolMetadata

# setup base query engine as tool
query_engine_tools = [
    QueryEngineTool(
        query_engine=index.as_query_engine(),
        metadata=ToolMetadata(
            name="jsh_about",
            description="Jain Society of Houston (JSH)",
        ),
    ),
]
# build a sub-question query engine over this tool
# this allows decomposing the question down into sub-questions which then execute against the tool
query_engine = SubQuestionQueryEngine.from_defaults(
    query_engine_tools=query_engine_tools,
   use_async=False,
)

response = query_engine.query("When does monthly Bhavna take place?")

In [13]:
query_engine = index.as_query_engine()
response = query_engine.query("When was JSH established?")
print(response)
response = query_engine.query("What are the activities for young adults?")
print(response)

Jain Society of Houston was formed in 1982.
For young adults between the ages of 13 and 22, the Jain Fellowship of Houston carries out many activities.


In [19]:
response = query_engine.query("Summarize JSH activities")
print(response)

Jain Society of Houston (JSH) engages in a variety of activities to promote Jain philosophy and teachings. They organize weekly Pathshala sessions for school-age children to learn about Jainism and languages like Hindi and Gujarati, and provide a light lunch on these days. JSH also hosts monthly Digambar Vidhi Puja and a month-end Bhavna followed by a communal lunch. Additionally, they invite Jain scholars and spiritual leaders throughout the year for further enrichment. For young adults, the Jain Fellowship of Houston offers numerous activities. The center also maintains a library with a vast collection of books and multimedia resources related to Jainism.
