In [1]:
!pip3 uninstall llama-index
!pip3 install llama-index --upgrade --no-cache-dir --force-reinstall

# ------------------------------------------------------------------------------------ #

!pip3 install -U weaviate-client
!pip3 install llama-index-vector-stores-weaviate

# ------------------------------------------------------------------------------------ #

!pip3 install python-dotenv torch sentence-transformers

[0mCollecting llama-index
  Downloading llama_index-0.10.29-py3-none-any.whl.metadata (11 kB)
Collecting llama-index-agent-openai<0.3.0,>=0.1.4 (from llama-index)
  Downloading llama_index_agent_openai-0.2.2-py3-none-any.whl.metadata (677 bytes)
Collecting llama-index-cli<0.2.0,>=0.1.2 (from llama-index)
  Downloading llama_index_cli-0.1.11-py3-none-any.whl.metadata (1.5 kB)
Collecting llama-index-core<0.11.0,>=0.10.29 (from llama-index)
  Downloading llama_index_core-0.10.29-py3-none-any.whl.metadata (3.6 kB)
Collecting llama-index-embeddings-openai<0.2.0,>=0.1.5 (from llama-index)
  Downloading llama_index_embeddings_openai-0.1.7-py3-none-any.whl.metadata (603 bytes)
Collecting llama-index-indices-managed-llama-cloud<0.2.0,>=0.1.2 (from llama-index)
  Downloading llama_index_indices_managed_llama_cloud-0.1.5-py3-none-any.whl.metadata (3.8 kB)
Collecting llama-index-legacy<0.10.0,>=0.9.48 (from llama-index)
  Downloading llama_index_legacy-0.9.48-py3-none-any.whl.metadata (8.5 kB)
Co

In [6]:
!pip install gradio

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting gradio
  Downloading gradio-4.26.0-py3-none-any.whl.metadata (15 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting altair<6.0,>=4.2.0 (from gradio)
  Downloading altair-5.3.0-py3-none-any.whl.metadata (9.2 kB)
Collecting fastapi (from gradio)
  Downloading fastapi-0.110.1-py3-none-any.whl.metadata (24 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.3.2.tar.gz (5.5 kB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting gradio-client==0.15.1 (from gradio)
  Downloading gradio_client-0.15.1-py3-none-any.whl.metadata (7.1 kB)
Collecting importlib-resources<7.0,>=1.3 (from gradio)
  Downloading importlib_resources-6.4.0-py3-none-any.whl.metadata (3.9 kB)
Collecting matplotlib~=3.0 (from gradio)
  Downloading matplotlib-3.8.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.8 kB)
Collecting orjson~=3.0 (from gradio)
  Downloading orjson-3.10.0-cp310-cp310-manylin

In [8]:
import llama_index
import weaviate
from importlib.metadata import version
import gradio as gr
print(f"LlamaIndex version: {version('llama_index')}")
print(f"Weaviate version: {version('weaviate-client')}")

"""### Set your OpenAI API key

This tutorial uses an embedding model and LLM from OpenAI, for which you will need an API key set as an evironment variable.
"""

import os
from dotenv import load_dotenv,find_dotenv

# Use this line of code if you have a local .env file
#load_dotenv(find_dotenv())
os.environ["OPENAI_API_KEY"] = "YOUR KEY"
#print(os.environ["OPENAI_API_KEY"])

from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from llama_index.core.settings import Settings

Settings.llm = OpenAI(model="gpt-3.5-turbo", temperature=0.1)
Settings.embed_model = OpenAIEmbedding()

LlamaIndex version: 0.10.29
Weaviate version: 3.26.2


In [3]:
"""## Step 2: Load data


"""

from llama_index.core import SimpleDirectoryReader

# Load data
# documents = SimpleDirectoryReader(
#         input_files=["./data/paul_graham_essay.txt"]
# ).load_data()
documents = SimpleDirectoryReader('moondream_responses').load_data()
documents

"""## Step 3: Chunk documents into Nodes


"""

from llama_index.core.node_parser import SentenceWindowNodeParser

# create the sentence window node parser w/ default settings
node_parser = SentenceWindowNodeParser.from_defaults(
    window_size=3,
    window_metadata_key="window",
    original_text_metadata_key="original_text",
)

# Extract nodes from documents
nodes = node_parser.get_nodes_from_documents(documents)

i=15
print(f"Text: \n{nodes[i].text}")
print("------------------")
print(f"Window: \n{nodes[i].metadata['window']}")

Text: 
4. 
------------------
Window: 
Proper cable management: Ensure that all cables, including the ones connecting the keyboard, mouse, and monitor, are managed and organized to avoid tangling and potential hazards.

 3.  Ergonomics: Use an ergonomic chair and keyboard to maintain a comfortable posture while working for extended periods.

 4.  Regular breaks: Take regular breaks to avoid eye strain, fatigue, and maintain focus.

 5.  Monitor placement: Position the monitor at an appropriate height and distance to reduce eye strain and maintain a comfortable viewing angle.




In [4]:
"""## Step 4: Build the index



"""

import weaviate

# Connect to your Weaviate instance
client = weaviate.Client(
    embedded_options=weaviate.embedded.EmbeddedOptions(),
)

print(f"Client is ready: {client.is_ready()}")

# Print this line to get more information about the client
# client.get_meta()

from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.vector_stores.weaviate import WeaviateVectorStore

index_name = "MyExternalContext"

# Construct vector store
vector_store = WeaviateVectorStore(
    weaviate_client = client,
    index_name = index_name
)

# Set up the storage for the embeddings
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# If an index with the same index name already exists within Weaviate, delete it
if client.schema.exists(index_name):
    client.schema.delete_class(index_name)

# Setup the index
# build VectorStoreIndex that takes care of chunking documents
# and encoding chunks to embeddings for future retrieval
index = VectorStoreIndex(
    nodes,
    storage_context = storage_context,
)

import json
response = client.schema.get(index_name)

print(json.dumps(response, indent=2))

from llama_index.core.postprocessor import MetadataReplacementPostProcessor

# The target key defaults to `window` to match the node_parser's default
postproc = MetadataReplacementPostProcessor(
    target_metadata_key="window"
)

"""### Add a re-ranker

"""

from llama_index.core.postprocessor import SentenceTransformerRerank

# BAAI/bge-reranker-base
# link: https://huggingface.co/BAAI/bge-reranker-base
rerank = SentenceTransformerRerank(
    top_n = 2,
    model = "BAAI/bge-reranker-base"
)

Binary /root/.cache/weaviate-embedded did not exist. Downloading binary from https://github.com/weaviate/weaviate/releases/download/v1.23.0/weaviate-v1.23.0-Linux-amd64.tar.gz
Started /root/.cache/weaviate-embedded: process ID 760


{"action":"startup","default_vectorizer_module":"none","level":"info","msg":"the default vectorizer modules is set to \"none\", as a result all new schema classes without an explicit vectorizer setting, will use this vectorizer","time":"2024-04-15T02:34:21Z"}
{"action":"startup","auto_schema_enabled":true,"level":"info","msg":"auto schema enabled setting is set to \"true\"","time":"2024-04-15T02:34:21Z"}
{"level":"info","msg":"No resource limits set, weaviate will use all available memory and CPU. To limit resources, set LIMIT_RESOURCES=true","time":"2024-04-15T02:34:21Z"}
{"action":"grpc_startup","level":"info","msg":"grpc server listening at [::]:50060","time":"2024-04-15T02:34:21Z"}
{"action":"restapi_management","level":"info","msg":"Serving weaviate at http://127.0.0.1:8079","time":"2024-04-15T02:34:21Z"}
            Please consider upgrading to the latest version. See https://weaviate.io/developers/weaviate/client-libraries/python for details.
{"level":"info","msg":"Created shard

Client is ready: True


{"level":"info","msg":"Created shard myexternalcontext_YDpfFX9dYlUi in 3.934011ms","time":"2024-04-15T02:34:33Z"}
{"action":"hnsw_vector_cache_prefill","count":1000,"index_id":"main","level":"info","limit":1000000000000,"msg":"prefilled vector cache","time":"2024-04-15T02:34:33Z","took":74991}


{
  "class": "MyExternalContext",
  "description": "This property was generated by Weaviate's auto-schema feature on Mon Apr 15 02:34:33 2024",
  "invertedIndexConfig": {
    "bm25": {
      "b": 0.75,
      "k1": 1.2
    },
    "cleanupIntervalSeconds": 60,
    "stopwords": {
      "additions": null,
      "preset": "en",
      "removals": null
    }
  },
  "multiTenancyConfig": {
    "enabled": false
  },
  "properties": [
    {
      "dataType": [
        "text"
      ],
      "description": "This property was generated by Weaviate's auto-schema feature on Mon Apr 15 02:34:33 2024",
      "indexFilterable": true,
      "indexSearchable": true,
      "name": "original_text",
      "tokenization": "word"
    },
    {
      "dataType": [
        "text"
      ],
      "description": "This property was generated by Weaviate's auto-schema feature on Mon Apr 15 02:34:33 2024",
      "indexFilterable": true,
      "indexSearchable": true,
      "name": "last_modified_date",
      "tokenizat

config.json:   0%|          | 0.00/799 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.11G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/443 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/279 [00:00<?, ?B/s]

In [5]:
"""
 set `vector_store_query_mode` to `"hybrid"`+`alpha` parameter to control the weighting between semantic and keyword based search.
"""

# The QueryEngine class is equipped with the generator
# and facilitates the retrieval and generation steps
query_engine = index.as_query_engine(
    similarity_top_k = 6,
    vector_store_query_mode="hybrid",
    alpha=0.5,
    node_postprocessors = [postproc, rerank],
)

# Use your Default RAG
response = query_engine.query(
    "list all objects that are good for coding but bad for"
)
print(str(response))

window = response.source_nodes[0].node.metadata["window"]
sentence = response.source_nodes[0].node.metadata["original_text"]

print(f"Window: {window}")
print("------------------")
print(f"Original Sentence: {sentence}")

Nintendo Wii remote, laptop, keyboard
Window: By implementing these suggestions, the gaming and coding experience in the room can be enhanced, leading to increased productivity and enjoyment.
 [Frame 140] [2024-04-14 04:24:54] The image features a dark room with a blue wall, creating a cool and calming ambience.  The room is illuminated by blue lights, which contribute to the overall atmosphere.
 [Frame 150] [2024-04-14 04:24:58] In the image, there are two objects that can be good or bad for gaming and coding.  The first object is a Nintendo Wii remote, which is a gaming controller that allows for motion-based gameplay and interaction.  This can be beneficial for gaming and coding as it encourages physical activity and can help improve hand-eye coordination.

 The second object is a Wii remote control, which is a controller for the Wii gaming console. 
------------------
Original Sentence: [Frame 150] [2024-04-14 04:24:58] In the image, there are two objects that can be good or bad fo

In [10]:
def chatbot(input_text):
    response = query_engine.query(input_text)
    return str(response)

iface = gr.Interface(
    fn=chatbot,
    inputs=gr.components.Textbox(lines=7, label="Enter your text"),
    outputs="text",
    title="Chatbot",
)

iface.launch(share=True)

        on_event is deprecated, use lifespan event handlers instead.

        Read more about it in the
        [FastAPI docs for Lifespan Events](https://fastapi.tiangolo.com/advanced/events/).
        
  @app.on_event("startup")
        on_event is deprecated, use lifespan event handlers instead.

        Read more about it in the
        [FastAPI docs for Lifespan Events](https://fastapi.tiangolo.com/advanced/events/).
        
  return self.router.on_event(event_type)
  s = socket.socket()


Running on local URL:  http://127.0.0.1:7861
Running on public URL: https://d6be112d57da719ae8.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




Replace `TemplateResponse(name, {"request": request})` by `TemplateResponse(request, name)`.
