# RAG Testbed

### RAG preparation

In [1]:
%pip install llama-index-vector-stores-weaviate
%pip install llama-index-embeddings-ollama
%pip install llama-index-embeddings-langchain
%pip install llama-index-llms-ollama

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Collecting llama-index-embeddings-langchain
  Downloading llama_index_embeddings_langchain-0.1.1-py3-none-any.whl (2.7 kB)
Installing collected packages: llama-index-embeddings-langchain
Successfully installed llama-index-embeddings-langchain-0.1.1
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [2]:
import weaviate

from llama_index.core import StorageContext, ServiceContext, VectorStoreIndex, Document, get_response_synthesizer, Settings
from llama_index.vector_stores.weaviate import WeaviateVectorStore
from llama_index.core.embeddings import resolve_embed_model
from llama_index.core import DocumentSummaryIndex
from llama_index.llms.ollama import Ollama
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.postprocessor import SimilarityPostprocessor
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core import PromptTemplate


import box
import yaml
import warnings

import logger as log

In [None]:
# logger = log.init_logger(__name__)
# logger.debug("start")


In [3]:
config_path='./.venv/config.yml'
with open(config_path, 'r', encoding='utf8') as ymlfile:
    cfg = box.Box(yaml.safe_load(ymlfile))

In [4]:
client = weaviate.Client(cfg.WEAVIATE_URL)

            Please consider upgrading to the latest version. See https://weaviate.io/developers/weaviate/client-libraries/python for details.


In [5]:
Settings.embed_model = OllamaEmbedding(cfg.LLM)

In [6]:
llm = Ollama(
            model=cfg.LLM,
            base_url=cfg.OLLAMA_BASE_URL,
            temperature=cfg.TEMPERATURE
        )

In [7]:
index_name = "A1150"

In [8]:
vector_store = WeaviateVectorStore(
    weaviate_client=client,
    # index_name=cfg.INDEX_NAME
    index_name = index_name
)

In [9]:
storage_context = StorageContext.from_defaults(
    vector_store=vector_store
) 

In [10]:
response_synthesizer = get_response_synthesizer(
    response_mode="tree_summarize",
    use_async=False,
    llm = llm
)    

In [None]:
from llama_index.core import VectorStoreIndex


In [11]:
index_main = VectorStoreIndex.from_vector_store(
    vector_store = vector_store,
)

In [16]:
retriever = VectorIndexRetriever(
    index=index_main,
    similarity_top_k=3
) 

In [17]:
# zulip_query_engine = index_main.as_query_engine()
zulip_query_engine = RetrieverQueryEngine(
    retriever=retriever,
    response_synthesizer=response_synthesizer,
    node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.5)],
    
)

In [14]:
#### sample code to load a document into the current index (index_main)
# with open("data/_announcements/(Fibery) Brainstorm calls migration", 'r') as file:
#     text = " ".join(line.rstrip() for line in file)
# document = Document(text=text, doc_id="brainstorm", metadata={"stream": "_announcements", "doc_name": "brainstorm"})
# documents=[document]
# index_main.insert(document, show_progress=True)

  from .autonotebook import tqdm as notebook_tqdm
Generating embeddings: 100%|██████████| 9/9 [00:04<00:00,  2.05it/s]


In [18]:
response = zulip_query_engine.query("What are the imlications of scheduling a brainstorm about partners")
print(response)
for node in response.source_nodes:
    print(node.node_id, node.score, node.get_text())

Empty Response


### Creating a custom prompt

In [44]:
from llama_index.core import PromptTemplate
poem_template = (
    "Context information is below.\n"
    "---------------------\n"
    "{context_str}\n"
    "---------------------\n"
    "Provide a poem with the informaton given."
    "answer the question: {query_str}\n"
    "Answer: "
)
poem_prompt_template = PromptTemplate(poem_template)

summary_template = (
    "context information is below.\n"
    "---------------------\n"
    "{context_str}\n"
    "---------------------\n"
    "There are different topics discussed in the information provided.\n"
    "For each topic create a markdown output with the following structure:\n"
    "## Topic:\n"
    "### Keypoints:\n"
    "### Decissions and actions:\n"
    "answer the question: {query_str}\n"
    "Answer: "
)
summary_prompt_template = PromptTemplate(summary_template)

In [45]:
response_synthesizer2 = get_response_synthesizer(
    response_mode="tree_summarize",
    use_async=False,
    service_context=service_context,
    summary_template=summary_prompt_template
)


In [46]:
zulip_query_engine2 = RetrieverQueryEngine(
    retriever=retriever,
    response_synthesizer=response_synthesizer2,
    node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.3)],
    
)

In [47]:
prompt_dict = zulip_query_engine2.get_prompts()
for k,p in prompt_dict.items():
    text_md = f"**Prompt Key**: {k}<br>" f"**Text:** <br>"
    print(text_md)
    # OUTPUT: **Prompt Key**: response_synthesizer:summary_template<br>**Text:** <br>

**Prompt Key**: response_synthesizer:summary_template<br>**Text:** <br>


In [48]:
query_engine = RetrieverQueryEngine(retriever, response_synthesizer2)

In [49]:
doc_summary_index2 = DocumentSummaryIndex.from_documents(
    documents=documents,
    storage_context=storage_context,
    service_context=service_context,
    response_synthesizer=response_synthesizer2,
    show_progress=True,
   )

Parsing nodes: 100%|██████████| 1/1 [00:00<00:00, 106.45it/s]
Summarizing documents:   0%|          | 0/1 [00:00<?, ?it/s]

current doc id: brainstorm


Summarizing documents: 100%|██████████| 1/1 [00:17<00:00, 17.79s/it]
Generating embeddings: 100%|██████████| 1/1 [00:01<00:00,  1.75s/it]


In [50]:
response = doc_summary_index2.get_document_summary("brainstorm")
print(f'{response}')


## Topic: Organization of marketing and CS meetings

### Keypoints:

* There is a need to organize the marketing and CS meetings in a more efficient way.
* The current setup has some meetings duplicating each other's efforts.
* There are different aspects of marketing and CS that need to be addressed separately.

### Decisions and actions:

* Create a dedicated meeting for partnership projects/activities instead of discussing everything under a Marketing meeting.
* Discuss the differences between marketing and CS meetings and whether they ought to be combined from an organization perspective or not.
* Consider adding tags to tasks in Fibery, such as `aspect/marketing` and `product/balenaOS`, etc., to better filter and organize data.

## Topic: Practical organization in Fibery

### Keypoints:

* There is a need for a more practical organization structure in Fibery.
* The current setup may not be efficient and may require changes.
* Implementing tags to tasks can help with filtering and

In [None]:
raise SystemExit

### Summary: using DocumentSummaryIndex and persisting in filesystem

In [None]:
doc_summary_index = DocumentSummaryIndex.from_documents(
    documents=documents,
    storage_context=storage_context,
    service_context=service_context,
    response_synthesizer=response_synthesizer,
    show_progress=True
   )

In [None]:
# persist summary in storage
doc_summary_index.storage_context.persist(persist_dir="./storage")

In [51]:
# retrieve summary from storage
from llama_index.core import load_index_from_storage
doc_summary_index2 = load_index_from_storage(storage_context=storage_context, service_context=service_context)

ValueError: Expected to load a single index, but got 4 instead. Please specify index_id.

In [None]:
print(doc_summary_index2.get_document_summary("brainstorm"))

## Topic: Organizing Tasks with Tags

### Keypoints:

* Adding tags to tasks to indicate their relevance to specific aspects or products is a great idea.
* Examples of tags that could be used include `aspect/marketing` for tasks related to marketing efforts, and `product/balenaOS` for tasks related to the BalenaOS product.
* Using tags in this way can help us organize and filter our tasks more effectively.

### Decisions and actions:

* Implement the "process/partnerships" tag for Brainstorm calls related to partnerships.
* Consider adding additional tags for other areas where a filter might be useful (e.g. security, device support).


### Generating a summary of an existing stored document using it's title
I haven't been able to retreive the summary index. The index is not working. I've tried also persist in storage_context(storage_dir="./storage"), but this is not storing or loading any indexes.
Shame...

from llama_index.indices.document_summary import DocumentSummaryIndexLLMRetriever
from llama_index.indices.loading import load_index_from_storage, load_indices_from_storage
doc_summary_index = load_indices_from_storage(storage_context=storage_context, index_id=index_name)