In [1]:
import weaviate

from llama_index import StorageContext, SimpleDirectoryReader, ServiceContext, VectorStoreIndex, Document, get_response_synthesizer
from llama_index.vector_stores import WeaviateVectorStore
from llama_index.embeddings import LangchainEmbedding, OllamaEmbedding
from llama_index.indices.document_summary import DocumentSummaryIndex
from llama_index.llms import Ollama
from llama_index.query_engine import RetrieverQueryEngine
from llama_index.postprocessor import SimilarityPostprocessor
from llama_index.retrievers import VectorIndexRetriever


import box
import yaml
import warnings

import logger as log

In [2]:
# logger = log.init_logger(__name__)
# logger.debug("start")


In [3]:
# Load the YAML settings file; wrapping the parsed mapping in a Box lets later
# cells read settings as attributes (cfg.WEAVIATE_URL, cfg.LLM, ...).
config_path = "./.venv/config.yml"
with open(config_path, mode="r", encoding="utf8") as ymlfile:
    cfg = box.Box(yaml.safe_load(ymlfile))

In [4]:
# Weaviate client pointed at the server URL taken from the config file.
client = weaviate.Client(url=cfg.WEAVIATE_URL)

In [5]:
# LLM served through Ollama; model, server URL and sampling temperature all
# come from the config file.
llm = Ollama(
    base_url=cfg.OLLAMA_BASE_URL,
    model=cfg.LLM,
    temperature=cfg.TEMPERATURE,
)

In [6]:
# Embedding model served by Ollama (same model name as the LLM).
# base_url is passed explicitly so embeddings are requested from the same Ollama
# server as the LLM above, instead of the library's default local address.
embeddings = OllamaEmbedding(
    model_name=cfg.LLM,
    base_url=cfg.OLLAMA_BASE_URL,
)

In [7]:
# Bundle the Ollama LLM and embedding model so every index/synthesizer below
# is built with them.
service_context = ServiceContext.from_defaults(llm=llm, embed_model=embeddings)

In [8]:
# Name handed to WeaviateVectorStore as `index_name` in the next cell.
index_name: str = "A1148"

In [9]:
# Vector store backed by the Weaviate client. The notebook-level `index_name`
# is used here rather than cfg.INDEX_NAME.
vector_store = WeaviateVectorStore(
    index_name=index_name,
    weaviate_client=client,
)

In [10]:
# Storage context whose vector store is the Weaviate-backed store above.
storage_context = StorageContext.from_defaults(vector_store=vector_store)

In [11]:
# Synchronous "tree_summarize" response synthesizer that runs on the Ollama
# service context.
response_synthesizer = get_response_synthesizer(
    service_context=service_context,
    response_mode="tree_summarize",
    use_async=False,
)

In [12]:
from llama_index import VectorStoreIndex

In [13]:
# Main index, built on top of the existing Weaviate vector store.
index_main = VectorStoreIndex.from_vector_store(
    service_context=service_context,
    vector_store=vector_store,
)

In [14]:
# Retriever over the main index, asking for the 4 most similar nodes per query.
retriever = VectorIndexRetriever(similarity_top_k=4, index=index_main)

In [15]:
# Query engine assembled by hand (instead of index_main.as_query_engine()) so
# the retriever, the synthesizer and a similarity cutoff of 0.3 can be set
# explicitly.
zulip_query_engine = RetrieverQueryEngine(
    node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.3)],
    response_synthesizer=response_synthesizer,
    retriever=retriever,
)

In [16]:
# Ask a sample question through the query engine and print the answer.
response = zulip_query_engine.query(
    "What are the imlications of scheduling a brainstorm about partners"
)
print(response)

The implications of scheduling a brainstorm about partners are:

1. Efficient organization: Scheduling a brainstorm specifically for partnership projects/activities can help ensure that these tasks are efficiently organized and prioritized within the Marketing meeting. This can help avoid confusion or overlap with other marketing activities.
2. Clear communication: By dedicating a separate meeting to partnership projects/activities, it ensures that there is clear communication and focus on these tasks without distractions from other marketing areas.
3. Better categorization: Using tags such as `aspect/marketing` and `product/balenaOS` can help categorize tasks more effectively within Fibery, making it easier to filter and organize them.
4. Improved navigation: Having a convention for naming streams and tags can help people navigate and find the information they need more efficiently.
5. Handling high-touch customers: By dedicating a separate meeting to partnership projects/activities, 

In [17]:
# for node in response.source_nodes:
#     print(node.node_id, node.score, node.get_text())

In [18]:
#### sample code to load a document into the current index (index_main)
# The file is read with an explicit UTF-8 encoding (as the config file is) so
# decoding does not depend on the platform's default locale.
with open("data/_announcements/(Fibery) Brainstorm calls migration", 'r', encoding='utf8') as file:
    # Collapse the file into one line: strip trailing whitespace per line and join with spaces.
    text = " ".join(line.rstrip() for line in file)
document = Document(text=text, doc_id="brainstorm2", metadata={"stream": "_announcements", "doc_name": "brainstorm2"})
# Kept as a list because the document-summary cell below takes `documents`.
documents=[document]
# NOTE(review): re-running this cell inserts the same text again; confirm how
# the vector store handles a repeated doc_id.
index_main.insert(document, show_progress=True)

  from .autonotebook import tqdm as notebook_tqdm
Generating embeddings: 100%|██████████| 8/8 [00:04<00:00,  1.99it/s]


In [19]:
# Build a document-summary index over `documents`, reusing the shared storage
# context, service context and response synthesizer.
doc_summary_index = DocumentSummaryIndex.from_documents(
    documents=documents,
    service_context=service_context,
    storage_context=storage_context,
    response_synthesizer=response_synthesizer,
    show_progress=True,
)

Parsing nodes: 100%|██████████| 1/1 [00:00<00:00, 86.53it/s]
Summarizing documents:   0%|          | 0/1 [00:00<?, ?it/s]

current doc id: brainstorm2


Summarizing documents: 100%|██████████| 1/1 [00:14<00:00, 14.38s/it]
Generating embeddings: 100%|██████████| 1/1 [00:01<00:00,  1.62s/it]


In [20]:
# Persist the summary index's storage context under ./storage.
doc_summary_index.storage_context.persist(persist_dir="./storage")

In [21]:
# Try to load the summary index back from the in-memory storage context.
# NOTE(review): the recorded run of this cell ended in a ValueError about a
# missing OpenAI API key; the cause is not visible in this cell - confirm.
from llama_index.indices.loading import load_index_from_storage
doc_summary_index2 = load_index_from_storage(
    service_context=service_context,
    storage_context=storage_context,
)

ValueError: 
******
Could not load OpenAI model. If you intended to use OpenAI, please check your OPENAI_API_KEY.
Original error:
No API key found for OpenAI.
Please set either the OPENAI_API_KEY environment variable or openai.api_key prior to initialization.
API keys can be found or created at https://platform.openai.com/account/api-keys

To disable the LLM entirely, set llm=None.
******

### Generating a summary of an existing stored document using its title
I haven't been able to retrieve the summary index; loading it does not work. I've also tried persisting with storage_context(storage_dir="./storage"), but this neither stores nor loads any indexes.
Shame...

# Reload the document-summary index from the storage context.
# load_indices_from_storage() takes `index_ids` (plural) and returns a list, so
# the previous `index_id=index_name` was not applied as an id filter. The
# single-index loader is used instead.
# `index_name` is the Weaviate index name, not an index id, so no id is passed:
# with exactly one index in the store, that index is loaded.
# service_context is forwarded so the loaded index uses the Ollama LLM and
# embeddings rather than the library default (OpenAI, which needs OPENAI_API_KEY).
from llama_index.indices.document_summary import DocumentSummaryIndexLLMRetriever
from llama_index.indices.loading import load_index_from_storage, load_indices_from_storage
doc_summary_index = load_index_from_storage(
    storage_context=storage_context,
    service_context=service_context,
)