In [1]:
from meeting_mate.mongo.mongo import PLAIN_INSTANCE as mongo
from meeting_mate.llm.models import EmbeddingModels, EmbeddingsModel, LangchainEmbeddingsModel
from langchain_mongodb import MongoDBAtlasVectorSearch


In [2]:
# Handle to the "facts" collection on the Mongo instance imported in the first cell.
collection = mongo.db["facts"]
# User id passed as the user filter/argument to the searches and model calls in this notebook.
# NOTE(review): hard-coded account identifier — consider loading it from config/env before sharing.
user_id = "106936893069932136953"

In [3]:
# Project embedding model, plus a LangChain-facing wrapper bound to `user_id`.
# NOTE(review): `user`/`purpose` are presumably used for usage attribution — confirm in LangchainEmbeddingsModel.
model = EmbeddingsModel(EmbeddingModels.NOMIC_EMBED_TEXT_1_5)
embeddings = LangchainEmbeddingsModel(model=model, user=user_id, purpose="data science")
# LangChain vector store over `collection`, configured with index "vector_index" and text key "facts".
vectorSearch = MongoDBAtlasVectorSearch(collection=collection, embedding=embeddings, index_name="vector_index", text_key="facts")

In [4]:
# Query string used by the retrieval cells that follow.
question = "What is technical background of the Vista Retail team?"

In [5]:
# Baseline: LangChain similarity search, with a pre_filter on this user's `user_id`.
vectorSearch.similarity_search(query=question, pre_filter={"user_id": user_id})

[Document(page_content="* Vista Retail Solutions currently uses a Relational DB, primarily MySQL.\n* Vista Retail Solutions has high write loads and needs real-time analytics for user activity and product tracking.\n* Erin Smith is an IT Security Officer at Vista Retail Solutions, focusing on data security.\n* Erin Smith is responsible for data security at Vista Retail Solutions.\n* Vista Retail Solutions needs to comply with GDPR due to its European market presence.\n* Vista Retail Solutions needs to consider HIPAA compliance for handling employee health information in benefits management.\n* TODO: Erin Smith will review MongoDB's compliance certifications and prepare a security audit checklist.", metadata={'_id': {'$oid': '665eceee8802b366155d855a'}, 'doc_id': '1T7MonCnvmvLUN0PABmwsQsg0CiqcFJjWgsUiQCi9hO4', 'user_id': '106936893069932136953', 'organizations': ['MongoDB', 'MongoDB Atlas', 'MongoDB Inc.', 'Vista Retail Solutions']}),
 Document(page_content='* Vista Retail Solutions cur

In [6]:
# Re-import the project retrieval module so edits to it are picked up without restarting the kernel.
import importlib
import meeting_mate.mongo.retrieval as retrieval
importlib.reload(retrieval)

# Project retriever over the "facts" collection, configured with both a vector index and a text index.
retriever = retrieval.Retriever(colname="facts", embeddingModel=model, vector_index="vector_index", embedding_field="embedding", text_index="text_index", text_field="facts", user_field="user_id")
# NOTE(review): leftover commented-out call; a vector search with an org filter is run in a later cell.
#retriever.vector_search(query=question, user=user_id, purpose="data science")

In [7]:
# List the values of the "organizations" field for this user.
# NOTE(review): presumably the distinct values across the user's documents — confirm against Retriever.facet.
retriever.facet(user=user_id, field="organizations")

['MongoDB',
 'MongoDB Inc.',
 'Vista Retail Solutions',
 'MongoDB Atlas',
 'Hellevators',
 'Contonso',
 'Kontäxt Software',
 'Tech Solutions']

In [8]:
# Keyword search for the question, limited to a single organization.
# The call returns a pair; `pipe` is presumably the aggregation pipeline that was run — TODO confirm.
results, pipe = retriever.keyword_search(query=question, user=user_id, orgs=["Vista Retail Solutions"])
results


[{'_id': ObjectId('665eceee8802b366155d8559'),
  'doc_id': '1T7MonCnvmvLUN0PABmwsQsg0CiqcFJjWgsUiQCi9hO4',
  'user_id': '106936893069932136953',
  'organizations': ['MongoDB',
   'MongoDB Atlas',
   'MongoDB Inc.',
   'Vista Retail Solutions'],
  'facts': '* Bob Lee is a Project Manager at Vista Retail Solutions, interested in modernizing their database.\n* Charlie Kim is a Software Engineer at Vista Retail Solutions, looking for scalable solutions.\n* Bob Lee and Charlie Kim work for Vista Retail Solutions.\n* Bob Lee is the Project Manager, and Charlie Kim is a Software Engineer.\n* TODO: Bob Lee will share existing schema and top queries.\n* Charlie Kim is a Software Engineer with a focus on performance details.\n* TODO: Charlie Kim will prototype a MongoDB shard with sample product data.\n* Bob Lee is a Project Manager at Vista Retail Solutions, focusing on compliance.\n* Bob Lee is responsible for compliance at Vista Retail Solutions.\n* TODO: Bob Lee will organize a compliance re

In [9]:
# Vector search for the same question and organization.
# NOTE: this rebinds `results` and `pipe`, replacing the keyword-search values from the previous cell.
results, pipe = retriever.vector_search(query=question, user=user_id, purpose="data science", orgs=["Vista Retail Solutions"])
results

[{'_id': ObjectId('665eceee8802b366155d855a'),
  'doc_id': '1T7MonCnvmvLUN0PABmwsQsg0CiqcFJjWgsUiQCi9hO4',
  'user_id': '106936893069932136953',
  'organizations': ['MongoDB',
   'MongoDB Atlas',
   'MongoDB Inc.',
   'Vista Retail Solutions'],
  'facts': "* Vista Retail Solutions currently uses a Relational DB, primarily MySQL.\n* Vista Retail Solutions has high write loads and needs real-time analytics for user activity and product tracking.\n* Erin Smith is an IT Security Officer at Vista Retail Solutions, focusing on data security.\n* Erin Smith is responsible for data security at Vista Retail Solutions.\n* Vista Retail Solutions needs to comply with GDPR due to its European market presence.\n* Vista Retail Solutions needs to consider HIPAA compliance for handling employee health information in benefits management.\n* TODO: Erin Smith will review MongoDB's compliance certifications and prepare a security audit checklist.",
  'vectorSearchScore': 0.8426724672317505},
 {'_id': Object

In [28]:
def build_context(chunks, question):
    """Render retrieved chunks and a question into the user-message text.

    Args:
        chunks: Iterable of mappings; each must provide its text under the
            "facts" key (a missing key raises KeyError).
        question: The question appended after the facts.

    Returns:
        A string of the form "Facts:\\n<facts joined by newlines>\\nQuestion: <question>\\n".
    """
    pieces = [
        "Facts:\n",
        "\n".join([entry["facts"] for entry in chunks]),
        f"\nQuestion: {question}\n",
    ]
    return "".join(pieces)

# System message for the chat model; it is sent ahead of the facts/question text produced by build_context.
system_prompt = """You are a question answering assistant. You will be provided with a set of facts extracted from documents, followed by a question.
Answer the question based on the provided facts. Disregard any facts or statements irrelevant to the question. Feel free to answer in markdown!"""

In [22]:
# Preview the context text built from the current `results` and `question`.
build_context(results, question)

"Facts:\n* Vista Retail Solutions currently uses a Relational DB, primarily MySQL.\n* Vista Retail Solutions has high write loads and needs real-time analytics for user activity and product tracking.\n* Erin Smith is an IT Security Officer at Vista Retail Solutions, focusing on data security.\n* Erin Smith is responsible for data security at Vista Retail Solutions.\n* Vista Retail Solutions needs to comply with GDPR due to its European market presence.\n* Vista Retail Solutions needs to consider HIPAA compliance for handling employee health information in benefits management.\n* TODO: Erin Smith will review MongoDB's compliance certifications and prepare a security audit checklist.\n* Vista Retail Solutions currently uses a Relational DB, primarily MySQL, with heavy joins and slow queries under load.\n* Vista Retail Solutions has high write loads and needs real-time analytics for user activity and product tracking.\n* The data patterns discussed were mostly writes with occasional heavy

In [29]:
from meeting_mate.llm.models import ChatModel, ChatModels
from langchain_core.messages import HumanMessage, SystemMessage

# One chat exchange: system instructions, then the facts + question as the user message.
sm = SystemMessage(system_prompt)
um = HumanMessage(build_context(results, question))

# invoke() is given the messages, a free-text purpose, and the user id.
# NOTE(review): purpose/user are presumably for usage tracking — confirm against ChatModel.invoke.
chatModel = ChatModel(ChatModels.LLAMA3_8B_INSTRUCT)
response = chatModel.invoke([sm, um], "For science", user_id)

In [24]:
from IPython.display import display, Markdown

# Render the reply as Markdown, since the system prompt allows markdown answers.
display(Markdown(response))

Based on the provided facts, we know that Vista Retail Solutions is building an e-commerce shop that requires real-time analytics for user activity and product tracking, high write loads, and dynamic pricing adjustments. The app also needs to handle employee health information in benefits management, which requires consideration of HIPAA compliance. Additionally, the app needs to comply with GDPR due to its presence in the European market.

In [30]:
# Show the system prompt string currently bound in the kernel.
system_prompt

'You are a question answering assistant. You will be provided with a set of facts extracted from documents, followed by a question.\nAnswer the question based on the provided facts. Disregard any facts or statements irrelevant to the question. Feel free to answer in markdown!'

In [31]:
# Chat model instance that get_answer reads from notebook scope.
chatModel = ChatModel(ChatModels.LLAMA3_8B_INSTRUCT)
def get_answer(question:str, user_id:str, orgs:list[str]=None):
    """Answer a question from stored facts: retrieve, build the prompt, ask the chat model.

    Relies on notebook-level `retriever`, `build_context`, `system_prompt`
    and `chatModel`.

    Args:
        question: Natural-language question; used both as the search query
            and in the prompt.
        user_id: Passed to the retriever as `user` and to the chat model call.
        orgs: Organization names forwarded to the retriever's `orgs` argument;
            the default None is forwarded unchanged.

    Returns:
        Whatever `chatModel.invoke` returns — presumably the reply text, since
        the notebook renders it with Markdown; confirm against ChatModel.invoke.
    """
    # vector_search returns a pair; only the retrieved chunks are needed here.
    chunks, _ = retriever.vector_search(query=question, user=user_id, purpose="data science", orgs=orgs)

    # Build both messages locally. The earlier version stored the system
    # message in `sma` but sent `sm`, so it depended on a global left behind by
    # another cell (NameError on a fresh kernel, or a stale prompt otherwise).
    system_message = SystemMessage(system_prompt)
    user_message = HumanMessage(build_context(chunks, question))
    return chatModel.invoke([system_message, user_message], "For science", user_id)

In [32]:
# Run the full retrieve-and-answer pipeline on a new question, scoped to one organization.
# NOTE: this rebinds the notebook-level `question` set in an earlier cell.
question = "What do we know about the app they're building?"
display(Markdown(get_answer(question, user_id, ["Hellevators"])))

Here's what we know about the app they're building:
* The new platform aims to collect sensor information to analyze and predict failures.
* The new platform will enable pro-active maintenance based on wear & tear.
* The app will collect metrics and periodically push data to the cloud.
* The team is planning to deploy small edge devices with a local database running to collect metrics.
* The team is exploring a combination of time series databases and Postgres for miscellaneous data.
* The app will have dashboards, and Ted Bill, a full stack developer, is in charge of building them.
* The team is considering moving into the cloud and using something native like Aurora.
* The team is interested in exploring edge servers for sync to Atlas.
* The app will have a focus on IoT and event platform development, as Steve Stevenson, the Product Owner, has a background in this area.