In [18]:
# imports
# !pip install langchain-chroma

In [15]:

import os
import glob
from dotenv import load_dotenv
import gradio as gr
from langchain_chroma import Chroma
from openai import OpenAI
import pandas as pd
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
import numpy as np
# import plotly.graph_objects as go
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

In [16]:
# Load environment variables in a file called .env
# price is a factor for our company, so we're going to use a low cost model

MODEL = "gpt-4o-mini"
load_dotenv()
os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')
openai = OpenAI()
db_name = "vector_db_new2"

In [17]:
loader = CSVLoader(file_path='vm_data.csv')
docs = loader.load_and_split()

In [18]:


# Put the chunks of data into a Vector Store that associates a Vector Embedding with each chunk
# Chroma is a popular open source Vector Database based on SQLLite

embeddings = OpenAIEmbeddings()

# If you would rather use the free Vector Embeddings from HuggingFace sentence-transformers
# Then replace embeddings = OpenAIEmbeddings()
# with:
# from langchain.embeddings import HuggingFaceEmbeddings
# embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Delete if already exists

if os.path.exists(db_name):
    Chroma(persist_directory=db_name, embedding_function=embeddings).delete_collection()

# Create vectorstore

vectorstore = Chroma.from_documents(documents=docs, embedding=embeddings, persist_directory=db_name)
print(f"Vectorstore created with {vectorstore._collection.count()} documents")

Vectorstore created with 100 documents


In [19]:
# create a new Chat with OpenAI
llm = ChatOpenAI(temperature=0.7, model_name=MODEL)

In [20]:
retriever = vectorstore.as_retriever()

In [21]:
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

In [22]:
system_prompt=(""" You are helpful assistant expert in AppBrewery's VMdata below is metadata for the VMs
    1. VM -> Name of the virtual machine.
    2. State -> Indicates whether the VM is ON or OFF.
    3. Status -> Describes the VM's condition (Normal or Special).
    4. Host -> The physical server hosting the VM.
    5. Cluster -> The group of hosts managing the VM.
    6. Provisioned_Space -> Total storage allocated to the VM.
    7. Used_Space -> Storage currently used by the VM.
    8. HostCPU -> CPU allocation on the host machine.
    9. HostMem -> Memory allocated to the VM.
    10. KN_SNC_DT -> Known since date of the VM record.
    11. Decom_date -> Planned decommissioning date of the VM.
    {context}""")
    
    
prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("human", "{input}"),
    ])


In [23]:
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [30]:
answer= rag_chain.invoke({"input": "What is KN_SNC_DT ?"})
answer['answer']

'KN_SNC_DT stands for "Known Since Date." It indicates the date on which the virtual machine record was first created or known in the system. This date is often used for tracking purposes and can provide insights into how long the VM has been in service or under management. The format is typically in YYYYMMDD.'

In [31]:
answer= rag_chain.invoke({"input": "What is EAPSIPVXT9753 ?"})
answer['answer']

'EAPSIPVXT9753 is the name of a cluster, which is a group of physical servers that manage and host virtual machines (VMs). Clusters are used to provide resource pooling, load balancing, and redundancy for VMs, ensuring efficient management and high availability of services. In the provided metadata, there are two VMs that belong to this cluster: test92.fixaa.net and dev75.fixaa.net.'

In [32]:
answer= rag_chain.invoke({"input": "How many VMs are there in AppBrewery ?"})
answer['answer']

'There are a total of 4 virtual machines (VMs) in AppBrewery.'

In [24]:
def chat(question, history):
    result = rag_chain.invoke({"input": question})
    return result["answer"]

In [25]:
view = gr.ChatInterface(chat, type="messages").launch(inbrowser=True)

* Running on local URL:  http://127.0.0.1:7861

To create a public link, set `share=True` in `launch()`.
