# RAG skeleton 
This notebook contains the skeleton of the RAG system. It is a very basic implementation that we are going to expand in later milestones.

In [6]:
import os
from pathlib import Path
from llama_index.llms.ollama import Ollama
from llama_index.core import VectorStoreIndex
from llama_index.core.embeddings import resolve_embed_model
from llama_index.readers.json import JSONReader
from llama_index.core.node_parser import JSONNodeParser
from llama_index.readers.file import FlatReader

### VectorDB

In [40]:
import psycopg2
from psycopg2 import sql

# Connection settings for the local Postgres instance. These module-level
# names are reused when the PGVectorStore is created.
# NOTE(review): credentials are hardcoded for local development — consider
# reading them from environment variables before sharing this notebook.
db_name = "vector_db"
host = "localhost"
password = "password"
port = "5432"
user = "superuser"

# Connect to the maintenance database ("postgres"), because the target
# database itself is dropped and recreated below.
conn = psycopg2.connect(
    dbname="postgres",
    host=host,
    password=password,
    port=port,
    user=user,
)
# DROP/CREATE DATABASE cannot run inside a transaction block.
conn.autocommit = True

try:
    with conn.cursor() as c:
        # Quote the database name as an SQL identifier instead of
        # interpolating it into the statement text.
        c.execute(
            sql.SQL("DROP DATABASE IF EXISTS {}").format(sql.Identifier(db_name))
        )
        c.execute(sql.SQL("CREATE DATABASE {}").format(sql.Identifier(db_name)))
finally:
    # The admin connection is only needed for the reset; release it so it
    # does not stay open for the lifetime of the kernel.
    conn.close()

In [41]:
from sqlalchemy import make_url
from llama_index.vector_stores.postgres import PGVectorStore

# Create the Postgres-backed vector store in the database created above,
# reusing the same connection settings.
vector_store = PGVectorStore.from_params(
    database=db_name,
    host=host,
    password=password,
    port=port,
    user=user,
    table_name="json_data",
    embed_dim=1024,  # must match the output size of the embedding model used below (BAAI/bge-m3), not an OpenAI model — NOTE(review): confirm 1024 against the model card
)

#### Loading and Indexing
Load the data and compute the embeddings of the documents.

In [36]:
# Embedding model shared by indexing (node embeddings) and querying (query embeddings).
embed_model = resolve_embed_model("local:BAAI/bge-m3")

  from .autonotebook import tqdm as notebook_tqdm


In [100]:
# Folder containing all the JSON files to index (relative to the notebook).
DATA_PATH = "./data/"

# Reader that turns each JSON file into documents.
reader = JSONReader()

# Optional JSON-aware parser, only needed if the documents should be split into nodes:
# parser = JSONNodeParser()

# LLM used to generate answers; request_timeout is the value passed to the Ollama client.
llm = Ollama(model="mistral", request_timeout=180.0)

In [8]:
# Build the document list from every JSON file found in DATA_PATH.
documents = []
# os.listdir returns names in arbitrary order; sort them so the document (and
# therefore node) order is identical on every run.
for filename in sorted(os.listdir(DATA_PATH)):
    if filename.endswith(".json"):
        file_path = os.path.join(DATA_PATH, filename)
        # Alternative, if the raw file should be loaded and split into nodes later:
        # documents.extend(FlatReader().load_data(Path(file_path)))
        documents.extend(reader.load_data(input_file=file_path))

# Alternative, to split the documents into nodes with the JSON parser:
# nodes = parser.get_nodes_from_documents(documents)


### Document splitting

Option 1: use a simple node parser with its default settings.

In [29]:
from llama_index.core.node_parser import SimpleNodeParser

# Split the loaded documents with the parser's default settings.
# NOTE: `nodes` is rebound by the manual chunking cells further down.
parser = SimpleNodeParser()
nodes = parser.get_nodes_from_documents(documents)

In [31]:
# Number of nodes produced by the node parser above.
len(nodes)

21

Option 2: control the entire pipeline yourself (this also lets you choose the chunk size).

In [32]:
from llama_index.core.node_parser import SentenceSplitter

# Chunk the raw document text manually with an explicit chunk size.
text_parser = SentenceSplitter(chunk_size=512)

# Parallel lists: text_chunks[i] was cut from documents[doc_idxs[i]], which
# lets a later step attach the source document's metadata to each chunk.
text_chunks, doc_idxs = [], []
for position, document in enumerate(documents):
    pieces = text_parser.split_text(document.text)
    text_chunks += pieces
    doc_idxs += [position] * len(pieces)

In [33]:
from llama_index.core.schema import TextNode

# Wrap every chunk in a TextNode carrying its source document's metadata.
# Each node receives its own copy of the metadata dict: assigning the
# document's dict directly would make all chunks of a document (and the
# document itself) share one mutable object.
nodes = []
for text_chunk, doc_idx in zip(text_chunks, doc_idxs):
    src_doc = documents[doc_idx]
    node = TextNode(
        text=text_chunk,
    )
    node.metadata = dict(src_doc.metadata)
    nodes.append(node)

In [34]:
# Number of TextNodes built from the sentence-split chunks.
len(nodes)

31

In [37]:
# Attach an embedding to every node; the embedded text is the node content
# rendered with metadata_mode="all".
for node in nodes:
    embedded_text = node.get_content(metadata_mode="all")
    node.embedding = embed_model.get_text_embedding(embedded_text)

### Storing
Load the embedded nodes into the vector database.

In [42]:
# Persist the embedded nodes in the vector store; the displayed result is the list of stored node ids.
vector_store.add(nodes)

['6d3f8ad6-855a-4994-8a8e-03faf0157b9e',
 'ce5ec7fc-ee5c-4ddf-b411-ec1ea7c20474',
 '22bd8ef6-46c6-4267-8444-db2eb02ca47d',
 '6d14b33f-2fde-449c-9b99-e64642ced000',
 'b2f9110b-ab12-4aaf-b680-09e247086e33',
 '9b2b46ae-60bc-45a7-a7d7-785a865938b4',
 'a413db72-d177-4707-82a2-0b63a545962d',
 '2de6ef55-b4c6-43ce-a910-39040cc769b3',
 '04ce9204-aada-4e36-ac38-94185b414349',
 'df81d1c5-6657-449e-a11c-94fce69546d9',
 'da78a9c8-9e39-4050-991e-38a30e451abb',
 '1ac1b262-d91a-4440-82aa-a2425945241c',
 '39b23c95-ff12-44c4-992b-577e7056ba78',
 '678da755-3560-4040-af62-a24bb71e8a28',
 '29bfb6e3-246a-4ea3-a57d-461976d30d9c',
 'ccf47206-b2c1-44f0-83ee-5af3d84a22c9',
 'c9f1f034-bc60-4221-843f-6255e8634691',
 'bc2abe0a-f015-44ae-83af-bd101c2b8ad4',
 '8ca641d7-a132-4ffd-a501-c14d7bece750',
 'e74e8c8d-20f5-4518-a0a1-071edf454f97',
 '7d388cdd-c6e5-4590-b261-ee42aefaa74e',
 '373bc565-e329-4638-87df-b79678197694',
 '3f163df4-8bb3-4313-ac4d-6235e0300c98',
 '956a96f8-fe57-4e9b-b10c-968d67325dc0',
 '56cdfae8-760f-

### Querying (milestone 2)

In [91]:
# Example question; it is embedded with the same model that embedded the nodes.
query_str = "General Summarized Overview Large Capacity Cutting Machine 2?"

query_embedding = embed_model.get_query_embedding(query_str)

In [92]:
# construct vector store query
from llama_index.core.vector_stores import VectorStoreQuery

# Retrieval mode passed to the vector store query; the alternatives are kept
# here for experimentation.
query_mode = "default"
# query_mode = "sparse"
# query_mode = "hybrid"

# Ask for the 2 nodes most similar to the query embedding.
vector_store_query = VectorStoreQuery(
    query_embedding=query_embedding, similarity_top_k=2, mode=query_mode
)

In [93]:
# returns a VectorStoreQueryResult
query_result = vector_store.query(vector_store_query)
# Show the text of the first returned node as a sanity check (assumes at least one hit).
print(query_result.nodes[0].get_content())

"machine_id": "Large Capacity Cutting Machine 2",
"month": "2024-03",
"KPIs": {
"average_cycle_time": {
"average": 0.0,
"min": 0.0,
"max": 0.0
"bad_cycles": {
"average": 0.0,
"min": 0.0,
"max": 0.0
"consumption": {
"average": 0.0009502908983908073,
"min": 0.0,
"max": 0.15497427194254568
"consumption_idle": {
"average": 0.0006513621264504764,
"min": 0.0,
"max": 0.03249318468435469
"consumption_working": {
"average": 0.00152712326133702,
"min": 0.0,
"max": 0.03329324345041983
"cost": {
"average": 0.000789856282015753,
"min": 0.0,
"max": 0.0
"cost_idle": {
"average": 0.0,
"min": 0.0,
"max": 0.0
"cost_working": {
"average": 0.0,
"min": 0.0,
"max": 0.0
"cycles": {
"average": 0.7419354838709677,
"min": 0.0,
"max": 18860.0
"good_cycles": {
"average": 651.0967741935484,
"min": 0.0,
"max": 18860.0
"idle_time": {
"average": 0.0,
"min": 0.0,
"max": 0.0
"offline_time": {
"average": 0.0,
"min": 0.0,
"max": 0.0
"power": {
"average": 0.0025818193261147593,
"min": 0.0,
"max": 0.07239881195819103
"work

In [94]:
from llama_index.core.schema import NodeWithScore
from typing import Optional

# Pair every returned node with its similarity score; the score stays None
# when the store did not return similarities.
similarities = query_result.similarities
nodes_with_scores = [
    NodeWithScore(
        node=hit,
        score=None if similarities is None else similarities[rank],
    )
    for rank, hit in enumerate(query_result.nodes)
]

In [95]:
from llama_index.core import QueryBundle
from llama_index.core.retrievers import BaseRetriever
from typing import Any, List


class VectorDBRetriever(BaseRetriever):
    """Retriever over a postgres vector store.

    Embeds the query string with the configured embedding model, runs a
    similarity query against the configured vector store and wraps the
    returned nodes in ``NodeWithScore`` objects.
    """

    def __init__(
        self,
        vector_store: PGVectorStore,
        embed_model: Any,
        query_mode: str = "default",
        similarity_top_k: int = 2,
    ) -> None:
        """Store the retrieval configuration.

        Args:
            vector_store: Store that is queried for similar nodes.
            embed_model: Model used to embed the query; it must provide
                ``get_query_embedding(str)``.
            query_mode: Mode forwarded to ``VectorStoreQuery``.
            similarity_top_k: Number of nodes requested per query.
        """
        self._vector_store = vector_store
        self._embed_model = embed_model
        self._query_mode = query_mode
        self._similarity_top_k = similarity_top_k
        super().__init__()

    def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
        """Return the nodes most similar to ``query_bundle.query_str``.

        Each node is paired with its similarity score, or ``None`` when the
        store does not report similarities.
        """
        # Use the objects given to the constructor rather than whatever
        # `embed_model` / `vector_store` happen to be bound in the notebook.
        query_embedding = self._embed_model.get_query_embedding(
            query_bundle.query_str
        )
        vector_store_query = VectorStoreQuery(
            query_embedding=query_embedding,
            similarity_top_k=self._similarity_top_k,
            mode=self._query_mode,
        )
        query_result = self._vector_store.query(vector_store_query)

        nodes_with_scores = []
        for index, node in enumerate(query_result.nodes):
            score: Optional[float] = None
            if query_result.similarities is not None:
                score = query_result.similarities[index]
            nodes_with_scores.append(NodeWithScore(node=node, score=score))

        return nodes_with_scores

In [96]:
# Custom retriever over the Postgres vector store, returning the top 2 matches per query.
retriever = VectorDBRetriever(
    vector_store, embed_model, query_mode="default", similarity_top_k=2
)

In [97]:
from llama_index.core.query_engine import RetrieverQueryEngine

# Combine the custom retriever with the LLM into a query engine.
query_engine = RetrieverQueryEngine.from_args(retriever, llm=llm)

In [101]:
# Run the example question through the query engine.
response = query_engine.query(query_str)

In [102]:
# Display the full response object, including the source nodes used for the answer.
response

Response(response=' In March 2024, the Large Capacity Cutting Machine 2 has a total of 651 good cycles recorded. The average consumption during these cycles is approximately 0.00153 kWh, with a minimum and maximum consumption of 0.0 and 0.033 respectively. The cost associated with these cycles is not explicitly mentioned in the context provided. However, the consumption data can be used to calculate the cost based on the cost per unit of electricity, if that information is available. The machine has no recorded idle time or offline time for this month. There are no bad cycles or any KPIs related to these in the given context.', source_nodes=[NodeWithScore(node=TextNode(id_='6d14b33f-2fde-449c-9b99-e64642ced000', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='"machine_id": "Large Capacity Cutting Machine 2",\n"month": "2024-03",\n"KPIs": {\n"average_cycle_time": {\n"average": 0.0,\n"min": 0.0,\n"max": 0.0\n"bad_cycles

### Querying strategy

In [9]:
# if we work with nodes (pre-built nodes go to the constructor; from_documents expects documents)
# vector_index = VectorStoreIndex(nodes, embed_model=embed_model)

In [12]:
# if we work with documents
# NOTE(review): no storage context is passed here, so this index presumably does not use the Postgres `vector_store` created earlier — confirm.
vector_index = VectorStoreIndex.from_documents(documents, embed_model=embed_model, show_progress=True)

Parsing nodes: 100%|████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 168.36it/s]
Generating embeddings: 100%|█████████████████████████████████████████████████████████████| 7/7 [00:21<00:00,  3.01s/it]


We use a top-k similarity strategy to retrieve the k most similar documents.

In [13]:
# NOTE: these assignments rebind `query_engine` and `retriever`, replacing the
# VectorDBRetriever-based objects defined earlier in the notebook.
query_engine = vector_index.as_query_engine(llm=llm, verbose=True, similarity_top_k=2)
# similarity_top_k is not passed here, so the retriever falls back to the library default.
retriever = vector_index.as_retriever(verbose=True)


### Evaluation
We test the RAG system with some queries about the data in the JSON files.

In [14]:
# NOTE(review): the question seems to be missing a word after "average" (which KPI?) — confirm the intent.
result = query_engine.query("What was the average  of Assembly Machines?")
print(result)

 The average consumption for Testing Machine 1 is approximately 1.7630247e-06, while the average consumption for Riveting Machine is around 6.5934167e-05. It's important to note that these values are not directly comparable due to their different scales and units.


In [23]:
# Broad question that does not name a specific machine.
result = query_engine.query("What was the average consumption of machines?")
print(result)

 The average consumption for "Testing Machine 1" is 1.7630247097690623e-06 and for "Medium Capacity Cutting Machine 2", it is 0.0023731402496540857.


In [14]:
# Per-machine listing for a single month. The query text is embedded for
# retrieval and sent to the LLM, so the misspelling of "consumption" was fixed.
result = query_engine.query("List the consumption for each machine in March 2024?")
print(result)

 The consumption for each machine in March 2024 is as follows:

1. Assembly Machine 1: 0.0
2. Assembly Machine 2: 0.0
3. Assembly Machine 3: 0.0
4. Large Capacity Cutting Machine 1: 0.0021111835419563543
5. Large Capacity Cutting Machine 2: 0.00152712326133702


In [17]:
# Same question text as `query_str` in the querying section.
result = query_engine.query("General Summarized Overview Large Capacity Cutting Machine 2?")
print(result)

 In March 2024, the Large Capacity Cutting Machine 2 has an average number of cycles per month of approximately 742, with a range from 0 to 18,860. The average cycle time is 0 seconds, with a minimum and maximum not provided. The machine had no bad cycles during this period.

The consumption for this machine averages at 0.0009503 kWh, with a range from 0 to 0.15497427 kWh. This includes an average idle consumption of 0.0006514 kWh and working consumption of 0.0015271 kWh.

The cost for this machine is approximately 0.0007899 USD, with no recorded costs related to idle or working time.

The number of good cycles in March was about 651, with a range from 0 to 18,860. The machine did not have any offline time or idle time during this period, and the working time is not provided. The power consumption averages at 0.0025818 kW, with a range from 0 to 0.0723988 kW.

The Medium Capacity Cutting Machine 2 has similar KPIs for the same month but different values due to differences in the machin

In [20]:
# Comparison question that does not name the machines to compare.
result = query_engine.query("Which machine has higher idle time")
print(result)

 The machine with higher idle time is "Assembly Machine 1". This can be inferred from the context data where the average idle time for "Assembly Machine 1" is 4175.258064516129 hours, while for "Large Capacity Cutting Machine 1", it is 0.0 hours.


In [13]:
# Inspect the retrieved nodes directly, without generating an answer with the LLM.
retriever.retrieve("General Summarized Overview Assembly Machine 1?")


[NodeWithScore(node=TextNode(id_='7a3f1ecb-1cb2-4aa0-9cf9-e1ace4e882a2', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='6ee8dc74-411b-4b67-bba4-83d3ec586f9a', node_type=<ObjectType.DOCUMENT: '4'>, metadata={}, hash='34496ed108b9f8ca29ad1e0bf7cb60af6c22315f29d8d73ac50bdc3339d33446')}, text='"data_structure_overview": {\n"machines": [\n"category": "Metal Cutting Machines",\n"machines": [\n"Large Capacity Cutting Machine 1",\n"Large Capacity Cutting Machine 2",\n"Medium Capacity Cutting Machine 1",\n"Medium Capacity Cutting Machine 2",\n"Medium Capacity Cutting Machine 3",\n"Low Capacity Cutting Machine 1"\n"category": "Laser Welding Machines",\n"machines": [\n"Laser Welding Machine 1",\n"Laser Welding Machine 2"\n"category": "Assembly Machines",\n"machines": [\n"Assembly Machine 1",\n"Assembly Machine 2",\n"Assembly Machine 3"\n"category": "Testing Machines",\n"machines":

In [18]:
# Comparison between two explicitly named machines.
result = query_engine.query("Which one was more effective and productive: Medium Capacity machine 1 vs Medium Capacity machine 2?")
print(result)

 Based on the provided KPIs for both machines, we can calculate certain metrics to compare their effectiveness and productivity. However, please note that this comparison is based on the numerical data given in the context, and other factors such as the nature of work, maintenance requirements, etc., might influence their overall performance.

To make a comparison, we can focus on the following key points:

1. Number of good cycles: Medium Capacity Cutting Machine 1 had an average of 1935.8064516129032 good cycles per month, while Medium Capacity Cutting Machine 2 had an average of 1822.032258064516 good cycles per month. Machine 1 seems to have a slightly higher number of good cycles per month.

2. Consumption: The average consumption for Machine 1 is 0.0025908645679010044, while the average consumption for Machine 2 is 0.0023731402496540857. Machine 1 consumes slightly more resources on average compared to Machine 2.

3. Power: The average power consumed by Machine 1 is 0.00600611930