# RAG skeleton 
This notebook contains the skeleton of the RAG system. It is a deliberately basic implementation that we will expand in later milestones.

In [9]:
import os
import json
from pathlib import Path
from llama_index.llms.ollama import Ollama
from llama_index.core import VectorStoreIndex
from llama_index.core.embeddings import resolve_embed_model
from llama_index.readers.json import JSONReader
from llama_index.core.node_parser import JSONNodeParser
from llama_index.readers.file import FlatReader

### VectorDB ONLINE

In [2]:
"""import psycopg2

with open('./db.txt', "r") as file:
    connection_string = file.read()

conn = psycopg2.connect(connection_string)

conn.autocommit = True

with conn.cursor() as c:
    c.execute(f"DROP DATABASE IF EXISTS {db_name}")
    c.execute(f"CREATE DATABASE {db_name}")"""

'import psycopg2\n\nwith open(\'./db.txt\', "r") as file:\n    connection_string = file.read()\n\nconn = psycopg2.connect(connection_string)\n\nconn.autocommit = True\n\nwith conn.cursor() as c:\n    c.execute(f"DROP DATABASE IF EXISTS {db_name}")\n    c.execute(f"CREATE DATABASE {db_name}")'

In [3]:
"""from sqlalchemy import make_url
from llama_index.core import SimpleDirectoryReader, StorageContext
from llama_index.core import VectorStoreIndex
from llama_index.vector_stores.postgres import PGVectorStore
import textwrap


url = make_url(connection_string)
vector_store = PGVectorStore.from_params(
    database=db_name,
    host=url.host,
    password=url.password,
    port=url.port,
    user=url.username,
    table_name="rag",
    embed_dim=1024,  # openai embedding dimension
)
"""

'from sqlalchemy import make_url\nfrom llama_index.core import SimpleDirectoryReader, StorageContext\nfrom llama_index.core import VectorStoreIndex\nfrom llama_index.vector_stores.postgres import PGVectorStore\nimport textwrap\n\n\nurl = make_url(connection_string)\nvector_store = PGVectorStore.from_params(\n    database=db_name,\n    host=url.host,\n    password=url.password,\n    port=url.port,\n    user=url.username,\n    table_name="rag",\n    embed_dim=1024,  # openai embedding dimension\n)\n'

## VectorDB OFFLINE

In [4]:
import psycopg2
from psycopg2 import sql

# Connection settings for the local PostgreSQL server.
# Host, port, user and password can be overridden with the standard libpq
# environment variables, so credentials do not have to be edited into (and
# committed with) the notebook. The fallbacks are the values that were
# previously hard-coded here.
db_name = "vector_db"
host = os.environ.get("PGHOST", "localhost")
password = os.environ.get("PGPASSWORD", "password")
port = os.environ.get("PGPORT", "5432")
user = os.environ.get("PGUSER", "irene")  # <--- set PGUSER (or change this default) to your username !!!

# Connect to the maintenance database "postgres" rather than to `db_name`,
# because `db_name` is about to be dropped and re-created.
conn = psycopg2.connect(
    dbname="postgres",
    host=host,
    password=password,
    port=port,
    user=user,
)
# DROP/CREATE DATABASE cannot run inside a transaction block.
conn.autocommit = True

# WARNING: destructive. Every run of this cell wipes the existing database.
try:
    with conn.cursor() as c:
        # Compose the statements with sql.Identifier so the database name is
        # quoted as an identifier instead of being pasted into the SQL text.
        c.execute(sql.SQL("DROP DATABASE IF EXISTS {}").format(sql.Identifier(db_name)))
        c.execute(sql.SQL("CREATE DATABASE {}").format(sql.Identifier(db_name)))
finally:
    # The admin connection is only needed for the two statements above.
    conn.close()

In [1]:
from sqlalchemy import make_url
from llama_index.vector_stores.postgres import PGVectorStore

# Connection settings for the vector store.
# They are defined again here, presumably so this cell can be run on its own
# without the destructive drop/create cell above.
# Values can be overridden with the standard libpq environment variables; the
# fallbacks are the values that were previously hard-coded here.
# NOTE(review): the fallback user here ("superuser") differs from the one in
# the database-creation cell ("irene"). Set PGUSER to use one user everywhere.
db_name = "vector_db"
host = os.environ.get("PGHOST", "localhost")
password = os.environ.get("PGPASSWORD", "password")
port = os.environ.get("PGPORT", "5432")
user = os.environ.get("PGUSER", "superuser")  # <--- set PGUSER (or change this default) to your username !!!

# Handle to the Postgres-backed vector store; rows go into table "json_data".
vector_store = PGVectorStore.from_params(
    database=db_name,
    host=host,
    password=password,
    port=port,
    user=user,
    table_name="json_data",
    # Must equal the vector size produced by the embedding model used for the
    # nodes (this notebook configures "local:BAAI/bge-m3", not an OpenAI model).
    embed_dim=1024,
)

#### Loading and Indexing
Load the JSON data and set up the embedding model used to embed the resulting documents.

In [10]:
# Embedding model shared by indexing (node embeddings) and querying (query embeddings).
# The "local:" prefix presumably selects a locally-run HuggingFace model — confirm.
embed_model = resolve_embed_model("local:BAAI/bge-m3")

  from .autonotebook import tqdm as notebook_tqdm


In [11]:
# Path to the folder containing all the JSON files (relative to the notebook's working directory)
DATA_PATH = "./data/"

# Set up the reader and the LLM.
# NOTE(review): `reader` is only referenced from a commented-out line in the
# document-loading cell; the active loading code uses FlatReader instead.
reader = JSONReader()

# parser = JSONNodeParser()     # if we want to split the documents into nodes
# LLM served through Ollama; request_timeout is presumably in seconds — confirm.
llm = Ollama(model="llama3.2", request_timeout=180.0) 

In [8]:

# Create the documents out of the JSON files found directly inside DATA_PATH.
flat_reader = FlatReader()  # one reader instance is reused for every file
documents = []
# os.listdir returns entries in arbitrary order; sorting makes the document
# (and therefore node) order reproducible across runs and machines.
for filename in sorted(os.listdir(DATA_PATH)):
    if filename.endswith(".json"):
        file_path = os.path.join(DATA_PATH, filename)
        # FlatReader is used so the data can be split into nodes afterwards
        documents.extend(flat_reader.load_data(Path(file_path)))
        # documents.extend(reader.load_data(input_file=file_path))

# JSON-aware node parser, only needed for the commented-out splitting
# alternative below; the active pipeline uses a token splitter instead.
parser = JSONNodeParser(include_metadata=True,
                        include_prev_next_rel=True)

# nodes = parser.get_nodes_from_documents(documents)            # if we want to split documents into nodes


In [9]:
# Sanity check: number of documents loaded (one per JSON file; the recorded run shows 6)
len(documents)

6

### Document splitting

Option 1: use a simple node parser.

Option 2: control the entire ingestion pipeline yourself (this also lets you choose the chunk size). This is the approach used in the cells below.

In [10]:
import nest_asyncio
import nltk
# Fetch the "punkt_tab" NLTK data package (a no-op when it is already up to date, as in the recorded output)
nltk.download('punkt_tab')
# Presumably required so the ingestion pipeline can run async code inside the
# notebook's already-running event loop — confirm.
nest_asyncio.apply()

[nltk_data] Downloading package punkt_tab to /Users/irene/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


In [11]:
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.extractors import (
    SummaryExtractor,
    QuestionsAnsweredExtractor,
    TitleExtractor,
    KeywordExtractor,
)
from llama_index.extractors.entity import EntityExtractor

from llama_index.core.node_parser import TokenTextSplitter

from llama_index.core.ingestion import IngestionPipeline


# Token-based splitter: chunks of 512 tokens with an overlap of 128 tokens.
text_splitter = TokenTextSplitter(
    # separator=" ", 
    chunk_size=512, 
    chunk_overlap=128
)

# Template for a custom extractor, if one is needed (kept for reference; it
# would also require importing BaseExtractor)

# class CustomExtractor(BaseExtractor):
#     def extract(self, nodes):
#         metadata_list = [
#             {
#                 "custom": (
#                     node.metadata["document_title"]
#                     + "\n"
#                     + node.metadata["excerpt_keywords"]
#                 )
#             }
#             for node in nodes
#         ]
#         return metadata_list

# Ordered list of steps applied to the documents. Only the splitter and the
# entity extractor are active; the other extractors are left disabled.
transformations = [
    text_splitter,
    # TitleExtractor(nodes=3,llm=llm),
    # QuestionsAnsweredExtractor(questions=2,llm=llm),
    # SummaryExtractor(summaries=["prev", "self"],llm=llm),
    # KeywordExtractor(keywords=4,llm=llm),
    # Adds the 'entities' metadata field seen on the retrieved nodes later on.
    # NOTE(review): confirm that EntityExtractor actually uses the `llm` argument.
    EntityExtractor(prediction_threshold=0.5,llm=llm),
]


pipeline = IngestionPipeline(
    transformations=transformations
)

# Run the pipeline over the loaded documents; in the recorded run the
# 6 documents produce 108 nodes.
nodes = pipeline.run(
    documents=documents,
    in_place=True,
    show_progress=True,
)


The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`


Parsing nodes:   0%|          | 0/6 [00:00<?, ?it/s]

Extracting entities:   0%|          | 0/108 [00:00<?, ?it/s]

In [12]:
# Sanity check: number of nodes produced by the ingestion pipeline (108 in the recorded run)
len(nodes)

108

In [13]:
# Attach an embedding to every node. The embedded text is the node content
# rendered with metadata_mode="all".
for node in nodes:
    node.embedding = embed_model.get_text_embedding(
        node.get_content(metadata_mode="all")
    )

### Storing
Load into the vectorDB

In [14]:
# Insert the embedded nodes into the vector store; the value displayed below is the list of stored node ids.
vector_store.add(nodes)

['77b85c82-c5c5-4fb7-87f2-919bd8888340',
 '879ca5d3-517b-4a57-9c36-768c72588184',
 '1f9abdd0-100e-442f-95bc-26d4bab016a3',
 'a595b82e-871c-44d9-bf4c-df4391845e40',
 'ec171f89-665c-4816-a79b-747dfdd8f0f6',
 'f78785f9-72f3-491b-97ca-d1cb00370d3f',
 '293878cd-9ce5-40ad-a98f-d89d7aff907d',
 '9c660470-d17a-420c-a4cd-8fbfa6b5162e',
 '0258b000-2433-42e4-9d91-ca93762ef233',
 '12879108-96df-4777-8d19-0e64ce126c7f',
 'c82c3913-86c2-441e-87c6-adc36ac3d9fc',
 '9e62a165-6a47-4503-8b89-efa6efbd65a0',
 '2279b86e-94d7-4e61-8c93-a228cf863c35',
 '452f6cf7-1d44-4118-a990-00a80350a4cc',
 'f09ff58e-ced2-4efb-914c-5b7c3a56ed0d',
 '8b494d2b-cf9d-484e-bc2a-656fc5d9b74f',
 '69957942-2c4e-497c-af22-d61906ef05b2',
 'f62a5327-7b50-4d8c-8334-e5a724822051',
 'dcbfa98e-f742-412f-8635-5f1781f4eee7',
 'cff0a871-61c1-4e4b-915a-755a340d84c4',
 '234b5d11-0df1-40fb-8318-eb32be7a9b4f',
 '58cc9b9e-762d-4695-8582-6597bdb833dc',
 '174c77fe-224f-4966-8460-275a554e1053',
 '789a62c9-4a7a-4495-ac37-3586fe886ea1',
 '0fa0c139-fc98-

### Querying (milestone 2)

In [15]:
# Example query, reused by the manual vector-store query and by the query engine further down.
query_str = "General Summarized Overview Large Capacity Cutting Machine 2?"

# Embed the query with the same model that produced the node embeddings.
query_embedding = embed_model.get_query_embedding(query_str)

In [16]:
# Construct the vector store query by hand (bypassing any index/retriever abstraction)
from llama_index.core.vector_stores import VectorStoreQuery

# Query mode passed to the store; the alternatives are left commented out.
query_mode = "default"
# query_mode = "sparse"
# query_mode = "hybrid"

# Ask for the 2 nodes most similar to the query embedding.
vector_store_query = VectorStoreQuery(
    query_embedding=query_embedding, similarity_top_k=2, mode=query_mode
)

In [17]:
# Run the query against the store; this returns a VectorStoreQueryResult
query_result = vector_store.query(vector_store_query)
# Show the text of the first returned node. In the recorded output it is a raw
# fragment of a JSON file, cut at a chunk boundary.
print(query_result.nodes[0].get_content())

}
            ],
            "max": [
                {
                    "machine": "Large Capacity Cutting Machine 2",
                    "month": "2024-03",
                    "value": 0.03329324345041983
                }
            ]
        },
        "cost": {
            "average": [
                {
                    "machine": "Large Capacity Cutting Machine 2",
                    "month": "2024-03",
                    "value": 0.000789856282015753
                }
            ],
            "min": [
                {
                    "machine": "Large Capacity Cutting Machine 2",
                    "month": "2024-03",
                    "value": 0.0
                }


In [None]:
from llama_index.core.schema import NodeWithScore
from typing import Optional

# Pair every retrieved node with its similarity; the score stays None when the
# store returned no similarities.
similarities = query_result.similarities
nodes_with_scores = [
    NodeWithScore(
        node=node,
        score=None if similarities is None else similarities[index],
    )
    for index, node in enumerate(query_result.nodes)
]

### Simpler retriever (using hybrid search) --> not important

In [None]:
# Rebind `vector_store` to a handle on the same table with hybrid search enabled.
# NOTE(review): this replaces the `vector_store` object created earlier.
vector_store = PGVectorStore.from_params(
    database=db_name,
    host=host,
    password=password,
    port=port,
    user=user,
    table_name="json_data",
    embed_dim=1024,  # must match the embedding model's vector size (the notebook uses local:BAAI/bge-m3, not an OpenAI model)
    hybrid_search=True
)

In [None]:
from llama_index.core.vector_stores.types import VectorStoreQueryMode
from llama_index.core import VectorStoreIndex

# Wrap the existing vector store in an index (using the notebook's embed model)
# and obtain a retriever that queries it in HYBRID mode.
vector_store_index = VectorStoreIndex.from_vector_store(vector_store, embed_model=embed_model)
retriever = vector_store_index.as_retriever(
    vector_store_query_mode=VectorStoreQueryMode.HYBRID
)

In [None]:
# Smoke test of the hybrid retriever.
# NOTE(review): this rebinds `nodes`, which earlier held the ingested nodes;
# re-running the embedding/storing cells after this cell would operate on the
# retrieval results instead.
nodes = retriever.retrieve("machine")

### Custom retriever

Connect first to postgresql (also start postgresql service) 

--> using brew: brew services start postgresql@14

In [None]:
# Rebind `vector_store` to a handle on the "json_data" table without hybrid
# search; it is the store handed to the custom retriever below.
vector_store = PGVectorStore.from_params(
    database=db_name,
    host=host,
    password=password,
    port=port,
    user=user,
    table_name="json_data",
    embed_dim=1024,  # must match the embedding model's vector size (the notebook uses local:BAAI/bge-m3, not an OpenAI model)
)

In [4]:
from llama_index.core.schema import NodeWithScore
from llama_index.core import QueryBundle
from llama_index.core.retrievers import BaseRetriever
from typing import Any, List


class VectorDBRetriever(BaseRetriever):
    """Retriever over a postgres vector store.

    Embeds the query text with the supplied embedding model, runs a
    ``VectorStoreQuery`` against the supplied vector store and wraps every
    returned node, together with its similarity when available, in a
    ``NodeWithScore``.
    """

    def __init__(
        self,
        vector_store: PGVectorStore,
        embed_model: Any,
        query_mode: str = "default",
        similarity_top_k: int = 2,
    ) -> None:
        """Store the collaborators and the query settings.

        Args:
            vector_store: Store that is queried on every retrieval.
            embed_model: Object exposing ``get_query_embedding(str)``.
            query_mode: Mode forwarded to ``VectorStoreQuery``.
            similarity_top_k: Number of nodes requested from the store.
        """
        self._vector_store = vector_store
        self._embed_model = embed_model
        self._query_mode = query_mode
        self._similarity_top_k = similarity_top_k
        super().__init__()

    def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
        """Return the nodes most similar to ``query_bundle.query_str``.

        Uses the embed model and vector store given to the constructor, not
        whatever notebook-level ``embed_model`` / ``vector_store`` happen to
        be bound when the method runs.
        """
        query_embedding = self._embed_model.get_query_embedding(
            query_bundle.query_str
        )
        vector_store_query = VectorStoreQuery(
            query_embedding=query_embedding,
            similarity_top_k=self._similarity_top_k,
            mode=self._query_mode,
        )
        query_result = self._vector_store.query(vector_store_query)

        similarities = query_result.similarities
        # `nodes` may be None on an empty result; treat that as "no hits".
        return [
            NodeWithScore(
                node=node,
                score=similarities[index] if similarities is not None else None,
            )
            for index, node in enumerate(query_result.nodes or [])
        ]

In [20]:
# Instantiate the custom retriever, requesting 14 nodes per query instead of the class default of 2.
retriever = VectorDBRetriever(
    vector_store, embed_model, query_mode="default", similarity_top_k=14
)

All the nodes are accessible through the `retriever` object, or through the `query_engine` defined below.

In [21]:
from llama_index.core.query_engine import RetrieverQueryEngine

# Query engine combining the custom retriever with the Ollama LLM configured earlier.
query_engine = RetrieverQueryEngine.from_args(retriever, llm=llm)

In [22]:
# End-to-end run of the example query defined earlier (`query_str`).
response = query_engine.query(query_str)

In [23]:
# Display the full Response object (not just its text) for inspection.
response

Response(response='Here is a summarized overview of the data related to the Large Capacity Cutting Machine 2:\n\n**Performance Metrics**\n\n* Average values:\n\t+ Maximum production capacity: 0.03329324345041983\n\t+ Average consumption working: 0.0025818193261147593\n\t+ Average cycles: 0.7419354838709677\n\t+ Average good cycles: 651.0967741935484\n* Minimum values:\n\t+ Maximum production capacity: 0.0\n\t+ Average consumption idle: 0.0\n\t+ Average cycles: 0.0\n\t+ Average good cycles: 0.03249318468435469\n\n**Energy Consumption**\n\n* Maximum energy consumption during working hours: 18860.0\n* Minimum energy consumption during idle hours: 0.0\n* Maximum energy consumption during idle hours: 0.0\n\n**Downtime and Offline Time**\n\n* Average offline time: 0.15497427194254568 seconds\n* Minimum offline time: 0.0\n* Maximum offline time: 0.15497427194254568 seconds\n\nOverall, the Large Capacity Cutting Machine 2 has excellent performance metrics, with high average production capacity

### Querying strategy

In [24]:
# if we work with nodes
#vector_index = VectorStoreIndex(nodes, embed_model=embed_model)

In [25]:
# if we work with documents
#vector_index = VectorStoreIndex.from_documents(documents, embed_model=embed_model, show_progress=True)

We use a top-k similarity strategy to retrieve the k most similar documents.

In [26]:
#query_engine = vector_index.as_query_engine(llm=llm, verbose=True, similarity_top_k=2)
#retriever = vector_index.as_retriever(verbose=True)

### Evaluation
We test the RAG system with some queries regarding the data in the json files

In [27]:
# Query about a machine type that, per the recorded answer, does not appear in the retrieved context.
# NOTE(review): the question looks like it is missing a KPI name between "average" and "of".
result = query_engine.query("What was the average  of Assembly Machines?")
print(result)

There is no information provided in the given documents about an entity called "Assembly Machine". All entities mentioned are related to specific types of machines such as Cutting Machines (Medium Capacity, Large Capacity), Laser Cutter, Riveting Machine. Therefore, it can be concluded that there is no available data for Assembly Machines based on the provided context.


In [28]:
# Aggregate question across all machines; the recorded answer only lists per-machine values.
result = query_engine.query("What was the average consumption of machines?")
print(result)

There is no specific data provided for the average consumption of machines across all entities in the given excerpt. However, we can look at the machine-specific data to identify any patterns or notable values.

From the data, we see that some machines have non-zero average consumption values:
- Medium Capacity Cutting Machine 1 has an average consumption of 0.0025908645679010044.
- Riveting Machine has an average consumption of 6.593416685402065e-05.
- Testing Machine 1 has an average consumption of 1.7630247097690623e-06.

It is also worth noting that the units of these values are not explicitly stated in the provided excerpt, and it would be necessary to consult additional data or context for a more accurate interpretation of these values as "average consumption".


In [29]:
# Per-machine listing for one month; the recorded answer covers only two machines.
# NOTE(review): "conspumption" is misspelled and is sent to the model as written.
result = query_engine.query("List the conspumption for each machine in March 2024?")
print(result)

Here is the consumption data for each machine in March 2024:

* Riveting Machine:
	+ Average: 6.593416685402065e-05
	+ Min: 0.0
	+ Max: 11780.0
* Testing Machine 1:
	+ Average: 9.673769021249745e-07
	+ Min: 0.0
	+ Max: 3.493676502145316e-05


In [30]:
# Same question text as `query_str`, asked again through the query engine.
result = query_engine.query("General Summarized Overview Large Capacity Cutting Machine 2?")
print(result)

Based on the provided data, here is a summarized overview of the Large Capacity Cutting Machine 2:

**Performance Metrics:**

* Maximum production capacity: 0.03249318468435469 units per month
* Average consumption working: 0.00152712326133702 units per month
* Maximum consumption idle: 0.07239881195819103 units per month
* Minimum consumption idle: 0.0 units per month

**Energy Consumption:**

* Average energy consumption while working: 0.0 units per month
* Maximum energy consumption while working: 0.0 units per month
* Average energy consumption during idle time: 0.0006513621264504764 units per month
* Minimum energy consumption during idle time: 0.0 units per month

**Machine Utilization:**

* Average cycles performed: 0.7419354838709677 cycles per month
* Maximum cycles performed: 0.0 cycles per month
* Minimum cycles performed: 0.0 cycles per month
* Total working hours: 18860 hours

**Downtime and Idle Time:**

* Average offline time: 0.15497427194254568 minutes per month
* Maxi

In [31]:
# Comparison question across machines.
result = query_engine.query("Which machine has higher idle time")
print(result)

Riveting Machine.


In [32]:
# Inspect the raw retrieval step (no LLM) to see which nodes are returned for this query.
retriever.retrieve("General Summarized Overview Assembly Machine 1?")


[NodeWithScore(node=TextNode(id_='c82c3913-86c2-441e-87c6-adc36ac3d9fc', embedding=None, metadata={'filename': 'restructured_monthly_smart_app_data_Medium%20Capacity%20Cutting%20Machine%201.json', 'extension': '.json', 'entities': ['Medium Capacity Cutting Machine']}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='337407bf-d246-49ba-bb05-aa7dfbf87cf5', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'filename': 'restructured_monthly_smart_app_data_Medium%20Capacity%20Cutting%20Machine%201.json', 'extension': '.json'}, hash='11abb855bbc38a9460b39352c0c09ab6dd96746af9938e7f62bd58f1d1e37da4'), <NodeRelationship.PREVIOUS: '2'>: RelatedNodeInfo(node_id='12879108-96df-4777-8d19-0e64ce126c7f', node_type=<ObjectType.TEXT: '1'>, metadata={'filename': 'restructured_monthly_smart_app_data_Medium%20Capacity%20Cutting%20Machine%201.json', 'extension': '.json'}, hash='7090540ddf5bb57e94f1af7405b47561e4374aed48c8

In [33]:
# Comparison question between two specific machines.
result = query_engine.query("Which one was more effective and productive: Medium Capacity machine 1 vs Medium Capacity machine 2?")
print(result)

Based on the provided data, it appears that both machines have similar performance metrics for most parameters. However, some key differences can be observed:

*   For "consumption_working", Medium Capacity Machine 1 has a higher average value (0.005278610700021035) compared to Machine 2 (0.003713141006560274).
*   For "bad_cycles", both machines have similar performance metrics with an average value of 0.
*   For other parameters like "cost_working" and "power", the values for Medium Capacity Machine 1 are also higher than those of Medium Capacity Machine 2 in some cases, but not consistently.

Given these observations, it's difficult to declare a clear winner without more comprehensive data. However, if we consider the "consumption_working" parameter as an indicator of productivity and effectiveness, Medium Capacity Machine 1 seems to be slightly more productive compared to Medium Capacity Machine 2.


### JSON Outputs
The idea is to define 2 prompts, one for each type of query: `report generation` and `KPI suggestion`.

#### Prompt Definition

In [47]:
# 1. Prompt for Generating New KPIs:
# get_response() prepends this text to queries classified as "kpi".
# It pins the JSON output schema and lists the KPI names the model may use in formulas.
kpi_json_prompt = """
You are a specialized assistant that only outputs answers in JSON format. 

Analyze the documents and provide new KPI suggestions. Use the JSON format below:
{
  "KPIs": [
    {
      "name": "<KPI Name>",
      "description": "<Brief description of the new KPI>",
      "formula": "<Mathematical formula to calculate the new KPI using already available KPIs>"
    },
    ...
  ]
}

Consider that the available KPIs are:
- average_cycle_time
- bad_cycles
- consumption
- consumption_idle
- consumption_working
- cost
- cost_idle
- cost_working
- cycles
- good_cycles
- idle_time
- offline_time
- power
- working_time
"""

In [35]:
# 2. Prompt for Generating Machine Reports:
# JSON-schema variant of the report prompt.
# NOTE(review): get_response() selects `report_prompt` (plain text), not this
# variable, for "report" queries — confirm whether that is intended.
report_json_prompt = """
You are a specialized assistant that only outputs answers in JSON format.

Based on the monthly aggregated KPI data provided, generate a detailed report for the specified machine and month. Use the JSON format below:
{
    "MachineBehaviorReport": {
        "machine": "<Machine Name>",
        "month": "<Month>",
        "kpi_analysis": [
            {
                "kpi": "<KPI Name>",
                "analysis": "<Analysis of the KPI based on min, max, and average values>"
            },
            ...
        ],
        "overall_summary": "<High-level summary of machine performance>"
    }
}
"""

In [36]:
# Plain-text report prompt: this is the prompt get_response() prepends to
# queries classified as "report". It asks for a document-friendly format rather than JSON.
report_prompt = """
Based on the monthly aggregated KPI data provided, generate a detailed report for the specified machine and month. 
Use a format that is easily convertible to a pdf or word document.
"""

In [37]:
# More detailed version of the JSON prompt for generating machine reports:
# it spells out what the report must contain and lists the KPIs to analyze.
# NOTE(review): like `report_json_prompt`, this variable is not selected by
# get_response() — confirm whether it should be.
report_json_prompt2 = """
You are a specialized assistant that only outputs answers in JSON format.

Based on the monthly aggregated KPI data provided, generate a detailed report for the specified machine and month. The report must include the following:

1. The name of the machine and the month being analyzed.
2. An analysis of each KPI (listed below), comparing its minimum, maximum, and average values. Identify:
   - Notable patterns or trends (e.g., consistently high or low values).
   - Significant deviations (e.g., high max values with low averages).
   - Missing or zero values, and their implications.
3. An overall summary of the machine's performance for the month, including conclusions about efficiency, potential issues, and general observations.

KPIs to analyze:
- average_cycle_time
- bad_cycles
- consumption
- consumption_idle
- consumption_working
- cost
- cost_idle
- cost_working
- cycles
- good_cycles
- idle_time
- offline_time
- power
- working_time

Respond in the following JSON format:

{
    "MachineBehaviorReport": {
        "machine": "<Machine Name>",
        "month": "<Month>",
        "kpi_analysis": [
            {
                "kpi": "<KPI Name>",
                "analysis": "<Analysis of the KPI based on min, max, and average values>"
            },
            ...
        ],
        "overall_summary": "<High-level summary of machine performance>"
    }
}
"""

#### Query Classification
Given a query we should find a way to choose the proper prompt.

In [48]:
def classify_query(query):
    """Classify a user query so the matching prompt can be selected.

    Matching is a case-insensitive substring search, and KPI keywords take
    precedence over report keywords.

    Args:
        query: The user's question as a string.

    Returns:
        "kpi" if any KPI keyword occurs in the query, otherwise "report" if
        any report keyword occurs, otherwise "unknown".
    """
    kpi_keywords = ["KPI", "KPIs", "metrics", "new", "suggest"]
    report_keywords = ["report", "behavior", "trend", "machine"]

    # Lower-case BOTH sides of the comparison. Previously only the query was
    # lowered, so the upper-case keywords "KPI"/"KPIs" could never match.
    normalized_query = query.lower()

    def mentions_any(keywords):
        return any(keyword.lower() in normalized_query for keyword in keywords)

    if mentions_any(kpi_keywords):
        return "kpi"
    if mentions_any(report_keywords):
        return "report"
    return "unknown"

#### Response Function

In [49]:
def get_response(query):
    """Answer ``query`` with the query engine, prefixed by a task-specific prompt.

    The query is classified with ``classify_query``: "kpi" queries get
    ``kpi_json_prompt``, "report" queries get ``report_prompt``, and anything
    else gets a blank prompt. The engine's response object is returned as-is;
    no JSON parsing or validation is performed here.

    Args:
        query: The user's question as a string.

    Returns:
        Whatever ``query_engine.query`` returns for the combined prompt.
    """
    category = classify_query(query)

    if category == "report":
        prefix = report_prompt
    elif category == "kpi":
        prefix = kpi_json_prompt
    else:
        # Uncertain, or a different kind of question: send no task prompt.
        prefix = " "

    return query_engine.query(prefix + "\nQuery: " + query)


#### Examples of Usage

In [40]:
# Contains "report", so classify_query() routes it to the plain-text report prompt.
query = "Generate a performance report for the machine 'Large Capacity Cutting Machine 2' for March 2024."
response = get_response(query)

print(response)  

**Detailed Report for Large Capacity Cutting Machine 2 in March 2024**

**Executive Summary**

This report provides an overview of the key performance indicators (KPIs) for Large Capacity Cutting Machine 2 in March 2024. The data is based on monthly aggregated KPI values and offers insights into machine efficiency, productivity, and maintenance requirements.

**Machine Performance**

The average cycle time for Large Capacity Cutting Machine 2 in March 2024 was 0 hours, indicating optimal machine utilization. However, the minimum value (0) suggests that some machines may have been idle or underutilized during this period.

**Productivity Metrics**

* **Average Good Cycles**: The average number of good cycles per month was 651.0967741935484.
* **Average Cycles**: The average number of cycles per month was 0.7419354838709677, indicating a high level of productivity.
* **Good Cycles Percentage**: The percentage of good cycles out of total cycles was 100%, suggesting that all cycles were pr

**Detailed Report for Large Capacity Cutting Machine 2 in March 2024**

**Executive Summary**

This report provides an overview of the key performance indicators (KPIs) for Large Capacity Cutting Machine 2 in March 2024. The data is based on monthly aggregated KPI values and offers insights into machine efficiency, productivity, and maintenance requirements.

**Machine Performance**

The average cycle time for Large Capacity Cutting Machine 2 in March 2024 was 0 hours, indicating optimal machine utilization. However, the minimum value (0) suggests that some machines may have been idle or underutilized during this period.

**Productivity Metrics**

* **Average Good Cycles**: The average number of good cycles per month was 651.0967741935484.
* **Average Cycles**: The average number of cycles per month was 0.7419354838709677, indicating a high level of productivity.
* **Good Cycles Percentage**: The percentage of good cycles out of total cycles was 100%, suggesting that all cycles were productive.

**Energy Consumption and Costs**

* **Consumption Working Average**: The average energy consumption during working hours was 0.00152712326133702 kWh, indicating a low energy consumption rate.
* **Cost Working Average**: The average cost associated with working hours was $0.0025818193261147593, which is relatively low compared to other machines.

**Maintenance and Downtime**

* **Offline Time**: There were no reported offline times for Large Capacity Cutting Machine 2 in March 2024.
* **Consumption Idle Average**: The average energy consumption during idle hours was 0.0006513621264504764 kWh, indicating a low energy consumption rate.

**Comparison to Baseline Values**

The KPI values for Large Capacity Cutting Machine 2 in March 2024 are compared to baseline values as follows:

* **Average Cycle Time**: The average cycle time is below the baseline value of 0.1 hours.
* **Good Cycles Percentage**: The good cycles percentage is above the baseline value of 90%.
* **Consumption Working Average**: The consumption working average is lower than the baseline value of 0.01 kWh.

**Conclusion**

Based on the KPI data, Large Capacity Cutting Machine 2 performed exceptionally well in March 2024. The low energy consumption rates and high productivity metrics indicate optimal machine utilization and efficiency. However, it is essential to monitor these metrics closely to ensure that the machine continues to perform at this level.

**Recommendations**

1. Continuously monitor machine performance to identify areas for improvement.
2. Conduct regular maintenance checks to prevent downtime and optimize machine efficiency.
3. Analyze energy consumption patterns to reduce costs and minimize waste.

Note: This report is in a detailed format and can be easily converted to a PDF or Word document format.

In [50]:
# Contains "suggest", so it is classified as "kpi" and answered in the JSON KPI schema.
query = "Suggest new KPIs based on the provided data."
response = get_response(query)

print(response)

{
  "KPIs": [
    {
      "name": "Average Cycle Efficiency",
      "description": "Average efficiency of machine cycles, calculated as (good_cycles / cycles) * 100%",
      "formula": "(sum([good_cycles]) / sum([cycles])) * 100"
    },
    {
      "name": "Idle Time Percentage",
      "description": "Percentage of idle time, calculated as (idle_time / working_time) * 100",
      "formula": "((sum([idle_time]) / sum([working_time])) * 100)"
    },
    {
      "name": "Power Consumption per Cycle",
      "description": "Average power consumption per cycle, calculated as (consumption / cycles)",
      "formula": "(sum([consumption]) / sum([cycles]))"
    },
    {
      "name": "Cost per Working Hour",
      "description": "Average cost per working hour, calculated as (cost_working / working_time)",
      "formula": "(sum([cost_working]) / sum([working_time]))"
    }
  ]
}


In [51]:
# Contains "new", so it is classified as "kpi" and answered in the JSON KPI schema.
query = "Generate a new KPI to monitor the production efficiency."
response = get_response(query)

print(response)

{
  "KPIs": [
    {
      "name": "production_efficiency",
      "description": "Monitor the ratio of good cycles to total cycles",
      "formula": "(good_cycles / (good_cycles + bad_cycles)) * 100"
    }
  ]
}


In [43]:
# Contains "behavior", so it is classified as "report".
query = "What was the behavior of the Laser Cutter in March 2024?"
response = get_response(query)

print(response)

**Monthly Aggregated Report for Laser Cutter - March 2024**

**Overview**

The following report provides an overview of the Laser Cutter's performance in March 2024, based on the monthly aggregated KPI data.

**Average Cycle Time**
------------------------

*   **Total Average Cycle Time:** N/A
*   **Machine Performance:** N/A

No cycle time data is available for the specified machine and month.

**Bad Cycles**
-----------------

*   **Total Bad Cycles:** N/A
*   **Percentage of Total Cycles:** N/A
*   **Detailed Breakdown:**

    *   Month: March 2024
    *   Number of Bad Cycles: 1.2258064516129032
    *   Percentage of Total Cycles: 15.28%

**Power Consumption**
--------------------

*   **Total Average Power Consumption:** 9.65957169325545e-05
*   **Machine Performance:** N/A

No power consumption data is available for the specified machine and month.

**Working Time**
-----------------

*   **Total Working Time:** 0 hours
*   **Percentage of Total Time:** N/A
*   **Detailed Breakd

In [44]:
# None of the classifier keywords occur here, so the query is sent with the blank prompt.
query = "General Summarized Overview of the Laser Cutter?" # this won't be classified as 'report'
response = get_response(query)

print(response)

The data provided offers a comprehensive view of the performance and operational metrics of the Laser Cutter for the month of March 2024. 

Some key insights from the data include:

- Average power usage during this period was minimal, indicating efficient operation.
- The average number of cycles performed was lower than expected but within a reasonable range.
- Consumption-related metrics (working and idle) show zero values, suggesting no energy consumption during this month's operations.
- Cost calculations reveal an average cost of $0 for both working and idle periods, implying negligible expenses during the evaluated period.

However, some metrics are not provided or show maximum values at 0.00, indicating either a lack of data or ideal conditions, respectively. 

Overall, these statistics point to the Laser Cutter's minimal energy usage, absence of operational costs, and normal operation without any notable issues.


In [45]:
# Report request naming two machines; the recorded answer (as far as shown) only covers the Laser Cutter.
query = "Generate a report on the general behavior of the Riveting Machine and Laser Cutter in March 2024."
response = get_response(query)

print(response)

**Detailed Report for Laser Cutter, March 2024**

**Overview**

This report provides an in-depth analysis of the Laser Cutter's performance in March 2024. The data is based on monthly aggregated KPIs.

**KPI Summary**

* **Average Cycle Time**: 0.00 seconds
	+ Minimum: 0.00 seconds
	+ Maximum: 0.00 seconds
* **Bad Cycles**: 0 (no bad cycles reported)
* **Working Time**: 0 hours
* **Consumption Working**: 0 units
* **Cost**: 0 units
* **Power**: 9.65957169325545e-05 kW
* **Cycles**: 4168.00 cycles
* **Good Cycles**: 269.06451612903226 good cycles

**Analysis**

The Laser Cutter's average cycle time is significantly low, indicating efficient operation. There were no bad cycles reported in March 2024.

However, the working time and consumption working values are both zero, suggesting that the machine was idle for the entire month. This may be due to various factors such as maintenance or shutdowns.

The cost value is also zero, which could indicate low energy consumption during operation.

In [46]:
# NOTE(review): this is a counting question, but it contains the report keyword
# "machine", so classify_query() labels it "report"; the recorded output is a
# machine report rather than a count.
query = "How many machines are there?"
response = get_response(query)

print(response)

**Detailed Report for Large Capacity Cutting Machine 2 - March 2024**

**Introduction**
----------------

This report provides an overview of the key performance indicators (KPIs) for Large Capacity Cutting Machine 2 in March 2024. The data is aggregated from various sources and is subject to quality control measures.

**Average Cycle Time**
----------------------

The average cycle time for Large Capacity Cutting Machine 2 in March 2024 is:

*   Average: **0.0006513621264504764** minutes
*   Minimum: **0.0** minutes
*   Maximum: **0.03249318468435469** minutes

**Bad Cycles**
----------------

The average number of bad cycles for Large Capacity Cutting Machine 2 in March 2024 is:

*   Average: **0.0006513621264504764**

**Consumption Working**
----------------------

The average consumption working time for Large Capacity Cutting Machine 2 in March 2024 is:

*   Average:
    *   Value: **0.0** minutes
    *   Total consumption working hours: **0.0** hours
*   Maximum:
    *   Value: *