In [84]:
from llama_index.readers.json import JSONReader
from llama_index.core.node_parser import JSONNodeParser
import os
import json
from pathlib import Path



reader = JSONReader()
folder_path = ".data/"
parser = JSONNodeParser()

In [85]:
class CustomJSONReader(JSONReader):
    def load_data(self, file_path):
        documents = super().load_data(file_path)
        for doc in documents:
            data = doc.text.strip()
            try:
                # Parse the text into a JSON structure
                parsed_data = json.loads(data)

                # Flatten the nested structure for parsing
                flattened_text = self.flatten_json(parsed_data)
                doc.text = flattened_text  # Assign flattened data as text
                doc.metadata = parsed_data  # Keep original data in metadata
            except json.JSONDecodeError as e:
                print(f"Skipping non-JSON content: {data}")
                print("Error:", e)
        return documents

    def flatten_json(self, data):
        """Recursively flatten a nested JSON object."""
        def recurse(curr, key='', result={}):
            if isinstance(curr, dict):
                for k, v in curr.items():
                    recurse(v, f"{key}.{k}" if key else k, result)
            else:
                result[key] = curr
        flattened = {}
        recurse(data, result=flattened)
        # Convert flattened dictionary to a readable string for text parsing
        return '\n'.join(f"{k}: {v}" for k, v in flattened.items())


In [86]:
reader = CustomJSONReader()
parser = JSONNodeParser()

In [87]:

#document = reader.load_data("/Users/lavo/Desktop/Projects/rag/data/test.json")

document = reader.load_data("/Users/lavo/Desktop/Projects/rag/data/monthly_smart_app_data_Large Capacity Cutting Machine 1_2024-03.json")


Skipping non-JSON content: "machine_id": "Large Capacity Cutting Machine 1",
"month": "2024-03",
"KPIs": {
"average_cycle_time": {
"average": 0.0,
"min": 0.0,
"max": 0.0
"bad_cycles": {
"average": 0.0,
"min": 0.0,
"max": 0.0
"consumption": {
"average": 0.0013015137854767644,
"min": 0.0,
"max": 0.09477750094431775
"consumption_idle": {
"average": 0.0004203452003983412,
"min": 0.0,
"max": 0.0024920118485895794
"consumption_working": {
"average": 0.0021111835419563543,
"min": 0.0,
"max": 0.017477024244629727
"cost": {
"average": 0.0007648453461734984,
"min": 0.0,
"max": 0.0
"cost_idle": {
"average": 0.0,
"min": 0.0,
"max": 0.0
"cost_working": {
"average": 0.0,
"min": 0.0,
"max": 0.0
"cycles": {
"average": 0.6451612903225806,
"min": 0.0,
"max": 28754.0
"good_cycles": {
"average": 1228.6129032258063,
"min": 0.0,
"max": 28754.0
"idle_time": {
"average": 0.0,
"min": 0.0,
"max": 0.0
"offline_time": {
"average": 0.0,
"min": 0.0,
"max": 0.0
"power": {
"average": 0.0028015846972911963,
"min": 0.0

In [88]:
document

[Document(id_='c14f7f79-495e-48a3-abd8-2ef7bd2c15f7', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='"machine_id": "Large Capacity Cutting Machine 1",\n"month": "2024-03",\n"KPIs": {\n"average_cycle_time": {\n"average": 0.0,\n"min": 0.0,\n"max": 0.0\n"bad_cycles": {\n"average": 0.0,\n"min": 0.0,\n"max": 0.0\n"consumption": {\n"average": 0.0013015137854767644,\n"min": 0.0,\n"max": 0.09477750094431775\n"consumption_idle": {\n"average": 0.0004203452003983412,\n"min": 0.0,\n"max": 0.0024920118485895794\n"consumption_working": {\n"average": 0.0021111835419563543,\n"min": 0.0,\n"max": 0.017477024244629727\n"cost": {\n"average": 0.0007648453461734984,\n"min": 0.0,\n"max": 0.0\n"cost_idle": {\n"average": 0.0,\n"min": 0.0,\n"max": 0.0\n"cost_working": {\n"average": 0.0,\n"min": 0.0,\n"max": 0.0\n"cycles": {\n"average": 0.6451612903225806,\n"min": 0.0,\n"max": 28754.0\n"good_cycles": {\n"average": 1228.6129032258063,\n"min": 0

In [89]:
nodes = parser.get_nodes_from_documents(document)


In [90]:
nodes

[]