In [1]:
import json
import sys
# !pip install pympler


# Functions for reading and writing JSON files

In [2]:
def read_json_from_file(file_path):
    """
    Load and parse a JSON document from disk.

    :param file_path: Path of the JSON file to read.
    :return: The decoded JSON value (dict, list, str, number, bool or None).
    :raises OSError: If the file cannot be opened.
    :raises json.JSONDecodeError: If the content is not valid JSON.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # default, which can mis-decode non-ASCII text on some systems.
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)
    
def save_json_to_file(data, filename):
    """
    Serialise ``data`` as JSON and write it to ``filename``.

    The target file is opened in write mode, so existing content is replaced.
    Output is pretty-printed with a two-space indent.

    :param data: JSON-serialisable value to store.
    :param filename: Path of the file to write.
    :return: None.
    """
    with open(filename, "w") as out_file:
        json.dump(data, out_file, indent=2)

# Remove keys whose name contains "/key"

In [4]:
def filter_json_remove_slash_key(data, pattern="/key"):
    """
    Recursively removes keys from a JSON-like structure if they contain the given pattern.

    Dictionaries are rebuilt without the matching keys. Lists are rebuilt with
    each element filtered the same way, so dictionaries stored inside arrays
    are cleaned as well. The input is never modified.

    :param data: Dictionary (or list / scalar) to filter.
    :param pattern: String pattern to check in keys (default is "/key").
    :return: New structure with filtered keys; scalars are returned as is.
    """
    if isinstance(data, list):
        # Descend into arrays so nested dictionaries are filtered too.
        return [filter_json_remove_slash_key(item, pattern) for item in data]

    if not isinstance(data, dict):
        return data  # Scalars carry no keys, return as is

    new_dict = {}
    for k, v in data.items():
        # Only string keys can contain the pattern; other keys are kept.
        if isinstance(k, str) and pattern in k:
            continue
        new_dict[k] = filter_json_remove_slash_key(v, pattern)

    return new_dict


In [None]:
# Load 'abscend.json' and strip every key that contains the default "/key"
# pattern. The result stays bound to the notebook-level name `json2`.
json2 = read_json_from_file('abscend.json')
json2= filter_json_remove_slash_key(json2)


In [3]:
def filter_json_remove_slash_key(data, pattern="/key"):
    """
    Return a copy of ``data`` without any key that contains ``pattern``.

    NOTE: an earlier cell of this notebook defines a function with the same
    name. When the notebook is run top to bottom, this later definition is
    the one in effect, so it must accept the same inputs: dictionaries,
    lists and plain scalars.

    :param data: Dictionary, list or scalar to filter; it is not modified.
    :param pattern: Substring that marks a key for removal (default "/key").
    :return: A filtered copy for dictionaries and lists; scalars unchanged.
    """
    # Scalars (and None) have nothing to filter. Without this guard a
    # non-dict input would fail on `.items()`.
    if not isinstance(data, (dict, list)):
        return data

    # Arrays may hold dictionaries, so filter each element.
    if isinstance(data, list):
        return [filter_json_remove_slash_key(element, pattern) for element in data]

    # Rebuild the dictionary, skipping keys that contain the pattern and
    # filtering every surviving value recursively.
    return {
        name: filter_json_remove_slash_key(value, pattern)
        for name, value in data.items()
        if not (isinstance(name, str) and pattern in name)
    }


In [6]:
def filter_json_remove_system_keys(data):
    """
    Remove selected entries from the 'System' section, or filter a list of entries.

    Behaviour by input type:

    * dict: the keys 'Buffers', 'Transition', 'Flush' and 'Tmp' are removed
      from ``data['System']`` when that value is a dictionary. The change is
      made in place and the same ``data`` object is returned.
    * list: a new list is returned without the dictionary entries whose
      "key" value is a string containing "/last_written". Entries that are
      not dictionaries, or have no such "key", are kept.
    * anything else: returned unchanged.

    :param data: Dictionary, list or scalar to process.
    :return: The processed structure as described above.
    """
    # Keys dropped from the 'System' section.
    keys_to_remove = ('Buffers', 'Transition', 'Flush', 'Tmp')

    if isinstance(data, dict):
        system_section = data.get('System')
        # Only a dictionary can be trimmed by key; leave other values alone.
        if isinstance(system_section, dict):
            for key in keys_to_remove:
                system_section.pop(key, None)
        return data

    if isinstance(data, list):
        def is_last_written_entry(entry):
            # An entry is dropped only when it is a dict whose "key" is a
            # string mentioning "/last_written".
            if not isinstance(entry, dict):
                return False
            entry_key = entry.get("key", "")
            return isinstance(entry_key, str) and "/last_written" in entry_key

        return [entry for entry in data if not is_last_written_entry(entry)]

    return data

In [7]:
def filter_json_remove_legacytrigger(data):
    """
    Recursively remove the entries located at "/Detectors/DetXX/Settings/LegacyTrigger".

    The structure is walked while the slash-joined path of each key is
    tracked. A key is deleted when its path contains both "/Detectors/Det"
    and "/Settings/LegacyTrigger". Because the key itself is part of that
    path, flat keys that already spell out the whole path are still removed,
    as are nested keys such as data['Detectors']['Det01']['Settings']['LegacyTrigger'].

    The removal happens in place.

    :param data: Dictionary / list structure to trim.
    :return: The same ``data`` object after trimming.
    """
    def recursive_trim(node, path=""):
        if isinstance(node, dict):
            # Iterate over a snapshot of the keys because entries are deleted
            # while walking.
            for key in list(node):
                full_path = f"{path}/{key}"
                if "/Detectors/Det" in full_path and "/Settings/LegacyTrigger" in full_path:
                    del node[key]
                else:
                    recursive_trim(node[key], full_path)
        elif isinstance(node, list):
            # List items do not add a path component of their own.
            for item in node:
                recursive_trim(item, path)

    recursive_trim(data)

    return data


In [82]:
# Removing last_readout duplicate time 

In [25]:
# def filter_data_remove_duplicate_last_written1(data):
#     seen_values = set()  # Stores unique "last_written" values

#     def recurse(d):
#         if isinstance(d, dict):
#             new_dict = {}
#             for k, v in d.items():
#                 if isinstance(v, dict) and "last_written" in v:
#                     last_written_val = v["last_written"]
#                     if last_written_val not in seen_values:
#                         seen_values.add(last_written_val)
#                         new_dict[k] = v  # Keep the first occurrence
#                 else:
#                     # Recursively process nested dictionaries and lists
#                     new_dict[k] = recurse(v)
#             return new_dict
#         elif isinstance(d, list):
#             return [recurse(item) for item in d]
#         return d

#     # First pass to collect duplicates
#     filtered_data = recurse(data)

#     return filtered_data


In [None]:
# json2 = read_json_from_file('savedata.json')

# json2 = filter_json_remove_slash_key(json2)


# json2 = filter_json_remove_system_keys(json2)


# json2 = filter_json_remove_legacytrigger(json2)


# json2 = json_filter_remove_last_written(json2)
# save_json_to_file(json2, "s")

In [8]:
# Function to recursively remove keys ending with "last_written"
def json_filter_remove_last_written(data):
    """
    Return a copy of ``data`` without keys whose name ends with "last_written".

    Dictionaries and lists are rebuilt recursively; any other value is
    returned unchanged. The input structure is not modified.
    """
    if isinstance(data, list):
        return [json_filter_remove_last_written(element) for element in data]

    if not isinstance(data, dict):
        return data

    cleaned = {}
    for name, value in data.items():
        if name.endswith("last_written"):
            continue  # drop this entry together with everything below it
        cleaned[name] = json_filter_remove_last_written(value)
    return cleaned

In [9]:
import re

def filter_json_differ_Readback_Setting(d):
    """
    Filters out keys from the 'Settings' section of Detectors/DetXX if they match the values in the 'Readback' section.
    Keeps the keys that differ between the 'Settings' and 'Readback' paths.

    For every child of ``d['Detectors']`` whose name is "Det" followed by
    digits, and which holds both a 'Settings' and a 'Readback' dictionary,
    the 'Settings' dictionary is reduced as follows:

    * a key whose value equals the 'Readback' value under the same key is dropped;
    * when both values are dictionaries they are compared key by key, and
      the nested dictionary is dropped if nothing in it differs;
    * a key with no counterpart in 'Readback' is kept, since there is
      nothing to match it against.

    'Readback' and every other part of the structure are returned unchanged.
    The result is a rebuilt copy (dicts and lists); the input is not modified.

    :param d: Parsed JSON structure (normally a dictionary).
    :return: Filtered copy of ``d``.
    """
    def copy_tree(node):
        # Rebuild dicts and lists so the caller's data is never touched.
        if isinstance(node, dict):
            return {key: copy_tree(value) for key, value in node.items()}
        if isinstance(node, list):
            return [copy_tree(item) for item in node]
        return node

    def prune_settings(settings, readback):
        # Keep only the entries of `settings` that differ from `readback`.
        kept = {}
        for key, value in settings.items():
            if key not in readback:
                kept[key] = value
                continue
            counterpart = readback[key]
            if isinstance(value, dict) and isinstance(counterpart, dict):
                nested = prune_settings(value, counterpart)
                if nested:  # only keep sub-sections that still hold a difference
                    kept[key] = nested
            elif value != counterpart:
                kept[key] = value
        return kept

    filtered_data = copy_tree(d)
    if not isinstance(filtered_data, dict):
        return filtered_data

    detectors = filtered_data.get('Detectors')
    if not isinstance(detectors, dict):
        return filtered_data

    for name, detector in detectors.items():
        # Only real detector entries (Det<number>) are compared.
        if not (isinstance(name, str) and re.fullmatch(r"Det\d+", name)):
            continue
        if not isinstance(detector, dict):
            continue
        settings = detector.get('Settings')
        readback = detector.get('Readback')
        if isinstance(settings, dict) and isinstance(readback, dict):
            # Replacing the value of an existing key keeps the key order.
            detector['Settings'] = prune_settings(settings, readback)

    return filtered_data


In [17]:
# def find_differences(start_obj, end_obj, parent_key='', differences=None):
#     if differences is None:
#         differences = {}

#     # Get all the unique keys from both objects
#     all_keys = set(start_obj.keys()).union(set(end_obj.keys()))

#     # Iterate through all keys
#     for key in all_keys:
#         # Build the current key's path
#         current_key = parent_key + key

#         # Case where the values are lists
#         if isinstance(start_obj.get(key), list) or isinstance(end_obj.get(key), list):
#             # Compare lists by joining them into a string for easy comparison
#             if str(start_obj.get(key)) != str(end_obj.get(key)):
#                 differences[current_key] = end_obj.get(key)

#         # Case where the values are dictionaries
#         elif isinstance(start_obj.get(key), dict) and isinstance(end_obj.get(key), dict):
#             # Recurse into the nested dictionaries with updated parent_key
#             find_differences(start_obj.get(key), end_obj.get(key), current_key + '/', differences)

#         # Case where the values are primitive types (strings, numbers, etc.)
#         elif start_obj.get(key) != end_obj.get(key):
#             differences[current_key] = end_obj.get(key)

#     return differences

In [10]:
def find_differences(start_obj, end_obj):
    """
    Collect what is new or changed in ``end_obj`` compared with ``start_obj``.

    Only the keys of ``end_obj`` are visited, so keys that exist solely in
    ``start_obj`` are not reported. Nested dictionaries are compared
    recursively and appear in the result only when they contain at least one
    difference. Any other pair of values (lists included) is compared with
    ``!=`` and the value from ``end_obj`` is reported when they differ.

    :param start_obj: Reference dictionary.
    :param end_obj: Dictionary whose additions and changes are reported.
    :return: Dictionary mirroring the nesting of ``end_obj``, restricted to
             new or changed entries.
    """
    def recursive_diff(start, end):
        changed = {}
        for key, end_value in end.items():
            if key not in start:
                # Entirely new entry on the end side.
                changed[key] = end_value
                continue

            start_value = start[key]
            if isinstance(start_value, dict) and isinstance(end_value, dict):
                nested_changes = recursive_diff(start_value, end_value)
                if nested_changes:
                    changed[key] = nested_changes
            elif start_value != end_value:
                changed[key] = end_value
        return changed

    return recursive_diff(start_obj, end_obj)
# differences = find_differences(start_json, end_json)
# print(differences)


In [18]:
# Run both dumps through the SAME filter chain before comparing them, so the
# diff reflects the data and not a difference in preprocessing.
json2 = read_json_from_file('savedata1.json')
json2 = filter_json_remove_slash_key(json2)
json2 = filter_json_remove_system_keys(json2)
json2 = filter_json_remove_legacytrigger(json2)
json2 = json_filter_remove_last_written(json2)
# json2 = filter_json_differ_Readback_Setting(json2)
json1 = read_json_from_file('savedata.json')
json1 = filter_json_remove_slash_key(json1)
json1 = filter_json_remove_system_keys(json1)
# Previously skipped for json1 only; applied here to mirror the json2 chain.
json1 = filter_json_remove_legacytrigger(json1)
json1 = json_filter_remove_last_written(json1)
# json1 = filter_json_differ_Readback_Setting(json1)
# json2 is passed as start_obj and json1 as end_obj, so the result lists the
# entries of 'savedata.json' that are new or changed relative to 'savedata1.json'.
finddifference=find_differences(json2,json1)

In [16]:
# Display the diff dictionary bound to `finddifference` as the cell output.
finddifference

{'Logger': {'Channels': {'0': {'Statistics': {'Disk level': {'access_mode': 7},
     'Bytes written': {'access_mode': 7},
     'Files written': {'access_mode': 7},
     'Events written': {'access_mode': 7},
     'Bytes written total': {'access_mode': 7},
     'Bytes written subrun': {'access_mode': 7},
     'Bytes written uncompressed': {'access_mode': 7}}}}},
 'Equipment': {'fridge_history': {'Variables': {'FTMP': {'notify_count': 1}}},
  'readouthistory10': {'Variables': {'LIVE': {'notify_count': 1},
    'RODT': {'notify_count': 1},
    'SCLR': {'notify_count': 1},
    'SLOW': {'notify_count': 1},
    'STLE': {'notify_count': 1}}},
  'readouthistory11': {'Variables': {'BSLN': {'notify_count': 1},
    'LIVE': {'notify_count': 1},
    'RODT': {'notify_count': 1},
    'SCLR': {'notify_count': 1},
    'SLOW': {'notify_count': 1},
    'STLE': {'notify_count': 1}}},
  'readouthistory12': {'Variables': {'BSLN': {'notify_count': 1},
    'LIVE': {'notify_count': 1},
    'RODT': {'notify_count

In [19]:
# Same comparison as the `finddifference` cell, but with the Readback/Settings
# filter enabled. Both dumps go through the SAME filter chain so the diff is
# not skewed by preprocessing.
json2 = read_json_from_file('savedata1.json')
json2 = filter_json_remove_slash_key(json2)
json2 = filter_json_remove_system_keys(json2)
json2 = filter_json_remove_legacytrigger(json2)
json2 = json_filter_remove_last_written(json2)
json2 = filter_json_differ_Readback_Setting(json2)
json1 = read_json_from_file('savedata.json')
json1 = filter_json_remove_slash_key(json1)
json1 = filter_json_remove_system_keys(json1)
# Previously skipped for json1 only; applied here to mirror the json2 chain.
json1 = filter_json_remove_legacytrigger(json1)
json1 = json_filter_remove_last_written(json1)
json1 = filter_json_differ_Readback_Setting(json1)
# json2 is passed as start_obj and json1 as end_obj.
finddifference3=find_differences(json2,json1)

In [None]:
# json2 = filter_json_remove_slash_key(json2)
# json2 = filter_json_remove_system_keys(json2)
# json2 = filter_json_remove_legacytrigger(json2)
# json2 =filter_remove_last_written(json2)
# json2 = filter_json_differ_Readback_Setting(json2)

In [20]:
# Not disk space: this measures in-memory size, so it is not relevant here
# json2 = read_json_from_file('savedata1.json')
# size_in_bytes_comprehensive = asizeof.asizeof(json2)
# size_in_mb = size_in_bytes_comprehensive / (1024)
# # Print size in KB with 2 decimal places
# print(f"Size of json2 : {size_in_mb:.2f} KB")

# json2 = filter_json_remove_slash_key(json2)
# size_in_bytes_comprehensive = asizeof.asizeof(json2)
# size_in_mb = size_in_bytes_comprehensive / (1024)
# print(f"Size of json2 remove /Key : {size_in_mb:.2f} KB")

# json2 = filter_json_remove_system_keys(json2)
# size_in_bytes_comprehensive = asizeof.asizeof(json2)
# size_in_mb = size_in_bytes_comprehensive / (1024)
# print(f"Size of json2 remove system expect client: {size_in_mb:.2f} KB")

# json2 = filter_json_remove_legacytrigger(json2)
# size_in_bytes_comprehensive = asizeof.asizeof(json2)
# size_in_mb = size_in_bytes_comprehensive / (1024)
# print(f"Size of json2 remove legacytrigger: {size_in_mb:.2f} KB")

# json2 =json_filter_remove_last_written(json2)
# size_in_bytes_comprehensive = asizeof.asizeof(json2)
# size_in_mb = size_in_bytes_comprehensive / (1024)
# print(f"Size of json2 remove last written: {size_in_mb:.2f} KB")

# # either way is same remove  not needed
# json2 = filter_json_differ_Readback_Setting(json2)
# size_in_bytes_comprehensive = asizeof.asizeof(json2)
# size_in_mb = size_in_bytes_comprehensive / (1024)
# print(f"Size of json2 remove settings if Readback is same as Detectors/DetXX: {size_in_mb:.2f} KB")

# # either way is same remove 
# size_in_bytes_comprehensive = asizeof.asizeof(finddifference3)
# size_in_mb = size_in_bytes_comprehensive / (1024)
# print(f"Size of json2 remove if start is same as stop: {size_in_mb:.2f} KB")



In [21]:
# Apply the filters to 'savedata.json' one at a time and write the
# intermediate result after each stage to its own file.
json2 = read_json_from_file('savedata.json')

# Stage 2: drop keys containing "/key".
json2 = filter_json_remove_slash_key(json2)
save_json_to_file(json2, "savedata_2_removed_slash_key.json")

# Stage 3: drop the selected entries of the 'System' section.
json2 = filter_json_remove_system_keys(json2)
save_json_to_file(json2, "savedata_3_removed_system_keys.json")

# Stage 4: drop the LegacyTrigger entries.
json2 = filter_json_remove_legacytrigger(json2)
save_json_to_file(json2, "savedata_4_removed_legacytrigger.json")

# Stage 5: drop keys ending with "last_written".
json2 = json_filter_remove_last_written(json2)
save_json_to_file(json2, "savedata_5_removed_last_written.json")

# Stage 6: apply the Readback/Settings filter.
json2 = filter_json_differ_Readback_Setting(json2)
save_json_to_file(json2, "savedata_6_removed_readback_setting.json")


# `finddifference` is not computed in this cell; it must already exist from
# the comparison cell that assigns it.
save_json_to_file(finddifference, "finddifference.json")

print("saving Json" )

saving Json


In [24]:
import os

# Size in KB for each processing step, derived from hard-coded byte counts
# (bytes / 1024). Insertion order is the order the table is printed in.
file_sizes_on_disk = {
    label: size_in_bytes / 1024
    for label, size_in_bytes in (
        ("savedata.json_start_original", 5433740),
        ("savedata.json_end_original", 5433740),
        ("savedata_2_removed_slash_key.json", 3992631),
        ("savedata_3_removed_system_keys.json", 2835593),
        ("savedata_4_removed_legacytrigger.json", 2835593),
        ("savedata_5_removed_last_written.json", 2101905),
        ("savedata_6_removed_readback_setting.json", 2101905),
        ("difference in end ODB than start", 7955),
    )
}

# Table header followed by a separator line
print(f"{'File Name':<40}{'Disk Size (KB)':>20}")
print("=" * 60)

# One row per entry: left-aligned label, right-aligned size with two decimals
for file_name, size_kb in file_sizes_on_disk.items():
    print(f"{file_name:<40}{size_kb:>20.2f}")



File Name                                     Disk Size (KB)
savedata.json_start_original                         5306.39
savedata.json_end_original                           5306.39
savedata_2_removed_slash_key.json                    3899.05
savedata_3_removed_system_keys.json                  2769.13
savedata_4_removed_legacytrigger.json                2769.13
savedata_5_removed_last_written.json                 2052.64
savedata_6_removed_readback_setting.json             2052.64
difference in end ODB than start                        7.77
