## generator.ipynb

Random VSS generator

In [22]:
# import necessary libraries
import json
import jsonpatch
import random

In [23]:
class vss_json:
    """VSS JSON manager class"""
        
    def __init__(self, **kwargs):
        """
        Initialize VSS JSON data
        
        :param kwargs: EITHER of the following is required
            - file: JSON file to load
            - data: JSON data (for internal use)
        """
        assert 'file' in kwargs or 'data' in kwargs
        self.initialized = False        
        
        if 'file' in kwargs:
            with open(kwargs['file'], 'r') as f:
                self.data = json.load(f)  
            self.remove_key('description', 'uuid', 'type', 'comment', 'deprecated')
        elif 'data' in kwargs:
            self.data = kwargs['data']
            self.initialized = True
        else:
            raise ValueError('Either file or data must be provided')
        
    def print(self, indent: int=4, with_index: bool=False) -> None:
        """Print JSON data"""
        txt = json.dumps(self.data, indent=indent)
        if with_index:
            for i, line in enumerate(txt.split('\n'), 1):
                print(f'{i:05d}: {line}')
        else:
            print(txt)
        
    def remove_key(self, *keys: str) -> None:
        """
        Remove keys from JSON data recursively
        
        :param keys: Keys to remove
        """
        for leaf, _ in self.leaf_nodes():
            for key in keys:
                if key in leaf:
                    del leaf[key]
            
    def leaf_nodes(self, data=None, parent_key=''):
        """
        Traverse and yield all leaf values (dict/scaler) in JSON data
        
        :param data: JSON data to traverse (default: self.data)
        :param parent_key: Parent key to track the hierarchy (for debugging and display purposes)
        """
        if data is None:
            data = self.data
        
        if not self.initialized:
            if isinstance(data, dict):
                is_leaf = all(not isinstance(value, dict) for value in data.values())
                if is_leaf:
                    yield data, parent_key
                else:
                    for key, value in data.items():
                        yield from self.leaf_nodes(value, parent_key + '.' + key if parent_key else key)
            elif isinstance(data, list):
                for index, item in enumerate(data):
                    yield from self.leaf_nodes(item, parent_key + f'[{index}]')
        else:
            if isinstance(data, dict):
                for key, value in data.items():
                    yield from self.leaf_nodes(value, parent_key + '.' + key if parent_key else key)
            elif isinstance(data, list):
                for index, item in enumerate(data):
                    yield data, parent_key
            else:
                yield data, parent_key
                
    
    def generate(self, dataset_size: float=1.0) -> 'vss_json':
        """
        Generate an initial random dataset based on the JSON schema
        
        :param dataset_size: Dataset size ratio [0.0, 1.0]
        """
        result = {}
        
        for leaf, parent in self.leaf_nodes():
            # !! only add the node by dataset_size
            if random.random() > dataset_size:
                continue
            
            # step 1: set parent (without 'children' dictionary)
            hierarchy = [x for x in parent.split('.') if x != 'children']
            new = result
            for idx, node in enumerate(hierarchy):
                if node not in new:
                    new[node] = {}
                    if idx == len(hierarchy) - 1:
                        break
                    new = new[node]
                
            # step 2: set leaf by 'datatype'
            dtype = leaf['datatype']
            if dtype == 'boolean':
                new[node] = random.choice([True, False])
            elif dtype in ['int8', 'uint8', 'float'] and \
                'unit' in leaf and leaf['unit'] == 'percent':
                new[node] = random.random() * 100
                if dtype != 'float':
                    new[node] = int(new[node])
            elif 'allowed' in leaf:
                new[node] = random.choice(leaf['allowed'])
            elif dtype == 'double' or dtype == 'float':
                new[node] = random.random() * 100
            elif dtype == 'float[]':
                new[node] = []
                for i in range(random.randint(1, 5)):
                    new[node].append(random.random() * 100)
            elif dtype in ['int8', 'int16', 'int32']:
                new[node] = random.randint(-100, 100)
            elif dtype == 'string':
                # generate random str`ing
                new[node] = ''.join(random.choices('abcdefghijklmnopqrstuvwxyz', k=10))
            elif dtype == 'string[]':
                new[node] = []
                for i in range(random.randint(1, 5)):
                    new[node].append(''.join(random.choices('abcdefghijklmnopqrstuvwxyz', k=10)))
            elif dtype in ['uint8', 'uint16', 'uint32']:
                new[node] = random.randint(0, 100)
            elif dtype == 'uint8[]':
                new[node] = []
                for i in range(random.randint(1, 5)):
                    new[node].append(random.randint(0, 100))
                    
            # step 3: set the leaf
        
        return vss_json(data=result)
    
    def generate_next(self, change_rate):
        assert self.initialized, 'vss_json must be initialized with generate()'
        
        result = {}
        for leaf_data, parent in self.leaf_nodes():
            hierarchy = [x for x in parent.split('.') if x != 'children']
            current = result
            for idx, node in enumerate(hierarchy):
                if node not in current:
                    current[node] = {}
                    if idx == len(hierarchy) - 1:
                        break
                    current = current[node]
                    
            if isinstance(leaf_data, list):
                current[node] = leaf_data[:]
                continue        
            
            if random.random() > change_rate:
                current[node] = leaf_data
                continue
            
            match leaf_data:
                case str():
                    current[node] = ''.join(random.choices('abcdefghijklmnopqrstuvwxyz', k=10))
                case bool():
                    current[node] = not leaf_data
                case int():
                    current[node] = random.randint(0, 100)
                case float():
                    current[node] = random.random() * 100
        return vss_json(data=result), vss_json(data=jsonpatch.make_patch(self.data, result).patch)
    
    def save(self, file: str) -> None:
        """Save JSON data to file"""
        with open(file, 'w') as f:
            json.dump(self.data, f, indent=4)

In [24]:
# Load the VSS schema from a JSON file (path is relative to the notebook's working directory)
vss = vss_json(file='../vss_rel_4.2.json')

In [25]:
# Inspect the JSON contents
# vss.print(2, True)

In [26]:
# Generate initial random dataset
# 1.0 : dataset size ratio, i.e. every schema leaf is kept
first = vss.generate(1.0)
# Print with a 2-space indent and numbered lines
first.print(2, True)

00001: {
00002:   "Vehicle": {
00003:     "ADAS": {
00004:       "ABS": {
00005:         "IsEnabled": false
00006:       }
00007:     }
00008:   },
00009:   "ADAS": {
00010:     "ABS": {
00011:       "IsEngaged": false
00012:     }
00013:   },
00014:   "ABS": {
00015:     "IsError": true
00016:   },
00017:   "ActiveAutonomyLevel": "SAE_4",
00018:   "CruiseControl": {
00019:     "IsActive": false
00020:   },
00021:   "IsEnabled": false,
00022:   "IsError": false,
00023:   "SpeedSet": 28.16553862222514,
00024:   "DMS": {
00025:     "IsEnabled": false
00026:   },
00028:   "EBA": {
00029:     "IsEnabled": true
00030:   },
00031:   "IsEngaged": true,
00032:   "EBD": {
00033:     "IsEnabled": false
00034:   },
00035:   "ESC": {
00036:     "IsEnabled": false
00037:   },
00038:   "IsStrongCrossWindDetected": false,
00039:   "RoadFriction": {
00040:     "LowerBound": 60.92671858689817
00041:   },
00042:   "MostProbable": 99.074253428698,
00043:   "UpperBound": 73.10333009012552,
00044:   "IsAut

In [27]:
# Generate random subset (if required)
# small = vss.generate(0.005)
# small.print(2, True)

In [28]:
# Generate next random dataset based on initial one
# 0.2 : 20% of the data will be changed
# Returns the new dataset and the JSON patch from `first` to it
second, patch = first.generate_next(0.2)
# Print with a 2-space indent and numbered lines
second.print(2, True)

00001: {
00002:   "Vehicle": {
00003:     "ADAS": {
00004:       "ABS": {
00005:         "IsEnabled": false
00006:       }
00007:     }
00008:   },
00009:   "ADAS": {
00010:     "ABS": {
00011:       "IsEngaged": false
00012:     }
00013:   },
00014:   "ABS": {
00015:     "IsError": true
00016:   },
00017:   "ActiveAutonomyLevel": "SAE_4",
00018:   "CruiseControl": {
00019:     "IsActive": true
00020:   },
00021:   "IsEnabled": true,
00022:   "IsError": false,
00023:   "SpeedSet": 28.16553862222514,
00024:   "DMS": {
00025:     "IsEnabled": false
00026:   },
00028:   "EBA": {
00029:     "IsEnabled": true
00030:   },
00031:   "IsEngaged": true,
00032:   "EBD": {
00033:     "IsEnabled": false
00034:   },
00035:   "ESC": {
00036:     "IsEnabled": true
00037:   },
00038:   "IsStrongCrossWindDetected": false,
00039:   "RoadFriction": {
00040:     "LowerBound": 34.68654028185373
00041:   },
00042:   "MostProbable": 99.074253428698,
00043:   "UpperBound": 73.10333009012552,
00044:   "IsAutoPo

In [31]:
# Show the JSON patch operations computed between `first` and `second`
patch.print(2, True)

00001: [
00002:   {
00003:     "op": "add",
00004:     "path": "/CellVoltages",
00005:     "value": [
00006:       2.1653647865574333,
00007:       33.754007941010386,
00008:       41.21457838319723,
00009:       30.532539793715095,
00010:       69.5738006491624
00011:     ]
00012:   },
00013:   {
00014:     "op": "replace",
00015:     "path": "/FuelSystem/AbsoluteLevel",
00016:     "value": 19.84434796116894
00017:   },
00018:   {
00019:     "op": "replace",
00020:     "path": "/Brand",
00021:     "value": "tnaculdzgz"
00022:   },
00023:   {
00024:     "op": "replace",
00025:     "path": "/TimeInUse",
00026:     "value": 8.12628011462695
00027:   },
00028:   {
00029:     "op": "replace",
00030:     "path": "/ChargeVoltage/DC",
00031:     "value": 93.44965032665063
00032:   },
00033:   {
00034:     "op": "replace",
00035:     "path": "/NumberOfValvesPerCylinder",
00036:     "value": 66
00037:   },
00038:   {
00039:     "op": "replace",
00040:     "path": "/Infotainment/HMI/Brightness",

In [30]:
# Save both generated datasets as JSON files in the current directory
first.save('./first.json')
second.save('./second.json')

In [32]:
# Save the JSON patch next to the datasets
patch.save('./patch.json')