In [36]:
# Make project modules importable: resolve the directory two levels above the
# current working directory ('../../') and register it on sys.path exactly once.
import os
import sys
module_path = os.path.abspath(os.path.join('../../'))
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

import json
import numpy as np
import pandas as pd

from preparations import things_provider

# Read the reconstructed cycles from disk. The code further down iterates this
# structure as thing -> program -> list of cycles, where each cycle carries the
# keys 'start', 'end' and 'results'.
with open('observations_example_timerange_reconstructed_cycles.json', 'r') as cycles_file:
    observations_example_timerange_reconstructed_cycles = json.loads(cycles_file.read())
    
# Sanity check: for every reconstructed cycle, the number of entries in
# 'results' must equal 'end' - 'start' (presumably timestamps in seconds, i.e.
# one result per second -- confirm against the reconstruction step).
#
# The flags are collected in a plain list and converted to an array once;
# calling np.append inside the loop would copy the whole array on every
# iteration (quadratic in the number of cycles).
differing_flags = []

for thing, programs in observations_example_timerange_reconstructed_cycles.items():
    for program, cycles in programs.items():
        for cycle in cycles:
            cycle_start = cycle['start']
            cycle_end = cycle['end']
            diff = cycle_end - cycle_start
            count = len(cycle['results'])
            differing_flags.append(diff != count)

# 1.0 marks a cycle whose length differs from its result count, 0.0 a
# consistent one (float dtype kept so the array looks the same as before).
seconds_in_cycle_differing = np.array(differing_flags, dtype=float)

# Cast to int so the report shows a whole number of cycles (e.g. "3", not "3.0").
diff_count = int(np.sum(seconds_in_cycle_differing))
if diff_count == 0:
    print('For all cycles the number of results is equal to the difference between the start and end time.')
else:
    print(f'Attention: Number of cycles with differing start/end times: {diff_count}')

# Pick 1000
# - random primary signal state changes out of the cycles and check if there exists a corresponding primary signal observation
# - random cycle start seconds and check if there exists a corresponding cycle second observation
def _single_datastream_id_by_thing_name(filtered_things):
    """Map each thing's name to the '@iot.id' of its only remaining datastream.

    Args:
        filtered_things: Iterable of thing dicts whose 'Datastreams' list has
            already been filtered down to one kind of datastream.

    Returns:
        Dict from thing['name'] to the '@iot.id' of that thing's datastream.
        Things without any datastream are left out.

    Raises:
        RuntimeError: If a thing still has more than one datastream after
            filtering.
    """
    ids_by_name = {}
    for thing in filtered_things:
        datastreams = thing['Datastreams']
        if len(datastreams) == 0:
            # No datastream of the requested kind for this thing
            continue
        if len(datastreams) > 1:
            # Actually raise: merely constructing the exception object would
            # let an ambiguous mapping slip through unnoticed.
            raise RuntimeError("This should not happen and indicates an error in the ThingsProvider.")
        ids_by_name[thing['name']] = datastreams[0]['@iot.id']
    return ids_by_name


# Primary signal datastream ID per thing name.
tp = things_provider.ThingsProvider()
tp.filter_only_primary_signal_datastreams()
things = tp.get_things()
primary_signal_datastreams_by_thing_name = _single_datastream_id_by_thing_name(things)

# Cycle second datastream ID per thing name (fresh provider, different filter).
tp = things_provider.ThingsProvider()
tp.filter_only_cycle_second_datastreams()
things = tp.get_things()
cycle_second_datastreams_by_thing_name = _single_datastream_id_by_thing_name(things)
    
# Raw observations for the example time range; the spot check that follows
# filters this frame on the columns 'datastream_id', 'result' and
# 'phenomenon_time'.
CSV_FILE = 'observations_example_timerange.csv'
observations = pd.read_csv(filepath_or_buffer=CSV_FILE)

# Spot check: repeatedly draw a random cycle and verify that
#   1. one randomly chosen state change inside it, and
#   2. the cycle's start second
# each match exactly one row of the raw observations. Draws that cannot be
# checked (program without cycles, cycle without state changes) are retried
# and do not count towards the 1000 checks.
primary_signal_problems = []
cycle_second_problems = []
checked_count = 0

thing_names = list(observations_example_timerange_reconstructed_cycles)

while checked_count < 1000:
    # Random thing, then a random program of that thing
    thing = np.random.choice(thing_names)
    cycles_by_program = observations_example_timerange_reconstructed_cycles[thing]
    program = np.random.choice(list(cycles_by_program))
    candidate_cycles = cycles_by_program[program]
    if not candidate_cycles:
        continue
    # Random cycle of that program
    cycle = np.random.choice(candidate_cycles)

    # Every position whose result differs from its predecessor is a state
    # change, stored as (new result, cycle start + offset within the cycle).
    results = cycle['results']
    state_changes = [
        (current, cycle['start'] + offset)
        for offset, (previous, current) in enumerate(zip(results, results[1:]), start=1)
        if previous is not None and current != previous
    ]
    if not state_changes:
        continue

    # Random state change
    expected_result, expected_time = state_changes[np.random.choice(len(state_changes))]

    # Exactly one primary signal observation must carry this result at this time
    datastream_id_primary_signal = primary_signal_datastreams_by_thing_name[thing]
    primary_signal_mask = (
        (observations['datastream_id'] == datastream_id_primary_signal)
        & (observations['result'] == expected_result)
        & (observations['phenomenon_time'] == expected_time)
    )
    primary_signal_matches = observations[primary_signal_mask]
    if len(primary_signal_matches) != 1:
        primary_signal_problems.append(
            (thing, datastream_id_primary_signal, program, cycle, primary_signal_matches)
        )

    # Exactly one cycle second observation must exist at the cycle's start
    cycle_start = cycle['start']
    datastream_id_cycle_second = cycle_second_datastreams_by_thing_name[thing]
    cycle_second_mask = (
        (observations['datastream_id'] == datastream_id_cycle_second)
        & (observations['phenomenon_time'] == cycle_start)
    )
    cycle_second_matches = observations[cycle_second_mask]
    if len(cycle_second_matches) != 1:
        cycle_second_problems.append(
            (thing, datastream_id_cycle_second, program, cycle, cycle_second_matches)
        )

    checked_count += 1
    
# Report the outcome of the spot check: first the primary signal state
# changes, then the cycle start seconds. Each problem entry is printed as-is.
if not primary_signal_problems:
    print('For all primary signal state changes there exists a corresponding observation.')
else:
    print('Attention: There exists at least one primary signal state change without a corresponding observation. Problems: ')
    for primary_signal_problem in primary_signal_problems:
        print(primary_signal_problem)

if not cycle_second_problems:
    print('For all starts of cycles there exists a corresponding observation.')
else:
    print('Attention: There exists at least one cycle start second without a corresponding observation. Problems: ')
    for cycle_second_problem in cycle_second_problems:
        print(cycle_second_problem)
            


For all cycles the number of results is equal to the difference between the start and end time.
Amount of things: 19844
Amount of things: 19844
For all primary signal state changes there exists a corresponding observation.
For all starts of cycles there exists a corresponding observation.
