# A how to guide for running self-check tests on the station.

You can find the tests in `/home/gsc/SKAO/ska-low-mccs-spshw/src/ska_low_mccs_spshw/station/tests`.

This group of tests is meant to run on an actual k8s deployment.

## Useful tools

- timer_print: a function that prints strings with delays and resets the cursor position after the delay.
- wait2finish: waits until a Tango Device is not busy anymore, and prints the elapsed time while waiting.
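- reboot_tpms: tries to fix tpms that are not online by turning them off and on again.
- sync_tpms: tries to bring all tpms to the `Synchronised` programming state.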

In [None]:
from ska_control_model import AdminMode
from datetime import datetime, timedelta
import tango
import time
import json
import os

def filter_devices(group_name: str) -> list:
    """Return the exported device names matching ``group_name`` that respond.

    The database can still list device strings that represent old devices.
    Every name returned by ``db.get_device_exported`` is probed with a
    ``state()`` call and only the names that answer are kept.

    :param group_name: filter passed to ``db.get_device_exported``,
        e.g. ``"low-mccs/tile/*"``.
    :return: the device names that answered the ``state()`` call, in the
        order the database returned them.
    """
    responsive = []
    for device_str in db.get_device_exported(group_name):
        try:
            # Proxy creation is inside the try as well, so a stale entry that
            # fails at construction is filtered out instead of aborting.
            tango.DeviceProxy(device_str).state()
        except Exception:
            # Deliberate best effort: any failure marks the entry as old.
            continue
        responsive.append(device_str)
    return responsive

def timer_print(string: str, delay: float) -> None:
    """Print ``string``, wait ``delay`` seconds, then return the cursor to the line start.

    Useful for progress feedback while waiting on something: the next call
    overwrites the previous text instead of scrolling the output.

    :param string: text to show, printed without a trailing newline.
    :param delay: time to sleep in seconds before resetting the cursor.
    """
    # flush so the text is visible during the sleep even when stdout is buffered
    print(string, end="", flush=True)
    time.sleep(delay)
    print("\r", end="", flush=True)

def wait2finish(device, additional_string=None, timeout=600, freq=0.1):
    """Wait until ``device`` has no long running commands left in its queue.

    Sleeps one second first, then polls ``device.longrunningcommandsinqueue``
    and prints the queue together with the elapsed polling time while waiting.

    :param device: object exposing a ``longrunningcommandsinqueue`` attribute.
    :param additional_string: optional text printed in front of the queue;
        ``None`` (the default) prints no prefix.
    :param timeout: maximum number of polls, NOT seconds. The longest wait is
        roughly ``timeout * freq`` seconds after the initial one-second sleep
        (60 s with the defaults).
    :param freq: delay between two polls, in seconds.
    """
    # Without this the default None would be printed literally as "None".
    prefix = "" if additional_string is None else additional_string
    time.sleep(1)
    for i in range(timeout):
        # Read once per poll so the value tested is the value printed.
        queue = device.longrunningcommandsinqueue
        # NOTE(review): an empty attribute may be read back as None instead of
        # an empty sequence depending on the client version - confirm.
        if queue is None or len(queue) == 0:
            break
        timer_print(f"{prefix}{queue}, {i*freq:.1f} seconds elapsed", freq)

def reboot_tpms(tpms):
    """Attempt to fix tpms that are not online by switching them off and on.

    All tpms are sent ``OFF`` and waited on, then sent ``ON`` and waited on.

    :param tpms: iterable of tpm device proxies.
    :return: ``True`` when every tpm reports ``tango.DevState.ON`` afterwards
        (also for an empty list), ``False`` otherwise.
    """
    for tpm in tpms:
        tpm.OFF()
    # Wait on every tpm, not just on the last one of the loop above.
    for tpm in tpms:
        wait2finish(tpm)

    for tpm in tpms:
        tpm.ON()
    for tpm in tpms:
        wait2finish(tpm)

    return all(tpm.state() == tango.DevState.ON for tpm in tpms)
    
def _all_synchronised(station):
    """Return True when every entry of ``station.TileProgrammingState`` is 'Synchronised'."""
    states = station.TileProgrammingState
    return states is not None and all(state == 'Synchronised' for state in states)


def sync_tpms(tpms, station, daq):
    """Try to bring every tpm of ``station`` to the 'Synchronised' state.

    Steps, each only performed while the tiles are not all synchronised:

    1. reboot the tpms; if that fails, cycle the station through standby/on
       and reboot the tpms once more;
    2. start acquisition, reconfigure and restart the daq in RAW_DATA mode and
       ask the station to send raw data samples five seconds in the future.

    Afterwards the tpms, the station and the daq get ``adminMode`` 2 (the
    value this guide uses for engineering mode) and the tile programming
    state is polled every 0.5 s for up to 120 polls.

    :param tpms: list of tpm device proxies.
    :param station: station device proxy.
    :param daq: daq receiver device proxy.
    :return: ``True`` once all tiles report 'Synchronised', ``False`` when the
        reboot fails twice or the final polling runs out.
    """
    # NOTE: the state check must compare each entry; comparing the bool
    # returned by any()/all() with a string never gives the intended answer.
    if not _all_synchronised(station):
        # Try and reboot tpms, if that fails, reboot station
        if not reboot_tpms(tpms):
            station.standby()
            wait2finish(station)
            station.ON()
            wait2finish(station)
            # if it fails again, sync fails
            if not reboot_tpms(tpms):
                return False

    if not _all_synchronised(station):
        # config daq and write data
        station.StartAcquisition('{}')
        daq.stop()
        wait2finish(daq)
        daq.Configure(
            json.dumps(
                {
                    "directory": "/",
                    "nof_tiles": len(tpms),
                    "description": station.name(),
                }
            )
        )
        daq.start('{"modes_to_start": "RAW_DATA"}')
        wait2finish(daq)

        daq_status = json.loads(daq.DaqStatus())
        print(daq_status)
        # Schedule the data a few seconds ahead so every tile gets the request in time.
        start_time = datetime.utcnow() + timedelta(seconds=5)
        station.SendDataSamples(
            json.dumps(
                {
                    "data_type": "raw",
                    "start_time": start_time.strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
                    "sync": True,
                }
            )
        )

        seconds_until_start = (start_time - datetime.utcnow()).total_seconds()
        if seconds_until_start > 0:
            print(f"Waiting {seconds_until_start} seconds for scan to start")
            time.sleep(seconds_until_start)

    # set tpms, station and daq to engineering mode; the attribute is spelled
    # adminMode, as in the rest of this guide
    for tpm in tpms:
        tpm.adminMode = 2
    station.adminMode = 2
    daq.adminMode = 2
    for tpm in tpms:
        wait2finish(tpm)

    for i in range(120):
        if _all_synchronised(station):
            print(f"{station.TileProgrammingState}")
            return True
        timer_print(f"time elapsed: {i/2}, tpms status: {station.TileProgrammingState}", 0.5)
    # Polling ran out without all tiles being synchronised.
    return False

# TANGO_HOST has to be set before the database connection is opened.
os.environ["TANGO_HOST"] = "databaseds.skaffold.svc.stfc-ral-k8s.skao.int:10000"
db = tango.Database()

# One proxy for every responsive device under low-mccs.
device_proxies = list(map(tango.DeviceProxy, filter_devices("low-mccs/*")))

# Proxies for the station, the daq receiver and every responsive tile.
station = tango.DeviceProxy("low-mccs/spsstation/stfc-ral-software")
daq = tango.DeviceProxy("low-mccs/daqreceiver/stfc-ral-software-bandpass")
tpms = list(map(tango.DeviceProxy, filter_devices("low-mccs/tile/*")))

# Quick sanity check that the station answers.
print(f"{station.state() = }")



## Setup

In general you want the devices used for the test to be in engineering mode. This part of the code loops over all proxies, prints their state and sets their admin mode to engineering. After 30 seconds it prints the state of every device again, and finally it makes sure the tpms are synchronised.

In [None]:
# Turn to engineering mode (adminMode 2)
for device in device_proxies:
    print(f"{device} is in {device.state()} state, turning it to Engineering mode")
    device.adminMode = 2

# Give the devices 30 seconds to start, showing the progress as a percentage
for i in range(30):
    timer_print(f"Starting devices in progress {i/0.3:.2f}%", 1)

# Report the state reached after the mode change
for device in device_proxies:
    print(f"{device} is now in {device.state()} state")

# make sure the tpms are Synchronised
# (each entry is compared; all(...) == 'Synchronised' would always be False)
tile_states = station.TileProgrammingState
if tile_states is not None and all(state == 'Synchronised' for state in tile_states):
    print(f"{tile_states}", end="")
else:
    print(sync_tpms(tpms, station, daq))

## Run Self Check tests

In [None]:
# change name to what you want to test
name = "TestRaw"

# Find out more about the test (uses the name chosen above)
print(station.DescribeTest(name))

# Run the test and wait for it to finish
station.RunTest(json.dumps({"test_name": name}))

wait2finish(station)

# Print results
print(station.testLogs)
print(station.testReport)

## Run multiple tests

It's a bit convoluted, but this is what I run to test more stuff at once.
I like to keep the results in a dictionary to print them out later.

In [None]:
# Default number of times each selected test is run.
rerun = 2
# Maps test name -> number of runs. Uncomment the tests you want to run and
# leave the others commented out.
test_list = {
    # "BaseDaqTest": rerun,
    # "TestResult": rerun,
    # "TpmSelfCheckTest": rerun,
    # "TestBeam": rerun,
    # "TestTileBeamformer" : rerun,
    # "TestChannel" : rerun,
    # "TestTilePointing" : rerun,
    # "TestIntegratedBeam": rerun,
    # "TestIntegratedChannel": rerun,
    # "TestRaw": rerun,
    "TestAntennaBuffer": 2, # To run a single test a different number of times, replace `rerun` with a number as done here
    # "InitialiseStation": rerun,
    # "BasicTangoTest": rerun,
    # "TestStationBeamDataRate": rerun,
}

def run_test(name: str = "TestRaw", i: int = 0):
    """Run one station self-check test and parse its result.

    :param name: name of the test passed to ``station.RunTest``.
    :param i: run index, only used in the progress text.
    :return: tuple ``(outcome, logs)`` where ``outcome`` is 'FAILED',
        'PASSED', 'ERRORED' or 'NOT_RUN' according to the ``Result: ...``
        marker found in ``station.testReport`` ('UNKNOWN' when no marker is
        present) and ``logs`` is the value of ``station.testlogs``.
    """
    station.RunTest(json.dumps({"test_name": name}))
    time.sleep(0.5)  # wait for it to get queued up
    # timeout counts polls of 0.1 s, so 6000 polls is about 10 min.
    wait2finish(station, f"{name} run {i}", timeout=6000)

    report = station.testReport
    logs = station.testlogs
    # Each marker must be searched for in the report; a bare string in an
    # elif is always true and would report every non-failed run as PASSED.
    for outcome in ('FAILED', 'PASSED', 'ERRORED', 'NOT_RUN'):
        if f"Result: {outcome}" in report:
            return outcome, logs
    # Keep the (outcome, logs) shape so callers can always index the result.
    return 'UNKNOWN', logs


# Collect the (outcome, logs) tuple of every run, keyed by test name.
results = {}
for test_name, rerun_count in test_list.items():
    runs = results.setdefault(test_name, [])
    for i in range(rerun_count):
        outcome = run_test(test_name, i)
        runs.append(outcome)

Here is how I write the results.
I like to add filters for a few common errors so that I can stop printing the entire message

In [None]:
# Print one section per test with a short verdict for every run.
for key, value in results.items():
    print(f"\n {key}:")
    print("=" * len(key) + "===\n")
    for item in value:
        status, logs = item
        if "Test succeeded" in logs and status == "PASSED":
            print("PASSED", end="\t")
        elif "assert self._data_created_event.wait" in logs:
            # Common failure: the test timed out waiting for data.
            print("FAILED: No data received", end="\t")
        elif "This test is skipped" in logs:
            print("SKIPPED", end="\t")
        else:
            # Anything else: show the last 500 characters of the logs.
            # ([-500:0] would always be an empty slice.)
            print(f"{status} {logs[-500:]}", end="\n")
    print("\n------------------------------------------------------------------\n")