# A how-to guide for running self-check tests on the station

You can find the tests in `/home/gsc/SKAO/ska-low-mccs-spshw/src/ska_low_mccs_spshw/station/tests`.

This group of tests is meant to be run against an actual k8s deployment.

## Useful tools

Here are some helper functions that are used throughout the rest of this guide:

In [None]:
from ska_control_model import AdminMode
from datetime import datetime, timedelta
import tango
import time
import json

def timer_print(string: str, delay):
    """Show ``string`` for ``delay`` seconds, then rewind to the line start.

    The text is printed without a trailing newline; once the delay has
    elapsed a carriage return is printed so that the next message overwrites
    this one. Useful for giving feedback while waiting on something to finish.

    :param string: text to display.
    :param delay: time to sleep, in seconds, before the cursor is reset.
    """
    same_line = {"end": ""}
    print(string, **same_line)
    time.sleep(delay)
    print("\r", **same_line)

def wait2finish(device, additional_string=None, timeout=600, freq=0.1):
    """Block until ``device`` has no long running commands left in its queue.

    :param device: object exposing a ``longrunningcommandsinqueue`` attribute.
    :param additional_string: optional text shown in front of the queue
        contents; nothing is prepended when it is ``None``.
    :param timeout: maximum number of polls (not seconds). The longest wait
        is roughly ``timeout * freq`` seconds plus the initial one-second
        pause.
    :param freq: time in seconds between two polls.
    """
    # An omitted prefix used to be rendered as the literal text "None".
    prefix = "" if additional_string is None else additional_string

    # Presumably gives a just-submitted command time to show up in the queue.
    time.sleep(1)
    for i in range(timeout):
        # Read the attribute once per poll and reuse the value for the message.
        queue = device.longrunningcommandsinqueue
        if len(queue) == 0:
            break
        timer_print(f"{prefix}{queue}, {i*freq:.1f} seconds elapsed", freq)


## Setup

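In general you want the devices used for the tests to be in engineering mode. This part of the code loops over all device proxies and checks their admin mode. If a device is not online, its admin mode is changed to online. 30 seconds later the code prints the state of the devices that had to be changed, switches every device to engineering mode and, after another 30 seconds, prints the admin mode of each device.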

In [None]:
db = tango.Database()
all_devices_strings = db.get_device_exported("low-mccs/*")

# Admin mode value this guide refers to as "engineering mode".
# NOTE(review): kept as the raw value 2 used originally - confirm the matching
# member name in the installed ska_control_model AdminMode enum.
ENGINEERING_MODE = 2

# Check all devices, bring the ones that are not online to ONLINE and
# remember which ones had to be changed.
bad_devices = []
for device_str in all_devices_strings:
    device = tango.DeviceProxy(device_str)
    # Compare the adminMode attribute: status() returns a text description,
    # which never equals an AdminMode member, so every device looked "bad".
    if device.adminMode != AdminMode.ONLINE:
        print(f"{device_str} is in {device.state()} state, changing to ON")
        device.adminMode = AdminMode.ONLINE
        bad_devices.append(device_str)
    else:
        print(f"{device_str} is in {device.state()} state")

# this part is just a lazy progress bar; there is nothing to wait for when
# no device had to be changed.
if bad_devices:
    for i in range(30):
        timer_print(f"Starting devices in progress {i/0.3:.2f}%", 1)

# print out the ones that were not online
for device_str in bad_devices:
    device = tango.DeviceProxy(device_str)
    print(f"{device_str} is in {device.state()} state")

# get the station, daq and tpm devices
station = tango.DeviceProxy("low-mccs/spsstation/stfc-ral-software")
daq = tango.DeviceProxy("low-mccs/daqreceiver/stfc-ral-software-bandpass")
tpms = [tango.DeviceProxy(tpm_str) for tpm_str in db.get_device_exported("low-mccs/tile/*")]
print(f"{station.state() = }")

# Turn to engineering mode
for device_str in all_devices_strings:
    device = tango.DeviceProxy(device_str)
    print(f"{device_str} is in {device.state()} state, turning it to Engineering mode")
    device.adminMode = ENGINEERING_MODE

# same lazy progress bar while the mode change takes effect
for i in range(30):
    timer_print(f"Starting devices in progress {i/0.3:.2f}%", 1)

# report the admin mode every device ended up in
for device_str in all_devices_strings:
    device = tango.DeviceProxy(device_str)
    print(f"{device_str} is in {device.adminMode} state")

## Run Self Check tests

Honestly, it's straightforward to run them:

In [None]:
# change name to what you want to test
name = "TestRaw"

# Find out more about the selected test (previously this always described
# "TestRaw", whatever ``name`` was set to)
print(station.DescribeTest(name))

# Run the test and wait for it to finish
station.RunTest(json.dumps({"test_name": name}))

# Pass a prefix so the progress line names the running test
wait2finish(station, f"{name}: ")

# Print results
print(station.testLogs)
print(station.testReport)

## Run multiple tests

It's a bit convoluted, but this is what I run to test more stuff at once.
I like to keep the results in a dictionary to print them out later.

In [None]:
# Default number of repetitions for every selected test.
rerun = 2

# Tests to execute, mapped to how often each one is repeated.
# Comment an entry out to skip that test, or replace ``rerun`` with a number
# to give a single test its own repeat count.
test_list = {
    # "BaseDaqTest": rerun,
    # "TestResult": rerun,
    # "TpmSelfCheckTest": rerun,
    # "TestBeam": rerun,
    "TestTileBeamformer": rerun,
    "TestChannel": rerun,
    "TestTilePointing": rerun,
    "TestIntegratedBeam": rerun,
    "TestIntegratedChannel": rerun,
    "TestRaw": rerun,
    "TestAntennaBuffer": 10,  # example of a per-test repeat count
    # "InitialiseStation": rerun,
    # "BasicTangoTest": rerun,
    # "TestStationBeamDataRate": rerun,
}

def run_test(name: str = "TestRaw", i: int = 0):
    """Run one station self-check test and classify its outcome.

    The test is started through ``station.RunTest``; the function then waits
    for the station's long running command queue to drain and inspects the
    test report.

    :param name: name of the self-check test to run.
    :param i: index of the current repetition; only used in the progress text.
    :return: a ``(status, logs)`` tuple. ``status`` is ``'FAILED'``,
        ``'PASSED'``, ``'ERRORED'`` or ``'NOT_RUN'`` depending on which
        ``Result: ...`` marker the report contains, or ``'UNKNOWN'`` when it
        contains none of them. ``logs`` is the station's test log.
    """
    station.RunTest(json.dumps({"test_name": name}))
    wait2finish(station, f"{name} run {i}")

    report = station.testReport
    logs = station.testLogs
    # Every marker has to be looked up in the report. The former
    # ``elif "Result: PASSED":`` tested a non-empty string, which is always
    # true, so errored and not-run tests were reported as passed.
    for status in ("FAILED", "PASSED", "ERRORED", "NOT_RUN"):
        if f"Result: {status}" in report:
            return status, logs
    return "UNKNOWN", logs


# Collect the (status, logs) tuple of every run, grouped by test name.
# Results are appended one run at a time so that partial results survive an
# interrupted cell.
results = {}
for test_name, rerun_count in test_list.items():
    outcomes = results[test_name] = []
    for attempt in range(rerun_count):
        outcomes.append(run_test(test_name, attempt))

In [None]:
# Print the status and logs of every run, one section per test.
for key, value in results.items():
    print(f"\n {key}:")
    print("========================= \n")
    # Passed and failed runs were printed by two identical branches; a single
    # print produces the same output.
    for status, logs in value:
        print(f"{status} {logs}")

## Troubleshooting steps

### TPM synchronisation

The TPMs must be synchronised, and sometimes they don't start that way.
This is a bit of code that tries to reset them and synchronise them again.

In [None]:
def check_tpms(tpms):
    """Return the tpms whose Tango state is not ON.

    :param tpms: iterable of tile device proxies.
    :return: list with the proxies whose ``state()`` differs from
        ``tango.DevState.ON``, in their original order.
    """
    # state() yields a Tango DevState, so it is compared with DevState.ON
    # rather than with a member of the unrelated AdminMode enum.
    return [tpm for tpm in tpms if tpm.state() != tango.DevState.ON]

def fix_tpms(tpms):
    """Try up to three times to switch on the tpms that are not ON.

    Each attempt sends the ``ON`` command to every tpm that is still not ON,
    waits ten seconds, re-checks all tpms and prints the ones that are still
    not ON.

    :param tpms: iterable of tile device proxies.
    :return: ``True`` when no tpm is left in a non-ON state, else ``False``.
    """
    bad_tpms = check_tpms(tpms)
    for i in range(3):
        if not bad_tpms:
            break
        print(f"Atempt {i+1} at fixing tpms")
        for tpm in bad_tpms:
            # state() yields a Tango DevState; compare with DevState.ON, not
            # with a member of the AdminMode enum.
            if tpm.state() != tango.DevState.ON:
                tpm.ON()
        time.sleep(10)
        bad_tpms = check_tpms(tpms)
        for tpm in bad_tpms:
            print(f"{tpm.name()} is in {tpm.state()} state")
    # Success means the list is empty; do not rely on the truthiness of the
    # individual proxy objects.
    return not bad_tpms

def wait2finish(device, additional_string=None, timeout=600, freq=0.1):
    """Wait for ``device`` to finish its long running commands, then print its state.

    This redefinition accepts the same arguments as the helper from the
    "Useful tools" section, so cells that call
    ``wait2finish(device, "some text")`` keep working after this cell has
    been run.

    :param device: object exposing ``longrunningcommandsinqueue`` and
        ``state()``.
    :param additional_string: optional text shown in front of the queue
        contents; nothing is prepended when it is ``None``.
    :param timeout: maximum number of polls (not seconds).
    :param freq: time in seconds between two polls.
    """
    prefix = "" if additional_string is None else additional_string
    for i in range(timeout):
        # Read the attribute once per poll and reuse the value for the message.
        queue = device.longrunningcommandsinqueue
        if len(queue) == 0:
            break
        # Percentage of the allowed polls used so far (i/6 for the default 600).
        print(f"{prefix}{queue} progress {100 * i / timeout:.2f}%", end="")
        time.sleep(freq)
        print("\r", end="")
    print(device.state())

def _all_synchronised(states):
    """Return True when every entry of ``states`` names the synchronised state."""
    # NOTE(review): the exact text reported by the station is not visible
    # here, so the common spellings (including the one used originally) are
    # accepted case-insensitively - confirm against the station device.
    accepted = {"synchronised", "synchronized", "syncronized"}
    return all(str(state).lower() in accepted for state in states)


def sync_tpms(tpms):
    """Try to bring all tiles of the station into the synchronised state.

    Steps:

    1. If not every tile is synchronised, try to switch the tpms on with
       ``fix_tpms``; if that fails, cycle the station through standby/on and
       try once more.
    2. If the tiles are still not synchronised, start the acquisition,
       restart the DAQ in raw-data mode, send synchronised raw data samples
       and poll the tile programming state for up to 120 * 0.5 seconds.

    Uses the module-level ``station`` and ``daq`` proxies.

    :param tpms: list of tile device proxies.
    :return: ``"Failed to sync"`` when the tpms could not be switched on or
        the tiles did not reach the synchronised state in time, otherwise
        ``None``.
    """
    # Local import: the notebook only imports datetime and timedelta.
    from datetime import timezone

    # The original guards compared the bool returned by any()/all() with a
    # string, which made them always true; the states are now checked one by
    # one.
    if not _all_synchronised(station.TileProgrammingState):
        # Try and reboot tpms, if that fails, reboot station
        if not fix_tpms(tpms):
            station.standby()
            wait2finish(station)
            station.ON()
            wait2finish(station)
            if not fix_tpms(tpms):
                return "Failed to sync"
        else:
            time.sleep(5)

    if _all_synchronised(station.TileProgrammingState):
        return None

    station.StartAcquisition('{}')
    daq.stop()
    wait2finish(daq)
    daq.Configure(
        json.dumps(
            {
                "directory": "/",
                "nof_tiles": len(tpms),
                "description": station.name(),
            }
        )
    )
    daq.start('{"modes_to_start": "RAW_DATA"}')
    wait2finish(daq)

    daq_status = json.loads(daq.DaqStatus())
    print(daq_status)

    # Timezone-aware replacement for the deprecated datetime.utcnow(); the
    # formatted timestamp is identical.
    start_time = datetime.now(timezone.utc) + timedelta(seconds=5)
    station.SendDataSamples(
        json.dumps(
            {
                "data_type": "raw",
                "start_time": start_time.strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
                "sync": True,
            }
        )
    )

    seconds_until_start = (start_time - datetime.now(timezone.utc)).total_seconds()
    if seconds_until_start > 0:
        print(f"Waiting {seconds_until_start} seconds for scan to start")
        time.sleep(seconds_until_start)

    for _ in range(120):
        states = station.TileProgrammingState
        if _all_synchronised(states):
            return None
        print(f"{states}", end="")
        time.sleep(0.5)
        print("\r", end="")

    # One last look after the final sleep before reporting a failure.
    if _all_synchronised(station.TileProgrammingState):
        return None
    return "Failed to sync"

# Proxies used by sync_tpms: the station, its DAQ receiver and every exported tile.
station = tango.DeviceProxy("low-mccs/spsstation/stfc-ral-software")
daq = tango.DeviceProxy("low-mccs/daqreceiver/stfc-ral-software-bandpass")
tpms = []
for tile_address in db.get_device_exported("low-mccs/tile/*"):
    tpms.append(tango.DeviceProxy(tile_address))

# Run the synchronisation and show what it returned.
print(sync_tpms(tpms))