# Dataset Index - SKA[Template]
An index of all datasets collected for SKA[Template].

                                                               As of 25/06/2024

In [None]:
import os
import katdal

import sys; sys.path.append('/home/aph/work/dvs')
from dvs import cbid2url
def cbid2fn(cbid):
    """Return the local path of the full RDB file for capture block `cbid`.

    The path is always rooted at ./l1_data, i.e.
    ./l1_data/<cbid>/<cbid>_sdp_l0.full.rdb.
    """
    return f"./l1_data/{cbid}/{cbid}_sdp_l0.full.rdb"
from analysis.katselib import ls_archive

## Identify Datasets generated by Dish-AIV for the Test Report

In [None]:
# Query the archive for DISH-AIV records whose antenna list matches *s0002*
# and whose start time falls in the window below. Only the listed metadata
# fields are requested for each record.
recs = ls_archive(
    "Antennas:*s0002* AND ProposalId:DISH-AIV "
    "AND StartTime:[2024-09-01T00:00:00Z TO 2024-12-12T10:00:00Z]",
    min_duration=10,
    fields=[
        "CaptureBlockId",
        "ExperimentID",
        "Description",
        "CenterFrequency",
        "InstructionSet",
    ],
)

In [None]:
# Records deemed unusable are dropped from the index.
# ignore_recs maps capture block ID -> text motivating the exclusion. Keys
# must be ints, because each record's CaptureBlockId is converted with int()
# before the lookup.
ignore_recs = {}

recs = [rec for rec in recs if int(rec["CaptureBlockId"]) not in ignore_recs]

## Download the Datasets

In [None]:
# !dvs/bin/mvf_copy.py http://archive-gw-1.kat.ac.za/1719214722/1719214722_sdp_l0.full.rdb ./l1_data

def download(cbid, check=False, cacheroot="./l1_data"):
    if not os.path.exists(cbid2fn(cbid)):
        !python ../bin/mvf_copy.py {cbid2url(cbid)} {cache_root}
            
        # Delete empty folders left when there's an error
        if (len(os.listdir(f"{cache_root}/{cbid}-sdp-l0/correlator_data")) == 0):
            !rm -Rf {cache_root}/{cbid}*
            return False
    
    if check: # Check for data corruption and delete if there's an error
        print(f"Verifying {cbid}...", end="")
        if test_integrity(cbid):
            print(" OK.")
            return True
        else:
            print(" Corrupt!")
            !rm -Rf {cache_root}/{cbid}*
            return False

def test_integrity(cbid): # NB: only valid if done within ~30 days of download!!!
    """Check the cached copy of capture block `cbid` against the archive.

    The visibilities are loaded in full from the local file (cbid2fn) and
    from the archive (cbid2url) and compared element by element.

    NOTE(review): the ~30 day limit above comes from the original author;
    presumably the archive copy is not guaranteed to be readable after that
    -- confirm.

    Returns
    -------
    True if both copies could be read and their visibilities are identical.
    False if they differ, or if either copy could not be opened or read (the
    reason is printed).
    """
    # Imported here because the notebook's import cell does not provide `np`.
    # It is kept outside the try block so that a missing numpy fails loudly
    # instead of being reported as data corruption.
    import numpy as np

    try:
        local_vis = katdal.open(cbid2fn(cbid)).vis[:]
        archive_vis = katdal.open(cbid2url(cbid)).vis[:]
    except Exception as err:
        # Not a bare `except:` -- KeyboardInterrupt must propagate, otherwise
        # interrupting a verification would flag good data as corrupt.
        print(f" integrity check of {cbid} failed: {err!r}")
        return False
    return bool(np.array_equal(archive_vis, local_vis))

In [None]:
# Fetch and verify every record in turn, collecting the records whose dataset
# could not be obtained. A plain loop is used because download() has side
# effects (it copies and deletes files).
failed_downloads = []

for r in recs:
    succeeded = download(r["CaptureBlockId"], check=True)
    if succeeded:
        continue
    failed_downloads.append(r)

In [None]:
# Report one line per failed record: its capture block ID and description.
print("The following datasets could not be downloaded successfully.")
for r in failed_downloads:
    print(*[r[key] for key in ("CaptureBlockId", "Description")])