# Example Benchmarking for SPARCL

In [None]:
# import some helpful python packages 
import numpy as np
import pandas as pd
import time
from astropy.table import Table

import matplotlib.pyplot as plt

## DataLab related modules
from sparcl.client import SparclClient
from dl import queryClient as qc

In [None]:
%%time
## Create one client per server so identical requests can be timed against both:
##  - client_astrosparcl : built with the SparclClient default URL (no url argument)
##  - client_astrosparcl2: explicitly pointed at the astrosparcl2 server
client_astrosparcl = SparclClient()
client_astrosparcl2 = SparclClient(url='https://astrosparcl2.datalab.noirlab.edu')

In [None]:
# Display the default-URL client as the cell output
client_astrosparcl

In [None]:
# Display the astrosparcl2 client as the cell output
client_astrosparcl2

In [None]:
def run_desi_query(NN, randomid=[0,1]):
    """Query desi_dr1.zpix for up to NN targetids.

    Only rows with zcat_nspec=1 are selected (objects with a unique
    spectrum, to avoid duplicates), and random_id must fall between
    randomid[0] and randomid[1] (SQL BETWEEN).

    Parameters
    ----------
    NN
        Value interpolated into the LIMIT clause (maximum number of rows).
    randomid
        Two-element sequence: lower and upper bound for random_id.

    Returns
    -------
    Whatever qc.query returns for fmt='table'.
    """
    lower, upper = randomid[0], randomid[1]

    # The bounds and the row limit are interpolated verbatim into the SQL text
    sql_text = f'''SELECT targetid FROM desi_dr1.zpix 
                WHERE zcat_nspec=1 AND random_id BETWEEN {lower} AND {upper} LIMIT {NN}'''

    return qc.query(sql=sql_text, fmt='table')

In [None]:
def run_sdss_query(NN, randomid=[0,1]):
    """Query sdss_dr16.specobj for up to NN specobjids.

    No uniqueness filter is applied (the table is already unique per
    specobjid); random_id must fall between randomid[0] and randomid[1]
    (SQL BETWEEN).

    Parameters
    ----------
    NN
        Value interpolated into the LIMIT clause (maximum number of rows).
    randomid
        Two-element sequence: lower and upper bound for random_id.

    Returns
    -------
    Whatever qc.query returns for fmt='table'.
    """
    lower, upper = randomid[0], randomid[1]

    # The bounds and the row limit are interpolated verbatim into the SQL text
    sql_text = f'''SELECT specobjid FROM sdss_dr16.specobj 
                WHERE random_id BETWEEN {lower} AND {upper} LIMIT {NN}'''

    return qc.query(sql=sql_text, fmt='table')

In [None]:
# Fields requested from retrieve_by_specid via its `include` argument.
# The same list is used for both the SDSS and the DESI retrievals.
inc = ['specid', 'redshift', 'specprimary', 'wavelength', 'flux', 'ivar']

In [None]:
def get_spec_timing(specids, dataset=['DESI-DR1'], client=None, include=None, limit=24000):
    """Time one retrieve_by_specid call (astrosparcl client by default).

    Parameters
    ----------
    specids : list
        Spectrum identifiers passed as `specid_list`.
    dataset : list of str
        Passed as `dataset_list`. Defaults to ['DESI-DR1']; the notebook also
        uses ['SDSS-DR16', 'BOSS-DR16']. The list is not modified here.
    client : optional
        Object exposing `retrieve_by_specid`. When None, the notebook-level
        `client_astrosparcl` is used.
    include : list of str, optional
        Fields to retrieve. When None, the notebook-level `inc` list is used.
    limit : int
        Passed as `limit` to `retrieve_by_specid` (24000 by default).

    Returns
    -------
    Elapsed time of the retrieve call in seconds, rounded to 0.1 s
    (a NumPy float, as returned by np.round).
    """
    # Resolve the notebook-level defaults lazily so that explicit arguments
    # never touch the globals.
    if client is None:
        client = client_astrosparcl
    if include is None:
        include = inc

    # perf_counter is a monotonic, high-resolution clock intended for
    # measuring durations; time.time() can jump if the system clock is adjusted.
    start_time = time.perf_counter()

    ## Retrieve spectra
    results = client.retrieve_by_specid(specid_list=specids, include=include,
                                        dataset_list=dataset, limit=limit)

    ## Time rounded to 0.1 sec
    duration = np.round(time.perf_counter() - start_time, 1)
    print(f'Querying N={len(specids)}; retrieved N={results.count} in {duration} sec')

    # `results` is a local and is released when the function returns, so no
    # explicit reset is needed to save space.
    return duration

In [None]:
def get_spec_timing_2(specids, dataset=['DESI-DR1'], client=None, include=None, limit=24000):
    """Time one retrieve_by_specid call (astrosparcl2 client by default).

    Parameters
    ----------
    specids : list
        Spectrum identifiers passed as `specid_list`.
    dataset : list of str
        Passed as `dataset_list`. Defaults to ['DESI-DR1']; the notebook also
        uses ['SDSS-DR16', 'BOSS-DR16']. The list is not modified here.
    client : optional
        Object exposing `retrieve_by_specid`. When None, the notebook-level
        `client_astrosparcl2` is used.
    include : list of str, optional
        Fields to retrieve. When None, the notebook-level `inc` list is used.
    limit : int
        Passed as `limit` to `retrieve_by_specid` (24000 by default).

    Returns
    -------
    Elapsed time of the retrieve call in seconds, rounded to 0.1 s
    (a NumPy float, as returned by np.round).
    """
    # Resolve the notebook-level defaults lazily so that explicit arguments
    # never touch the globals.
    if client is None:
        client = client_astrosparcl2
    if include is None:
        include = inc

    # perf_counter is a monotonic, high-resolution clock intended for
    # measuring durations; time.time() can jump if the system clock is adjusted.
    start_time = time.perf_counter()

    ## Retrieve spectra
    results = client.retrieve_by_specid(specid_list=specids, include=include,
                                        dataset_list=dataset, limit=limit)

    ## Time rounded to 0.1 sec
    duration = np.round(time.perf_counter() - start_time, 1)
    print(f'Querying N={len(specids)}; retrieved N={results.count} in {duration} sec')

    # `results` is a local and is released when the function returns, so no
    # explicit reset is needed to save space.
    return duration

In [None]:
# Sample sizes (number of spectra requested per retrieve call) to benchmark
## Focus on small numbers up to 200
Ns = [1, 2, 4, 20, 40, 200] ## larger sizes (2000, 20000) are left out for now; can extend up to 24,000

# Results table: one row per sample size, timing columns initialised to 0.0
# and filled in by the SDSS loop below and the DESI loop in a later cell
out = Table()
out['N'] = Ns
# astrosparcl
out['T_retrieve_DESI'] = 0.0
out['T_retrieve_SDSS'] = 0.0
# astrosparcl2
out['T2_retrieve_DESI'] = 0.0
out['T2_retrieve_SDSS'] = 0.0

for i, N in enumerate(Ns):

    # Query for SDSS specobjids
    ## NOTE: randomid for SDSS still needs to be chosen as a function of the
    ## number of bins and the sample size; until then every N uses the
    ## default range [0, 1]
    t_sdss = run_sdss_query(N) # TODO: pass e.g. randomid=[0+i*0.1, 0.3+i*0.1]
    ids = t_sdss['specobjid']
    # Convert the column to a plain list of Python ints for the retrieve call
    ids = ids.astype(int).tolist()    

    # Time the retrieve of the same ids on both servers (astrosparcl first, then astrosparcl2)
    out['T_retrieve_SDSS'][i] = get_spec_timing(ids, dataset=['SDSS-DR16','BOSS-DR16'])
    out['T2_retrieve_SDSS'][i] = get_spec_timing_2(ids, dataset=['SDSS-DR16','BOSS-DR16'])

In [None]:
# Display the timing table (SDSS columns filled in; DESI columns still 0.0 on a top-to-bottom run)
out

In [None]:
for i, N in enumerate(Ns):
    
    # Query for DESI targetids; each sample size uses its own random_id
    # window of width 0.1, shifted by 0.1 per iteration
    t_desi = run_desi_query(N, randomid=[0+i*0.1, 0.1+i*0.1])
    ids = t_desi['targetid']
    # Convert the column to a plain list of Python ints for the retrieve call
    ids = ids.astype(int).tolist()

    # Time the retrieve of the same ids on both servers, using the default dataset ['DESI-DR1']
    out['T_retrieve_DESI'][i] = get_spec_timing(ids)
    out['T2_retrieve_DESI'][i] = get_spec_timing_2(ids)

In [None]:
# Display the timing table with both the SDSS and DESI columns filled in
out

In [None]:
#out_save = out.copy()
#out_save

In [None]:
#outfile = "results_250616_astrosparcl.fits"
#out_save['N','T_retrieve_DESI','T_retrieve_SDSS'].write(outfile, overwrite=False)

In [None]:
#outfile = "results_250613_astrosparcl2.fits"
#out_save['N','T2_retrieve_DESI','T2_retrieve_SDSS'].write(outfile, overwrite=False)