In [27]:
from app.dataprocessing.data_handler import DataHandler
from app.dataprocessing.benchmark import plt_img, save_simple_img, demo_plt_img
from app.datastructures.datastructure_interface import get_ipyleaflet_bounds
from app.dataprocessing.benchmark import Stopwatch
from dotenv import load_dotenv

import numpy as np
import os


In [28]:
def setup(c_size, struct, file_size=5738):
    """Create a DataHandler wired to the CMEMS monthly global forecast dataset.

    Credentials are read from the environment variables
    ``CMEMS_CAS_USERNAME`` and ``CMEMS_CAS_PASSWORD`` after loading a local
    ``.env`` file via ``load_dotenv()``.

    Args:
        c_size: Maximum chunk size handed to ``set_max_chunk_size``
            (reported in MB by the data structure's printed description).
        struct: Name of the data structure to build, e.g. ``"Octree"`` or
            ``"KDTree"``.
        file_size: Pre-computed size of the remote dataset, forwarded to
            ``set_opendap_cas`` so it is not re-estimated from the server on
            every call. Presumably in MB (it is later read back through
            ``data_source.file_size_MB``) -- confirm against DataHandler.

    Returns:
        The configured ``DataHandler`` instance.

    Raises:
        RuntimeError: If either CMEMS credential is missing or empty.
    """
    data_handler = DataHandler()
    data_handler.set_max_chunk_size(c_size)

    load_dotenv()
    username = os.environ.get("CMEMS_CAS_USERNAME")
    password = os.environ.get("CMEMS_CAS_PASSWORD")
    # Fail here with a clear message instead of handing None to the CAS login
    # and getting an unrelated-looking error from deeper in the stack.
    if not username or not password:
        raise RuntimeError(
            "CMEMS credentials not found: set CMEMS_CAS_USERNAME and "
            "CMEMS_CAS_PASSWORD in the environment or in a .env file."
        )

    dataset_url = 'https://nrt.cmems-du.eu/thredds/dodsC/global-analysis-forecast-phy-001-024-monthly'
    cas_url = "https://cmems-cas.cls.fr/cas/login"

    # Only the depth axis is constrained; a variant that also fixed the time
    # axis ({'time': 19, 'depth': 0}) was used previously.
    constraints = {'depth': 0}

    # Pass a pre-computed file size to avoid excessive server calls.
    data_handler.set_opendap_cas(
        cas_url,
        dataset_url,
        username,
        password,
        constraints=constraints,
        struct=struct,
        file_size=file_size,
    )

    # Show the data structure that was actually built for this configuration.
    print(f"Data structure in use: {data_handler.data_structure}")
    return data_handler

In [29]:
# Smoke test: build one handler (Octree, max chunk size 50) and echo the
# dataset size the handler's data source reports.
data_handler = setup(c_size=50, struct="Octree")
print(f'file size estimate: {data_handler.data_source.file_size_MB}')

Data structure in use: Octree with 512 chunks of maximum size 50MB
file size estimate: 5738


In [30]:
# Time window shared by every query: (start, end) as numpy datetimes.
time_stamp = tuple(np.datetime64(day) for day in ('2020-01-01', '2020-01-26'))

# Each query is (first-axis range, second-axis range, time range). The value
# ranges (+-90 / +-180) suggest latitude then longitude -- confirm against
# DataHandler.request_data_netcdf. The queries shrink from the full globe
# down to a small region.
query_1 = (
    (-90, 90),
    (-180, 180),
    time_stamp,
)
query_2 = (
    (-74.09, -3.78),
    (-155.45, -56.49),
    time_stamp,
)
query_3 = (
    (-40.46, -5.30),
    (-68.70, -19.21),
    time_stamp,
)
query_4 = (
    (-9.20, -0.41),
    (-88.42, -76.05),
    time_stamp,
)

# Benchmark grid: queries are issued in this order for every combination of
# maximum chunk size and data structure.
sequence = [query_1, query_2, query_3, query_4]
sizes = [10, 50, 100]
structs = ["Octree", "KDTree"]

In [31]:
# Benchmark every (data structure, chunk size) combination against the query
# sequence and collect the results for the LaTeX table.
#   overleaf_meta_log: one (struct, size) tuple per configuration.
#   overleaf_data_log: one (bounds label, resolution, file size, fetch time)
#                      tuple of pre-formatted strings per executed query.
overleaf_meta_log = []
overleaf_data_log = []

for struct in structs:
    for size in sizes:
        print("----------------------------")
        print(f"{struct} with chunksize: {size}")
        overleaf_meta_log.append((struct, size))

        # A fresh handler per configuration.
        data_handler = setup(size, struct)

        for step, query in enumerate(sequence):
            print(f"--- step {step} ---")
            print(f"query bounds: {query}")

            # Time only the data request itself.
            timer = Stopwatch()
            timer.start("Fetch netCDF chunk")
            file_name, bounds, node = data_handler.request_data_netcdf(
                query, return_xr_chunk=True
            )
            fetch_time = f"{timer.alt_stop():0.4f}"

            # Build the response-bounds label axis by axis: two numeric ranges
            # with two decimals, then the time range cut to its first ten
            # characters (the date part of an ISO timestamp, presumably).
            first_axis = f"({bounds[0][0].values:.2f}, {bounds[0][1].values:.2f})"
            second_axis = f"({bounds[1][0].values:.2f}, {bounds[1][1].values:.2f})"
            time_axis = f"({str(bounds[2][0].values)[:10]}, {str(bounds[2][1].values)[:10]})"
            p_bounds = ", ".join((first_axis, second_axis, time_axis))

            resolution = f"{data_handler.get_node_resolution(node):.2f}"
            size_on_disk = f"{data_handler.get_file_size_MB(file_name):.2f}"
            overleaf_data_log.append((p_bounds, resolution, size_on_disk, fetch_time))


----------------------------
Octree with chunksize: 10
Data structure in use: Octree with 4096 chunks of maximum size 10MB
--- step 0 ---
query bounds: ((-90, 90), (-180, 180), (numpy.datetime64('2020-01-01'), numpy.datetime64('2020-01-26')))
--- step 1 ---
query bounds: ((-74.09, -3.78), (-155.45, -56.49), (numpy.datetime64('2020-01-01'), numpy.datetime64('2020-01-26')))
--- step 2 ---
query bounds: ((-40.46, -5.3), (-68.7, -19.21), (numpy.datetime64('2020-01-01'), numpy.datetime64('2020-01-26')))
--- step 3 ---
query bounds: ((-9.2, -0.41), (-88.42, -76.05), (numpy.datetime64('2020-01-01'), numpy.datetime64('2020-01-26')))
----------------------------
Octree with chunksize: 50
Data structure in use: Octree with 512 chunks of maximum size 50MB
--- step 0 ---
query bounds: ((-90, 90), (-180, 180), (numpy.datetime64('2020-01-01'), numpy.datetime64('2020-01-26')))
--- step 1 ---
query bounds: ((-74.09, -3.78), (-155.45, -56.49), (numpy.datetime64('2020-01-01'), numpy.datetime64('2020-01-

KeyboardInterrupt: 

In [None]:
# Print the benchmark results as LaTeX table rows, one section per
# (data structure, chunk size) configuration.
#
# Raw strings are used throughout: "\h", "\m" and "\%" are not valid Python
# escape sequences and trigger a DeprecationWarning/SyntaxWarning in normal
# string literals. The emitted text is unchanged.
#
# Rows are taken per configuration by slicing the data log, so an interrupted
# benchmark (meta entry logged, but not all queries finished) produces a
# shorter, clearly marked table instead of an IndexError.
#
# NOTE(review): the column header says "10 run avg." but the benchmark cell
# times a single run per query -- confirm how the reported times are obtained.
rows_per_table = len(sequence)

for table_no, (st, sz) in enumerate(overleaf_meta_log):
    first_row = table_no * rows_per_table
    rows = overleaf_data_log[first_row:first_row + rows_per_table]

    print(r"\hline")
    print(rf"\multicolumn{{5}}{{|c|}}{{{st}({sz})}} \\")
    print(r"\hline")
    print(
        r"No. & Response bounds & Resolution & File size(MB) & Time(10 run avg.) \\"
    )
    print(r"\hline")
    for i, (resp_bounds, resolution, size_mb, elapsed) in enumerate(rows, start=1):
        print(rf"{i} & {resp_bounds} & {resolution}\% & {size_mb} & {elapsed} \\")
    if len(rows) < rows_per_table:
        # LaTeX comment line: flags the gap without affecting the typeset table.
        print(f"% incomplete: only {len(rows)} of {rows_per_table} queries were recorded")
    print(r"\hline")
    print(r"\multicolumn{5}{c}{} \\")