GEBCO’s Gridded Bathymetry Data
Source: local file

Size: 7.5GB

In [1]:
from app.dataprocessing.data_handler import DataHandler
from app.dataprocessing.benchmark import plt_img, save_simple_img, demo_plt_img
from app.datastructures.datastructure_interface import get_ipyleaflet_bounds
from app.dataprocessing.benchmark import Stopwatch

from dotenv import load_dotenv
import os


#### Init: build a data handler for a given chunk size and data structure

In [2]:
def setup(
    c_size,
    struct,
    file_path="app/externalresources/datasets/GEBCO_2022_sub_ice_topo.nc",
):
    """Create a DataHandler that reads a local netCDF file.

    Args:
        c_size: Maximum chunk size handed to ``DataHandler.set_max_chunk_size``
            (the notebook's recorded output reports this value in MB).
        struct: Name of the data structure to build, e.g. ``"QuadTree"`` or
            ``"KDTree"``.
        file_path: Path of the netCDF dataset to open. Defaults to the GEBCO
            2022 sub-ice topography file used throughout this notebook.

    Returns:
        The configured DataHandler instance.
    """
    data_handler = DataHandler()
    data_handler.set_max_chunk_size(c_size)
    data_handler.set_local_netcdf_reader(file_path, struct)

    print(f"Data structure in use: {data_handler.data_structure}")

    # The call is kept from the original setup, but its return values are not
    # needed by the benchmark.
    # NOTE(review): presumably this primes the handler's initial dataset;
    # confirm whether the call has side effects before removing it.
    data_handler.get_initial_ds()

    return data_handler


In [3]:
# Query windows, each written as a pair of (min, max) ranges, ordered from
# the widest window to the narrowest.
# NOTE(review): presumably (latitude range, longitude range) in degrees;
# confirm against DataHandler.request_data_netcdf.
sequence = [
    ((-90, 90), (-180, 180)),
    ((-74.09, -3.78), (-155.45, -56.49)),
    ((-40.46, -5.30), (-68.70, -19.21)),
    ((-9.20, -0.41), (-88.42, -76.05)),
]
query_1, query_2, query_3, query_4 = sequence

# Benchmark grid: every data structure is combined with every max chunk size.
structs = ["QuadTree", "KDTree"]
sizes = [10, 50, 100]


In [4]:
# Number of timed fetches per query. It drives both the loop and the
# division below, so the reported average cannot drift from the run count.
# The table header printed further down still says "10 run avg." literally.
N_RUNS = 10

# Parallel logs consumed by the LaTeX table cell:
#   overleaf_meta_log - one (struct, size) tuple per benchmarked configuration
#   overleaf_data_log - one (bounds, resolution, file size, avg time) tuple
#                       per query, appended in configuration order
overleaf_meta_log = []
overleaf_data_log = []

for struct in structs:
    for size in sizes:
        print("----------------------------")
        print(f"{struct} with chunksize: {size}")
        overleaf_meta_log.append((struct, size))
        data_handler = setup(size, struct)
        for idx, s in enumerate(sequence):
            print(f"--- step {idx} ---")
            print(f"query bounds: {s}")
            timer = Stopwatch()

            # Repeat the identical request N_RUNS times and average the time.
            total_time = 0
            for _ in range(N_RUNS):
                timer.start("Fetch netCDF chunk")
                file_name, bounds, node = data_handler.request_data_netcdf(
                    s, return_xr_chunk=True
                )
                total_time += timer.alt_stop()
            avg_time = f"{total_time / N_RUNS:0.4f}"

            # file_name, bounds and node below are those of the last run.
            p_bounds = f"({bounds[0][0]:.2f}, {bounds[0][1]:.2f}), ({bounds[1][0]:.2f}, {bounds[1][1]:.2f})"
            overleaf_data_log.append(
                (
                    p_bounds,
                    f"{data_handler.get_node_resolution(node):.2f}",
                    f"{data_handler.get_file_size_MB(file_name):.2f}",
                    avg_time,
                )
            )

----------------------------
QuadTree with chunksize: 10
Finished 'Loading dataset' in 0.1108 seconds
Finished 'Creating data structure' in 32.1814 seconds
Data structure in use: QuadTree with 1024 chunks at lowest level of max chunk size 10MB
--- step 0 ---
query bounds: ((-90, 90), (-180, 180))
--- step 1 ---
query bounds: ((-74.09, -3.78), (-155.45, -56.49))
--- step 2 ---
query bounds: ((-40.46, -5.3), (-68.7, -19.21))
--- step 3 ---
query bounds: ((-9.2, -0.41), (-88.42, -76.05))
----------------------------
QuadTree with chunksize: 50
Finished 'Loading dataset' in 0.0094 seconds
Finished 'Creating data structure' in 4.7907 seconds
Data structure in use: QuadTree with 256 chunks at lowest level of max chunk size 50MB
--- step 0 ---
query bounds: ((-90, 90), (-180, 180))
--- step 1 ---
query bounds: ((-74.09, -3.78), (-155.45, -56.49))
--- step 2 ---
query bounds: ((-40.46, -5.3), (-68.7, -19.21))
--- step 3 ---
query bounds: ((-9.2, -0.41), (-88.42, -76.05))
----------------------

In [10]:
"""
    \hline
     \multicolumn{4}{|c|}{QuadTree(10MB)} \\
     \hline
     No. & Response bounds & Resolution & File size(MB) & Computation time(10 run avg.) \\
     \hline
     1 & (-90.00, 89.89), (-180.00, 179.89) & 0.14\% & 9.83 & 0.1966 \\
     2 & (-90.00, -0.05), (-180.00, -0.04) & 0.51\% & 9.14 & 0.2393 \\
     3 & (-45.00, -0.02), (-90.00, -0.02) & 2.04\% & 9.14 & 0.1887 \\
     4 & (-11.25, -0.01), (-90.00, -67.51) & 25.00\% & 7.01 & 0.1569 \\
     \hline
     \multicolumn{4}{c}{} \\
"""

# Print one LaTeX table fragment per benchmarked configuration.
# Raw strings are used so that LaTeX backslashes (\hline, \multicolumn, \%)
# are emitted literally instead of being parsed as invalid Python escapes.

# Each (struct, size) entry owns one consecutive run of rows in
# overleaf_data_log, one row per query in `sequence`.
rows_per_table = len(sequence)

for table_no, (st, sz) in enumerate(overleaf_meta_log):
    print(r"\hline")
    print(rf"\multicolumn{{5}}{{|c|}}{{{st}({sz})}} \\")
    print(r"\hline")
    print(
        r"No. & Response bounds & Resolution & File size(MB) & Time(10 run avg.) \\"
    )
    print(r"\hline")

    # Slicing tolerates an incomplete log, e.g. after an interrupted
    # benchmark run, instead of raising IndexError.
    first_row = table_no * rows_per_table
    table_rows = overleaf_data_log[first_row : first_row + rows_per_table]
    for i, (p_bounds, resolution, size_mb, avg_time) in enumerate(table_rows, start=1):
        print(rf"{i} & {p_bounds} & {resolution}\% & {size_mb} & {avg_time} \\")

    print(r"\hline")
    print(r"\multicolumn{5}{c}{} \\")


\hline
\multicolumn{5}{|c|}{QuadTree(10)} \\
\hline
No. & Response bounds & Resolution & File size(MB) & Time(10 run avg.) \\
\hline
1 & (-90.00, 89.89), (-180.00, 179.89) & 0.14\% & 9.83 & 1.0244 \\
2 & (-90.00, -0.05), (-180.00, -0.04) & 0.51\% & 9.14 & 0.6265 \\
3 & (-45.00, -0.02), (-90.00, -0.02) & 2.04\% & 9.14 & 0.2040 \\
4 & (-11.25, -0.01), (-90.00, -67.51) & 25.00\% & 7.01 & 0.1498 \\
\hline
\multicolumn{5}{c}{} \\
\hline
\multicolumn{5}{|c|}{QuadTree(50)} \\
\hline
No. & Response bounds & Resolution & File size(MB) & Time(10 run avg.) \\
\hline
1 & (-90.00, 89.95), (-180.00, 179.95) & 0.69\% & 49.55 & 2.5440 \\
2 & (-90.00, -0.02), (-180.00, -0.02) & 2.78\% & 49.55 & 2.5535 \\
3 & (-45.00, -0.01), (-90.00, -0.01) & 11.11\% & 49.55 & 0.8051 \\
4 & (-11.25, -0.00), (-90.00, -67.50) & 100.00\% & 27.90 & 0.1221 \\
\hline
\multicolumn{5}{c}{} \\
\hline
\multicolumn{5}{|c|}{QuadTree(100)} \\
\hline
No. & Response bounds & Resolution & File size(MB) & Time(10 run avg.) \\
\hline
1 