# 5. Benchmark `deeplake`

In [1]:
import deeplake
import os
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path
import IPython.display as ipd
from utils import time_me, time_me_seconds, SAMPLE_RATE
from benchmarks import run_all_benchmarks

In [2]:
# Point Google Cloud client libraries at a service-account key file via the
# standard Application Default Credentials variable (presumably required for
# the gcs:// dataset path used later in this notebook -- confirm).
os.environ.update(
    {"GOOGLE_APPLICATION_CREDENTIALS": "./secret/hpml-399816-55d1dc19c012.json"}
)

In [3]:
def deeplake_cropper(crop_duration):
    """Build a transform that peak-normalizes audio and forces a fixed length.

    Args:
        crop_duration: Target clip length in seconds. It is multiplied by
            ``SAMPLE_RATE`` and truncated to a whole number of samples.

    Returns:
        A function ``crop(audio)``. ``audio`` is indexed as ``audio[:, 0]``
        (presumably ``(num_samples, num_channels)`` -- confirm against the
        dataset). The result is a ``float32`` array of shape
        ``(1, crop_samples)``.
    """
    crop_samples = int(SAMPLE_RATE * crop_duration)

    def crop(audio):
        """Normalize one clip, then zero-pad or randomly crop it to length.

        Raises:
            ValueError: If the clip contains no samples.
        """
        audio = audio[:, 0]  # keep only the first column
        num_samples = audio.shape[0]
        if num_samples == 0:
            # Checked before normalizing: max() of an empty array raises a
            # far less helpful ValueError.
            raise ValueError("cannot crop an empty audio clip")

        peak = np.abs(audio).max()
        if peak > 0:
            audio = audio / peak  # peak normalize
        # else: an all-silent clip is left as zeros instead of becoming NaN (0 / 0)
        audio = audio.astype(np.float32)  # convert to f32

        if num_samples < crop_samples:
            # pad the tail with zeros if the input is short
            audio = np.pad(audio, (0, crop_samples - num_samples))
        elif num_samples > crop_samples:
            # crop a random window if it is too long; randint's upper bound is
            # exclusive, so +1 keeps the last valid start position reachable
            rand_start = np.random.randint(0, num_samples - crop_samples + 1)
            audio = audio[rand_start : rand_start + crop_samples]
        # else: already exactly the right length
        assert audio.shape[0] == crop_samples

        return audio[None, :]  # add a leading axis of size 1

    return crop

In [4]:
@time_me_seconds
def build_deeplake_dataloader(
    deeplake_path,
    crop_duration=3.0,
    batch_size=32,
    shuffle_buffer=2048,
    num_workers=4,
    pin_memory=True,
):
    """Load a deeplake dataset and build two shuffled PyTorch dataloaders.

    Both loaders use the same tensors, transforms, batch size, pinning and
    shuffle buffer; they differ only in worker count.

    Args:
        deeplake_path: Path or URL passed to ``deeplake.load``.
        crop_duration: Clip length in seconds handed to ``deeplake_cropper``.
        batch_size: Batch size for both loaders.
        shuffle_buffer: Scaled by ``310 / 2048`` to obtain the buffer size
            passed to deeplake (presumably a sample count, with 2048 samples
            taking roughly 310 MB -- confirm).
        num_workers: Worker count for the first loader.
        pin_memory: Forwarded to both loaders.

    Returns:
        ``(dataloader, single_worker_dataloader)``: the first uses
        ``num_workers`` workers, the second uses ``num_workers=0``.
    """
    # deeplake uses MB as measurement for buffer
    buffer_mb = 310 / 2048 * shuffle_buffer
    ds = deeplake.load(deeplake_path)
    tensor_names = ("audio", "accent", "gender", "age")

    def make_loader(worker_count):
        # Only "audio" is transformed; the other tensors map to None.
        transforms = {name: None for name in tensor_names}
        transforms["audio"] = deeplake_cropper(crop_duration=crop_duration)
        return ds.pytorch(
            tensors=list(tensor_names),
            transform=transforms,
            num_workers=worker_count,
            batch_size=batch_size,
            pin_memory=pin_memory,
            shuffle=True,
            buffer_size=buffer_mb,
        )

    multi_worker_loader = make_loader(num_workers)
    in_process_loader = make_loader(0)
    return multi_worker_loader, in_process_loader

In [5]:
# Dataset locations handed to build_deeplake_dataloader in the benchmark cells:
# one on local disk and one in a Google Cloud Storage bucket (presumably copies
# of the same dataset -- confirm).
local_deeplake_path = "./data/deeplake/"
cloud_deeplake_path = "gcs://hpml-project/deeplake/"

In [6]:
# Benchmark the dataset on local disk. Build the multi-worker and
# single-worker loaders with default settings, then run the benchmark suite.
# `tag` is forwarded to run_all_benchmarks (presumably to label its results --
# confirm in benchmarks.py).
tag = "deeplake_local"
dataloader, single_worker_dataloader = build_deeplake_dataloader(local_deeplake_path)
run_all_benchmarks(dataloader, single_worker_dataloader, tag=tag)


[2023-12-18_21-41-37]	
+-----------------------------------+
|Beginning build_deeplake_dataloader|
+-----------------------------------+
./data/deeplake/ loaded successfully.






[2023-12-18_21-41-42]	
+-------------------------------------------+
|build_deeplake_dataloader took 5.62 seconds|
+-------------------------------------------+



STAGE:2023-12-18 21:41:43 204585:204585 ActivityProfilerController.cpp:311] Completed Stage: Warm Up


Number of parameters of model: 64,628,259


  0%|                                                                                                                                                                                                                        | 0/200 [00:00<?, ?it/s]
Please wait, filling up the shuffle buffer with samples.:   0%|                                                                                                                                                           | 0.00/296M [00:00<?, ?B/s][A
Please wait, filling up the shuffle buffer with samples.:   0%|                                                                                                                                                 | 188k/296M [00:06<2:48:28, 30.6kB/s][A
Please wait, filling up the shuffle buffer with samples.:   8%|███████████▋                                                                                                                                      | 23.6M/296M [00:06<00:52, 5.44MB/s][A
Please 

Shuffle buffer filling is complete.


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:19<00:00, 10.18it/s]
STAGE:2023-12-18 21:42:03 204585:204585 ActivityProfilerController.cpp:317] Completed Stage: Collection
STAGE:2023-12-18 21:42:03 204585:204585 ActivityProfilerController.cpp:321] Completed Stage: Post Processing


Time to 1st batch: 8.93 seconds


In [7]:
# Benchmark the dataset at the gcs:// path. Build the multi-worker and
# single-worker loaders with default settings, then run the benchmark suite.
# Note: this rebinds `tag`, `dataloader` and `single_worker_dataloader`.
tag = "deeplake_cloud"
dataloader, single_worker_dataloader = build_deeplake_dataloader(cloud_deeplake_path)
run_all_benchmarks(dataloader, single_worker_dataloader, tag=tag)


[2023-12-18_21-42-26]	
+-----------------------------------+
|Beginning build_deeplake_dataloader|
+-----------------------------------+


/

gcs://hpml-project/deeplake/ loaded successfully.



 

[2023-12-18_21-43-18]	
+--------------------------------------------+
|build_deeplake_dataloader took 52.12 seconds|
+--------------------------------------------+



STAGE:2023-12-18 21:43:19 204585:204585 ActivityProfilerController.cpp:311] Completed Stage: Warm Up


Number of parameters of model: 64,628,259


  0%|                                                                                                                                                                                                                        | 0/200 [00:00<?, ?it/s]
Please wait, filling up the shuffle buffer with samples.:   0%|                                                                                                                                                           | 0.00/296M [00:00<?, ?B/s][A
Please wait, filling up the shuffle buffer with samples.:   0%|                                                                                                                                                 | 188k/296M [00:11<5:21:31, 16.1kB/s][A
Please wait, filling up the shuffle buffer with samples.:   8%|███████████▋                                                                                                                                      | 23.6M/296M [00:13<01:50, 2.59MB/s][A
Please 

Shuffle buffer filling is complete.


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [01:20<00:00,  2.48it/s]
STAGE:2023-12-18 21:44:39 204585:204585 ActivityProfilerController.cpp:317] Completed Stage: Collection
STAGE:2023-12-18 21:44:39 204585:204585 ActivityProfilerController.cpp:321] Completed Stage: Post Processing


Time to 1st batch: 21.96 seconds
