# Goal
Find out if the Dataset implementation, specifically the get_item() method is to slow (single thread). <br>
From former experiments it could be shown that it takes around 42 min (to be repeated to be sure!) to load x,y batches using the DataLoader for loading rgb and measurements for the entire Dataset. Possible spots of inefficiency in the current implementation:
1. To many for loops & if statements in get_item() 
2. Data structure input_idx that get created (every time) in get_item()
3. Path generation every time by accessing the DataFrame and joining


<br>
Now "hardcoded", specific, non-generalizable approach to get an optimal single thread benchmark: <br>

1. only consider rgb and the measuremnts needed
2. hardcoded/fully-precomputed paths
3. no loops
4. create x, y
--> 35min 27s

Remark: Only pure DataLoading without preprocessing is tested!

In [1]:
import sys
import os
from torch.utils.data import DataLoader
sys.path.append("..")
from tqdm import tqdm
import shutil
import torch
import numpy as np
import pandas as pd

from utils import train_test_split, create_metadata_df
from dataset_xy import CARLADatasetXY
from data_preprocessing import preprocessing
from dataset_xy_opt import CARLADatasetXYOpt
import cv2
import json

  from .autonotebook import tqdm as notebook_tqdm


In [16]:
path_data = os.path.join("..", "..", "data", "data")

config_xy = {"used_inputs": ["rgb", "measurements"], 
        "used_measurements": ["speed", "steer", "throttle", "brake", "command"],
        "y": ["brake", "steer", "throttle"],
        "seq_len": 1
        }

# Create df_meta 
df_meta_data = create_metadata_df(path_data, config_xy["used_inputs"])

# Create Dataset & DataLoader
dataset = CARLADatasetXY(root_dir=path_data, df_meta_data=df_meta_data, config=config_xy)

In [17]:
df_meta_data

Unnamed: 0,dir,rgb,measurements
0,../../data/data/cycl_dataset_23_11/Routes_non-...,0000.png,0000.json
1,../../data/data/cycl_dataset_23_11/Routes_non-...,0001.png,0001.json
2,../../data/data/cycl_dataset_23_11/Routes_non-...,0002.png,0002.json
3,../../data/data/cycl_dataset_23_11/Routes_non-...,0003.png,0003.json
4,../../data/data/cycl_dataset_23_11/Routes_non-...,0004.png,0004.json
...,...,...,...
258836,../../data/data/dirt_dataset_23_11/Routes_Scen...,0189.png,0189.json
258837,../../data/data/dirt_dataset_23_11/Routes_Scen...,0190.png,0190.json
258838,../../data/data/dirt_dataset_23_11/Routes_Scen...,0191.png,0191.json
258839,../../data/data/dirt_dataset_23_11/Routes_Scen...,0192.png,0192.json


# Benchmark: 

In [19]:
# paths are completely precomputed and must not be computed again
df_paths_rgb = df_meta_data["dir"] + os.sep + "rgb" + os.sep + df_meta_data["rgb"]
df_paths_measurements = df_meta_data["dir"] + os.sep + "measurements" + os.sep + df_meta_data["measurements"]

In [5]:
# When using num_workers= 0 in DataLoader this is essentially what is happening
def run_baseline():
    for i in tqdm(range(len(df_meta_data))):

        x_sample = dict()
        y_sample = dict()
        img = cv2.imread(df_paths_rgb.loc[i])
        # reshape to #channels; height; width
        img = img.reshape([3] + list(img.shape)[:-1])

        with open(df_paths_measurements.loc[i], 'r') as f:
            measurements = json.load(f)
        speed = measurements["speed"]
        throttle = measurements["throttle"]
        command = measurements["command"]
        steer = measurements["steer"]

        x_sample["rgb"] = img
        x_sample["speed"] = speed
        x_sample["command"] = command
        y_sample["throttle"] = throttle
        y_sample["command"] = command
    y_sample["steer"] = steer

100%|██████████| 258841/258841 [35:27<00:00, 121.69it/s]


# Self implemented parallel PandasDataLoader

In [21]:
def load_rgb(path):
    img = cv2.imread(df_paths_rgb.loc[i])
        # reshape to #channels; height; width
    img = img.reshape([3] + list(img.shape)[:-1])
    return img

In [22]:
def load_measurements(path):
    with open(path, 'r') as f:
        measurements = json.load(f)
    # speed = measurements["speed"]
    # throttle = measurements["throttle"]
    # command = measurements["command"]
    # steer = measurements["steer"]
    # return speed, throttle, command, steer
    return measurements

In [61]:
# 31.6 --> so also not faster
def pandas_data_loader():
    for i in tqdm(range(100)):   #int(len(df_meta_data))
        batch_size = 64
        batch_rand_idxs = np.random.randint(0, len(df_meta_data), size=batch_size)

        df_rgb = df_paths_rgb.loc[batch_rand_idxs].apply(load_rgb)
        rgb_np = np.stack(df_rgb.values)
        rgb_torch = torch.tensor(rgb_np)

        df_measurements = df_paths_measurements.loc[batch_rand_idxs].apply(load_measurements)

        df_speed = df_measurements.apply(lambda x: x["speed"])
        speed_np = df_speed.to_numpy()
        speed_torch = torch.tensor(rgb_np)

        df_command = df_measurements.apply(lambda x: x["command"])
        command_np = df_command.to_numpy()
        command_torch = torch.tensor(rgb_np)

        df_throttle = df_measurements.apply(lambda x: x["throttle"])
        throttle_np = df_throttle.to_numpy()
        throttle_torch = torch.tensor(rgb_np)

        df_steer = df_measurements.apply(lambda x: x["steer"])
        steer_np = df_steer.to_numpy()
        steer_torch = torch.tensor(rgb_np)

        df_brake = df_measurements.apply(lambda x: x["brake"])
        brake_np = df_brake.to_numpy()
        brake_torch = torch.tensor(brake_np)

        x_sample = {"rgb": rgb_torch, "speed": speed_torch, "command": command_torch}
        y_sample = {"steer": steer_torch, "throttle": throttle_torch, "brake": brake_torch}

100%|██████████| 100/100 [00:31<00:00,  3.17it/s]


# Test specialized DataLoader

In [6]:
df_meta_data_2 = df_meta_data.head(64 * 100)

In [12]:
ds_opt = CARLADatasetXYOpt(df_meta_data_2)
dl_opt = DataLoader(dataset=ds_opt, batch_size=64, num_workers=4, sampler=None, shuffle=False)

In [13]:
# worker=0   52.9s
# worker=2  32.1s
# worker=4  33.8s (but until 97% it went really quick)
# still need to test it how it performs in training loop!
for x, y in tqdm(dl_opt):
    pass

100%|██████████| 100/100 [00:33<00:00,  2.96it/s]


In [14]:
ds = dataset = CARLADatasetXY(root_dir=path_data, df_meta_data=df_meta_data_2, config=config_xy)
dl = DataLoader(dataset=ds_opt, batch_size=64, num_workers=0, sampler=None, shuffle=False)

In [15]:
# 32.2s seems like this new/specialized isn't better than the old/generalized DataLoader
for x, y in tqdm(dl):
    pass

100%|██████████| 100/100 [00:32<00:00,  3.11it/s]


# Test DataLoader speed with preprocessed files

In [65]:
path_data = os.path.join("..", "..", "data", "data_prep")

config_xy = {"used_inputs": ["rgb", "measurements"], 
        "used_measurements": ["speed", "steer", "throttle", "brake", "command"],
        "y": ["brake", "steer", "throttle"],
        "seq_len": 1
        }

# Create df_meta 
df_meta_data = create_metadata_df(path_data, config_xy["used_inputs"])
df_meta_data_2 = df_meta_data.head(64*100)

# Create Dataset & DataLoader
dataset = CARLADatasetXY(root_dir=path_data, df_meta_data=df_meta_data_2, config=config_xy)
dl_npz = DataLoader(dataset=dataset, batch_size=64, num_workers=4, sampler=None, shuffle=False)

In [66]:
# 42.2s so longer than with with .png files
for x, y in tqdm(dl_npz):
    pass

100%|██████████| 100/100 [00:42<00:00,  2.37it/s]
