In [1]:
import sys
import os
from torch.utils.data import DataLoader
sys.path.append("..")
from tqdm import tqdm
import shutil
import torch
import numpy as np

from utils import train_test_split, create_metadata_df
from dataset_xy import CARLADatasetXY
from data_preprocessing import preprocessing

  from .autonotebook import tqdm as notebook_tqdm


# Prep & save to disk

In [5]:
# Root folder of the raw recordings (relative to this notebook).
path_data = os.path.join("..", "..", "data", "data")

# Dataset configuration: selected inputs, measurement fields, targets, sequence length.
config_xy = {
    "used_inputs": ["rgb", "measurements"],
    "used_measurements": ["speed", "steer", "throttle", "brake", "command"],
    "y": ["brake", "steer", "throttle"],
    "seq_len": 1,
}

# Metadata table describing the files of the selected inputs.
df_meta_data = create_metadata_df(path_data, config_xy["used_inputs"])

# Dataset over the raw recordings (no DataLoader is needed for the export).
dataset = CARLADatasetXY(
    root_dir=path_data,
    df_meta_data=df_meta_data,
    config=config_xy,
)


In [6]:
def prep_to_disk(format):
    """Preprocess every RGB frame of the global ``dataset`` and save it to disk.

    For each row of ``dataset.df_meta_data`` the image is loaded with
    ``dataset.load_data_from_path``, passed through ``preprocessing["rgb"]``
    and written as a NumPy file. The output tree mirrors the source tree,
    except that the path component following the first ``"data"`` component
    gets a ``"_prep"`` suffix. Each source directory's ``measurements``
    folder is copied next to the generated ``rgb`` folder.

    The function is safe to re-run: existing output directories are reused,
    a missing ``measurements`` copy is completed, and frame files are
    overwritten.

    Args:
        format: Output file extension, ``".npy"`` (``np.save``) or
            ``".npz"`` (``np.savez_compressed``).

    Raises:
        AssertionError: If ``format`` is not one of the supported extensions.
        ValueError: If a source directory contains no ``"data"`` component.
    """
    assert format in [".npy", ".npz"]
    fn_save = np.save if format == ".npy" else np.savez_compressed
    df_meta = dataset.df_meta_data
    for idx in tqdm(range(len(df_meta))):
        # Positional row access throughout, so a non-default index cannot
        # make the label-based and positional lookups disagree.
        row = df_meta.iloc[idx]
        dir_src = row["dir"]

        # Derive the mirrored output directory. Re-joining with os.sep (rather
        # than os.path.join(*parts)) keeps the leading separator of absolute paths.
        path_parts = dir_src.split(os.sep)
        path_parts[path_parts.index("data") + 1] += "_prep"
        dir_prep = os.sep.join(path_parts)
        dir_name_zip = os.path.join(dir_prep, "rgb")
        os.makedirs(dir_name_zip, exist_ok=True)

        # Copy the measurements independently of the rgb folder's existence so
        # an interrupted earlier run does not leave them missing.
        dir_measurements_prep = os.path.join(dir_prep, "measurements")
        if not os.path.exists(dir_measurements_prep):
            shutil.copytree(os.path.join(dir_src, "measurements"), dir_measurements_prep)

        # NOTE(review): columns 0 and 1 are presumably "dir" and "rgb" — confirm
        # against create_metadata_df; positional access is kept as in the original.
        path = os.path.join(row.iloc[0], "rgb", row.iloc[1])
        img_np = dataset.load_data_from_path(path)
        img_torch = torch.Tensor(img_np)
        img_torch_prep = preprocessing["rgb"](img_torch)
        img_np_prep = img_torch_prep.numpy()

        # Same base name as the source image, with the requested extension.
        filename_np = os.path.join(dir_name_zip, f"{row['rgb'].split('.')[0]}{format}")
        with open(filename_np, 'wb') as f:
            fn_save(f, img_np_prep)

In [7]:
# Export all preprocessed frames as .npz files.
# The recorded run over 258,866 frames took roughly 5.5 hours.
prep_to_disk(".npz")

100%|██████████| 258866/258866 [5:34:22<00:00, 12.90it/s]  


# Loading the prep data

In [8]:
# Root folder of the preprocessed data (relative to this notebook).
path_data = os.path.join("..", "..", "data", "data_prep")

# Dataset configuration: selected inputs, measurement fields, targets, sequence length.
config_xy = {
    "used_inputs": ["rgb", "measurements"],
    "used_measurements": ["speed", "steer", "throttle", "brake", "command"],
    "y": ["brake", "steer", "throttle"],
    "seq_len": 1,
}

# Metadata table describing the files of the selected inputs.
df_meta_data = create_metadata_df(path_data, config_xy["used_inputs"])

# Dataset over the preprocessed data (no DataLoader is created in this cell).
dataset = CARLADatasetXY(
    root_dir=path_data,
    df_meta_data=df_meta_data,
    config=config_xy,
)


In [9]:
# Show the dataset summary (size of each input in GB, driving time, share of the data).
dataset.get_statistics()

Unnamed: 0,rgb_in_GB,measurements_in_GB,driving_time,%_of_entire_data
0,83.95,1.14,"1 day, 11:57:13",100.0


In [10]:
# NOTE(review): 0.461 is an unexplained constant — presumably an average
# per-frame file size in MB, which would make the result a total in GB; confirm.
0.461 * len(df_meta_data) / 1000

119.33722600000002

In [11]:
# NOTE(review): 0.380 is an unexplained constant — presumably an average
# per-frame file size in MB, which would make the result a total in GB; confirm.
0.380 * len(df_meta_data) / 1000

98.36908

## Check speed: Loading and preprocessing on the fly

In [12]:
# Benchmark input: the raw recordings, preprocessed while iterating.
path_data = os.path.join("..", "..", "data", "data")

# Dataset configuration: selected inputs, measurement fields, targets, sequence length.
config_xy = {
    "used_inputs": ["rgb", "measurements"],
    "used_measurements": ["speed", "steer", "throttle", "brake", "command"],
    "y": ["brake", "steer", "throttle"],
    "seq_len": 1,
}

# Metadata table describing the files of the selected inputs.
df_meta_data = create_metadata_df(path_data, config_xy["used_inputs"])

# Restrict the benchmark to the first 20 batches' worth of samples.
batch_size = 64
dataset = CARLADatasetXY(
    root_dir=path_data,
    df_meta_data=df_meta_data.head(batch_size * 20),
    config=config_xy,
)
dl_prep = DataLoader(
    dataset=dataset,
    batch_size=batch_size,
    num_workers=0,
    sampler=None,
)

In [13]:
# Time one pass over the loader with the RGB preprocessing applied per batch.
# NOTE(review): squeeze() without a dim removes every size-1 axis, which would
# also drop the batch axis of a single-sample batch — confirm this is intended.
for x, y in tqdm(dl_prep):
    x["rgb"] = preprocessing["rgb"](x["rgb"].squeeze())

100%|██████████| 20/20 [00:11<00:00,  1.67it/s]


## Check speed: Loading preprocessed data from .npy / .npz (depending on which format was saved to disk)

In [2]:
# Benchmark input: the frames already preprocessed and saved to disk.
path_data = os.path.join("..", "..", "data", "data_prep")

# Dataset configuration: selected inputs, measurement fields, targets, sequence length.
config_xy = {
    "used_inputs": ["rgb", "measurements"],
    "used_measurements": ["speed", "steer", "throttle", "brake", "command"],
    "y": ["brake", "steer", "throttle"],
    "seq_len": 1,
}

# Metadata table describing the files of the selected inputs.
df_meta_data = create_metadata_df(path_data, config_xy["used_inputs"])

# Restrict the benchmark to the first 20 batches' worth of samples.
batch_size = 64
dataset = CARLADatasetXY(
    root_dir=path_data,
    df_meta_data=df_meta_data.head(batch_size * 20),
    config=config_xy,
)
dl_prep = DataLoader(
    dataset=dataset,
    batch_size=batch_size,
    num_workers=0,
    sampler=None,
)

In [3]:
# Time one pass over the loader; batches are only fetched, nothing is computed.
for x, y in tqdm(dl_prep):
    pass

100%|██████████| 20/20 [00:10<00:00,  1.87it/s]


In [40]:
# Inspect a single preprocessed frame. The path is built relative to the
# notebook, like the other cells, instead of an absolute machine-specific path.
# NOTE(review): assumes the notebook runs two levels below the repository root,
# as the "../../data/..." paths in the other cells imply — confirm.
path = os.path.join(
    "..", "..", "data", "data_prep",
    "coke_dataset_23_11",
    "Routes_Scenario3_Town01_curved_Seed1000",
    "Scenario3_Town01_curved_route0_11_23_20_02_59",
    "rgb", "0000.npz",
)

# The archive holds a plain numeric array, so pickle support stays disabled.
with np.load(path, allow_pickle=False) as f:
    # np.savez_compressed stores an unnamed positional array under "arr_0".
    data = f["arr_0"]

In [41]:
# Display the loaded array (NumPy abbreviates large arrays in the repr).
data

array([[[196, 191, 187, ..., 210, 206, 203],
        [210, 206, 204, ..., 203, 191, 175],
        [203, 191, 175, ..., 196, 191, 187],
        ...,
        [105, 102, 104, ..., 215, 215, 212],
        [214, 214, 211, ..., 193, 217, 230],
        [186, 201, 219, ..., 113, 110, 109]],

       [[105, 104, 104, ..., 214, 213, 209],
        [224, 221, 220, ..., 194, 228, 230],
        [185, 199, 219, ..., 114, 110, 109],
        ...,
        [ 93, 114, 127, ...,  59, 102,  86],
        [ 71,  59,  38, ..., 104, 180, 128],
        [  0, 200, 192, ..., 127, 136, 145]],

       [[117, 133, 144, ...,  66,  99,  77],
        [ 75,  59,  46, ..., 182, 222, 184],
        [193, 236, 224, ..., 126, 134, 140],
        ...,
        [225, 231, 235, ..., 236, 240, 243],
        [236, 240, 243, ..., 224, 230, 235],
        [224, 230, 235, ..., 190, 206, 182]]], dtype=uint8)