In [24]:
import sys
import os
from torch.utils.data import DataLoader
sys.path.append("..")
from tqdm import tqdm
import shutil
import torch
import numpy as np

from utils import train_test_split, create_metadata_df
from dataset_xy import CARLADatasetXY
from data_preprocessing import preprocessing

# Prep & save to disk

In [2]:
# Raw recordings, addressed relative to this notebook.
path_data = os.path.join("..", "..", "data", "data")

# Dataset configuration: inputs to load, measurement fields to use,
# the fields listed under "y", and the sequence length.
config_xy = {
    "used_inputs": ["rgb", "measurements"],
    "used_measurements": ["speed", "steer", "throttle", "brake", "command"],
    "y": ["brake", "steer", "throttle"],
    "seq_len": 1,
}

# Create df_meta
df_meta_data = create_metadata_df(path_data, config_xy["used_inputs"])

# Create the Dataset (no DataLoader is built in this cell)
dataset = CARLADatasetXY(
    root_dir=path_data,
    df_meta_data=df_meta_data,
    config=config_xy,
)


In [4]:
def prep_to_disk(format, suffix="_prep_", source=None):
    """Mirror the dataset's RGB frames to disk as NumPy files.

    For every row of the metadata frame the image is loaded through
    ``load_data_from_path`` and written, unmodified (no preprocessing is
    applied), into a parallel directory tree. The parallel tree is derived
    from the row's ``dir`` by appending ``suffix`` to the path component that
    follows the first ``"data"`` component. Each route's ``measurements``
    folder is copied next to the new ``rgb`` folder.

    Args:
        format: Output extension, ".npy" (written with ``np.save``) or ".npz"
            (written with ``np.savez_compressed``, array stored as "arr_0").
        suffix: Text appended to the dataset folder name.
            NOTE(review): the default produces e.g. ``data_prep_`` whereas the
            later cells of this notebook read from ``data_prep``; pass
            ``suffix="_prep"`` if that folder is the intended target.
        source: Object exposing ``df_meta_data`` (with "dir" and "rgb"
            columns) and ``load_data_from_path``. Defaults to the
            notebook-level ``dataset``.

    Raises:
        AssertionError: If ``format`` is not ".npy" or ".npz".
        ValueError: If a ``dir`` entry has no ``"data"`` path component.
        IndexError: If ``"data"`` is the last component of a ``dir`` entry.
    """
    assert format in [".npy", ".npz"], f"unsupported format: {format!r}"
    fn_save = np.save if format == ".npy" else np.savez_compressed
    if source is None:
        source = dataset
    df_meta = source.df_meta_data

    # Target rgb directory per source route directory, so the directory
    # bookkeeping runs once per route instead of once per frame.
    rgb_dir_by_route = {}

    # Iterate the columns by name; this is purely positional over the rows and
    # therefore independent of the frame's index labels.
    rows = zip(df_meta["dir"], df_meta["rgb"])
    for route_dir, rgb_name in tqdm(rows, total=len(df_meta)):
        dir_name_zip = rgb_dir_by_route.get(route_dir)
        if dir_name_zip is None:
            path_parts = route_dir.split(os.sep)
            path_parts[path_parts.index("data") + 1] += suffix
            # Re-join with os.sep instead of os.path.join(*parts): the latter
            # drops the empty first component and would turn an absolute path
            # into a relative one.
            route_dir_prep = os.sep.join(path_parts)
            dir_name_zip = os.path.join(route_dir_prep, "rgb")
            os.makedirs(dir_name_zip, exist_ok=True)
            # Copy the measurements independently of the rgb directory's
            # existence so a rerun after an interrupted call still works.
            measurements_dst = os.path.join(route_dir_prep, "measurements")
            if not os.path.exists(measurements_dst):
                shutil.copytree(os.path.join(route_dir, "measurements"), measurements_dst)
            rgb_dir_by_route[route_dir] = dir_name_zip

        img_np = source.load_data_from_path(os.path.join(route_dir, "rgb", rgb_name))
        # splitext strips only the final extension, so names containing
        # several dots keep their full stem.
        stem = os.path.splitext(rgb_name)[0]
        filename_np = os.path.join(dir_name_zip, f"{stem}{format}")
        with open(filename_np, "wb") as f:
            fn_save(f, img_np)

In [5]:
# Write every frame as an uncompressed .npy file (the recorded run below was
# interrupted by hand after a few hundred frames).
prep_to_disk(".npy")

  0%|          | 798/258841 [00:06<36:47, 116.91it/s]


KeyboardInterrupt: 

# Loading the prepared data

In [8]:
# Point at the converted copy of the dataset instead of the raw recordings.
path_data = os.path.join("..", "..", "data", "data_prep")

# Same configuration as used for the raw dataset.
config_xy = {
    "used_inputs": ["rgb", "measurements"],
    "used_measurements": ["speed", "steer", "throttle", "brake", "command"],
    "y": ["brake", "steer", "throttle"],
    "seq_len": 1,
}

# Create df_meta
df_meta_data = create_metadata_df(path_data, config_xy["used_inputs"])

# Create the Dataset (no DataLoader is built in this cell)
dataset = CARLADatasetXY(
    root_dir=path_data,
    df_meta_data=df_meta_data,
    config=config_xy,
)


In [9]:
# Display the dataset's summary statistics (rendered as the table below).
dataset.get_statistics()

Unnamed: 0,rgb_in_GB,measurements_in_GB,driving_time,%_of_entire_data
0,83.95,1.14,"1 day, 11:57:13",100.0


## Check speed: loading and preprocessing on the fly

In [12]:
# Benchmark input: the raw recordings.
path_data = os.path.join("..", "..", "data", "data")

config_xy = {
    "used_inputs": ["rgb", "measurements"],
    "used_measurements": ["speed", "steer", "throttle", "brake", "command"],
    "y": ["brake", "steer", "throttle"],
    "seq_len": 1,
}

# Create df_meta
df_meta_data = create_metadata_df(path_data, config_xy["used_inputs"])

# Only the first 20 batches' worth of rows are used for the timing run.
batch_size = 64
dataset = CARLADatasetXY(
    root_dir=path_data,
    df_meta_data=df_meta_data.head(batch_size * 20),
    config=config_xy,
)
dl_prep = DataLoader(
    dataset=dataset,
    batch_size=batch_size,
    num_workers=0,
    sampler=None,
)

In [13]:
# Timing run: load each batch and preprocess its images; the tqdm rate is the result.
for x, y in tqdm(dl_prep):
    # Remove only dim 1 (assumed to be the seq_len=1 axis, as the recorded
    # shape [64, 1, 160, 960] suggests). A bare squeeze() would also drop the
    # batch axis whenever a batch contains a single sample.
    x["rgb"] = preprocessing["rgb"](torch.squeeze(x["rgb"], dim=1))

100%|██████████| 20/20 [00:11<00:00,  1.67it/s]


## Check speed: loading .npy and then preprocessing
The .npy files hold the unpreprocessed images, because a preprocessed image is stored as float and takes about 1.8 MB per file. This section checks whether loading .npy and preprocessing is still a lot faster than loading .png and preprocessing.

In [25]:
# Benchmark input: the converted copy of the dataset.
path_data = os.path.join("..", "..", "data", "data_prep")

config_xy = {
    "used_inputs": ["rgb", "measurements"],
    "used_measurements": ["speed", "steer", "throttle", "brake", "command"],
    "y": ["brake", "steer", "throttle"],
    "seq_len": 1,
}

# Create df_meta
df_meta_data = create_metadata_df(path_data, config_xy["used_inputs"])

# Only the first 20 batches' worth of rows are used for the timing run.
batch_size = 64
dataset = CARLADatasetXY(
    root_dir=path_data,
    df_meta_data=df_meta_data.head(batch_size * 20),
    config=config_xy,
)
dl_prep = DataLoader(
    dataset=dataset,
    batch_size=batch_size,
    num_workers=0,
    sampler=None,
)

In [17]:
# Shape of the batch after squeezing. `x` is not defined in this cell: it is
# whatever batch the last executed loader loop left in the kernel.
torch.squeeze(x["rgb"]).shape

torch.Size([64, 160, 960])

In [15]:
# Shape of the batch as delivered by the loader (before any squeeze); relies on
# `x` left over from a previously executed loop cell.
x["rgb"].shape

torch.Size([64, 1, 160, 960])

In [9]:
# Timing run over the .npy-backed loader.
# NOTE(review): the recorded run of this cell stopped on the first batch with a
# size mismatch (64 vs 3 at dimension 0), and the recorded shapes show the
# batch as [64, 1, 160, 960]. It looks like these samples arrive without the
# channel axis that preprocessing["rgb"] expects -- confirm how the dataset
# loads .npy files before trusting this benchmark.
for x, y in tqdm(dl_prep):
    # Remove only dim 1 (assumed to be the seq_len=1 axis). A bare squeeze()
    # would also drop the batch axis whenever a batch contains a single sample.
    x["rgb"] = preprocessing["rgb"](torch.squeeze(x["rgb"], dim=1))

  0%|          | 0/20 [00:00<?, ?it/s]


RuntimeError: The size of tensor a (64) must match the size of tensor b (3) at non-singleton dimension 0

In [30]:
# One sample frame of the converted dataset, addressed relative to the notebook
# like every other path here instead of through a machine-specific absolute
# prefix.
path = os.path.join(
    "..", "..", "data", "data_prep",
    "coke_dataset_23_11",
    "Routes_Scenario3_Town01_curved_Seed1000",
    "Scenario3_Town01_curved_route0_11_23_20_02_59",
    "rgb", "0000.npy",
)
# NOTE: the snippet below is the .npz access pattern (context manager plus the
# "arr_0" key). A plain numeric .npy file is read with `data = np.load(path)`
# and does not need allow_pickle.
# with  np.load(path, allow_pickle=True) as f:
#     data = f["arr_0"]
