In [83]:
import pandas as pd
import cv2
import json
from data_pipeline import CARLADataset, DataLoader
from torchvision import transforms
from torch.utils.data import Sampler
import numpy as np
import torch

In [86]:
"""
Inspiration from https://www.kaggle.com/code/shahules/guide-pytorch-data-samplers-sequence-bucketing

Assigns a weight/probability to each index in the dataset that determines how likely it is to be sampled.
Use cases:
1. Set the probabilities of indices to zero that don't have enough previous data points in a specific route
according to the specified sequence length.
2. Oversampling of rare events/maneuvers such as turns to balance the dataset (for this, additional metadata
will be needed).
"""
class WeightedSampler(Sampler):
    """Draw dataset indices at random (with replacement) according to per-index weights.

    Every index that lacks enough preceding data points inside its own route
    (with respect to ``dataset.seq_len``) gets weight zero; all remaining
    indices share the probability mass uniformly.

    Args:
        dataset: Object providing ``__len__``, an integer ``seq_len`` and a
            pandas DataFrame ``df_meta_data`` with a "route" column.
            Assumes one meta-data row per dataset index, in dataset order,
            with the rows of a route stored contiguously -- TODO confirm
            against ``CARLADataset``.

    Raises:
        ValueError: If no index is eligible for sampling (e.g. every route is
            shorter than or equal to ``seq_len``).
    """

    def __init__(self, dataset):
        self.dataset = dataset
        self.num_samples = len(dataset)
        self.indices = list(range(self.num_samples))

        restricted_idxs = np.asarray(self.__get_restricted_indices(), dtype=np.intp)
        eligible = np.ones(self.num_samples, dtype=bool)
        eligible[restricted_idxs] = False

        # Normalise over the indices that can actually be drawn, so the
        # weights sum to one regardless of how many indices were restricted.
        num_eligible = int(eligible.sum())
        if num_eligible == 0:
            raise ValueError(
                "WeightedSampler: no index is eligible for sampling; "
                "every route is too short for the configured seq_len."
            )
        weights = np.where(eligible, 1.0 / num_eligible, 0.0)

        self.weights = torch.tensor(weights, dtype=torch.double)

    def __iter__(self):
        """Yield ``num_samples`` indices drawn with replacement from ``self.weights``."""
        drawn = torch.multinomial(self.weights, self.num_samples, replacement=True)
        yield from (self.indices[i] for i in drawn.tolist())

    def __get_restricted_indices(self):
        """
        Return:
                restricted_idx (list): List contains idxs that shall not be sampled because they don't
                have enough previous/lagged datapoint with respect to seq_len for each route.
        """
        df_meta_data, seq_len = self.dataset.df_meta_data, self.dataset.seq_len
        # Position of every row inside its own route (0, 1, 2, ...). The first
        # seq_len rows of each route are restricted; a route shorter than
        # seq_len is therefore restricted completely, without touching the
        # rows of any other route.
        rank_in_route = df_meta_data.groupby("route", sort=False).cumcount().tolist()
        return [idx for idx, rank in enumerate(rank_in_route) if rank < seq_len]

    def __len__(self):
        """Number of indices produced by one pass over the sampler."""
        return self.num_samples
        
        



In [87]:
# Dataset locations, relative to the notebook's working directory.
path_moritz_data = "myfolder/sample_trainingsdata"
path_ege_data = "../data/Dataset Ege"

# Tensor conversion followed by per-channel normalisation (the constants are
# presumably the usual ImageNet statistics -- confirm).
# NOTE(review): this transform is built but not used below, where the dataset
# is created with transform=None.
data_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

# Which inputs/measurements the dataset should return and how many
# consecutive frames make up one sample.
config = {"used_inputs": ["rgb", "depth", "measurements"], 
        "used_measurements": ["speed", "steer", "throttle"],
        "seq_len": 2
        }
dataset = CARLADataset(root_dir=path_ege_data, config=config, transform=None)
weighted_sampler = WeightedSampler(dataset=dataset)
# Use the built-in len() rather than calling the dunder method directly.
print(len(dataset))

217


In [88]:
# Sanity check: number of "depth" entries returned for sample index 2
# (the output below shows 2, matching "seq_len": 2 in the config).
len(dataset[2]["depth"])

2

In [93]:
# Batch the dataset in groups of 16, with indices supplied by the weighted sampler.
dl = DataLoader(dataset=dataset, batch_size=16, num_workers=0, sampler=weighted_sampler)

In [94]:
# Open question: when num_workers is increased (> 0), looping over the batches takes longer.
# Count the batches of one pass over the loader. `batch` intentionally stays
# bound to the last batch because the following cell inspects it.
count = 0
for count, batch in enumerate(dl, start=1):
    pass
print(count)

14


In [100]:
# Shape of the first "depth" element of `batch`, which is left over from the
# loop in the previous cell (i.e. the last batch that was drawn).
# NOTE(review): the leading 9 in the output is presumably the size of the final,
# incomplete batch (217 samples = 13 * 16 + 9) -- confirm.
batch["depth"][0].shape

torch.Size([9, 160, 960, 3])

In [20]:
# DataLoader open questions:
# Check which indices were used for a specific batch
# --> Solved by adding "idx" attribute to sample
# 

5

In [11]:
# Example files (RGB image, measurements JSON, lidar array) taken from a single
# route folder of the dataset, used for manual inspection.
image_path = "../data/Dataset Ege/Town10HD_Scenario10_route3_11_28_17_28_17/rgb/0000.png"
json_path = "../data/Dataset Ege/Town10HD_Scenario10_route3_11_28_17_28_17/measurements/0000.json"
npy_path = "../data/Dataset Ege/Town10HD_Scenario10_route3_11_28_17_28_17/lidar/0152.npy"

In [6]:
# Read the sample RGB frame from disk; the notebook displays the returned pixel array.
cv2.imread(image_path)

array([[[115, 111, 101],
        [ 25, 117, 113],
        [ 28, 123, 112],
        ...,
        [179, 194, 215],
        [197, 195, 202],
        [195, 202, 210]],

       [[115, 139, 156],
        [113, 158, 154],
        [ 60, 145, 134],
        ...,
        [164, 190, 215],
        [156, 174, 196],
        [169, 181, 195]],

       [[131, 169, 182],
        [198, 189, 189],
        [130, 172, 167],
        ...,
        [173, 197, 220],
        [166, 188, 207],
        [184, 195, 208]],

       ...,

       [[195, 198, 203],
        [190, 196, 203],
        [190, 196, 203],
        ...,
        [224, 230, 237],
        [221, 227, 234],
        [227, 228, 233]],

       [[188, 197, 205],
        [190, 196, 204],
        [189, 194, 202],
        ...,
        [228, 233, 239],
        [228, 232, 238],
        [228, 233, 238]],

       [[175, 186, 198],
        [179, 188, 199],
        [184, 190, 200],
        ...,
        [229, 234, 240],
        [229, 234, 240],
        [229, 234, 240]]

In [9]:
# Parse the measurements JSON of the sample frame. The encoding is passed
# explicitly so the result does not depend on the platform's locale default.
with open(json_path, 'r', encoding='utf-8') as f:
    file = json.load(f)

In [12]:
# Display the measurements loaded from json_path in the previous cell.
file

{'x': -112.95048778796695,
 'y': -41.48863607339395,
 'theta': 6.280380725860596,
 'speed': 0.0,
 'target_speed': 4.0,
 'x_command': -82.62922440350494,
 'y_command': -41.57395940725048,
 'command': 4,
 'waypoints': [[-112.89775848388672, -41.48876953125, 6.2803715443298636],
  [-112.74459075927734, -41.489200592041016, 6.2803715443298636],
  [-112.48578643798828, -41.48992919921875, 6.2803715443298636],
  [-112.12135314941406, -41.4909553527832, 6.2803715443298636],
  [-111.65128326416016, -41.49227523803711, 6.2803715443298636],
  [-111.07557678222656, -41.493896484375, 6.2803715443298636],
  [-110.39423370361328, -41.49581527709961, 6.2803715443298636],
  [-109.60726165771484, -41.49802780151367, 6.2803715443298636]],
 'steer': -0.0,
 'throttle': 0.75,
 'brake': False,
 'junction': False,
 'vehicle_hazard': [False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,
  False,