In [47]:
import os
import math
import torch
import numpy as np
from torch.utils.data import Dataset
from torch.utils.data.sampler import Sampler
from torch.utils.data.dataloader import DataLoader

In [48]:
def get_dataloader(data_dir, phase, obs_len, pred_len, batch_size):
    r"""Build the DataLoader that serves one split of the trajectory dataset.

    Args:
        data_dir (str): root directory of the dataset; the split is read from ``<data_dir>/<phase>/``
        phase (str): which split to load, one of 'train', 'val', 'test'
        obs_len (int): number of observed time-steps per trajectory
        pred_len (int): number of time-steps to predict per trajectory
        batch_size (int): batch size given to ``TrajBatchSampler``; no batch sampler is
            created when it is 1 or less

    Returns:
        loader_phase (torch.utils.data.DataLoader): dataloader for the requested split
    """

    assert phase in ('train', 'val', 'test')

    # Shuffling and dropping the trailing partial batch are only requested for training.
    is_train = phase == 'train'
    # The empty last element keeps the trailing '/' on the split directory.
    split_dir = '/'.join([data_dir, phase, ''])

    dataset_phase = TrajectoryDataset(split_dir, obs_len=obs_len, pred_len=pred_len)
    if batch_size > 1:
        sampler_phase = TrajBatchSampler(dataset_phase, batch_size=batch_size, shuffle=is_train, drop_last=is_train)
    else:
        # No custom sampler: the DataLoader is built with batch_sampler=None.
        sampler_phase = None
    return DataLoader(dataset_phase, collate_fn=traj_collate_fn, batch_sampler=sampler_phase, pin_memory=True)

In [49]:
def traj_collate_fn(data):
    r"""Merge a list of dataset items (scenes) into one flat batch.

    Args:
        data (list): list of tuples of (obs_seq, pred_seq, non_linear_ped, loss_mask, _, _);
            the last two entries of every tuple are ignored

    Returns:
        tuple of six tensors, the first four concatenated along dim 0 over all scenes:
        obs_seq (torch.Tensor): (num_ped, obs_len, 2)
        pred_seq (torch.Tensor): (num_ped, pred_len, 2)
        non_linear_ped (torch.Tensor): (num_ped,)
        loss_mask (torch.Tensor): (num_ped, obs_len + pred_len)
        scene_mask (torch.BoolTensor): (num_ped, num_ped), True where row and column
            belong to the same scene
        seq_start_end (torch.LongTensor): (num_scenes, 2), [start, end) row range of each scene
    """

    obs_seq_list, pred_seq_list, non_linear_ped_list, loss_mask_list, _, _ = zip(*data)

    # Row ranges of the individual scenes inside the concatenated batch.
    peds_per_scene = [len(seq) for seq in obs_seq_list]
    total_peds = sum(peds_per_scene)
    boundaries = [0] + np.cumsum(peds_per_scene).tolist()
    seq_start_end = torch.LongTensor([[lo, hi] for lo, hi in zip(boundaries[:-1], boundaries[1:])])

    # Block-diagonal mask: one square block of True per scene.
    scene_mask = torch.zeros(total_peds, total_peds, dtype=torch.bool)
    for lo, hi in seq_start_end.tolist():
        scene_mask[lo:hi, lo:hi] = True

    return (
        torch.cat(obs_seq_list, dim=0),
        torch.cat(pred_seq_list, dim=0),
        torch.cat(non_linear_ped_list, dim=0),
        torch.cat(loss_mask_list, dim=0),
        scene_mask,
        seq_start_end,
    )


class TrajBatchSampler(Sampler):
    r"""Batch sampler that groups dataset indices until enough pedestrians are collected.

    Indices are added to the current mini-batch one by one; the batch is emitted as soon as
    the summed ``num_peds_in_seq`` of its members reaches ``batch_size``.

    Args:
        data_source (Dataset): dataset to sample from; must expose ``num_peds_in_seq``
            (indexable with a list of indices) with one entry per item
        batch_size (int): number of pedestrians at which a mini-batch is closed.
        shuffle (bool, optional): set to ``True`` to visit the items in a random order
            on every iteration (default: ``False``).
        drop_last (bool): If ``True``, the trailing batch that never reached
            ``batch_size`` is discarded
        generator (Generator): Generator used for the random permutation.
    """

    def __init__(self, data_source, batch_size=64, shuffle=False, drop_last=False, generator=None):
        self.data_source = data_source
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.drop_last = drop_last
        self.generator = generator

    def __iter__(self):
        dataset = self.data_source
        assert len(dataset) == len(dataset.num_peds_in_seq)

        if not self.shuffle:
            order = list(range(len(dataset)))
        else:
            rng = self.generator
            if rng is None:
                # No generator supplied: seed a private one with a random 64-bit value.
                rng = torch.Generator()
                rng.manual_seed(int(torch.empty((), dtype=torch.int64).random_().item()))
            order = torch.randperm(len(dataset), generator=rng).tolist()
        ped_counts = dataset.num_peds_in_seq[order]

        current, ped_total = [], 0
        for position, item_idx in enumerate(order):
            current.append(item_idx)
            ped_total += ped_counts[position]
            if ped_total >= self.batch_size:
                yield current
                current, ped_total = [], 0
        # Leftover items form a smaller final batch unless drop_last is set.
        if current and not self.drop_last:
            yield current

    def __len__(self):
        # Approximate batch count: batches close on a pedestrian total, so the real number
        # depends on the visiting order and can differ from run to run when shuffling.
        total_peds = sum(self.data_source.num_peds_in_seq)
        if self.drop_last:
            return total_peds // self.batch_size
        return (total_peds + self.batch_size - 1) // self.batch_size


def read_file(_path, delim='\t'):
    r"""Read a text file of numbers into a 2-D float array, one row per non-blank line.

    Every line is split on ``delim`` and each resulting piece is split again on
    whitespace, so a whitespace-separated file parses correctly even when ``delim``
    never occurs in it (this notebook calls the function with ``delim='t'``).
    Blank lines are skipped.

    Args:
        _path (str): path of the file to read
        delim (str): field delimiter; the aliases 'tab' and 'space' stand for '\t' and ' '

    Returns:
        numpy.ndarray: float array of shape (num_lines, num_fields); a single-line file
        gives shape (1, num_fields) and a file without data gives an empty array of shape (0,)

    Raises:
        ValueError: if a field cannot be converted to float
    """
    if delim == 'tab':
        delim = '\t'
    elif delim == 'space':
        delim = ' '

    rows = []
    with open(_path, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            # Split on the delimiter first, then on whitespace inside every piece.
            rows.append([float(token) for piece in line.split(delim) for token in piece.split()])
    return np.array(rows)


def poly_fit(traj, traj_len, threshold):
    """
    Flag a trajectory as non-linear from the residual of a degree-2 polynomial fit.

    Input:
    - traj: Numpy array of shape (2, N) with N >= traj_len; row 0 and row 1 are fitted
      separately and only the last traj_len columns are used
    - traj_len: Len of the trailing part of the trajectory that is fitted
    - threshold: Minimum summed residual (row 0 + row 1) to be considered non-linear
    Output:
    - float: 1.0 -> Non Linear, 0.0 -> Linear
    """
    t = np.linspace(0, traj_len - 1, traj_len)
    # With full=True, element [1] is the residual array of the least-squares fit. It is
    # EMPTY when the fit is under- or exactly determined (traj_len <= 3 for degree 2);
    # np.sum maps that case to 0.0 instead of leaving an empty array in the comparison,
    # whose truth value is ambiguous.
    res_x = float(np.sum(np.polyfit(t, traj[0, -traj_len:], 2, full=True)[1]))
    res_y = float(np.sum(np.polyfit(t, traj[1, -traj_len:], 2, full=True)[1]))
    return 1.0 if res_x + res_y >= threshold else 0.0

In [50]:
class TrajectoryDataset(Dataset):
    """Dataset of fixed-length trajectory windows read from the text files of a directory.

    The rows of every file are grouped by the value in column 0. Each group is cut into
    overlapping windows of ``obs_len + pred_len`` consecutive rows (stride 1), and the
    windows of one group together form one dataset item.
    """

    def __init__(self, data_dir, obs_len=8, pred_len=12, skip=1, threshold=0.02, min_ped=1, delim='\t',
                 max_windows=30):
        """
        Args:
        - data_dir: Directory containing dataset files in the format <frame_id> <ped_id> <x> <y>
          (column 0 is the grouping key, columns 2 and 3 are the coordinates)
        - obs_len: Number of time-steps in input trajectories
        - pred_len: Number of time-steps in output trajectories
        - skip: Step used when iterating over the column-0 groups of a file
        - threshold: Minimum error to be considered for non-linear traj when using a linear predictor
        - min_ped: A group is kept only if it yields MORE than this many windows
        - delim: Delimiter in the dataset files (stored on the instance; see the note at the
          read_file call)
        - max_windows: Maximum number of windows cut from one group; None removes the cap

        Raises:
        - ValueError: if no group in data_dir yields more than min_ped windows
        """
        super(TrajectoryDataset, self).__init__()

        self.data_dir = data_dir
        self.obs_len = obs_len
        self.pred_len = pred_len
        self.skip = skip
        self.seq_len = self.obs_len + self.pred_len
        self.delim = delim

        # Sorted so the item order does not depend on the OS listing order; anything that is
        # not a regular file (e.g. a .ipynb_checkpoints folder) is skipped.
        all_files = [os.path.join(self.data_dir, _path) for _path in sorted(os.listdir(self.data_dir))]
        all_files = [_path for _path in all_files if os.path.isfile(_path)]

        num_peds_in_seq = []
        seq_list = []
        loss_mask_list = []
        non_linear_ped = []
        for path in all_files:
            # NOTE(review): 't' (not self.delim) is what this notebook has always passed; it
            # never occurs in numeric text, so parsing relies on read_file's whitespace
            # split. Switch to self.delim once read_file is confirmed to honour it.
            data = read_file(path, delim='t')
            frames = np.unique(data[:, 0]).tolist()
            frame_data = [data[frame == data[:, 0], :] for frame in frames]

            # Stop at len(frame_data) so frame_data[idx] stays in range for any skip.
            for idx in range(0, len(frame_data), self.skip):
                windows = self._sliding_windows(frame_data[idx], self.seq_len, max_windows)
                num_windows = len(windows)
                if num_windows <= min_ped:
                    continue

                curr_seq = np.zeros((num_windows, 2, self.seq_len))
                _non_linear_ped = []
                for _idx, window in enumerate(windows):
                    # Coordinates rounded to 4 decimals and laid out as (2, seq_len).
                    curr_ped_seq = np.transpose(np.around(window, decimals=4)[:, 2:])
                    curr_seq[_idx, :, :] = curr_ped_seq
                    # Linear vs Non-Linear Trajectory
                    _non_linear_ped.append(poly_fit(curr_ped_seq, pred_len, threshold))

                non_linear_ped += _non_linear_ped
                num_peds_in_seq.append(num_windows)
                # Every window spans the full seq_len, so all of its time-steps are valid.
                loss_mask_list.append(np.ones((num_windows, self.seq_len)))
                seq_list.append(curr_seq)

        if not seq_list:
            raise ValueError(
                'No usable sequences found in %r: every group has at most %d window(s) of %d rows'
                % (self.data_dir, min_ped, self.seq_len))

        self.num_seq = len(seq_list)
        seq_list = np.concatenate(seq_list, axis=0)
        loss_mask_list = np.concatenate(loss_mask_list, axis=0)
        non_linear_ped = np.asarray(non_linear_ped)
        self.num_peds_in_seq = np.array(num_peds_in_seq)

        # Convert numpy -> Torch Tensor
        self.obs_traj = torch.from_numpy(seq_list[:, :, :self.obs_len]).type(torch.float).permute(0, 2, 1)  # NTC
        self.pred_traj = torch.from_numpy(seq_list[:, :, self.obs_len:]).type(torch.float).permute(0, 2, 1)  # NTC
        self.loss_mask = torch.from_numpy(loss_mask_list).type(torch.float)
        self.non_linear_ped = torch.from_numpy(non_linear_ped).type(torch.float)
        # [start, end) row range of every item inside the concatenated tensors.
        cum_start_idx = [0] + np.cumsum(num_peds_in_seq).tolist()
        self.seq_start_end = [(start, end) for start, end in zip(cum_start_idx, cum_start_idx[1:])]

    @staticmethod
    def _sliding_windows(rows, window_len, max_windows):
        """Return the stride-1 windows of window_len consecutive rows, at most max_windows of them."""
        num_windows = len(rows) - window_len + 1
        if max_windows is not None:
            num_windows = min(num_windows, max_windows)
        return [rows[start:start + window_len] for start in range(max(num_windows, 0))]

    def __len__(self):
        """Number of items (kept groups) in the dataset."""
        return self.num_seq

    def __getitem__(self, index):
        """Return [obs_traj, pred_traj, non_linear_ped, loss_mask, None, [[0, n]]] for item
        ``index``, where n is the number of windows in that item."""
        start, end = self.seq_start_end[index]
        out = [self.obs_traj[start:end], self.pred_traj[start:end],
               self.non_linear_ped[start:end], self.loss_mask[start:end], None, [[0, end - start]]]
        return out

In [51]:
# NOTE(review): machine-specific absolute path; point this at the dataset root on your machine.
path = 'D:/AmirKabir/tez/eighen trajectory/EigenTrajectory-1/datasets/nuscenes_mini'
# Training loader: 12 observed steps, 8 predicted steps, batch size 1.
get_dataloader(path, phase='train', obs_len=12, pred_len=8, batch_size=1)

<torch.utils.data.dataloader.DataLoader at 0x17f4063ddf0>

In [40]:
import numpy as np
import math
def read_file(_path, delim='\t'):
    r"""Read a text file of numbers into a 2-D float array, one row per non-blank line.

    Every line is split on ``delim`` and each resulting piece is split again on
    whitespace, so a whitespace-separated file parses correctly even when ``delim``
    never occurs in it (this notebook calls the function with ``delim='t'``).
    Blank lines are skipped.

    Args:
        _path (str): path of the file to read
        delim (str): field delimiter; the aliases 'tab' and 'space' stand for '\t' and ' '

    Returns:
        numpy.ndarray: float array of shape (num_lines, num_fields); a single-line file
        gives shape (1, num_fields) and a file without data gives an empty array of shape (0,)

    Raises:
        ValueError: if a field cannot be converted to float
    """
    if delim == 'tab':
        delim = '\t'
    elif delim == 'space':
        delim = ' '

    rows = []
    with open(_path, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            # Split on the delimiter first, then on whitespace inside every piece.
            rows.append([float(token) for piece in line.split(delim) for token in piece.split()])
    return np.array(rows)
def poly_fit(traj, traj_len, threshold):
    """
    Flag a trajectory as non-linear from the residual of a degree-2 polynomial fit.

    Input:
    - traj: Numpy array of shape (2, N) with N >= traj_len; row 0 and row 1 are fitted
      separately and only the last traj_len columns are used
    - traj_len: Len of the trailing part of the trajectory that is fitted
    - threshold: Minimum summed residual (row 0 + row 1) to be considered non-linear
    Output:
    - float: 1.0 -> Non Linear, 0.0 -> Linear
    """
    t = np.linspace(0, traj_len - 1, traj_len)
    # With full=True, element [1] is the residual array of the least-squares fit. It is
    # EMPTY when the fit is under- or exactly determined (traj_len <= 3 for degree 2);
    # np.sum maps that case to 0.0 instead of leaving an empty array in the comparison,
    # whose truth value is ambiguous.
    res_x = float(np.sum(np.polyfit(t, traj[0, -traj_len:], 2, full=True)[1]))
    res_y = float(np.sum(np.polyfit(t, traj[1, -traj_len:], 2, full=True)[1]))
    return 1.0 if res_x + res_y >= threshold else 0.0
# NOTE(review): machine-specific absolute path; point this at the trajectory file on your machine.
path = 'D:/AmirKabir/tez/eighen trajectory/EigenTrajectory-1/datasets/nuscenes_mini/train/nuscenes_trajectory.txt'

# Settings used by the exploration cells below.
seq_len, skip, pred_len = 20, 1, 8
threshold, min_ped = 0.02, 1

# Accumulators filled by the sequence-building loop further down.
num_peds_in_seq, seq_list, loss_mask_list, non_linear_ped = [], [], [], []

# Parse the file and split its rows into one array per distinct column-0 value.
data = read_file(path, delim='t')
frames = np.unique(data[:, 0]).tolist()
frame_data = []
for frame in frames:
    frame_data.append(data[data[:, 0] == frame, :])
num_sequences = len(frame_data)
num_sequences

225

In [7]:
# Number of per-value groups collected in frame_data.
len(frame_data)

225

In [8]:
# Group count as stored in num_sequences.
num_sequences

225

In [21]:
# Take the first group and list the distinct values found in its second column.
curr_seq_data = frame_data[0]  
peds_in_curr_seq = np.unique(curr_seq_data[:, 1])
peds_in_curr_seq

array([1.])

In [22]:
# Keep only the rows of the current group whose second column equals 1.
curr_ped_seq = curr_seq_data[curr_seq_data[:, 1] == 1, :]
curr_ped_seq

array([[0.000000e+00, 1.000000e+00, 3.732560e+02, 1.130419e+03],
       [0.000000e+00, 1.000000e+00, 3.732560e+02, 1.130419e+03],
       [0.000000e+00, 1.000000e+00, 3.732560e+02, 1.130419e+03],
       [0.000000e+00, 1.000000e+00, 3.732560e+02, 1.130419e+03],
       [0.000000e+00, 1.000000e+00, 3.732560e+02, 1.130419e+03],
       [0.000000e+00, 1.000000e+00, 3.732560e+02, 1.130419e+03],
       [0.000000e+00, 1.000000e+00, 3.732560e+02, 1.130419e+03],
       [0.000000e+00, 1.000000e+00, 3.732560e+02, 1.130419e+03],
       [0.000000e+00, 1.000000e+00, 3.732560e+02, 1.130419e+03],
       [0.000000e+00, 1.000000e+00, 3.732560e+02, 1.130419e+03],
       [0.000000e+00, 1.000000e+00, 3.732560e+02, 1.130419e+03],
       [0.000000e+00, 1.000000e+00, 3.732560e+02, 1.130419e+03],
       [0.000000e+00, 1.000000e+00, 3.732560e+02, 1.130419e+03],
       [0.000000e+00, 1.000000e+00, 3.732560e+02, 1.130419e+03],
       [0.000000e+00, 1.000000e+00, 3.732140e+02, 1.130480e+03],
       [0.000000e+00, 1.0

In [11]:
# Re-display the distinct second-column values stored in peds_in_curr_seq.
peds_in_curr_seq

array([1.])

In [None]:
# Cut the current group into overlapping 20-row windows, moving one row at a time;
# at most 30 window starts are tried and the loop stops at the first window that
# would run past the end of the group.
frame = []
for first in range(30):
    end = first + 20
    if end > len(curr_seq_data):
        break
    frame.append(curr_seq_data[first:end])
frame

In [34]:
# Shape of the window stored at index 18 of `frame`.
frame[18].shape

(20, 4)

In [37]:
# Show the first window stored in `frame`.
frame[0]

array([[0.000000e+00, 1.000000e+00, 3.732560e+02, 1.130419e+03],
       [0.000000e+00, 1.000000e+00, 3.732560e+02, 1.130419e+03],
       [0.000000e+00, 1.000000e+00, 3.732560e+02, 1.130419e+03],
       [0.000000e+00, 1.000000e+00, 3.732560e+02, 1.130419e+03],
       [0.000000e+00, 1.000000e+00, 3.732560e+02, 1.130419e+03],
       [0.000000e+00, 1.000000e+00, 3.732560e+02, 1.130419e+03],
       [0.000000e+00, 1.000000e+00, 3.732560e+02, 1.130419e+03],
       [0.000000e+00, 1.000000e+00, 3.732560e+02, 1.130419e+03],
       [0.000000e+00, 1.000000e+00, 3.732560e+02, 1.130419e+03],
       [0.000000e+00, 1.000000e+00, 3.732560e+02, 1.130419e+03],
       [0.000000e+00, 1.000000e+00, 3.732560e+02, 1.130419e+03],
       [0.000000e+00, 1.000000e+00, 3.732560e+02, 1.130419e+03],
       [0.000000e+00, 1.000000e+00, 3.732560e+02, 1.130419e+03],
       [0.000000e+00, 1.000000e+00, 3.732560e+02, 1.130419e+03],
       [0.000000e+00, 1.000000e+00, 3.732140e+02, 1.130480e+03],
       [0.000000e+00, 1.0

In [38]:
# Point curr_seq_data at the first group again and display its rows.
curr_seq_data = frame_data[0]
curr_seq_data

array([[0.000000e+00, 1.000000e+00, 3.732560e+02, 1.130419e+03],
       [0.000000e+00, 1.000000e+00, 3.732560e+02, 1.130419e+03],
       [0.000000e+00, 1.000000e+00, 3.732560e+02, 1.130419e+03],
       [0.000000e+00, 1.000000e+00, 3.732560e+02, 1.130419e+03],
       [0.000000e+00, 1.000000e+00, 3.732560e+02, 1.130419e+03],
       [0.000000e+00, 1.000000e+00, 3.732560e+02, 1.130419e+03],
       [0.000000e+00, 1.000000e+00, 3.732560e+02, 1.130419e+03],
       [0.000000e+00, 1.000000e+00, 3.732560e+02, 1.130419e+03],
       [0.000000e+00, 1.000000e+00, 3.732560e+02, 1.130419e+03],
       [0.000000e+00, 1.000000e+00, 3.732560e+02, 1.130419e+03],
       [0.000000e+00, 1.000000e+00, 3.732560e+02, 1.130419e+03],
       [0.000000e+00, 1.000000e+00, 3.732560e+02, 1.130419e+03],
       [0.000000e+00, 1.000000e+00, 3.732560e+02, 1.130419e+03],
       [0.000000e+00, 1.000000e+00, 3.732560e+02, 1.130419e+03],
       [0.000000e+00, 1.000000e+00, 3.732140e+02, 1.130480e+03],
       [0.000000e+00, 1.0

In [41]:
# Start from empty accumulators so that re-running this cell does not append duplicates.
num_peds_in_seq, seq_list, loss_mask_list, non_linear_ped = [], [], [], []
max_windows = 30  # upper bound on the number of windows cut from one group

# Stopping at num_sequences keeps frame_data[idx] in range for any value of skip.
for idx in range(0, num_sequences, skip):
    curr_seq_data = frame_data[idx]

    # Overlapping windows of seq_len consecutive rows, moving one row at a time.
    frame = []
    for first in range(max_windows):
        end = first + seq_len
        if end > len(curr_seq_data):
            break
        frame.append(curr_seq_data[first:end])

    curr_seq = np.zeros((len(frame), 2, seq_len))
    curr_loss_mask = np.zeros((len(frame), seq_len))
    num_peds_considered = 0
    _non_linear_ped = []
    for sequence in frame:
        # Round to 4 decimals, then keep the columns from index 2 on as a (2, seq_len) array.
        curr_ped_seq = np.around(sequence, decimals=4)
        curr_ped_seq = np.transpose(curr_ped_seq[:, 2:])
        _idx = num_peds_considered
        curr_seq[_idx, :, :] = curr_ped_seq
        # Linear vs Non-Linear Trajectory
        _non_linear_ped.append(poly_fit(curr_ped_seq, pred_len, threshold))
        # Every window spans the full seq_len, so the whole mask row is valid. This
        # replaces the pad_front/pad_end slice, whose definitions are commented out.
        curr_loss_mask[_idx, :] = 1
        num_peds_considered += 1

    # Keep the group only if it produced more than min_ped windows.
    if num_peds_considered > min_ped:
        non_linear_ped += _non_linear_ped
        num_peds_in_seq.append(num_peds_considered)
        loss_mask_list.append(curr_loss_mask[:num_peds_considered])
        seq_list.append(curr_seq[:num_peds_considered])

In [55]:
# Shape of the first accumulated block: (number of windows, 2, seq_len).
seq_list[0].shape

(19, 2, 20)