In [30]:
# Display the shape of the stacked input array.
# NOTE(review): in the visible part of the notebook `npx` is only assigned in a later
# cell, so this cell depends on that cell having been run first (unless `npx` is
# defined earlier, outside this excerpt).
npx.shape

(205316, 22, 38)

In [None]:
# Stack the per-scene input tensors collected in the list X along a new leading axis
# and display the resulting shape.
npx = np.stack(X)
npx.shape

In [None]:
# Stack the per-scene target tensors collected in the list y along a new leading axis
# and display the resulting shape.
npy = np.stack(y)
npy.shape

In [None]:
# Persist the stacked arrays in NumPy's .npy format: inputs first, then targets.
# np.save opens the given path for binary writing itself.
np.save('train_X_clean.npy', npx)
np.save('train_y_clean.npy', npy)

In [None]:
!pip install einops

In [None]:
def replace_bad_data_points(positions, velocities, total_distance):
    """Interpolate over samples whose position repeats the previous sample.

    A sample is treated as bad when its position is exactly equal to the
    position one step earlier (zero step length). The first and the last
    sample are never considered. Each bad sample's position and velocity are
    replaced by the mean of its two neighbours, always read from the
    *original* arrays, so runs of consecutive bad samples do not feed on
    already-interpolated values.

    Args:
        positions: 2-D array indexed as [step, coordinate].
        velocities: array indexed by step along its first axis, aligned with
            ``positions``.
        total_distance: scalar; when it is below 10 the inputs are returned
            untouched.

    Returns:
        A ``(positions, velocities)`` pair. The original objects are returned
        when nothing needs fixing; otherwise patched copies are returned and
        the inputs are left unmodified.
    """
    if total_distance < 10:
        return positions, velocities

    # step_lengths[i] is the distance travelled from sample i-1 to sample i;
    # sample 0 is compared with itself, so step_lengths[0] is always 0.
    previous = np.copy(positions)
    previous[1:, :] = positions[:-1, :]
    step_lengths = np.linalg.norm(positions - previous, axis=1)

    # Interior samples only: both neighbours must exist for the interpolation.
    stalled = np.flatnonzero(step_lengths[1:-1] == 0) + 1
    if stalled.size == 0:
        return positions, velocities

    fixed_positions = np.copy(positions)
    fixed_velocities = np.copy(velocities)
    fixed_positions[stalled] = (positions[stalled - 1] + positions[stalled + 1]) / 2
    fixed_velocities[stalled] = (velocities[stalled - 1] + velocities[stalled + 1]) / 2
    return fixed_positions, fixed_velocities

def detect_outlier(folder, file_name, thresh, agent_only=True, verbose=True):
    """Report whether a pickled scene contains an implausibly large single step.

    The scene is read from ``<folder>/<file_name>.pkl``. Positions come from
    ``p_in`` and, when the key is present, ``p_out`` appended along axis 1.
    Only the first ``car_mask.sum()`` rows are considered.

    Args:
        folder: directory that contains the pickle file.
        file_name: file name without the ``.pkl`` extension.
        thresh: step length above which a step counts as an outlier.
        agent_only: if True, only the row whose ``track_id`` equals
            ``agent_id`` is inspected and any step longer than ``thresh``
            flags the scene. If False, every kept car is inspected, but a car
            only counts when the distance between its first and last point
            exceeds 5.
        verbose: print debugging information about the step lengths.

    Returns:
        bool: True when an outlier step was found, False otherwise (including
        for a scene with no valid cars when ``agent_only`` is False).
    """
    # NOTE(review): pickle.load can execute arbitrary code; only use on trusted files.
    with open(os.path.join(folder, file_name + '.pkl'), 'rb') as f:
        data = pickle.load(f)

    num_cars = data['car_mask'].astype(int).sum()
    if 'p_out' in data:
        car_points = np.concatenate((data['p_in'], data['p_out']), axis=1)
    else:
        car_points = data['p_in']
    # Presumably the valid cars occupy the first num_cars rows — TODO confirm.
    car_points = car_points[:num_cars, ...]

    if agent_only:
        agent_idx = np.where(data['track_id'] == data['agent_id'])[0][0]
        car_points = car_points[agent_idx]

    # Step length between consecutive points along the second-to-last axis.
    # The first point is compared with itself, so the first step is always 0.
    # Ellipsis indexing covers both the single-track and the all-cars layout.
    previous = np.copy(car_points)
    previous[..., 1:, :] = car_points[..., :-1, :]
    distances = np.linalg.norm(car_points - previous, axis=-1)

    if agent_only:
        if verbose:
            peak_idx = np.argmax(distances, axis=0)
            # Step just before the largest one (wraps to the last step if the peak is first).
            print((distances[peak_idx - 1]))
            print(np.max(distances, axis=0),  np.median(distances, axis=0))
        return bool(np.any(distances > thresh))

    # Guarded so that single-car (or empty) scenes do not raise in verbose mode.
    if verbose and len(distances) > 1:
        print(distances[1])

    # Displacement between the first and last point of every car; cars that
    # barely moved overall are ignored.
    total_distance = np.linalg.norm(car_points[:, 0] - car_points[:, -1], axis=1)
    has_big_step = np.any(distances > thresh, axis=1)
    return bool(np.any((total_distance > 5) & has_big_step))


In [None]:
import pickle
from glob import glob
import numpy as np
import os
from tqdm import tqdm
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
from matplotlib import colormaps
import matplotlib.animation as animation
import torch
import torch.nn
from einops import rearrange,reduce,repeat

In [None]:
# Collect the training scene files. Only *.pkl files are usable downstream (the
# preprocessing loop strips the extension and detect_outlier re-appends '.pkl'),
# and glob returns paths in arbitrary order, so sort them to get a reproducible
# ordering of the generated dataset rows.
pkl_list = sorted(glob(os.path.join('train', '*.pkl')))
len(pkl_list)

In [None]:
X = []
y = []

# Layout of one example row block, taken from the literals used below.
TRACK_FEATURES = 19 * 2   # flattened length of one track's input history
NUM_NEIGHBORS = 10        # fixed number of neighbour rows per example

for pkl_path in tqdm(pkl_list):
    with open(pkl_path, 'rb') as f:
        scene = pickle.load(f)

    # Skip scenes flagged by detect_outlier (all-cars mode, step threshold 10).
    # NOTE(review): detect_outlier re-opens the same file from folder + stem, so
    # every scene is unpickled twice.
    folder, base_name = os.path.split(pkl_path)
    stem = os.path.splitext(base_name)[0]
    if detect_outlier(folder, stem, thresh=10, agent_only=False, verbose=False):
        continue

    # Row of the agent to be predicted, and the rows flagged in car_mask.
    pred_id = np.where(scene["track_id"] == scene['agent_id'])[0][0]
    mask = np.where(scene['car_mask'] == 1)[0]

    # Min-max scale positions with the extent of this scene's lane points.
    # NOTE(review): a zero lane extent on any coordinate divides by zero here.
    lane_min = np.min(scene['lane'], axis=0)
    lane_span = np.max(scene['lane'], axis=0) - lane_min
    p_in_norm = (scene['p_in'] - lane_min) / lane_span
    p_out_normalized = (scene['p_out'][pred_id] - lane_min) / lane_span

    # Scale velocities by their L2 norm over axis 1, leaving all-zero norms as 1.
    # NOTE(review): v_in is presumably (cars, 19, 2), which makes axis 1 the time
    # axis rather than the (x, y) axis — confirm this is intended and not axis=-1.
    v_in_raw = scene['v_in']
    v_scale = np.linalg.norm(v_in_raw, axis=1, keepdims=True)
    v_in_norm = v_in_raw / np.where(v_scale == 0.0, 1.0, v_scale)

    # One flattened row per masked track, plus a single row for the agent.
    p_track = p_in_norm[mask].reshape(-1, TRACK_FEATURES)
    v_track = v_in_norm[mask].reshape(-1, TRACK_FEATURES)
    p_agent = p_in_norm[pred_id].reshape(1, -1)
    v_agent = v_in_norm[pred_id].reshape(1, -1)

    if len(mask) > NUM_NEIGHBORS:
        # Keep the NUM_NEIGHBORS tracks with the smallest squared distance to the
        # agent's flattened history (argpartition leaves them in no particular order).
        # NOTE(review): if the agent's own row is in mask it is selected too
        # (distance 0) — confirm intended.
        sq_dist = ((p_track - p_agent) ** 2).sum(axis=-1)
        nearest = np.argpartition(sq_dist, NUM_NEIGHBORS)[:NUM_NEIGHBORS]
        p_result = p_track[nearest, :]
        v_result = v_track[nearest, :]
    else:
        # Fewer tracks than slots: zero-pad up to NUM_NEIGHBORS rows.
        p_result = np.zeros((NUM_NEIGHBORS, TRACK_FEATURES))
        v_result = np.zeros((NUM_NEIGHBORS, TRACK_FEATURES))
        n_tracks = p_track.shape[0]
        p_result[:n_tracks] = p_track
        v_result[:n_tracks] = v_track

    # Example layout: agent position row, agent velocity row, neighbour positions,
    # neighbour velocities.
    inp = np.vstack((p_agent, v_agent, p_result, v_result))

    # Target: the agent's scaled future positions flattened into one vector.
    p_out_norm = rearrange(p_out_normalized, 'a b -> (a b)')

    # Store both as float32 torch tensors.
    X.append(torch.from_numpy(inp).float())
    y.append(torch.from_numpy(p_out_norm).float())
    