In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
from torch.autograd import Variable
from sklearn.model_selection import train_test_split
import os, os.path 
import numpy 
import pickle
from glob import glob

"""Change to the data folder"""
train_path = "./new_train/new_train/"
vali_path = "./new_val_in/new_val_in"
# number of sequences in each dataset
# train:205942  val:3200 test: 36272 
# sequences sampled at 10HZ rate

### Create a dataset class 

In [2]:
class ArgoverseDataset(Dataset):
    """Map-style dataset where every file found under ``data_path`` is one pickled sample."""

    def __init__(self, data_path: str, transform=None):
        """Index the files in ``data_path``.

        Args:
            data_path: directory whose entries are pickled samples.
            transform: optional callable applied to each loaded sample.
        """
        super().__init__()
        self.data_path = data_path
        self.transform = transform

        # Sorted so that a given index always refers to the same file.
        self.pkl_list = sorted(glob(os.path.join(self.data_path, '*')))

    def __len__(self):
        """Number of files discovered in ``data_path``."""
        return len(self.pkl_list)

    def __getitem__(self, idx):
        """Unpickle the ``idx``-th file and return it, transformed if a transform was given."""
        with open(self.pkl_list[idx], 'rb') as handle:
            sample = pickle.load(handle)

        if not self.transform:
            return sample
        return self.transform(sample)


# initialize the training and validation datasets
train_dataset  = ArgoverseDataset(data_path=train_path)
vali_dataset = ArgoverseDataset(data_path=vali_path)

### Create a loader to enable batch processing

In [3]:
batch_sz = 4

def my_collate(batch):
    """Collate a list of scenes into batched tensors, [ batch_sz x agent_sz x seq_len x feature ].

    For each scene, positions and velocities are concatenated along the third
    axis (``numpy.dstack``); the scenes are then stacked along a new leading
    batch axis.

    Args:
        batch: list of scene dicts providing the arrays 'p_in', 'v_in',
            'p_out' and 'v_out'.

    Returns:
        ``[inp, out]``: two float32 tensors built from the observed
        ('p_in'/'v_in') and future ('p_out'/'v_out') arrays respectively.
    """
    inp = numpy.stack([numpy.dstack([scene['p_in'], scene['v_in']]) for scene in batch])
    out = numpy.stack([numpy.dstack([scene['p_out'], scene['v_out']]) for scene in batch])
    # float32 rather than LongTensor: the coordinates are real-valued and an
    # integer tensor would discard their fractional part. Converting one
    # stacked ndarray also avoids the slow list-of-arrays tensor constructor.
    inp = torch.as_tensor(inp, dtype=torch.float32)
    out = torch.as_tensor(out, dtype=torch.float32)
    return [inp, out]

train_loader = DataLoader(train_dataset,batch_size=batch_sz, shuffle = False, collate_fn=my_collate, num_workers=0)
vali_loader = DataLoader(vali_dataset,batch_size=batch_sz, shuffle = False, collate_fn=my_collate, num_workers=0)

In [4]:
len(train_dataset)

205942

In [5]:
len(vali_dataset)

3200

In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression

In [7]:
#for i in range(0, len(train_dataset)):

x = train_dataset[0]['p_in']
#    print(type(x))
    #print(x.shape)

In [8]:
import random

# Seed for the subset draw, so the sampled scenes (and everything fitted on
# them) are the same on every Restart & Run All.
SAMPLE_SEED = 0

# Draw 10000 distinct scene indices. range() starts at 0 so the first scene is
# eligible too; a dedicated Random instance leaves the global RNG state alone.
index = random.Random(SAMPLE_SEED).sample(range(len(train_dataset)), 10000)

In [9]:
len(index)

10000

In [10]:
type(x)

numpy.ndarray

In [11]:
x[0]

array([[3277.29638672, 1947.62609863],
       [3277.29614258, 1947.62597656],
       [3277.29614258, 1947.62585449],
       [3277.29614258, 1947.62597656],
       [3277.29589844, 1947.62561035],
       [3277.29589844, 1947.62561035],
       [3277.29589844, 1947.62548828],
       [3277.29614258, 1947.62573242],
       [3277.29638672, 1947.62573242],
       [3277.29614258, 1947.62573242],
       [3277.29614258, 1947.62561035],
       [3277.29638672, 1947.62573242],
       [3277.29638672, 1947.62585449],
       [3277.29663086, 1947.62597656],
       [3277.29638672, 1947.62585449],
       [3277.29638672, 1947.62585449],
       [3277.29638672, 1947.62561035],
       [3277.29638672, 1947.62573242],
       [3277.29638672, 1947.62585449]])

In [12]:
# Flat lists of the target agent's coordinates over all sampled scenes:
# observed x / y (19 values per scene) and future x / y (30 values per scene).
p_x = []
p_y = []
out_x = []
out_y = []

for n, sample_idx in enumerate(index):
    # Load the scene once: every train_dataset[...] access re-opens and
    # unpickles the file, so repeated indexing multiplied the I/O.
    scene = train_dataset[sample_idx]
    # Row of the agent whose track id equals the scene's 'agent_id'.
    agent_row = np.where(scene['track_id'] == scene['agent_id'])[0][0]
    # reshape doubles as a size check: 19 observed and 30 future (x, y) pairs.
    position = scene['p_in'][agent_row].reshape(19, 2)
    label = scene['p_out'][agent_row].reshape(30, 2)
    # extend() appends in place; `lst = lst + list(...)` copied the whole
    # accumulated list on every iteration.
    p_x.extend(position[:, 0])
    p_y.extend(position[:, 1])
    out_x.extend(label[:, 0])
    out_y.extend(label[:, 1])
    if n % 1000 == 0:
        print("n is " + str(n))


n is 0
n is 1000
n is 2000
n is 3000
n is 4000
n is 5000
n is 6000
n is 7000
n is 8000
n is 9000


In [13]:
print(len(p_x))
print(len(p_y))
print(len(out_x))
print(len(out_y))

190000
190000
300000
300000


In [14]:
# One row per sampled scene: 19 observed steps for the inputs, 30 future steps
# for the targets. -1 lets numpy infer the row count, so this cell keeps
# working if the number of sampled scenes changes.
p_x = np.array(p_x).reshape((-1, 19))
p_y = np.array(p_y).reshape((-1, 19))
out_x = np.array(out_x).reshape((-1, 30))
out_y = np.array(out_y).reshape((-1, 30))

In [15]:
def get_out(n, label):
    """Return a list holding element ``n`` of every row in ``label``."""
    return [row[n] for row in label]

In [16]:
y_0 = get_out(0, out_x)

In [17]:
len(y_0)

10000

In [18]:
# Flat lists of the target agent's observed x / y coordinates
# (19 values per validation scene).
t_x = []
t_y = []
for n in range(len(vali_dataset)):
    # Load the scene once: every vali_dataset[...] access re-opens and
    # unpickles the file.
    scene = vali_dataset[n]
    # Row of the agent whose track id equals the scene's 'agent_id'.
    agent_row = np.where(scene['track_id'] == scene['agent_id'])[0][0]
    # reshape doubles as a size check: 19 observed (x, y) pairs.
    position = scene['p_in'][agent_row].reshape(19, 2)
    # extend() appends in place instead of copying the list each iteration.
    t_x.extend(position[:, 0])
    t_y.extend(position[:, 1])
    if n % 1000 == 0:
        print("n is " + str(n))


n is 0
n is 1000
n is 2000
n is 3000


In [19]:
len(t_x)

60800

In [20]:
len(t_y)

60800

In [21]:
# One row of 19 observed steps per validation scene; -1 lets numpy infer the
# number of scenes instead of hard-coding it.
t_x = np.array(t_x).reshape((-1, 19))
t_y = np.array(t_y).reshape((-1, 19))

In [22]:
#vali_dataset[0]

In [23]:
# Scene identifier of every validation sample, in dataset order. Built from
# the dataset's actual length rather than a pre-sized list of 3200 entries.
s_id = [vali_dataset[i]['scene_idx'] for i in range(len(vali_dataset))]

In [24]:
# Output table: column 0 holds the scene id of each validation sample.
# The frame starts with integer column labels 0..59; the regression cell below
# writes predictions into columns 1..60, so column 60 is appended there.
dfc = pd.DataFrame(columns=np.arange(60))
dfc[0] = list(s_id)
# Bare expression: displays the (still mostly empty) frame as the cell output.
dfc

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,50,51,52,53,54,55,56,57,58,59
0,10002,,,,,,,,,,...,,,,,,,,,,
1,10015,,,,,,,,,,...,,,,,,,,,,
2,10019,,,,,,,,,,...,,,,,,,,,,
3,10028,,,,,,,,,,...,,,,,,,,,,
4,1003,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3195,9897,,,,,,,,,,...,,,,,,,,,,
3196,99,,,,,,,,,,...,,,,,,,,,,
3197,9905,,,,,,,,,,...,,,,,,,,,,
3198,9910,,,,,,,,,,...,,,,,,,,,,


In [25]:
# For each of the 30 future steps, fit one linear model on the observed
# x-coordinates and one on the observed y-coordinates, then write the
# validation predictions into columns 2*step+1 (x) and 2*step+2 (y) of dfc.
for step in range(30):
    target_x = get_out(step, out_x)
    target_y = get_out(step, out_y)

    model_x = LinearRegression().fit(p_x, target_x)
    model_y = LinearRegression().fit(p_y, target_y)

    dfc[2 * step + 1] = list(model_x.predict(t_x))
    dfc[2 * step + 2] = list(model_y.predict(t_y))

In [26]:
dfc

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,51,52,53,54,55,56,57,58,59,60
0,10002,1714.777378,337.199261,1715.730199,338.116298,1716.695437,339.035481,1717.620307,339.941665,1718.546048,...,1738.764473,361.348703,1739.721823,362.272508,1740.650292,363.280774,1741.531718,364.255287,1742.452859,365.210979
1,10015,725.524088,1229.675534,725.464439,1229.442801,725.388316,1229.221879,725.327991,1229.029871,725.259416,...,723.349056,1226.002466,723.242466,1225.900470,723.141930,1225.814839,723.050571,1225.736556,722.949185,1225.642310
2,10019,574.022163,1244.657588,574.226982,1244.582410,574.462319,1244.498686,574.691099,1244.432921,574.912899,...,579.623051,1242.630450,579.846756,1242.555412,580.065052,1242.478833,580.264209,1242.395315,580.465167,1242.324058
3,10028,1691.344232,315.619272,1691.967599,316.176014,1692.595622,316.757616,1693.238591,317.365832,1693.875006,...,1707.256780,330.964330,1707.896469,331.584855,1708.525031,332.221458,1709.132092,332.830139,1709.747009,333.432889
4,1003,2122.794130,677.105355,2121.513165,676.000026,2120.217831,674.929059,2118.966996,673.840681,2117.680518,...,2090.609261,650.292290,2089.356006,649.279360,2088.113987,648.280420,2086.915795,647.223142,2085.676714,646.221800
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3195,9897,256.429996,805.455028,256.676400,805.249830,256.931963,805.037179,257.182086,804.843303,257.436113,...,263.170258,799.872656,263.448220,799.644081,263.710266,799.437811,263.979462,799.200071,264.243175,798.994559
3196,99,587.866477,1153.969822,587.897815,1153.366877,587.930806,1152.699491,587.967273,1152.034222,587.997496,...,588.688828,1137.105300,588.718455,1136.419453,588.742999,1135.763588,588.769687,1135.095689,588.801478,1134.462162
3197,9905,1755.594999,444.202283,1755.428171,444.516914,1755.261726,444.825848,1755.100360,445.139414,1754.932745,...,1751.262173,452.115668,1751.089948,452.423299,1750.920581,452.768510,1750.744512,453.053348,1750.582500,453.379344
3198,9910,574.607937,1288.846472,574.508950,1288.612320,574.411812,1288.400916,574.316125,1288.202646,574.211579,...,571.628342,1283.345537,571.496566,1283.132435,571.364495,1282.929413,571.230086,1282.702028,571.108232,1282.503128


In [27]:
dfc.to_csv('sub_linear_shuffle.csv', index=False)

In [28]:

# `df` was never defined (NameError); the frame built in this notebook is `dfc`.
dfc.head()

In [None]:
#train_dataset[0]

### Visualize the batch of sequences

In [None]:
import matplotlib.pyplot as plt
import random

agent_id = 0

def show_sample_batch(sample_batch, agent_id):
    """Visualize the trajectory of one agent for every sample in a batch.

    Draws one subplot per sample, scattering the agent's input points and
    output points in two colors.

    Args:
        sample_batch: ``(inp, out)`` pair as produced by the collate function;
            both are indexed here as [sample, agent, step, feature].
        agent_id: index of the agent (second axis) to plot.
    """
    inp, out = sample_batch
    batch_sz = inp.size(0)

    # squeeze=False always returns a 2-D array of Axes; with the default,
    # a batch of one yields a bare Axes object and .ravel() would fail.
    fig, axs = plt.subplots(1, batch_sz, figsize=(15, 3), facecolor='w', edgecolor='k', squeeze=False)
    fig.subplots_adjust(hspace = .5, wspace=.001)
    axs = axs.ravel()
    for i in range(batch_sz):
        axs[i].xaxis.set_ticks([])
        axs[i].yaxis.set_ticks([])

        # first two feature dimensions are (x,y) positions
        axs[i].scatter(inp[i, agent_id,:,0], inp[i, agent_id,:,1])
        axs[i].scatter(out[i, agent_id,:,0], out[i, agent_id,:,1])

        
# Pull batches from the training loader and plot the chosen agent's
# trajectories; the `break` below stops after the first batch.
for i_batch, sample_batch in enumerate(train_loader):
    # Each batch is the [inp, out] pair returned by the loader's collate_fn.
    inp, out = sample_batch
    """TODO:
      Deep learning model
      training routine
    """
    show_sample_batch(sample_batch, agent_id)
    
    # Only one batch is visualized.
    break