In [8]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0"
import transformer.DataTransformer as DataTransformer
import yaml
import numpy as np
from utils import *
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader

from trainer.Trainer import Trainer
import pickle5
import pdb
import warnings
# Silence library warnings so cell output stays readable.
# NOTE: this also hides deprecation notices from every imported package.
warnings.filterwarnings("ignore")

# Print DataFrames in full: no row/column truncation and no line wrapping.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
# None is the supported "unlimited" value for max_colwidth; the old -1 sentinel
# was deprecated in pandas 1.0 and is rejected by newer releases.
pd.set_option('display.max_colwidth', None)

def load_config():
    """Parse ``config.yaml`` (looked up in the current working directory).

    Returns:
        The object produced by ``yaml.load`` with ``yaml.FullLoader`` for the
        file's content.
    """
    with open("config.yaml", "r") as stream:
        return yaml.load(stream, Loader=yaml.FullLoader)


In [2]:
# Experiment configuration shared by the cells below.
config_dict = load_config()

# Recording sessions used for validation.
val_files = [
    '2022-07-12T17-02-16',
    '2022-07-12T17-12-05',
    '2022-07-12T17-25-48',
    '2022-09-24T15:18:08',
    '2022-09-24T15:23:37',
    '2022-09-24T15:27:58',
    '2022-09-24T15:37:31',
    '2022-09-24T15:41:55',
    '2022-09-25T16:16:15',
    '2022-09-25T16:21:25',
]

# Use the GPU when one is visible to this process, otherwise fall back to CPU.
cuda = torch.cuda.is_available()
device = torch.device("cuda" if cuda else "cpu")

# Sequence length read from the configuration.
seq_len = config_dict['data']['SEQUENCE_LENGTH']

In [3]:
# Common path prefix of the saved data files.
data_save_file = config_dict['transformer']['data_save_file']

# need video and sensor data separately
# SECURITY: allow_pickle=True and pickle loading can execute arbitrary code;
# only point these at files you produced yourself.
with np.load(data_save_file + '_video.npz', allow_pickle=True) as video_archive:
    # dict() reads every array out of the archive, so the data stays usable
    # after the archive's file handle is closed at the end of the `with` block.
    df_videos = dict(video_archive)

with open(data_save_file + '_sensor.pickle', 'rb') as handle:
    df_sensor = pickle5.load(handle)

# Validation frames are only converted to tensors (no augmentation).
val_transforms = transforms.Compose([transforms.ToTensor()])

In [10]:
from collections import defaultdict
class IntentVideoDataset(Dataset):
    """Sliding-window video dataset whose frames carry an extra "intent" channel.

    Every sample is a window of ``seq_len`` consecutive frames from one file.
    The label of a window is the (mapped) label of its last frame. An extra
    channel is appended to each frame: it holds the window label for
    ``config['transformer']['fps']`` consecutive frames starting at a sampled
    "intent" offset, and ``-1`` everywhere else.

    ``self.items`` is a dict of parallel lists (one entry per window) with the
    keys ``'filename'``, ``'index'``, ``'label'``, ``'label history'`` and
    ``'direction history'``.

    Depends on ``get_gps_probabilities``, ``convert_to_dataframe``,
    ``preprocess_labels`` and ``label_map``, which are defined outside this
    class.
    """

    # Label value for which no intent signal is generated (the original code
    # refers to it as the "front" / "none" label).
    NO_INTENT_LABEL = 2
    # Sentinel meaning "no intent signal", used both as an intent position and
    # as the fill value of the intent channel.
    NO_INTENT = -1

    def __init__(self, df_videos, df_sensor, files, transforms, seq_len, config_dict=None, test='train'):
        """Index every window of ``seq_len`` frames in ``files``.

        Args:
            df_videos: mapping ``file -> frames``; read as ``df_videos[file][frame_index]``.
            df_sensor: mapping ``file -> sensor data``; the direction signal is
                read from ``df_sensor[file]['direction_label']['direction']``.
            files: iterable of file keys to include.
            transforms: callable applied to every frame before stacking.
            seq_len: number of frames per window.
            config_dict: configuration; ``['transformer']['fps']`` and
                ``['data']['CHANNELS' | 'HEIGHT' | 'WIDTH']`` are read.
            test: ``'train'`` re-samples the intent position on every access;
                any other value uses positions sampled once at construction.

        Raises:
            ValueError: if ``config_dict`` is not provided.
        """
        if config_dict is None:
            raise ValueError("config_dict is required to build IntentVideoDataset")

        self.transforms = transforms
        print(test, ":  ", files)
        self.files = files  # files sorted by names
        self.seq_len = seq_len
        self.df_videos = df_videos
        self.df_sensor = df_sensor  # df_sensor['sample']['direction_label']['direction']
        self.config = config_dict
        self.test = test
        self.items = defaultdict(list)

        # The intent signal may start at any offset that leaves room for one
        # second (fps frames) of signal inside the window.
        self.gps_range = (0, seq_len - self.config['transformer']['fps'])
        # Sampling weights for the start offsets; np.random.choice needs one
        # weight per offset in gps_range (helper defined elsewhere).
        self.prob_gps = get_gps_probabilities(self.gps_range)

        for f in files:
            df = convert_to_dataframe(self.df_sensor[f]['direction_label']['direction'])
            df_processed = preprocess_labels(df)  # assign 1 sec forward labels

            # One entry per window start position.
            for i in range(len(df_processed) - self.seq_len):
                self.items['filename'].append(f)
                self.items['index'].append(df_processed['frame_index'][i])
                # The window is labelled by its last frame.
                self.items['label'].append(label_map(df_processed['labels'][i + self.seq_len - 1]))

                self.items['label history'].append(
                    [label_map(df_processed['labels'][i + kl]) for kl in range(self.seq_len)]
                )
                self.items['direction history'].append(
                    [label_map(df_processed['directions'][i + kl]) for kl in range(self.seq_len)]
                )

        # Fixed intent positions, used whenever test != 'train'.
        self.intent_positions = self.create_intent_postions()

    def _sample_intent_position(self, label):
        """Draw an intent start offset for ``label`` (``NO_INTENT`` for the no-intent label)."""
        if label == self.NO_INTENT_LABEL:
            return self.NO_INTENT
        candidates = np.arange(self.gps_range[0], self.gps_range[1])
        return int(np.random.choice(candidates, p=self.prob_gps))

    def create_intent_postions(self):
        """Sample one intent start offset per window.

        Returns:
            A list with one entry per window: an offset inside ``gps_range``,
            or ``-1`` for windows labelled with the no-intent label.
        """
        return [self._sample_intent_position(label) for label in self.items['label']]

    def __len__(self):
        """Number of windows (not the number of keys in ``self.items``)."""
        return len(self.items['label'])

    def __getitem__(self, idx):
        """Build the ``idx``-th window.

        Returns:
            ``(video, label)`` where ``video`` is a float tensor of shape
            ``(seq_len, CHANNELS + 1, HEIGHT, WIDTH)`` and ``label`` is the
            window label stored in ``self.items['label']``.
        """
        vid_file = self.items['filename'][idx]
        vid_idx = self.items['index'][idx]
        label = self.items['label'][idx]

        data_cfg = self.config['data']
        channels, height, width = data_cfg['CHANNELS'], data_cfg['HEIGHT'], data_cfg['WIDTH']
        fps = self.config['transformer']['fps']

        # +1 for intent channels; every slot is overwritten in the loop below.
        video = torch.empty(self.seq_len, channels + 1, height, width, dtype=torch.float32)

        if self.test != 'train':
            # pick predefined intent positions for testing and validation
            intent = self.intent_positions[idx]
        else:
            # picking randomly for training
            intent = self._sample_intent_position(label)

        for offset in range(self.seq_len):
            intent_value = self.NO_INTENT  # no context signal
            try:
                frame = self.transforms(self.df_videos[vid_file][vid_idx + offset])
                # The intent position is an offset inside the window, so it is
                # compared with the window offset rather than the absolute
                # frame index. The signal is passed for just 1 second.
                if intent != self.NO_INTENT and intent <= offset < intent + fps:
                    intent_value = label
            except Exception as ex:
                # Best effort: a frame that cannot be read becomes a blank
                # frame without intent signal instead of aborting the batch.
                print("Error reading frame", ex)
                frame = torch.zeros((channels, height, width))

            intent_tensor = torch.full((1, height, width), float(intent_value))
            # attach intent as last channel
            video[offset] = torch.cat((frame, intent_tensor), dim=0)

        return video, label

In [11]:
# Validation split: files are passed in sorted order and intent positions are
# sampled once at construction because test != 'train'.
val_dataset = IntentVideoDataset(
    df_videos,
    df_sensor,
    sorted(val_files),
    transforms=val_transforms,
    seq_len=seq_len,
    config_dict=config_dict,
    test='validation',
)

validation :   ['2022-07-12T17-02-16', '2022-07-12T17-12-05', '2022-07-12T17-25-48', '2022-09-24T15:18:08', '2022-09-24T15:23:37', '2022-09-24T15:27:58', '2022-09-24T15:37:31', '2022-09-24T15:41:55', '2022-09-25T16:16:15', '2022-09-25T16:21:25']
586
782
3055
Getting Intent Data for:  validation


In [7]:
# (label, file, first frame index) for every window of the inspected file.
items = []


def process(rows):
    """Apply ``label_map`` to a single value of the ``labels`` column."""
    return label_map(rows)


# Debug view: only the first validation file is inspected.
for f in val_files[:1]:
    df = convert_to_dataframe(df_sensor[f]['direction_label']['direction'])
    df_processed = preprocess_labels(df)  # assign 1 sec forward labels

    # Generate training sequences; each is labelled by the last element of
    # its window.
    for i in range(len(df_processed) - seq_len):
        items.append(
            (
                label_map(df_processed['labels'][i + seq_len - 1]),
                f,
                df_processed['frame_index'][i],
            )
        )

    # Mapped label of every row, shown next to the raw labels.
    df_processed['train_labels'] = df_processed['labels'].apply(process)

    print(df_processed)
    print(items)

     frame_index  timestamp  directions  labels  train_labels
0    0            500        0.0         0.0     2           
1    1            1000       0.0         0.0     2           
2    2            1500       0.0         0.0     2           
3    3            2000       0.0         0.0     2           
4    4            2500       0.0         0.0     2           
5    5            3000       0.0         0.0     2           
6    6            3500       0.0         0.0     2           
7    7            4000       0.0         0.0     2           
8    8            4500       0.0         0.0     2           
9    9            5000       0.0         0.0     2           
10   10           5500       0.0         0.0     2           
11   11           6000       0.0         0.0     2           
12   12           6500       0.0         0.0     2           
13   13           7000       0.0         0.0     2           
14   14           7500       0.0         0.0     2           
15   15 