In [8]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0"
import transformer.DataTransformer as DataTransformer
import yaml
import numpy as np
from utils import *
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader

from trainer.Trainer import Trainer
import pickle5
import pdb
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings from pandas/torch.
warnings.filterwarnings("ignore")
# Print DataFrames in full: no row, column, line-width or cell-width truncation.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
# None is the supported "unlimited" value; the previous -1 was deprecated in
# pandas 1.0 and is rejected by newer releases.
pd.set_option('display.max_colwidth', None)

def load_config(config_path="config.yaml"):
    """Read a YAML configuration file and return its parsed content.

    Args:
        config_path: Path of the YAML file to read. Defaults to
            ``"config.yaml"`` in the current working directory, which is what
            the notebook has always loaded.

    Returns:
        The object produced by the YAML parser (the notebook indexes it like
        a nested dict, e.g. ``config_dict['data']['SEQUENCE_LENGTH']``).

    Raises:
        OSError: If the file cannot be opened.
    """
    # NOTE(security): FullLoader resolves more than plain YAML tags. Only point
    # this at trusted files; consider yaml.safe_load if the config is plain data.
    with open(config_path, "r") as configfile:
        return yaml.load(configfile, Loader=yaml.FullLoader)


In [23]:
# Scratch check: build a 2x3 float tensor holding 1..6, then look at what
# remains after dropping the first column and flattening the rest.
x = torch.Tensor(np.arange(1, 7).reshape((2, 3)))
print(x.shape)
print(x)
torch.flatten(x[:, 1:])

torch.Size([2, 3])
tensor([[1., 2., 3.],
        [4., 5., 6.]])


tensor([2., 3., 5., 6.])

In [9]:
# Notebook-wide settings: parsed config, the recordings used for validation,
# the compute device, and the window length read from the config.
config_dict = load_config()
val_files = [
    '2022-07-12T17-02-16',
    '2022-07-12T17-12-05',
    '2022-07-12T17-25-48',
    '2022-09-24T15:18:08',
    '2022-09-24T15:23:37',
    '2022-09-24T15:27:58',
    '2022-09-24T15:37:31',
    '2022-09-24T15:41:55',
    '2022-09-25T16:16:15',
    '2022-09-25T16:21:25',
]
cuda = torch.cuda.is_available()
device = torch.device("cuda" if cuda else "cpu")
seq_len = config_dict['data']['SEQUENCE_LENGTH']

In [10]:
# Both artefacts share the path prefix configured under transformer.data_save_file.
data_prefix = config_dict['transformer']['data_save_file']

# NOTE(security): allow_pickle=True and pickle loading can execute arbitrary
# code; only load files produced by a trusted pipeline.
# The archive is copied into a plain dict inside the `with` so the underlying
# file handle is closed once every array has been read.
with np.load(data_prefix + '_video.npz', allow_pickle=True) as video_archive:
    df_videos = dict(video_archive)
# need video and sensor data separately
with open(data_prefix + '_sensor.pickle', 'rb') as handle:
    df_sensor = pickle5.load(handle)

# Validation frames are only converted to tensors (no augmentation).
val_transforms = transforms.Compose([transforms.ToTensor()])

In [11]:
class NewIntentVideoDataset(Dataset):
    """Sliding-window video dataset whose frames carry one extra "intent" channel.

    For every recording in ``files`` the label table is scanned with a window
    of ``seq_len`` rows. A window whose labels are all equal yields one item
    with an all-``NO_INTENT`` intent list. A window with mixed labels yields
    one item per look-ahead offset in ``LOOKAHEAD_OFFSETS``; in each of those
    the intent list marks the positions from which a label change lies exactly
    that many rows ahead.

    Each entry of ``self.items`` is ``(labels, file_key, first_frame_index,
    intents)``, where ``labels`` and ``intents`` are lists of length ``seq_len``.

    Args:
        df_videos: Mapping ``file_key -> indexable of frames``.
        df_sensor: Mapping read as
            ``df_sensor[file_key]['direction_label']['direction']``.
        files: Iterable of file keys to include.
        transforms: Callable applied to every frame in ``__getitem__``.
        seq_len: Number of consecutive rows/frames per item.
        config_dict: Config whose ``['data']`` section provides ``CHANNELS``,
            ``HEIGHT`` and ``WIDTH``; required by ``__getitem__``.
    """

    # Value stored in the intent list/channel when nothing is signalled.
    NO_INTENT = -1
    # Label value that is never emitted as an intent. It is what label_map
    # returns for direction 0.0 in the notebook's printed sample; presumably
    # "no turn" -- confirm against label_map.
    IGNORED_INTENT_LABEL = 2
    # Look-ahead distances (in rows) tried for every mixed-label window.
    LOOKAHEAD_OFFSETS = range(3, 10)

    def __init__(self, df_videos, df_sensor, files, transforms, seq_len, config_dict=None):
        self.transforms = transforms

        self.files = files  # files sorted by names
        self.seq_len = seq_len
        self.df_videos = df_videos
        self.df_sensor = df_sensor  # df_sensor['sample']['direction_label']['direction']
        self.config = config_dict

        self.items = []

        same_count = 0
        diff_count = 0
        turn_diff_count = 0
        for f in files:
            # NOTE(review): the scratch cell in this notebook additionally runs
            # preprocess_labels on this frame; here the 'labels' column is used
            # as returned by convert_to_dataframe -- confirm that is intended.
            df_processed = convert_to_dataframe(self.df_sensor[f]['direction_label']['direction'])
            df_processed['labels'] = df_processed['labels'].apply(label_map)

            frame_indices = df_processed['frame_index']
            all_labels = df_processed['labels']
            n_rows = len(df_processed)

            # Generate training sequences
            for i in range(n_rows - self.seq_len):
                index_item = frame_indices[i]
                y_list = list(all_labels[i: i + self.seq_len])

                if NewIntentVideoDataset.checkAllSame(y_list):
                    self.items.append((y_list, f, index_item, [self.NO_INTENT] * self.seq_len))
                    same_count += 1
                    continue

                # Mixed labels: emit one item per look-ahead offset.
                for x in self.LOOKAHEAD_OFFSETS:
                    intent_list = [self.NO_INTENT] * self.seq_len
                    for inx in range(self.seq_len - x):
                        target = y_list[inx + x]
                        # The label x rows ahead must differ from the current
                        # row and from its own predecessor (i.e. it is where
                        # the change happens), and must not be the ignored label.
                        starts_change = (
                            target != self.IGNORED_INTENT_LABEL
                            and target != y_list[inx]
                            and target != y_list[inx + x - 1]
                        )
                        if not starts_change:
                            continue
                        # It must also persist into the following row. The
                        # bounds guard uses the absolute row index, matching
                        # the lookup it protects.
                        next_row = i + inx + x + 1
                        if next_row >= n_rows or target == all_labels[next_row]:
                            intent_list[inx] = target
                            turn_diff_count += 1
                    self.items.append((y_list, f, index_item, intent_list))
                    diff_count += 1

        print("Same Count", same_count)
        print("Diff Count", diff_count)
        print("Turn Diff Count", turn_diff_count)

    @staticmethod
    def checkAllSame(lst):
        """Return True when ``lst`` holds exactly one distinct value."""
        return len(set(lst)) == 1

    def __len__(self):
        """Number of generated items."""
        return len(self.items)

    def __getitem__(self, idx):
        """Build the clip for item ``idx``.

        Returns:
            ``(video, labels)`` where ``video`` is a float32 tensor of shape
            ``(seq_len, CHANNELS + 1, HEIGHT, WIDTH)`` whose last channel is
            filled with the intent value of that time step, and ``labels`` is
            the item's list of ``seq_len`` labels.
        """
        labels, vid_file, vid_idx, intents = self.items[idx]

        data_cfg = self.config['data']
        channels = data_cfg['CHANNELS']
        height = data_cfg['HEIGHT']
        width = data_cfg['WIDTH']

        # +1 for the intent channel; every time step is overwritten below.
        video = torch.empty((self.seq_len, channels + 1, height, width), dtype=torch.float32)

        for it in range(self.seq_len):
            try:
                frame = self.df_videos[vid_file][it + vid_idx]
                frame = self.transforms(frame)
                # Explicit float dtype: an integer fill value would otherwise
                # produce an integer tensor (or be rejected by some torch releases).
                intent_tensor = torch.full(
                    (1, height, width), float(intents[it]), dtype=torch.float32
                )
            except Exception as ex:
                # Best-effort fallback: a frame that cannot be read/transformed
                # becomes an all-zero frame with no intent signal.
                print("Error reading frame", ex)
                frame = torch.zeros((channels, height, width))
                intent_tensor = torch.full(
                    (1, height, width), float(self.NO_INTENT), dtype=torch.float32
                )

            # Attach the intent plane as the last channel.
            context_frame = torch.cat((frame, intent_tensor), dim=0)
            video[it, :, :, :] = context_frame

        return video, labels

In [12]:
# Validation split: windows over the recordings in val_files, processed in
# sorted-name order. Construction prints the same/diff/turn counters.
val_dataset = NewIntentVideoDataset(
    df_videos=df_videos,
    df_sensor=df_sensor,
    files=sorted(val_files),
    transforms=val_transforms,
    seq_len=seq_len,
    config_dict=config_dict,
)

[([2, 2, 2, 2, 2, 2, 2, 2, 2, 2], '2022-07-12T17-02-16', 0, [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]), ([2, 2, 2, 2, 2, 2, 2, 2, 2, 2], '2022-07-12T17-02-16', 1, [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]), ([2, 2, 2, 2, 2, 2, 2, 2, 2, 2], '2022-07-12T17-02-16', 2, [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]), ([2, 2, 2, 2, 2, 2, 2, 2, 2, 2], '2022-07-12T17-02-16', 3, [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]), ([2, 2, 2, 2, 2, 2, 2, 2, 2, 2], '2022-07-12T17-02-16', 4, [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]), ([2, 2, 2, 2, 2, 2, 2, 2, 2, 2], '2022-07-12T17-02-16', 5, [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]), ([2, 2, 2, 2, 2, 2, 2, 2, 2, 1], '2022-07-12T17-02-16', 6, [-1, -1, -1, -1, -1, -1, 1, -1, -1, -1]), ([2, 2, 2, 2, 2, 2, 2, 2, 2, 1], '2022-07-12T17-02-16', 6, [-1, -1, -1, -1, -1, 1, -1, -1, -1, -1]), ([2, 2, 2, 2, 2, 2, 2, 2, 2, 1], '2022-07-12T17-02-16', 6, [-1, -1, -1, -1, 1, -1, -1, -1, -1, -1]), ([2, 2, 2, 2, 2, 2, 2, 2, 2, 1], '2022-07-12T17-02-16', 6, [-1, -1, -1, 1, -1, -1, -

In [7]:
# Scratch inspection of the label pipeline; only the first validation
# recording is processed (the loop ends with `break`).
items = []


def process(rows):
    """Map one raw label value to its training label via ``label_map``."""
    mapped = label_map(rows)
    return mapped


for f in val_files:
    df = convert_to_dataframe(df_sensor[f]['direction_label']['direction'])
    # assign 1 sec forward labels
    df_processed = preprocess_labels(df)

    # Generate training sequences: one (label, file, frame_index) entry per
    # window start, labelled with the last row of the window.
    n_windows = len(df_processed) - seq_len
    for i in range(n_windows):
        index_item = df_processed['frame_index'][i]
        last_row = i + seq_len - 1
        y = label_map(df_processed['labels'][last_row])
        items.append((y, f, index_item))

    # Keep the mapped labels next to the raw ones for visual comparison.
    df_processed['train_labels'] = df_processed['labels'].apply(process)

    print(df_processed)
    print(items)
    break

     frame_index  timestamp  directions  labels  train_labels
0    0            500        0.0         0.0     2           
1    1            1000       0.0         0.0     2           
2    2            1500       0.0         0.0     2           
3    3            2000       0.0         0.0     2           
4    4            2500       0.0         0.0     2           
5    5            3000       0.0         0.0     2           
6    6            3500       0.0         0.0     2           
7    7            4000       0.0         0.0     2           
8    8            4500       0.0         0.0     2           
9    9            5000       0.0         0.0     2           
10   10           5500       0.0         0.0     2           
11   11           6000       0.0         0.0     2           
12   12           6500       0.0         0.0     2           
13   13           7000       0.0         0.0     2           
14   14           7500       0.0         0.0     2           
15   15 