In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
import pandas as pd
import torchvision
import random
import os
import os.path as osp
import ffmpeg
from tqdm import tqdm
import numpy as np
from sklearn.model_selection import train_test_split
from torchvision import transforms
import matplotlib.pyplot as plt
import cv2
import pickle

ModuleNotFoundError: No module named 'torch'

In [5]:
# Directory of FPS-resampled videos: process_videos() writes into it and preprocess() reads from it.
VID_PATH = "../../data_temp/extracted_videos/"
# Default directory that process_videos() walks to find the source videos.
VID_PATH_OG = "../../data_temp/videos_new/"
# NumPy file loaded by preprocess() to obtain the per-video direction labels.
LABEL_FILE = "../../data/data.npy"
# Directory where process_video() stores one resized .npy array per kept frame.
PROCESSED_PATH = "../../data_temp/processed/" #frames
# Directory where process_video() writes one CSV (frames / gps / labels) per video.
DATA_SAVE_PATH = "../../data_temp/labeled_videos/"  #videos_frame_timestamped
# Root directory under which save() stores model and optimizer checkpoints.
MODELS_PATHS = "./models/"
# LABEL_PATH = '../../data/labels/'
# Frame rate passed to the ffmpeg 'fps' filter in process_videos().
FPS = 5
# Number of label classes; sizes the histogram built by labelCount().
n_classes = 3

In [6]:
# Batch size used for the training and validation DataLoaders.
BATCH = 2
# Number of consecutive frames per sample (passed as seq_len to VideoDataset).
SEQUENCE_LENGTH = 20
# Frame size: process_video() resizes frames to (WIDTH, HEIGHT) and VideoDataset
# allocates its tensors with these dimensions.
HEIGHT = 128
WIDTH = 128
# Channels of a stored frame, as assumed by VideoDataset's zero-frame fallback.
CHANNELS = 3
# Extra constant-valued channels carrying the 'gps' context value of each frame.
CONTEXT_CHANNELS = 1

In [7]:
"""
  Preprocess video data.
"""
def label_map(lab):
    """Remap a raw direction label onto the class index used for training.

    Mapping: ``0 -> 2``, ``-1 -> 0``, any other value ``-> 1``.
    """
    if lab == 0:
        return 2
    return 0 if lab == -1 else 1
    
def get_all_files_from_dir(directory, vids = False):
    """Recursively collect paths below ``directory`` and return them sorted.

    Parameters
    ----------
    directory : str
        Root directory to walk. It is printed before walking.
    vids : bool
        If False, return the path of every file found. If True, return one
        path ``<root>/<name>/<name>.mp4`` for every sub-directory ``<name>``
        found (the file itself is not checked for existence).

    Returns
    -------
    list of str
        Sorted paths. An empty list is returned when nothing is found or when
        walking fails, so callers can always iterate the result.
    """
    file_paths = []
    print(directory)
    try:
        for root, dirs, files in os.walk(directory):
            if vids:
                file_paths.extend(os.path.join(root, name, name + ".mp4") for name in dirs)
            else:
                file_paths.extend(os.path.join(root, name) for name in files)
    except Exception as e:
        # Best-effort: report the problem but still hand back a list (the
        # previous implementation fell through and returned None here).
        print(e)
        return []
    return sorted(file_paths)
    
def turn_high_labeled(vid_context, yy):
    """Return True if ``yy`` occurs among the last ten entries of ``vid_context``.

    When fewer than ten entries exist, all of them are inspected.
    """
    recent = vid_context if len(vid_context) < 10 else vid_context[-10:]
    return any(entry == yy for entry in recent)

def get_label_pos(vid_pos, offset):
    """Shift ``vid_pos`` by ``offset`` and round the result down to a multiple of 100."""
    shifted = vid_pos + offset
    remainder = shifted % 100
    return shifted - remainder
    
def process_video(video_file, labels):
    """Extract labelled frames from one video and write its frame/label index CSV.

    For every decoded frame whose label timestamp (position + 1000, rounded
    down to 100 via ``get_label_pos``) exists in ``labels``, the frame is
    resized to ``(WIDTH, HEIGHT)``, stored as ``<video>_<n>.npy`` in
    ``PROCESSED_PATH`` and recorded together with its label and a context
    value. Frames without a label are skipped (their timestamp is printed).

    The context value is ``labels[context_ts]`` (position + 3000, rounded down,
    plus 100) only when that label exists, did not already occur in the last
    ten context values, and differs from the label at ``limit_ts``
    (position + 1500, rounded down); otherwise it defaults to 2.

    Parameters
    ----------
    video_file : str
        Path of the video; the basename up to the first '.' names the outputs.
    labels : mapping
        Timestamp -> class label lookup.

    Side effects
    ------------
    Writes one ``.npy`` file per kept frame and ``<video>.csv`` (columns
    ``frames``, ``gps``, ``labels``) into ``DATA_SAVE_PATH``; prints progress.
    """
    video_filename = video_file.split('/')[-1].split('.')[0]
    vidcap = cv2.VideoCapture(video_file)

    ctr = 0
    tot_frames = 0
    video_frames = []
    video_context = []
    video_labels = []

    try:
        hasFrames, image = vidcap.read()
        while hasFrames:
            tot_frames += 1

            vid_pos = vidcap.get(cv2.CAP_PROP_POS_MSEC)
            label_ts = get_label_pos(vid_pos, 1000)
            if label_ts not in labels.keys():
                # No label for this frame: report the timestamp and move on.
                print(label_ts)
                hasFrames, image = vidcap.read()
                continue

            save_file_name = video_filename + "_" + str(ctr) + ".npy"
            image = cv2.resize(image, (WIDTH, HEIGHT), interpolation = cv2.INTER_AREA)
            np.save(osp.join(PROCESSED_PATH, save_file_name), image)
            video_frames.append(save_file_name)
            video_labels.append(labels[label_ts])

            context_ts = get_label_pos(vid_pos, 3000) + 100
            limit_ts = get_label_pos(vid_pos, 1500)

            # limit_ts is checked for membership too; previously a missing
            # limit_ts raised KeyError and aborted the whole video.
            use_context = (
                context_ts in labels.keys()
                and limit_ts in labels.keys()
                and not turn_high_labeled(video_context, labels[context_ts])
                and labels[limit_ts] != labels[context_ts]
            )
            if use_context:
                video_context.append(labels[context_ts])
            else:
                video_context.append(2) #Default: FRONT == 2

            hasFrames, image = vidcap.read()
            ctr += 1
    finally:
        # Always free the decoder handle, even if a frame fails to process.
        vidcap.release()

    print("Actual labels: ", labels)
    print("Labels: ", video_labels)
    print("Context: ", video_context)

    df = pd.DataFrame({'frames': video_frames, 'gps': video_context, 'labels': video_labels})
    df.to_csv(osp.join(DATA_SAVE_PATH,video_filename+".csv"), index=None)

    print("After processing:")
    print("Total frames: ",tot_frames)
    print("Number of frames labelled: ", ctr)
    
def preprocess():
    """Run ``process_video`` on every file found under ``VID_PATH``.

    The label store is loaded from ``LABEL_FILE``; for each video the entry
    ``[<video name>]['Sensor']['direction_label']['direction']`` is looked up,
    its values are remapped in place with ``label_map`` and the result is
    handed to ``process_video``.

    NOTE(review): ``allow_pickle=True`` unpickles arbitrary objects, so
    ``LABEL_FILE`` must come from a trusted source.
    """
    label_store = np.load(LABEL_FILE, allow_pickle = True)
    for video_file in get_all_files_from_dir(VID_PATH):
        video_filename = video_file.split('/')[-1].split('.')[0]
        print(video_filename)
        labels = label_store[video_filename]['Sensor']['direction_label']['direction']

        # Snapshot the items first, then overwrite each value with its class index.
        for timestamp, raw_label in list(labels.items()):
            labels[timestamp] = label_map(raw_label)

        process_video(video_file, labels)
        print("Finished processing ", video_file)

def process_videos(vid_path = VID_PATH_OG):
    """Resample every source video under ``vid_path`` to ``FPS`` frames per second.

    Source paths come from ``get_all_files_from_dir(vid_path, vids=True)``.
    A video is converted with ffmpeg's ``fps`` filter (``round='up'``) and
    written to ``VID_PATH`` unless a file of the same name already exists there.
    """
    fp = get_all_files_from_dir(vid_path, vids=True)
    print(fp)
    for source_path in fp:
        video_filename = source_path.split('/')[-1]
        if video_filename in os.listdir(VID_PATH):
            # Already converted.
            continue
        resampled = ffmpeg.input(source_path).filter('fps', fps=FPS, round='up')
        resampled.output(VID_PATH+video_filename).run()

In [8]:
# process_videos()

In [9]:
### preprocess videos
# preprocess()

In [10]:
def labelCount(label, num_classes=None):
    """Count how often each class index occurs in ``label``.

    Parameters
    ----------
    label : iterable of int
        Class indices in ``[0, num_classes)``.
    num_classes : int, optional
        Number of classes; defaults to the module-level ``n_classes``.

    Returns
    -------
    list of int
        ``counts[c]`` is the number of occurrences of class ``c``.

    Raises
    ------
    IndexError
        If a label lies outside ``[0, num_classes)``. Negative labels used to
        be counted silently against the last classes via negative indexing.
    """
    if num_classes is None:
        num_classes = n_classes
    label_count = [0] * num_classes
    for lab in label:
        if not 0 <= lab < num_classes:
            raise IndexError("label {} is outside [0, {})".format(lab, num_classes))
        label_count[lab] += 1
    return label_count

def sampler_(dataset_labels):
    """Build a class-balancing ``WeightedRandomSampler`` for ``dataset_labels``.

    Each sample is weighted by ``num_samples / count(its class)``, so rarer
    classes are drawn more often; ``num_samples`` draws are made per epoch.
    The per-class counts are printed.

    Parameters
    ----------
    dataset_labels : sequence of int
        Class index of every sample, in dataset order.

    Returns
    -------
    WeightedRandomSampler
    """
    dataset_counts = labelCount(dataset_labels)
    print(dataset_counts)
    num_samples = sum(dataset_counts)
    # A class with no samples gets weight 0 instead of raising ZeroDivisionError;
    # that weight is never looked up because no sample carries the class.
    class_weights = [num_samples / count if count else 0.0 for count in dataset_counts]
    weights = [class_weights[y] for y in dataset_labels]
    sampler = WeightedRandomSampler(torch.DoubleTensor(weights), int(num_samples))
    return sampler

def sanity_check(gps, y):
    """Check that a non-default label ``y`` is announced by the context values.

    Returns True when ``y`` is 2, or when ``y`` appears somewhere in ``gps``;
    otherwise False.
    """
    if y == 2:
        return True
    return any(value == y for value in gps)
    
def good_data_check(ys):
    """Return True only if each of the first seven entries of ``ys`` equals 2."""
    return not any(ys[pos] != 2 for pos in range(7))
    

In [11]:
class VideoDataset(Dataset):
    """Sliding-window dataset of frame sequences with a per-frame context channel.

    Every CSV in ``files`` must provide the columns ``frames`` (file names of
    stored frames relative to ``base_path``), ``gps`` (context value per frame)
    and ``labels``. Windows of ``seq_len`` consecutive rows are kept when they
    pass both ``sanity_check`` (context vs. label of the last row) and
    ``good_data_check`` (labels of the window).

    Each item is ``(video, label)``: ``video`` is a float tensor of shape
    ``(seq_len, CHANNELS + CONTEXT_CHANNELS, HEIGHT, WIDTH)`` whose first
    ``CONTEXT_CHANNELS`` channels are filled with the frame's context value and
    whose remaining channels hold the transformed frame; ``label`` is the
    label of the last row of the window.

    Parameters
    ----------
    files : list of str
        Paths of the per-video CSV files.
    transforms : callable
        Applied to each min-max normalised frame; assumed to return a tensor
        of shape ``(CHANNELS, HEIGHT, WIDTH)`` — TODO confirm for custom transforms.
    seq_len : int
        Number of frames per sample.
    base_path : str
        Directory that contains the ``.npy`` frames.

    Raises
    ------
    ValueError
        If no window of any file passes the checks.
    """

    def __init__(self, files, transforms, seq_len, base_path):
        self.transforms = transforms
        self.files = files
        self.seq_len = seq_len
        self.base_path = base_path
        X = []
        context = []
        y = []
        for f in files:
            df = pd.read_csv(f)
            # NOTE(review): this range stops at len(df) - seq_len - 1, so the
            # final full window of each file is never considered — confirm
            # whether that is intended before changing it (it alters dataset size).
            for i in range(len(df)-self.seq_len):
                cand_x = df['frames'][i:i+self.seq_len].to_numpy()
                cand_gps = df['gps'][i:i+self.seq_len].to_numpy()
                cand_y = df['labels'][i+self.seq_len-1]
                if(sanity_check(cand_gps, cand_y) and good_data_check(df['labels'][i:i+self.seq_len].to_numpy())):
                    X.append(cand_x)
                    context.append(cand_gps)
                    y.append(cand_y)

        if not y:
            # np.stack([]) would fail with an opaque message; be explicit.
            raise ValueError(
                "No valid sequence of length {} found in {} file(s)".format(seq_len, len(files)))

        self.X = np.stack(X, axis = 0)
        self.context = np.stack(context, axis = 0)
        self.y = np.array(y)
        print(len(self.y))

    def __len__(self):
        """Number of accepted windows."""
        return len(self.y)

    def __getitem__(self, idx):
        """Load, normalise and stack the frames of window ``idx``."""
        seq_filename = self.X[idx]
        context = self.context[idx]
        # Every time step is written below, so the uninitialised storage is safe.
        video = torch.FloatTensor(self.seq_len, CHANNELS+CONTEXT_CHANNELS, HEIGHT, WIDTH)
        for e, (filename, cont) in enumerate(zip(seq_filename, context)):
            try:
                # NOTE(review): allow_pickle=True unpickles arbitrary objects;
                # frames written with np.save as plain arrays should not need it.
                frame = np.load(osp.join(self.base_path,filename), allow_pickle=True)
                lowest = frame.min()
                value_range = frame.max() - lowest
                # A constant frame has zero range; dividing by 1 yields an
                # all-zero frame instead of the NaNs produced by 0/0.
                frame = (frame - lowest) / (value_range if value_range != 0 else 1)
                frame = self.transforms(frame)

            except Exception as ex:
                # Best-effort: a missing or unreadable frame becomes a black frame.
                print(ex)
                frame = torch.zeros((CHANNELS, HEIGHT, WIDTH))

            context_tensor = torch.full((CONTEXT_CHANNELS, HEIGHT, WIDTH), cont)
            context_frame = torch.cat((context_tensor, frame), dim = 0)
            video[e,:,:,:] = context_frame

        return video, self.y[idx]
        

In [14]:
def make_tt_split(data_folder, seq_len):
    """Randomly split the CSV files of ``data_folder`` into train and test lists.

    The files whose names end in ``csv`` are shuffled with ``random.shuffle``;
    the first 25% (rounded down) become the test set, which is printed.
    ``seq_len`` is accepted but not used.

    Returns
    -------
    (train_files, test_files) : tuple of lists of str
    """
    files = [osp.join(data_folder, name)
             for name in os.listdir(data_folder)
             if name[-3:] == "csv"]

    random.shuffle(files)

    n_test = int(len(files) * 0.25)
    test_files, train_files = files[:n_test], files[n_test:]
    print("Test files ",test_files)
    return train_files, test_files


def use_tt_split(data_folder, lst):
    """Reuse a fixed test split: everything else in ``data_folder`` is training data.

    Returns ``(train_files, lst)`` where ``train_files`` are the joined paths
    of all entries of ``data_folder`` that end in ``csv`` and are not in
    ``lst``. The training list is printed.
    """
    candidates = (osp.join(data_folder, name) for name in os.listdir(data_folder))
    files = [path for path in candidates if path[-3:] == "csv" and path not in lst]
    print(files)
    return files, lst
    

In [15]:
# Pick the compute device once; `cuda` also toggles the GPU-only loader options.
cuda = torch.cuda.is_available()
print(cuda)
device = torch.device("cuda") if cuda else torch.device("cpu")
# train_transforms = [ttf.ToTensor(), transforms.Resize((HEIGHT, WIDTH)), transforms.ColorJitter(), transforms.RandomRotation(10), transforms.GaussianBlur(3)]
# train_transforms = transforms.Compose([transforms.ToTensor(), transforms.Resize((HEIGHT, WIDTH))])
# val_transforms = transforms.Compose([transforms.ToTensor(), transforms.Resize((HEIGHT, WIDTH))])

# Frames are converted to CHW tensors and normalised with fixed per-channel statistics.
train_transforms = transforms.Compose([transforms.ToTensor(),transforms.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])])
val_transforms = transforms.Compose([transforms.ToTensor(),transforms.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])])

# Fixed held-out videos so that validation numbers are comparable between runs.
test_files = [
    '../../data_temp/labeled_videos/2022-04-04T19:01:16.csv',
    '../../data_temp/labeled_videos/2022-04-04T19:02:28.csv',
    '../../data_temp/labeled_videos/2022-04-04T18:41:15.csv',
    '../../data_temp/labeled_videos/2022-04-04T19:19:33.csv',
    '../../data_temp/labeled_videos/2022-04-04T19:24:10.csv',
]
train_files, test_files = use_tt_split(DATA_SAVE_PATH, test_files)
# train_files, test_files = make_tt_split(DATA_SAVE_PATH, seq_len = SEQUENCE_LENGTH)

train_dataset = VideoDataset(train_files, transforms=train_transforms, seq_len = SEQUENCE_LENGTH, base_path = PROCESSED_PATH)
val_dataset = VideoDataset(test_files, transforms=val_transforms, seq_len = SEQUENCE_LENGTH, base_path = PROCESSED_PATH)

sampler = sampler_(train_dataset.y)

# Worker processes and pinned memory are only requested when a GPU is present.
loader_extras = dict(num_workers=2, pin_memory=True) if cuda else dict()

# The class-balancing sampler is used on every device; previously the CPU
# branch dropped it, so CPU runs trained unshuffled and unbalanced.
train_args = dict(batch_size=BATCH, sampler=sampler, drop_last=False, **loader_extras)
train_loader = DataLoader(train_dataset, **train_args)

val_args = dict(shuffle=False, batch_size=BATCH, drop_last=False, **loader_extras)
val_loader = DataLoader(val_dataset, **val_args)



True
['../../data_temp/labeled_videos/2022-04-04T18:47:26.csv', '../../data_temp/labeled_videos/2022-04-04T18:48:45.csv', '../../data_temp/labeled_videos/2022-04-04T18:58:12.csv', '../../data_temp/labeled_videos/2022-04-04T18:59:05.csv', '../../data_temp/labeled_videos/2022-04-04T19:04:56.csv', '../../data_temp/labeled_videos/2022-04-04T19:06:29.csv', '../../data_temp/labeled_videos/2022-04-04T19:07:55.csv', '../../data_temp/labeled_videos/2022-04-04T19:09:43.csv', '../../data_temp/labeled_videos/2022-04-04T19:10:28.csv', '../../data_temp/labeled_videos/2022-04-04T19:16:45.csv', '../../data_temp/labeled_videos/2022-04-04T19:17:50.csv', '../../data_temp/labeled_videos/2022-04-04T19:18:52.csv', '../../data_temp/labeled_videos/2022-04-04T19:21:41.csv', '../../data_temp/labeled_videos/2022-04-04T19:22:45.csv', '../../data_temp/labeled_videos/2022-04-04T19:23:24.csv', '../../data_temp/labeled_videos/2022-04-04T19:28:21.csv', '../../data_temp/labeled_videos/2022-04-04T19:28:40.csv', '../../d

In [16]:
# Number of accepted sequences (VideoDataset.__len__) in the training and validation sets.
print(len(train_dataset))
print(len(val_dataset))

1265
470


In [17]:
class ConvLSTMCell(nn.Module):
    """One convolutional LSTM cell.

    A single convolution over the channel-wise concatenation of the input and
    the previous hidden state produces the four gate pre-activations.
    """

    def __init__(self, input_dim, hidden_dim, kernel_size, bias):
        """
        Parameters
        ----------
        input_dim: int
            Channels of the input tensor.
        hidden_dim: int
            Channels of the hidden (and cell) state.
        kernel_size: (int, int)
            Convolution kernel size; half of each entry is used as padding.
        bias: bool
            Whether the convolution has a bias term.
        """
        super().__init__()

        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.kernel_size = kernel_size
        self.padding = (kernel_size[0] // 2, kernel_size[1] // 2)
        self.bias = bias

        # One convolution yields all four gates, hence 4 * hidden_dim outputs.
        self.conv = nn.Conv2d(
            self.input_dim + self.hidden_dim,
            4 * self.hidden_dim,
            self.kernel_size,
            padding=self.padding,
            bias=self.bias,
        )

        # Re-initialise every convolution weight contained in this cell.
        conv_layers = [m for m in self.modules() if isinstance(m, nn.Conv2d)]
        for layer in conv_layers:
            nn.init.kaiming_normal_(layer.weight, mode='fan_out', nonlinearity='sigmoid')

    def forward(self, input_tensor, cur_state):
        """Advance one time step.

        ``cur_state`` is the pair ``(h, c)``; returns ``(h_next, c_next)``.
        """
        prev_h, prev_c = cur_state

        # Stack input and hidden state along the channel axis before convolving.
        gates = self.conv(torch.cat((input_tensor, prev_h), dim=1))
        raw_in, raw_forget, raw_out, raw_cand = torch.split(gates, self.hidden_dim, dim=1)

        in_gate = torch.sigmoid(raw_in)
        forget_gate = torch.sigmoid(raw_forget)
        out_gate = torch.sigmoid(raw_out)
        candidate = torch.tanh(raw_cand)

        c_next = forget_gate * prev_c + in_gate * candidate
        h_next = out_gate * torch.tanh(c_next)
        return h_next, c_next

    def init_hidden(self, batch_size, image_size):
        """Return zero ``(h, c)`` states on the device of the convolution weights."""
        height, width = image_size
        state_shape = (batch_size, self.hidden_dim, height, width)
        target_device = self.conv.weight.device
        return (torch.zeros(state_shape, device=target_device),
                torch.zeros(state_shape, device=target_device))


class ConvLSTM(nn.Module):

    """
    Stack of ``ConvLSTMCell`` layers unrolled over a sequence.

    Parameters:
        input_dim: Number of channels in input
        hidden_dim: Hidden channels, an int or a list with one int per layer
        kernel_size: Kernel size, a tuple or a list with one tuple per layer
        num_layers: Number of LSTM layers stacked on each other
        batch_first: Whether dimension 0 of the input is the batch
        bias: Bias or no bias in Convolution
        return_all_layers: Return the results of all layers instead of the last
        Note: Will do same padding.
    Input:
        A tensor of size B, T, C, H, W (batch_first) or T, B, C, H, W
    Output:
        A tuple of two lists of length num_layers (or length 1 if return_all_layers is False).
            0 - layer_output_list: per layer, a tensor of size B, T, hidden, H, W
            1 - last_state_list: per layer, the final [h, c] pair
    Example:
        >> x = torch.rand((32, 10, 64, 128, 128))
        >> convlstm = ConvLSTM(64, 16, (3, 3), 1, True, True, False)
        >> _, last_states = convlstm(x)
        >> h = last_states[0][0]  # 0 for layer index, 0 for h index
    """

    def __init__(self, input_dim, hidden_dim, kernel_size, num_layers,
                 batch_first=False, bias=True, return_all_layers=False):
        super().__init__()

        self._check_kernel_size_consistency(kernel_size)

        # Broadcast scalar settings so that there is one entry per layer.
        kernel_size = self._extend_for_multilayer(kernel_size, num_layers)
        hidden_dim = self._extend_for_multilayer(hidden_dim, num_layers)
        if not (len(kernel_size) == len(hidden_dim) == num_layers):
            raise ValueError('Inconsistent list length.')

        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.kernel_size = kernel_size
        self.num_layers = num_layers
        self.batch_first = batch_first
        self.bias = bias
        self.return_all_layers = return_all_layers

        # Layer 0 consumes the input; each later layer consumes the previous hidden state.
        layer_inputs = [self.input_dim] + list(self.hidden_dim[:-1])
        cells = [
            ConvLSTMCell(input_dim=in_channels,
                         hidden_dim=out_channels,
                         kernel_size=kernel,
                         bias=self.bias)
            for in_channels, out_channels, kernel
            in zip(layer_inputs, self.hidden_dim, self.kernel_size)
        ]
        self.cell_list = nn.ModuleList(cells)

    def forward(self, input_tensor, hidden_state=None):
        """
        Parameters
        ----------
        input_tensor:
            5-D Tensor either of shape (t, b, c, h, w) or (b, t, c, h, w)
        hidden_state:
            Must be None; passing a state raises NotImplementedError.
        Returns
        -------
        layer_output_list, last_state_list
        """
        if not self.batch_first:
            # (t, b, c, h, w) -> (b, t, c, h, w)
            input_tensor = input_tensor.permute(1, 0, 2, 3, 4)

        batch, steps, _, height, width = input_tensor.size()

        if hidden_state is not None:
            # Stateful operation is not implemented.
            raise NotImplementedError()
        hidden_state = self._init_hidden(batch_size=batch, image_size=(height, width))

        layer_output_list = []
        last_state_list = []
        layer_input = input_tensor

        for cell, (h, c) in zip(self.cell_list, hidden_state):
            step_outputs = []
            for t in range(steps):
                h, c = cell(input_tensor=layer_input[:, t, :, :, :], cur_state=[h, c])
                step_outputs.append(h)  # [batch, hidden, height, width]

            # [batch, t, hidden, height, width]; it also feeds the next layer.
            layer_input = torch.stack(step_outputs, dim=1)
            layer_output_list.append(layer_input)
            last_state_list.append([h, c])

        if self.return_all_layers:
            return layer_output_list, last_state_list
        return layer_output_list[-1:], last_state_list[-1:]

    def _init_hidden(self, batch_size, image_size):
        """Zero (h, c) state for every layer."""
        return [self.cell_list[layer].init_hidden(batch_size, image_size)
                for layer in range(self.num_layers)]

    @staticmethod
    def _check_kernel_size_consistency(kernel_size):
        """Accept a tuple or a list of tuples; raise ValueError otherwise."""
        single = isinstance(kernel_size, tuple)
        per_layer = isinstance(kernel_size, list) and all(
            isinstance(elem, tuple) for elem in kernel_size)
        if not (single or per_layer):
            raise ValueError('`kernel_size` must be tuple or list of tuples')

    @staticmethod
    def _extend_for_multilayer(param, num_layers):
        """Repeat a non-list ``param`` once per layer; lists pass through."""
        return param if isinstance(param, list) else [param] * num_layers

In [18]:
class ConvLSTMModel(nn.Module):
    """ConvLSTM encoder followed by a linear classifier on the last time step.

    The hidden map of the last layer at the final time step is flattened and
    projected to ``num_classes`` logits. The classifier is sized for inputs of
    spatial size ``HEIGHT x WIDTH``.

    Parameters
    ----------
    input_dim, hidden_dim, kernel_size, num_layers, batch_first, bias, return_all_layers
        Forwarded unchanged to ``ConvLSTM``. ``hidden_dim`` may be an int or a
        per-layer list; the classifier uses the last layer's width.
    num_classes : int
        Number of output logits.
    """

    def __init__(self, input_dim, hidden_dim, kernel_size, num_layers,
                 batch_first=False, bias=True, return_all_layers=False, num_classes = 3):
        super(ConvLSTMModel, self).__init__()
        self.convlstm = ConvLSTM(input_dim, hidden_dim, kernel_size, num_layers,batch_first, bias, return_all_layers)
        # With a per-layer list, `hidden_dim * HEIGHT * WIDTH` would repeat the
        # list instead of multiplying; size the classifier from the last layer.
        last_hidden = hidden_dim[-1] if isinstance(hidden_dim, list) else hidden_dim
        self.linear = nn.Linear(last_hidden * HEIGHT * WIDTH, num_classes)

    def forward(self, input_tensor, hidden_state=None):
        """Return logits of shape ``[batch, num_classes]``.

        ``hidden_state`` is accepted for interface compatibility but ignored.
        """
        layer_outputs, _ = self.convlstm(input_tensor)
        # Take the last layer; this equals index 0 unless return_all_layers=True,
        # in which case index 0 would be the first layer.
        x = layer_outputs[-1]          # [batch, t, hidden, h, w]
        x = x[:, -1, :, :, :]          # keep only the last time step
        x = torch.flatten(x, start_dim=1)
        x = self.linear(x)             # [batch, num_classes]
        return x


In [19]:
def save(model, index, acc, optim = False):
    """Write the ``state_dict`` of ``model`` to the run's checkpoint directory.

    Parameters
    ----------
    model : object with ``state_dict()``
        A network, or an optimizer when ``optim`` is True.
    index : int
        Epoch index, zero-padded to eight digits in the file name.
    acc : float
        Accuracy embedded in the file name.
    optim : bool
        Selects the ``optimizer_params_`` file prefix instead of ``model_params_``.
    """
    run_dir = MODELS_PATHS+'/attempt11_1sec_prior_convlstm_new_data_3sec_prior_gps'
    # makedirs also creates a missing MODELS_PATHS parent, where os.mkdir failed.
    os.makedirs(run_dir, exist_ok=True)
    prefix = 'optimizer_params' if optim else 'model_params'
    torch.save(model.state_dict(), run_dir+'/{}_{:08d}_acc={}.pth'.format(prefix, index, acc))

In [22]:
# Hyper-parameters of the resumed training run.
lr = 0.008 #changed from 0.01
epochs = 50
lamda = 1e-2  #L2 regularization #changed from 1e-4
num_classes = 3
convlstm_hidden = 256
num_conv_lstm_layers = 1

model = ConvLSTMModel(CHANNELS+CONTEXT_CHANNELS,convlstm_hidden,(3,3),num_conv_lstm_layers,batch_first=True,num_classes=num_classes)
# map_location lets a checkpoint written on a GPU be restored on whatever
# `device` is available, including the CPU fallback.
model.load_state_dict(torch.load('./models/attempt11_1sec_prior_convlstm_new_data_3sec_prior_gps/model_params_00000006_acc=50.0.pth', map_location=device))
model = model.to(device)

criterion = nn.CrossEntropyLoss()
# optimizer = torch.optim.SGD(model.parameters(), lr=lr, weight_decay=lamda, momentum=0.9)
optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=lamda)
# Loading the optimizer state also restores the param-group settings stored in the checkpoint.
optimizer.load_state_dict(torch.load('./models/attempt11_1sec_prior_convlstm_new_data_3sec_prior_gps/optimizer_params_00000006_acc=50.0.pth', map_location=device))

# for g in optimizer.param_groups:
#     g['lr'] = lr
#     g['weight_decay']= lamda

scaler = torch.cuda.amp.GradScaler()
# The schedule is stepped once per batch, so T_max is counted in batches.
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=(len(train_loader) * epochs))
print(model)

ConvLSTMModel(
  (convlstm): ConvLSTM(
    (cell_list): ModuleList(
      (0): ConvLSTMCell(
        (conv): Conv2d(260, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      )
    )
  )
  (linear): Linear(in_features=4194304, out_features=3, bias=True)
)


In [23]:
# Training / validation loop. It starts at epoch index 7, presumably because the
# checkpoint restored earlier in the notebook is numbered 6 — confirm when
# resuming from a different checkpoint.
for epoch in range(7,epochs):
    batch_bar = tqdm(total=len(train_loader), dynamic_ncols=True, leave=False, position=0, desc='Train') 

    # Running totals for this epoch.
    num_correct = 0
    total_loss = 0
    
    for i, (x, y) in enumerate(train_loader):
       
        model.train()
        optimizer.zero_grad()

        x = x.float().to(device)
        y = y.to(device)
        
        # Forward pass and loss under mixed-precision autocast.
        with torch.cuda.amp.autocast():
            outputs = model(x)
            del x
            # loss = criterion(outputs.view(-1,num_classes), y.long().view(-1))
            loss = criterion(outputs, y.long())

        num_correct += int((torch.argmax(outputs, axis=1) == y).sum())
        del outputs
        total_loss += float(loss)

        # The running accuracy divides by (i + 1) * BATCH, i.e. it assumes every
        # batch seen so far was full.
        batch_bar.set_postfix(
            acc="{:.04f}%".format(100 * num_correct / ((i + 1) * BATCH)),
            loss="{:.04f}".format(float(total_loss / (i + 1))),
            num_correct=num_correct,
            lr="{:.04f}".format(float(optimizer.param_groups[0]['lr'])))
        
        # Scaled backward pass, optimizer step and scale update via GradScaler.
        scaler.scale(loss).backward()
        scaler.step(optimizer) 
        scaler.update()

        # The learning-rate schedule advances once per batch.
        scheduler.step()

        batch_bar.update() # Update tqdm bar
        

    batch_bar.close()
    # Epoch-level training accuracy over the whole training dataset.
    acc = 100 * num_correct / (len(train_dataset))
    print("Epoch {}/{}: Train Acc {:.04f}%, Train Loss {:.04f}, Learning Rate {:.04f}".format(
        epoch + 1,
        epochs,
        acc,
        float(total_loss / len(train_loader)),
        float(optimizer.param_groups[0]['lr'])))
    
    # validation
    model.eval()
    val_num_correct = 0
    
    for i, (vx, vy) in tqdm(enumerate(val_loader)):
      
      vx = vx.to(device)
      vy = vy.to(device)

      # Inference only: no gradients are recorded.
      with torch.no_grad():
          outputs = model(vx)
          del vx

      val_num_correct += int((torch.argmax(outputs, axis=1) == vy).sum())
      del outputs

    # From here on `acc` holds the validation accuracy; it is embedded in the
    # checkpoint file names written by save().
    acc = 100 * val_num_correct / (len(val_dataset))
    print("Validation: {:.04f}%".format(acc))
    
    
    save(model, epoch, acc)
    save(optimizer, epoch,acc, optim=True)
    
    
# The bar of the last epoch was already closed inside the loop; this is a second close.
batch_bar.close()

                                                                                                                        

Epoch 8/50: Train Acc 59.3676%, Train Loss 2698.5924, Learning Rate 0.0076


235it [02:40,  1.47it/s]


Validation: 19.3617%


                                                                                                                        

Epoch 9/50: Train Acc 63.4783%, Train Loss 1578.0679, Learning Rate 0.0076


235it [02:38,  1.49it/s]


Validation: 53.1915%


                                                                                                                        

Epoch 10/50: Train Acc 59.7628%, Train Loss 2005.5028, Learning Rate 0.0076


235it [02:38,  1.49it/s]


Validation: 39.1489%


                                                                                                                        

Epoch 11/50: Train Acc 66.5613%, Train Loss 1930.3819, Learning Rate 0.0075


235it [02:40,  1.46it/s]


Validation: 53.8298%


                                                                                                                        

Epoch 12/50: Train Acc 77.7075%, Train Loss 956.6614, Learning Rate 0.0074


235it [02:41,  1.46it/s]


Validation: 50.2128%


                                                                                                                        

Epoch 13/50: Train Acc 74.6245%, Train Loss 988.0241, Learning Rate 0.0074


235it [02:41,  1.45it/s]


Validation: 40.0000%


                                                                                                                        

Epoch 14/50: Train Acc 81.6601%, Train Loss 643.0930, Learning Rate 0.0073


235it [02:42,  1.45it/s]


Validation: 36.8085%


                                                                                                                        

Epoch 15/50: Train Acc 79.6838%, Train Loss 619.8096, Learning Rate 0.0071


235it [02:41,  1.46it/s]


Validation: 37.4468%


                                                                                                                        

Epoch 16/50: Train Acc 77.3913%, Train Loss 649.9733, Learning Rate 0.0070


235it [02:42,  1.45it/s]


Validation: 42.3404%


                                                                                                                        

Epoch 17/50: Train Acc 81.1067%, Train Loss 485.9408, Learning Rate 0.0069


235it [02:40,  1.46it/s]


Validation: 35.7447%


                                                                                                                        

Epoch 18/50: Train Acc 77.7866%, Train Loss 708.3089, Learning Rate 0.0067


235it [02:38,  1.48it/s]


Validation: 44.0426%


                                                                                                                        

Epoch 19/50: Train Acc 75.1779%, Train Loss 830.3116, Learning Rate 0.0066


235it [02:39,  1.47it/s]


Validation: 28.9362%


                                                                                                                        

Epoch 20/50: Train Acc 61.8182%, Train Loss 1683.7948, Learning Rate 0.0064


235it [02:38,  1.48it/s]


Validation: 43.1915%


                                                                                                                        

Epoch 21/50: Train Acc 73.9921%, Train Loss 751.1762, Learning Rate 0.0062


235it [02:38,  1.48it/s]


Validation: 44.8936%


                                                                                                                        

Epoch 22/50: Train Acc 78.7352%, Train Loss 481.1679, Learning Rate 0.0060


235it [02:37,  1.49it/s]


Validation: 45.7447%


                                                                                                                        

Epoch 23/50: Train Acc 81.4229%, Train Loss 475.5321, Learning Rate 0.0059


235it [02:38,  1.48it/s]


Validation: 41.2766%


                                                                                                                        

Epoch 24/50: Train Acc 83.1621%, Train Loss 356.8769, Learning Rate 0.0056


235it [02:39,  1.47it/s]


Validation: 47.4468%


                                                                                                                        

Epoch 25/50: Train Acc 83.6364%, Train Loss 314.4207, Learning Rate 0.0054


235it [02:37,  1.49it/s]


Validation: 41.7021%


                                                                                                                        

Epoch 26/50: Train Acc 67.5889%, Train Loss 1181.9481, Learning Rate 0.0052


235it [02:38,  1.48it/s]


Validation: 47.4468%


                                                                                                                        

Epoch 27/50: Train Acc 76.5217%, Train Loss 606.6721, Learning Rate 0.0050


235it [02:39,  1.47it/s]


Validation: 27.4468%


                                                                                                                        

Epoch 28/50: Train Acc 83.7154%, Train Loss 383.3983, Learning Rate 0.0048


235it [02:41,  1.46it/s]


Validation: 54.2553%


                                                                                                                        

Epoch 29/50: Train Acc 80.9486%, Train Loss 379.0868, Learning Rate 0.0045


235it [02:37,  1.49it/s]


Validation: 44.4681%


                                                                                                                        

Epoch 30/50: Train Acc 78.1818%, Train Loss 488.6598, Learning Rate 0.0043


235it [02:37,  1.49it/s]


Validation: 46.8085%


                                                                                                                        

Epoch 31/50: Train Acc 79.2095%, Train Loss 460.2602, Learning Rate 0.0040


235it [02:41,  1.45it/s]


Validation: 48.2979%


                                                                                                                        

Epoch 32/50: Train Acc 83.2411%, Train Loss 306.6646, Learning Rate 0.0038


235it [02:41,  1.46it/s]


Validation: 32.3404%


                                                                                                                        

Epoch 33/50: Train Acc 82.6087%, Train Loss 406.6773, Learning Rate 0.0036


235it [02:38,  1.49it/s]


Validation: 36.5957%


                                                                                                                        

Epoch 34/50: Train Acc 87.7470%, Train Loss 210.7678, Learning Rate 0.0033


235it [02:38,  1.48it/s]


Validation: 51.2766%


                                                                                                                        

Epoch 35/50: Train Acc 86.4822%, Train Loss 225.4317, Learning Rate 0.0031


235it [02:37,  1.49it/s]


Validation: 53.6170%


                                                                                                                        

Epoch 36/50: Train Acc 87.7470%, Train Loss 214.9265, Learning Rate 0.0029


235it [02:40,  1.47it/s]


Validation: 40.8511%


                                                                                                                        

Epoch 37/50: Train Acc 84.0316%, Train Loss 213.3020, Learning Rate 0.0026


235it [02:38,  1.48it/s]


Validation: 39.3617%


                                                                                                                        

Epoch 38/50: Train Acc 88.3004%, Train Loss 188.9890, Learning Rate 0.0024


235it [02:38,  1.48it/s]


Validation: 47.2340%


                                                                                                                        

Epoch 39/50: Train Acc 89.2490%, Train Loss 133.0325, Learning Rate 0.0022


235it [02:40,  1.47it/s]


Validation: 48.7234%


                                                                                                                        

Epoch 40/50: Train Acc 88.7747%, Train Loss 120.8571, Learning Rate 0.0020


235it [02:34,  1.52it/s]


Validation: 38.9362%


                                                                                                                        

Epoch 41/50: Train Acc 89.5652%, Train Loss 114.9701, Learning Rate 0.0018


235it [02:40,  1.46it/s]


Validation: 47.4468%


                                                                                                                        

Epoch 42/50: Train Acc 94.7826%, Train Loss 45.6650, Learning Rate 0.0016


235it [02:38,  1.48it/s]


Validation: 40.2128%


                                                                                                                        

Epoch 43/50: Train Acc 93.8340%, Train Loss 57.7543, Learning Rate 0.0014


235it [02:38,  1.48it/s]


Validation: 43.8298%


                                                                                                                        

Epoch 44/50: Train Acc 93.4387%, Train Loss 60.8694, Learning Rate 0.0012


235it [02:40,  1.46it/s]


Validation: 37.6596%


                                                                                                                        

Epoch 45/50: Train Acc 95.2569%, Train Loss 28.7116, Learning Rate 0.0010


235it [02:41,  1.46it/s]


Validation: 47.4468%


                                                                                                                        

Epoch 46/50: Train Acc 95.0198%, Train Loss 28.9360, Learning Rate 0.0009


235it [02:39,  1.47it/s]


Validation: 37.4468%


                                                                                                                        

Epoch 47/50: Train Acc 96.6008%, Train Loss 21.7528, Learning Rate 0.0007


235it [02:38,  1.48it/s]


Validation: 47.4468%


                                                                                                                        

Epoch 48/50: Train Acc 98.3399%, Train Loss 9.4489, Learning Rate 0.0006


235it [02:38,  1.48it/s]


Validation: 45.5319%


                                                                                                                        

Epoch 49/50: Train Acc 98.2609%, Train Loss 7.6866, Learning Rate 0.0005


235it [02:38,  1.49it/s]


Validation: 45.3191%


                                                                                                                        

Epoch 50/50: Train Acc 97.8656%, Train Loss 5.8826, Learning Rate 0.0004


235it [02:41,  1.45it/s]


Validation: 44.2553%


In [None]:

def prep_test_data(filename, seq_len):
    """Split one labelled-video CSV into non-overlapping windows of ``seq_len`` rows.

    Args:
        filename: Path of a CSV file containing a ``frames`` and a ``labels`` column.
        seq_len: Number of consecutive rows that make up one window.

    Returns:
        A tuple ``(X, y, df)``. ``X`` and ``y`` are arrays of shape
        ``(n_windows, seq_len)`` holding the ``frames`` and ``labels`` values.
        ``df`` is an independent copy of the first ``n_windows * seq_len`` rows
        of the CSV, so the caller can add columns to it safely. Trailing rows
        that do not fill a complete window are dropped.

    Raises:
        ValueError: If ``seq_len`` is not positive, or the file holds fewer
            than ``seq_len`` rows (no complete window can be built).
    """
    if seq_len <= 0:
        raise ValueError("seq_len must be a positive integer, got {}".format(seq_len))

    print(filename)
    df = pd.read_csv(filename)
    print(len(df))

    # Count complete windows directly. The previous bound
    # ``range(0, len(df) - seq_len, seq_len)`` silently skipped the final
    # window whenever len(df) was an exact multiple of seq_len.
    n_windows = len(df) // seq_len
    if n_windows == 0:
        raise ValueError(
            "{} has {} rows, fewer than one window of seq_len={}".format(
                filename, len(df), seq_len))
    n_rows = n_windows * seq_len

    frames = df['frames'].to_numpy()
    labels = df['labels'].to_numpy()
    starts = range(0, n_rows, seq_len)
    X = np.stack([frames[s:s + seq_len] for s in starts], axis=0)
    y = np.stack([labels[s:s + seq_len] for s in starts], axis=0)
    print(X.shape)
    print(y.shape)

    # Return a copy rather than a slice so that adding a column downstream
    # does not write into (or warn about) a view of the original frame.
    return X, y, df.iloc[:n_rows].copy()

def validate_test(test_loader, test_dataset, model):
    """Run ``model`` over ``test_loader`` and gather its arg-max predictions.

    Uses the notebook-level ``device`` and ``SEQUENCE_LENGTH`` names.

    Args:
        test_loader: Iterable yielding ``(inputs, targets)`` batches.
        test_dataset: Dataset behind the loader; only its ``len()`` is used.
        model: Network to evaluate; its output is arg-maxed over dimension 2.

    Returns:
        A tuple ``(predictions, acc)``: the flattened numpy array of predicted
        class indices, and the accuracy in percent, computed as matching
        predictions divided by ``len(test_dataset) * SEQUENCE_LENGTH``.
    """
    model.eval()
    batch_predictions = []
    n_correct = 0

    for _, (inputs, targets) in tqdm(enumerate(test_loader)):
        inputs = inputs.to(device)
        targets = targets.to(device)

        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            logits = model(inputs)
            del inputs

        batch_pred = torch.argmax(logits, axis=2)
        del logits  # release the raw outputs as soon as the classes are known

        batch_predictions.append(batch_pred.cpu().detach().numpy())
        n_correct += int((batch_pred == targets).sum())

    print(len(batch_predictions))
    flat_predictions = np.concatenate(batch_predictions, axis = 0)
    print(flat_predictions.shape)
    flat_predictions = flat_predictions.flatten()
    print(flat_predictions.shape)

    n_expected = len(test_dataset) * SEQUENCE_LENGTH
    acc = (100 * n_correct / n_expected)
    print("Validation: {:.04f}%".format(acc))
    return flat_predictions, acc


In [None]:
# Evaluate a saved ConvLSTM checkpoint on every labelled-video CSV in
# DATA_SAVE_PATH and write one predictions CSV per video.
num_classes = 3
convlstm_hidden = 128
num_conv_lstm_layers = 2
BATCH = 2

# Choose the device first: the model is moved to it below, so defining it
# afterwards only worked when `device` was left over from an earlier cell.
cuda = torch.cuda.is_available()
print(cuda)
device = torch.device("cuda") if cuda else torch.device("cpu")

model = ConvLSTMModel(CHANNELS,convlstm_hidden,(3,3),num_conv_lstm_layers,True)
# map_location lets a checkpoint saved on GPU be loaded on a CPU-only machine.
model.load_state_dict(torch.load('./models/attempt3_1sec_prior/model_params_00000013.pth', map_location=device))
model = model.to(device)

# train_transforms = [ttf.ToTensor(), transforms.Resize((HEIGHT, WIDTH)), transforms.ColorJitter(), transforms.RandomRotation(10), transforms.GaussianBlur(3)]
val_transforms = transforms.Compose([transforms.ToTensor(), transforms.Resize((HEIGHT, WIDTH))])
# f = {'../../data/videos/walking_data_2.csv',
#  '../../data/videos/walking_data_5.csv'}

# Loader settings do not depend on the file, so build them once.
test_args = dict(shuffle=False, batch_size=BATCH, drop_last=False)
if cuda:
    test_args.update(num_workers=2, pin_memory=True)

# Sorted for a reproducible order; non-CSV entries (e.g. .ipynb_checkpoints)
# are skipped because prep_test_data reads each path with pd.read_csv.
for f in sorted(os.listdir(DATA_SAVE_PATH)):
    if not f.endswith(".csv"):
        continue
    # Base name of the video, used in the output file name (`fn` was
    # previously referenced without ever being defined).
    fn = osp.splitext(f)[0]

    X, y, df = prep_test_data(osp.join(DATA_SAVE_PATH,f), seq_len = SEQUENCE_LENGTH)
    test_dataset = VideoDataset(X, y, transforms=val_transforms, seq_len = SEQUENCE_LENGTH, base_path = "../../data_temp/processed/")
    test_loader = DataLoader(test_dataset, **test_args)

    predictions, acc = validate_test(test_loader, test_dataset, model)
    # assign() builds a new frame instead of writing into a slice of the CSV frame.
    df = df.assign(predictions=predictions)
    print(df.head())
    df.to_csv("convlstm_predictions_{}_{}.csv".format(fn,acc), index=None)



In [None]:
# """
#   Preprocess video data.
# """
# def label_map(lab):
#     if(lab == 0):
#         return 2
#     elif(lab == -1):
#         return 0
#     else:
#         return 1
    
# def get_all_files_from_dir(directory, vids = False):
#     file_paths = []
#     print(directory)
#     try:
#         for root, dirs, files in os.walk(directory):
#             # print(files)
#             if(vids):
#                 file_paths += [os.path.join(root, x,x+".mp4") for x in dirs]
#             else:
#                 file_paths += [os.path.join(root, x) for x in files]
#         return sorted(file_paths)
#     except Exception as e:
#         print(e)
    
# def turn_high_labeled(vid_context, yy):
#     start = -10
#     if(len(vid_context)<10):
#         start = 0
    
#     for s in vid_context[start:]:
#         if(s==yy):
#             return True
#     return False

# def process_video(video_file, labels):
#     video_filename = video_file.split('/')[-1].split('.')[0]
#     vidcap = cv2.VideoCapture(video_file)

#     ctr = 0
#     video_frames = []
#     video_context = []
#     video_labels = []
    
#     hasFrames,image = vidcap.read()
#     tot_frames = 0
#     while (hasFrames):
#         tot_frames += 1
#         save_file_name = video_filename + "_" + str(ctr) + ".npy"
        
#         vid_pos = vidcap.get(cv2.CAP_PROP_POS_MSEC)
#         label_ts = vid_pos + 1000 #take 1 sec ahead labels 
#         label_ts = label_ts - (label_ts%100)
#         if(label_ts not in labels.keys()):
#             print(label_ts)
#             hasFrames,image = vidcap.read()
#             continue

#         image = cv2.resize(image, (WIDTH, HEIGHT), interpolation = cv2.INTER_AREA)
#         np.save(osp.join(PROCESSED_PATH, save_file_name), image)  
#         video_frames.append(save_file_name)

#         label = labels[label_ts]
#         video_labels.append(label)
        
#         context_ts = vid_pos + 3000 #take 3 sec ahead labels
#         context_ts = context_ts - (context_ts%100) + 100
        
#         if(context_ts in labels.keys() and not turn_high_labeled(video_context,labels[context_ts])):
#             video_context.append(labels[context_ts])
#         else:
#             video_context.append(2) #Default: FRONT == 2
        
        
#         hasFrames,image = vidcap.read()
#         ctr += 1
        
#     df = pd.DataFrame({'frames': video_frames, 'gps': video_context, 'labels': video_labels})
#     df.to_csv(osp.join(DATA_SAVE_PATH,video_filename+".csv"), index=None)

#     print("After processing:")
#     print("Total frames: ",tot_frames)
#     print("Number of frames labelled: ", ctr)
    
# def preprocess():
#     f = np.load(LABEL_FILE, allow_pickle = True)
#     # print(f.keys())
#     for video_file in get_all_files_from_dir(VID_PATH):
#         video_filename = video_file.split('/')[-1].split('.')[0]
#         print(video_filename)
#         if(video_filename+".csv" not in os.listdir(DATA_SAVE_PATH)):
#             labels = f[video_filename]['Sensor']['direction_label']['direction']
           
#             for k,v in labels.items():
#                 labels[k] = label_map(v)
                
#             process_video(video_file, labels)
#             print("Finished processing ", video_file)
        
# def process_videos(vid_path = VID_PATH_OG):
#     fp = get_all_files_from_dir(vid_path, vids=True)
#     print(fp)
#     for fl in fp:
#         video_filename = fl.split('/')[-1]
#         if(video_filename not in os.listdir(VID_PATH)):
#             ffmpeg.input(fl).filter('fps', fps=FPS, round='up').output(VID_PATH+video_filename).run() 