In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import torchvision  
import subprocess
import cv2
import ffmpeg
import os
import os.path as osp
import torchvision.models as models

from tqdm import tqdm
import numpy as np
from sklearn.model_selection import train_test_split
from torchvision import transforms
import matplotlib.pyplot as plt

In [9]:
VID_PATH = "../../data/extracted_videos/"
VID_PATH_OG = "../../data/videos_new/"
LABEL_FILE = "../../data/data.npy"
PROCESSED_PATH = "../../data_temp/processed/"
DATA_SAVE_PATH = "../../data_temp/labeled_videos/"
MODELS_PATHS = "./models/"
# LABEL_PATH = '../../data/labels/'
FRAME_RATE = 2

In [10]:
"""
  Preprocess video data.
"""
def label_map(lab):
    if(lab == 0):
        return 2
    elif(lab == -1):
        return 0
    else:
        return 1
    
def get_all_files_from_dir(directory, vids = False):
    file_paths = []
    print(directory)
    try:
        for root, dirs, files in os.walk(directory):
            # print(files)
            if(vids):
                file_paths += [os.path.join(root, x,x+".mp4") for x in dirs]
            else:
                file_paths += [os.path.join(root, x) for x in files]
        return sorted(file_paths)
    except Exception as e:
        print(e)
    
def process_video(video_file, labels):
    video_filename = video_file.split('/')[-1].split('.')[0]
    vidcap = cv2.VideoCapture(video_file)

    ctr = 0
    video_frames = []
    video_labels = []
    
    hasFrames,image = vidcap.read()

    while (hasFrames):
        save_file_name = video_filename + "_" + str(ctr) + ".npy"
        np.save(osp.join(PROCESSED_PATH, save_file_name), image)  
        label_ts = vidcap.get(cv2.CAP_PROP_POS_MSEC) + 1000 #take 1 sec ahead labels 
        label_ts = label_ts - (label_ts%100)
        if(label_ts not in labels.keys()):
            print(label_ts)
            hasFrames,image = vidcap.read()
            continue
        label = labels[label_ts]
        video_labels.append(label_map(label))
        video_frames.append(save_file_name)
        hasFrames,image = vidcap.read()
        ctr += 1
        
    df = pd.DataFrame({'frames': video_frames, 'labels': video_labels})
    df.to_csv(osp.join(DATA_SAVE_PATH,video_filename+".csv"), index=None)

    print("After processing:")
    print("Number of frames labelled: ", ctr)
    
def preprocess():
    f = np.load(LABEL_FILE, allow_pickle = True)
    for video_file in get_all_files_from_dir(VID_PATH):
        video_filename = video_file.split('/')[-1].split('.')[0]
        labels = f[video_filename]['Sensor']['direction_label']['direction']
        process_video(video_file, labels)
        print("Finished processing ", video_file)
        
def process_videos():
    fp = get_all_files_from_dir(VID_PATH_OG, vids=True)
    print(fp)
    for fl in fp:
        video_filename = fl.split('/')[-1]
        ffmpeg.input(fl).filter('fps', fps=10, round='up').output(VID_PATH+video_filename).run() 

In [11]:
### preprocess videos
# process_videos()
preprocess()

../../data/extracted_videos/
16200.0
16300.0
16400.0
16500.0
16600.0
16700.0
16800.0
16900.0
17000.0
17100.0
After processing:
Number of frames labelled:  152
Finished processing  ../../data/extracted_videos/2022-04-04T16:06:23.406Z.mp4
27400.0
27500.0
27600.0
27700.0
27800.0
27900.0
28000.0
28100.0
28200.0
28300.0
After processing:
Number of frames labelled:  264
Finished processing  ../../data/extracted_videos/2022-04-04T16:07:08.854Z.mp4
18200.0
18300.0
18400.0
18500.0
18600.0
18700.0
18800.0
18900.0
19000.0
19100.0
19200.0
After processing:
Number of frames labelled:  172
Finished processing  ../../data/extracted_videos/2022-04-04T16:07:57.983Z.mp4
51800.0
51900.0
52000.0
52100.0
52200.0
52300.0
52400.0
52500.0
52600.0
52700.0
52800.0
After processing:
Number of frames labelled:  508
Finished processing  ../../data/extracted_videos/2022-04-04T16:08:26.648Z.mp4
14300.0
14400.0
14500.0
14600.0
14700.0
14800.0
14900.0
15000.0
15100.0
15200.0
15300.0
After processing:
Number of frames 

In [13]:
BATCH = 64
SEQUENCE_LENGTH = 10
HEIGHT = 128
WIDTH = 128
CHANNELS = 3

In [14]:
def save(model, index, optim = False):
    if not os.path.exists(MODELS_PATHS+'/attempt_7_frames_resnet34_new_data'):
        os.mkdir(MODELS_PATHS+'/attempt_7_frames_resnet34_new_data')
    if(optim):
        torch.save(model.state_dict(), MODELS_PATHS+'/attempt_7_frames_resnet34_new_data'+'/optimizer_params_{:08d}.pth'.format(index))
    else:
        torch.save(model.state_dict(), MODELS_PATHS+'/attempt_7_frames_resnet34_new_data'+'/model_params_{:08d}.pth'.format(index))

In [15]:
class ResNet18(nn.Module):
    """
    Container for ResNet50 s.t. it can be used for metric learning.
    The Network has been broken down to allow for higher modularity, if one wishes
    to target specific layers/blocks directly.
    """

    def __init__(self, fixconvs=False, pretrained=True):
        super(ResNet18, self).__init__()
        self.model = models.resnet18(pretrained=pretrained)
        if fixconvs:
            for param in self.model.parameters():
                param.requires_grad = False

        self.regressor = nn.Linear(self.model.fc.in_features, 3)
        self.dropout = torch.nn.Dropout(p=0.05)
        self.model = torch.nn.Sequential(*(list(self.model.children())[:-1]))
        # model.fc.weight.requires_grad = True
        # model.fc.bias.requires_grad = True

    def forward(self, x):
        x = self.model(x)
        x = torch.squeeze(x)
        x = self.dropout(x)
        x = self.regressor(x)
        return x

In [16]:
class FrameDataset(Dataset):
    def __init__(self, x, y, transforms, base_path):
        self.transforms = transforms
        self.X = x
        self.y = y
        # self.seq_len = seq_len
        self.base_path = base_path
        
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        seq_filename = self.X[idx]
        try:
            frame = np.load(osp.join(self.base_path,seq_filename), allow_pickle=True)
            frame = (frame - frame.min())/(frame.max() - frame.min())
            frame = self.transforms(frame)
            
        except Exception as ex:
            print("Error occured while loading frame: ", ex)
            frame = torch.zeros((CHANNELS, HEIGHT, WIDTH))
        
        return frame, self.y[idx]
        

In [20]:
def make_tt_split(data_folder):
    X = []
    y = []
    
    for filename in os.listdir(data_folder):
        if(filename[-3:]=="csv"):
            df = pd.read_csv(osp.join(data_folder,filename))
            X.append(df['frames'])
            y.append(df['labels'])
    
    X = pd.concat(X)
    # print(X.head())
    X.reset_index(drop=True,inplace=True)
    X = X.to_numpy()

    
    y = pd.concat(y)
    y.reset_index(drop=True,inplace=True)
    y = y.to_numpy()
            
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    return X_train, X_test, y_train, y_test


In [21]:
cuda = torch.cuda.is_available()
print(cuda)
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# train_transforms = [ttf.ToTensor(), transforms.Resize((HEIGHT, WIDTH)), transforms.ColorJitter(), transforms.RandomRotation(10), transforms.GaussianBlur(3)]
train_transforms = transforms.Compose([transforms.ToTensor(), transforms.Resize((HEIGHT, WIDTH))])
val_transforms = transforms.Compose([transforms.ToTensor(), transforms.Resize((HEIGHT, WIDTH))])

X_train, X_test, y_train, y_test = make_tt_split(DATA_SAVE_PATH)
train_dataset = FrameDataset(X_train, y_train, transforms=train_transforms, base_path = PROCESSED_PATH)
val_dataset = FrameDataset(X_test, y_test, transforms=val_transforms, base_path = PROCESSED_PATH)

train_args = dict(shuffle=True, batch_size=BATCH, num_workers=1, pin_memory=True, drop_last=False) if cuda else dict(shuffle=True, batch_size=BATCH, drop_last=False)
train_loader = DataLoader(train_dataset, **train_args)

val_args = dict(shuffle=False, batch_size=BATCH, num_workers=2, pin_memory=True, drop_last=False) if cuda else dict(shuffle=False, batch_size=BATCH, drop_last=False)
val_loader = DataLoader(val_dataset, **val_args)



True


In [22]:
print(len(train_dataset))
print(len(val_dataset))

3455
864


In [23]:
def validate(val_loader, val_dataset, model):
    # validation
    model.eval()
    val_num_correct = 0
   
    for i, (vx, vy) in enumerate(val_loader):
      
        vx = vx.float().to(device)
        vy = vy.to(device)

        with torch.no_grad():
            outputs = model(vx)
            del vx

        val_num_correct += int((torch.argmax(outputs, axis=1) == vy).sum())
        del outputs
        # break
    

    print("Validation: {:.04f}%".format(100 * val_num_correct / (len(val_dataset))))


In [24]:
lr = 0.005 #changed from 0.01
epochs = 25
lamda = 1e-3  #L2 regularization #changed from 1e-4
num_classes = 3
convlstm_hidden = 128
num_conv_lstm_layers = 2

model = ResNet18()
model.load_state_dict(torch.load('./models/attempt_7_frames_resnet34_new_data/model_params_00000003.pth'))
model = model.to(device)

criterion = nn.CrossEntropyLoss()
# optimizer = torch.optim.SGD(model.parameters(), lr=lr, weight_decay=lamda, momentum=0.9)
optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=lamda)
optimizer.load_state_dict(torch.load('./models/attempt_7_frames_resnet34_new_data/optimizer_params_00000003.pth'))

for g in optimizer.param_groups:
    g['lr'] = lr
    # g['weight_decay']= lamda
    
scaler = torch.cuda.amp.GradScaler()
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=(len(train_loader) * epochs))
# print(model)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /home/ubuntu/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


  0%|          | 0.00/44.7M [00:00<?, ?B/s]

In [None]:
for epoch in range(4,epochs):
    batch_bar = tqdm(total=len(train_loader), dynamic_ncols=True, leave=False, position=0, desc='Train') 

    num_correct = 0
    total_loss = 0
    
    for i, (x, y) in enumerate(train_loader):
       
        model.train()
        optimizer.zero_grad()

        x = x.float().to(device)
        y = y.to(device)
        
        with torch.cuda.amp.autocast():
            outputs = model(x)
            del x
            loss = criterion(outputs.view(-1,num_classes), y.long().view(-1))

        # print(outputs.shape)
        num_correct += int((torch.argmax(outputs, axis=1) == y).sum())
        del outputs
        total_loss += float(loss)

        batch_bar.set_postfix(
            acc="{:.04f}%".format(100 * num_correct / ((i + 1) * BATCH)),
            loss="{:.04f}".format(float(total_loss / (i + 1))),
            num_correct=num_correct,
            lr="{:.04f}".format(float(optimizer.param_groups[0]['lr'])))
        
        scaler.scale(loss).backward()
        scaler.step(optimizer) 
        scaler.update()

        scheduler.step()

        batch_bar.update() # Update tqdm bar
        # break
        

    batch_bar.close()

    print("Epoch {}/{}: Train Acc {:.04f}%, Train Loss {:.04f}, Learning Rate {:.04f}".format(
        epoch + 1,
        epochs,
        100 * num_correct / (len(train_dataset)),
        float(total_loss / len(train_loader)),
        float(optimizer.param_groups[0]['lr'])))
    
    save(model, epoch)
    save(optimizer, epoch, optim=True)
    
    validate(val_loader, val_dataset, model)
    
batch_bar.close()

                                                                                                                                       

Epoch 5/25: Train Acc 80.1737%, Train Loss 0.5839, Learning Rate 0.0050
Validation: 80.3241%


                                                                                                                                       

Epoch 6/25: Train Acc 81.0999%, Train Loss 0.5218, Learning Rate 0.0049
Validation: 81.2500%


                                                                                                                                       

Epoch 7/25: Train Acc 81.2446%, Train Loss 0.5001, Learning Rate 0.0048
Validation: 54.5139%


                                                                                                                                       

Epoch 8/25: Train Acc 83.7337%, Train Loss 0.4287, Learning Rate 0.0047
Validation: 82.4074%


                                                                                                                                       

Epoch 9/25: Train Acc 83.9942%, Train Loss 0.4207, Learning Rate 0.0045
Validation: 74.7685%


                                                                                                                                       

Epoch 10/25: Train Acc 86.1360%, Train Loss 0.3628, Learning Rate 0.0043
Validation: 80.9028%


Train:  98%|██████████████████████████████████▎| 53/54 [03:55<00:04,  4.48s/it, acc=87.5000%, loss=0.3299, lr=0.0041, num_correct=2968]