In [2]:
import sys
import os
os.chdir('/scratch/nhl256/dl_project/code/')

import random
import argparse

import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision

from data_helper import LabeledDataset
from helper import *

import math
import time

import utils

In [3]:
# Dataset locations; both are relative paths, so they resolve against the
# current working directory.
image_folder = 'data/data'
annotation_csv = 'data/data/annotation.csv'

# Use the first GPU when CUDA is usable, otherwise fall back to the CPU.
cuda = torch.cuda.is_available()
device = torch.device("cuda:0") if cuda else torch.device("cpu")
print(device)

cuda:0


In [30]:
def get_transform():
    """Return the per-image transform: a plain torchvision ``ToTensor()``."""
    to_tensor = torchvision.transforms.ToTensor()
    return to_tensor

In [39]:
# Scene ids 120..133 make up the labeled split used in this notebook.
labeled_scene_index = np.arange(120, 134)
labeled_trainset = LabeledDataset(image_folder=image_folder,
                                  annotation_file=annotation_csv,
                                  scene_index=labeled_scene_index,
                                  transform=get_transform(),
                                  extra_info=False)

# One sample per batch, in dataset order.
dataloader = torch.utils.data.DataLoader(labeled_trainset, batch_size=1, shuffle=False, num_workers=4)

# Shuffled batches of two samples. `collate_fn` is not defined in this cell
# (presumably it comes from `from helper import *` -- TODO confirm).
trainloader = torch.utils.data.DataLoader(
    labeled_trainset,
    batch_size=2,
    shuffle=True,
    num_workers=2,
    collate_fn=collate_fn,
)

In [49]:
# Draw one batch from the shuffled training loader. `train_data` was previously
# read without ever being assigned in the notebook, so this cell raised a
# NameError after Restart & Run All.
train_data = next(iter(trainloader))
train_sample, train_target, train_road_image = train_data
# The batch element for the images is a sequence of per-sample tensors
# (it is stacked here), so stack it into one tensor before moving it to the GPU.
train_sample = torch.stack(train_sample)
train_sample = train_sample.cuda()

In [50]:
# Shape of the stacked training batch.
train_sample.shape

torch.Size([2, 6, 3, 256, 306])

### Test on a single sample (batch size 1)


In [74]:
# Pull the first batch from the unshuffled, batch-size-1 loader and move the
# images to the GPU.
first_batch = iter(dataloader)
sample, target, road_image = next(first_batch)
sample = sample.cuda()

In [75]:
# Shape of the single-sample batch drawn from `dataloader`.
sample.shape

torch.Size([1, 6, 3, 256, 306])

In [77]:
def extract_features(one_sample):
    """Run the images of one sample through a truncated ResNet-18.

    The network is torchvision's ``resnet18`` with its last two child modules
    removed, moved to the GPU. It is built once and cached on the function
    object: previously a new, randomly initialised network was created on
    every call, which was slow and produced different features for the same
    input on each call.

    NOTE(review): the weights are still randomly initialised
    (``pretrained=False``) and no checkpoint is loaded into this network, so
    features are only consistent within one process -- confirm this is intended.

    Args:
        one_sample: CUDA tensor holding the images of one sample (the network
            lives on the GPU, so the input must too).

    Returns:
        The feature maps produced by the truncated network.
    """
    backbone = getattr(extract_features, "_backbone", None)
    if backbone is None:
        resnet = torchvision.models.resnet18(pretrained=False)
        # Keep everything except the final two child modules.
        backbone = nn.Sequential(*list(resnet.children())[:-2])
        backbone.cuda()
        extract_features._backbone = backbone
    return backbone(one_sample)

def concat_features(features, dim = 2):
    """Split ``features`` along its first axis and join the pieces along ``dim``.

    ``dim`` indexes the axes of a single piece (i.e. after the first axis has
    been removed):
        dim 0 ==> pieces are stacked in the channel dimension
        dim 1 ==> pieces are stacked in the row dimension
        dim 2 ==> pieces are stacked in the column dimension
    """
    return torch.cat(torch.unbind(features, dim=0), dim=dim)

def prepare_inputs(samples):
    """
    Convert a batch of samples into the per-sample tensors fed to the detector.

    Input: samples is a cuda tensor with size [batch_size, 6, 3, 256, 306]
    Output: a list of batch_size tensors, each reshaped to size [3, 512, 160]

    The fixed ``view(3, 512, 160)`` needs exactly 3 * 512 * 160 elements per
    sample, so other input sizes will fail at that step.
    """
    def _to_detector_input(image_tensor):
        # Features for every image of one sample, joined along the column axis.
        stitched = concat_features(extract_features(image_tensor))
        return stitched.view(3, 512, 160)

    return [_to_detector_input(samples[idx]) for idx in range(samples.shape[0])]

# Convert [xmin, ymin, xmax, ymax] boxes to the [N, 2, 4] corner layout that get_bounding_boxes must return.
def reorder_coord(pred_bboxes):
    """Convert axis-aligned boxes into four-corner form.

    Args:
        pred_bboxes: tensor of shape [N, 4] whose columns are
            (xmin, ymin, xmax, ymax). N may be 0.

    Returns:
        Tensor of shape [N, 2, 4]. For each box, row 0 holds the corner x
        values (xmax, xmax, xmin, xmin) and row 1 the corner y values
        (ymax, ymin, ymax, ymin).
    """
    xmin, ymin, xmax, ymax = pred_bboxes.unbind(1)
    corner_x = torch.stack((xmax, xmax, xmin, xmin), dim=1)
    corner_y = torch.stack((ymax, ymin, ymax, ymin), dim=1)
    # Stacking straight into [N, 2, 4] avoids `view(-1, 2, 4)`, which raises
    # when there are no boxes because -1 cannot be inferred from zero elements.
    return torch.stack((corner_x, corner_y), dim=1)


In [83]:
# Build the detector inputs for the single-sample batch.
inputs = prepare_inputs(sample)

In [92]:
# prepare_inputs returns one list entry per sample in the batch.
len(inputs)

1

## Get Model

In [4]:

import math
import time

import utils

In [5]:
# Pretrained torchvision Faster R-CNN (ResNet-50 FPN), placed on the selected device.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True).to(device)

In [86]:
model_path = '/scratch/nhl256/dl_project/model/object_detection_resnet18_epoch2.pth'
# map_location remaps the stored tensors onto the active device, so the
# checkpoint also loads on a host without the device it was saved from.
model.load_state_dict(torch.load(model_path, map_location=device))
# Switch to inference mode before predicting.
model.eval()

In [129]:
def get_bounding_boxes(samples):
    """Predict bounding boxes for a batch with the notebook-level `model`.

    Args:
        samples: cuda tensor with size [batch_size, 6, 3, 256, 306].

    Returns:
        A tuple with size 'batch_size'; each element is a tensor [N, 2, 4]
        where N is the number of predicted objects for that sample.
    """
    # Inference only: skip autograd bookkeeping for both the feature
    # extraction and the detector forward pass.
    with torch.no_grad():
        inputs = prepare_inputs(samples)
        predictions = model(inputs)
    # A tuple matches the documented contract and still supports len(),
    # indexing and iteration like the list returned before.
    return tuple(reorder_coord(prediction['boxes']) for prediction in predictions)

In [138]:
# Smoke-test get_bounding_boxes on the single-sample batch and on the
# two-sample training batch.
res = get_bounding_boxes(sample)
train_res = get_bounding_boxes(train_sample)

In [136]:
# Boxes predicted for the first (and only) sample of the single-sample batch.
res[0].shape

torch.Size([60, 2, 4])

In [142]:
# One entry per sample in the training batch; show each entry's box tensor shape.
print(len(train_res))
for boxes in train_res:
    print(boxes.shape)

2
torch.Size([58, 2, 4])
torch.Size([73, 2, 4])


## model_loader.py

In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision

import math
import time

import utils

In [11]:
def extract_features(one_sample):
    """Run the images of one sample through a truncated ResNet-18.

    The network is torchvision's ``resnet18`` with its last two child modules
    removed, moved to the GPU. It is built once and cached on the function
    object: previously a new, randomly initialised network was created on
    every call, which was slow and produced different features for the same
    input on each call.

    NOTE(review): the weights are still randomly initialised
    (``pretrained=False``) and no checkpoint is loaded into this network, so
    features are only consistent within one process -- confirm this is intended.

    Args:
        one_sample: CUDA tensor holding the images of one sample (the network
            lives on the GPU, so the input must too).

    Returns:
        The feature maps produced by the truncated network.
    """
    backbone = getattr(extract_features, "_backbone", None)
    if backbone is None:
        resnet = torchvision.models.resnet18(pretrained=False)
        # Keep everything except the final two child modules.
        backbone = nn.Sequential(*list(resnet.children())[:-2])
        backbone.cuda()
        extract_features._backbone = backbone
    return backbone(one_sample)

def concat_features(features, dim = 2):
    """Split ``features`` along its first axis and join the pieces along ``dim``.

    ``dim`` indexes the axes of a single piece (i.e. after the first axis has
    been removed):
        dim 0 ==> pieces are stacked in the channel dimension
        dim 1 ==> pieces are stacked in the row dimension
        dim 2 ==> pieces are stacked in the column dimension
    """
    return torch.cat(torch.unbind(features, dim=0), dim=dim)

def prepare_inputs(samples):
    """
    Convert a batch of samples into the per-sample tensors fed to the detector.

    Input: samples is a cuda tensor with size [batch_size, 6, 3, 256, 306]
    Output: a list of batch_size tensors, each reshaped to size [3, 512, 160]

    The fixed ``view(3, 512, 160)`` needs exactly 3 * 512 * 160 elements per
    sample, so other input sizes will fail at that step.
    """
    def _to_detector_input(image_tensor):
        # Features for every image of one sample, joined along the column axis.
        stitched = concat_features(extract_features(image_tensor))
        return stitched.view(3, 512, 160)

    return [_to_detector_input(samples[idx]) for idx in range(samples.shape[0])]

# Convert [xmin, ymin, xmax, ymax] boxes to the [N, 2, 4] corner layout that get_bounding_boxes must return.
def reorder_coord(pred_bboxes):
    """Convert axis-aligned boxes into four-corner form.

    Args:
        pred_bboxes: tensor of shape [N, 4] whose columns are
            (xmin, ymin, xmax, ymax). N may be 0.

    Returns:
        Tensor of shape [N, 2, 4]. For each box, row 0 holds the corner x
        values (xmax, xmax, xmin, xmin) and row 1 the corner y values
        (ymax, ymin, ymax, ymin).
    """
    xmin, ymin, xmax, ymax = pred_bboxes.unbind(1)
    corner_x = torch.stack((xmax, xmax, xmin, xmin), dim=1)
    corner_y = torch.stack((ymax, ymin, ymax, ymin), dim=1)
    # Stacking straight into [N, 2, 4] avoids `view(-1, 2, 4)`, which raises
    # when there are no boxes because -1 cannot be inferred from zero elements.
    return torch.stack((corner_x, corner_y), dim=1)


In [12]:
def get_transform():
    """Return the per-image transform: a plain torchvision ``ToTensor()``."""
    to_tensor = torchvision.transforms.ToTensor()
    return to_tensor

In [19]:
class ModelLoader():
    """Wraps the trained detector behind the evaluation interface.

    The constructor builds a torchvision Faster R-CNN (ResNet-50 FPN), loads
    the weights stored in ``model_file`` and switches the network to eval
    mode. ``get_bounding_boxes`` runs the detector; ``get_binary_road_map``
    is still a random placeholder.
    """
    # Fill the information for your team
    team_name = 'team_name'
    round_number = 1
    # NOTE(review): class-level list, shared by every instance if mutated.
    team_member = []
    contact_email = '@nyu.edu'

    def __init__(self, model_file='put_your_model_file_name_here'):
        """Create the detector and load its weights.

        Args:
            model_file: path to a state_dict checkpoint compatible with
                ``fasterrcnn_resnet50_fpn``.
        """
        # Resolve the device locally so the class does not depend on a
        # notebook-global `device`.
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
        # Move the network created here; the previous code moved the
        # notebook-global `model` and silently discarded this one.
        self.model = detector.to(self.device)
        # map_location lets the checkpoint load even when it was saved from a
        # device that is not available on this host.
        self.model.load_state_dict(torch.load(model_file, map_location=self.device))
        self.model.eval()

    def get_bounding_boxes(self, samples):
        """Predict bounding boxes for a batch.

        Args:
            samples: cuda tensor with size [batch_size, 6, 3, 256, 306].

        Returns:
            A tuple with size 'batch_size'; each element is a tensor
            [N, 2, 4] where N is the number of predicted objects.
        """
        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            inputs = prepare_inputs(samples)
            predictions = self.model(inputs)
        # A tuple matches the documented contract and still supports len(),
        # indexing and iteration like the list returned before.
        return tuple(reorder_coord(prediction['boxes']) for prediction in predictions)

    def get_binary_road_map(self, samples):
        """Placeholder road-map prediction (random mask).

        Args:
            samples: tensor with size [batch_size, 6, 3, 256, 306].

        Returns:
            Boolean tensor with size [batch_size, 800, 800] on the same device
            as ``samples``. Previously the batch size was hard-coded to 1 and
            the result was always created on the CPU.
        """
        batch_size = samples.shape[0]
        return torch.rand(batch_size, 800, 800, device=samples.device) > 0.5

In [144]:
# model_loader = ModelLoader()
# predicted_bounding_boxes = model_loader.get_bounding_boxes(sample)[0].cpu()
# ats_bounding_boxes = compute_ats_bounding_boxes(predicted_bounding_boxes,
#                                                 target['bounding_box'][0])

## Test for the whole dataloader

In [20]:
# Scene ids 120..133 make up the labeled split evaluated below.
labeled_scene_index = np.arange(120, 134)
labeled_trainset = LabeledDataset(image_folder=image_folder,
                                  annotation_file=annotation_csv,
                                  scene_index=labeled_scene_index,
                                  transform=get_transform(),
                                  extra_info=False)

# One sample per batch, in dataset order.
dataloader = torch.utils.data.DataLoader(labeled_trainset, batch_size=1, shuffle=False, num_workers=4)

# Wrap the saved detector checkpoint in the evaluation interface.
model_path = '/scratch/nhl256/dl_project/model/object_detection_resnet18_epoch2.pth'
model_loader = ModelLoader(model_file=model_path)


In [21]:
# With batch_size=1 the number of batches equals the number of samples.
num_batches = len(dataloader)
print('number of samples in dataloader is {}'.format(num_batches))

number of samples in dataloader is 1764


In [22]:
# Running tallies for one pass over the labeled scenes.
# total_ts_road_map is initialised but never updated in this loop.
total, total_ats_bounding_boxes, total_ts_road_map = 0, 0, 0
for i, data in enumerate(dataloader):
    total = total + 1
    sample, target, road_image = data
    sample = sample.cuda()

    # batch_size is 1, so take the only entry of the returned sequence.
    predicted_bounding_boxes = model_loader.get_bounding_boxes(sample)[0].cpu()
    print(predicted_bounding_boxes.shape)

    # compute_ats_bounding_boxes is not defined in this notebook
    # (presumably it comes from `from helper import *` -- TODO confirm).
    ats_bounding_boxes = compute_ats_bounding_boxes(
        predicted_bounding_boxes,
        target['bounding_box'][0],
    )

    total_ats_bounding_boxes += ats_bounding_boxes

torch.Size([70, 2, 4])
torch.Size([67, 2, 4])
torch.Size([64, 2, 4])
torch.Size([64, 2, 4])
torch.Size([70, 2, 4])
torch.Size([68, 2, 4])
torch.Size([65, 2, 4])
torch.Size([67, 2, 4])
torch.Size([59, 2, 4])
torch.Size([64, 2, 4])
torch.Size([58, 2, 4])
torch.Size([64, 2, 4])
torch.Size([59, 2, 4])
torch.Size([71, 2, 4])
torch.Size([70, 2, 4])
torch.Size([64, 2, 4])
torch.Size([66, 2, 4])
torch.Size([69, 2, 4])
torch.Size([60, 2, 4])
torch.Size([72, 2, 4])
torch.Size([64, 2, 4])
torch.Size([66, 2, 4])
torch.Size([68, 2, 4])
torch.Size([63, 2, 4])
torch.Size([63, 2, 4])
torch.Size([67, 2, 4])
torch.Size([70, 2, 4])
torch.Size([65, 2, 4])
torch.Size([72, 2, 4])
torch.Size([68, 2, 4])
torch.Size([69, 2, 4])
torch.Size([65, 2, 4])
torch.Size([71, 2, 4])
torch.Size([70, 2, 4])
torch.Size([64, 2, 4])
torch.Size([69, 2, 4])
torch.Size([62, 2, 4])
torch.Size([76, 2, 4])
torch.Size([72, 2, 4])
torch.Size([68, 2, 4])
torch.Size([64, 2, 4])
torch.Size([66, 2, 4])
torch.Size([59, 2, 4])
torch.Size(

torch.Size([62, 2, 4])
torch.Size([72, 2, 4])
torch.Size([67, 2, 4])
torch.Size([60, 2, 4])
torch.Size([64, 2, 4])
torch.Size([67, 2, 4])
torch.Size([70, 2, 4])
torch.Size([67, 2, 4])
torch.Size([61, 2, 4])
torch.Size([62, 2, 4])
torch.Size([63, 2, 4])
torch.Size([61, 2, 4])
torch.Size([65, 2, 4])
torch.Size([65, 2, 4])
torch.Size([66, 2, 4])
torch.Size([60, 2, 4])
torch.Size([74, 2, 4])
torch.Size([66, 2, 4])
torch.Size([73, 2, 4])
torch.Size([63, 2, 4])
torch.Size([66, 2, 4])
torch.Size([73, 2, 4])
torch.Size([62, 2, 4])
torch.Size([73, 2, 4])
torch.Size([65, 2, 4])
torch.Size([68, 2, 4])
torch.Size([69, 2, 4])
torch.Size([65, 2, 4])
torch.Size([68, 2, 4])
torch.Size([67, 2, 4])
torch.Size([59, 2, 4])
torch.Size([64, 2, 4])
torch.Size([62, 2, 4])
torch.Size([63, 2, 4])
torch.Size([65, 2, 4])
torch.Size([61, 2, 4])
torch.Size([70, 2, 4])
torch.Size([60, 2, 4])
torch.Size([65, 2, 4])
torch.Size([66, 2, 4])
torch.Size([74, 2, 4])
torch.Size([71, 2, 4])
torch.Size([64, 2, 4])
torch.Size(

torch.Size([69, 2, 4])
torch.Size([65, 2, 4])
torch.Size([60, 2, 4])
torch.Size([71, 2, 4])
torch.Size([64, 2, 4])
torch.Size([71, 2, 4])
torch.Size([69, 2, 4])
torch.Size([69, 2, 4])
torch.Size([65, 2, 4])
torch.Size([65, 2, 4])
torch.Size([64, 2, 4])
torch.Size([65, 2, 4])
torch.Size([60, 2, 4])
torch.Size([70, 2, 4])
torch.Size([72, 2, 4])
torch.Size([62, 2, 4])
torch.Size([69, 2, 4])
torch.Size([63, 2, 4])
torch.Size([63, 2, 4])
torch.Size([73, 2, 4])
torch.Size([63, 2, 4])
torch.Size([65, 2, 4])
torch.Size([71, 2, 4])
torch.Size([65, 2, 4])
torch.Size([65, 2, 4])
torch.Size([65, 2, 4])
torch.Size([64, 2, 4])
torch.Size([67, 2, 4])
torch.Size([65, 2, 4])
torch.Size([61, 2, 4])
torch.Size([61, 2, 4])
torch.Size([67, 2, 4])
torch.Size([67, 2, 4])
torch.Size([65, 2, 4])
torch.Size([72, 2, 4])
torch.Size([69, 2, 4])
torch.Size([70, 2, 4])
torch.Size([63, 2, 4])
torch.Size([60, 2, 4])
torch.Size([70, 2, 4])
torch.Size([57, 2, 4])
torch.Size([70, 2, 4])
torch.Size([67, 2, 4])
torch.Size(

torch.Size([68, 2, 4])
torch.Size([60, 2, 4])
torch.Size([68, 2, 4])
torch.Size([69, 2, 4])
torch.Size([62, 2, 4])
torch.Size([60, 2, 4])
torch.Size([73, 2, 4])
torch.Size([63, 2, 4])
torch.Size([64, 2, 4])
torch.Size([59, 2, 4])
torch.Size([67, 2, 4])
torch.Size([69, 2, 4])
torch.Size([68, 2, 4])
torch.Size([64, 2, 4])
torch.Size([72, 2, 4])
torch.Size([62, 2, 4])
torch.Size([67, 2, 4])
torch.Size([65, 2, 4])
torch.Size([60, 2, 4])
torch.Size([66, 2, 4])
torch.Size([67, 2, 4])
torch.Size([66, 2, 4])
torch.Size([69, 2, 4])
torch.Size([66, 2, 4])
torch.Size([58, 2, 4])
torch.Size([68, 2, 4])
torch.Size([68, 2, 4])
torch.Size([63, 2, 4])
torch.Size([60, 2, 4])
torch.Size([61, 2, 4])
torch.Size([62, 2, 4])
torch.Size([66, 2, 4])
torch.Size([62, 2, 4])
torch.Size([67, 2, 4])
torch.Size([67, 2, 4])
torch.Size([68, 2, 4])
torch.Size([67, 2, 4])
torch.Size([60, 2, 4])
torch.Size([64, 2, 4])
torch.Size([55, 2, 4])
torch.Size([74, 2, 4])
torch.Size([63, 2, 4])
torch.Size([67, 2, 4])
torch.Size(

torch.Size([65, 2, 4])
torch.Size([62, 2, 4])
torch.Size([79, 2, 4])
torch.Size([63, 2, 4])
torch.Size([69, 2, 4])
torch.Size([71, 2, 4])
torch.Size([73, 2, 4])
torch.Size([69, 2, 4])
torch.Size([69, 2, 4])
torch.Size([70, 2, 4])
torch.Size([65, 2, 4])
torch.Size([67, 2, 4])
torch.Size([73, 2, 4])
torch.Size([67, 2, 4])
torch.Size([70, 2, 4])
torch.Size([62, 2, 4])
torch.Size([66, 2, 4])
torch.Size([67, 2, 4])
torch.Size([68, 2, 4])
torch.Size([65, 2, 4])
torch.Size([70, 2, 4])
torch.Size([69, 2, 4])
torch.Size([66, 2, 4])
torch.Size([70, 2, 4])
torch.Size([65, 2, 4])
torch.Size([63, 2, 4])
torch.Size([66, 2, 4])
torch.Size([62, 2, 4])
torch.Size([70, 2, 4])
torch.Size([59, 2, 4])
torch.Size([72, 2, 4])
torch.Size([64, 2, 4])
torch.Size([62, 2, 4])
torch.Size([65, 2, 4])
torch.Size([73, 2, 4])
torch.Size([81, 2, 4])
torch.Size([67, 2, 4])
torch.Size([68, 2, 4])
torch.Size([65, 2, 4])
torch.Size([66, 2, 4])
torch.Size([71, 2, 4])
torch.Size([55, 2, 4])
torch.Size([66, 2, 4])
torch.Size(

In [23]:
# Sum of the per-sample bounding-box scores accumulated by the evaluation loop
# (a running total; it is not divided by `total`).
total_ats_bounding_boxes 

tensor(19.4353)