In [1]:
import os
import numpy as np
import torch
import torchvision
import matplotlib.pyplot as plt
import math
import time

from BaselineDataset import BaselineDataset
from Model import EnDeWithPooling, EnDeConvLSTM_ws, SkipLSTMEnDe
from torchvision import transforms
from PIL import Image

In [2]:
def saveTransformedImages(imageTensor):
    """Print and display a min-max normalised view of an image tensor.

    Despite its name, this helper writes nothing to disk: it converts the
    input to a PIL image, rescales the pixel values to the range [0, 1],
    prints the resulting array and shows it with matplotlib.

    Args:
        imageTensor: Input accepted by ``torchvision.transforms.ToPILImage``.
    """
    to_pil = torchvision.transforms.ToPILImage()
    # Work on an explicit float array instead of relying on NumPy silently
    # coercing the PIL image inside the arithmetic below.
    im = np.asarray(to_pil(imageTensor), dtype=np.float64)
    mn, mx = im.min(), im.max()
    span = mx - mn
    if span > 0:
        im = (im - mn) / span
    else:
        # A constant image has no dynamic range; avoid a 0/0 division
        # (which would fill the array with NaN) and show it as all zeros.
        im = np.zeros_like(im)
    print(im)
    plt.imshow(im, cmap='gray')
    plt.show()

In [3]:
def plotTrajectory(xValsGT, yValsGT, xValsPred, yValsPred, xValsPredMulti, yValsPredMulti, seqLen, im_path, numFrames=None):
    """Plot ground-truth and predicted trajectories and save the figure.

    Three curves are drawn on a fixed 1..512 canvas: the ground truth (red),
    the prediction (green) and the multimodal prediction (blue). Note that the
    ``y*`` values are plotted on the horizontal axis and the ``x*`` values on
    the vertical axis.

    Args:
        xValsGT, yValsGT: Ground-truth coordinates.
        xValsPred, yValsPred: Predicted coordinates.
        xValsPredMulti, yValsPredMulti: Multimodal predicted coordinates.
        seqLen: Unused; kept so existing callers keep working.
        im_path: Destination path handed to ``savefig``.
        numFrames: When given, the title becomes
            ``Trajectory (<numFrames // 10 - 2>s)``; otherwise ``Trajectory``.
            NOTE(review): the formula presumably converts a frame count into
            seconds of prediction horizon - confirm.
    """
    fig = plt.figure(figsize=(8, 8))
    try:
        axes = fig.gca()
        axes.plot(yValsGT, xValsGT, c='r', label='Ground Truth')
        axes.plot(yValsPred, xValsPred, c='g', label='Prediction')
        axes.plot(yValsPredMulti, xValsPredMulti, c='b', label='Multimodal Prediction', alpha=0.8)
        axes.set_xlim([1, 512])
        axes.set_ylim([1, 512])
        axes.set_xlabel('X-Axis')
        axes.set_ylabel('Y-Axis')
        axes.legend(loc='upper right')
        if numFrames is None:
            axes.set_title('Trajectory')
        else:
            plot_title = 'Trajectory (' + str(numFrames // 10 - 2) + "s)"
            axes.set_title(plot_title)
        fig.savefig(im_path)
    finally:
        # Always release the figure, even if plotting or saving fails, so
        # repeated calls do not accumulate open figures.
        plt.close(fig)

In [4]:
def heatmapAccuracy(outputMap, labelMap, thr=1.5):
    """Compare the peak of a predicted heatmap with the peak of its label.

    Args:
        outputMap: Predicted heatmap (array with at least two dimensions).
        labelMap: Ground-truth heatmap (array with at least two dimensions).
        thr: Maximum Euclidean distance between the two peaks for a hit.

    Returns:
        Tuple ``(hit, dist, pred, gt)`` where ``hit`` is 1 when ``dist <= thr``
        and 0 otherwise, ``dist`` is the Euclidean distance between the peaks
        measured over the first two axes, and ``pred`` / ``gt`` are the
        first-two-axes indices of the respective argmax.
    """
    peakPred = np.unravel_index(outputMap.argmax(), outputMap.shape)
    peakGT = np.unravel_index(labelMap.argmax(), labelMap.shape)

    delta0 = peakPred[0] - peakGT[0]
    delta1 = peakPred[1] - peakGT[1]
    dist = math.sqrt(delta0 ** 2 + delta1 ** 2)

    hit = 1 if dist <= thr else 0
    return hit, dist, (peakPred[0], peakPred[1]), (peakGT[0], peakGT[1])

In [5]:
def largest_indices(ary, n):
    """Return the indices of the ``n`` largest values of an array.

    Args:
        ary: Input array of any shape.
        n: Number of entries requested. Values larger than the number of
            elements are clamped to it; ``n <= 0`` yields an empty result.

    Returns:
        A tuple of index arrays (one per dimension of ``ary``), as produced by
        ``np.unravel_index``, ordered from the largest value to the smallest.
    """
    ary = np.asarray(ary)
    flat = ary.ravel()
    n = min(int(n), flat.size)
    if n <= 0:
        # Guard explicitly: with n == 0 the slice ``[-0:]`` would select every
        # element instead of none.
        return np.unravel_index(np.empty(0, dtype=np.intp), ary.shape)
    # argpartition finds the top-n in linear time; only those n are then sorted.
    indices = np.argpartition(flat, -n)[-n:]
    indices = indices[np.argsort(-flat[indices])]
    return np.unravel_index(indices, ary.shape)

In [6]:
def multiAccuracy(outputMap, labelMap, topK=5, radius=4):
    """Score the best of the top-K heatmap peaks against the label peak.

    The ``topK`` largest entries of ``outputMap`` are taken as candidate
    predictions; the candidate closest (Euclidean distance over the first two
    axes) to the argmax of ``labelMap`` is reported.

    Args:
        outputMap: Predicted heatmap.
        labelMap: Ground-truth heatmap.
        topK: Number of candidate peaks to consider.
        radius: Distance threshold for the ``within_radius`` flag
            (defaults to 4, the value previously hard-coded).

    Returns:
        Tuple ``(0, min_dist, pred, gt, within_radius)``. The first element is
        always 0 (a placeholder that keeps the tuple layout callers unpack),
        ``min_dist`` is the smallest candidate distance, ``pred`` is the
        closest candidate, ``gt`` is the label peak and ``within_radius`` is 1
        when ``min_dist <= radius``, else 0.

    Raises:
        ValueError: If no candidate peaks are available.
    """
    pred = largest_indices(outputMap, topK)
    gt = np.unravel_index(labelMap.argmax(), labelMap.shape)
    dist_arr = [
        math.sqrt((p0 - gt[0]) ** 2 + (p1 - gt[1]) ** 2)
        for p0, p1 in zip(pred[0], pred[1])
    ]
    if not dist_arr:
        raise ValueError("multiAccuracy needs at least one candidate peak (topK >= 1)")

    min_val = np.min(dist_arr)
    min_idx = np.argmin(dist_arr)
    within_radius = 1 if min_val <= radius else 0
    return 0, min_val, (pred[0][min_idx], pred[1][min_idx]), (gt[0], gt[1]), within_radius

In [7]:
# Run on the first CUDA device when one is available, otherwise on the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print(device)

cuda:0


In [8]:
# Make newly created tensors default to CUDA floats, but only when a GPU is
# actually present: requesting the CUDA tensor type unconditionally fails on
# CPU-only machines even though the device selection has a CPU fallback.
# NOTE(review): recent PyTorch releases deprecate set_default_tensor_type in
# favour of torch.set_default_dtype / torch.set_default_device - migrate once
# the minimum supported PyTorch version allows it.
if torch.cuda.is_available():
    torch.set_default_tensor_type(torch.cuda.FloatTensor)

### Cityscapes Future Prediction

In [13]:
# Checkpoint file that the model-loading cell reads its weights from.
checkpoint_path = os.path.join(
    "/home/fbd/rrc/backup/test-baseline-transfer-0",
    "checkpoint_future.tar",
)

In [14]:
# NOTE: torch.load unpickles the file, so only load checkpoints from a trusted
# source. map_location remaps the stored tensors onto the selected device, so
# the load does not depend on the GPU layout the checkpoint was saved with.
checkpoint = torch.load(checkpoint_path, map_location=device)
model = SkipLSTMEnDe(activation="relu", initType="default", numChannels=4, imageHeight=256, imageWidth=256, batchnorm=False, softmax=False)
model.load_state_dict(checkpoint["model_state_dict"])
# Follow the device chosen earlier instead of hard-coding CUDA.
model = model.to(device)
# Kept from the original: convlstm is moved explicitly.
# NOTE(review): presumably redundant if convlstm is a registered submodule - confirm.
model.convlstm = model.convlstm.to(device)

In [21]:
# Dataset root; the target ground-truth images are also looked up under it.
data_dir = "/home/fbd/rrc/submission/INFER-datasets/cityscapes"
# Despite its name, val_dir is the path of the test.csv file, not a directory.
val_dir = os.path.join(data_dir, "test.csv")
val_dataset = BaselineDataset(
    data_dir,
    height=256,
    width=256,
    train=False,
    infoPath=val_dir,
    augmentation=False,
    groundTruth=True,
)

In [22]:
# 2x bilinear upsampling of the model output. align_corners is spelled out
# explicitly: False is what PyTorch already uses when the argument is omitted,
# so results are unchanged, but the "See the documentation of nn.Upsample"
# warning is no longer emitted.
upsample_512 = torch.nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False)
# Converts the ground-truth target image into a tensor.
labelTransform = transforms.Compose([
    transforms.ToTensor()
])
# Folder holding the target ground-truth images read during evaluation.
targetGTDir = os.path.join(data_dir, 'targetGT')
# Per-sequence losses; the evaluation loop appends to valLoss.
valLoss1, valLoss2, valLoss3, valLoss4, valLoss = [], [], [], [], []
# Frames whose offset is >= futureFrames are fed the model's own previous
# output and contribute to the reported loss.
futureFrames = 14
# Number of heatmap peaks considered by multiAccuracy.
topK = 5

In [23]:
# Evaluation loop: walks the dataset frame by frame, carries the recurrent
# state across the frames of a sequence, and from offset >= futureFrames on
# feeds the model its own previous output. For each finished sequence the mean
# top-K distance (on the upsampled maps) of the future frames goes to valLoss.
debug, prevOut, state = True, None, None
xValsGT, yValsGT, xValsPred, yValsPred = [], [], [], []
seqLoss, seqVals = [], []
seqNum, seqLen = 0, 0

start_time = time.time()
model.eval()

# Inference only: without no_grad every forward pass would keep its autograd
# graph alive through the recurrent state for the whole sequence.
with torch.no_grad():
    for i in range(len(val_dataset)):
        grid, kittiSeqNum, vehicleId, frame1, frame2, endOfSequence, offset, numFrames, augmentation = val_dataset[i]

        # Frames with an even offset are skipped, except the one that closes a
        # sequence (it triggers the per-sequence bookkeeping below).
        if not endOfSequence and int(offset) % 2 == 0:
            continue

        # The Last Channel is the target frame and first n - 1 are source frames
        inp = grid[:-1, :].unsqueeze(0).to(device)
        currLabel = grid[-1:, :].unsqueeze(0).to(device)

        # Future frames: replace the first input channel with the (min-max
        # normalised) previous prediction. If no previous prediction exists yet
        # for this sequence, the dataset input is used unchanged.
        if offset >= futureFrames and prevOut is not None:
            new_inp = inp.clone().squeeze(0)
            mn, mx = torch.min(prevOut), torch.max(prevOut)
            span = mx - mn
            if span > 0:
                prevOut = (prevOut - mn) / span
            else:
                # Constant map: avoid a 0/0 division that would inject NaNs.
                prevOut = torch.zeros_like(prevOut)
            new_inp[0] = prevOut
            inp = new_inp.unsqueeze(0).to(device)

        # Forward the input and obtain the result
        out = model(inp, state)
        state = (model.h, model.c, model.h1, model.c1, model.h2, model.c2)
        currOutputMap = out.clone()
        newOutputMap = upsample_512(currOutputMap)
        targetPath = os.path.join(targetGTDir, str(kittiSeqNum).zfill(4),
                                  str(frame2).zfill(6), str(vehicleId).zfill(6) + '.png')
        # The context manager closes the image file once it has been converted.
        with Image.open(targetPath) as nextTargetImg:
            nextTargetTensor = labelTransform(nextTargetImg).unsqueeze(0)

        prevOut = currOutputMap.detach().cpu().squeeze(0).squeeze(0)
        currOutputMap = currOutputMap.detach().cpu().numpy().squeeze(0).squeeze(0)
        currLabel = currLabel.detach().cpu().numpy().squeeze(0).squeeze(0)
        _, dist, predCoordinates, gtCoordinates = heatmapAccuracy(currOutputMap, currLabel)

        # Upsampled outputs and inputs
        currOutputMap1 = newOutputMap.detach().cpu().numpy().squeeze(0).squeeze(0)
        currLabel1 = nextTargetTensor.detach().cpu().numpy().squeeze(0).squeeze(0)

        _, dist1, predCoordinates1, gtCoordinates1 = heatmapAccuracy(currOutputMap1, currLabel1)
        _, dist2, predCoordinates2, gtCoordinates2, within_radius = multiAccuracy(currOutputMap1, currLabel1, topK=topK)

        # Only future frames contribute to the reported loss.
        if offset >= futureFrames:
            seqLoss.append(dist2)

        # NOTE(review): seqLen is never reset, so seqVals stores a running total
        # rather than per-sequence lengths - confirm this is intended.
        seqLen += 1
        xValsGT.append(gtCoordinates1[0])
        yValsGT.append(gtCoordinates1[1])
        xValsPred.append(predCoordinates1[0])
        yValsPred.append(predCoordinates1[1])

        if endOfSequence:
            seqVals.append(seqLen)
            xValsGT, yValsGT, xValsPred, yValsPred = [], [], [], []
            seqNum += 1
            # Start the next sequence from a clean recurrent state and without
            # a stale prediction from this one.
            state = None
            prevOut = None
            if seqLoss:
                seqMean = np.mean(seqLoss)
                valLoss.append(seqMean)
            else:
                # No future frames in this sequence: report NaN but keep it out
                # of valLoss so it cannot turn the overall average into NaN.
                seqMean = float("nan")
            print("SeqNum: {}, KittiSeqNum: {}, VehicleNum: {}, numFrames: {}, loss: {}, len(seqLoss): {}".format(seqNum, kittiSeqNum, vehicleId, numFrames, seqMean, len(seqLoss)))
            seqLoss = []

end_time = time.time()

  "See the documentation of nn.Upsample for details.".format(mode))


SeqNum: 1, KittiSeqNum: 0, VehicleNum: 3, numFrames: 30, loss: 4.437170643907619, len(seqLoss): 8
SeqNum: 2, KittiSeqNum: 0, VehicleNum: 4, numFrames: 30, loss: 3.4176136863300393, len(seqLoss): 8
SeqNum: 3, KittiSeqNum: 18, VehicleNum: 6, numFrames: 30, loss: 9.473552973931827, len(seqLoss): 8
SeqNum: 4, KittiSeqNum: 18, VehicleNum: 7, numFrames: 30, loss: 4.715468606885709, len(seqLoss): 8
SeqNum: 5, KittiSeqNum: 26, VehicleNum: 21, numFrames: 30, loss: 0.8017766952966369, len(seqLoss): 8
SeqNum: 6, KittiSeqNum: 32, VehicleNum: 14, numFrames: 30, loss: 2.423145600891813, len(seqLoss): 8
SeqNum: 7, KittiSeqNum: 42, VehicleNum: 15, numFrames: 30, loss: 6.089248944342616, len(seqLoss): 8
SeqNum: 8, KittiSeqNum: 50, VehicleNum: 2, numFrames: 30, loss: 3.4582886740603396, len(seqLoss): 8
SeqNum: 9, KittiSeqNum: 1, VehicleNum: 25, numFrames: 30, loss: 7.6653954699247935, len(seqLoss): 8
SeqNum: 10, KittiSeqNum: 10, VehicleNum: 20, numFrames: 30, loss: 2.2519174652330793, len(seqLoss): 8
Se

In [24]:
# Summarise the evaluation: mean of the per-sequence losses, the same value
# scaled by 0.25 (reported as metres), and the number of sequences.
# NOTE(review): 0.25 is presumably the metres-per-pixel resolution - confirm.
avgLoss = np.mean(valLoss)
print("Avg Loss: {}".format(avgLoss))
print("Avg Loss in m: {}".format(avgLoss * 0.25))
print("Num Seq: {}".format(len(valLoss)))

Avg Loss: 4.0851795278580845
Avg Loss in m: 1.0212948819645211
Num Seq: 26
