In [3]:
# predicted as a batch
from parameters import par
from model import DeepVO
import numpy as np
from PIL import Image
import os
import time
import torch

from data_helper import get_data_info, ImageSequenceDataset
from torch.utils.data import DataLoader
from helper import eulerAnglesToRotationMatrix
from sthree_wrapper import to_s3_npy, read_img, list_contents, test_read, load_npy

In [3]:
list_contents('{}{}/image_2/'.format(par.sequence_key, test_video))

[{'Key': 'sequences/00/image_2/000000.png',
  'LastModified': datetime.datetime(2022, 1, 4, 4, 47, 19, tzinfo=tzlocal()),
  'ETag': '"8c81916da1a3b960335e9215f8ef5f8b"',
  'Size': 851565,
  'StorageClass': 'STANDARD',
  'Owner': {'DisplayName': 'deepankerk',
   'ID': '1e4f2619b2912ddd926600d3246d586939a15102e0a529bc76af64e5adbf9af6'}},
 {'Key': 'sequences/00/image_2/000001.png',
  'LastModified': datetime.datetime(2022, 1, 4, 4, 47, 18, tzinfo=tzlocal()),
  'ETag': '"5c3ae10c4f97c37976ecd1ac52e4c2ba"',
  'Size': 838481,
  'StorageClass': 'STANDARD',
  'Owner': {'DisplayName': 'deepankerk',
   'ID': '1e4f2619b2912ddd926600d3246d586939a15102e0a529bc76af64e5adbf9af6'}},
 {'Key': 'sequences/00/image_2/000002.png',
  'LastModified': datetime.datetime(2022, 1, 4, 4, 47, 19, tzinfo=tzlocal()),
  'ETag': '"19a2c950fb8868baa57d71adeae5191c"',
  'Size': 840684,
  'StorageClass': 'STANDARD',
  'Owner': {'DisplayName': 'deepankerk',
   'ID': '1e4f2619b2912ddd926600d3246d586939a15102e0a529bc76af64e

In [6]:
videos_to_test = ['00']

# Path
load_model_path = "models/t000104_v04_im184x608_s5x10_b8_rnn1000_optAdagrad_lr0.0005.model.train"   #choose the model you want to load
save_dir = 'result/'  # directory to save prediction answer
if not os.path.exists(save_dir):
    os.makedirs(save_dir)

# Load model
M_deepvo = DeepVO(par.img_h, par.img_w, par.batch_norm)
use_cuda = torch.cuda.is_available()
if use_cuda:
    M_deepvo = M_deepvo.cuda()
    M_deepvo.load_state_dict(torch.load("models/model-1500000.meta")) # Replace with load_model_path
    print("loaded model")
    #M_deepvo.load_state_dict(torch.load(load_model_path))
else:
    M_deepvo.load_state_dict(torch.load(load_model_path, map_location={'cuda:0': 'cpu'}))
print('Load model from: ', load_model_path)

# Data
n_workers = 1
seq_len = int((par.seq_len[0]+par.seq_len[1])/2)
overlap = seq_len - 1
print('seq_len = {},  overlap = {}'.format(seq_len, overlap))
batch_size = par.batch_size

fd=open('test_dump.txt', 'w')
fd.write('\n'+'='*50 + '\n')

for test_video in videos_to_test:
    df = get_data_info(folder_list=[test_video], seq_len_range=[seq_len, seq_len], overlap=overlap, sample_times=1, shuffle=False, sort=False)
    df = df.loc[df.seq_len == seq_len]  # drop last
    dataset = ImageSequenceDataset(df, par.resize_mode, (par.img_w, par.img_h), par.img_means, par.img_stds, par.minus_point_5)
    df.to_csv('test_df.csv')
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=n_workers)
    
    #print('{}{}.npy'.format(par.pose_location, test_video))
    gt_pose = load_npy('{}{}.npy'.format(par.pose_location, test_video))  # (n_images, 6)
    #print(gt_pose)
    
    # Predict
    M_deepvo.eval()
    has_predict = False
    answer = [[0.0]*6, ]
    st_t = time.time()
    n_batch = len(dataloader)

    for i, batch in enumerate(dataloader):
        print('{} / {}'.format(i, n_batch), end='\r', flush=True)
        _, x, y = batch
        if use_cuda:
            x = x.cuda()
            y = y.cuda()
        batch_predict_pose = M_deepvo.forward(x)

        # Record answer
        fd.write('Batch: {}\n'.format(i))
        for seq, predict_pose_seq in enumerate(batch_predict_pose):
            for pose_idx, pose in enumerate(predict_pose_seq):
                fd.write(' {} {} {}\n'.format(seq, pose_idx, pose))
        
        batch_predict_pose = batch_predict_pose.data.cpu().numpy()
        if i == 0:
            for pose in batch_predict_pose[0]:
                # use all predicted pose in the first prediction
                for i in range(len(pose)):
                    # Convert predicted relative pose to absolute pose by adding last pose
                    pose[i] += answer[-1][i]
                answer.append(pose.tolist())
            batch_predict_pose = batch_predict_pose[1:]

        # transform from relative to absolute 
        for predict_pose_seq in batch_predict_pose:
            # predict_pose_seq[1:] = predict_pose_seq[1:] + predict_pose_seq[0:-1]
            ang = eulerAnglesToRotationMatrix([0, answer[-1][0], 0]) #eulerAnglesToRotationMatrix([answer[-1][1], answer[-1][0], answer[-1][2]])
            location = ang.dot(predict_pose_seq[-1][3:])
            predict_pose_seq[-1][3:] = location[:]

        # use only last predicted pose in the following prediction
            last_pose = predict_pose_seq[-1]
            for i in range(len(last_pose)):
                last_pose[i] += answer[-1][i]
            # normalize angle to -Pi...Pi over y axis
            last_pose[0] = (last_pose[0] + np.pi) % (2 * np.pi) - np.pi
            answer.append(last_pose.tolist())

    print('len(answer): ', len(answer))
    print('expect len: ', len(list_contents('{}{}/image_2/'.format(par.sequence_key, test_video))))
    print('Predict use {} sec'.format(time.time() - st_t))
    
    # Save answer
    with open('{}/out_{}.txt'.format(save_dir, test_video), 'w') as f:
        for pose in answer:
            if type(pose) == list:
                f.write(', '.join([str(p) for p in pose]))
            else:
                f.write(str(pose))
            f.write('\n')
    
    # Calculate loss
    gt_pose = load_npy('{}{}.npy'.format(par.pose_location, test_video))  # (n_images, 6)
    loss = 0
    for t in range(len(answer)):
        angle_loss = np.sum((answer[t][:3] - gt_pose[t,:3]) ** 2)
        translation_loss = np.sum((answer[t][3:] - gt_pose[t,3:6]) ** 2)
        loss = (100 * angle_loss + translation_loss)
    loss /= len(gt_pose)
    print('Loss = ', loss)
    print('='*50)
print("DONE")

UnpicklingError: invalid load key, '\x0a'.