From d10415c5ccb2013de02151a53675071fb2e8943e Mon Sep 17 00:00:00 2001
From: mahdinobar <mahdi.nobar@epfl.ch>
Date: Sun, 5 Apr 2020 00:00:42 +0200
Subject: [PATCH] iPhone rectified center-test routines added. [#1]

---
 src/data/dataset.py           |  14 +-
 src/data/importers.py         | 382 +++++++++++++++++++++++++++++++++-
 src/main_iPhone_com_refine.py | 313 ++++++++++++++++++++++++++++
 src/util/handdetector.py      |   5 +-
 4 files changed, 704 insertions(+), 10 deletions(-)
 create mode 100644 src/main_iPhone_com_refine.py

diff --git a/src/data/dataset.py b/src/data/dataset.py
index 4d6bca1..0458a7d 100644
--- a/src/data/dataset.py
+++ b/src/data/dataset.py
@@ -23,7 +23,7 @@
 """

 import numpy
-from data.importers import NYUImporter, ICVLImporter, MSRA15Importer
+from data.importers import NYUImporter, ICVLImporter, MSRA15Importer, iPhoneImporter

 __author__ = "Paul Wohlhart <wohlhart@icg.tugraz.at>, Markus Oberweger <oberweger@icg.tugraz.at>"
@@ -123,6 +123,18 @@ def __init__(self, imgSeqs=None, basepath=None, localCache=True):

         self.lmi = ICVLImporter(basepath)


+class iPhoneDataset(Dataset):
+    def __init__(self, imgSeqs=None, basepath=None, localCache=True):
+        """
+        constructor
+        """
+        super(iPhoneDataset, self).__init__(imgSeqs, localCache)
+        if basepath is None:
+            basepath = '../../data/iPhone/'
+
+        self.lmi = iPhoneImporter(basepath)
+
+
 class MSRA15Dataset(Dataset):
     def __init__(self, imgSeqs=None, basepath=None, localCache=True):
         """
diff --git a/src/data/importers.py b/src/data/importers.py
index b4988ec..f97e161 100644
--- a/src/data/importers.py
+++ b/src/data/importers.py
@@ -32,14 +32,16 @@
 from util.handdetector import HandDetector
 from data.transformations import transformPoints2D
 import cPickle
+import open3d as o3d
+from PIL import Image

-__author__ = "Paul Wohlhart <wohlhart@icg.tugraz.at>, Markus Oberweger <oberweger@icg.tugraz.at>"
-__copyright__ = "Copyright 2015, ICG, Graz University of Technology, Austria"
-__credits__ = ["Paul Wohlhart", "Markus Oberweger"]
-__license__ = "GPL"
+__author__ = "Mahdi Nobar <mahdi.nobar@epfl.ch>"
+__copyright__ = "Copyright 2020, EPFL"
+__credits__ = ["Mahdi Nobar"]
+__license__ = "confidential"
 __version__ = "1.0"
 __maintainer__ = "Markus Oberweger"
-__email__ = "oberweger@icg.tugraz.at"
+__email__ = "mahdi.nobar@epfl.ch"
 __status__ = "Development"
@@ -525,6 +527,372 @@ def format_coord(x, y):

         plt.show()


+class iPhoneImporter(DepthImporter):
+    """
+    provide functionality to load data from the iPhone TrueDepth camera
+    """
+
+    def __init__(self, basepath, useCache=True, cacheDir='./cache/', refineNet=None, detectorNet=None, derotNet=None, hand=None):
+        """
+        Constructor
+        :param basepath: base path of the iPhone TrueDepth data
+        :return:
+        """
+        # iPhone calibration
+        _h = 240
+        _w = 320
+        iw = 3088.0
+        ih = 2316.0
+        xscale = _h / ih
+        yscale = _w / iw
+        _fx = 2880.0796 * xscale
+        _fy = 2880.0796 * yscale
+        # _cx = 1546.5824 * xscale
+        # _cy = 1153.2035 * yscale
+        _cx = 1153.2035 * xscale
+        _cy = 1546.5824 * yscale
+        super(iPhoneImporter, self).__init__(_fx, _fy, _cy, _cx, hand)  # see Sun et al.
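+        # rationale (assumed): the factory intrinsics refer to the full
+        # 3088x2316 sensor frame and are rescaled to the 320x240 depth map;
+        # cx/cy and the x/y scale factors are deliberately swapped because
+        # the sensor frame is portrait while the depth map is landscape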
+
+        self.depth_map_size = (320, 240)
+        self.basepath = basepath
+        self.useCache = useCache
+        self.cacheDir = cacheDir
+        self.refineNet = refineNet
+        self.derotNet = derotNet
+        self.detectorNet = detectorNet
+        self.numJoints = 21
+        self.crop_joint_idx = 5
+        self.default_cubes = {'P0': (200, 200, 200)}
+        self.sides = {'P0': 'right'}
+
+    def loadDepthMap(self, filename):
+        """
+        Read a depth map stored as PNG from the iPhone TrueDepth camera
+        :param filename: file name to load
+        :return: depth image data in mm, resized to shape (240, 320)
+        """
+# temporary: must be changed ###########################################################################################
+        color_raw = o3d.io.read_image('/home/mahdi/HVR/hvr/hand_pcl_iPhone/Tom_set_2/iPhone/hand30wall50_color.png')
+        depth_raw = o3d.io.read_image(filename)
+        color_raw = o3d.geometry.Image(np.asarray(color_raw))
+        rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
+            color_raw, depth_raw, depth_scale=0.529, depth_trunc=30.0, convert_rgb_to_intensity=False)
+        # iPhone calibration
+        h = np.asarray(color_raw).shape[0]  # 480
+        w = np.asarray(color_raw).shape[1]  # 640
+        iw = 3088.0
+        ih = 2316.0
+        xscale = h / ih
+        yscale = w / iw
+        _fx = 2880.0796 * xscale
+        _fy = 2880.0796 * yscale
+        # _cx = 1546.5824 * xscale
+        # _cy = 1153.2035 * yscale
+        _cx = 1153.2035 * xscale
+        _cy = 1546.5824 * yscale
+        setIntrinsic = o3d.camera.PinholeCameraIntrinsic()
+        setIntrinsic.set_intrinsics(width=w, height=h, fx=_fx, fy=_fy, cx=_cx, cy=_cy)
+        pcd = o3d.geometry.PointCloud.create_from_rgbd_image(
+            rgbd_image,
+            setIntrinsic)
+        # Flip it, otherwise the point cloud will be upside down
+        pcd.transform([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]])
+        z_values = (-np.asarray(pcd.points)[:, 2] * 1000)  # in mm
+        depth_map = np.reshape(z_values, (480, 640))
+        imgdata = np.asarray(Image.fromarray(depth_map).resize((320, 240)))
+# temporary: must be changed ###########################################################################################
+        return np.copy(imgdata)
+
+    def getDepthMapNV(self):
+        """
+        Get the value of invalid depth values in the depth map
+        :return: value
+        """
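+        # 32001 follows the MSRA15 invalid-depth sentinel; it is assumed here
+        # that the same value marks invalid pixels in these captures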
+        return 32001
+
+    def loadSequence(self, seqName, subSeq=None, Nmax=float('inf'), shuffle=False, rng=None, docom=False, cube=None):
+        """
+        Load an image sequence from the dataset
+        :param seqName: sequence name, e.g. P0
+        :param Nmax: maximum number of samples to load
+        :return: returns named image sequence
+        """
+
+        if (subSeq is not None) and (not isinstance(subSeq, list)):
+            raise TypeError("subSeq must be None or list")
+
+        if cube is None:
+            config = {'cube': self.default_cubes[seqName]}
+        else:
+            assert isinstance(cube, tuple)
+            assert len(cube) == 3
+            config = {'cube': cube}
+
+        if subSeq is None:
+            pickleCache = '{}/{}_{}_{}_{}_{}_cache.pkl'.format(self.cacheDir, self.__class__.__name__, seqName, self.hand,
+                                                               HandDetector.detectionModeToString(docom, self.refineNet is not None), config['cube'][0])
+        else:
+            pickleCache = '{}/{}_{}_{}_{}_{}_{}_cache.pkl'.format(self.cacheDir, self.__class__.__name__, seqName, self.hand,
+                                                                  ''.join(subSeq), HandDetector.detectionModeToString(docom, self.refineNet is not None), config['cube'][0])
+        if self.useCache and os.path.isfile(pickleCache):
+            print("Loading cache data from {}".format(pickleCache))
+            f = open(pickleCache, 'rb')
+            (seqName, data, config) = cPickle.load(f)
+            f.close()
+
+            # shuffle data
+            if shuffle and rng is not None:
+                print("Shuffling")
+                rng.shuffle(data)
+            if not(np.isinf(Nmax)):
+                return NamedImgSequence(seqName, data[0:Nmax], config)
+            else:
+                return NamedImgSequence(seqName, data, config)
+
+        self.loadRefineNetLazy(self.refineNet)
+
+        # Load the dataset
+        objdir = '{}/{}/'.format(self.basepath, seqName)
+        subdirs = sorted([name for name in os.listdir(objdir) if os.path.isdir(os.path.join(objdir, name))])
+
+        txt = 'Loading {}'.format(seqName)
+        nImgs = sum([len(files) for r, d, files in os.walk(objdir)]) // 2
+        pbar = pb.ProgressBar(maxval=nImgs, widgets=[txt, pb.Percentage(), pb.Bar()])
+        pbar.start()
+
+        data = []
+        pi = 0
+        for subdir in subdirs:
+            # check for subsequences and skip them if necessary
+            subSeqName = ''
+            if subSeq is not None:
+                if subdir not in subSeq:
+                    continue
+
+                subSeqName = subdir
+
+            # iterate all subdirectories
+            trainlabels = '{}/{}/joint.txt'.format(objdir, subdir)
+
+            inputfile = open(trainlabels)
+            # read number of samples
+            nImgs = int(inputfile.readline())
+
+            for i in range(nImgs):
+                # early stop
+                if len(data) >= Nmax:
+                    break
+
+                line = inputfile.readline()
+                part = line.split(' ')
+
+                # dptFileName = '{}/{}/{}_depth.bin'.format(objdir, subdir, str(i).zfill(6))
+                dptFileName = '{}/{}/{}_depth.png'.format(objdir, subdir, 'hand30wall50')
+
+                if not os.path.isfile(dptFileName):
+                    print("File {} does not exist!".format(dptFileName))
+                    continue
+                dpt = self.loadDepthMap(dptFileName)
+
+                # joints in 3D coordinates, as read from the label file
+                gt3Dorig = np.zeros((self.numJoints, 3), np.float32)
+                for joint in range(gt3Dorig.shape[0]):
+                    for xyz in range(0, 3):
+                        gt3Dorig[joint, xyz] = part[joint*3+xyz]
+
+                # invert axis
+                # gt3Dorig[:, 0] *= (-1.)
+                # gt3Dorig[:, 1] *= (-1.)
+                gt3Dorig[:, 2] *= (-1.)
+
+                # project the 3D joints to image coordinates
+                gtorig = self.joints3DToImg(gt3Dorig)
+
+                if self.hand is not None:
+                    if self.hand != self.sides[seqName]:
+                        # mirror the joints horizontally for the opposite hand side
+                        gtorig[:, 0] -= dpt.shape[1] / 2.
+                        gtorig[:, 0] *= (-1)
+                        gtorig[:, 0] += dpt.shape[1] / 2.
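+                        # re-derive the 3D joints from the mirrored 2D joints
+                        # and flip the depth image left-right to match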
+                        gt3Dorig = self.jointsImgTo3D(gtorig)
+                        dpt = dpt[:, ::-1]
+
+                # print gt3D
+                # self.showAnnotatedDepth(DepthFrame(dpt,gtorig,gtorig,0,gt3Dorig,gt3Dcrop,com3D,dptFileName,'',''))
+
+                # Detect hand
+                hd = HandDetector(dpt, self.fx, self.fy, refineNet=self.refineNet, importer=self)
+                if not hd.checkImage(1.):
+                    print("Skipping image {}, no content".format(dptFileName))
+                    continue
+
+                try:
+                    # initialize the com with the ground-truth middle-finger MCP joint
+                    # of the MSRA15-style labels [z in mm, (x, y) in pixels]
+                    dpt, M, com = hd.cropArea3D(com=gtorig[self.crop_joint_idx], size=config['cube'], docom=docom)
+                except UserWarning:
+                    print("Skipping image {}, no hand detected".format(dptFileName))
+                    continue
+
+                com3D = self.jointImgTo3D(com)
+                gt3Dcrop = gt3Dorig - com3D  # normalize to com
+
+                gtcrop = transformPoints2D(gtorig, M)
+
+                # print("{}".format(gt3Dorig))
+                # self.showAnnotatedDepth(DepthFrame(dpt,gtorig,gtcrop,M,gt3Dorig,gt3Dcrop,com3D,dptFileName,'','',{}))
+
+                data.append(DepthFrame(dpt.astype(np.float32), gtorig, gtcrop, M, gt3Dorig, gt3Dcrop, com3D,
+                                       dptFileName, subSeqName, self.sides[seqName], {}))
+                pbar.update(pi)
+                pi += 1
+
+            inputfile.close()
+
+        pbar.finish()
+        print("Loaded {} samples.".format(len(data)))
+
+        if self.useCache:
+            print("Save cache data to {}".format(pickleCache))
+            f = open(pickleCache, 'wb')
+            cPickle.dump((seqName, data, config), f, protocol=cPickle.HIGHEST_PROTOCOL)
+            f.close()
+
+        # shuffle data
+        if shuffle and rng is not None:
+            print("Shuffling")
+            rng.shuffle(data)
+        return NamedImgSequence(seqName, data, config)
+
+    def jointsImgTo3D(self, sample):
+        """
+        Normalize sample to metric 3D
+        :param sample: joints in (x,y,z) with x,y in image coordinates and z in mm
+        :return: normalized joints in mm
+        """
+        ret = np.zeros((sample.shape[0], 3), np.float32)
+        for i in xrange(sample.shape[0]):
+            ret[i] = self.jointImgTo3D(sample[i])
+        return ret
+
+    def jointImgTo3D(self, sample):
+        """
+        Normalize sample to metric 3D
+        :param sample: joints in (x,y,z) with x,y in image coordinates and z in mm
+        :return: normalized joints in mm
+        """
+        ret = np.zeros((3,), np.float32)
+        ret[0] = (sample[0] - self.ux) * sample[2] / self.fx
+        ret[1] = (self.uy - sample[1]) * sample[2] / self.fy
+        ret[2] = sample[2]
+        return ret
+
+    def joints3DToImg(self, sample):
+        """
+        Denormalize sample from metric 3D to image coordinates
+        :param sample: joints in (x,y,z) with x,y and z in mm
+        :return: joints in (x,y,z) with x,y in image coordinates and z in mm
+        """
+        ret = np.zeros((sample.shape[0], 3), np.float32)
+        for i in xrange(sample.shape[0]):
+            ret[i] = self.joint3DToImg(sample[i])
+        return ret
+
+    def joint3DToImg(self, sample):
+        """
+        Denormalize sample from metric 3D to image coordinates
+        :param sample: joints in (x,y,z) with x,y and z in mm
+        :return: joints in (x,y,z) with x,y in image coordinates and z in mm
+        """
+        ret = np.zeros((3, ), np.float32)
+        if sample[2] == 0.:
+            ret[0] = self.ux
+            ret[1] = self.uy
+            return ret
+        ret[0] = sample[0]/sample[2]*self.fx+self.ux
+        ret[1] = self.uy-sample[1]/sample[2]*self.fy
+        ret[2] = sample[2]
+        return ret
+
+    def getCameraIntrinsics(self):
+        """
+        Get intrinsic camera matrix
+        :return: 3x3 intrinsic camera matrix
+        """
+        ret = np.zeros((3, 3), np.float32)
+        ret[0, 0] = self.fx
+        ret[1, 1] = -self.fy
+        ret[2, 2] = 1
+        ret[0, 2] = self.ux
+        ret[1, 2] = self.uy
+        return ret
+
+    def getCameraProjection(self):
+        """
+        Get homogeneous camera projection matrix
+        :return: 4x4 camera projection matrix
+        """
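+        # fills P so that P.dot((x, y, z, 1)) = (fx*x + ux*z, -fy*y + uy*z, z, z);
+        # dividing by the last component yields pixel coordinates with y inverted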
+        ret = np.zeros((4, 4), np.float32)
+        ret[0, 0] = self.fx
+        ret[1, 1] = -self.fy
+        ret[2, 2] = 1.
+        ret[0, 2] = self.ux
+        ret[1, 2] = self.uy
+        ret[3, 2] = 1.
+        return ret
+
+    def showAnnotatedDepth(self, frame):
+        """
+        Show the depth image
+        :param frame: image to show
+        :return:
+        """
+        import matplotlib
+        import matplotlib.pyplot as plt
+
+        print("img min {}, max {}".format(frame.dpt.min(), frame.dpt.max()))
+        fig = plt.figure()
+        ax = fig.add_subplot(111)
+        ax.imshow(frame.dpt, cmap=matplotlib.cm.jet, interpolation='nearest')
+        ax.scatter(frame.gtcrop[:, 0], frame.gtcrop[:, 1])
+
+        ax.plot(frame.gtcrop[0:5, 0], frame.gtcrop[0:5, 1], c='r')
+        ax.plot(np.hstack((frame.gtcrop[0, 0], frame.gtcrop[5:9, 0])), np.hstack((frame.gtcrop[0, 1], frame.gtcrop[5:9, 1])), c='r')
+        ax.plot(np.hstack((frame.gtcrop[0, 0], frame.gtcrop[9:13, 0])), np.hstack((frame.gtcrop[0, 1], frame.gtcrop[9:13, 1])), c='r')
+        ax.plot(np.hstack((frame.gtcrop[0, 0], frame.gtcrop[13:17, 0])), np.hstack((frame.gtcrop[0, 1], frame.gtcrop[13:17, 1])), c='r')
+        ax.plot(np.hstack((frame.gtcrop[0, 0], frame.gtcrop[17:21, 0])), np.hstack((frame.gtcrop[0, 1], frame.gtcrop[17:21, 1])), c='r')
+
+        def format_coord(x, y):
+            numrows, numcols = frame.dpt.shape
+            col = int(x+0.5)
+            row = int(y+0.5)
+            if 0 <= col < numcols and 0 <= row < numrows:
+                z = frame.dpt[row, col]
+                return 'x=%1.4f, y=%1.4f, z=%1.4f'%(x, y, z)
+            else:
+                return 'x=%1.4f, y=%1.4f'%(x, y)
+        ax.format_coord = format_coord
+
+        for i in range(frame.gtcrop.shape[0]):
+            ax.annotate(str(i), (int(frame.gtcrop[i, 0]), int(frame.gtcrop[i, 1])))
+
+        plt.show()
+
+    @staticmethod
+    def depthToPCL(dpt, T, background_val=0.):
+        """
+        Convert a depth image to a point cloud; note that the hard-coded
+        principal point (160, 120) and focal length 241.42 below are the
+        MSRA15 defaults, and the focal length differs from the iPhone value
+        computed in the constructor
+        """
+        # get valid points and transform
+        pts = np.asarray(np.where(~np.isclose(dpt, background_val))).transpose()
+        pts = np.concatenate([pts[:, [1, 0]] + 0.5, np.ones((pts.shape[0], 1), dtype='float32')], axis=1)
+        pts = np.dot(np.linalg.inv(np.asarray(T)), pts.T).T
+        pts = (pts[:, 0:2] / pts[:, 2][:, None]).reshape((pts.shape[0], 2))
+
+        # replace the invalid data
+        depth = dpt[(~np.isclose(dpt, background_val))]
+
+        # get x and y data in a vectorized way
+        row = (pts[:, 0] - 160.) / 241.42 * depth
+        col = (120. - pts[:, 1]) / 241.42 * depth
+
+        # combine x,y,depth
+        return np.column_stack((row, col, depth))
+

 class MSRA15Importer(DepthImporter):
     """
@@ -560,9 +928,9 @@ def __init__(self, basepath, useCache=True, cacheDir='./cache/', refineNet=None,

     def loadDepthMap(self, filename):
         """
-        Read a depth-map
+        Read a depth map in the MSRA15 binary format
         :param filename: file name to load
-        :return: image data of depth image
+        :return: depth image data in mm, shape (240, 320)
         """
         with open(filename, 'rb') as f:
             # first 6 uint define the full image
diff --git a/src/main_iPhone_com_refine.py b/src/main_iPhone_com_refine.py
new file mode 100644
index 0000000..9abbda7
--- /dev/null
+++ b/src/main_iPhone_com_refine.py
@@ -0,0 +1,313 @@
+"""This is the main file for refining the hand center of mass (CoM) on the iPhone dataset
+
+Copyright 2015 Markus Oberweger, ICG,
+Graz University of Technology
+
+This file is part of DeepPrior.
+
+DeepPrior is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+DeepPrior is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with DeepPrior. If not, see <http://www.gnu.org/licenses/>.
+"""
+
+import numpy
+import gc
+import matplotlib
+
+matplotlib.use('Agg')  # plot to file
+import matplotlib.pyplot as plt
+from net.scalenet import ScaleNetParams, ScaleNet
+from trainer.scalenettrainer import ScaleNetTrainerParams, ScaleNetTrainer
+from util.handdetector import HandDetector
+import os
+import cPickle
+from data.importers import iPhoneImporter
+from data.dataset import iPhoneDataset
+from util.handpose_evaluation import MSRAHandposeEvaluation
+from util.helpers import shuffle_many_inplace
+
+if __name__ == '__main__':
+
+    eval_prefix = 'iPhone_COM_AUGMENT'
+    if not os.path.exists('./eval/'+eval_prefix+'/'):
+        os.makedirs('./eval/'+eval_prefix+'/')
+
+    rng = numpy.random.RandomState(23455)
+
+    print("create data")
+    aug_modes = ['com', 'rot', 'none']  # 'sc',
+
+    di = iPhoneImporter('../data/iPhone/')
+    # Seq0_1 = di.loadSequence('P0', shuffle=True, rng=rng, docom=False)
+    # Seq0_1 = Seq0_1._replace(name='P0_gt')
+    # Seq0_2 = di.loadSequence('P0', shuffle=True, rng=rng, docom=True)
+    # Seq0_2 = Seq0_2._replace(name='P0_com')
+    # Seq1_1 = di.loadSequence('P1', shuffle=True, rng=rng, docom=False)
+    # Seq1_1 = Seq1_1._replace(name='P1_gt')
+    # Seq1_2 = di.loadSequence('P1', shuffle=True, rng=rng, docom=True)
+    # Seq1_2 = Seq1_2._replace(name='P1_com')
+    # Seq2_1 = di.loadSequence('P2', shuffle=True, rng=rng, docom=False)
+    # Seq2_1 = Seq2_1._replace(name='P2_gt')
+    # Seq2_2 = di.loadSequence('P2', shuffle=True, rng=rng, docom=True)
+    # Seq2_2 = Seq2_2._replace(name='P2_com')
+    # Seq3_1 = di.loadSequence('P3', shuffle=True, rng=rng, docom=False)
+    # Seq3_1 = Seq3_1._replace(name='P3_gt')
+    # Seq3_2 = di.loadSequence('P3', shuffle=True, rng=rng, docom=True)
+    # Seq3_2 = Seq3_2._replace(name='P3_com')
+    # Seq4_1 = di.loadSequence('P4', shuffle=True, rng=rng, docom=False)
+    # Seq4_1 = Seq4_1._replace(name='P4_gt')
+    # Seq4_2 = di.loadSequence('P4', shuffle=True, rng=rng, docom=True)
+    # Seq4_2 = Seq4_2._replace(name='P4_com')
+    # Seq5_1 = di.loadSequence('P5', shuffle=True, rng=rng, docom=False)
+    # Seq5_1 = Seq5_1._replace(name='P5_gt')
+    # Seq5_2 = di.loadSequence('P5', shuffle=True, rng=rng, docom=True)
+    # Seq5_2 = Seq5_2._replace(name='P5_com')
+    # Seq6_1 = di.loadSequence('P6', shuffle=True, rng=rng, docom=False)
+    # Seq6_1 = Seq6_1._replace(name='P6_gt')
+    # Seq6_2 = di.loadSequence('P6', shuffle=True, rng=rng, docom=True)
+    # Seq6_2 = Seq6_2._replace(name='P6_com')
+    # Seq7_1 = di.loadSequence('P7', shuffle=True, rng=rng, docom=False)
+    # Seq7_1 = Seq7_1._replace(name='P7_gt')
+    # Seq7_2 = di.loadSequence('P7', shuffle=True, rng=rng, docom=True)
+    # Seq7_2 = Seq7_2._replace(name='P7_com')
+    # Seq8_1 = di.loadSequence('P8', shuffle=True, rng=rng, docom=False)
+    # Seq8_1 = Seq8_1._replace(name='P8_gt')
+    # Seq8_2 = di.loadSequence('P8', shuffle=True, rng=rng, docom=True)
+    # Seq8_2 = Seq8_2._replace(name='P8_com')
+    # trainSeqs = [Seq0_1, Seq0_2, Seq1_1, Seq1_2, Seq2_1, Seq2_2, Seq3_1, Seq3_2,
+    #              Seq4_1, Seq4_2, Seq5_1, Seq5_2, Seq6_1, Seq6_2, Seq7_1, Seq7_2,
+    #              Seq8_1, Seq8_2]
+
+    # trainSeqs = [Seq0_1]
+    Seq_0 = di.loadSequence('P0', docom=True)
+    testSeqs = [Seq_0]
+
+    # # create training data
+    # trainDataSet = MSRA15Dataset(trainSeqs, localCache=False)
+    # nSamp = numpy.sum([len(s.data) for s in trainSeqs])
+    # d1, g1 = trainDataSet.imgStackDepthOnly(trainSeqs[0].name)
+    # train_data = numpy.ones((nSamp, d1.shape[1], d1.shape[2], d1.shape[3]), dtype='float32')
+    # train_gt3D = numpy.ones((nSamp, g1.shape[1], g1.shape[2]), dtype='float32')
+    # train_data_com = numpy.ones((nSamp, 3), dtype='float32')
+    # train_data_M = numpy.ones((nSamp, 3, 3), dtype='float32')
+    # train_data_cube = numpy.ones((nSamp, 3), dtype='float32')
+    # del d1, g1
+    # gc.collect()
+    # gc.collect()
+    # gc.collect()
+    # oldIdx = 0
+    # for seq in trainSeqs:
+    #     d, g = trainDataSet.imgStackDepthOnly(seq.name)
+    #     train_data[oldIdx:oldIdx+d.shape[0]] = d
+    #     train_gt3D[oldIdx:oldIdx+d.shape[0]] = g
+    #     train_data_com[oldIdx:oldIdx+d.shape[0]] = numpy.asarray([da.com for da in seq.data])
+    #     train_data_M[oldIdx:oldIdx+d.shape[0]] = numpy.asarray([da.T for da in seq.data])
+    #     train_data_cube[oldIdx:oldIdx+d.shape[0]] = numpy.asarray([seq.config['cube']]*d.shape[0])
+    #     oldIdx += d.shape[0]
+    #     del d, g
+    #     gc.collect()
+    #     gc.collect()
+    #     gc.collect()
+    # shuffle_many_inplace([train_data, train_gt3D, train_data_com, train_data_cube, train_data_M], random_state=rng)
+    #
+    # mb = (train_data.nbytes) / (1024 * 1024)
+    # print("data size: {}Mb".format(mb))
+
+    testDataSet = iPhoneDataset(testSeqs)
+    test_data, test_gt3D = testDataSet.imgStackDepthOnly(testSeqs[0].name)
+
+    val_data = test_data
+    val_gt3D = test_gt3D
+
+    ####################################
+    # # resize data
+    # dsize = (int(train_data.shape[2]//2), int(train_data.shape[3]//2))
+    # xstart = int(train_data.shape[2]/2-dsize[0]/2)
+    # xend = xstart + dsize[0]
+    # ystart = int(train_data.shape[3]/2-dsize[1]/2)
+    # yend = ystart + dsize[1]
+    # train_data2 = train_data[:, :, ystart:yend, xstart:xend]
+    #
+    # dsize = (int(train_data.shape[2]//4), int(train_data.shape[3]//4))
+    # xstart = int(train_data.shape[2]/2-dsize[0]/2)
+    # xend = xstart + dsize[0]
+    # ystart = int(train_data.shape[3]/2-dsize[1]/2)
+    # yend = ystart + dsize[1]
+    # train_data4 = train_data[:, :, ystart:yend, xstart:xend]
+    #
+    # dsize = (int(train_data.shape[2]//2), int(train_data.shape[3]//2))
+    # xstart = int(train_data.shape[2]/2-dsize[0]/2)
+    # xend = xstart + dsize[0]
+    # ystart = int(train_data.shape[3]/2-dsize[1]/2)
+    # yend = ystart + dsize[1]
+    # val_data2 = val_data[:, :, ystart:yend, xstart:xend]
+    #
+    # dsize = (int(train_data.shape[2]//4), int(train_data.shape[3]//4))
+    # xstart = int(train_data.shape[2]/2-dsize[0]/2)
+    # xend = xstart + dsize[0]
+    # ystart = int(train_data.shape[3]/2-dsize[1]/2)
+    # yend = ystart + dsize[1]
+    # val_data4 = val_data[:, :, ystart:yend, xstart:xend]
+    #
+    # dsize = (int(train_data.shape[2]//2), int(train_data.shape[3]//2))
+    # xstart = int(train_data.shape[2]/2-dsize[0]/2)
+    # xend = xstart + dsize[0]
+    # ystart = int(train_data.shape[3]/2-dsize[1]/2)
+    # yend = ystart + dsize[1]
+    # test_data2 = test_data[:, :, ystart:yend, xstart:xend]
+    #
+    # dsize = (int(train_data.shape[2]//4), int(train_data.shape[3]//4))
+    # xstart = int(train_data.shape[2]/2-dsize[0]/2)
+    # xend = xstart + dsize[0]
+    # ystart = int(train_data.shape[3]/2-dsize[1]/2)
+    # yend = ystart + dsize[1]
+    # test_data4 = test_data[:, :, ystart:yend, xstart:xend]
+    #
+    # print train_gt3D.max(), test_gt3D.max(), train_gt3D.min(), test_gt3D.min()
+    # print train_data.max(), test_data.max(), train_data.min(), test_data.min()
+    #
+    # imgSizeW = train_data.shape[3]
+    # imgSizeH = train_data.shape[2]
+    # nChannels = train_data.shape[1]
+
+    #############################################################################
+    print("create network")
+    batchSize = 64
+    # poseNetParams = ScaleNetParams(type=1, nChan=nChannels, wIn=imgSizeW, hIn=imgSizeH, batchSize=batchSize,
+    #                                resizeFactor=2, numJoints=1, nDims=3)
+    nChannels = 1
+    imgSizeW = 128
+    imgSizeH = 128
+    poseNetParams = ScaleNetParams(type=1, nChan=nChannels, wIn=imgSizeW, hIn=imgSizeH, batchSize=batchSize,
+                                   resizeFactor=2, numJoints=1, nDims=3)
+    poseNet = ScaleNet(rng, cfgParams=poseNetParams)
+
+    # poseNetTrainerParams = ScaleNetTrainerParams()
+    # poseNetTrainerParams.use_early_stopping = False
+    # poseNetTrainerParams.batch_size = batchSize
+    # poseNetTrainerParams.learning_rate = 0.0005
+    # poseNetTrainerParams.weightreg_factor = 0.0001
+    # poseNetTrainerParams.force_macrobatch_reload = True
+    # poseNetTrainerParams.para_augment = True
+    # poseNetTrainerParams.augment_fun_params = {'fun': 'augment_poses', 'args': {'normZeroOne': False,
+    #                                                                             'di': di,
+    #                                                                             'aug_modes': aug_modes,
+    #                                                                             'hd': HandDetector(train_data[0, 0].copy(), abs(di.fx), abs(di.fy), importer=di)}}
+    #
+    # print("setup trainer")
+    # poseNetTrainer = ScaleNetTrainer(poseNet, poseNetTrainerParams, rng, './eval/'+eval_prefix)
+    # poseNetTrainer.setData(train_data, train_gt3D[:, di.crop_joint_idx, :], val_data, val_gt3D[:, di.crop_joint_idx, :])
+    # poseNetTrainer.addStaticData({'val_data_x1': val_data2, 'val_data_x2': val_data4})
+    # poseNetTrainer.addManagedData({'train_data_x1': train_data2, 'train_data_x2': train_data4})
+    # poseNetTrainer.addManagedData({'train_data_com': train_data_com,
+    #                                'train_data_cube': train_data_cube,
+    #                                'train_data_M': train_data_M,
+    #                                'train_gt3D': train_gt3D})
+    # poseNetTrainer.compileFunctions()
+
+    # ###################################################################
+    # # TRAIN
+    # train_res = poseNetTrainer.train(n_epochs=100)
+    # train_costs = train_res[0]
+    # val_errs = train_res[2]
+    #
+    # # plot cost
+    # fig = plt.figure()
+    # plt.semilogy(train_costs)
+    # plt.show(block=False)
+    # fig.savefig('./eval/'+eval_prefix+'/'+eval_prefix+'_cost.png')
+    #
+    # fig = plt.figure()
+    # plt.semilogy(val_errs)
+    # plt.show(block=False)
+    # fig.savefig('./eval/'+eval_prefix+'/'+eval_prefix+'_errs.png')
+
+    # # save results
+    # poseNet.save("./eval/{}/net_{}.pkl".format(eval_prefix, eval_prefix))
+    poseNet.load("./eval/{}/net_{}.pkl".format(eval_prefix, eval_prefix))
+
+    ####################################################
+    # TEST
+    print("Testing ...")
+    gt3D = [j.gt3Dorig[di.crop_joint_idx].reshape(1, 3) for j in testSeqs[0].data]
+    # jts = poseNet.computeOutput([test_data, test_data2, test_data4])
+    jts = poseNet.computeOutput([test_data, test_data[:, :, 32:96, 32:96], test_data[:, :, 48:80, 48:80]])
+    joints = []
+    for i in xrange(test_data.shape[0]):
+        joints.append(jts[i].reshape(1, 3)*(testSeqs[0].config['cube'][2]/2.) + testSeqs[0].data[i].com)
+    print "jts = {}".format(jts)
+    # 3D coordinates of the refined center = joints
+    print "joints = {}".format(joints)
+########################################################################################################################
+    # plot
+    import matplotlib.pyplot as plt
+    import matplotlib
+    import numpy as np
+    fig, ax = plt.subplots()
+    ax.imshow(Seq_0.data[0].dpt, cmap=matplotlib.cm.jet)
+
+    # iPhone calibration
+    h = 128.
+    w = 128.
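+    # caution: the scale factors below are derived from the 128x128 crop
+    # rather than the 320x240 depth map, and _uy reuses 1153.2035 where
+    # __init__ used 1546.5824 for cy; this looks inconsistent with the
+    # importer intrinsics and may need revisiting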
+    iw = 3088.0
+    ih = 2316.0
+    yscale = h / ih
+    xscale = w / iw
+    _fx = 2880.0796 * xscale
+    _fy = 2880.0796 * yscale
+    _ux = 1153.2035 * xscale
+    _uy = 1153.2035 * yscale
+
+    icom = np.empty((2, 1))
+    icom[0] = Seq_0.data[0].gtorig[5][0] * xscale
+    icom[1] = Seq_0.data[0].gtorig[5][1] * yscale
+    ax.scatter(icom[0], icom[1], marker='+', c='yellow', s=30, label='initial center: MSRA middle-finger MCP joint')  # initial hand com in IMG
+
+    gt_com = np.empty((2, 1))
+    gt_com3D = Seq_0.data[0].com
+    gt_com[0] = gt_com3D[0] / gt_com3D[2] * _fx + _ux
+    gt_com[1] = gt_com3D[1] / gt_com3D[2] * _fy + _uy
+    ax.scatter(gt_com[0], gt_com[1], marker='+', c='blue', s=30, label='ground truth refined hand center')  # ground-truth refined com in IMG
+
+    refined_com = np.empty((2, 1))
+    refined_com3D = joints[0][0]
+    refined_com[0] = refined_com3D[0] / refined_com3D[2] * _fx + _ux
+    refined_com[1] = refined_com3D[1] / refined_com3D[2] * _fy + _uy
+    ax.scatter(refined_com[0], refined_com[1], marker='*', c='lime', s=30, label='refined hand center posenet estimation')  # posenet-refined com in IMG
+    # ax.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
+    ax.legend()
+
+    plt.savefig('/home/mahdi/HVR/git_repos/deep-prior-pp/src/cache/iPhone_30hand50wall.png')
+
+########################################################################################################################
+
+    # hpe = MSRAHandposeEvaluation(gt3D, joints)
+    # hpe.subfolder += '/'+eval_prefix+'/'
+    # print("Mean error: {}mm, max error: {}mm".format(hpe.getMeanError(), hpe.getMaxError()))
+    #
+    # # save results
+    # cPickle.dump(joints, open("./eval/{}/result_{}_{}.pkl".format(eval_prefix, os.path.split(__file__)[1], eval_prefix), "wb"), protocol=cPickle.HIGHEST_PROTOCOL)
+    #
+    # print "Testing baseline"
+    #
+    # #################################
+    # # BASELINE
+    # com = [j.com for j in testSeqs[0].data]
+    # hpe_com = MSRAHandposeEvaluation(gt3D, numpy.asarray(com).reshape((len(gt3D), 1, 3)))
+    # hpe_com.subfolder += '/'+eval_prefix+'/'
+    # print("Mean error: {}mm".format(hpe_com.getMeanError()))
+    #
+    # hpe.plotEvaluation(eval_prefix, methodName='Our regr', baseline=[('CoM', hpe_com)])
+    print('ended')
diff --git a/src/util/handdetector.py b/src/util/handdetector.py
index 49ec43f..34417b5 100644
--- a/src/util/handdetector.py
+++ b/src/util/handdetector.py
@@ -54,11 +54,12 @@ def __init__(self, dpt, fx, fy, importer=None, refineNet=None):
         :param fy: camera focal lenght
         """
         self.dpt = dpt
-        self.maxDepth = min(1500, dpt.max())
+        self.maxDepth = min(400, dpt.max())
         self.minDepth = max(10, dpt.min())
         # set values out of range to 0
         self.dpt[self.dpt > self.maxDepth] = 0.
-        self.dpt[self.dpt < self.minDepth] = 0.
+        # self.dpt[self.dpt < self.minDepth] = 0.
+
         # camera settings
         self.fx = fx
         self.fy = fy
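
Note: as a sanity check on the rescaled intrinsics above, here is a minimal
sketch (not part of the patch) showing that joint3DToImg and jointImgTo3D, as
defined in iPhoneImporter, invert each other under those values:

    import numpy as np

    # rescaled iPhone TrueDepth intrinsics for the landscape 320x240 depth map;
    # fx uses the 240/2316 scale and ux/uy come from the swapped cx/cy values,
    # mirroring the constructor above
    fx = 2880.0796 * (240 / 2316.0)   # ~298.4
    fy = 2880.0796 * (320 / 3088.0)   # ~298.5
    ux = 1546.5824 * (320 / 3088.0)   # ~160.3
    uy = 1153.2035 * (240 / 2316.0)   # ~119.5

    def joint3DToImg(p):
        # project a metric 3D point (mm) to pixels; the y axis is inverted
        return np.array([p[0] / p[2] * fx + ux, uy - p[1] / p[2] * fy, p[2]])

    def jointImgTo3D(q):
        # back-project a pixel (x, y) with depth z in mm to metric 3D
        return np.array([(q[0] - ux) * q[2] / fx, (uy - q[1]) * q[2] / fy, q[2]])

    p = np.array([35.0, -20.0, 450.0])  # example point 450 mm from the camera
    assert np.allclose(jointImgTo3D(joint3DToImg(p)), p)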