From d10415c5ccb2013de02151a53675071fb2e8943e Mon Sep 17 00:00:00 2001
From: mahdinobar <mahdi.nobar@epfl.ch>
Date: Sun, 5 Apr 2020 00:00:42 +0200
Subject: [PATCH] iPhone rectified center-test routines added. [#1]

---
 src/data/dataset.py           |  14 +-
 src/data/importers.py         | 382 +++++++++++++++++++++++++++++++++-
 src/main_iPhone_com_refine.py | 313 ++++++++++++++++++++++++++++
 src/util/handdetector.py      |   5 +-
 4 files changed, 704 insertions(+), 10 deletions(-)
 create mode 100644 src/main_iPhone_com_refine.py

diff --git a/src/data/dataset.py b/src/data/dataset.py
index 4d6bca1..0458a7d 100644
--- a/src/data/dataset.py
+++ b/src/data/dataset.py
@@ -23,7 +23,7 @@
 """

 import numpy
-from data.importers import NYUImporter, ICVLImporter, MSRA15Importer
+from data.importers import NYUImporter, ICVLImporter, MSRA15Importer, iPhoneImporter

 __author__ = "Paul Wohlhart <wohlhart@icg.tugraz.at>, Markus Oberweger <oberweger@icg.tugraz.at>"
@@ -123,6 +123,18 @@ def __init__(self, imgSeqs=None, basepath=None, localCache=True):

         self.lmi = ICVLImporter(basepath)


+class iPhoneDataset(Dataset):
+    def __init__(self, imgSeqs=None, basepath=None, localCache=True):
+        """
+        constructor
+        """
+        super(iPhoneDataset, self).__init__(imgSeqs, localCache)
+        if basepath is None:
+            basepath = '../../data/iPhone/'
+
+        self.lmi = iPhoneImporter(basepath)
+
+
 class MSRA15Dataset(Dataset):
     def __init__(self, imgSeqs=None, basepath=None, localCache=True):
         """
diff --git a/src/data/importers.py b/src/data/importers.py
index b4988ec..f97e161 100644
--- a/src/data/importers.py
+++ b/src/data/importers.py
@@ -32,14 +32,16 @@
 from util.handdetector import HandDetector
 from data.transformations import transformPoints2D
 import cPickle
+import open3d as o3d
+from PIL import Image

-__author__ = "Paul Wohlhart <wohlhart@icg.tugraz.at>, Markus Oberweger <oberweger@icg.tugraz.at>"
-__copyright__ = "Copyright 2015, ICG, Graz University of Technology, Austria"
-__credits__ = ["Paul Wohlhart", "Markus Oberweger"]
-__license__ = "GPL"
+__author__ = "Mahdi Nobar <mahdi.nobar@epfl.ch>"
+__copyright__ = "Copyright 2020, EPFL"
+__credits__ = ["Mahdi Nobar"]
+__license__ = "confidential"
 __version__ = "1.0"
 __maintainer__ = "Markus Oberweger"
-__email__ = "oberweger@icg.tugraz.at"
+__email__ = "mahdi.nobar@epfl.ch"
 __status__ = "Development"
@@ -525,6 +527,372 @@ def format_coord(x, y):

         plt.show()


+class iPhoneImporter(DepthImporter):
+    """
+    provide functionality to load data from the iPhone TrueDepth camera
+    """
+
+    def __init__(self, basepath, useCache=True, cacheDir='./cache/', refineNet=None, detectorNet=None, derotNet=None, hand=None):
+        """
+        Constructor
+        :param basepath: base path of the iPhone TrueDepth data
+        :return:
+        """
+        # iPhone calibration
+        _h = 240
+        _w = 320
+        iw = 3088.0
+        ih = 2316.0
+        xscale = _h / ih
+        yscale = _w / iw
+        _fx = 2880.0796 * xscale
+        _fy = 2880.0796 * yscale
+        # _cx = 1546.5824 * xscale
+        # _cy = 1153.2035 * yscale
+        _cx = 1153.2035 * xscale
+        _cy = 1546.5824 * yscale
+        super(iPhoneImporter, self).__init__(_fx, _fy, _cy, _cx, hand)  # see Sun et al.
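+        # rationale (assumed): the factory intrinsics refer to the full
+        # 3088x2316 sensor frame and are rescaled to the 320x240 depth map;
+        # cx/cy and the x/y scale factors are deliberately swapped because
+        # the sensor frame is portrait while the depth map is landscape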
+
+        self.depth_map_size = (320, 240)
+        self.basepath = basepath
+        self.useCache = useCache
+        self.cacheDir = cacheDir
+        self.refineNet = refineNet
+        self.derotNet = derotNet
+        self.detectorNet = detectorNet
+        self.numJoints = 21
+        self.crop_joint_idx = 5
+        self.default_cubes = {'P0': (200, 200, 200)}
+        self.sides = {'P0': 'right'}
+
+    def loadDepthMap(self, filename):
+        """
+        Read a depth map stored as PNG from the iPhone TrueDepth camera
+        :param filename: file name to load
+        :return: depth image data in mm, resized to shape (240, 320)
+        """
+# temporary: must be changed ###########################################################################################
+        color_raw = o3d.io.read_image('/home/mahdi/HVR/hvr/hand_pcl_iPhone/Tom_set_2/iPhone/hand30wall50_color.png')
+        depth_raw = o3d.io.read_image(filename)
+        color_raw = o3d.geometry.Image(np.asarray(color_raw))
+        rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
+            color_raw, depth_raw, depth_scale=0.529, depth_trunc=30.0, convert_rgb_to_intensity=False)
+        # iPhone calibration
+        h = np.asarray(color_raw).shape[0]  # 480
+        w = np.asarray(color_raw).shape[1]  # 640
+        iw = 3088.0
+        ih = 2316.0
+        xscale = h / ih
+        yscale = w / iw
+        _fx = 2880.0796 * xscale
+        _fy = 2880.0796 * yscale
+        # _cx = 1546.5824 * xscale
+        # _cy = 1153.2035 * yscale
+        _cx = 1153.2035 * xscale
+        _cy = 1546.5824 * yscale
+        setIntrinsic = o3d.camera.PinholeCameraIntrinsic()
+        setIntrinsic.set_intrinsics(width=w, height=h, fx=_fx, fy=_fy, cx=_cx, cy=_cy)
+        pcd = o3d.geometry.PointCloud.create_from_rgbd_image(
+            rgbd_image,
+            setIntrinsic)
+        # Flip it, otherwise the point cloud will be upside down
+        pcd.transform([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]])
+        z_values = (-np.asarray(pcd.points)[:, 2] * 1000)  # in mm
+        depth_map = np.reshape(z_values, (480, 640))
+        imgdata = np.asarray(Image.fromarray(depth_map).resize((320, 240)))
+# temporary: must be changed ###########################################################################################
+        return np.copy(imgdata)
+
+    def getDepthMapNV(self):
+        """
+        Get the value of invalid depth values in the depth map
+        :return: value
+        """
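+        # 32001 follows the MSRA15 invalid-depth sentinel; it is assumed here
+        # that the same value marks invalid pixels in these captures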
+        return 32001
+
+    def loadSequence(self, seqName, subSeq=None, Nmax=float('inf'), shuffle=False, rng=None, docom=False, cube=None):
+        """
+        Load an image sequence from the dataset
+        :param seqName: sequence name, e.g. P0
+        :param Nmax: maximum number of samples to load
+        :return: returns named image sequence
+        """
+
+        if (subSeq is not None) and (not isinstance(subSeq, list)):
+            raise TypeError("subSeq must be None or list")
+
+        if cube is None:
+            config = {'cube': self.default_cubes[seqName]}
+        else:
+            assert isinstance(cube, tuple)
+            assert len(cube) == 3
+            config = {'cube': cube}
+
+        if subSeq is None:
+            pickleCache = '{}/{}_{}_{}_{}_{}_cache.pkl'.format(self.cacheDir, self.__class__.__name__, seqName, self.hand,
+                                                               HandDetector.detectionModeToString(docom, self.refineNet is not None), config['cube'][0])
+        else:
+            pickleCache = '{}/{}_{}_{}_{}_{}_{}_cache.pkl'.format(self.cacheDir, self.__class__.__name__, seqName, self.hand,
+                                                                  ''.join(subSeq), HandDetector.detectionModeToString(docom, self.refineNet is not None), config['cube'][0])
+        if self.useCache and os.path.isfile(pickleCache):
+            print("Loading cache data from {}".format(pickleCache))
+            f = open(pickleCache, 'rb')
+            (seqName, data, config) = cPickle.load(f)
+            f.close()
+
+            # shuffle data
+            if shuffle and rng is not None:
+                print("Shuffling")
+                rng.shuffle(data)
+            if not(np.isinf(Nmax)):
+                return NamedImgSequence(seqName, data[0:Nmax], config)
+            else:
+                return NamedImgSequence(seqName, data, config)
+
+        self.loadRefineNetLazy(self.refineNet)
+
+        # Load the dataset
+        objdir = '{}/{}/'.format(self.basepath, seqName)
+        subdirs = sorted([name for name in os.listdir(objdir) if os.path.isdir(os.path.join(objdir, name))])
+
+        txt = 'Loading {}'.format(seqName)
+        nImgs = sum([len(files) for r, d, files in os.walk(objdir)]) // 2
+        pbar = pb.ProgressBar(maxval=nImgs, widgets=[txt, pb.Percentage(), pb.Bar()])
+        pbar.start()
+
+        data = []
+        pi = 0
+        for subdir in subdirs:
+            # check for subsequences and skip them if necessary
+            subSeqName = ''
+            if subSeq is not None:
+                if subdir not in subSeq:
+                    continue
+
+                subSeqName = subdir
+
+            # iterate all subdirectories
+            trainlabels = '{}/{}/joint.txt'.format(objdir, subdir)
+
+            inputfile = open(trainlabels)
+            # read number of samples
+            nImgs = int(inputfile.readline())
+
+            for i in range(nImgs):
+                # early stop
+                if len(data) >= Nmax:
+                    break
+
+                line = inputfile.readline()
+                part = line.split(' ')
+
+                # dptFileName = '{}/{}/{}_depth.bin'.format(objdir, subdir, str(i).zfill(6))
+                dptFileName = '{}/{}/{}_depth.png'.format(objdir, subdir, 'hand30wall50')
+
+                if not os.path.isfile(dptFileName):
+                    print("File {} does not exist!".format(dptFileName))
+                    continue
+                dpt = self.loadDepthMap(dptFileName)
+
+                # joints in 3D coordinates, as read from the label file
+                gt3Dorig = np.zeros((self.numJoints, 3), np.float32)
+                for joint in range(gt3Dorig.shape[0]):
+                    for xyz in range(0, 3):
+                        gt3Dorig[joint, xyz] = part[joint*3+xyz]
+
+                # invert axis
+                # gt3Dorig[:, 0] *= (-1.)
+                # gt3Dorig[:, 1] *= (-1.)
+                gt3Dorig[:, 2] *= (-1.)
+
+                # project the 3D joints to image coordinates
+                gtorig = self.joints3DToImg(gt3Dorig)
+
+                if self.hand is not None:
+                    if self.hand != self.sides[seqName]:
+                        # mirror the joints horizontally for the opposite hand side
+                        gtorig[:, 0] -= dpt.shape[1] / 2.
+                        gtorig[:, 0] *= (-1)
+                        gtorig[:, 0] += dpt.shape[1] / 2.
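+                        # re-derive the 3D joints from the mirrored 2D joints
+                        # and flip the depth image left-right to match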
+                        gt3Dorig = self.jointsImgTo3D(gtorig)
+                        dpt = dpt[:, ::-1]
+
+                # print gt3D
+                # self.showAnnotatedDepth(DepthFrame(dpt,gtorig,gtorig,0,gt3Dorig,gt3Dcrop,com3D,dptFileName,'',''))
+
+                # Detect hand
+                hd = HandDetector(dpt, self.fx, self.fy, refineNet=self.refineNet, importer=self)
+                if not hd.checkImage(1.):
+                    print("Skipping image {}, no content".format(dptFileName))
+                    continue
+
+                try:
+                    # initialize the com with the ground-truth middle-finger MCP joint
+                    # of the MSRA15-style labels [z in mm, (x, y) in pixels]
+                    dpt, M, com = hd.cropArea3D(com=gtorig[self.crop_joint_idx], size=config['cube'], docom=docom)
+                except UserWarning:
+                    print("Skipping image {}, no hand detected".format(dptFileName))
+                    continue
+
+                com3D = self.jointImgTo3D(com)
+                gt3Dcrop = gt3Dorig - com3D  # normalize to com
+
+                gtcrop = transformPoints2D(gtorig, M)
+
+                # print("{}".format(gt3Dorig))
+                # self.showAnnotatedDepth(DepthFrame(dpt,gtorig,gtcrop,M,gt3Dorig,gt3Dcrop,com3D,dptFileName,'','',{}))
+
+                data.append(DepthFrame(dpt.astype(np.float32), gtorig, gtcrop, M, gt3Dorig, gt3Dcrop, com3D,
+                                       dptFileName, subSeqName, self.sides[seqName], {}))
+                pbar.update(pi)
+                pi += 1
+
+            inputfile.close()
+
+        pbar.finish()
+        print("Loaded {} samples.".format(len(data)))
+
+        if self.useCache:
+            print("Save cache data to {}".format(pickleCache))
+            f = open(pickleCache, 'wb')
+            cPickle.dump((seqName, data, config), f, protocol=cPickle.HIGHEST_PROTOCOL)
+            f.close()
+
+        # shuffle data
+        if shuffle and rng is not None:
+            print("Shuffling")
+            rng.shuffle(data)
+        return NamedImgSequence(seqName, data, config)
+
+    def jointsImgTo3D(self, sample):
+        """
+        Normalize sample to metric 3D
+        :param sample: joints in (x,y,z) with x,y in image coordinates and z in mm
+        :return: normalized joints in mm
+        """
+        ret = np.zeros((sample.shape[0], 3), np.float32)
+        for i in xrange(sample.shape[0]):
+            ret[i] = self.jointImgTo3D(sample[i])
+        return ret
+
+    def jointImgTo3D(self, sample):
+        """
+        Normalize sample to metric 3D
+        :param sample: joints in (x,y,z) with x,y in image coordinates and z in mm
+        :return: normalized joints in mm
+        """
+        ret = np.zeros((3,), np.float32)
+        ret[0] = (sample[0] - self.ux) * sample[2] / self.fx
+        ret[1] = (self.uy - sample[1]) * sample[2] / self.fy
+        ret[2] = sample[2]
+        return ret
+
+    def joints3DToImg(self, sample):
+        """
+        Denormalize sample from metric 3D to image coordinates
+        :param sample: joints in (x,y,z) with x,y and z in mm
+        :return: joints in (x,y,z) with x,y in image coordinates and z in mm
+        """
+        ret = np.zeros((sample.shape[0], 3), np.float32)
+        for i in xrange(sample.shape[0]):
+            ret[i] = self.joint3DToImg(sample[i])
+        return ret
+
+    def joint3DToImg(self, sample):
+        """
+        Denormalize sample from metric 3D to image coordinates
+        :param sample: joints in (x,y,z) with x,y and z in mm
+        :return: joints in (x,y,z) with x,y in image coordinates and z in mm
+        """
+        ret = np.zeros((3, ), np.float32)
+        if sample[2] == 0.:
+            ret[0] = self.ux
+            ret[1] = self.uy
+            return ret
+        ret[0] = sample[0]/sample[2]*self.fx+self.ux
+        ret[1] = self.uy-sample[1]/sample[2]*self.fy
+        ret[2] = sample[2]
+        return ret
+
+    def getCameraIntrinsics(self):
+        """
+        Get intrinsic camera matrix
+        :return: 3x3 intrinsic camera matrix
+        """
+        ret = np.zeros((3, 3), np.float32)
+        ret[0, 0] = self.fx
+        ret[1, 1] = -self.fy
+        ret[2, 2] = 1
+        ret[0, 2] = self.ux
+        ret[1, 2] = self.uy
+        return ret
+
+    def getCameraProjection(self):
+        """
+        Get homogeneous camera projection matrix
+        :return: 4x4 camera projection matrix
+        """
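+        # fills P so that P.dot((x, y, z, 1)) = (fx*x + ux*z, -fy*y + uy*z, z, z);
+        # dividing by the last component yields pixel coordinates with y inverted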
+        ret = np.zeros((4, 4), np.float32)
+        ret[0, 0] = self.fx
+        ret[1, 1] = -self.fy
+        ret[2, 2] = 1.
+        ret[0, 2] = self.ux
+        ret[1, 2] = self.uy
+        ret[3, 2] = 1.
+        return ret
+
+    def showAnnotatedDepth(self, frame):
+        """
+        Show the depth image
+        :param frame: image to show
+        :return:
+        """
+        import matplotlib
+        import matplotlib.pyplot as plt
+
+        print("img min {}, max {}".format(frame.dpt.min(), frame.dpt.max()))
+        fig = plt.figure()
+        ax = fig.add_subplot(111)
+        ax.imshow(frame.dpt, cmap=matplotlib.cm.jet, interpolation='nearest')
+        ax.scatter(frame.gtcrop[:, 0], frame.gtcrop[:, 1])
+
+        ax.plot(frame.gtcrop[0:5, 0], frame.gtcrop[0:5, 1], c='r')
+        ax.plot(np.hstack((frame.gtcrop[0, 0], frame.gtcrop[5:9, 0])), np.hstack((frame.gtcrop[0, 1], frame.gtcrop[5:9, 1])), c='r')
+        ax.plot(np.hstack((frame.gtcrop[0, 0], frame.gtcrop[9:13, 0])), np.hstack((frame.gtcrop[0, 1], frame.gtcrop[9:13, 1])), c='r')
+        ax.plot(np.hstack((frame.gtcrop[0, 0], frame.gtcrop[13:17, 0])), np.hstack((frame.gtcrop[0, 1], frame.gtcrop[13:17, 1])), c='r')
+        ax.plot(np.hstack((frame.gtcrop[0, 0], frame.gtcrop[17:21, 0])), np.hstack((frame.gtcrop[0, 1], frame.gtcrop[17:21, 1])), c='r')
+
+        def format_coord(x, y):
+            numrows, numcols = frame.dpt.shape
+            col = int(x+0.5)
+            row = int(y+0.5)
+            if 0 <= col < numcols and 0 <= row < numrows:
+                z = frame.dpt[row, col]
+                return 'x=%1.4f, y=%1.4f, z=%1.4f'%(x, y, z)
+            else:
+                return 'x=%1.4f, y=%1.4f'%(x, y)
+        ax.format_coord = format_coord
+
+        for i in range(frame.gtcrop.shape[0]):
+            ax.annotate(str(i), (int(frame.gtcrop[i, 0]), int(frame.gtcrop[i, 1])))
+
+        plt.show()
+
+    @staticmethod
+    def depthToPCL(dpt, T, background_val=0.):
+        """
+        Convert a depth image to a point cloud; note that the hard-coded
+        principal point (160, 120) and focal length 241.42 below are the
+        MSRA15 defaults, and the focal length differs from the iPhone value
+        computed in the constructor
+        """
+        # get valid points and transform
+        pts = np.asarray(np.where(~np.isclose(dpt, background_val))).transpose()
+        pts = np.concatenate([pts[:, [1, 0]] + 0.5, np.ones((pts.shape[0], 1), dtype='float32')], axis=1)
+        pts = np.dot(np.linalg.inv(np.asarray(T)), pts.T).T
+        pts = (pts[:, 0:2] / pts[:, 2][:, None]).reshape((pts.shape[0], 2))
+
+        # replace the invalid data
+        depth = dpt[(~np.isclose(dpt, background_val))]
+
+        # get x and y data in a vectorized way
+        row = (pts[:, 0] - 160.) / 241.42 * depth
+        col = (120. - pts[:, 1]) / 241.42 * depth
+
+        # combine x,y,depth
+        return np.column_stack((row, col, depth))
+

 class MSRA15Importer(DepthImporter):
     """
@@ -560,9 +928,9 @@ def __init__(self, basepath, useCache=True, cacheDir='./cache/', refineNet=None,

     def loadDepthMap(self, filename):
         """
-        Read a depth-map
+        Read a depth map in the MSRA15 binary format
         :param filename: file name to load
-        :return: image data of depth image
+        :return: depth image data in mm, shape (240, 320)
         """
         with open(filename, 'rb') as f:
             # first 6 uint define the full image
diff --git a/src/main_iPhone_com_refine.py b/src/main_iPhone_com_refine.py
new file mode 100644
index 0000000..9abbda7
--- /dev/null
+++ b/src/main_iPhone_com_refine.py
@@ -0,0 +1,313 @@
+"""This is the main file for refining the hand center of mass (CoM) on the iPhone dataset
+
+Copyright 2015 Markus Oberweger, ICG,
+Graz University of Technology
+
+This file is part of DeepPrior.
+
+DeepPrior is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+DeepPrior is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with DeepPrior. If not, see <http://www.gnu.org/licenses/>.
+"""
+
+import numpy
+import gc
+import matplotlib
+
+matplotlib.use('Agg')  # plot to file
+import matplotlib.pyplot as plt
+from net.scalenet import ScaleNetParams, ScaleNet
+from trainer.scalenettrainer import ScaleNetTrainerParams, ScaleNetTrainer
+from util.handdetector import HandDetector
+import os
+import cPickle
+from data.importers import iPhoneImporter
+from data.dataset import iPhoneDataset
+from util.handpose_evaluation import MSRAHandposeEvaluation
+from util.helpers import shuffle_many_inplace
+
+if __name__ == '__main__':
+
+    eval_prefix = 'iPhone_COM_AUGMENT'
+    if not os.path.exists('./eval/'+eval_prefix+'/'):
+        os.makedirs('./eval/'+eval_prefix+'/')
+
+    rng = numpy.random.RandomState(23455)
+
+    print("create data")
+    aug_modes = ['com', 'rot', 'none']  # 'sc',
+
+    di = iPhoneImporter('../data/iPhone/')
+    # Seq0_1 = di.loadSequence('P0', shuffle=True, rng=rng, docom=False)
+    # Seq0_1 = Seq0_1._replace(name='P0_gt')
+    # Seq0_2 = di.loadSequence('P0', shuffle=True, rng=rng, docom=True)
+    # Seq0_2 = Seq0_2._replace(name='P0_com')
+    # Seq1_1 = di.loadSequence('P1', shuffle=True, rng=rng, docom=False)
+    # Seq1_1 = Seq1_1._replace(name='P1_gt')
+    # Seq1_2 = di.loadSequence('P1', shuffle=True, rng=rng, docom=True)
+    # Seq1_2 = Seq1_2._replace(name='P1_com')
+    # Seq2_1 = di.loadSequence('P2', shuffle=True, rng=rng, docom=False)
+    # Seq2_1 = Seq2_1._replace(name='P2_gt')
+    # Seq2_2 = di.loadSequence('P2', shuffle=True, rng=rng, docom=True)
+    # Seq2_2 = Seq2_2._replace(name='P2_com')
+    # Seq3_1 = di.loadSequence('P3', shuffle=True, rng=rng, docom=False)
+    # Seq3_1 = Seq3_1._replace(name='P3_gt')
+    # Seq3_2 = di.loadSequence('P3', shuffle=True, rng=rng, docom=True)
+    # Seq3_2 = Seq3_2._replace(name='P3_com')
+    # Seq4_1 = di.loadSequence('P4', shuffle=True, rng=rng, docom=False)
+    # Seq4_1 = Seq4_1._replace(name='P4_gt')
+    # Seq4_2 = di.loadSequence('P4', shuffle=True, rng=rng, docom=True)
+    # Seq4_2 = Seq4_2._replace(name='P4_com')
+    # Seq5_1 = di.loadSequence('P5', shuffle=True, rng=rng, docom=False)
+    # Seq5_1 = Seq5_1._replace(name='P5_gt')
+    # Seq5_2 = di.loadSequence('P5', shuffle=True, rng=rng, docom=True)
+    # Seq5_2 = Seq5_2._replace(name='P5_com')
+    # Seq6_1 = di.loadSequence('P6', shuffle=True, rng=rng, docom=False)
+    # Seq6_1 = Seq6_1._replace(name='P6_gt')
+    # Seq6_2 = di.loadSequence('P6', shuffle=True, rng=rng, docom=True)
+    # Seq6_2 = Seq6_2._replace(name='P6_com')
+    # Seq7_1 = di.loadSequence('P7', shuffle=True, rng=rng, docom=False)
+    # Seq7_1 = Seq7_1._replace(name='P7_gt')
+    # Seq7_2 = di.loadSequence('P7', shuffle=True, rng=rng, docom=True)
+    # Seq7_2 = Seq7_2._replace(name='P7_com')
+    # Seq8_1 = di.loadSequence('P8', shuffle=True, rng=rng, docom=False)
+    # Seq8_1 = Seq8_1._replace(name='P8_gt')
+    # Seq8_2 = di.loadSequence('P8', shuffle=True, rng=rng, docom=True)
+    # Seq8_2 = Seq8_2._replace(name='P8_com')
+    # trainSeqs = [Seq0_1, Seq0_2, Seq1_1, Seq1_2, Seq2_1, Seq2_2, Seq3_1, Seq3_2,
+    #              Seq4_1, Seq4_2, Seq5_1, Seq5_2, Seq6_1, Seq6_2, Seq7_1, Seq7_2,
+    #              Seq8_1, Seq8_2]
+
+    # trainSeqs = [Seq0_1]
+    Seq_0 = di.loadSequence('P0', docom=True)
+    testSeqs = [Seq_0]
+
+    # # create training data
+    # trainDataSet = MSRA15Dataset(trainSeqs, localCache=False)
+    # nSamp = numpy.sum([len(s.data) for s in trainSeqs])
+    # d1, g1 = trainDataSet.imgStackDepthOnly(trainSeqs[0].name)
+    # train_data = numpy.ones((nSamp, d1.shape[1], d1.shape[2], d1.shape[3]), dtype='float32')
+    # train_gt3D = numpy.ones((nSamp, g1.shape[1], g1.shape[2]), dtype='float32')
+    # train_data_com = numpy.ones((nSamp, 3), dtype='float32')
+    # train_data_M = numpy.ones((nSamp, 3, 3), dtype='float32')
+    # train_data_cube = numpy.ones((nSamp, 3), dtype='float32')
+    # del d1, g1
+    # gc.collect()
+    # gc.collect()
+    # gc.collect()
+    # oldIdx = 0
+    # for seq in trainSeqs:
+    #     d, g = trainDataSet.imgStackDepthOnly(seq.name)
+    #     train_data[oldIdx:oldIdx+d.shape[0]] = d
+    #     train_gt3D[oldIdx:oldIdx+d.shape[0]] = g
+    #     train_data_com[oldIdx:oldIdx+d.shape[0]] = numpy.asarray([da.com for da in seq.data])
+    #     train_data_M[oldIdx:oldIdx+d.shape[0]] = numpy.asarray([da.T for da in seq.data])
+    #     train_data_cube[oldIdx:oldIdx+d.shape[0]] = numpy.asarray([seq.config['cube']]*d.shape[0])
+    #     oldIdx += d.shape[0]
+    #     del d, g
+    #     gc.collect()
+    #     gc.collect()
+    #     gc.collect()
+    # shuffle_many_inplace([train_data, train_gt3D, train_data_com, train_data_cube, train_data_M], random_state=rng)
+    #
+    # mb = (train_data.nbytes) / (1024 * 1024)
+    # print("data size: {}Mb".format(mb))
+
+    testDataSet = iPhoneDataset(testSeqs)
+    test_data, test_gt3D = testDataSet.imgStackDepthOnly(testSeqs[0].name)
+
+    val_data = test_data
+    val_gt3D = test_gt3D
+
+    ####################################
+    # # resize data
+    # dsize = (int(train_data.shape[2]//2), int(train_data.shape[3]//2))
+    # xstart = int(train_data.shape[2]/2-dsize[0]/2)
+    # xend = xstart + dsize[0]
+    # ystart = int(train_data.shape[3]/2-dsize[1]/2)
+    # yend = ystart + dsize[1]
+    # train_data2 = train_data[:, :, ystart:yend, xstart:xend]
+    #
+    # dsize = (int(train_data.shape[2]//4), int(train_data.shape[3]//4))
+    # xstart = int(train_data.shape[2]/2-dsize[0]/2)
+    # xend = xstart + dsize[0]
+    # ystart = int(train_data.shape[3]/2-dsize[1]/2)
+    # yend = ystart + dsize[1]
+    # train_data4 = train_data[:, :, ystart:yend, xstart:xend]
+    #
+    # dsize = (int(train_data.shape[2]//2), int(train_data.shape[3]//2))
+    # xstart = int(train_data.shape[2]/2-dsize[0]/2)
+    # xend = xstart + dsize[0]
+    # ystart = int(train_data.shape[3]/2-dsize[1]/2)
+    # yend = ystart + dsize[1]
+    # val_data2 = val_data[:, :, ystart:yend, xstart:xend]
+    #
+    # dsize = (int(train_data.shape[2]//4), int(train_data.shape[3]//4))
+    # xstart = int(train_data.shape[2]/2-dsize[0]/2)
+    # xend = xstart + dsize[0]
+    # ystart = int(train_data.shape[3]/2-dsize[1]/2)
+    # yend = ystart + dsize[1]
+    # val_data4 = val_data[:, :, ystart:yend, xstart:xend]
+    #
+    # dsize = (int(train_data.shape[2]//2), int(train_data.shape[3]//2))
+    # xstart = int(train_data.shape[2]/2-dsize[0]/2)
+    # xend = xstart + dsize[0]
+    # ystart = int(train_data.shape[3]/2-dsize[1]/2)
+    # yend = ystart + dsize[1]
+    # test_data2 = test_data[:, :, ystart:yend, xstart:xend]
+    #
+    # dsize = (int(train_data.shape[2]//4), int(train_data.shape[3]//4))
+    # xstart = int(train_data.shape[2]/2-dsize[0]/2)
+    # xend = xstart + dsize[0]
+    # ystart = int(train_data.shape[3]/2-dsize[1]/2)
+    # yend = ystart + dsize[1]
+    # test_data4 = test_data[:, :, ystart:yend, xstart:xend]
+    #
+    # print train_gt3D.max(), test_gt3D.max(), train_gt3D.min(), test_gt3D.min()
+    # print train_data.max(), test_data.max(), train_data.min(), test_data.min()
+    #
+    # imgSizeW = train_data.shape[3]
+    # imgSizeH = train_data.shape[2]
+    # nChannels = train_data.shape[1]
+
+    #############################################################################
+    print("create network")
+    batchSize = 64
+    # poseNetParams = ScaleNetParams(type=1, nChan=nChannels, wIn=imgSizeW, hIn=imgSizeH, batchSize=batchSize,
+    #                                resizeFactor=2, numJoints=1, nDims=3)
+    nChannels = 1
+    imgSizeW = 128
+    imgSizeH = 128
+    poseNetParams = ScaleNetParams(type=1, nChan=nChannels, wIn=imgSizeW, hIn=imgSizeH, batchSize=batchSize,
+                                   resizeFactor=2, numJoints=1, nDims=3)
+    poseNet = ScaleNet(rng, cfgParams=poseNetParams)
+
+    # poseNetTrainerParams = ScaleNetTrainerParams()
+    # poseNetTrainerParams.use_early_stopping = False
+    # poseNetTrainerParams.batch_size = batchSize
+    # poseNetTrainerParams.learning_rate = 0.0005
+    # poseNetTrainerParams.weightreg_factor = 0.0001
+    # poseNetTrainerParams.force_macrobatch_reload = True
+    # poseNetTrainerParams.para_augment = True
+    # poseNetTrainerParams.augment_fun_params = {'fun': 'augment_poses', 'args': {'normZeroOne': False,
+    #                                                                             'di': di,
+    #                                                                             'aug_modes': aug_modes,
+    #                                                                             'hd': HandDetector(train_data[0, 0].copy(), abs(di.fx), abs(di.fy), importer=di)}}
+    #
+    # print("setup trainer")
+    # poseNetTrainer = ScaleNetTrainer(poseNet, poseNetTrainerParams, rng, './eval/'+eval_prefix)
+    # poseNetTrainer.setData(train_data, train_gt3D[:, di.crop_joint_idx, :], val_data, val_gt3D[:, di.crop_joint_idx, :])
+    # poseNetTrainer.addStaticData({'val_data_x1': val_data2, 'val_data_x2': val_data4})
+    # poseNetTrainer.addManagedData({'train_data_x1': train_data2, 'train_data_x2': train_data4})
+    # poseNetTrainer.addManagedData({'train_data_com': train_data_com,
+    #                                'train_data_cube': train_data_cube,
+    #                                'train_data_M': train_data_M,
+    #                                'train_gt3D': train_gt3D})
+    # poseNetTrainer.compileFunctions()
+
+    # ###################################################################
+    # # TRAIN
+    # train_res = poseNetTrainer.train(n_epochs=100)
+    # train_costs = train_res[0]
+    # val_errs = train_res[2]
+    #
+    # # plot cost
+    # fig = plt.figure()
+    # plt.semilogy(train_costs)
+    # plt.show(block=False)
+    # fig.savefig('./eval/'+eval_prefix+'/'+eval_prefix+'_cost.png')
+    #
+    # fig = plt.figure()
+    # plt.semilogy(val_errs)
+    # plt.show(block=False)
+    # fig.savefig('./eval/'+eval_prefix+'/'+eval_prefix+'_errs.png')
+
+    # # save results
+    # poseNet.save("./eval/{}/net_{}.pkl".format(eval_prefix, eval_prefix))
+    poseNet.load("./eval/{}/net_{}.pkl".format(eval_prefix, eval_prefix))
+
+    ####################################################
+    # TEST
+    print("Testing ...")
+    gt3D = [j.gt3Dorig[di.crop_joint_idx].reshape(1, 3) for j in testSeqs[0].data]
+    # jts = poseNet.computeOutput([test_data, test_data2, test_data4])
+    jts = poseNet.computeOutput([test_data, test_data[:, :, 32:96, 32:96], test_data[:, :, 48:80, 48:80]])
+    joints = []
+    for i in xrange(test_data.shape[0]):
+        joints.append(jts[i].reshape(1, 3)*(testSeqs[0].config['cube'][2]/2.) + testSeqs[0].data[i].com)
+    print "jts = {}".format(jts)
+    # 3D coordinates of the refined center = joints
+    print "joints = {}".format(joints)
+########################################################################################################################
+    # plot
+    import matplotlib.pyplot as plt
+    import matplotlib
+    import numpy as np
+    fig, ax = plt.subplots()
+    ax.imshow(Seq_0.data[0].dpt, cmap=matplotlib.cm.jet)
+
+    # iPhone calibration
+    h = 128.
+    w = 128.
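+    # caution: the scale factors below are derived from the 128x128 crop
+    # rather than the 320x240 depth map, and _uy reuses 1153.2035 where
+    # __init__ used 1546.5824 for cy; this looks inconsistent with the
+    # importer intrinsics and may need revisiting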
+    iw = 3088.0
+    ih = 2316.0
+    yscale = h / ih
+    xscale = w / iw
+    _fx = 2880.0796 * xscale
+    _fy = 2880.0796 * yscale
+    _ux = 1153.2035 * xscale
+    _uy = 1153.2035 * yscale
+
+    icom = np.empty((2, 1))
+    icom[0] = Seq_0.data[0].gtorig[5][0] * xscale
+    icom[1] = Seq_0.data[0].gtorig[5][1] * yscale
+    ax.scatter(icom[0], icom[1], marker='+', c='yellow', s=30, label='initial center: MSRA middle-finger MCP joint')  # initial hand com in IMG
+
+    gt_com = np.empty((2, 1))
+    gt_com3D = Seq_0.data[0].com
+    gt_com[0] = gt_com3D[0] / gt_com3D[2] * _fx + _ux
+    gt_com[1] = gt_com3D[1] / gt_com3D[2] * _fy + _uy
+    ax.scatter(gt_com[0], gt_com[1], marker='+', c='blue', s=30, label='ground truth refined hand center')  # ground-truth refined com in IMG
+
+    refined_com = np.empty((2, 1))
+    refined_com3D = joints[0][0]
+    refined_com[0] = refined_com3D[0] / refined_com3D[2] * _fx + _ux
+    refined_com[1] = refined_com3D[1] / refined_com3D[2] * _fy + _uy
+    ax.scatter(refined_com[0], refined_com[1], marker='*', c='lime', s=30, label='refined hand center posenet estimation')  # posenet-refined com in IMG
+    # ax.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
+    ax.legend()
+
+    plt.savefig('/home/mahdi/HVR/git_repos/deep-prior-pp/src/cache/iPhone_30hand50wall.png')
+
+########################################################################################################################
+
+    # hpe = MSRAHandposeEvaluation(gt3D, joints)
+    # hpe.subfolder += '/'+eval_prefix+'/'
+    # print("Mean error: {}mm, max error: {}mm".format(hpe.getMeanError(), hpe.getMaxError()))
+    #
+    # # save results
+    # cPickle.dump(joints, open("./eval/{}/result_{}_{}.pkl".format(eval_prefix, os.path.split(__file__)[1], eval_prefix), "wb"), protocol=cPickle.HIGHEST_PROTOCOL)
+    #
+    # print "Testing baseline"
+    #
+    # #################################
+    # # BASELINE
+    # com = [j.com for j in testSeqs[0].data]
+    # hpe_com = MSRAHandposeEvaluation(gt3D, numpy.asarray(com).reshape((len(gt3D), 1, 3)))
+    # hpe_com.subfolder += '/'+eval_prefix+'/'
+    # print("Mean error: {}mm".format(hpe_com.getMeanError()))
+    #
+    # hpe.plotEvaluation(eval_prefix, methodName='Our regr', baseline=[('CoM', hpe_com)])
+    print('ended')
diff --git a/src/util/handdetector.py b/src/util/handdetector.py
index 49ec43f..34417b5 100644
--- a/src/util/handdetector.py
+++ b/src/util/handdetector.py
@@ -54,11 +54,12 @@ def __init__(self, dpt, fx, fy, importer=None, refineNet=None):
         :param fy: camera focal lenght
         """
         self.dpt = dpt
-        self.maxDepth = min(1500, dpt.max())
+        self.maxDepth = min(400, dpt.max())
         self.minDepth = max(10, dpt.min())
         # set values out of range to 0
         self.dpt[self.dpt > self.maxDepth] = 0.
-        self.dpt[self.dpt < self.minDepth] = 0.
+        # self.dpt[self.dpt < self.minDepth] = 0.
+
         # camera settings
         self.fx = fx
         self.fy = fy
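
Note: as a sanity check on the rescaled intrinsics above, here is a minimal
sketch (not part of the patch) showing that joint3DToImg and jointImgTo3D, as
defined in iPhoneImporter, invert each other under those values:

    import numpy as np

    # rescaled iPhone TrueDepth intrinsics for the landscape 320x240 depth map;
    # fx uses the 240/2316 scale and ux/uy come from the swapped cx/cy values,
    # mirroring the constructor above
    fx = 2880.0796 * (240 / 2316.0)   # ~298.4
    fy = 2880.0796 * (320 / 3088.0)   # ~298.5
    ux = 1546.5824 * (320 / 3088.0)   # ~160.3
    uy = 1153.2035 * (240 / 2316.0)   # ~119.5

    def joint3DToImg(p):
        # project a metric 3D point (mm) to pixels; the y axis is inverted
        return np.array([p[0] / p[2] * fx + ux, uy - p[1] / p[2] * fy, p[2]])

    def jointImgTo3D(q):
        # back-project a pixel (x, y) with depth z in mm to metric 3D
        return np.array([(q[0] - ux) * q[2] / fx, (uy - q[1]) * q[2] / fy, q[2]])

    p = np.array([35.0, -20.0, 450.0])  # example point 450 mm from the camera
    assert np.allclose(jointImgTo3D(joint3DToImg(p)), p)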