add the matrixDepth constraint and support sharing AVCaptureSession b…

…etween videocapturers
matrix-org · Feb 2, 2018 · 9c58465 · 9c58465
1 parent 2f5d293
commit 9c58465
Show file tree

Hide file tree

Showing 20 changed files with 447 additions and 44 deletions.
diff --git a/api/mediaconstraintsinterface.cc b/api/mediaconstraintsinterface.cc
@@ -153,6 +153,9 @@ const char MediaConstraintsInterface::kCpuOveruseDetection[] =
     "googCpuOveruseDetection";
 const char MediaConstraintsInterface::kPayloadPadding[] = "googPayloadPadding";
 
+// Matrix experiments: constraint key used to request depth capture.
+const char MediaConstraintsInterface::kEnableMatrixDepth[] = "matrixDepth";
+
 
 // Set |value| to the value associated with the first appearance of |key|, or
 // return false if |key| is not found.

diff --git a/api/mediaconstraintsinterface.h b/api/mediaconstraintsinterface.h
@@ -118,6 +118,9 @@ class MediaConstraintsInterface {
   static const char kCpuOveruseDetection[];  // googCpuOveruseDetection
   static const char kPayloadPadding[];  // googPayloadPadding
 
+  // Matrix experiments (constraint key used to request depth capture).
+  static const char kEnableMatrixDepth[]; // matrixDepth
+
   // The prefix of internal-only constraints whose JS set values should be
   // stripped by Chrome before passed down to Libjingle.
   static const char kInternalConstraintPrefix[];

diff --git a/api/mediastreaminterface.h b/api/mediastreaminterface.h
@@ -140,6 +140,10 @@ class VideoTrackSourceInterface
   // the encoder.
   virtual rtc::Optional<bool> needs_denoising() const = 0;
 
+  // Indicates that the capturer should emit depth data rather than video.
+  // Experimental hack for Matrix; this should be a format option instead.
+  virtual rtc::Optional<bool> enable_depth() const = 0;
+
   // Returns false if no stats are available, e.g, for a remote source, or a
   // source which has not seen its first frame yet.
   //

diff --git a/api/videosourceproxy.h b/api/videosourceproxy.h
@@ -26,6 +26,7 @@ BEGIN_PROXY_MAP(VideoTrackSource)
   PROXY_CONSTMETHOD0(bool, remote)
   PROXY_CONSTMETHOD0(bool, is_screencast)
   PROXY_CONSTMETHOD0(rtc::Optional<bool>, needs_denoising)
+  PROXY_CONSTMETHOD0(rtc::Optional<bool>, enable_depth)
   PROXY_METHOD1(bool, GetStats, Stats*)
   PROXY_WORKER_METHOD2(void,
                        AddOrUpdateSink,

diff --git a/matrix/test.c b/matrix/test.c
@@ -0,0 +1,133 @@
+#include <stdio.h>
+#include <stdint.h>
+#include <math.h>
+
+uint8_t depthToR[65536];
+uint8_t depthToG[65536];
+uint8_t depthToB[65536];
+
+// periodicity constants as per the paper (end of sec 3)
+double np = 512.0;
+double w = 65536.0;
+double p = np / w;
+
+// RGB to depth
+
+// Fills depthToR / depthToG / depthToB with one entry per possible 16-bit
+// depth sample, using the triangle-wave encoding described in
+// http://reality.cs.ucl.ac.uk/projects/depth-streaming/depth-streaming.pdf
+// Every generated entry is also printed to stdout as "R,G,B".
+// The tables hold RGB only; no YUV/420p conversion is performed here.
+void buildLUT() {
+  // NOTE(review): the table is indexed by the raw 16-bit sample. If the
+  // truedepth camera delivers IEEE 754-2008 half-precision floats, finite
+  // positive values range from 0.0 to 65504.0 - confirm which
+  // representation the callers actually feed in.
+
+  for (size_t depth = 0; depth < 65536; depth++) {
+    // the paper describes three colour components: L, Ha and Hb, which we map to BGR.
+    // L is low-res depth data; H is high-res.
+
+    double L = (depth + 0.5) / w;
+
+    double Ha = fmod(L / (p / 2.0), 2.0);
+    if (Ha > 1.0) Ha = 2.0 - Ha;
+
+    // we add 1.0 to avoid taking the modulus of a negative number
+    double Hb = fmod((1.0 + L - (p / 4.0)) / (p / 2.0), 2.0);
+    if (Hb > 1.0) Hb = 2.0 - Hb;
+
+    // rescale L in order to increase its dynamic range, as in practice the data
+    // we get from the truedepth camera seems to only be between 10K and 20K, rather
+    // than the 0K-65K range we're considering here...
+    L *= 4.0;
+    L -= 0.3;
+
+    // The rescale pushes L outside [0, 1] for samples away from the expected
+    // range. Converting an out-of-range double to uint8_t is undefined
+    // behaviour, so saturate first.
+    if (L < 0.0) L = 0.0;
+    if (L > 1.0) L = 1.0;
+
+    depthToR[depth] = (uint8_t)(Hb * 255);
+    depthToG[depth] = (uint8_t)(Ha * 255);
+    depthToB[depth] = (uint8_t)(L * 255);
+
+    printf("%d,%d,%d\n", depthToR[depth], depthToG[depth], depthToB[depth]);
+  }
+}
+
+// Depth to RGB
+
+// Returns the quadrant index m(L) = floor(4 * L / p - 0.5) mod 4 used by the
+// depth decoder. The result is always in 0..3.
+int m(double L) {
+    // fmod() keeps the sign of its first argument, and the floor() term is
+    // negative for L < p / 8, so fold negative remainders back into 0..3.
+    double quadrant = fmod(floor((4.0 * (L / p)) - 0.5), 4.0);
+    if (quadrant < 0.0) quadrant += 4.0;
+    // Plain C cast: this is a .c file, so the functional form int(...) is invalid.
+    return (int)quadrant;
+}
+
+// Returns the coarse depth term
+//   L0 = L - ((L - p/8) mod p) + (p/4) * m(L) - p/8
+// where "mod" is the non-negative mathematical modulus.
+double lzero(double L) {
+    // fmod() returns a negative remainder when L < p / 8; shift it into
+    // [0, p) so the expression matches the formula above.
+    double phase = fmod(L - (p / 8.0), p);
+    if (phase < 0.0) phase += p;
+    return L - phase + (((p / 4.0) * (double)m(L)) - (p / 8.0));
+}
+
+// Returns the fine depth offset selected by the quadrant index m(L):
+//   0 -> (p/2) * Ha,        1 -> (p/2) * Hb,
+//   2 -> (p/2) * (1 - Ha),  3 -> (p/2) * (1 - Hb)
+double delta(double L, double Ha, double Hb) {
+    // Normalise the index into 0..3 so that every call takes one of the four
+    // branches; previously an out-of-range index fell off the end of the
+    // function without returning a value.
+    switch (((m(L) % 4) + 4) % 4) {
+    case 0:
+        return (p / 2.0) * Ha;
+    case 1:
+        return (p / 2.0) * Hb;
+    case 2:
+        return (p / 2.0) * (1.0 - Ha);
+    case 3:
+        return (p / 2.0) * (1.0 - Hb);
+    }
+    return 0.0;  // not reachable; keeps every control path returning a value
+}
+
+// Reconstructs a depth value from the three decoded colour components:
+// the coarse term lzero(L) plus the fine term delta(L, Ha, Hb), scaled by 2000.
+double d(double L, double Ha, double Hb) {
+    const double coarse = lzero(L);
+    const double fine = delta(L, Ha, Hb);
+    return 2000.0 * (coarse + fine);
+}
+
+// Main method
+
+// Reads 640x480 16-bit depth samples (host byte order) from
+// "depth-buffer.raw", maps each sample through the depth->RGB lookup tables
+// and writes the interleaved 8-bit RGB result to "depth-out.raw".
+// Returns 0 on success and 1 if either file cannot be opened or written.
+int main() {
+  enum { kWidth = 640, kHeight = 480, kPixels = kWidth * kHeight };
+
+  // Static storage keeps ~1.5 MB off the stack and zero-initialises the
+  // input, so a short read leaves well-defined (zero) samples behind.
+  static uint16_t depth[kPixels];
+  static uint8_t out[kPixels * 3];
+
+  // The tables must be populated before they are used below.
+  buildLUT();
+
+  // read our file
+  FILE * fp = fopen("depth-buffer.raw", "rb");
+  if (fp == NULL) {
+    fputs("Error opening depth-buffer.raw\n", stderr);
+    return 1;
+  }
+  size_t samplesRead = fread(depth, sizeof(uint16_t), (size_t)kPixels, fp);
+  if (ferror( fp ) != 0) {
+    fputs("Error reading file", stderr);
+  }
+  fclose(fp);
+  if (samplesRead < (size_t)kPixels) {
+    fprintf(stderr, "Short read: got %lu of %lu samples\n",
+            (unsigned long)samplesRead, (unsigned long)kPixels);
+  }
+
+  // One RGB triple per input sample, written in R, G, B order.
+  uint8_t * dst = out;
+  for (size_t i = 0; i < (size_t)kPixels; i++) {
+    //uint16_t val = ((depth[i] & 0xff) << 8) | ((depth[i] & 0xff00) >> 8);
+    uint16_t val = depth[i];
+
+    dst[0] = depthToR[val];
+    dst[1] = depthToG[val];
+    dst[2] = depthToB[val];
+    dst += 3;
+  }
+
+  fp = fopen("depth-out.raw", "wb");
+  if (fp == NULL) {
+    fputs("Error opening depth-out.raw\n", stderr);
+    return 1;
+  }
+  size_t written = fwrite(out, 1, sizeof(out), fp);
+  fclose(fp);
+  if (written != sizeof(out)) {
+    fputs("Error writing depth-out.raw\n", stderr);
+    return 1;
+  }
+
+  // hit it with GLSL to roundtrip back again...
+  return 0;
+}
+
diff --git a/matrix/test2.c b/matrix/test2.c
@@ -0,0 +1,150 @@
+#include <stdio.h>
+#include <stdint.h>
+#include <math.h>
+#include <limits.h>
+#include <float.h>
+
+uint8_t depthToR[65536];
+uint8_t depthToG[65536];
+uint8_t depthToB[65536];
+
+// periodicity constants as per the paper (end of sec 3)
+float np = 512.0;
+float w = 65536.0;
+float p = 0.0078125;// np / w;
+
+// RGB to depth
+
+// Fills depthToR / depthToG / depthToB with one entry per possible 16-bit
+// depth sample, using the triangle-wave encoding described in
+// http://reality.cs.ucl.ac.uk/projects/depth-streaming/depth-streaming.pdf
+// The tables hold RGB only; no YUV/420p conversion is performed here.
+void buildLUT() {
+  // NOTE(review): the table is indexed by the raw 16-bit sample. If the
+  // truedepth camera delivers IEEE 754-2008 half-precision floats, finite
+  // positive values range from 0.0 to 65504.0 - confirm which
+  // representation the callers actually feed in.
+
+  for (size_t depth = 0; depth < 65536; depth++) {
+    // the paper describes three colour components: L, Ha and Hb, which we map to BGR.
+    // L is low-res depth data; H is high-res.
+
+    float L = (depth + 0.5) / w;
+
+    float Ha = fmod(L / (p / 2.0), 2.0);
+    if (Ha > 1.0) Ha = 2.0 - Ha;
+
+    // we add 1.0 to avoid taking the modulus of a negative number
+    float Hb = fmod((1.0 + L - (p / 4.0)) / (p / 2.0), 2.0);
+    if (Hb > 1.0) Hb = 2.0 - Hb;
+
+    // rescale L in order to increase its dynamic range, as in practice the data
+    // we get from the truedepth camera seems to only be between 10K and 20K, rather
+    // than the 0K-65K range we're considering here...
+    L *= 4.0;
+    L -= 0.3;
+
+    // The rescale pushes L outside [0, 1] for samples away from the expected
+    // range. Converting an out-of-range float to uint8_t is undefined
+    // behaviour, so saturate first.
+    if (L < 0.0f) L = 0.0f;
+    if (L > 1.0f) L = 1.0f;
+
+    depthToR[depth] = (uint8_t)(Hb * 255);
+    depthToG[depth] = (uint8_t)(Ha * 255);
+    depthToB[depth] = (uint8_t)(L * 255);
+
+    //printf("%d,%d,%d\n", depthToR[depth], depthToG[depth], depthToB[depth]);
+  }
+}
+
+// Depth to RGB
+
+// Returns the quadrant index m(L) = floor(4 * L / p - 0.5) mod 4 used by the
+// depth decoder. The result is always in 0..3.
+int m(float L) {
+    // fmod() keeps the sign of its first argument, and the floor() term is
+    // negative for L < p / 8, so fold negative remainders back into 0..3.
+    double quadrant = fmod(floor((4.0 * (L / p)) - 0.5), 4.0);
+    if (quadrant < 0.0) quadrant += 4.0;
+    return (int)quadrant;
+}
+
+// Returns the coarse depth term
+//   L0 = L - ((L - p/8) mod p) + (p/4) * m(L) - p/8
+// where "mod" is the non-negative mathematical modulus.
+float lzero(float L) {
+    // fmod() returns a negative remainder when L < p / 8; shift it into
+    // [0, p) so the expression matches the formula above.
+    double phase = fmod(L - (p / 8.0), p);
+    if (phase < 0.0) phase += p;
+    return L - phase + (((p / 4.0) * (float)m(L)) - (p / 8.0));
+}
+
+// Returns the fine depth offset selected by the quadrant index m(L):
+//   0 -> (p/2) * Ha,        1 -> (p/2) * Hb,
+//   2 -> (p/2) * (1 - Ha),  3 -> (p/2) * (1 - Hb)
+float delta(float L, float Ha, float Hb) {
+    // Normalise the index into 0..3 so that every call takes one of the four
+    // branches; previously an out-of-range index fell off the end of the
+    // function without returning a value.
+    switch (((m(L) % 4) + 4) % 4) {
+    case 0:
+        return (p / 2.0) * Ha;
+    case 1:
+        return (p / 2.0) * Hb;
+    case 2:
+        return (p / 2.0) * (1.0 - Ha);
+    case 3:
+        return (p / 2.0) * (1.0 - Hb);
+    }
+    return 0.0f;  // not reachable; keeps every control path returning a value
+}
+
+// Reconstructs a depth value from the three decoded colour components.
+// Logs the inputs and the unscaled sum lzero(L) + delta(L, Ha, Hb) to stdout,
+// then returns that sum multiplied by w.
+float d(float L, float Ha, float Hb) {
+    const float decoded = lzero(L) + delta(L, Ha, Hb);
+    printf("%f,%f,%f => %f\n", L, Ha, Hb, decoded);
+    return w * decoded;
+}
+
+// Converts an IEEE 754 half-precision bit pattern to a float.
+// Handles signed zero, subnormals, normal numbers, infinities and NaNs.
+float half2float(uint16_t d) {
+  const uint32_t sign = ((uint32_t)d & 0x8000u) << 16;
+  const uint32_t exponent = ((uint32_t)d >> 10) & 0x1Fu;
+  const uint32_t mantissa = (uint32_t)d & 0x03FFu;
+
+  if (exponent == 0) {
+    // Zero or subnormal: the value is mantissa * 2^-24. Simply re-biasing the
+    // exponent (as for normal numbers) would turn 0 into 2^-15.
+    const float magnitude = (float)mantissa * (1.0f / 16777216.0f);
+    return sign ? -magnitude : magnitude;
+  }
+
+  // Reinterpret through a union rather than casting a uint32_t* to float*,
+  // which breaks the strict-aliasing rules.
+  union {
+    uint32_t bits;
+    float value;
+  } out;
+
+  if (exponent == 0x1Fu) {
+    // Infinity / NaN: all exponent bits set in the float as well.
+    out.bits = sign | 0x7F800000u | (mantissa << 13);
+  } else {
+    // Normal number: re-bias the exponent from 15 to 127 (difference 112).
+    out.bits = sign | ((exponent + 112u) << 23) | (mantissa << 13);
+  }
+  return out.value;
+}
+
+// Round-trip check: reads 640x480 16-bit depth samples (host byte order) from
+// "depth-buffer.raw", encodes each sample through the depth->RGB lookup
+// tables, decodes it again with d(), and writes the decoded 16-bit samples to
+// "depth-buffer-out.raw". Also prints every sample interpreted as a half
+// float plus the min/max of the raw and half-float values.
+// Returns 0 on success and 1 if either file cannot be opened or written.
+int main() {
+  enum { kWidth = 640, kHeight = 480, kPixels = kWidth * kHeight };
+
+  // Static storage keeps ~1.2 MB off the stack and zero-initialises the
+  // input, so a short read leaves well-defined (zero) samples behind.
+  static uint16_t in[kPixels];
+  static uint16_t out[kPixels];
+
+  buildLUT();
+
+  // read our file
+  FILE * fp = fopen("depth-buffer.raw", "rb");
+  if (fp == NULL) {
+    fputs("Error opening depth-buffer.raw\n", stderr);
+    return 1;
+  }
+  size_t samplesRead = fread(in, sizeof(uint16_t), (size_t)kPixels, fp);
+  if (ferror( fp ) != 0) {
+    fputs("Error reading file", stderr);
+  }
+  fclose(fp);
+  if (samplesRead < (size_t)kPixels) {
+    fprintf(stderr, "Short read: got %lu of %lu samples\n",
+            (unsigned long)samplesRead, (unsigned long)kPixels);
+  }
+
+  uint16_t minD = USHRT_MAX;
+  uint16_t maxD = 0;
+  float minF = FLT_MAX;
+  // FLT_MIN is the smallest positive float, not the most negative one, so it
+  // is the wrong seed for a running maximum.
+  float maxF = -FLT_MAX;
+
+  for (size_t i = 0; i < (size_t)kPixels; i++) {
+    //uint16_t val = ((in[i] & 0xff) << 8) | ((in[i] & 0xff00) >> 8);
+    uint16_t val = in[i];
+    float f = half2float(val);
+    printf("%f\n", f);
+
+    if (val < minD) minD = val;
+    if (val > maxD) maxD = val;
+
+    if (f < minF) minF = f;
+    if (f > maxF) maxF = f;
+
+    float decoded = d((float)depthToB[val] / 255.0, (float)depthToG[val] / 255.0, (float)depthToR[val] / 255.0);
+    // Converting an out-of-range float to uint16_t is undefined behaviour;
+    // saturate to the representable range first.
+    if (decoded < 0.0f) decoded = 0.0f;
+    if (decoded > 65535.0f) decoded = 65535.0f;
+    out[i] = (uint16_t) decoded;
+  }
+
+  printf("minD = %d, maxD = %d\n", minD, maxD);
+  printf("minF = %f, maxF = %f\n", minF, maxF);
+
+  fp = fopen("depth-buffer-out.raw", "wb");
+  if (fp == NULL) {
+    fputs("Error opening depth-buffer-out.raw\n", stderr);
+    return 1;
+  }
+  size_t written = fwrite(out, 1, sizeof(out), fp);
+  fclose(fp);
+  if (written != sizeof(out)) {
+    fputs("Error writing depth-buffer-out.raw\n", stderr);
+    return 1;
+  }
+  return 0;
+}
+
diff --git a/media/base/videocapturer.cc b/media/base/videocapturer.cc
@@ -42,6 +42,7 @@ VideoCapturer::VideoCapturer() : apply_rotation_(false) {
 
 void VideoCapturer::Construct() {
   enable_camera_list_ = false;
+  enable_depth_ = false;
   capture_state_ = CS_STOPPED;
   scaled_width_ = 0;
   scaled_height_ = 0;

diff --git a/media/base/videocapturer.h b/media/base/videocapturer.h
@@ -151,6 +151,13 @@ class VideoCapturer : public sigslot::has_slots<>,
   // The fourcc component is ignored.
   void ConstrainSupportedFormats(const VideoFormat& max_format);
 
+  // Stores the depth-capture flag in enable_depth_.
+  void set_enable_depth(bool enable_depth) { enable_depth_ = enable_depth; }
+  // Returns the flag last stored by set_enable_depth(). Declared const so it
+  // can be queried through a const VideoCapturer.
+  bool enable_depth() const { return enable_depth_; }
+
   void set_enable_camera_list(bool enable_camera_list) {
     enable_camera_list_ = enable_camera_list;
   }
@@ -262,6 +269,7 @@ class VideoCapturer : public sigslot::has_slots<>,
   std::vector<VideoFormat> filtered_supported_formats_;
 
   bool enable_camera_list_;
+  bool enable_depth_;  // Set via set_enable_depth(); false after Construct().
   int scaled_width_;  // Current output size from ComputeScale.
   int scaled_height_;
 

diff --git a/pc/videocapturertracksource.cc b/pc/videocapturertracksource.cc
@@ -155,6 +155,9 @@ bool NewFormatWithConstraints(
     // These are actually options, not constraints, so they can be satisfied
     // regardless of the format.
     return true;
+  } else if (constraint.key == MediaConstraintsInterface::kEnableMatrixDepth) {
+    // XXX: claim we can do it whatever for now
+    return true;
   }
   RTC_LOG(LS_WARNING) << "Found unknown MediaStream constraint. Name:"
                       << constraint.key << " Value:" << constraint.value;
@@ -351,6 +354,17 @@ void VideoCapturerTrackSource::Initialize(
     return;
   }
 
+  if (!ExtractOption(constraints, MediaConstraintsInterface::kEnableMatrixDepth,
+                     &enable_depth_)) {
+    RTC_LOG(LS_WARNING) << "Invalid mandatory value for "
+                        << MediaConstraintsInterface::kEnableMatrixDepth;
+    SetState(kEnded);
+    return;
+  }
+  // enable_depth_ is an rtc::Optional<bool>: testing it on its own only asks
+  // whether a value is present, so an explicit "false" would still switch
+  // depth on. Check the contained value as well.
+  if (enable_depth_ && *enable_depth_) {
+    video_capturer_->set_enable_depth(true);
+  }
+
   format_ = GetBestCaptureFormat(formats);
   // Start the camera with our best guess.
   if (!worker_thread_->Invoke<bool>(

diff --git a/pc/videocapturertracksource.h b/pc/videocapturertracksource.h
@@ -49,6 +49,7 @@ class VideoCapturerTrackSource : public VideoTrackSource,
 
   bool is_screencast() const final { return video_capturer_->IsScreencast(); }
   rtc::Optional<bool> needs_denoising() const final { return needs_denoising_; }
+  rtc::Optional<bool> enable_depth() const final { return enable_depth_; }  // Parsed matrixDepth constraint.
 
   bool GetStats(Stats* stats) final;
 
@@ -72,6 +73,7 @@ class VideoCapturerTrackSource : public VideoTrackSource,
   bool started_;
   cricket::VideoFormat format_;
   rtc::Optional<bool> needs_denoising_;
+  rtc::Optional<bool> enable_depth_;  // Filled from the matrixDepth constraint in Initialize().
 };
 
 }  // namespace webrtc