Add frame differencing FE for VIF. (Netflix#210)

legrosbuffle · Aug 16, 2018 · 71ab54c · 71ab54c
1 parent 483ac18
commit 71ab54c
Show file tree

Hide file tree

Showing 6 changed files with 297 additions and 4 deletions.
diff --git a/feature/src/common/file_io.c b/feature/src/common/file_io.c
@@ -20,6 +20,18 @@
 #include <stdlib.h>
 #include <assert.h>
 
+/**
+ * Write the per-pixel difference (current_frame - previous_frame) into
+ * frame_difference for a width x height image.
+ *
+ * Note: stride is in terms of elements (floats), NOT bytes: it is used
+ * directly as the row pitch when indexing the float buffers, so a caller
+ * holding a byte stride must divide it by sizeof(float) first.
+ *
+ * All three buffers are indexed with the same stride.
+ *
+ * Returns 0.0f unconditionally. The value carries no information; it exists
+ * only because the function is declared to return float, and falling off the
+ * end of a non-void function is undefined behavior if the caller reads the
+ * result.
+ */
+float apply_frame_differencing(const float *current_frame, const float *previous_frame, float *frame_difference, int width, int height, int stride)
+{
+    for (int i = 0; i < height; ++i) {
+        for (int j = 0; j < width; ++j) {
+            frame_difference[i * stride + j] = current_frame[i * stride + j] - previous_frame[i * stride + j];
+        }
+    }
+
+    return 0.0f;
+}
+
 /**
  * Note: stride is in terms of bytes
  */

diff --git a/feature/src/vif.c b/feature/src/vif.c
@@ -299,7 +299,7 @@ int compute_vif(const float *ref, const float *dis, int w, int h, int ref_stride
 int vif(int (*read_frame)(float *ref_data, float *main_data, float *temp_data, int stride, void *user_data), void *user_data, int w, int h, const char *fmt)
 {
     double score = 0;
-    double scores[4*2];
+    double scores[4 * 2];
     double score_num = 0;
     double score_den = 0;
     float *ref_buf = 0;
@@ -376,9 +376,164 @@ int vif(int (*read_frame)(float *ref_data, float *main_data, float *temp_data, i
         fflush(stdout);
         printf("vif_den: %d %f\n", frm_idx, score_den);
         fflush(stdout);
+        for(int scale = 0; scale < 4; scale++){
+            printf("vif_num_scale%d: %d %f\n", scale, frm_idx, scores[2 * scale]);
+            printf("vif_den_scale%d: %d %f\n", scale, frm_idx, scores[2 * scale + 1]);
+        }
+
+        frm_idx++;
+    }
+
+    ret = 0;
+
+fail_or_end:
+
+    aligned_free(ref_buf);
+    aligned_free(dis_buf);
+    aligned_free(temp_buf);
+
+    return ret;
+}
+
+int vifdiff(int (*read_frame)(float *ref_data, float *main_data, float *temp_data, int stride, void *user_data), void *user_data, int w, int h, const char *fmt)
+{
+    double score = 0;
+    double scores[4 * 2];
+    double score_num = 0;
+    double score_den = 0;
+    float *ref_buf = 0;
+    float *ref_diff_buf = 0;
+    float *prev_ref_buf = 0;
+    float *dis_buf = 0;
+    float *dis_diff_buf = 0;
+    float *prev_dis_buf = 0;
+    float *temp_buf = 0;
+    size_t data_sz;
+    int stride;
+    int ret = 1;
+
+    if (w <= 0 || h <= 0 || (size_t)w > ALIGN_FLOOR(INT_MAX) / sizeof(float))
+    {
+        goto fail_or_end;
+    }
+
+    stride = ALIGN_CEIL(w * sizeof(float));
+
+    if ((size_t)h > SIZE_MAX / stride)
+    {
+        goto fail_or_end;
+    }
+
+    data_sz = (size_t)stride * h;
+
+    if (!(ref_buf = aligned_malloc(data_sz, MAX_ALIGN)))
+    {
+        printf("error: aligned_malloc failed for ref_buf.\n");
+        fflush(stdout);
+        goto fail_or_end;
+    }
+    if (!(ref_diff_buf = aligned_malloc(data_sz, MAX_ALIGN)))
+    {
+        printf("error: aligned_malloc failed for ref_diff_buf.\n");
+        fflush(stdout);
+        goto fail_or_end;
+    }
+    if (!(prev_ref_buf = aligned_malloc(data_sz, MAX_ALIGN)))
+    {
+        printf("error: aligned_malloc failed for prev_ref_buf.\n");
+        fflush(stdout);
+        goto fail_or_end;
+    }
+    if (!(dis_buf = aligned_malloc(data_sz, MAX_ALIGN)))
+    {
+        printf("error: aligned_malloc failed for dis_buf.\n");
+        fflush(stdout);
+        goto fail_or_end;
+    }
+    if (!(dis_diff_buf = aligned_malloc(data_sz, MAX_ALIGN)))
+    {
+        printf("error: aligned_malloc failed for dis_diff_buf.\n");
+        fflush(stdout);
+        goto fail_or_end;
+    }
+    if (!(prev_dis_buf = aligned_malloc(data_sz, MAX_ALIGN)))
+    {
+        printf("error: aligned_malloc failed for prev_dis_buf.\n");
+        fflush(stdout);
+        goto fail_or_end;
+    }
+    if (!(temp_buf = aligned_malloc(data_sz * 2, MAX_ALIGN)))
+    {
+        printf("error: aligned_malloc failed for temp_buf.\n");
+        fflush(stdout);
+        goto fail_or_end;
+    }
+
+    int frm_idx = 0;
+    while (1)
+    {
+        ret = read_frame(ref_buf, dis_buf, temp_buf, stride, user_data);
+
+        if(ret == 1){
+            goto fail_or_end;
+        }
+        if (ret == 2)
+        {
+            break;
+        }
+
+        // ===============================================================
+        // offset pixel by OPT_RANGE_PIXEL_OFFSET
+        // ===============================================================
+        offset_image(ref_buf, OPT_RANGE_PIXEL_OFFSET, w, h, stride);
+        offset_image(dis_buf, OPT_RANGE_PIXEL_OFFSET, w, h, stride);
+
+        if (frm_idx > 0)
+        {
+            apply_frame_differencing(ref_buf, prev_ref_buf, ref_diff_buf, w, h, stride / sizeof(float));
+		    apply_frame_differencing(dis_buf, prev_dis_buf, dis_diff_buf, w, h, stride / sizeof(float));
+		}
+
+        // copy the current frame to the previous frame buffer to have it available for next time you apply frame differencing
+        memcpy(prev_ref_buf, ref_buf, data_sz);
+        memcpy(prev_dis_buf, dis_buf, data_sz);
+
+        // Take care when extracting T-VIF for the first frame. Since we subtract the previous frame from the current frame,
+        // we cannot apply T-VIF differencing to the first video frame. Therefore we initialize it with default values (e.g. 0 for num and something
+        // very small for den, e.g. 1e-5). Why not difference the other way (next frame minus current frame)? Because the current choice gives us
+        // an unreliable score for an earlier video frame, rather than the latest one. This might be better for video quality calculations, since recency effects
+        // place more weight on later frames.
+        if (frm_idx == 0)
+		{
+		    score = 0.0;
+		    score_num = 0.0;
+		    score_den = 0.0;
+		    for(int scale = 0; scale < 4; scale++){
+		    	scores[2 * scale] = 0.0;
+		    	scores[2 * scale + 1] = 0.0 + 1e-5;
+		    }
+		}
+		else
+		{
+            // compute
+            if ((ret = compute_vif(ref_diff_buf, dis_diff_buf, w, h, stride, stride, &score, &score_num, &score_den, scores)))
+            {
+                printf("error: compute_vifdiff failed.\n");
+                fflush(stdout);
+                goto fail_or_end;
+            }
+        }
+
+        // print
+        printf("vifdiff: %d %f\n", frm_idx, score);
+        fflush(stdout);
+        printf("vifdiff_num: %d %f\n", frm_idx, score_num);
+        fflush(stdout);
+        printf("vifdiff_den: %d %f\n", frm_idx, score_den);
+        fflush(stdout);
         for(int scale=0;scale<4;scale++){
-            printf("vif_num_scale%d: %d %f\n", scale, frm_idx, scores[2*scale]);
-            printf("vif_den_scale%d: %d %f\n", scale, frm_idx, scores[2*scale+1]);
+            printf("vifdiff_num_scale%d: %d %f\n", scale, frm_idx, scores[2*scale]);
+            printf("vifdiff_den_scale%d: %d %f\n", scale, frm_idx, scores[2*scale+1]);
         }
 
         frm_idx++;
@@ -389,7 +544,11 @@ int vif(int (*read_frame)(float *ref_data, float *main_data, float *temp_data, i
 fail_or_end:
 
     aligned_free(ref_buf);
+    aligned_free(ref_diff_buf);
+    aligned_free(prev_ref_buf);
     aligned_free(dis_buf);
+    aligned_free(dis_diff_buf);
+    aligned_free(prev_dis_buf);
     aligned_free(temp_buf);
 
     return ret;

diff --git a/feature/src/vmaf_main.c b/feature/src/vmaf_main.c
@@ -127,6 +127,8 @@ int run_vmaf(const char *app, const char *fmt, const char *ref_path, const char
             ret = vif(read_frame, s, w, h, fmt);
         else if (!strcmp(app, "all"))
             ret = all(read_frame, s, w, h, fmt);
+        else if (!strcmp(app, "vifdiff"))
+            ret = vifdiff(read_frame, s, w, h, fmt);
         else
             ret = 2;
 

diff --git a/python/src/vmaf/__init__.py b/python/src/vmaf/__init__.py
@@ -110,6 +110,25 @@ def call_vmaf_feature(yuv_type, ref_path, dis_path, w, h, log_file_path, logger=
             logger.info(vmaf_feature_cmd)
         run_process(vmaf_feature_cmd, shell=True)
 
+    @staticmethod
+    def call_vifdiff_feature(yuv_type, ref_path, dis_path, w, h, log_file_path, logger=None):
+
+        # APPEND (>>) result (since _prepare_generate_log_file method has already created the file
+        # and written something in advance).
+        vifdiff_feature_cmd = "{vmaf} vifdiff {yuv_type} {ref_path} {dis_path} {w} {h} >> {log_file_path}" \
+            .format(
+            vmaf=required(ExternalProgram.vmaf),
+            yuv_type=yuv_type,
+            ref_path=ref_path,
+            dis_path=dis_path,
+            w=w,
+            h=h,
+            log_file_path=log_file_path,
+        )
+        if logger:
+            logger.info(vifdiff_feature_cmd)
+        run_process(vifdiff_feature_cmd, shell=True)
+
     @staticmethod
     def call_vmafossexec(fmt, w, h, ref_path, dis_path, model, log_file_path, disable_clip_score,
                          enable_transform_score, phone_model, disable_avx, n_thread, n_subsample,

diff --git a/python/src/vmaf/core/feature_extractor.py b/python/src/vmaf/core/feature_extractor.py
@@ -269,6 +269,84 @@ def _post_process_result(cls, result):
 
         return result
 
+class VifFrameDifferenceFeatureExtractor(FeatureExtractor):
+
+    TYPE = "VifDiff_feature"
+
+    VERSION = '0.1'
+
+    ATOM_FEATURES = ['vifdiff',
+                     'vifdiff_num', 'vifdiff_den',
+                     'vifdiff_num_scale0', 'vifdiff_den_scale0',
+                     'vifdiff_num_scale1', 'vifdiff_den_scale1',
+                     'vifdiff_num_scale2', 'vifdiff_den_scale2',
+                     'vifdiff_num_scale3', 'vifdiff_den_scale3',
+                     ]
+
+    DERIVED_ATOM_FEATURES = ['vifdiff_scale0', 'vifdiff_scale1', 'vifdiff_scale2', 'vifdiff_scale3',
+                             ]
+
+    ADM2_CONSTANT = 0
+    ADM_SCALE_CONSTANT = 0
+
+    def _generate_result(self, asset):
+        # routine to call the command-line executable and generate feature
+        # scores in the log file.
+
+        quality_width, quality_height = asset.quality_width_height
+        log_file_path = self._get_log_file_path(asset)
+
+        yuv_type=self._get_workfile_yuv_type(asset)
+        ref_path=asset.ref_workfile_path
+        dis_path=asset.dis_workfile_path
+        w=quality_width
+        h=quality_height
+        logger = self.logger
+
+        ExternalProgramCaller.call_vifdiff_feature(yuv_type, ref_path, dis_path, w, h, log_file_path, logger)
+
+    @classmethod
+    def _post_process_result(cls, result):
+        # override Executor._post_process_result
+
+        result = super(VifFrameDifferenceFeatureExtractor, cls)._post_process_result(result)
+
+        # vifdiff_scalei = vifdiff_num_scalei / vifdiff_den_scalei, i = 0, 1, 2, 3
+        vifdiff_num_scale0_scores_key = cls.get_scores_key('vifdiff_num_scale0')
+        vifdiff_den_scale0_scores_key = cls.get_scores_key('vifdiff_den_scale0')
+        vifdiff_num_scale1_scores_key = cls.get_scores_key('vifdiff_num_scale1')
+        vifdiff_den_scale1_scores_key = cls.get_scores_key('vifdiff_den_scale1')
+        vifdiff_num_scale2_scores_key = cls.get_scores_key('vifdiff_num_scale2')
+        vifdiff_den_scale2_scores_key = cls.get_scores_key('vifdiff_den_scale2')
+        vifdiff_num_scale3_scores_key = cls.get_scores_key('vifdiff_num_scale3')
+        vifdiff_den_scale3_scores_key = cls.get_scores_key('vifdiff_den_scale3')
+        vifdiff_scale0_scores_key = cls.get_scores_key('vifdiff_scale0')
+        vifdiff_scale1_scores_key = cls.get_scores_key('vifdiff_scale1')
+        vifdiff_scale2_scores_key = cls.get_scores_key('vifdiff_scale2')
+        vifdiff_scale3_scores_key = cls.get_scores_key('vifdiff_scale3')
+        result.result_dict[vifdiff_scale0_scores_key] = list(
+            (np.array(result.result_dict[vifdiff_num_scale0_scores_key])
+             / np.array(result.result_dict[vifdiff_den_scale0_scores_key]))
+        )
+        result.result_dict[vifdiff_scale1_scores_key] = list(
+            (np.array(result.result_dict[vifdiff_num_scale1_scores_key])
+             / np.array(result.result_dict[vifdiff_den_scale1_scores_key]))
+        )
+        result.result_dict[vifdiff_scale2_scores_key] = list(
+            (np.array(result.result_dict[vifdiff_num_scale2_scores_key])
+             / np.array(result.result_dict[vifdiff_den_scale2_scores_key]))
+        )
+        result.result_dict[vifdiff_scale3_scores_key] = list(
+            (np.array(result.result_dict[vifdiff_num_scale3_scores_key])
+             / np.array(result.result_dict[vifdiff_den_scale3_scores_key]))
+        )
+
+        # validate
+        for feature in cls.DERIVED_ATOM_FEATURES:
+            assert cls.get_scores_key(feature) in result.result_dict
+
+        return result
+
 class PsnrFeatureExtractor(FeatureExtractor):
 
     TYPE = "PSNR_feature"

diff --git a/python/test/feature_extractor_test.py b/python/test/feature_extractor_test.py
@@ -7,7 +7,7 @@
 
 from vmaf.config import VmafConfig
 from vmaf.core.feature_extractor import VmafFeatureExtractor, MomentFeatureExtractor, \
-    PsnrFeatureExtractor, SsimFeatureExtractor, MsSsimFeatureExtractor
+    PsnrFeatureExtractor, SsimFeatureExtractor, MsSsimFeatureExtractor, VifFrameDifferenceFeatureExtractor
 from vmaf.core.asset import Asset
 from vmaf.core.result_store import FileSystemResultStore
 from vmaf.tools.testutil import set_default_576_324_videos_for_testing, set_default_flat_1920_1080_videos_for_testing
@@ -105,6 +105,29 @@ def test_run_vmaf_fextractor(self):
         self.assertAlmostEqual(results[1]['VMAF_feature_vif2_score'], 1.0, places=4)
         self.assertAlmostEqual(results[1]['VMAF_feature_adm3_score'], 1.0, places=4)
 
+    def test_run_vif_frame_difference_fextractor(self):
+        # Runs VifFrameDifferenceFeatureExtractor on the default 576x324 test
+        # videos and checks the aggregate vifdiff, vifdiff_num and vifdiff_den
+        # scores against pinned reference values.
+        print 'test on running VIF frame difference feature extractor...'
+        ref_path, dis_path, asset, asset_original = set_default_576_324_videos_for_testing()
+
+        # Two assets are extracted in one run, with no result store.
+        self.fextractor = VifFrameDifferenceFeatureExtractor(
+            [asset, asset_original],
+            None, fifo_mode=True,
+            result_store=None
+        )
+        self.fextractor.run()
+
+        results = self.fextractor.results
+
+        # results[0]: presumably the scores for `asset` (first in the list) -- verify ordering.
+        self.assertAlmostEqual(results[0]['VifDiff_feature_vifdiff_score'], 0.26745858333333333, places=4)
+
+        # num/den are large magnitudes, so they are only compared to 0 decimal places.
+        self.assertAlmostEqual(results[0]['VifDiff_feature_vifdiff_num_score'], 305412.7661844375, places=0)
+        self.assertAlmostEqual(results[0]['VifDiff_feature_vifdiff_den_score'], 1113927.6002349583, places=0)
+
+        # results[1]: presumably the scores for `asset_original` -- verify ordering.
+        self.assertAlmostEqual(results[1]['VifDiff_feature_vifdiff_score'], 0.9791655833333334, places=4)
+
+        self.assertAlmostEqual(results[1]['VifDiff_feature_vifdiff_num_score'], 1113926.2941030415, places=0)
+        self.assertAlmostEqual(results[1]['VifDiff_feature_vifdiff_den_score'], 1113927.6002349583, places=0)
+
     def test_run_vmaf_fextractor_with_result_store(self):
         print 'test on running VMAF feature extractor with result store...'
         ref_path, dis_path, asset, asset_original = set_default_576_324_videos_for_testing()