Process Mask every X frames of video (#31)

* Remove arch package from repo * Feature: process the mask every X frames of video. Added the ability to process the mask only every X frames of video. This can greatly decrease the plugin's CPU usage, making it usable on lower-spec systems, and some people may even prefer the visual effect of processing the mask less frequently, as it can make the contours of the mask less "jittery". * Refactoring Co-authored-by: iBenMoore <44276701+iBenMoore@users.noreply.github.com>
locaal-ai · Jul 15, 2021 · 8d64b35 · 8d64b35
1 parent 0f101de
commit 8d64b35
Show file tree

Hide file tree

Showing 2 changed files with 82 additions and 28 deletions.
diff --git a/scripts/obs-backgroundremoval-0.2beta-1-x86_64.pkg.tar.zst b/scripts/obs-backgroundremoval-0.2beta-1-x86_64.pkg.tar.zst
diff --git a/src/background-filter.cpp b/src/background-filter.cpp
@@ -57,13 +57,19 @@ struct background_removal_filter {
 	video_scaler_t* scalerToBGR;
 	video_scaler_t* scalerFromBGR;
 
+	cv::Mat backgroundMask;
+	int maskEveryXFrames = 1;
+	int maskEveryXFramesCount = 0;
+
+
 #if _WIN32
 	const wchar_t* modelFilepath = nullptr;
 #else
 	const char* modelFilepath = nullptr;
 #endif
 };
 
+
 static const char *filter_getname(void *unused)
 {
 	UNUSED_PARAMETER(unused);
@@ -138,8 +144,8 @@ static obs_properties_t *filter_properties(void *data)
 		"useGPU",
 		obs_module_text("Inference device"),
 		OBS_COMBO_TYPE_LIST,
-		OBS_COMBO_FORMAT_STRING
-	);
+		OBS_COMBO_FORMAT_STRING);
+
 	obs_property_list_add_string(p_use_gpu, obs_module_text("CPU"), USEGPU_CPU);
 #if _WIN32
 	obs_property_list_add_string(p_use_gpu, obs_module_text("GPU - DirectML"), USEGPU_DML);
@@ -151,25 +157,34 @@ static obs_properties_t *filter_properties(void *data)
 		"model_select",
 		obs_module_text("Segmentation model"),
 		OBS_COMBO_TYPE_LIST,
-		OBS_COMBO_FORMAT_STRING
-	);
+		OBS_COMBO_FORMAT_STRING);
+
 	obs_property_list_add_string(p_model_select, obs_module_text("SINet"), MODEL_SINET);
 	obs_property_list_add_string(p_model_select, obs_module_text("MODNet"), MODEL_MODNET);
 	obs_property_list_add_string(p_model_select, obs_module_text("MediaPipe"), MODEL_MEDIAPIPE);
 	obs_property_list_add_string(p_model_select, obs_module_text("Selfie Segmentation"), MODEL_SELFIE);
 
+	obs_property_t *p_mask_every_x_frames = obs_properties_add_int(
+		props,
+		"mask_every_x_frames",
+		obs_module_text("Calculate mask every X frame"),
+		0,
+		300,
+		1);
+
 	UNUSED_PARAMETER(data);
 	return props;
 }
 
 static void filter_defaults(obs_data_t *settings) {
-	obs_data_set_default_double(settings, "threshold", 0.5);
-	obs_data_set_default_double(settings, "contour_filter", 0.05);
-	obs_data_set_default_double(settings, "smooth_contour", 0.5);
-	obs_data_set_default_double(settings, "feather", 0.0);
-	obs_data_set_default_int(settings, "replaceColor", 0x000000);
-	obs_data_set_default_string(settings, "useGPU", USEGPU_CPU);
-	obs_data_set_default_string(settings, "model_select", MODEL_MEDIAPIPE);
+	obs_data_set_default_double(settings, "threshold", 					 0.5);
+	obs_data_set_default_double(settings, "contour_filter", 		 0.05);
+	obs_data_set_default_double(settings, "smooth_contour", 		 0.5);
+	obs_data_set_default_double(settings, "feather", 						 0.0);
+	obs_data_set_default_int(		settings, "replaceColor", 			 0x000000);
+	obs_data_set_default_string(settings, "useGPU", 						 USEGPU_CPU);
+	obs_data_set_default_string(settings, "model_select", 			 MODEL_MEDIAPIPE);
+	obs_data_set_default_int(		settings, "mask_every_x_frames", 1);
 }
 
 static void createOrtSession(struct background_removal_filter *tf) {
@@ -279,9 +294,12 @@ static void filter_update(void *data, obs_data_t *settings)
 	tf->backgroundColor.val[1] = (double)((color >> 8) & 0x0000ff);
 	tf->backgroundColor.val[2] = (double)(color & 0x0000ff);
 
-	tf->contourFilter = (float)obs_data_get_double(settings, "contour_filter");
-	tf->smoothContour = (float)obs_data_get_double(settings, "smooth_contour");
-	tf->feather       = (float)obs_data_get_double(settings, "feather");
+	tf->contourFilter         = (float)obs_data_get_double(settings, "contour_filter");
+	tf->smoothContour         = (float)obs_data_get_double(settings, "smooth_contour");
+	tf->feather               = (float)obs_data_get_double(settings, "feather");
+	tf->maskEveryXFrames      = (int)obs_data_get_int(settings, "mask_every_x_frames");
+	tf->maskEveryXFramesCount = (int)(0);
+
 
 	const std::string newUseGpu = obs_data_get_string(settings, "useGPU");
 	const std::string newModel = obs_data_get_string(settings, "model_select");
@@ -383,15 +401,12 @@ static void convertBGRToFrame(
 }
 
 
-static struct obs_source_frame * filter_render(void *data, struct obs_source_frame *frame)
+static void processImageForBackground(
+	struct background_removal_filter *tf,
+	const cv::Mat& imageBGR,
+	cv::Mat& backgroundMask)
 {
-	struct background_removal_filter *tf = reinterpret_cast<background_removal_filter *>(data);
-
-	// Convert to BGR
-	cv::Mat imageBGR = convertFrameToBGR(frame, tf);
-
 	try {
-
 		// To RGB
 		cv::Mat imageRGB;
 		cv::cvtColor(imageBGR, imageRGB, cv::COLOR_BGR2RGB);
@@ -442,7 +457,7 @@ static struct obs_source_frame * filter_render(void *data, struct obs_source_fra
 			tf->outputNames.data(), &(tf->outputTensor), 1);
 
 		uint32_t outputWidth, outputHeight;
-		int64_t outputChannels;
+		int32_t outputChannels;
 		if (tf->modelSelection == MODEL_SINET || tf->modelSelection == MODEL_MODNET) {
 			// BCHW
 			outputWidth = (int)tf->outputDims.at(3);
@@ -476,7 +491,6 @@ static struct obs_source_frame * filter_render(void *data, struct obs_source_fra
 			cv::normalize(outputImage, outputImage, 1.0, 0.0, cv::NORM_MINMAX);
 		}
 
-		cv::Mat backgroundMask;
 		if (tf->modelSelection == MODEL_SINET || tf->modelSelection == MODEL_MEDIAPIPE) {
 			backgroundMask = outputImage > tf->threshold;
 		} else {
@@ -498,38 +512,78 @@ static struct obs_source_frame * filter_render(void *data, struct obs_source_fra
 			drawContours(backgroundMask, filteredContours, -1, cv::Scalar(255), -1);
 		}
 
-		// Mask the input
+		// Resize the size of the mask back to the size of the original input.
 		cv::resize(backgroundMask, backgroundMask, imageBGR.size());
 
-		// Smooth mask with a fast filter (box)
+		// Smooth mask with a fast filter (box).
 		if (tf->smoothContour > 0.0) {
 			int k_size = (int)(100 * tf->smoothContour);
 			cv::boxFilter(backgroundMask, backgroundMask, backgroundMask.depth(), cv::Size(k_size, k_size));
 			backgroundMask = backgroundMask > 128;
 		}
+	}
+	catch(const std::exception& e) {
+		blog(LOG_ERROR, "%s", e.what());
+	}
+}
 
+
+static struct obs_source_frame * filter_render(void *data, struct obs_source_frame *frame)
+{
+	struct background_removal_filter *tf = reinterpret_cast<background_removal_filter *>(data);
+
+	// Convert to BGR
+	cv::Mat imageBGR = convertFrameToBGR(frame, tf);
+
+	cv::Mat backgroundMask;
+	tf->maskEveryXFramesCount = ++(tf->maskEveryXFramesCount) % tf->maskEveryXFrames;
+	if (tf->maskEveryXFramesCount != 0) {
+		// We are skipping processing of the mask for this frame.
+		// Get the background mask previously generated.
+		tf->backgroundMask.copyTo(backgroundMask);
+	} else {
+		// Process the image to find the mask.
+		processImageForBackground(tf, imageBGR, backgroundMask);
+
+		// Now that the mask is completed, save it off so it can be used on a later frame
+		// if we've chosen to only process the mask every X frames.
+		backgroundMask.copyTo(tf->backgroundMask);
+	}
+
+	// Apply the mask back to the main image.
+	try {
 		if (tf->feather > 0.0) {
-			// Feather mask
-			int k_size = (int)(40 * tf->feather);
+			// If we're going to feather/alpha blend, we need to do some processing that
+			// will combine the blended "foreground" and "masked background" images onto the main image.
 			cv::Mat maskFloat;
+			int k_size = (int)(40 * tf->feather);
+
+			// Convert Mat to float and Normalize the alpha mask to keep intensity between 0 and 1.
 			backgroundMask.convertTo(maskFloat, CV_32FC1, 1.0 / 255.0);
+			//Feather the normalized mask.
 			cv::boxFilter(maskFloat, maskFloat, maskFloat.depth(), cv::Size(k_size, k_size));
 
 			// Alpha blend
 			cv::Mat maskFloat3c;
 			cv::cvtColor(maskFloat, maskFloat3c, cv::COLOR_GRAY2BGR);
 			cv::Mat tmpImage, tmpBackground;
+			// Multiply the unmasked foreground area of the image with (1 - alpha matte).
 			cv::multiply(imageBGR, cv::Scalar(1, 1, 1) - maskFloat3c, tmpImage, 1.0, CV_32FC3);
+			// Multiply the masked background area (with the background color applied) with the alpha matte.
 			cv::multiply(cv::Mat(imageBGR.size(), CV_32FC3, tf->backgroundColor), maskFloat3c, tmpBackground);
+			// Add the foreground and background images together, rescale back to an 8bit integer image
+			// and apply onto the main image.
 			cv::Mat(tmpImage + tmpBackground).convertTo(imageBGR, CV_8UC3);
 		} else {
+			// If we're not feathering/alpha blending, we can
+			// apply the mask as-is back onto the main image.
 			imageBGR.setTo(tf->backgroundColor, backgroundMask);
 		}
 	}
 	catch(const std::exception& e) {
 		blog(LOG_ERROR, "%s", e.what());
 	}
-	// Put masked image back on frame
+	// Put masked image back on frame.
 	convertBGRToFrame(imageBGR, frame, tf);
 	return frame;
 }