
Merge branch 'dev'
BykadorovR committed Apr 28, 2020
2 parents 0072af2 + e385e3d commit 0827723
Showing 13 changed files with 180 additions and 51 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
@@ -1,4 +1,4 @@
FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04
FROM nvidia/cuda:10.2-cudnn7-devel-ubuntu18.04

RUN apt-get update &&\
apt-get -y install build-essential yasm nasm cmake unzip git wget \
15 changes: 7 additions & 8 deletions README.md
@@ -16,13 +16,13 @@ reader.initialize()
reader.start()

while need_predictions:
# read the latest available frame from the stream
# read the latest available frame from the stream
tensor = reader.read(pixel_format=FourCC.BGR24,
width=256, # resize to 256x256 px
height=256,
normalization=True, # normalize to range [0, 1]
planes_pos=Planes.PLANAR) # dimension order [C, H, W]

# tensor dtype is torch.float32, device is 'cuda:0', shape is (3, 256, 256)
prediction = model(tensor.unsqueeze(0))
```
@@ -102,15 +102,15 @@ Extension for Python can be installed via pip:
- **CUDA 10:**
TensorStream wheels compiled against different versions of PyTorch:
```
pip install https://tensorstream.argus-ai.com/wheel/cu10/torch1.3.1/linux/tensor_stream-0.3.0-cp36-cp36m-linux_x86_64.whl
pip install https://tensorstream.argus-ai.com/wheel/cu10/torch1.4.0/linux/tensor_stream-0.4.0-cp36-cp36m-linux_x86_64.whl
```
```
pip install https://tensorstream.argus-ai.com/wheel/cu10/torch1.4.0/linux/tensor_stream-0.3.0-cp36-cp36m-linux_x86_64.whl
pip install https://tensorstream.argus-ai.com/wheel/cu10/torch1.5.0/linux/tensor_stream-0.4.0-cp36-cp36m-linux_x86_64.whl
```

#### Building examples and tests
Examples for Python and C++ can be found in the [c_examples](c_examples) and [python_examples](python_examples) folders. Tests for C++ can be found in the [tests](tests) folder.
#### Python example
#### Python example
Can be executed via Python after installing the TensorStream [C++ extension for Python](#c-extension-for-python).
```
cd python_examples
@@ -136,7 +136,7 @@ cmake -DCMAKE_PREFIX_PATH=%cd%\..\..\cmake -G "Visual Studio 15 2017 Win64" -T v
## Docker image
To build TensorStream you need to pass the PyTorch version via the TORCH_VERSION argument:
```
docker build --build-arg TORCH_VERSION=1.4.0 -t tensorstream .
docker build --build-arg TORCH_VERSION=1.5.0 -t tensorstream .
```
Run with a bash command line and follow the [installation guide](#install-tensorstream)
```
@@ -155,7 +155,7 @@ docker run --gpus=all -ti tensorstream bash
1. Simple [example](python_examples/simple.py) demonstrates RTMP to PyTorch tensor conversion. Let's consider some usage scenarios:
> **Note:** You can pass **--help** to get the list of all available options, their description and default values
* Convert an RTMP bitstream to RGB24 PyTorch tensors and dump the result to a dump.yuv file:
* Convert an RTMP bitstream to RGB24 PyTorch tensors and dump the result to a dump.yuv file:
```
python simple.py -i rtmp://37.228.119.44:1935/vod/big_buck_bunny.mp4 -fc RGB24 -o dump
```
@@ -223,7 +223,6 @@ python different_streams.py -i1 <path-to-first-stream> -i2 <path-to-second-strea

Real-time video style transfer example: [fast-neural-style](python_examples/fast_neural_style).


## Documentation

Documentation for Python and C++ API can be found on the [site](https://tensorstream.argus-ai.com/).
3 changes: 2 additions & 1 deletion include/Common.h
@@ -45,7 +45,8 @@ enum CloseLevel {
@details Used in @ref TensorStream::initPipeline() function
*/
enum FrameRateMode {
NATIVE, /**< Read at native stream/camera frame rate */
NATIVE, /**< Read at native stream frame rate */
NATIVE_SIMPLE, /**< Read at fixed stream frame rate */
FAST, /**< Read frames as fast as possible */
BLOCKING /**< Read frame by frame without skipping (only local files) */
};
2 changes: 2 additions & 0 deletions include/Wrappers/WrapperC.h
@@ -80,6 +80,8 @@ class TensorStream {
std::shared_ptr<VideoProcessor> vpp;
AVPacket* parsed;
int realTimeDelay = 0;
double indexToDTSCoeff = 0;
double DTSToMsCoeff = 0;
std::pair<int, int> frameRate;
FrameRateMode frameRateMode;
bool shouldWork;
2 changes: 2 additions & 0 deletions include/Wrappers/WrapperPython.h
@@ -47,6 +47,8 @@ class TensorStream {
std::shared_ptr<VideoProcessor> vpp;
AVPacket* parsed;
int realTimeDelay = 0;
double indexToDTSCoeff = 0;
double DTSToMsCoeff = 0;
std::pair<int, int> frameRate;
FrameRateMode frameRateMode;
bool shouldWork;
13 changes: 10 additions & 3 deletions setup.py
@@ -72,10 +72,17 @@ def find_version(*file_paths):
if version.parse(torch.__version__) <= version.parse("1.1.0"):
library += ["caffe2"]
library += ["caffe2_gpu"]
library += ["_C"]
library += ["c10"]
library += ["c10_cuda"]

if version.parse(torch.__version__) >= version.parse("1.5.0"):
library += ["torch_cpu"]
library += ["torch_cuda"]

if version.parse(torch.__version__) >= version.parse("1.5.0") or platform.system() == 'Windows':
library += ["torch"]
library += ["torch_python"]
library += ["c10"]
library += ["_C"]
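One plausible reading of the version-dependent linking logic above, sketched as a standalone helper. The library names are copied from the diff; the exact branch boundaries are an assumption from the hunk, and a minimal tuple parse stands in for `packaging.version`:

```python
def parse_version(v):
    # "1.5.0+cu101" -> (1, 5, 0): keep only the numeric release part
    release = v.split("+")[0]
    return tuple(int(p) for p in release.split(".") if p.isdigit())

def torch_link_libraries(torch_version, is_windows=False):
    """Which libraries to link against, mirroring the setup.py branches."""
    libs = []
    if parse_version(torch_version) <= (1, 1, 0):
        # very old PyTorch still ships the caffe2 libraries
        libs += ["caffe2", "caffe2_gpu"]
    if parse_version(torch_version) >= (1, 5, 0):
        # 1.5.0 split libtorch into CPU and CUDA parts
        libs += ["torch_cpu", "torch_cuda"]
    if parse_version(torch_version) >= (1, 5, 0) or is_windows:
        libs += ["torch", "torch_python", "c10", "_C"]
    if is_windows:
        libs += ["nvToolsExt64_1"]
    return libs

# PyTorch 1.5.0 on Linux links the split libtorch libraries
print(torch_link_libraries("1.5.0"))
```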

if platform.system() == 'Windows':
library += ["nvToolsExt64_1"]
@@ -110,7 +117,7 @@ def find_version(*file_paths):
language='c++')
],
cmdclass={
'build_ext': BuildExtension
'build_ext': BuildExtension.with_options(use_ninja=False)
},
packages=find_packages(),
zip_safe=True,
54 changes: 45 additions & 9 deletions src/Wrappers/WrapperC.cpp
@@ -72,6 +72,15 @@ int TensorStream::initPipeline(std::string inputFile, uint8_t maxConsumers, uint
realTimeDelay = ((float)frameRate.first /
(float)frameRate.second) * 1000;
LOG_VALUE(std::string("Frame rate: ") + std::to_string((int) (frameRate.second / frameRate.first)), LogsLevel::LOW);

//1) frame index * frame_rate.den / frame_rate.num = frame time in seconds
//2) frame time in seconds * time_base.den / time_base.num = frame time in time base units
indexToDTSCoeff = (double)(videoStream->r_frame_rate.den * videoStream->time_base.den) / (int64_t(videoStream->r_frame_rate.num) * videoStream->time_base.num);

//need to convert DTS to ms
//first convert DTS to seconds (DTS is measured in time base units, so 1 DTS tick = time_base.num / time_base.den seconds)
//then convert seconds to ms by multiplying by 1000
DTSToMsCoeff = (double)videoStream->time_base.num / (double)videoStream->time_base.den * (double)1000;
END_LOG_FUNCTION(std::string("Initializing() "));
return sts;
}
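The two coefficients computed above can be checked with concrete numbers. The formulas below are the same as in the hunk; the sample values (a 25 fps stream with the common 1/90000 time base) are illustrative assumptions:

```python
def index_to_dts_coeff(frame_rate_num, frame_rate_den, time_base_num, time_base_den):
    # frame index -> DTS: one frame lasts frame_rate_den / frame_rate_num seconds,
    # and one DTS tick lasts time_base_num / time_base_den seconds
    return (frame_rate_den * time_base_den) / (frame_rate_num * time_base_num)

def dts_to_ms_coeff(time_base_num, time_base_den):
    # DTS -> milliseconds: seconds per tick, times 1000
    return time_base_num / time_base_den * 1000

# 25 fps stream with a 1/90000 time base
idx2dts = index_to_dts_coeff(25, 1, 1, 90000)  # 3600 ticks per frame
dts2ms = dts_to_ms_coeff(1, 90000)             # ~0.0111 ms per tick

# frame 50 should be presented 2 seconds (2000 ms) into the stream
assert idx2dts == 3600
assert round(50 * idx2dts * dts2ms) == 2000
```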
@@ -118,6 +127,8 @@ int checkGetComplete(std::map<std::string, bool>& blockingStatuses) {
int TensorStream::processingLoop() {
std::unique_lock<std::mutex> locker(closeSync);
int sts = VREADER_OK;
std::pair<int64_t, bool> startDTS = { 0, false };
std::pair<std::chrono::high_resolution_clock::time_point, bool> startTime = { std::chrono::high_resolution_clock::now(), false };
SET_CUDA_DEVICE();
while (shouldWork) {
PUSH_RANGE("TensorStream::processingLoop", NVTXColors::GREEN);
@@ -133,6 +144,10 @@ int TensorStream::processingLoop() {
sts = parser->Get(parsed);
CHECK_STATUS(sts);
END_LOG_BLOCK(std::string("parser->Get"));
int64_t frameDTS = parsed->dts;
if (frameDTS == AV_NOPTS_VALUE && frameRateMode == FrameRateMode::NATIVE) {
frameDTS = int64_t(decoder->getFrameIndex()) * indexToDTSCoeff;
}
if (!skipAnalyze) {
START_LOG_BLOCK(std::string("parser->Analyze"));
//Parse the packet to find syntax issues; errors returned from this function are intentionally not handled
@@ -147,18 +162,39 @@ int TensorStream::processingLoop() {
continue;
CHECK_STATUS(sts);

if (frameRateMode == FrameRateMode::NATIVE) {
START_LOG_BLOCK(std::string("sleep"));
PUSH_RANGE("TensorStream::Sleep", NVTXColors::PURPLE);
//wait here
int sleepTime = realTimeDelay - std::chrono::duration_cast<std::chrono::milliseconds>(
START_LOG_BLOCK(std::string("sleep"));
PUSH_RANGE("TensorStream::Sleep", NVTXColors::PURPLE);
int sleepTime = 0;
if (frameRateMode == FrameRateMode::NATIVE_SIMPLE) {
sleepTime = realTimeDelay - std::chrono::duration_cast<std::chrono::milliseconds>(
std::chrono::high_resolution_clock::now() - waitTime).count();
if (sleepTime > 0) {
std::this_thread::sleep_for(std::chrono::milliseconds(sleepTime));
}
else if (frameRateMode == FrameRateMode::NATIVE) {
if (!startDTS.second) {
startDTS.first = frameDTS;
startDTS.second = true;
}

frameDTS -= startDTS.first;

frameDTS = frameDTS * DTSToMsCoeff;
if (!startTime.second) {
startTime.first = std::chrono::high_resolution_clock::now();
startTime.second = true;
}

int64_t now = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now() - startTime.first).count();
LOG_VALUE("Dts: " + std::to_string(frameDTS) + " now: " + std::to_string(now), LogsLevel::HIGH);
if (frameDTS > now) {
sleepTime = frameDTS - now;
}
LOG_VALUE(std::string("Should sleep for: ") + std::to_string(sleepTime), LogsLevel::HIGH);
END_LOG_BLOCK(std::string("sleep"));
}
LOG_VALUE(std::string("Should sleep for: ") + std::to_string(sleepTime), LogsLevel::HIGH);
if (sleepTime > 0) {
std::this_thread::sleep_for(std::chrono::milliseconds(sleepTime));
}
END_LOG_BLOCK(std::string("sleep"));
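The NATIVE branch above boils down to: rebase the DTS onto the first frame, convert it to ms, and sleep until the wall clock catches up. A minimal Python sketch of that decision as a pure function (no actual sleeping; the function name is illustrative, not part of the library):

```python
def native_sleep_ms(frame_dts, start_dts, dts_to_ms_coeff, elapsed_ms):
    """How long to sleep before presenting this frame.

    frame_dts       - DTS of the current frame (stream time base units)
    start_dts       - DTS of the first frame seen (used as the time origin)
    dts_to_ms_coeff - milliseconds per DTS tick
    elapsed_ms      - wall-clock ms since the first frame was processed
    """
    target_ms = (frame_dts - start_dts) * dts_to_ms_coeff
    # never sleep a negative amount when we are already late
    return max(0, int(target_ms - elapsed_ms))

# 1/90000 time base -> 1000/90000 ms per tick; frame due at 80 ms, 30 ms elapsed
assert native_sleep_ms(7200, 0, 1000 / 90000, 30) == 50
# if the loop is running behind, it just skips sleeping
assert native_sleep_ms(7200, 0, 1000 / 90000, 200) == 0
```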

if (frameRateMode == FrameRateMode::BLOCKING) {
std::unique_lock<std::mutex> locker(blockingSync);
START_LOG_BLOCK(std::string("blocking wait"));
58 changes: 46 additions & 12 deletions src/Wrappers/WrapperPython.cpp
@@ -71,6 +71,16 @@ int TensorStream::initPipeline(std::string inputFile, uint8_t maxConsumers, uint
realTimeDelay = ((float)frameRate.first /
(float)frameRate.second) * 1000;
LOG_VALUE(std::string("Frame rate: ") + std::to_string((int)(frameRate.second / frameRate.first)), LogsLevel::LOW);

//1) frame index * frame_rate.den / frame_rate.num = frame time in seconds
//2) frame time in seconds * time_base.den / time_base.num = frame time in time base units
indexToDTSCoeff = (double)(videoStream->r_frame_rate.den * videoStream->time_base.den) / (int64_t(videoStream->r_frame_rate.num) * videoStream->time_base.num);

//need to convert DTS to ms
//first convert DTS to seconds (DTS is measured in time base units, so 1 DTS tick = time_base.num / time_base.den seconds)
//then convert seconds to ms by multiplying by 1000
DTSToMsCoeff = (double)videoStream->time_base.num / (double)videoStream->time_base.den * (double)1000;

END_LOG_FUNCTION(std::string("Initializing() "));
return sts;
}
@@ -116,6 +126,8 @@ int checkGetComplete(std::map<std::string, bool>& blockingStatuses) {
int TensorStream::processingLoop() {
std::unique_lock<std::mutex> locker(closeSync);
int sts = VREADER_OK;
std::pair<int64_t, bool> startDTS = { 0, false };
std::pair<std::chrono::high_resolution_clock::time_point, bool> startTime = { std::chrono::high_resolution_clock::now(), false };
SET_CUDA_DEVICE();
while (shouldWork) {
PUSH_RANGE("TensorStream::processingLoop", NVTXColors::GREEN);
@@ -131,6 +143,10 @@ int TensorStream::processingLoop() {
sts = parser->Get(parsed);
CHECK_STATUS(sts);
END_LOG_BLOCK(std::string("parser->Get"));
int64_t frameDTS = parsed->dts;
if (frameDTS == AV_NOPTS_VALUE && frameRateMode == FrameRateMode::NATIVE) {
frameDTS = int64_t(decoder->getFrameIndex()) * indexToDTSCoeff;
}
if (!skipAnalyze) {
START_LOG_BLOCK(std::string("parser->Analyze"));
//Parse the packet to find syntax issues; errors returned from this function are intentionally not handled
@@ -162,18 +178,37 @@ }
}
), tensors.end());
END_LOG_BLOCK(std::string("check tensor to free"));
if (frameRateMode == FrameRateMode::NATIVE) {
START_LOG_BLOCK(std::string("sleep"));
PUSH_RANGE("TensorStream::Sleep", NVTXColors::PURPLE);
//wait here
int sleepTime = realTimeDelay - std::chrono::duration_cast<std::chrono::milliseconds>(
START_LOG_BLOCK(std::string("sleep"));
PUSH_RANGE("TensorStream::Sleep", NVTXColors::PURPLE);
int sleepTime = 0;
if (frameRateMode == FrameRateMode::NATIVE_SIMPLE) {
sleepTime = realTimeDelay - std::chrono::duration_cast<std::chrono::milliseconds>(
std::chrono::high_resolution_clock::now() - waitTime).count();
if (sleepTime > 0) {
std::this_thread::sleep_for(std::chrono::milliseconds(sleepTime));
}
else if (frameRateMode == FrameRateMode::NATIVE) {
if (!startDTS.second) {
startDTS.first = frameDTS;
startDTS.second = true;
}
LOG_VALUE(std::string("Should sleep for: ") + std::to_string(sleepTime), LogsLevel::HIGH);
END_LOG_BLOCK(std::string("sleep"));

frameDTS -= startDTS.first;
frameDTS = frameDTS * DTSToMsCoeff;
if (!startTime.second) {
startTime.first = std::chrono::high_resolution_clock::now();
startTime.second = true;
}

int64_t now = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now() - startTime.first).count();
LOG_VALUE("Dts: " + std::to_string(frameDTS) + " now: " + std::to_string(now), LogsLevel::HIGH);
if (frameDTS > now) {
sleepTime = frameDTS - now;
}
}
LOG_VALUE(std::string("Should sleep for: ") + std::to_string(sleepTime), LogsLevel::HIGH);
if (sleepTime > 0) {
std::this_thread::sleep_for(std::chrono::milliseconds(sleepTime));
}
END_LOG_BLOCK(std::string("sleep"));
if (frameRateMode == FrameRateMode::BLOCKING) {
std::unique_lock<std::mutex> locker(blockingSync);
START_LOG_BLOCK(std::string("blocking wait"));
@@ -216,7 +251,7 @@ int TensorStream::startProcessing(int cudaDevice) {
LOG_VALUE(std::string("Processing was interrupted or stream has ended"), LogsLevel::LOW);
//we should unlock the mutex to allow the get() function to finish execution
if (decoder)
decoder->notifyConsumers();
decoder->notifyConsumers();
LOG_VALUE(std::string("All consumers were notified about processing end"), LogsLevel::LOW);
CHECK_STATUS(sts);
return sts;
@@ -391,7 +426,6 @@ int TensorStream::dumpFrame(at::Tensor stream, std::string consumerName, FramePa
//in this case size of Tensor is (1, height * channels, width)
frameParameters.resize.width = stream.size(2);
}
std::cout << frameParameters.resize.width << std::endl;
}

if (!frameParameters.resize.height) {
Expand All @@ -403,7 +437,6 @@ int TensorStream::dumpFrame(at::Tensor stream, std::string consumerName, FramePa
//in this case size of Tensor is (1, height * channels, width)
frameParameters.resize.height = stream.size(1) / channelsByFourCC(frameParameters.color.dstFourCC);
}
std::cout << frameParameters.resize.height << std::endl;
}

//Workaround: need to concatenate the string coming from Python with an std::string to avoid issues in frame dumping (strange artifacts appeared when the file was created using consumerName directly)
@@ -465,6 +498,7 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

py::enum_<FrameRateMode>(m, "FrameRateMode")
.value("NATIVE", FrameRateMode::NATIVE)
.value("NATIVE_SIMPLE", FrameRateMode::NATIVE_SIMPLE)
.value("FAST", FrameRateMode::FAST)
.value("BLOCKING", FrameRateMode::BLOCKING)
.export_values();
2 changes: 1 addition & 1 deletion tensor_stream/__init__.py
@@ -10,4 +10,4 @@
FrameParameters
)

__version__ = '0.3.0'
__version__ = '0.4.0'
18 changes: 10 additions & 8 deletions tensor_stream/tensor_stream.py
@@ -11,7 +11,7 @@
# @{

## Class with list of possible error statuses can be returned from TensorStream extension
# @warning These statuses are used only in Python wrapper that communicates with TensorStream C++ extension
# @warning These statuses are used only in Python wrapper that communicates with TensorStream C++ extension
class StatusLevel(Enum):
## No errors
OK = 0
@@ -80,17 +80,19 @@ class Planes(Enum):
## Color components R, G, B are stored in memory separately like RRRRR, GGGGG, BBBBB
PLANAR = 0
## Color components R, G, B are stored in memory one by one like RGBRGBRGB
MERGED = 1
MERGED = 1


## Enum with possible stream reading modes
class FrameRate(Enum):
## Read at native stream/camera frame rate
## Read at native stream frame rate
NATIVE = 0
## Read at fixed stream frame rate
NATIVE_SIMPLE = 1
## Read frames as fast as possible
FAST = 1
FAST = 2
## Read frame by frame without skipping (only local files)
BLOCKING = 2
BLOCKING = 3
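The four modes and their new values can be summarized in a self-contained sketch. The enum values match the diff; the `per_frame_budget_ms` helper is purely illustrative and not part of the library API:

```python
from enum import Enum

class FrameRate(Enum):
    NATIVE = 0         # pace frames by their DTS timestamps
    NATIVE_SIMPLE = 1  # pace frames by a fixed per-frame delay (1000 / fps ms)
    FAST = 2           # no pacing: read frames as fast as possible
    BLOCKING = 3       # consumer-driven: read frame by frame (local files only)

def per_frame_budget_ms(mode, fps):
    """Nominal inter-frame delay for the simple modes; None where pacing is
    timestamp-driven (NATIVE) or consumer-driven (BLOCKING) rather than fixed."""
    if mode is FrameRate.NATIVE_SIMPLE:
        return 1000.0 / fps
    if mode is FrameRate.FAST:
        return 0.0
    return None

# a 25 fps stream in NATIVE_SIMPLE mode sleeps ~40 ms between frames
assert per_frame_budget_ms(FrameRate.NATIVE_SIMPLE, 25) == 40.0
assert per_frame_budget_ms(FrameRate.FAST, 25) == 0.0
assert per_frame_budget_ms(FrameRate.NATIVE, 25) is None
```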


## Class that stores frame parameters
@@ -166,9 +168,9 @@ def __init__(self,
self.tensor_stream = TensorStream.TensorStream()
self.thread = None
## Amount of frames per second obtained from input bitstream, set by @ref initialize() function
self.fps = None
self.fps = None
## Size (width and height) of frames in input bitstream, set by @ref initialize() function
self.frame_size = None
self.frame_size = None

self.max_consumers = max_consumers
self.cuda_device = cuda_device
@@ -239,7 +241,7 @@ def skip_analyze(self):
# @param[in] normalization Should final colors be normalized or not
# @param[in] delay Specify which frame should be read from decoded buffer. Can take values in range [-buffer_size, 0]
# @param[in] return_index Specify whether need return index of decoded frame or not

# @return Decoded frame in CUDA memory wrapped to Pytorch tensor and index of decoded frame if @ref return_index option set
def read(self,
name="default",
