Merge branch 'dev'

osai-ai · Jan 24, 2020 · 6f28dc3 · 6f28dc3
2 parents d975b2a + 433e3d0
commit 6f28dc3
Show file tree

Hide file tree

Showing 26 changed files with 644 additions and 250 deletions.
diff --git a/docker/Dockerfile_cu10 → Dockerfile b/docker/Dockerfile_cu10 → Dockerfile
@@ -40,10 +40,9 @@ RUN pip3 install --no-cache-dir \
     numpy==1.16.4 \
     packaging
 
+ARG TORCH_VERSION
 # Install PyTorch
-RUN pip3 install --no-cache-dir \
-    torch==1.2.0 \
-    torchvision==0.4.0
+RUN pip3 install --no-cache-dir torch==$TORCH_VERSION
 
 RUN git clone https://github.com/doxygen/doxygen.git &&\
     cd doxygen &&\

diff --git a/Makefile b/Makefile
@@ -1,6 +1,6 @@
 NAME?=argus-tensor-stream
-CUDA?=cu10
-DOCKER_NAME="$(NAME)-$(CUDA)"
+TORCH_VERSION?=1.4.0
+DOCKER_NAME="$(NAME)-$(TORCH_VERSION)"
 
 GPUS?=all
 ifeq ($(GPUS),none)
@@ -9,12 +9,14 @@ else
 	GPUS_OPTION=--gpus=$(GPUS)
 endif
 
-.PHONY: all build-docker stop build-whl
+.PHONY: all build stop build-whl
 
-all: stop build-docker build-whl
+all: stop build build-whl
 
-build-docker:
-	docker build -t $(DOCKER_NAME) -f docker/Dockerfile_$(CUDA) .
+build:
+	docker build \
+	--build-arg TORCH_VERSION=${TORCH_VERSION} \
+	-t $(DOCKER_NAME) .
 
 stop:
 	-docker stop $(DOCKER_NAME)
@@ -28,7 +30,7 @@ build-whl:
 		$(DOCKER_NAME) \
 		python3 setup.py sdist bdist_wheel
 
-run-bash:
+run-dev:
 	docker run --rm -it \
 		$(GPUS_OPTION) \
 		--net=host \

diff --git a/README.md b/README.md
@@ -2,8 +2,9 @@
 TensorStream is a C++ library for real-time video streams (e.g., RTMP) decoding to CUDA memory which supports some additional features:
 * CUDA memory conversion to ATen Tensor for using it via Python in [PyTorch Deep Learning models](#pytorch-example)
 * Detecting basic video stream issues related to frames reordering/loss
-* Video Post Processing (VPP) operations: downscaling/upscaling, color conversion from NV12 to RGB24/BGR24/Y800  
-The library supports Linux and Windows.
+* Video Post Processing (VPP) operations: downscaling/upscaling, crops, color conversions, etc.
+
+The library supports both Linux and Windows.
 
 Simple example how to use TensorStream for deep learning tasks:
 
@@ -97,12 +98,14 @@ cmake -G "Visual Studio 15 2017 Win64" -T v141,version=14.11 ..
 Extension for Python can be installed via pip:
 
 - **CUDA 9:**
+> **Warning:** CUDA 9 is no longer supported by TensorStream, so new releases won't be built or distributed in binary format for it.
+- **CUDA 10:**
+TensorStream is available compiled against different versions of PyTorch:
 ```
-pip install https://tensorstream.argus-ai.com/wheel/cu9/linux/tensor_stream-0.2.1-cp36-cp36m-linux_x86_64.whl
+pip install https://tensorstream.argus-ai.com/wheel/cu10/torch1.3.1/linux/tensor_stream-0.3.0-cp36-cp36m-linux_x86_64.whl
 ```
-- **CUDA 10:**
 ```
-pip install https://tensorstream.argus-ai.com/wheel/cu10/linux/tensor_stream-0.2.1-cp36-cp36m-linux_x86_64.whl
+pip install https://tensorstream.argus-ai.com/wheel/cu10/torch1.4.0/linux/tensor_stream-0.3.0-cp36-cp36m-linux_x86_64.whl
 ```
 
 #### Building examples and tests
@@ -131,9 +134,9 @@ cmake -DCMAKE_PREFIX_PATH=%cd%\..\..\cmake -G "Visual Studio 15 2017 Win64" -T v
 ```
 
 ## Docker image
-Dockerfiles can be found in [docker](docker) folder. Please note that different Dockerfiles are required for different CUDA versions. To distinguish them name suffix is used, i.e., for **CUDA 9** Dockerfile name is Dockerfile_**cu9**, for **CUDA 10** Dockerfile_**cu10** and so on. 
+To build the TensorStream image, pass the PyTorch version via the TORCH_VERSION build argument:
 ```
-docker build -t tensorstream -f docker/Dockerfile_cu10 .
+docker build --build-arg TORCH_VERSION=1.4.0 -t tensorstream .
 ```
 Run with a bash command line and follow the [installation guide](#install-tensorstream)
 ```
@@ -169,6 +172,11 @@ python simple.py -i rtmp://37.228.119.44:1935/vod/big_buck_bunny.mp4 -fc RGB24 -
 ```
 python simple.py -i rtmp://37.228.119.44:1935/vod/big_buck_bunny.mp4 -fc RGB24 -w 720 -h 480 -o dump -n 100
 ```
+* The result can be cropped via the --crop option, which takes the coordinates of the top-left and bottom-right corners as x1,y1,x2,y2:
+```
+python simple.py -i rtmp://37.228.119.44:1935/vod/big_buck_bunny.mp4 -fc RGB24 -w 720 -h 480 --crop 0,0,320,240 -o dump -n 100
+```
+> **Warning:** Crop is applied before the resize algorithm.
 * Output pixels format can be either torch.float32 or torch.uint8 depending on normalization option which can be True, False or not set so TensorStream will decide which value should be used:
 ```
 python simple.py -i rtmp://37.228.119.44:1935/vod/big_buck_bunny.mp4 -fc RGB24 -w 720 -h 480 -o dump -n 100 --normalize True
@@ -194,6 +202,10 @@ python simple.py -i rtmp://37.228.119.44:1935/vod/big_buck_bunny.mp4 -fc RGB24 -
 ```
 python simple.py -i rtmp://37.228.119.44:1935/vod/big_buck_bunny.mp4 -fc RGB24 -w 720 -h 480 -o dump -n 100 --planes MERGED --skip_analyze
 ```
+* A timeout for input frame reading can be set via the --timeout option (time in seconds):
+```
+python simple.py -i rtmp://37.228.119.44:1935/vod/big_buck_bunny.mp4 -fc RGB24 -w 720 -h 480 -o dump -n 100 --planes MERGED --timeout 2
+```
 * Logs types and levels can be configured with -v, -vd and --nvtx options. Check help to find available values and description:
 ```
 python simple.py -i rtmp://37.228.119.44:1935/vod/big_buck_bunny.mp4 -fc RGB24 -w 720 -h 480 -o dump -n 100 --planes MERGED -v HIGH -vd CONSOLE --nvtx

diff --git a/c_examples/src/Sample.cpp b/c_examples/src/Sample.cpp
@@ -45,13 +45,13 @@ void get_cycle(FrameParameters frameParameters, std::map<std::string, std::strin
 
 int main()
 {
-	//reader.enableLogs(-HIGH);
+	reader.enableLogs(-MEDIUM);
 	reader.enableNVTX();
 	int sts = VREADER_OK;
 	int initNumber = 10;
 
 	while (initNumber--) {
-		sts = reader.initPipeline("rtmp://37.228.119.44:1935/vod/big_buck_bunny.mp4", 5, 0, 5);
+		sts = reader.initPipeline("rtmp://streaming.sportlevel.com/relay/Efie7shoo3aeriexnhl", 5, 0, 5);
 		if (sts != VREADER_OK)
 			reader.endProcessing();
 		else
@@ -61,16 +61,19 @@ int main()
 	reader.skipAnalyzeStage();
 	CHECK_STATUS(sts);
 	std::thread pipeline([] { reader.startProcessing(); });
-	int dstWidth = 1920;
-	int dstHeight = 1080;
+	int dstWidth = 720;
+	int dstHeight = 480;
+	std::tuple<int, int> cropTopLeft = { 0, 0 };
+	std::tuple<int, int> cropBotRight = { 0, 0 };
 	ColorOptions colorOptions = { FourCC::NV12 };
 	colorOptions.planesPos = Planes::PLANAR;
 	colorOptions.normalization = false;
 	ResizeOptions resizeOptions = { dstWidth, dstHeight };
-	resizeOptions.type = ResizeType::BICUBIC;
-	FrameParameters frameParameters = {resizeOptions, colorOptions};
+	CropOptions cropOptions = { cropTopLeft, cropBotRight };
+	FrameParameters frameParameters = {resizeOptions, colorOptions, cropOptions};
 
-	std::map<std::string, std::string> executionParameters = { {"name", "first"}, {"delay", "0"}, {"frames", "20"}, {"dumpName", std::to_string(dstWidth) + "x" + std::to_string(dstHeight) + ".yuv"} };
+	std::map<std::string, std::string> executionParameters = { {"name", "first"}, {"delay", "0"}, {"frames", "50"}, 
+															   {"dumpName", std::to_string(std::get<0>(cropBotRight) - std::get<0>(cropTopLeft)) + "x" + std::to_string(std::get<1>(cropBotRight) - std::get<1>(cropTopLeft)) + ".yuv"} };
 	std::thread get(get_cycle, frameParameters, executionParameters);
 	get.join();
 	reader.endProcessing();

diff --git a/docker/Dockerfile_cu9 b/docker/Dockerfile_cu9
diff --git a/include/Common.h b/include/Common.h
@@ -218,6 +218,7 @@ class NVTXTracer {
 
 const int defaultCUDADevice = 0;
 const int frameRateConstraints = 240;
+extern int timeoutFrame;
 
 template <class T>
 T findFree(std::string consumerName, std::vector<std::pair<std::string, T> >& entities) {

diff --git a/include/Parser.h b/include/Parser.h
@@ -157,4 +157,6 @@ class Parser {
 	Instance of Logger class
 	*/
 	std::shared_ptr<Logger> logger;
+
+	std::chrono::time_point<std::chrono::system_clock> latestFrameTimestamp;
 };
diff --git a/include/VideoProcessor.h b/include/VideoProcessor.h
@@ -64,31 +64,44 @@ enum ResizeType {
 /** Parameters specific for resize
 */
 struct ResizeOptions {
-	ResizeOptions(int width = 0, int height = 0, ResizeType resize = ResizeType::NEAREST) {
+	// If the destination size is 0, no resize will be applied
+	ResizeOptions(int width = 0, int height = 0) {
 		this->width = (unsigned int)width;
 		this->height = (unsigned int)height;
-		this->type = resize;
+		this->type = ResizeType::NEAREST;
 	}
 
 	unsigned int width; /**< Width of destination image */
 	unsigned int height; /**< Height of destination image */
 	ResizeType type; /**< Resize algorithm. See @ref ::ResizeType for more information */
 };
 
+/** Parameters specific for crop
+*/
+struct CropOptions {
+	// If the size of the crop is 0, no crop will be applied
+	CropOptions(std::tuple<int, int> leftTopCorner = { 0, 0 }, std::tuple<int, int> rightBottomCorner = { 0, 0 }) {
+		this->leftTopCorner = leftTopCorner;
+		this->rightBottomCorner = rightBottomCorner;
+	}
+
+	std::tuple<int, int> leftTopCorner; /**< Coordinates of top-left corner of crop box */
+	std::tuple<int, int> rightBottomCorner; /**< Coordinates of right-bottom corner of crop box */
+};
+
 /** Parameters used to configure VPP
  @details These parameters can be passed via @ref TensorStream::getFrame() function
 */
 struct FrameParameters {
-	FrameParameters() {
-
-	}
-
-	FrameParameters(ResizeOptions resize, ColorOptions color) {
+	FrameParameters(ResizeOptions resize = ResizeOptions(), ColorOptions color = ColorOptions(), CropOptions crop = CropOptions()) {
 		this->resize = resize;
 		this->color = color;
+		this->crop = crop;
 	}
+
 	ResizeOptions resize; /**< Resize options, see @ref ::ResizeOptions for more information */
 	ColorOptions color; /**< Color conversion options, see @ref ::ColorParameters for more information*/
+	CropOptions crop; /**< Crop options, see @ref ::CropOptions for more information */
 };
 
 /**
@@ -97,7 +110,9 @@ struct FrameParameters {
 template <class T>
 int colorConversionKernel(AVFrame* src, AVFrame* dst, ColorOptions color, int maxThreadsPerBlock, cudaStream_t* stream);
 
-int resizeKernel(AVFrame* src, AVFrame* dst, ResizeType resize, int maxThreadsPerBlock, cudaStream_t * stream);
+int resizeKernel(AVFrame* src, AVFrame* dst, bool crop, ResizeOptions resize, int maxThreadsPerBlock, cudaStream_t * stream);
+
+int cropHost(AVFrame* src, AVFrame* dst, CropOptions crop, int maxThreadsPerBlock, cudaStream_t * stream);
 
 float channelsByFourCC(FourCC fourCC);
 float channelsByFourCC(std::string fourCC);
@@ -110,7 +125,7 @@ class VideoProcessor {
 	Notice: VPP doesn't allocate memory for output frame, so correctly allocated Tensor with correct FourCC and resolution
 	should be passed via Python API	and this allocated CUDA memory will be filled.
 	*/
-	int Convert(AVFrame* input, AVFrame* output, FrameParameters options, std::string consumerName);
+	int Convert(AVFrame* input, AVFrame* output, FrameParameters& options, std::string consumerName);
 	template <class T>
 	int DumpFrame(T* output, FrameParameters options, std::shared_ptr<FILE> dumpFile);
 	void Close();

diff --git a/include/Wrappers/WrapperC.h b/include/Wrappers/WrapperC.h
@@ -61,8 +61,16 @@ class TensorStream {
 /** Enable NVTX logs from TensorStream
 */
 	void enableNVTX();
-	int getDelay();
+/** Allows skipping the bitstream analysis stage (skipped frames, some bitstream conformance checks)
+*/
 	void skipAnalyzeStage();
+/** Set timeout for frame reading (default: -1, means no timeout)
+@param[in] timeout Value of timeout in ms
+*/
+	void setTimeout(int timeout);
+
+	int getTimeout();
+	int getDelay();
 private:
 	int processingLoop();
 	std::mutex syncDecoded;

diff --git a/include/Wrappers/WrapperPython.h b/include/Wrappers/WrapperPython.h
@@ -36,6 +36,8 @@ class TensorStream {
 	void enableNVTX();
 	int dumpFrame(at::Tensor stream, std::string consumerName, FrameParameters frameParameters);
 	void skipAnalyzeStage();
+	void setTimeout(int timeout);
+	int getTimeout();
 private:
 	int processingLoop();
 	std::mutex syncDecoded;

diff --git a/python_examples/simple.py b/python_examples/simple.py
@@ -9,6 +9,13 @@ def string_bool(s):
         raise ValueError('Not a valid boolean string')
     return s == 'True'
 
+def crop_coords(s):
+    try:
+        x1, y1, x2, y2 = map(int, s.split(','))
+        return x1, y1, x2, y2
+    except:
+        raise argparse.ArgumentTypeError("Coordinates must be x1,y1,x2,y2")
+
 def parse_arguments():
     parser = argparse.ArgumentParser(add_help=False,
                                      description="Simple usage example")
@@ -61,6 +68,12 @@ def parse_arguments():
     parser.add_argument("--skip_analyze",
                         help="Skip bitstream frames reordering / loss analyze stage",
                         action='store_true')
+    parser.add_argument("--timeout",
+                        help="Set timeout in seconds for input frame reading (default: None, means disabled)",
+                        type=float, default=None)
+    parser.add_argument("--crop", 
+                        help="set crop, left top corner and right bottom corner (default: disabled)",
+                        type=crop_coords, default=(0,0,0,0))
 
     return parser.parse_args()
 
@@ -72,8 +85,9 @@ def parse_arguments():
                                    max_consumers=5,
                                    cuda_device=args.cuda_device,
                                    buffer_size=args.buffer_size,
-                                   framerate_mode=FrameRate[args.framerate_mode])
-    #To log initialize stage, logs should be defined before initialize call
+                                   framerate_mode=FrameRate[args.framerate_mode],
+                                   timeout=args.timeout)
+    # To log initialize stage, logs should be defined before initialize call
     reader.enable_logs(LogsLevel[args.verbose], LogsType[args.verbose_destination])
 
     if args.nvtx:
@@ -90,13 +104,14 @@ def parse_arguments():
         if os.path.exists(args.output + ".yuv"):
             os.remove(args.output + ".yuv")
 
-    print(f"Normalize {args.normalize}")
+    print(f"Normalize {args.crop}")
     tensor = None
     try:
         while True:
             parameters = {'pixel_format': FourCC[args.fourcc],
                           'width': args.width,
                           'height': args.height,
+                          'crop_coords' : args.crop,
                           'normalization': args.normalize,
                           'planes_pos': Planes[args.planes],
                           'resize_type': ResizeType[args.resize_type]}

diff --git a/setup.py b/setup.py
@@ -87,6 +87,7 @@ def find_version(*file_paths):
 app_src_path += ["src/Common.cpp"]
 app_src_path += ["src/ColorConversion.cu"]
 app_src_path += ["src/Resize.cu"]
+app_src_path += ["src/Crop.cu"]
 app_src_path += ["src/Parser.cpp"]
 app_src_path += ["src/VideoProcessor.cpp"]
 app_src_path += ["src/Wrappers/WrapperPython.cpp"]