Skip to content

Commit

Permalink
tensorRT in C++ (#47)
Browse files Browse the repository at this point in the history
* init tensorRT

* add makefiles

* fix cmake

* fix typo

* profile

* add a slow implementation of gauss smooth

* fix cmake

* Paf cpp (#61)

* WIP

* install libopencv-dev

* remove python

* optimize select_peak

* cleanup header

* use channel first

* build in docker

* optimize smooth

* fix path

* optimize resize_area

* download-testdata.sh

* fix

* optimize max pool

* optimize

* draw results

* pre allocate temp memory

* remove unused files

* move folder

* remove duplicated files

* rm unused file

* flatten folder

* mv

* remove unused file

* rm

* fix cmake

* rm

* flatten folder

* cleanup constants

* simplify

* fix yaml

* create a docker image for building with CUDA

* fix

* fix missing dependencies

* fix

* add a cli tool

* cleanup

* add tf-runner interface

* finish OpenposeRunnerImpl

* export base model

* use in-tree build

* fix build

* use CHANNEL FIRST!

* cleanup debug log

* support batch

* update

* cleanup

* merge master

* cleanup

* update tests

* remove debug code

* remove bazel

* remove debug code

* rm

* rm

* fix test

* fix test
  • Loading branch information
lgarithm committed Sep 20, 2018
1 parent 02888df commit ae2686d
Show file tree
Hide file tree
Showing 41 changed files with 1,966 additions and 25 deletions.
1 change: 1 addition & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
docker/*.deb
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
__pycache__
.idea
*.a
*.avi
*.dylib
*.gz
*.jpg
*.log
*.out
*.png
*.uff
/3rdparty
/bazel-*
/checkpoints
/cmake-build
/coco
/data
/models
Expand Down
14 changes: 11 additions & 3 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,22 +1,30 @@
# https://docs.travis-ci.com/user/languages/python/
language: python

# https://docs.travis-ci.com/user/docker/
services:
- docker

python:
- '3.6'
- '2.7'
- '3.6'
- '2.7'

sudo: required

before_install:
- sudo apt install -y swig
- docker info
- sudo apt install -y swig # FIXME: can't install libopencv-dev on travis CI
- pip install -r requirements.txt
- pip install pycocotools # must be installed after cython is installed
- pip install git+https://github.com/tensorlayer/tensorlayer.git # TODO: create a latest release for TL
- ./scripts/install-pafprocess.sh
- ./scripts/download-test-data.sh

script:
- make docker-build
- python ./test_inference.py --path-to-npz='' --images=$(ls data/media/*.jpg | sort | head -n 3 | tr '\n' ',') --base-model=vgg
- python ./test_inference.py --path-to-npz='' --images=$(ls data/media/*.jpg | sort | head -n 3 | tr '\n' ',') --base-model=vggtiny
- python ./test_inference.py --path-to-npz='' --images=$(ls data/media/*.jpg | sort | head -n 3 | tr '\n' ',') --base-model=mobilenet
# TODO: reenable when data_format is supported in BN
# - python ./test_inference.py --path-to-npz='' --images=$(ls data/media/*.jpg | sort | head -n 3 | tr '\n' ',') --base-model=hao28_experimental
# TODO: add more tests
9 changes: 9 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Top-level build script for openpose-plus.
# The targets from src/build.cmake are always configured; the targets from
# src/build-gpu.cmake are added only when the HAVE_CUDA environment variable
# is defined while CMake configures.
cmake_minimum_required(VERSION 3.5)
project(openpose-plus)

# Build as C++11 and fail at configure time, instead of silently decaying to
# an older standard, when the compiler cannot provide it.
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

include(src/build.cmake)

# $ENV{...} is evaluated at configure time only; re-run cmake after changing it.
if(DEFINED ENV{HAVE_CUDA})
  include(src/build-gpu.cmake)
endif()
20 changes: 20 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Developer entry points for openpose-plus.
# BUILD_DIR, NPROC and CMAKE_FLAGS are expected to come from the file named by
# MAKEFILE (Makefile.config unless overridden).
MAKEFILE ?= Makefile.config
include $(MAKEFILE)

# These targets are commands, not files; never treat them as up to date.
.PHONY: default cmake_targets build_with_cmake docker-build docker-build-gpu

default: build_with_cmake
# default: docker-build-gpu

# Configure the CMake build tree. `&&` prevents cmake from running in the
# wrong directory when the cd fails.
cmake_targets:
	mkdir -p $(BUILD_DIR)
	cd $(BUILD_DIR) && cmake $(CMAKE_FLAGS) $(CURDIR)

# Compile inside the build tree. $(MAKE) (rather than a literal `make`) lets
# the sub-make honour flags such as -n and -k given to this make.
build_with_cmake: cmake_targets
	$(MAKE) -C $(BUILD_DIR) -j $(NPROC)

# Image that builds the CPU-only targets.
CPU_TAG = openpose-plus:builder
docker-build:
	docker build --rm -t $(CPU_TAG) -f docker/Dockerfile.builder-cpu .

# Image that builds the GPU targets.
GPU_TAG = openpose-plus:builder-gpu
docker-build-gpu:
	docker build --rm -t $(GPU_TAG) -f docker/Dockerfile.builder-gpu .
13 changes: 13 additions & 0 deletions Makefile.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Shared build settings, included by the top-level Makefile.

# Host kernel name as printed by `uname`; evaluated once instead of spawning
# a shell on every expansion.
UNAME := $(shell uname)

# One CMake build tree per host kernel name.
BUILD_DIR = $(CURDIR)/cmake-build/$(UNAME)

# Parallel job count: Darwin is queried through sysctl, everything else
# through nproc.
ifeq ($(UNAME), Darwin)
    NPROC := $(shell sysctl -n hw.ncpu)
else
    NPROC := $(shell nproc)
endif

# Flags handed to cmake at configure time. The last entry deliberately has no
# trailing backslash, so text appended below is not swallowed into the value.
CMAKE_FLAGS = \
    -DCMAKE_EXPORT_COMPILE_COMMANDS=1 \
    -DCMAKE_BUILD_TYPE=Release
1 change: 1 addition & 0 deletions docker/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.deb
10 changes: 10 additions & 0 deletions docker/Dockerfile.builder-cpu
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Builder image for the CPU-only targets: installs the toolchain, compiles the
# project through `make build_with_cmake`, then downloads and unpacks the
# network-outputs archive.
FROM ubuntu:xenial

# make and curl are invoked by later steps, so install them explicitly rather
# than relying on them arriving as dependencies of other packages.
RUN apt update && \
    apt install -y g++ cmake make curl libopencv-dev libgflags-dev
ADD . /openpose-plus
WORKDIR /openpose-plus
RUN make build_with_cmake
# -f makes curl fail on an HTTP error instead of saving the error page;
# -o pins the local file name that gzip and tar expect below.
RUN curl -fsSL -o network-outputs.gz https://github.com/tensorlayer/fast-openpose/files/2378505/network-outputs.gz && \
    gzip -d network-outputs.gz && \
    tar -xf network-outputs
8 changes: 8 additions & 0 deletions docker/Dockerfile.builder-gpu
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Builder image for the GPU targets. The tensorrt:snapshot base image is
# produced from docker/Dockerfile.tensorrt (see docker/Makefile).
FROM tensorrt:snapshot

# Refresh the package lists in the same layer as the install so a cached,
# stale index is never used. libgflags-dev is needed because the runner
# executables link against gflags.
RUN apt update && \
    apt install -y g++ cmake make libopencv-dev libgflags-dev
ADD . /openpose-plus
WORKDIR /openpose-plus

# Defining HAVE_CUDA makes the top-level CMakeLists.txt include the GPU build script.
ENV HAVE_CUDA=1
RUN make build_with_cmake
28 changes: 28 additions & 0 deletions docker/Dockerfile.tensorrt
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Base image providing the CUDA 9.0 and TensorRT (libnvinfer) development
# packages on Ubuntu xenial.
# The TensorRT repository package named by RT_REPO must be present in the
# build context; it is not downloaded here.
FROM ubuntu:xenial

ARG NVIDIA_CUDA_PREFIX=http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64
ARG NVIDIA_ML_PREFIX=http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64/

ARG CUDA_REPO=cuda-repo-ubuntu1604_9.0.176-1_amd64.deb
ARG ML_REPO=nvidia-machine-learning-repo-ubuntu1604_1.0.0-1_amd64.deb
ARG RT_REPO=nv-tensorrt-repo-ubuntu1604-cuda9.0-ga-trt4.0.1.6-20180612_1-1_amd64.deb

ADD sources.list.ustc /etc/apt/sources.list
# The signing key lives under the CUDA repository prefix, so derive its URL
# from the ARG instead of repeating the literal.
RUN apt update && apt install -y curl && \
    apt-key adv --fetch-keys ${NVIDIA_CUDA_PREFIX}/7fa2af80.pub

# -f aborts on an HTTP error instead of handing an error page to dpkg.
RUN curl -fsLOJ ${NVIDIA_CUDA_PREFIX}/${CUDA_REPO} && \
    curl -fsLOJ ${NVIDIA_ML_PREFIX}/${ML_REPO} && \
    dpkg -i ${CUDA_REPO} && \
    dpkg -i ${ML_REPO}

ADD ${RT_REPO} /tmp/
# Register the local TensorRT repo, refresh the index and install in ONE
# layer: a separately cached `apt update` layer can otherwise go stale and
# make the pinned versions below unresolvable.
RUN dpkg -i /tmp/${RT_REPO} && \
    apt update && \
    apt install -y \
        libnvinfer-dev=4.1.2-1+cuda9.0 \
        cuda-cudart-dev-9-0=9.0.176-1 \
        cuda-libraries-dev-9-0 \
        g++ cmake libopencv-dev
12 changes: 12 additions & 0 deletions docker/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Builds and runs the tensorrt:snapshot image described by Dockerfile.tensorrt.
TAG = tensorrt:snapshot

# TensorRT repository package that Dockerfile.tensorrt adds to the image.
TENSORRT_REPO_DEB = nv-tensorrt-repo-ubuntu1604-cuda9.0-ga-trt4.0.1.6-20180612_1-1_amd64.deb

# Command targets; the .deb rule below is a real file and stays non-phony.
.PHONY: tensorrt-image run

tensorrt-image: $(TENSORRT_REPO_DEB)
	docker build --rm -t $(TAG) -f Dockerfile.tensorrt .

# Copy the package from ~/Downloads into this directory (the build context)
# when it is not already here.
$(TENSORRT_REPO_DEB):
	cp ~/Downloads/$(TENSORRT_REPO_DEB) .

# Open an interactive shell in a throw-away container of the image.
run:
	docker run --rm -it $(TAG) bash
15 changes: 15 additions & 0 deletions docker/sources.list.ustc
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# APT sources for Ubuntu xenial served from mirrors.ustc.edu.cn.
# Installed as /etc/apt/sources.list by docker/Dockerfile.tensorrt.

# Base and updates: main + restricted
deb http://mirrors.ustc.edu.cn/ubuntu/ xenial main restricted

deb http://mirrors.ustc.edu.cn/ubuntu/ xenial-updates main restricted

# universe
deb http://mirrors.ustc.edu.cn/ubuntu/ xenial universe
deb http://mirrors.ustc.edu.cn/ubuntu/ xenial-updates universe

# multiverse
deb http://mirrors.ustc.edu.cn/ubuntu/ xenial multiverse
deb http://mirrors.ustc.edu.cn/ubuntu/ xenial-updates multiverse

# backports
deb http://mirrors.ustc.edu.cn/ubuntu/ xenial-backports main restricted universe multiverse

# security updates
deb http://mirrors.ustc.edu.cn/ubuntu xenial-security main restricted
deb http://mirrors.ustc.edu.cn/ubuntu xenial-security universe
deb http://mirrors.ustc.edu.cn/ubuntu xenial-security multiverse
10 changes: 5 additions & 5 deletions scripts/freeze-graph.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ GRAPH_FILE=${CHECKPOINT_DIR}/graph.pb.txt
CHECKPOINT=${CHECKPOINT_DIR}/saved_checkpoint-0
OUTPUT_GRAPH=${CHECKPOINT_DIR}/freezed

OUTPUT_NODE_NAMES=image,upsample_size,upsample_heatmat,tensor_peaks,upsample_pafmat
OUTPUT_NODE_NAMES=image,outputs/conf,outputs/paf

freeze() {
python3 ${FREEZE_GRAPH_BIN} \
Expand All @@ -41,11 +41,11 @@ freeze() {
--output_node_names ${OUTPUT_NODE_NAMES}
}

# BASE_MODEL=vgg
# PATH_TO_NPZ=${HOME}/Downloads/vgg450000.npz
BASE_MODEL=vgg
PATH_TO_NPZ=${HOME}/Downloads/vgg450000_no_cpm.npz

BASE_MODEL=mobilenet
PATH_TO_NPZ=${HOME}/Downloads/mbn28000.npz
# BASE_MODEL=mobilenet
# PATH_TO_NPZ=${HOME}/Downloads/mbn28000.npz

measure ./export.py --base-model=${BASE_MODEL} --full=True --path-to-npz=${PATH_TO_NPZ}
measure freeze
10 changes: 5 additions & 5 deletions scripts/profile.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,12 @@ measure() {
cd $(dirname $0)/..

export PYTHONUNBUFFERED=1
# DATA_DIR=$(pwd)/data/media

MODEL_DIR=${HOME}/Downloads
DATA_DIR=${HOME}/Downloads/new-tests
IMAGES=$(ls ${DATA_DIR}/*.png | sort | tr '\n' ',')
DATA_DIR=$HOME/var/data/openpose

# DATA_DIR=$(pwd)/data/media
# IMAGES=$(ls ${DATA_DIR}/*.jpg | sort | tr '\n' ',')
IMAGES=$(ls ${DATA_DIR}/examples/media/*.png | sort | tr '\n' ',')

LIMIT=100

Expand Down Expand Up @@ -54,10 +53,11 @@ profile_model() {
}

mkdir -p logs
measure profile_model vggtiny new-models/hao18/pose350000.npz NHWC
# measure profile_model vggtiny new-models/hao18/pose350000.npz NHWC
# measure profile_model mobilenet mbn280000.npz NHWC
# measure profile_model vgg vgg450000_no_cpm.npz NHWC
# measure profile_model vgg vgg450000_no_cpm.npz NCHW # npz shape, is the same, but inference doesn't work yet
# measure profile_model hao28_experimental hao28/pose345000.npz NHWC

# measure profile_model hao28_experimental hao28/pose345000.npz NHWC
measure profile_model hao28_experimental hao28/pose345000.npz NCHW
18 changes: 18 additions & 0 deletions scripts/run-uff-cpp.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/bin/sh
# Build the project with the GPU targets enabled and run the uff-runner_main
# binary on two sample images.
set -e

# Work from the repository root so that `make` and the relative binary path
# below resolve no matter where the script is invoked from.
cd "$(dirname "$0")/.."

# Defining HAVE_CUDA makes the CMake configuration include the GPU targets.
export HAVE_CUDA=1

make

D=$HOME/var/data/openpose/126

# IMAGE=$HOME/Downloads/new-tests/cam0_27.png
IMAGES=$D/cam2_3938.png,$D/cam1_2386.png

# MODEL_FILE=$HOME/Downloads/vgg.uff
MODEL_FILE=$HOME/Downloads/vggtiny.uff

# Quoted so that a $HOME containing whitespace is passed as a single argument.
"./cmake-build/$(uname -s)/uff-runner_main" \
    --model_file="${MODEL_FILE}" \
    --image_files="${IMAGES}"
18 changes: 9 additions & 9 deletions scripts/run-uff.sh → scripts/run-uff-py.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
set -e

MODEL_DIR=$HOME/Downloads
DATA_DIR=$HOME/Downloads/new-tests
DATA_DIR=$HOME/var/data/openpose

# cam0_27.png
# cam0_59.png
Expand Down Expand Up @@ -41,13 +41,13 @@ test_hao28_model() {
}

# test_vgg_model \
# ./data/media/COCO_val2014_000000000192.jpg \
# ${DATA_DIR}/cam0_27.png
# ${DATA_DIR}/examples/media/COCO_val2014_000000000192.jpg \
# ${DATA_DIR}/new-tests/cam0_27.png

test_vggtiny_model \
./data/media/COCO_val2014_000000000192.jpg \
${DATA_DIR}/cam0_27.png
# test_vggtiny_model \
# ${DATA_DIR}/examples/media/COCO_val2014_000000000192.jpg \
# ${DATA_DIR}/new-tests/cam0_27.png

# test_hao28_model \
# ./data/media/COCO_val2014_000000000192.jpg \
# ${DATA_DIR}/cam0_27.png
test_hao28_model \
${DATA_DIR}/examples/media/COCO_val2014_000000000192.jpg \
${DATA_DIR}/new-tests/cam0_27.png
1 change: 1 addition & 0 deletions src/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/pafprocess
11 changes: 11 additions & 0 deletions src/build-gpu.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# GPU targets. Included from the top-level CMakeLists.txt only when the
# HAVE_CUDA environment variable is defined.

# Directory that holds CUDA's include/ and lib/ sub-directories. The default
# is the CUDA 9.0 location this file has always used; override it with
# -DOPENPOSE_PLUS_CUDA_TARGET_DIR=<path> for a different installation.
set(OPENPOSE_PLUS_CUDA_TARGET_DIR
    "/usr/local/cuda-9.0/targets/x86_64-linux"
    CACHE PATH "CUDA target directory containing include/ and lib/")

# FIXME: switch to target_link_directories() once the minimum CMake version
# is >= 3.13. Until then link_directories() has to stay directory-scoped and
# must appear before the target is created.
link_directories("${OPENPOSE_PLUS_CUDA_TARGET_DIR}/lib")

add_executable(uff-runner_main
    ${CMAKE_CURRENT_LIST_DIR}/uff-runner.cpp
    ${CMAKE_CURRENT_LIST_DIR}/tracer.cpp
    ${CMAKE_CURRENT_LIST_DIR}/uff-runner_main.cpp
    ${CMAKE_CURRENT_LIST_DIR}/cuda_buffer.cpp)

# CUDA headers are needed by this executable only, so scope them to the
# target instead of leaking them into every target of the directory.
target_include_directories(uff-runner_main PRIVATE
    "${OPENPOSE_PLUS_CUDA_TARGET_DIR}/include"
    "${OPENPOSE_PLUS_CUDA_TARGET_DIR}/include/crt")

target_link_libraries(uff-runner_main input_image paf vis gflags nvinfer cudart nvparsers)
20 changes: 20 additions & 0 deletions src/build.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Targets that do not depend on CUDA: the input_image, paf and vis libraries
# and the fake-runner executable.

# Package names are case-sensitive on case-sensitive file systems: OpenCV
# ships OpenCVConfig.cmake, which a lookup for "opencv" does not find.
# REQUIRED turns a missing OpenCV into a clear configure-time error instead of
# a later compile or link failure.
find_package(OpenCV REQUIRED)
# gflags remains optional at configure time; the targets below link it by name.
find_package(gflags)

add_library(input_image ${CMAKE_CURRENT_LIST_DIR}/input.cpp)
# PUBLIC so that executables linking these libraries also see OpenCV's
# headers when they live outside the default include path.
target_include_directories(input_image PUBLIC ${OpenCV_INCLUDE_DIRS})
target_link_libraries(input_image opencv_core opencv_imgproc opencv_highgui)

add_library(paf
    ${CMAKE_CURRENT_LIST_DIR}/paf.cpp
    ${CMAKE_CURRENT_LIST_DIR}/post-process.cpp)
target_include_directories(paf PUBLIC ${OpenCV_INCLUDE_DIRS})
target_link_libraries(paf opencv_core opencv_imgproc opencv_highgui)

add_library(vis ${CMAKE_CURRENT_LIST_DIR}/vis.cpp)
target_include_directories(vis PUBLIC ${OpenCV_INCLUDE_DIRS})
target_link_libraries(vis opencv_core opencv_imgproc opencv_highgui)

# Runner built from fake_uff-runner.cpp; it links no CUDA or TensorRT libraries.
add_executable(fake-runner
    ${CMAKE_CURRENT_LIST_DIR}/fake_uff-runner.cpp
    ${CMAKE_CURRENT_LIST_DIR}/tracer.cpp
    ${CMAKE_CURRENT_LIST_DIR}/uff-runner_main.cpp)
target_link_libraries(fake-runner input_image paf vis gflags)
55 changes: 55 additions & 0 deletions src/coco.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#pragma once
#include <utility>
#include <vector>

// Table sizes: part ids used in COCOPAIRS span 0..17, and both pair tables
// below hold 19 rows.
constexpr int COCO_N_PARTS = 18;
constexpr int COCO_N_PAIRS = 19;

using idx_pair_t = std::pair<int, int>;
using coco_pair_list_t = std::vector<idx_pair_t>;

// One row per pair. Each row holds two consecutive indices (2k, 2k + 1); the
// trailing comment is k.
// NOTE(review): presumably these index the channels of the network's PAF
// output -- confirm against the code that consumes this table.
const coco_pair_list_t COCOPAIRS_NET = {
    {12, 13},  //  6
    {20, 21},  // 10
    {14, 15},  //  7
    {16, 17},  //  8
    {22, 23},  // 11
    {24, 25},  // 12
    { 0,  1},  //  0
    { 2,  3},  //  1
    { 4,  5},  //  2
    { 6,  7},  //  3
    { 8,  9},  //  4
    {10, 11},  //  5
    {28, 29},  // 14
    {30, 31},  // 15
    {34, 35},  // 17
    {32, 33},  // 16
    {36, 37},  // 18
    {18, 19},  //  9
    {26, 27},  // 13
};

// One row per pair, in the same row order as COCOPAIRS_NET (the trailing
// comments match row by row). Each row names two part ids in [0, 17].
// Rows marked '*' are the last two rows (positions 17 and 18).
const coco_pair_list_t COCOPAIRS = {
    { 1,  2},  //  6
    { 1,  5},  // 10
    { 2,  3},  //  7
    { 3,  4},  //  8
    { 5,  6},  // 11
    { 6,  7},  // 12
    { 1,  8},  //  0
    { 8,  9},  //  1
    { 9, 10},  //  2
    { 1, 11},  //  3
    {11, 12},  //  4
    {12, 13},  //  5
    { 1,  0},  // 14
    { 0, 14},  // 15
    {14, 16},  // 17
    { 0, 15},  // 16
    {15, 17},  // 18
    { 2, 16},  // *  9
    { 5, 17},  // * 13
};

// Returns true when pair_id is greater than 16. Within a 19-row table that
// selects rows 17 and 18, the two rows marked '*' in COCOPAIRS.
// NOTE(review): assumes pair_id is a row position in the tables above --
// confirm at the call sites.
inline bool is_virtual_pair(int pair_id)
{
    const int last_regular_pair = 16;
    return last_regular_pair < pair_id;
}
Loading

0 comments on commit ae2686d

Please sign in to comment.