Skip to content

Commit

Permalink
862 add json output video demo (#906)
Browse files Browse the repository at this point in the history
* add json output for video demo

* add json output for long_video_demo

* add json output video demo
  • Loading branch information
rlleshi committed Jun 9, 2021
1 parent 9afe256 commit f8b595a
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 23 deletions.
11 changes: 9 additions & 2 deletions demo/README.md
Expand Up @@ -282,7 +282,7 @@ python demo/long_video_demo.py ${CONFIG_FILE} ${CHECKPOINT_FILE} ${VIDEO_FILE} $

Optional arguments:

- `OUT_FILE`: Path to the output video file.
- `OUT_FILE`: Path to the output file, which can be either a video file or a json file.
- `INPUT_STEP`: Input step for sampling frames, which can help to get more sparse input. If not specified, it will be set to 1.
- `DEVICE_TYPE`: Type of device to run the demo. Allowed values are cuda device like `cuda:0` or `cpu`. If not specified, it will be set to `cuda:0`.
- `THRESHOLD`: Threshold of prediction score for action recognition. Only label with score higher than the threshold will be shown. If not specified, it will be set to 0.01.
Expand Down Expand Up @@ -325,7 +325,14 @@ or use checkpoint url from `configs/` to directly load corresponding checkpoint,

```shell
python demo/long_video_demo.py configs/recognition/i3d/i3d_r50_video_inference_32x2x1_100e_kinetics400_rgb.py \
checkpoints/i3d_r50_256p_32x2x1_100e_kinetics400_rgb_20200801-7d9f44de.pth PATH_TO_LONG_VIDEO demo/label_map_k400.txt PATH_TO_SAVED_VIDEO \
checkpoints/i3d_r50_256p_32x2x1_100e_kinetics400_rgb_20200801-7d9f44de.pth PATH_TO_LONG_VIDEO demo/label_map_k400.txt PATH_TO_SAVED_VIDEO
```

5. Predict different labels in a long video by using an I3D model on GPU and save the results as a `json` file

```shell
python demo/long_video_demo.py configs/recognition/i3d/i3d_r50_video_inference_32x2x1_100e_kinetics400_rgb.py \
checkpoints/i3d_r50_256p_32x2x1_100e_kinetics400_rgb_20200801-7d9f44de.pth PATH_TO_LONG_VIDEO demo/label_map_k400.txt ./results.json
```

## SpatioTemporal Action Detection Webcam Demo
Expand Down
78 changes: 57 additions & 21 deletions demo/long_video_demo.py
@@ -1,4 +1,5 @@
import argparse
import json
import random
from collections import deque
from operator import itemgetter
Expand Down Expand Up @@ -32,7 +33,7 @@ def parse_args():
parser.add_argument('checkpoint', help='checkpoint file/url')
parser.add_argument('video_path', help='video file/url')
parser.add_argument('label', help='label file')
parser.add_argument('out_file', help='output filename')
parser.add_argument('out_file', help='output result file in video/json')
parser.add_argument(
'--input-step',
type=int,
Expand All @@ -58,6 +59,47 @@ def parse_args():
return args


def show_results_video(result_queue, text_info, thr, msg, frame, video_writer):
    """Draw recognition results onto ``frame`` and write it to the video.

    If fresh predictions are queued, a new overlay mapping is built from the
    labels whose score reaches ``thr``; otherwise the previous overlay (or the
    placeholder message ``msg``) is drawn. The frame is always written to
    ``video_writer``.

    Args:
        result_queue (deque): Queue holding at most one list of
            ``(label, score)`` prediction tuples.
        text_info (dict): Overlay from the previous call, mapping a pixel
            location tuple to its caption string.
        thr (float): Minimum score for a label to be displayed.
        msg (str): Fallback message shown while no predictions exist yet.
        frame (ndarray): BGR image to annotate and write.
        video_writer (cv2.VideoWriter): Destination for the annotated frame.

    Returns:
        dict: The overlay mapping to reuse on the next call.
    """
    if result_queue:
        # Fresh predictions arrived: rebuild the overlay from scratch.
        text_info = {}
        for idx, (label, score) in enumerate(result_queue.popleft()):
            # Predictions are sorted by score, so stop at the first miss.
            if score < thr:
                break
            pos = (0, 40 + idx * 20)
            caption = label + ': ' + str(round(score, 2))
            text_info[pos] = caption
            cv2.putText(frame, caption, pos, FONTFACE, FONTSCALE, FONTCOLOR,
                        THICKNESS, LINETYPE)
    elif text_info:
        # No new predictions: keep showing the previous labels.
        for pos, caption in text_info.items():
            cv2.putText(frame, caption, pos, FONTFACE, FONTSCALE, FONTCOLOR,
                        THICKNESS, LINETYPE)
    else:
        # Nothing predicted yet: show the placeholder message.
        cv2.putText(frame, msg, (0, 40), FONTFACE, FONTSCALE, MSGCOLOR,
                    THICKNESS, LINETYPE)
    video_writer.write(frame)
    return text_info


def get_results_json(result_queue, text_info, thr, msg, ind, out_json):
    """Record the recognition results for frame index ``ind`` in ``out_json``.

    If fresh predictions are queued, a new rank-to-caption mapping is built
    from the labels whose score reaches ``thr``; otherwise the previous
    mapping (or the placeholder message ``msg``) is stored instead.

    Args:
        result_queue (deque): Queue holding at most one list of
            ``(label, score)`` prediction tuples.
        text_info (dict): Mapping from the previous call, keyed by rank
            (1-based) with ``'label: score'`` caption values.
        thr (float): Minimum score for a label to be recorded.
        msg (str): Fallback entry used while no predictions exist yet.
        ind (int): Frame index used as the key in ``out_json``.
        out_json (dict): Accumulated per-frame results.

    Returns:
        tuple: ``(text_info, out_json)`` — the mapping to reuse on the next
        call and the updated accumulator.
    """
    if result_queue:
        # Fresh predictions arrived: keep those above the threshold,
        # keyed by 1-based rank.
        text_info = {}
        for rank, (label, score) in enumerate(result_queue.popleft(), 1):
            # Predictions are sorted by score, so stop at the first miss.
            if score < thr:
                break
            text_info[rank] = label + ': ' + str(round(score, 2))
        out_json[ind] = text_info
    else:
        # Reuse the previous labels, or fall back to the status message
        # when nothing has been predicted yet.
        out_json[ind] = text_info if text_info else msg
    return text_info, out_json


def show_results(model, data, label, args):
frame_queue = deque(maxlen=args.sample_length)
result_queue = deque(maxlen=1)
Expand All @@ -70,11 +112,13 @@ def show_results(model, data, label, args):

msg = 'Preparing action recognition ...'
text_info = {}
out_json = {}
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
frame_size = (frame_width, frame_height)

ind = 0
video_writer = cv2.VideoWriter(args.out_file, fourcc, fps, frame_size)
video_writer = None if args.out_file.endswith('.json') \
else cv2.VideoWriter(args.out_file, fourcc, fps, frame_size)
prog_bar = mmcv.ProgressBar(num_frames)
backup_frames = []

Expand Down Expand Up @@ -108,28 +152,20 @@ def show_results(model, data, label, args):
results = scores_sorted[:num_selected_labels]
result_queue.append(results)

if len(result_queue) != 0:
text_info = {}
results = result_queue.popleft()
for i, result in enumerate(results):
selected_label, score = result
if score < args.threshold:
break
location = (0, 40 + i * 20)
text = selected_label + ': ' + str(round(score, 2))
text_info[location] = text
cv2.putText(frame, text, location, FONTFACE, FONTSCALE,
FONTCOLOR, THICKNESS, LINETYPE)
elif len(text_info):
for location, text in text_info.items():
cv2.putText(frame, text, location, FONTFACE, FONTSCALE,
FONTCOLOR, THICKNESS, LINETYPE)
if args.out_file.endswith('.json'):
text_info, out_json = get_results_json(result_queue, text_info,
args.threshold, msg, ind,
out_json)
else:
cv2.putText(frame, msg, (0, 40), FONTFACE, FONTSCALE, MSGCOLOR,
THICKNESS, LINETYPE)
video_writer.write(frame)
text_info = show_results_video(result_queue, text_info,
args.threshold, msg, frame,
video_writer)

cap.release()
cv2.destroyAllWindows()
if args.out_file.endswith('.json'):
with open(args.out_file, 'w') as js:
json.dump(out_json, js)


def inference(model, data, args, frame_queue):
Expand Down

0 comments on commit f8b595a

Please sign in to comment.