From b1b6c85c6c4c24c5f04bc6a8f6089b3322e239e3 Mon Sep 17 00:00:00 2001 From: dreamerlin <528557675@qq.com> Date: Sat, 24 Oct 2020 10:30:36 +0800 Subject: [PATCH 1/2] fix data preparation bug --- tools/data/build_file_list.py | 6 +++--- tools/data/parse_file_list.py | 3 +-- tools/data/sthv1/encode_videos.sh | 2 +- tools/data/sthv1/generate_rawframes_filelist.sh | 4 ++-- tools/data/sthv1/generate_videos_filelist.sh | 8 ++++++++ tools/data/sthv1/preparing_sthv1.md | 17 +++++++++++------ tools/data/sthv2/preparing_sthv2.md | 9 +++++---- 7 files changed, 31 insertions(+), 18 deletions(-) create mode 100644 tools/data/sthv1/generate_videos_filelist.sh diff --git a/tools/data/build_file_list.py b/tools/data/build_file_list.py index d168f8c2df..9db7239a44 100644 --- a/tools/data/build_file_list.py +++ b/tools/data/build_file_list.py @@ -218,8 +218,8 @@ def main(): with open(osp.join(out_path, val_name), 'w') as f: f.writelines(file_lists[0][1]) elif args.output_format == 'json': - train_list = lines2dictlist(file_lists[0][0]) - val_list = lines2dictlist(file_lists[0][1]) + train_list = lines2dictlist(file_lists[0][0], args.format) + val_list = lines2dictlist(file_lists[0][1], args.format) train_name = train_name.replace('.txt', '.json') val_name = val_name.replace('.txt', '.json') with open(osp.join(out_path, train_name), 'w') as f: @@ -244,7 +244,7 @@ def main(): with open(osp.join(out_path, filename), 'w') as f: f.writelines(lists[0][ind]) elif args.output_format == 'json': - data_list = lines2dictlist(lists[0][ind]) + data_list = lines2dictlist(lists[0][ind], args.format) filename = filename.replace('.txt', '.json') with open(osp.join(out_path, filename), 'w') as f: json.dump(data_list, f) diff --git a/tools/data/parse_file_list.py b/tools/data/parse_file_list.py index bae0c919ac..9cbfaac284 100644 --- a/tools/data/parse_file_list.py +++ b/tools/data/parse_file_list.py @@ -74,7 +74,6 @@ def count_files(directory, prefix_list): f'of flow images in video directory: 
{frame_dir}') if i % 200 == 0: print(f'{i} videos parsed') - frame_dict[dir_name] = (frame_dir, total_num[0], num_x) print('frame directory analysis done') @@ -214,7 +213,7 @@ def line_to_map(item, test_mode=False): else: template = item['template'].replace('[', '') template = template.replace(']', '') - label = class_mapping[template] + label = int(class_mapping[template]) return video, label with open(train_file, 'r') as fin: diff --git a/tools/data/sthv1/encode_videos.sh b/tools/data/sthv1/encode_videos.sh index 67d5a40a3a..79a49ab3c4 100644 --- a/tools/data/sthv1/encode_videos.sh +++ b/tools/data/sthv1/encode_videos.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash cd ../ -python build_videos.py ../../data/sthv1/rawframes/ ../../data/sthv1/videos/ --fps 12 --level 2 --start-idx 1 --filename-tmpl '%05d' +python build_videos.py ../../data/sthv1/rawframes/ ../../data/sthv1/videos/ --fps 12 --level 1 --start-idx 1 --filename-tmpl '%05d' echo "Encode videos" cd sthv1/ diff --git a/tools/data/sthv1/generate_rawframes_filelist.sh b/tools/data/sthv1/generate_rawframes_filelist.sh index ea1300c0ce..b6a3935ae5 100644 --- a/tools/data/sthv1/generate_rawframes_filelist.sh +++ b/tools/data/sthv1/generate_rawframes_filelist.sh @@ -1,8 +1,8 @@ #!/usr/bin/env bash cd ../../../ -PYTHONPATH=. python tools/data/build_file_list.py sthv1 data/sthv1/rawframes/ --num-split 1 --level 1 --subset train --format rawframes --shuffle -PYTHONPATH=. python tools/data/build_file_list.py sthv1 data/sthv1/rawframes/ --num-split 1 --level 1 --subset val --format rawframes --shuffle +PYTHONPATH=. python tools/data/build_file_list.py sthv1 data/sthv1/rawframes/ --rgb-prefix '0' --num-split 1 --level 1 --subset train --format rawframes --shuffle +PYTHONPATH=. python tools/data/build_file_list.py sthv1 data/sthv1/rawframes/ --rgb-prefix '0' --num-split 1 --level 1 --subset val --format rawframes --shuffle echo "Filelist for rawframes generated." 
cd tools/data/sthv1/ diff --git a/tools/data/sthv1/generate_videos_filelist.sh b/tools/data/sthv1/generate_videos_filelist.sh new file mode 100644 index 0000000000..4da50b9316 --- /dev/null +++ b/tools/data/sthv1/generate_videos_filelist.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +cd ../../../ +PYTHONPATH=. python tools/data/build_file_list.py sthv1 data/sthv1/videos/ --num-split 1 --level 1 --subset train --format videos --shuffle +PYTHONPATH=. python tools/data/build_file_list.py sthv1 data/sthv1/videos/ --num-split 1 --level 1 --subset val --format videos --shuffle +echo "Filelist for videos generated." + +cd tools/data/sthv1/ diff --git a/tools/data/sthv1/preparing_sthv1.md b/tools/data/sthv1/preparing_sthv1.md index 4dd6a2454c..02ce63f651 100644 --- a/tools/data/sthv1/preparing_sthv1.md +++ b/tools/data/sthv1/preparing_sthv1.md @@ -85,7 +85,7 @@ You can run the follow script to generate file list in the format of rawframes. ```shell cd $MMACTION2/tools/data/sthv1/ -bash generate_rawframes_filelist.sh +bash generate_{rawframes,videos}_filelist.sh ``` ## Step 5. Check Directory Structure @@ -96,18 +96,23 @@ you will get the rawframes (RGB + Flow), and annotation files for Something-Some In the context of the whole project (for Something-Something V1 only), the folder structure will look like: ``` -mmaction +mmaction2 ├── mmaction ├── tools ├── configs ├── data │ ├── sthv1 │ │ ├── sthv1_{train,val}_list_rawframes.txt +│ │ ├── sthv1_{train,val}_list_videos.txt │ │ ├── annotations +│ | ├── videos +│ | | ├── 1.mp4 +│ | | ├── 2.mp4 +│ | | ├──... │ | ├── rawframes -│ | | ├── 100000 -│ | | | ├── img_00001.jpg -│ | | | ├── img_00002.jpg +│ | | ├── 1 +│ | | | ├── 00001.jpg +│ | | | ├── 00002.jpg │ | | | ├── ... │ | | | ├── flow_x_00001.jpg │ | | | ├── flow_x_00002.jpg @@ -115,7 +120,7 @@ mmaction │ | | | ├── flow_y_00001.jpg │ | | | ├── flow_y_00002.jpg │ | | | ├── ... -│ | | ├── 100001 +│ | | ├── 2 │ | | ├── ... 
``` diff --git a/tools/data/sthv2/preparing_sthv2.md b/tools/data/sthv2/preparing_sthv2.md index 0845bc9b65..ad3f923dc2 100644 --- a/tools/data/sthv2/preparing_sthv2.md +++ b/tools/data/sthv2/preparing_sthv2.md @@ -14,6 +14,7 @@ Then, you can download all data parts to `$MMACTION2/data/sthv2/` and use the fo ```shell cd $MMACTION2/data/sthv2/ cat 20bn-something-something-v2-?? | tar zx +cd $MMACTION2/tools/data/sthv2/ ``` ## Step 3. Extract RGB and Flow @@ -80,11 +81,11 @@ mmaction2 │ │ ├── sthv2_{train,val}_list_videos.txt │ │ ├── annotations │ | ├── videos -│ | | ├── 100000.mp4 -│ | | ├── 100001.mp4 +│ | | ├── 1.mp4 +│ | | ├── 2.mp4 │ | | ├──... │ | ├── rawframes -│ | | ├── 100000 +│ | | ├── 1 │ | | | ├── img_00001.jpg │ | | | ├── img_00002.jpg │ | | | ├── ... @@ -94,7 +95,7 @@ mmaction2 │ | | | ├── flow_y_00001.jpg │ | | | ├── flow_y_00002.jpg │ | | | ├── ... -│ | | ├── 100001 +│ | | ├── 2 │ | | ├── ... ``` From 17bb82077e090173fca6b1d35464b4a31fc579af Mon Sep 17 00:00:00 2001 From: dreamerlin <528557675@qq.com> Date: Sat, 24 Oct 2020 10:38:01 +0800 Subject: [PATCH 2/2] update changelog --- docs/changelog.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/changelog.md b/docs/changelog.md index 9a20513881..8d9aab7be1 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -21,6 +21,7 @@ **Bug Fixes** - Register `FrameSelector` in `PIPELINES` ([#268](https://github.com/open-mmlab/mmaction2/pull/268)) - Fix the potential bug for default value in dataset_setting ([#245](https://github.com/open-mmlab/mmaction2/pull/245)) +- Fix the data preparation bug for `something-something` dataset ([#278](https://github.com/open-mmlab/mmaction2/pull/278)) - Fix the invalid config url in slowonly README data benchmark ([#249](https://github.com/open-mmlab/mmaction2/pull/249)) - Validate that the performance of models trained with videos have no significant difference comparing to the performance of models trained with rawframes 
([#256](https://github.com/open-mmlab/mmaction2/pull/256))