open-mmlab · innerlee · Oct 24, 2020 · Oct 24, 2020 · Oct 24, 2020
diff --git a/docs/changelog.md b/docs/changelog.md
@@ -21,6 +21,7 @@
 **Bug Fixes**
 - Register `FrameSelector` in `PIPELINES` ([#268](https://github.com/open-mmlab/mmaction2/pull/268))
 - Fix the potential bug for default value in dataset_setting ([#245](https://github.com/open-mmlab/mmaction2/pull/245))
+- Fix the data preparation bug for `something-something` dataset ([#278](https://github.com/open-mmlab/mmaction2/pull/278))
 - Fix the invalid config url in slowonly README data benchmark ([#249](https://github.com/open-mmlab/mmaction2/pull/249))
 - Validate that the performance of models trained with videos have no significant difference comparing to the performance of models trained with rawframes ([#256](https://github.com/open-mmlab/mmaction2/pull/256))
 

diff --git a/tools/data/build_file_list.py b/tools/data/build_file_list.py
@@ -218,8 +218,8 @@ def main():
                 with open(osp.join(out_path, val_name), 'w') as f:
                     f.writelines(file_lists[0][1])
             elif args.output_format == 'json':
-                train_list = lines2dictlist(file_lists[0][0])
-                val_list = lines2dictlist(file_lists[0][1])
+                train_list = lines2dictlist(file_lists[0][0], args.format)
+                val_list = lines2dictlist(file_lists[0][1], args.format)
                 train_name = train_name.replace('.txt', '.json')
                 val_name = val_name.replace('.txt', '.json')
                 with open(osp.join(out_path, train_name), 'w') as f:
@@ -244,7 +244,7 @@ def main():
             with open(osp.join(out_path, filename), 'w') as f:
                 f.writelines(lists[0][ind])
         elif args.output_format == 'json':
-            data_list = lines2dictlist(lists[0][ind])
+            data_list = lines2dictlist(lists[0][ind], args.format)
             filename = filename.replace('.txt', '.json')
             with open(osp.join(out_path, filename), 'w') as f:
                 json.dump(data_list, f)

diff --git a/tools/data/parse_file_list.py b/tools/data/parse_file_list.py
@@ -74,7 +74,6 @@ def count_files(directory, prefix_list):
                              f'of flow images in video directory: {frame_dir}')
         if i % 200 == 0:
             print(f'{i} videos parsed')
-
         frame_dict[dir_name] = (frame_dir, total_num[0], num_x)
 
     print('frame directory analysis done')
@@ -214,7 +213,7 @@ def line_to_map(item, test_mode=False):
         else:
             template = item['template'].replace('[', '')
             template = template.replace(']', '')
-            label = class_mapping[template]
+            label = int(class_mapping[template])
             return video, label
 
     with open(train_file, 'r') as fin:

diff --git a/tools/data/sthv1/encode_videos.sh b/tools/data/sthv1/encode_videos.sh
@@ -1,7 +1,7 @@
 #!/usr/bin/env bash
 
 cd ../
-python build_videos.py ../../data/sthv1/rawframes/ ../../data/sthv1/videos/ --fps 12 --level 2 --start-idx 1 --filename-tmpl '%05d'
+python build_videos.py ../../data/sthv1/rawframes/ ../../data/sthv1/videos/ --fps 12 --level 1 --start-idx 1 --filename-tmpl '%05d'
 echo "Encode videos"
 
 cd sthv1/
diff --git a/tools/data/sthv1/generate_rawframes_filelist.sh b/tools/data/sthv1/generate_rawframes_filelist.sh
@@ -1,8 +1,8 @@
 #!/usr/bin/env bash
 
 cd ../../../
-PYTHONPATH=. python tools/data/build_file_list.py sthv1 data/sthv1/rawframes/ --num-split 1 --level 1 --subset train --format rawframes --shuffle
-PYTHONPATH=. python tools/data/build_file_list.py sthv1 data/sthv1/rawframes/ --num-split 1 --level 1 --subset val --format rawframes --shuffle
+PYTHONPATH=. python tools/data/build_file_list.py sthv1 data/sthv1/rawframes/ --rgb-prefix '0' --num-split 1 --level 1 --subset train --format rawframes --shuffle
+PYTHONPATH=. python tools/data/build_file_list.py sthv1 data/sthv1/rawframes/ --rgb-prefix '0' --num-split 1 --level 1 --subset val --format rawframes --shuffle
 echo "Filelist for rawframes generated."
 
 cd tools/data/sthv1/
diff --git a/tools/data/sthv1/generate_videos_filelist.sh b/tools/data/sthv1/generate_videos_filelist.sh
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+
+cd ../../../
+PYTHONPATH=. python tools/data/build_file_list.py sthv1 data/sthv1/videos/ --num-split 1 --level 1 --subset train --format videos --shuffle
+PYTHONPATH=. python tools/data/build_file_list.py sthv1 data/sthv1/videos/ --num-split 1 --level 1 --subset val --format videos --shuffle
+echo "Filelist for videos generated."
+
+cd tools/data/sthv1/
diff --git a/tools/data/sthv1/preparing_sthv1.md b/tools/data/sthv1/preparing_sthv1.md
@@ -85,7 +85,7 @@ You can run the follow script to generate file list in the format of rawframes.
 
 ```shell
 cd $MMACTION2/tools/data/sthv1/
-bash generate_rawframes_filelist.sh
+bash generate_{rawframes,videos}_filelist.sh
 ```
 
 ## Step 5. Check Directory Structure
@@ -96,26 +96,31 @@ you will get the rawframes (RGB + Flow), and annotation files for Something-Some
 In the context of the whole project (for Something-Something V1 only), the folder structure will look like:
 
 ```
-mmaction
+mmaction2
 ├── mmaction
 ├── tools
 ├── configs
 ├── data
 │   ├── sthv1
 │   │   ├── sthv1_{train,val}_list_rawframes.txt
+│   │   ├── sthv1_{train,val}_list_videos.txt
 │   │   ├── annotations
+│   |   ├── videos
+│   |   |   ├── 1.mp4
+│   |   |   ├── 2.mp4
+│   |   |   ├── ...
 │   |   ├── rawframes
-│   |   |   ├── 100000
-│   |   |   |   ├── img_00001.jpg
-│   |   |   |   ├── img_00002.jpg
+│   |   |   ├── 1
+│   |   |   |   ├── 00001.jpg
+│   |   |   |   ├── 00002.jpg
 │   |   |   |   ├── ...
 │   |   |   |   ├── flow_x_00001.jpg
 │   |   |   |   ├── flow_x_00002.jpg
 │   |   |   |   ├── ...
 │   |   |   |   ├── flow_y_00001.jpg
 │   |   |   |   ├── flow_y_00002.jpg
 │   |   |   |   ├── ...
-│   |   |   ├── 100001
+│   |   |   ├── 2
 │   |   |   ├── ...
 
 ```

diff --git a/tools/data/sthv2/preparing_sthv2.md b/tools/data/sthv2/preparing_sthv2.md
@@ -14,6 +14,7 @@ Then, you can download all data parts to `$MMACTION2/data/sthv2/` and use the fo
 ```shell
 cd $MMACTION2/data/sthv2/
 cat 20bn-something-something-v2-?? | tar zx
+cd $MMACTION2/tools/data/sthv2/
 ```
 
 ## Step 3. Extract RGB and Flow
@@ -80,11 +81,11 @@ mmaction2
 │   │   ├── sthv2_{train,val}_list_videos.txt
 │   │   ├── annotations
 │   |   ├── videos
-│   |   |   ├── 100000.mp4
-│   |   |   ├── 100001.mp4
+│   |   |   ├── 1.mp4
+│   |   |   ├── 2.mp4
 │   |   |   ├──...
 │   |   ├── rawframes
-│   |   |   ├── 100000
+│   |   |   ├── 1
 │   |   |   |   ├── img_00001.jpg
 │   |   |   |   ├── img_00002.jpg
 │   |   |   |   ├── ...
@@ -94,7 +95,7 @@ mmaction2
 │   |   |   |   ├── flow_y_00001.jpg
 │   |   |   |   ├── flow_y_00002.jpg
 │   |   |   |   ├── ...
-│   |   |   ├── 100001
+│   |   |   ├── 2
 │   |   |   ├── ...
 
 ```