From c5f00a8d7091b46b0ecd175f722d98ea8bea485e Mon Sep 17 00:00:00 2001
From: HaodongDuan <duanhaodong@sensetime.com>
Date: Fri, 16 Oct 2020 17:35:47 +0800
Subject: [PATCH 01/15] resolve comments

---
 tools/data/hvu/generate_sub_file_list.py | 49 ++++++++++++++++++++++++
 1 file changed, 49 insertions(+)
 create mode 100644 tools/data/hvu/generate_sub_file_list.py

diff --git a/tools/data/hvu/generate_sub_file_list.py b/tools/data/hvu/generate_sub_file_list.py
new file mode 100644
index 0000000000..77c7bed651
--- /dev/null
+++ b/tools/data/hvu/generate_sub_file_list.py
@@ -0,0 +1,49 @@
+import argparse
+import os.path as osp
+
+import mmcv
+
+
+def main(annotation_file, category):
+    """Generate the HVU file list of a single tag category.
+
+    Items of ``annotation_file`` tagged with ``category`` are dumped next to
+    it, under the input file name with 'hvu' replaced by 'hvu_{category}'.
+    """
+    assert category in [
+        'action', 'attribute', 'concept', 'event', 'object', 'scene'
+    ]
+
+    data = mmcv.load(annotation_file)
+    basename = osp.basename(annotation_file)
+    dirname = osp.dirname(annotation_file)
+    basename = basename.replace('hvu', f'hvu_{category}')
+
+    target_file = osp.join(dirname, basename)
+
+    # Keep only the items tagged with `category`; the label dict of each
+    # kept item is replaced (in place) by the tags of that category.
+    result = []
+    for item in data:
+        label = item['label']
+        if category in label:
+            item['label'] = label[category]
+            result.append(item)
+
+    # Dump the filtered list rather than the unfiltered `data`.
+    mmcv.dump(result, target_file)
+
+
+if __name__ == '__main__':
+    description = 'Helper script for generating HVU per-category file list.'
+    p = argparse.ArgumentParser(description=description)
+    p.add_argument(
+        'annotation_file',
+        type=str,
+        help=('The annotation file which contains tags of all categories.'))
+    p.add_argument(
+        'category',
+        type=str,
+        choices=['action', 'attribute', 'concept', 'event', 'object', 'scene'],
+        help='The tag category that you want to generate file list for.')
+    main(**vars(p.parse_args()))

From 05575c18bf7dbba7e6e9f9a0f1c4f973668d2c44 Mon Sep 17 00:00:00 2001
From: HaodongDuan <duanhaodong@sensetime.com>
Date: Fri, 16 Oct 2020 17:37:19 +0800
Subject: [PATCH 02/15] update changelog

---
 docs/changelog.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/changelog.md b/docs/changelog.md
index d0fe20a249..3cff48664d 100644
--- a/docs/changelog.md
+++ b/docs/changelog.md
@@ -4,6 +4,7 @@
 
 **Improvements**
 - Set default values of 'average_clips' in each config file so that there is no need to set it explicitly during testing in most cases ([#232](https://github.com/open-mmlab/mmaction2/pull/232))
+- Extend HVU datatools to generate individual file list for each tag category ([#258](https://github.com/open-mmlab/mmaction2/pull/258))
 
 **Bug Fixes**
 - Fix the potential bug for default value in dataset_setting ([#245](https://github.com/open-mmlab/mmaction2/pull/245))

From 441bb0fe25c986782999c937b3eac5af6d9f51c3 Mon Sep 17 00:00:00 2001
From: HaodongDuan <duanhaodong@sensetime.com>
Date: Tue, 10 Nov 2020 15:40:06 +0800
Subject: [PATCH 03/15] add gym pipeline

---
 tools/data/gym/download.py             | 101 +++++++++++++++++++++++++
 tools/data/gym/download_annotations.sh |  14 ++++
 tools/data/gym/download_videos.sh      |  13 ++++
 tools/data/gym/environment.yml         |  36 +++++++++
 tools/data/gym/generate_file_list.py   |  26 +++++++
 tools/data/gym/preparing_gym.md        |  95 +++++++++++++++++++++++
 tools/data/gym/trim_event.py           |  58 ++++++++++++++
 tools/data/gym/trim_subaction.py       |  52 +++++++++++++
 8 files changed, 395 insertions(+)
 create mode 100644 tools/data/gym/download.py
 create mode 100644 tools/data/gym/download_annotations.sh
 create mode 100644 tools/data/gym/download_videos.sh
 create mode 100644 tools/data/gym/environment.yml
 create mode 100644 tools/data/gym/generate_file_list.py
 create mode 100644 tools/data/gym/preparing_gym.md
 create mode 100644 tools/data/gym/trim_event.py
 create mode 100644 tools/data/gym/trim_subaction.py

diff --git a/tools/data/gym/download.py b/tools/data/gym/download.py
new file mode 100644
index 0000000000..14631ce1bc
--- /dev/null
+++ b/tools/data/gym/download.py
@@ -0,0 +1,101 @@
+# This script is copied from
+# https://github.com/activitynet/ActivityNet/blob/master/Crawler/Kinetics/download.py  # noqa: E501
+# The code is licensed under the MIT licence.
+import argparse
+import os
+import subprocess
+
+import mmcv
+
+import ssl  # isort:skip
+
+from joblib import Parallel, delayed  # isort:skip
+
+ssl._create_default_https_context = ssl._create_unverified_context
+
+
+def download(video_identifier,
+             output_filename,
+             num_attempts=5,
+             url_base='https://www.youtube.com/watch?v='):
+    """Download a video from YouTube if it exists and is not blocked.
+
+    Args:
+        video_identifier (str): Unique YouTube video identifier (11 chars).
+        output_filename (str): File path where the video will be stored.
+        num_attempts (int): Failed youtube-dl runs allowed before giving up.
+        url_base (str): URL prefix that the identifier is appended to.
+    """
+    # Defensive argument checking.
+    assert isinstance(video_identifier, str), 'video_identifier must be string'
+    assert isinstance(output_filename, str), 'output_filename must be string'
+    assert len(video_identifier) == 11, 'video_identifier must have length 11'
+
+    status = False
+
+    if not os.path.exists(output_filename):
+        command = [
+            'youtube-dl', '--quiet', '--no-warnings', '--no-check-certificate',
+            '-f', 'mp4', '-o',
+            '"%s"' % output_filename,
+            '"%s"' % (url_base + video_identifier)
+        ]
+        command = ' '.join(command)
+        print(command)
+        attempts = 0
+        while True:
+            try:
+                subprocess.check_output(
+                    command, shell=True, stderr=subprocess.STDOUT)
+            except subprocess.CalledProcessError:
+                attempts += 1
+                if attempts == num_attempts:
+                    return status, 'Fail'
+            else:
+                break
+    # Report whether the file exists; the tag is 'Downloaded' either way.
+    status = os.path.exists(output_filename)
+    return status, 'Downloaded'
+
+
+def download_wrapper(youtube_id, output_dir):
+    """Wrapper for parallel processing purposes."""
+    # we do this to align with names in annotations
+    output_filename = os.path.join(output_dir, youtube_id + '.mp4')
+    if os.path.exists(output_filename):
+        status = tuple([youtube_id, True, 'Exists'])
+        return status
+
+    downloaded, log = download(youtube_id, output_filename)
+    status = tuple([youtube_id, downloaded, log])
+    return status
+
+
+def main(input, output_dir, num_jobs=24):
+    # Use the keys of the loaded annotation file as the YouTube ids to fetch.
+    youtube_ids = mmcv.load(input).keys()
+    # Creates folders where videos will be saved later.
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+    # Download serially when num_jobs is 1, otherwise in parallel via joblib.
+    if num_jobs == 1:
+        status_list = []
+        for index in youtube_ids:
+            status_list.append(download_wrapper(index, output_dir))
+    else:
+        status_list = Parallel(n_jobs=num_jobs)(
+            delayed(download_wrapper)(index, output_dir)
+            for index in youtube_ids)
+
+    # Save the per-video statuses as the download report (current directory).
+    mmcv.dump(status_list, 'download_report.json')
+
+
+if __name__ == '__main__':
+    description = 'Helper script for downloading GYM videos.'
+    p = argparse.ArgumentParser(description=description)
+    p.add_argument('input', type=str, help='The gym annotation file')
+    p.add_argument(
+        'output_dir', type=str, help='Output directory to save videos.')
+    p.add_argument('-n', '--num-jobs', type=int, default=24)
+    main(**vars(p.parse_args()))
diff --git a/tools/data/gym/download_annotations.sh b/tools/data/gym/download_annotations.sh
new file mode 100644
index 0000000000..4922104995
--- /dev/null
+++ b/tools/data/gym/download_annotations.sh
@@ -0,0 +1,14 @@
+#!/usr/bin/env bash
+
+set -e
+
+DATA_DIR="../../../data/gym/annotations"
+
+if [[ ! -d "${DATA_DIR}" ]]; then
+  echo "${DATA_DIR} does not exist. Creating";
+  mkdir -p ${DATA_DIR}
+fi
+
+wget https://sdolivia.github.io/FineGym/resources/dataset/finegym_annotation_info_v1.0.json -O $DATA_DIR/annotation.json
+wget https://sdolivia.github.io/FineGym/resources/dataset/gym99_train_element_v1.0.txt -O $DATA_DIR/gym99_train_org.txt
+wget https://sdolivia.github.io/FineGym/resources/dataset/gym99_val_element.txt -O $DATA_DIR/gym99_val_org.txt
diff --git a/tools/data/gym/download_videos.sh b/tools/data/gym/download_videos.sh
new file mode 100644
index 0000000000..2788a4265d
--- /dev/null
+++ b/tools/data/gym/download_videos.sh
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+
+# set up environment
+conda env create -f environment.yml
+source activate gym
+pip install --upgrade youtube-dl
+
+DATA_DIR="../../../data/gym"
+ANNO_DIR="../../../data/gym/annotations"
+python download.py ${ANNO_DIR}/annotation.json ${DATA_DIR}/videos
+
+source deactivate gym
+conda remove -n gym --all
diff --git a/tools/data/gym/environment.yml b/tools/data/gym/environment.yml
new file mode 100644
index 0000000000..86e7e1a24c
--- /dev/null
+++ b/tools/data/gym/environment.yml
@@ -0,0 +1,36 @@
+name: gym  # must match the env that download_videos.sh activates and removes
+channels:
+  - anaconda
+  - menpo
+  - conda-forge
+  - defaults
+dependencies:
+  - ca-certificates=2020.1.1
+  - certifi=2020.4.5.1
+  - ffmpeg=2.8.6
+  - libcxx=10.0.0
+  - libedit=3.1.20181209
+  - libffi=3.3
+  - ncurses=6.2
+  - openssl=1.1.1g
+  - pip=20.0.2
+  - python=3.7.7
+  - readline=8.0
+  - setuptools=46.4.0
+  - sqlite=3.31.1
+  - tk=8.6.8
+  - wheel=0.34.2
+  - xz=5.2.5
+  - zlib=1.2.11
+  - pip:
+    - decorator==4.4.2
+    - intel-openmp==2019.0
+    - joblib==0.15.1
+    - mkl==2019.0
+    - numpy==1.18.4
+    - olefile==0.46
+    - pandas==1.0.3
+    - python-dateutil==2.8.1
+    - pytz==2020.1
+    - six==1.14.0
+    - youtube-dl==2020.5.8
diff --git a/tools/data/gym/generate_file_list.py b/tools/data/gym/generate_file_list.py
new file mode 100644
index 0000000000..26b8c1ab20
--- /dev/null
+++ b/tools/data/gym/generate_file_list.py
@@ -0,0 +1,26 @@
+import os
+import os.path as osp
+
+annotation_root = '../../../data/gym/annotations'
+data_root = '../../../data/gym/subactions'
+videos = os.listdir(data_root)
+videos = set(videos)
+
+train_file_org = osp.join(annotation_root, 'gym99_train_org.txt')
+val_file_org = osp.join(annotation_root, 'gym99_val_org.txt')
+train_file = osp.join(annotation_root, 'gym99_train.txt')
+val_file = osp.join(annotation_root, 'gym99_val.txt')
+
+train_org = open(train_file_org).readlines()
+train_org = [x.strip().split() for x in train_org]
+train = [x for x in train_org if x[0] + '.mp4' in videos]
+train = [x[0] + '.mp4 ' + x[1] for x in train]
+with open(train_file, 'w') as fout:
+    fout.write('\n'.join(train_org))
+
+val_org = open(val_file_org).readlines()
+val_org = [x.strip().split() for x in val_org]
+val = [x for x in val_org if x[0] + '.mp4' in videos]
+val = [x[0] + '.mp4 ' + x[1] for x in val]
+with open(val_file, 'w') as fout:
+    fout.write('\n'.join(val_org))
diff --git a/tools/data/gym/preparing_gym.md b/tools/data/gym/preparing_gym.md
new file mode 100644
index 0000000000..d93a91b11d
--- /dev/null
+++ b/tools/data/gym/preparing_gym.md
@@ -0,0 +1,95 @@
+# Preparing GYM
+
+## Introduction
+
+```
+@inproceedings{shao2020finegym,
+  title={Finegym: A hierarchical video dataset for fine-grained action understanding},
+  author={Shao, Dian and Zhao, Yue and Dai, Bo and Lin, Dahua},
+  booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+  pages={2616--2625},
+  year={2020}
+}
+```
+
+For basic dataset information, please refer to the official [project](https://sdolivia.github.io/FineGym/) and the [paper](https://arxiv.org/abs/2004.06704).
+We currently provide the data pre-processing pipeline for GYM99.
+Before we start, please make sure that the directory is located at `$MMACTION2/tools/data/gym/`.
+
+## Step 1. Prepare Annotations
+
+First of all, you can run the following script to prepare annotations.
+
+```shell
+bash download_annotations.sh
+```
+
+## Step 2. Prepare Videos
+
+Then, you can run the following script to prepare videos.
+The codes are adapted from the [official crawler](https://github.com/activitynet/ActivityNet/tree/master/Crawler/Kinetics). Note that this might take a long time.
+
+```shell
+bash download_videos.sh
+```
+
+## Step 3. Trim Videos into Events.
+
+First, you need to trim long videos into events based on the annotation of GYM with the following scripts.
+
+```shell
+python trim_event.py
+```
+
+## Step 4. Trim Events into Subactions.
+
+Then, you need to trim events into subactions based on the annotation of GYM with the following script. We use two-stage trimming for better efficiency (trimming multiple short clips from a long video can be extremely inefficient, since you need to go over the video many times).
+
+```shell
+python trim_subaction.py
+```
+
+## Step 5. Generate file list for GYM99 based on extracted subactions.
+
+You can use the following script to generate train / val lists for GYM99.
+
+```shell
+python generate_file_list.py
+```
+
+
+## Step 6. Folder Structure
+
+After the whole data pipeline for GYM preparation. You can get the subaction clips, event clips, raw videos and GYM99 train/val lists.
+
+In the context of the whole project (for GYM only), the full folder structure will look like:
+
+```
+mmaction2
+├── mmaction
+├── tools
+├── configs
+├── data
+│   ├── gym
+|   |   ├── annotations
+|   |   |   ├── gym99_train_org.txt
+|   |   |   ├── gym99_val_org.txt
+|   |   |   ├── gym99_train.txt
+|   |   |   ├── gym99_val.txt
+|   |   |   ├── annotation.json
+|   |   |   └── event_annotation.json
+│   │   ├── videos
+|   |   |   ├── 0LtLS9wROrk.mp4
+|   |   |   ├── ...
+|   |   |   └── zfqS-wCJSsw.mp4
+│   │   ├── events
+|   |   |   ├── 0LtLS9wROrk_E_002407_002435.mp4
+|   |   |   ├── ...
+|   |   |   └── zfqS-wCJSsw_E_006732_006824.mp4
+│   │   └── subactions
+|   |       ├── 0LtLS9wROrk_E_002407_002435_A_0003_0005.mp4
+|   |       ├── ...
+|   |       └── zfqS-wCJSsw_E_006244_006252_A_0000_0007.mp4
+```
+
+For training and evaluating on GYM, please refer to [getting_started](/docs/getting_started.md).
diff --git a/tools/data/gym/trim_event.py b/tools/data/gym/trim_event.py
new file mode 100644
index 0000000000..8903829c19
--- /dev/null
+++ b/tools/data/gym/trim_event.py
@@ -0,0 +1,58 @@
+import os
+import os.path as osp
+import subprocess
+
+import mmcv
+
+data_root = '../../../data/gym'
+video_root = f'{data_root}/videos'
+anno_root = f'{data_root}/annotations'
+anno_file = f'{anno_root}/annotation.json'
+
+event_anno_file = f'{anno_root}/event_annotation.json'
+event_root = f'{data_root}/events'
+
+videos = os.listdir(video_root)
+videos = set(videos)
+annotation = mmcv.load(anno_file)
+event_annotation = {}
+
+if not osp.exists(event_root):
+    os.makedirs(event_root)
+
+for k, v in annotation.items():
+    if k + '.mp4' not in videos:
+        print(f'video {k} has not been downloaded')
+        continue
+
+    video_path = osp.join(video_root, k + '.mp4')
+
+    for event_id, event_anno in v.items():
+        timestamps = event_anno['timestamps'][0]
+        start_time, end_time = timestamps
+        event_name = k + '_' + event_id
+
+        output_filename = event_name + '.mp4'
+
+        command = [
+            'ffmpeg', '-i',
+            '"%s"' % video_path, '-ss',
+            str(start_time), '-t',
+            str(end_time - start_time), '-c:v', 'libx264', '-c:a', 'copy',
+            '-threads', '1', '-loglevel', 'panic',
+            '"%s"' % osp.join(event_root, output_filename)
+        ]
+        command = ' '.join(command)
+        try:
+            subprocess.check_output(
+                command, shell=True, stderr=subprocess.STDOUT)
+        except subprocess.CalledProcessError:
+            print(
+                f'Trimming of the Event {event_name} of Video {k} Failed',
+                flush=True)
+
+        segments = event_anno['segments']
+        if segments is not None:
+            event_annotation[event_name] = segments
+
+mmcv.dump(event_annotation, event_anno_file)
diff --git a/tools/data/gym/trim_subaction.py b/tools/data/gym/trim_subaction.py
new file mode 100644
index 0000000000..311d04b46b
--- /dev/null
+++ b/tools/data/gym/trim_subaction.py
@@ -0,0 +1,52 @@
+import os
+import os.path as osp
+import subprocess
+
+import mmcv
+
+data_root = '../../../data/gym'
+anno_root = f'{data_root}/annotations'
+
+event_anno_file = f'{anno_root}/event_annotation.json'
+event_root = f'{data_root}/events'
+subaction_root = f'{data_root}/subactions'
+
+events = os.listdir(event_root)
+events = set(events)
+annotation = mmcv.load(event_anno_file)
+
+if not osp.exists(subaction_root):
+    os.makedirs(subaction_root)
+
+for k, v in annotation.items():
+    if k + '.mp4' not in events:
+        print(f'video {k[:11]} has not been downloaded '
+              f'or the event clip {k} not generated')
+        continue
+
+    video_path = osp.join(event_root, k + '.mp4')
+
+    for subaction_id, subaction_anno in v.items():
+        timestamps = subaction_anno['timestamps']
+        start_time, end_time = timestamps[0][0], timestamps[-1][1]
+        subaction_name = k + '_' + subaction_id
+
+        output_filename = subaction_name + '.mp4'
+
+        command = [
+            'ffmpeg', '-i',
+            '"%s"' % video_path, '-ss',
+            str(start_time), '-t',
+            str(end_time - start_time), '-c:v', 'libx264', '-c:a', 'copy',
+            '-threads', '1', '-loglevel', 'panic',
+            '"%s"' % osp.join(subaction_root, output_filename)
+        ]
+        command = ' '.join(command)
+        try:
+            subprocess.check_output(
+                command, shell=True, stderr=subprocess.STDOUT)
+        except subprocess.CalledProcessError:
+            print(
+                f'Trimming of the Subaction {subaction_name} of Event '
+                f'{k} Failed',
+                flush=True)

From 3c006a294943e87f3df594e50ad6732cec11a1f0 Mon Sep 17 00:00:00 2001
From: HaodongDuan <duanhaodong@sensetime.com>
Date: Tue, 10 Nov 2020 16:06:53 +0800
Subject: [PATCH 04/15] use 8-thread instead of 1 for performance

---
 tools/data/gym/trim_event.py     | 2 +-
 tools/data/gym/trim_subaction.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/data/gym/trim_event.py b/tools/data/gym/trim_event.py
index 8903829c19..78e3624a76 100644
--- a/tools/data/gym/trim_event.py
+++ b/tools/data/gym/trim_event.py
@@ -39,7 +39,7 @@
             '"%s"' % video_path, '-ss',
             str(start_time), '-t',
             str(end_time - start_time), '-c:v', 'libx264', '-c:a', 'copy',
-            '-threads', '1', '-loglevel', 'panic',
+            '-threads', '8', '-loglevel', 'panic',
             '"%s"' % osp.join(event_root, output_filename)
         ]
         command = ' '.join(command)
diff --git a/tools/data/gym/trim_subaction.py b/tools/data/gym/trim_subaction.py
index 311d04b46b..ec8c1e5711 100644
--- a/tools/data/gym/trim_subaction.py
+++ b/tools/data/gym/trim_subaction.py
@@ -38,7 +38,7 @@
             '"%s"' % video_path, '-ss',
             str(start_time), '-t',
             str(end_time - start_time), '-c:v', 'libx264', '-c:a', 'copy',
-            '-threads', '1', '-loglevel', 'panic',
+            '-threads', '8', '-loglevel', 'panic',
             '"%s"' % osp.join(subaction_root, output_filename)
         ]
         command = ' '.join(command)

From f252497a994fb039044afdd76de4c979cab768e3 Mon Sep 17 00:00:00 2001
From: HaodongDuan <duanhaodong@sensetime.com>
Date: Tue, 10 Nov 2020 16:48:54 +0800
Subject: [PATCH 05/15] update

---
 tools/data/gym/generate_file_list.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/data/gym/generate_file_list.py b/tools/data/gym/generate_file_list.py
index 26b8c1ab20..54c0c21f5c 100644
--- a/tools/data/gym/generate_file_list.py
+++ b/tools/data/gym/generate_file_list.py
@@ -16,11 +16,11 @@
 train = [x for x in train_org if x[0] + '.mp4' in videos]
 train = [x[0] + '.mp4 ' + x[1] for x in train]
 with open(train_file, 'w') as fout:
-    fout.write('\n'.join(train_org))
+    fout.write('\n'.join(train))
 
 val_org = open(val_file_org).readlines()
 val_org = [x.strip().split() for x in val_org]
 val = [x for x in val_org if x[0] + '.mp4' in videos]
 val = [x[0] + '.mp4 ' + x[1] for x in val]
 with open(val_file, 'w') as fout:
-    fout.write('\n'.join(val_org))
+    fout.write('\n'.join(val))

From 22f8a94a6a9817e525a324844bab5d7f0df1ffda Mon Sep 17 00:00:00 2001
From: HaodongDuan <duanhaodong@sensetime.com>
Date: Tue, 10 Nov 2020 20:26:47 +0800
Subject: [PATCH 06/15] add frame extraction, since we will also add flow
 models

---
 tools/data/gym/extract_frames.sh     |  7 +++++++
 tools/data/gym/generate_file_list.py | 22 ++++++++++++++++++++++
 tools/data/gym/preparing_gym.md      | 17 ++++++++++++++---
 3 files changed, 43 insertions(+), 3 deletions(-)
 create mode 100644 tools/data/gym/extract_frames.sh

diff --git a/tools/data/gym/extract_frames.sh b/tools/data/gym/extract_frames.sh
new file mode 100644
index 0000000000..cfcc8c044d
--- /dev/null
+++ b/tools/data/gym/extract_frames.sh
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+
+cd ../
+python build_rawframes.py ../../data/gym/subactions/ ../../data/gym/subaction_frames/ --level 1 --flow-type tvl1 --ext mp4 --task both --new-short 256
+echo "Raw frames (RGB and tv-l1) Generated"
+
+cd gym/
diff --git a/tools/data/gym/generate_file_list.py b/tools/data/gym/generate_file_list.py
index 54c0c21f5c..bbbaeae209 100644
--- a/tools/data/gym/generate_file_list.py
+++ b/tools/data/gym/generate_file_list.py
@@ -3,6 +3,8 @@
 
 annotation_root = '../../../data/gym/annotations'
 data_root = '../../../data/gym/subactions'
+frame_data_root = '../../../data/gym/subaction_frames'
+
 videos = os.listdir(data_root)
 videos = set(videos)
 
@@ -10,10 +12,21 @@
 val_file_org = osp.join(annotation_root, 'gym99_val_org.txt')
 train_file = osp.join(annotation_root, 'gym99_train.txt')
 val_file = osp.join(annotation_root, 'gym99_val.txt')
+train_frame_file = osp.join(annotation_root, 'gym99_train_frame.txt')
+val_frame_file = osp.join(annotation_root, 'gym99_val_frame.txt')
 
 train_org = open(train_file_org).readlines()
 train_org = [x.strip().split() for x in train_org]
 train = [x for x in train_org if x[0] + '.mp4' in videos]
+if osp.exists(frame_data_root):
+    train_frames = []
+    for line in train:
+        length = len(os.listdir(osp.join(frame_data_root, line[0])))
+        train_frames.append([line[0], str(length // 3), line[1]])
+    train_frames = [' '.join(x) for x in train_frames]
+    with open(train_frame_file, 'w') as fout:
+        fout.write('\n'.join(train_frames))
+
 train = [x[0] + '.mp4 ' + x[1] for x in train]
 with open(train_file, 'w') as fout:
     fout.write('\n'.join(train))
@@ -21,6 +34,15 @@
 val_org = open(val_file_org).readlines()
 val_org = [x.strip().split() for x in val_org]
 val = [x for x in val_org if x[0] + '.mp4' in videos]
+if osp.exists(frame_data_root):
+    val_frames = []
+    for line in val:
+        length = len(os.listdir(osp.join(frame_data_root, line[0])))
+        val_frames.append([line[0], str(length // 3), line[1]])
+    val_frames = [' '.join(x) for x in val_frames]
+    with open(val_frame_file, 'w') as fout:
+        fout.write('\n'.join(val_frames))
+
 val = [x[0] + '.mp4 ' + x[1] for x in val]
 with open(val_file, 'w') as fout:
     fout.write('\n'.join(val))
diff --git a/tools/data/gym/preparing_gym.md b/tools/data/gym/preparing_gym.md
index d93a91b11d..a86c855da0 100644
--- a/tools/data/gym/preparing_gym.md
+++ b/tools/data/gym/preparing_gym.md
@@ -49,7 +49,19 @@ Then, you need to trim events into subactions based on the annotation of GYM wit
 python trim_subaction.py
 ```
 
-## Step 5. Generate file list for GYM99 based on extracted subactions.
+## Step 5. Extract RGB and Flow
+
+This part is **optional** if you only want to use the video loader for RGB model training.
+
+Before extracting, please refer to [install.md](/docs/install.md) for installing [denseflow](https://github.com/open-mmlab/denseflow).
+
+Run the following script to extract both rgb and flow using "tvl1" algorithm.
+
+```shell
+bash extract_frames.sh
+```
+
+## Step 6. Generate file list for GYM99 based on extracted subactions.
 
 You can use the following script to generate train / val lists for GYM99.
 
@@ -57,8 +69,7 @@ You can use the following script to generate train / val lists for GYM99.
 python generate_file_list.py
 ```
 
-
-## Step 6. Folder Structure
+## Step 7. Folder Structure
 
 After the whole data pipeline for GYM preparation. You can get the subaction clips, event clips, raw videos and GYM99 train/val lists.
 

From 7cb0e98f88c5be8d6ccb704090041a10c020edd1 Mon Sep 17 00:00:00 2001
From: HaodongDuan <duanhaodong@sensetime.com>
Date: Wed, 11 Nov 2020 10:35:10 +0800
Subject: [PATCH 07/15] resolve comments

---
 tools/data/activitynet/download.py | 6 ++----
 tools/data/gym/download.py         | 6 ++----
 2 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/tools/data/activitynet/download.py b/tools/data/activitynet/download.py
index e4d9ba7902..f767863918 100644
--- a/tools/data/activitynet/download.py
+++ b/tools/data/activitynet/download.py
@@ -2,13 +2,11 @@
 # https://github.com/activitynet/ActivityNet/blob/master/Crawler/Kinetics/download.py  # noqa: E501
 # The code is licensed under the MIT licence.
 import os
+import ssl
 import subprocess
 
 import mmcv
-
-import ssl  # isort:skip
-
-from joblib import Parallel, delayed  # isort:skip
+from joblib import Parallel, delayed
 
 ssl._create_default_https_context = ssl._create_unverified_context
 data_file = '../../../data/ActivityNet'
diff --git a/tools/data/gym/download.py b/tools/data/gym/download.py
index 14631ce1bc..51b54c24a8 100644
--- a/tools/data/gym/download.py
+++ b/tools/data/gym/download.py
@@ -3,13 +3,11 @@
 # The code is licensed under the MIT licence.
 import argparse
 import os
+import ssl
 import subprocess
 
 import mmcv
-
-import ssl  # isort:skip
-
-from joblib import Parallel, delayed  # isort:skip
+from joblib import Parallel, delayed
 
 ssl._create_default_https_context = ssl._create_unverified_context
 

From e8da54c0d1bf834d5de854706dfd0064e73ffda1 Mon Sep 17 00:00:00 2001
From: HaodongDuan <duanhaodong@sensetime.com>
Date: Wed, 11 Nov 2020 16:31:37 +0800
Subject: [PATCH 08/15] resolve comments

---
 tools/data/gym/trim_event.py     | 3 +--
 tools/data/gym/trim_subaction.py | 3 +--
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/tools/data/gym/trim_event.py b/tools/data/gym/trim_event.py
index 78e3624a76..9ae22262b5 100644
--- a/tools/data/gym/trim_event.py
+++ b/tools/data/gym/trim_event.py
@@ -17,8 +17,7 @@
 annotation = mmcv.load(anno_file)
 event_annotation = {}
 
-if not osp.exists(event_root):
-    os.makedirs(event_root)
+mmcv.mkdir_or_exist(event_root)
 
 for k, v in annotation.items():
     if k + '.mp4' not in videos:
diff --git a/tools/data/gym/trim_subaction.py b/tools/data/gym/trim_subaction.py
index ec8c1e5711..fa705e97a4 100644
--- a/tools/data/gym/trim_subaction.py
+++ b/tools/data/gym/trim_subaction.py
@@ -15,8 +15,7 @@
 events = set(events)
 annotation = mmcv.load(event_anno_file)
 
-if not osp.exists(subaction_root):
-    os.makedirs(subaction_root)
+mmcv.mkdir_or_exist(subaction_root)
 
 for k, v in annotation.items():
     if k + '.mp4' not in events:

From 1aac9330e7b2adb8e5ca7811ae376f79f70635fa Mon Sep 17 00:00:00 2001
From: HaodongDuan <duanhaodong@sensetime.com>
Date: Tue, 10 Nov 2020 15:40:06 +0800
Subject: [PATCH 09/15] add gym pipeline

---
 tools/data/gym/download.py             | 101 +++++++++++++++++++++++++
 tools/data/gym/download_annotations.sh |  14 ++++
 tools/data/gym/download_videos.sh      |  13 ++++
 tools/data/gym/environment.yml         |  36 +++++++++
 tools/data/gym/generate_file_list.py   |  26 +++++++
 tools/data/gym/preparing_gym.md        |  95 +++++++++++++++++++++++
 tools/data/gym/trim_event.py           |  58 ++++++++++++++
 tools/data/gym/trim_subaction.py       |  52 +++++++++++++
 8 files changed, 395 insertions(+)
 create mode 100644 tools/data/gym/download.py
 create mode 100644 tools/data/gym/download_annotations.sh
 create mode 100644 tools/data/gym/download_videos.sh
 create mode 100644 tools/data/gym/environment.yml
 create mode 100644 tools/data/gym/generate_file_list.py
 create mode 100644 tools/data/gym/preparing_gym.md
 create mode 100644 tools/data/gym/trim_event.py
 create mode 100644 tools/data/gym/trim_subaction.py

diff --git a/tools/data/gym/download.py b/tools/data/gym/download.py
new file mode 100644
index 0000000000..14631ce1bc
--- /dev/null
+++ b/tools/data/gym/download.py
@@ -0,0 +1,101 @@
+# This script is copied from
+# https://github.com/activitynet/ActivityNet/blob/master/Crawler/Kinetics/download.py  # noqa: E501
+# The code is licensed under the MIT licence.
+import argparse
+import os
+import subprocess
+
+import mmcv
+
+import ssl  # isort:skip
+
+from joblib import Parallel, delayed  # isort:skip
+
+ssl._create_default_https_context = ssl._create_unverified_context
+
+
+def download(video_identifier,
+             output_filename,
+             num_attempts=5,
+             url_base='https://www.youtube.com/watch?v='):
+    """Download a video from YouTube if it exists and is not blocked.
+
+    Args:
+        video_identifier (str): Unique YouTube video identifier (11 chars).
+        output_filename (str): File path where the video will be stored.
+        num_attempts (int): Failed youtube-dl runs allowed before giving up.
+        url_base (str): URL prefix that the identifier is appended to.
+    """
+    # Defensive argument checking.
+    assert isinstance(video_identifier, str), 'video_identifier must be string'
+    assert isinstance(output_filename, str), 'output_filename must be string'
+    assert len(video_identifier) == 11, 'video_identifier must have length 11'
+
+    status = False
+
+    if not os.path.exists(output_filename):
+        command = [
+            'youtube-dl', '--quiet', '--no-warnings', '--no-check-certificate',
+            '-f', 'mp4', '-o',
+            '"%s"' % output_filename,
+            '"%s"' % (url_base + video_identifier)
+        ]
+        command = ' '.join(command)
+        print(command)
+        attempts = 0
+        while True:
+            try:
+                subprocess.check_output(
+                    command, shell=True, stderr=subprocess.STDOUT)
+            except subprocess.CalledProcessError:
+                attempts += 1
+                if attempts == num_attempts:
+                    return status, 'Fail'
+            else:
+                break
+    # Report whether the file exists; the tag is 'Downloaded' either way.
+    status = os.path.exists(output_filename)
+    return status, 'Downloaded'
+
+
+def download_wrapper(youtube_id, output_dir):
+    """Wrapper for parallel processing purposes."""
+    # we do this to align with names in annotations
+    output_filename = os.path.join(output_dir, youtube_id + '.mp4')
+    if os.path.exists(output_filename):
+        status = tuple([youtube_id, True, 'Exists'])
+        return status
+
+    downloaded, log = download(youtube_id, output_filename)
+    status = tuple([youtube_id, downloaded, log])
+    return status
+
+
+def main(input, output_dir, num_jobs=24):
+    # Use the keys of the loaded annotation file as the YouTube ids to fetch.
+    youtube_ids = mmcv.load(input).keys()
+    # Creates folders where videos will be saved later.
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+    # Download serially when num_jobs is 1, otherwise in parallel via joblib.
+    if num_jobs == 1:
+        status_list = []
+        for index in youtube_ids:
+            status_list.append(download_wrapper(index, output_dir))
+    else:
+        status_list = Parallel(n_jobs=num_jobs)(
+            delayed(download_wrapper)(index, output_dir)
+            for index in youtube_ids)
+
+    # Save the per-video statuses as the download report (current directory).
+    mmcv.dump(status_list, 'download_report.json')
+
+
+if __name__ == '__main__':
+    description = 'Helper script for downloading GYM videos.'
+    p = argparse.ArgumentParser(description=description)
+    p.add_argument('input', type=str, help='The gym annotation file')
+    p.add_argument(
+        'output_dir', type=str, help='Output directory to save videos.')
+    p.add_argument('-n', '--num-jobs', type=int, default=24)
+    main(**vars(p.parse_args()))
diff --git a/tools/data/gym/download_annotations.sh b/tools/data/gym/download_annotations.sh
new file mode 100644
index 0000000000..4922104995
--- /dev/null
+++ b/tools/data/gym/download_annotations.sh
@@ -0,0 +1,14 @@
+#!/usr/bin/env bash
+# Download the FineGym annotation files; DATA_DIR is relative to the CWD.
+set -e
+
+DATA_DIR="../../../data/gym/annotations"
+
+if [[ ! -d "${DATA_DIR}" ]]; then
+  echo "${DATA_DIR} does not exist. Creating"
+  mkdir -p "${DATA_DIR}"
+fi
+
+wget https://sdolivia.github.io/FineGym/resources/dataset/finegym_annotation_info_v1.0.json -O "${DATA_DIR}/annotation.json"
+wget https://sdolivia.github.io/FineGym/resources/dataset/gym99_train_element_v1.0.txt -O "${DATA_DIR}/gym99_train_org.txt"
+wget https://sdolivia.github.io/FineGym/resources/dataset/gym99_val_element.txt -O "${DATA_DIR}/gym99_val_org.txt"
diff --git a/tools/data/gym/download_videos.sh b/tools/data/gym/download_videos.sh
new file mode 100644
index 0000000000..2788a4265d
--- /dev/null
+++ b/tools/data/gym/download_videos.sh
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+
+# set up environment
+conda env create -f environment.yml
+source activate gym
+pip install --upgrade youtube-dl
+
+DATA_DIR="../../../data/gym"
+ANNO_DIR="../../../data/gym/annotations"
+python download.py ${ANNO_DIR}/annotation.json ${DATA_DIR}/videos
+
+source deactivate gym
+conda remove -n gym --all
diff --git a/tools/data/gym/environment.yml b/tools/data/gym/environment.yml
new file mode 100644
index 0000000000..86e7e1a24c
--- /dev/null
+++ b/tools/data/gym/environment.yml
@@ -0,0 +1,36 @@
+name: gym
+channels:
+  - anaconda
+  - menpo
+  - conda-forge
+  - defaults
+dependencies:
+  - ca-certificates=2020.1.1
+  - certifi=2020.4.5.1
+  - ffmpeg=2.8.6
+  - libcxx=10.0.0
+  - libedit=3.1.20181209
+  - libffi=3.3
+  - ncurses=6.2
+  - openssl=1.1.1g
+  - pip=20.0.2
+  - python=3.7.7
+  - readline=8.0
+  - setuptools=46.4.0
+  - sqlite=3.31.1
+  - tk=8.6.8
+  - wheel=0.34.2
+  - xz=5.2.5
+  - zlib=1.2.11
+  - pip:
+    - decorator==4.4.2
+    - intel-openmp==2019.0
+    - joblib==0.15.1
+    - mkl==2019.0
+    - numpy==1.18.4
+    - olefile==0.46
+    - pandas==1.0.3
+    - python-dateutil==2.8.1
+    - pytz==2020.1
+    - six==1.14.0
+    - youtube-dl==2020.5.8
diff --git a/tools/data/gym/generate_file_list.py b/tools/data/gym/generate_file_list.py
new file mode 100644
index 0000000000..26b8c1ab20
--- /dev/null
+++ b/tools/data/gym/generate_file_list.py
@@ -0,0 +1,26 @@
+import os
+import os.path as osp
+
+annotation_root = '../../../data/gym/annotations'
+data_root = '../../../data/gym/subactions'
+videos = os.listdir(data_root)
+videos = set(videos)
+
+train_file_org = osp.join(annotation_root, 'gym99_train_org.txt')
+val_file_org = osp.join(annotation_root, 'gym99_val_org.txt')
+train_file = osp.join(annotation_root, 'gym99_train.txt')
+val_file = osp.join(annotation_root, 'gym99_val.txt')
+
+train_org = open(train_file_org).readlines()
+train_org = [x.strip().split() for x in train_org]
+train = [x for x in train_org if x[0] + '.mp4' in videos]
+train = [x[0] + '.mp4 ' + x[1] for x in train]
+with open(train_file, 'w') as fout:
+    fout.write('\n'.join(train_org))
+
+val_org = open(val_file_org).readlines()
+val_org = [x.strip().split() for x in val_org]
+val = [x for x in val_org if x[0] + '.mp4' in videos]
+val = [x[0] + '.mp4 ' + x[1] for x in val]
+with open(val_file, 'w') as fout:
+    fout.write('\n'.join(val_org))
diff --git a/tools/data/gym/preparing_gym.md b/tools/data/gym/preparing_gym.md
new file mode 100644
index 0000000000..d93a91b11d
--- /dev/null
+++ b/tools/data/gym/preparing_gym.md
@@ -0,0 +1,95 @@
+# Preparing GYM
+
+## Introduction
+
+```
+@inproceedings{shao2020finegym,
+  title={Finegym: A hierarchical video dataset for fine-grained action understanding},
+  author={Shao, Dian and Zhao, Yue and Dai, Bo and Lin, Dahua},
+  booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+  pages={2616--2625},
+  year={2020}
+}
+```
+
+For basic dataset information, please refer to the official [project](https://sdolivia.github.io/FineGym/) and the [paper](https://arxiv.org/abs/2004.06704).
+We currently provide the data pre-processing pipeline for GYM99.
+Before we start, please make sure that the current working directory is `$MMACTION2/tools/data/gym/`.
+
+## Step 1. Prepare Annotations
+
+First of all, you can run the following script to prepare annotations.
+
+```shell
+bash download_annotations.sh
+```
+
+## Step 2. Prepare Videos
+
+Then, you can run the following script to prepare videos.
+The code is adapted from the [official crawler](https://github.com/activitynet/ActivityNet/tree/master/Crawler/Kinetics). Note that this might take a long time.
+
+```shell
+bash download_videos.sh
+```
+
+## Step 3. Trim Videos into Events.
+
+First, you need to trim long videos into events based on the annotation of GYM with the following scripts.
+
+```shell
+python trim_event.py
+```
+
+## Step 4. Trim Events into Subactions.
+
+Then, you need to trim events into subactions based on the annotation of GYM with the following script. We use two-stage trimming for better efficiency (trimming multiple short clips from a long video can be extremely inefficient, since you need to go over the video many times).
+
+```shell
+python trim_subaction.py
+```
+
+## Step 5. Generate file list for GYM99 based on extracted subactions.
+
+You can use the following script to generate train / val lists for GYM99.
+
+```shell
+python generate_file_list.py
+```
+
+
+## Step 6. Folder Structure
+
+After the whole data pipeline for GYM preparation. You can get the subaction clips, event clips, raw videos and GYM99 train/val lists.
+
+In the context of the whole project (for GYM only), the full folder structure will look like:
+
+```
+mmaction2
+├── mmaction
+├── tools
+├── configs
+├── data
+│   ├── gym
+|   |   ├── annotations
+|   |   |   ├── gym99_train_org.txt
+|   |   |   ├── gym99_val_org.txt
+|   |   |   ├── gym99_train.txt
+|   |   |   ├── gym99_val.txt
+|   |   |   ├── annotation.json
+|   |   |   └── event_annotation.json
+│   │   ├── videos
+|   |   |   ├── 0LtLS9wROrk.mp4
+|   |   |   ├── ...
+|   |   |   └── zfqS-wCJSsw.mp4
+│   │   ├── events
+|   |   |   ├── 0LtLS9wROrk_E_002407_002435.mp4
+|   |   |   ├── ...
+|   |   |   └── zfqS-wCJSsw_E_006732_006824.mp4
+│   │   └── subactions
+|   |       ├── 0LtLS9wROrk_E_002407_002435_A_0003_0005.mp4
+|   |       ├── ...
+|   |       └── zfqS-wCJSsw_E_006244_006252_A_0000_0007.mp4
+```
+
+For training and evaluating on GYM, please refer to [getting_started](/docs/getting_started.md).
diff --git a/tools/data/gym/trim_event.py b/tools/data/gym/trim_event.py
new file mode 100644
index 0000000000..8903829c19
--- /dev/null
+++ b/tools/data/gym/trim_event.py
@@ -0,0 +1,58 @@
+import os
+import os.path as osp
+import subprocess
+
+import mmcv
+
+data_root = '../../../data/gym'
+video_root = f'{data_root}/videos'
+anno_root = f'{data_root}/annotations'
+anno_file = f'{anno_root}/annotation.json'
+
+event_anno_file = f'{anno_root}/event_annotation.json'
+event_root = f'{data_root}/events'
+
+videos = os.listdir(video_root)
+videos = set(videos)
+annotation = mmcv.load(anno_file)
+event_annotation = {}
+
+if not osp.exists(event_root):
+    os.makedirs(event_root)
+
+for k, v in annotation.items():
+    if k + '.mp4' not in videos:
+        print(f'video {k} has not been downloaded')
+        continue
+
+    video_path = osp.join(video_root, k + '.mp4')
+
+    for event_id, event_anno in v.items():
+        timestamps = event_anno['timestamps'][0]
+        start_time, end_time = timestamps
+        event_name = k + '_' + event_id
+
+        output_filename = event_name + '.mp4'
+
+        command = [
+            'ffmpeg', '-i',
+            '"%s"' % video_path, '-ss',
+            str(start_time), '-t',
+            str(end_time - start_time), '-c:v', 'libx264', '-c:a', 'copy',
+            '-threads', '1', '-loglevel', 'panic',
+            '"%s"' % osp.join(event_root, output_filename)
+        ]
+        command = ' '.join(command)
+        try:
+            subprocess.check_output(
+                command, shell=True, stderr=subprocess.STDOUT)
+        except subprocess.CalledProcessError:
+            print(
+                f'Trimming of the Event {event_name} of Video {k} Failed',
+                flush=True)
+
+        segments = event_anno['segments']
+        if segments is not None:
+            event_annotation[event_name] = segments
+
+mmcv.dump(event_annotation, event_anno_file)
diff --git a/tools/data/gym/trim_subaction.py b/tools/data/gym/trim_subaction.py
new file mode 100644
index 0000000000..311d04b46b
--- /dev/null
+++ b/tools/data/gym/trim_subaction.py
@@ -0,0 +1,52 @@
+import os
+import os.path as osp
+import subprocess
+
+import mmcv
+
+data_root = '../../../data/gym'
+anno_root = f'{data_root}/annotations'
+
+event_anno_file = f'{anno_root}/event_annotation.json'
+event_root = f'{data_root}/events'
+subaction_root = f'{data_root}/subactions'
+
+events = os.listdir(event_root)
+events = set(events)
+annotation = mmcv.load(event_anno_file)
+
+if not osp.exists(subaction_root):
+    os.makedirs(subaction_root)
+
+for k, v in annotation.items():
+    if k + '.mp4' not in events:
+        print(f'video {k[:11]} has not been downloaded '
+              f'or the event clip {k} not generated')
+        continue
+
+    video_path = osp.join(event_root, k + '.mp4')
+
+    for subaction_id, subaction_anno in v.items():
+        timestamps = subaction_anno['timestamps']
+        start_time, end_time = timestamps[0][0], timestamps[-1][1]
+        subaction_name = k + '_' + subaction_id
+
+        output_filename = subaction_name + '.mp4'
+
+        command = [
+            'ffmpeg', '-i',
+            '"%s"' % video_path, '-ss',
+            str(start_time), '-t',
+            str(end_time - start_time), '-c:v', 'libx264', '-c:a', 'copy',
+            '-threads', '1', '-loglevel', 'panic',
+            '"%s"' % osp.join(subaction_root, output_filename)
+        ]
+        command = ' '.join(command)
+        try:
+            subprocess.check_output(
+                command, shell=True, stderr=subprocess.STDOUT)
+        except subprocess.CalledProcessError:
+            print(
+                f'Trimming of the Subaction {subaction_name} of Event '
+                f'{k} Failed',
+                flush=True)

From 394343ce3b178b81f8e3e88f177080c99a901759 Mon Sep 17 00:00:00 2001
From: HaodongDuan <duanhaodong@sensetime.com>
Date: Tue, 10 Nov 2020 16:06:53 +0800
Subject: [PATCH 10/15] use 8-thread instead of 1 for performance

---
 tools/data/gym/trim_event.py     | 2 +-
 tools/data/gym/trim_subaction.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/data/gym/trim_event.py b/tools/data/gym/trim_event.py
index 8903829c19..78e3624a76 100644
--- a/tools/data/gym/trim_event.py
+++ b/tools/data/gym/trim_event.py
@@ -39,7 +39,7 @@
             '"%s"' % video_path, '-ss',
             str(start_time), '-t',
             str(end_time - start_time), '-c:v', 'libx264', '-c:a', 'copy',
-            '-threads', '1', '-loglevel', 'panic',
+            '-threads', '8', '-loglevel', 'panic',
             '"%s"' % osp.join(event_root, output_filename)
         ]
         command = ' '.join(command)
diff --git a/tools/data/gym/trim_subaction.py b/tools/data/gym/trim_subaction.py
index 311d04b46b..ec8c1e5711 100644
--- a/tools/data/gym/trim_subaction.py
+++ b/tools/data/gym/trim_subaction.py
@@ -38,7 +38,7 @@
             '"%s"' % video_path, '-ss',
             str(start_time), '-t',
             str(end_time - start_time), '-c:v', 'libx264', '-c:a', 'copy',
-            '-threads', '1', '-loglevel', 'panic',
+            '-threads', '8', '-loglevel', 'panic',
             '"%s"' % osp.join(subaction_root, output_filename)
         ]
         command = ' '.join(command)

From 8629e5daca9e2fe9ab748eba15452349aa73542f Mon Sep 17 00:00:00 2001
From: HaodongDuan <duanhaodong@sensetime.com>
Date: Tue, 10 Nov 2020 16:48:54 +0800
Subject: [PATCH 11/15] update

---
 tools/data/gym/generate_file_list.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/data/gym/generate_file_list.py b/tools/data/gym/generate_file_list.py
index 26b8c1ab20..54c0c21f5c 100644
--- a/tools/data/gym/generate_file_list.py
+++ b/tools/data/gym/generate_file_list.py
@@ -16,11 +16,11 @@
 train = [x for x in train_org if x[0] + '.mp4' in videos]
 train = [x[0] + '.mp4 ' + x[1] for x in train]
 with open(train_file, 'w') as fout:
-    fout.write('\n'.join(train_org))
+    fout.write('\n'.join(train))
 
 val_org = open(val_file_org).readlines()
 val_org = [x.strip().split() for x in val_org]
 val = [x for x in val_org if x[0] + '.mp4' in videos]
 val = [x[0] + '.mp4 ' + x[1] for x in val]
 with open(val_file, 'w') as fout:
-    fout.write('\n'.join(val_org))
+    fout.write('\n'.join(val))

From 94127a189bec9fb07b7a6eeebfe9ab6c6c48fec5 Mon Sep 17 00:00:00 2001
From: HaodongDuan <duanhaodong@sensetime.com>
Date: Tue, 10 Nov 2020 20:26:47 +0800
Subject: [PATCH 12/15] add frame extraction, since we will also add flow
 models

---
 tools/data/gym/extract_frames.sh     |  7 +++++++
 tools/data/gym/generate_file_list.py | 22 ++++++++++++++++++++++
 tools/data/gym/preparing_gym.md      | 17 ++++++++++++++---
 3 files changed, 43 insertions(+), 3 deletions(-)
 create mode 100644 tools/data/gym/extract_frames.sh

diff --git a/tools/data/gym/extract_frames.sh b/tools/data/gym/extract_frames.sh
new file mode 100644
index 0000000000..cfcc8c044d
--- /dev/null
+++ b/tools/data/gym/extract_frames.sh
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+
+cd ../
+python build_rawframes.py ../../data/gym/subactions/ ../../data/gym/subaction_frames/ --level 1 --flow-type tvl1 --ext mp4 --task both --new-short 256
+echo "Raw frames (RGB and tv-l1) Generated"
+
+cd gym/
diff --git a/tools/data/gym/generate_file_list.py b/tools/data/gym/generate_file_list.py
index 54c0c21f5c..bbbaeae209 100644
--- a/tools/data/gym/generate_file_list.py
+++ b/tools/data/gym/generate_file_list.py
@@ -3,6 +3,8 @@
 
 annotation_root = '../../../data/gym/annotations'
 data_root = '../../../data/gym/subactions'
+frame_data_root = '../../../data/gym/subaction_frames'
+
 videos = os.listdir(data_root)
 videos = set(videos)
 
@@ -10,10 +12,21 @@
 val_file_org = osp.join(annotation_root, 'gym99_val_org.txt')
 train_file = osp.join(annotation_root, 'gym99_train.txt')
 val_file = osp.join(annotation_root, 'gym99_val.txt')
+train_frame_file = osp.join(annotation_root, 'gym99_train_frame.txt')
+val_frame_file = osp.join(annotation_root, 'gym99_val_frame.txt')
 
 train_org = open(train_file_org).readlines()
 train_org = [x.strip().split() for x in train_org]
 train = [x for x in train_org if x[0] + '.mp4' in videos]
+if osp.exists(frame_data_root):
+    train_frames = []
+    for line in train:
+        length = len(os.listdir(osp.join(frame_data_root, line[0])))
+        train_frames.append([line[0], str(length // 3), line[1]])
+    train_frames = [' '.join(x) for x in train_frames]
+    with open(train_frame_file, 'w') as fout:
+        fout.write('\n'.join(train_frames))
+
 train = [x[0] + '.mp4 ' + x[1] for x in train]
 with open(train_file, 'w') as fout:
     fout.write('\n'.join(train))
@@ -21,6 +34,15 @@
 val_org = open(val_file_org).readlines()
 val_org = [x.strip().split() for x in val_org]
 val = [x for x in val_org if x[0] + '.mp4' in videos]
+if osp.exists(frame_data_root):
+    val_frames = []
+    for line in val:
+        length = len(os.listdir(osp.join(frame_data_root, line[0])))
+        val_frames.append([line[0], str(length // 3), line[1]])
+    val_frames = [' '.join(x) for x in val_frames]
+    with open(val_frame_file, 'w') as fout:
+        fout.write('\n'.join(val_frames))
+
 val = [x[0] + '.mp4 ' + x[1] for x in val]
 with open(val_file, 'w') as fout:
     fout.write('\n'.join(val))
diff --git a/tools/data/gym/preparing_gym.md b/tools/data/gym/preparing_gym.md
index d93a91b11d..a86c855da0 100644
--- a/tools/data/gym/preparing_gym.md
+++ b/tools/data/gym/preparing_gym.md
@@ -49,7 +49,19 @@ Then, you need to trim events into subactions based on the annotation of GYM wit
 python trim_subaction.py
 ```
 
-## Step 5. Generate file list for GYM99 based on extracted subactions.
+## Step 5. Extract RGB and Flow
+
+This part is **optional** if you only want to use the video loader for RGB model training.
+
+Before extracting, please refer to [install.md](/docs/install.md) for installing [denseflow](https://github.com/open-mmlab/denseflow).
+
+Run the following script to extract both RGB frames and optical flow using the "tvl1" algorithm.
+
+```shell
+bash extract_frames.sh
+```
+
+## Step 6. Generate file list for GYM99 based on extracted subactions.
 
 You can use the following script to generate train / val lists for GYM99.
 
@@ -57,8 +69,7 @@ You can use the following script to generate train / val lists for GYM99.
 python generate_file_list.py
 ```
 
-
-## Step 6. Folder Structure
+## Step 7. Folder Structure
 
 After the whole data pipeline for GYM preparation. You can get the subaction clips, event clips, raw videos and GYM99 train/val lists.
 

From e4e01c9508cdae606190905076dfed623fedcc5c Mon Sep 17 00:00:00 2001
From: HaodongDuan <duanhaodong@sensetime.com>
Date: Wed, 11 Nov 2020 10:35:10 +0800
Subject: [PATCH 13/15] resolve comments

---
 tools/data/activitynet/download.py | 6 ++----
 tools/data/gym/download.py         | 6 ++----
 2 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/tools/data/activitynet/download.py b/tools/data/activitynet/download.py
index e4d9ba7902..f767863918 100644
--- a/tools/data/activitynet/download.py
+++ b/tools/data/activitynet/download.py
@@ -2,13 +2,11 @@
 # https://github.com/activitynet/ActivityNet/blob/master/Crawler/Kinetics/download.py  # noqa: E501
 # The code is licensed under the MIT licence.
 import os
+import ssl
 import subprocess
 
 import mmcv
-
-import ssl  # isort:skip
-
-from joblib import Parallel, delayed  # isort:skip
+from joblib import Parallel, delayed
 
 ssl._create_default_https_context = ssl._create_unverified_context
 data_file = '../../../data/ActivityNet'
diff --git a/tools/data/gym/download.py b/tools/data/gym/download.py
index 14631ce1bc..51b54c24a8 100644
--- a/tools/data/gym/download.py
+++ b/tools/data/gym/download.py
@@ -3,13 +3,11 @@
 # The code is licensed under the MIT licence.
 import argparse
 import os
+import ssl
 import subprocess
 
 import mmcv
-
-import ssl  # isort:skip
-
-from joblib import Parallel, delayed  # isort:skip
+from joblib import Parallel, delayed
 
 ssl._create_default_https_context = ssl._create_unverified_context
 

From f0badb1073972461ee3f4c1cf9c2620b70ba88f6 Mon Sep 17 00:00:00 2001
From: HaodongDuan <duanhaodong@sensetime.com>
Date: Wed, 11 Nov 2020 16:31:37 +0800
Subject: [PATCH 14/15] resolve comments

---
 tools/data/gym/trim_event.py     | 3 +--
 tools/data/gym/trim_subaction.py | 3 +--
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/tools/data/gym/trim_event.py b/tools/data/gym/trim_event.py
index 78e3624a76..9ae22262b5 100644
--- a/tools/data/gym/trim_event.py
+++ b/tools/data/gym/trim_event.py
@@ -17,8 +17,7 @@
 annotation = mmcv.load(anno_file)
 event_annotation = {}
 
-if not osp.exists(event_root):
-    os.makedirs(event_root)
+mmcv.mkdir_or_exist(event_root)
 
 for k, v in annotation.items():
     if k + '.mp4' not in videos:
diff --git a/tools/data/gym/trim_subaction.py b/tools/data/gym/trim_subaction.py
index ec8c1e5711..fa705e97a4 100644
--- a/tools/data/gym/trim_subaction.py
+++ b/tools/data/gym/trim_subaction.py
@@ -15,8 +15,7 @@
 events = set(events)
 annotation = mmcv.load(event_anno_file)
 
-if not osp.exists(subaction_root):
-    os.makedirs(subaction_root)
+mmcv.mkdir_or_exist(subaction_root)
 
 for k, v in annotation.items():
     if k + '.mp4' not in events:

From 4859d4a4855c9ceda893cf61dca4ba6abb9fd6a4 Mon Sep 17 00:00:00 2001
From: HaodongDuan <duanhaodong@sensetime.com>
Date: Wed, 11 Nov 2020 16:41:46 +0800
Subject: [PATCH 15/15] update changelog for this PR and some previous ones

---
 docs/changelog.md | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/docs/changelog.md b/docs/changelog.md
index 51e5ad06a1..e5a43c10e1 100644
--- a/docs/changelog.md
+++ b/docs/changelog.md
@@ -6,17 +6,22 @@
 
 **New Features**
 - Automatically add modelzoo statistics to readthedocs ([#327](https://github.com/open-mmlab/mmaction2/pull/327))
+- Support GYM99 data preparation ([#331](https://github.com/open-mmlab/mmaction2/pull/331))
 
 **Improvements**
 - Support PyTorch 1.7 in CI ([#312](https://github.com/open-mmlab/mmaction2/pull/312))
 - Add random seed for building filelists ([#323](https://github.com/open-mmlab/mmaction2/pull/323))
 - Move docs about demo to `demo/README.md` ([#329](https://github.com/open-mmlab/mmaction2/pull/329))
+- Remove redundant code in `tools/test.py` ([#310](https://github.com/open-mmlab/mmaction2/pull/310))
 
 **Bug Fixes**
 - Fix a bug in BaseDataset when `data_prefix` is None ([#314](https://github.com/open-mmlab/mmaction2/pull/314))
 - Fix the bug of HVU object `num_classes` from 1679 to 1678 ([#307](https://github.com/open-mmlab/mmaction2/pull/307))
+- Fix OmniSource training configs ([#321](https://github.com/open-mmlab/mmaction2/pull/321))
+- Fix Issue #306: Bug of SampleAVAFrames ([#317](https://github.com/open-mmlab/mmaction2/pull/317))
 
 **ModelZoo**
+- Add SlowOnly model for GYM99, both RGB and Flow ([#336](https://github.com/open-mmlab/mmaction2/pull/336))
 
 ### v0.8.0 (31/10/2020)