Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ANet datatools #190

Merged
merged 15 commits into from
Sep 21, 2020
Merged
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@
filename_tmpl='flow_{}_{:05d}.jpg',
with_offset=True,
modality='Flow',
start_index=0,
innerlee marked this conversation as resolved.
Show resolved Hide resolved
pipeline=train_pipeline),
val=dict(
type=dataset_type,
Expand All @@ -88,6 +89,7 @@
filename_tmpl='flow_{}_{:05d}.jpg',
with_offset=True,
modality='Flow',
start_index=0,
pipeline=val_pipeline),
test=dict(
type=dataset_type,
Expand All @@ -96,6 +98,7 @@
filename_tmpl='flow_{}_{:05d}.jpg',
with_offset=True,
modality='Flow',
start_index=0,
pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0001)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@
filename_tmpl='flow_{}_{:05d}.jpg',
with_offset=True,
modality='Flow',
start_index=0,
pipeline=train_pipeline),
val=dict(
type=dataset_type,
Expand All @@ -88,6 +89,7 @@
filename_tmpl='flow_{}_{:05d}.jpg',
with_offset=True,
modality='Flow',
start_index=0,
pipeline=val_pipeline),
test=dict(
type=dataset_type,
Expand All @@ -96,6 +98,7 @@
filename_tmpl='flow_{}_{:05d}.jpg',
with_offset=True,
modality='Flow',
start_index=0,
pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0001)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,20 +79,23 @@
data_prefix=data_root,
pipeline=train_pipeline,
with_offset=True,
start_index=0,
filename_tmpl='image_{:05d}.jpg'),
val=dict(
type=dataset_type,
ann_file=ann_file_val,
data_prefix=data_root_val,
pipeline=val_pipeline,
with_offset=True,
start_index=0,
filename_tmpl='image_{:05d}.jpg'),
test=dict(
type=dataset_type,
ann_file=ann_file_test,
data_prefix=data_root_val,
pipeline=test_pipeline,
with_offset=True,
start_index=0,
filename_tmpl='image_{:05d}.jpg'))
# optimizer
optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0001)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,20 +79,23 @@
data_prefix=data_root,
pipeline=train_pipeline,
with_offset=True,
start_index=0,
filename_tmpl='image_{:05d}.jpg'),
val=dict(
type=dataset_type,
ann_file=ann_file_val,
data_prefix=data_root_val,
pipeline=val_pipeline,
with_offset=True,
start_index=0,
filename_tmpl='image_{:05d}.jpg'),
test=dict(
type=dataset_type,
ann_file=ann_file_test,
data_prefix=data_root_val,
pipeline=test_pipeline,
with_offset=True,
start_index=0,
filename_tmpl='image_{:05d}.jpg'))
# optimizer
optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0001)
Expand Down
124 changes: 124 additions & 0 deletions tools/data/activitynet/download.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
# This scripts is copied from
# https://github.com/activitynet/ActivityNet/blob/master/Crawler/Kinetics/download.py # noqa: E501
kennymckormick marked this conversation as resolved.
Show resolved Hide resolved
import os
import subprocess

import mmcv

import ssl # isort:skip

from joblib import Parallel, delayed # isort:skip

# Disable HTTPS certificate verification for all urllib-based requests in
# this process (download helpers would otherwise fail on certificate errors).
ssl._create_default_https_context = ssl._create_unverified_context
# Paths are relative to tools/data/activitynet/, pointing at the shared
# data directory of the repository.
data_file = '../../../data/ActivityNet'
video_list = f'{data_file}/video_info_new.csv'  # CSV listing all videos
anno_file = f'{data_file}/anet_anno_action.json'  # action annotation JSON
output_dir = f'{data_file}/videos'  # where downloaded clips are stored


def download_clip(video_identifier,
                  output_filename,
                  num_attempts=5,
                  url_base='https://www.youtube.com/watch?v='):
    """Download a video from YouTube if it exists and is not blocked.

    Args:
        video_identifier (str): Unique YouTube video identifier
            (11 characters).
        output_filename (str): File path where the video will be stored.
        num_attempts (int): Maximum number of download attempts before
            giving up. Default: 5.
        url_base (str): URL prefix the identifier is appended to.

    Returns:
        tuple[bool, str]: ``(status, message)`` where ``status`` is True
        iff the file exists on disk after the call.
    """
    # Defensive argument checking.
    assert isinstance(video_identifier, str), 'video_identifier must be string'
    assert isinstance(output_filename, str), 'output_filename must be string'
    assert len(video_identifier) == 11, 'video_identifier must have length 11'

    status = False

    if not os.path.exists(output_filename):
        # Pass the argument list directly (shell=False) so the filename and
        # URL need no shell quoting and cannot be interpreted by a shell.
        command = [
            'youtube-dl', '--quiet', '--no-warnings',
            '--no-check-certificate', '-f', 'mp4', '-o', output_filename,
            url_base + video_identifier
        ]
        print(' '.join(command))
        attempts = 0
        while True:
            try:
                subprocess.check_output(command, stderr=subprocess.STDOUT)
            except (subprocess.CalledProcessError, OSError):
                # OSError covers a missing youtube-dl executable; both count
                # as a failed attempt so the retry budget is respected.
                attempts += 1
                if attempts == num_attempts:
                    return status, 'Fail'
            else:
                break
    # Check if the video was successfully saved.
    status = os.path.exists(output_filename)
    return status, 'Downloaded'


def download_clip_wrapper(youtube_id, output_dir):
    """Download one clip; wrapper for parallel processing purposes.

    Args:
        youtube_id (str): 11-character YouTube identifier (no 'v_' prefix).
        output_dir (str): Directory the clip is saved into.

    Returns:
        tuple[str, bool, str]: (annotation name, success flag, log message).
    """
    # Prefix with 'v_' to align with the names used in the annotations.
    output_filename = os.path.join(output_dir, 'v_' + youtube_id + '.mp4')
    if os.path.exists(output_filename):
        # Skip clips already downloaded in a previous run.
        return 'v_' + youtube_id, True, 'Exists'

    downloaded, log = download_clip(youtube_id, output_filename)
    return 'v_' + youtube_id, downloaded, log


def parse_activitynet_annotations(input_csv):
    """Return the list of YouTube IDs referenced by an ActivityNet CSV.

    Args:
        input_csv (str): Path to a CSV file containing the columns
            'video,numFrame,seconds,fps,rfps,subset,featureFrame'.

    Returns:
        list[str]: All YouTube IDs in the file, without the 'v_' prefix.
    """
    # Use a context manager so the file handle is always closed
    # (the previous open(...).readlines() leaked the handle).
    with open(input_csv) as f:
        lines = f.readlines()[1:]  # drop the header row
    # Video names carry a 'v_' prefix; the raw YouTube ID does not.
    youtube_ids = [x.split(',')[0][2:] for x in lines]
    return youtube_ids


def main(input_csv, output_dir, anno_file, num_jobs=24):
    """Download all ActivityNet clips and prune the annotation file.

    Args:
        input_csv (str): CSV listing the videos to download.
        output_dir (str): Directory downloaded videos are stored in.
        anno_file (str): JSON annotation file; rewritten afterwards so it
            only references videos that exist on disk (the original is kept
            as a ``*_bak.json`` backup).
        num_jobs (int): Number of parallel download jobs. Default: 24.
    """
    # Reading and parsing ActivityNet.
    youtube_ids = parse_activitynet_annotations(input_csv)

    # Create the folder where videos will be saved later.
    os.makedirs(output_dir, exist_ok=True)

    # Download all clips.
    if num_jobs == 1:
        status_lst = [
            download_clip_wrapper(youtube_id, output_dir)
            for youtube_id in youtube_ids
        ]
    else:
        status_lst = Parallel(n_jobs=num_jobs)(
            delayed(download_clip_wrapper)(youtube_id, output_dir)
            for youtube_id in youtube_ids)

    # Save download report.
    mmcv.dump(status_lst, 'download_report.json')
    annotation = mmcv.load(anno_file)
    downloaded = {status[0]: status[1] for status in status_lst}
    # Keep only annotations whose video was actually downloaded; .get guards
    # against annotation entries that never appeared in the download list.
    annotation = {k: v for k, v in annotation.items() if downloaded.get(k)}
    # Back up the original annotation file before overwriting it.
    # os.replace is atomic and portable, unlike shelling out to `mv`.
    anno_file_bak = anno_file.replace('.json', '_bak.json')
    os.replace(anno_file, anno_file_bak)
    mmcv.dump(annotation, anno_file)


if __name__ == '__main__':
    main(video_list, output_dir, anno_file, 24)
12 changes: 12 additions & 0 deletions tools/data/activitynet/download_videos.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/usr/bin/env bash

# Create a dedicated conda environment with a pinned youtube-dl, download
# all ActivityNet videos via download.py, then tear the environment down.
conda env create -f environment.yml
source activate activitynet
pip install --upgrade youtube-dl

# NOTE(review): DATA_DIR is not consumed here; download.py hard-codes the
# same path internally. Kept for reference.
DATA_DIR="../../../data/ActivityNet"
python download.py

# `deactivate` takes no environment argument.
source deactivate
# -y: skip the confirmation prompt so the script does not hang when run
# non-interactively.
conda remove -n activitynet --all -y
36 changes: 36 additions & 0 deletions tools/data/activitynet/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Conda environment used by download_videos.sh to run download.py
# (pinned Python, ffmpeg and youtube-dl versions for video crawling).
name: activitynet
channels:
  - anaconda
  - menpo
  - conda-forge
  - defaults
dependencies:
  - ca-certificates=2020.1.1
  - certifi=2020.4.5.1
  - ffmpeg=2.8.6
  - libcxx=10.0.0
  - libedit=3.1.20181209
  - libffi=3.3
  - ncurses=6.2
  - openssl=1.1.1g
  - pip=20.0.2
  - python=3.7.7
  - readline=8.0
  - setuptools=46.4.0
  - sqlite=3.31.1
  - tk=8.6.8
  - wheel=0.34.2
  - xz=5.2.5
  - zlib=1.2.11
  - pip:
    - decorator==4.4.2
    - intel-openmp==2019.0
    - joblib==0.15.1
    - mkl==2019.0
    - numpy==1.18.4
    - olefile==0.46
    - pandas==1.0.3
    - python-dateutil==2.8.1
    - pytz==2020.1
    - six==1.14.0
    - youtube-dl==2020.5.8
6 changes: 6 additions & 0 deletions tools/data/activitynet/extract_frames.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/usr/bin/env bash
# Extract raw RGB frames and TV-L1 optical flow for the downloaded
# ActivityNet videos. Runs build_rawframes.py from tools/data/ so its
# relative paths resolve, then returns to this directory.
cd ../
python build_rawframes.py ../../data/ActivityNet/videos/ ../../data/ActivityNet/rawframes/ --level 1 --flow-type tvl1 --ext mp4 --task both --new-width 340 --new-height 256
echo "Raw frames (RGB and tv-l1) Generated for train set"

cd activitynet/
24 changes: 20 additions & 4 deletions tools/data/activitynet/preparing_activitynet.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# Preparing ActivityNet

For basic dataset information, please refer to the official [website](http://activity-net.org/).
Here, we use the ActivityNet rescaled feature provided in this [repo](https://github.com/wzmsltw/BSN-boundary-sensitive-network#code-and-data-preparation).
For action detection, you can either use the ActivityNet rescaled feature provided in this [repo](https://github.com/wzmsltw/BSN-boundary-sensitive-network#code-and-data-preparation) or extract the features with MMAction2 (which yields better performance).
We release both pipelines.
Before we start, please make sure that current working directory is `$MMACTION2/tools/data/activitynet/`.

## Step 1. Download Annotations
Expand All @@ -10,21 +11,36 @@ First of all, you can run the following script to download annotation files.
bash download_annotations.sh
```

## Step 2. Prepare Videos Features
## Option 1: Use the ActivityNet rescaled feature provided in this [repo](https://github.com/wzmsltw/BSN-boundary-sensitive-network#code-and-data-preparation)

### Step 2. Prepare Videos Features
Then, you can run the following script to download activitynet features.
```shell
bash download_features.sh
```

## Step 3. Process Annotation Files
### Step 3. Process Annotation Files
Next, you can run the following script to process the downloaded annotation files for training and testing.
It first merges the two annotation files together and then separates the annotations into `train`, `val` and `test` splits.

```shell
python process_annotations.py
```

## Step 4. Check Directory Structure
## Option 2: Extract ActivityNet features using MMAction2

### Step 2. Prepare Videos.
Then, you can run the following script to prepare videos.
The code is adapted from the [official crawler](https://github.com/activitynet/ActivityNet/tree/master/Crawler/Kinetics). Note that this might take a long time.
Some videos in the ActivityNet dataset may no longer be available on YouTube, so after downloading, the script updates the annotation file to ensure that every video it references exists on disk.

```shell
bash download_videos.sh
```

## Final Step. Check Directory Structure

After the whole data pipeline for ActivityNet preparation,
you will get the features and annotation files.
Expand Down