Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions docs/source/datasets.rst
Original file line number Diff line number Diff line change
Expand Up @@ -200,3 +200,11 @@ USPS
.. autoclass:: USPS
:members: __getitem__
:special-members:


Kinetics-400
~~~~~~~~~~~~

.. autoclass:: Kinetics400
:members: __getitem__
:special-members:
4 changes: 2 additions & 2 deletions references/video_classification/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ def main(args):
if args.distributed:
print("It is recommended to pre-compute the dataset cache "
"on a single-gpu first, as it will be faster")
dataset = torchvision.datasets.KineticsVideo(
dataset = torchvision.datasets.Kinetics400(
traindir,
frames_per_clip=args.clip_len,
step_between_clips=1,
Expand Down Expand Up @@ -171,7 +171,7 @@ def main(args):
if args.distributed:
print("It is recommended to pre-compute the dataset cache "
"on a single-gpu first, as it will be faster")
dataset_test = torchvision.datasets.KineticsVideo(
dataset_test = torchvision.datasets.Kinetics400(
valdir,
frames_per_clip=args.clip_len,
step_between_clips=1,
Expand Down
4 changes: 2 additions & 2 deletions torchvision/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from .sbd import SBDataset
from .vision import VisionDataset
from .usps import USPS
from .kinetics import KineticsVideo
from .kinetics import Kinetics400
from .hmdb51 import HMDB51
from .ucf101 import UCF101

Expand All @@ -31,4 +31,4 @@
'Omniglot', 'SBU', 'Flickr8k', 'Flickr30k',
'VOCSegmentation', 'VOCDetection', 'Cityscapes', 'ImageNet',
'Caltech101', 'Caltech256', 'CelebA', 'SBDataset', 'VisionDataset',
'USPS', 'KineticsVideo', 'HMDB51', 'UCF101')
'USPS', 'Kinetics400', 'HMDB51', 'UCF101')
35 changes: 33 additions & 2 deletions torchvision/datasets/kinetics.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,40 @@
from .vision import VisionDataset


class KineticsVideo(VisionDataset):
class Kinetics400(VisionDataset):
"""
`Kinetics-400 <https://deepmind.com/research/open-source/open-source-datasets/kinetics/>`_
dataset.

Kinetics-400 is an action recognition video dataset.
This dataset considers every video as a collection of video clips of fixed size, specified
by ``frames_per_clip``, where the step in frames between each clip is given by
``step_between_clips``.

To give an example, for 2 videos with 10 and 15 frames respectively, if ``frames_per_clip=5``
and ``step_between_clips=5``, the dataset size will be (2 + 3) = 5, where the first two
elements will come from video 1, and the next three elements from video 2.
Note that we drop clips which do not have exactly ``frames_per_clip`` elements, so not all
frames in a video might be present.

Internally, it uses a VideoClips object to handle clip creation.

Args:
root (string): Root directory of the Kinetics-400 Dataset.
frames_per_clip (int): number of frames in a clip
step_between_clips (int): number of frames between each clip
transform (callable, optional): A function/transform that takes in a TxHxWxC video
and returns a transformed version.

Returns:
video (Tensor[T, H, W, C]): the `T` video frames
audio (Tensor[K, L]): the audio frames, where `K` is the number of channels
and `L` is the number of points
label (int): class of the video clip
"""

def __init__(self, root, frames_per_clip, step_between_clips=1, transform=None):
super(KineticsVideo, self).__init__(root)
super(Kinetics400, self).__init__(root)
extensions = ('avi',)

classes = list(sorted(list_dir(root)))
Expand Down