In [None]:
# Print the version of the CUDA compiler (nvcc) found on this runtime.
# The install cell below selects CUDA-specific (cu111) wheels, so this is
# the value to compare against before installing.
!nvcc -V
# Print the version of the GCC toolchain found on this runtime.
!gcc --version

In [None]:
# install dependencies: (use cu111 because colab has CUDA 11.1)
!pip install torch==1.9.0+cu111 torchvision==0.10.0+cu111 -f https://download.pytorch.org/whl/torch_stable.html

# install mmcv-full thus we could use CUDA operators
!pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html

# Install mmaction2
!rm -rf mmaction2
!git clone https://github.com/open-mmlab/mmaction2.git
%cd mmaction2

!pip install -e .

# Install some optional requirements
!pip install -r requirements/optional.txt

In [None]:
# --- PyTorch: show the installed version and whether CUDA is usable ---
import torch
import torchvision
print(torch.__version__, torch.cuda.is_available())

# --- MMAction2: show the installed version ---
import mmaction
print(mmaction.__version__)

# --- MMCV: show the compiling CUDA version and the compiler version
#     reported by mmcv.ops ---
from mmcv.ops import get_compiler_version, get_compiling_cuda_version
print(get_compiling_cuda_version())
print(get_compiler_version())

In [None]:
from mmcv import Config
from mmcv.runner import set_random_seed

# Start from the I3D Kinetics-400 (video input) config shipped with MMAction2.
cfg = Config.fromfile('./configs/recognition/i3d/i3d_r50_video_32x2x1_100e_kinetics400_rgb.py')

# Locations of the fine-tuning data: video folders and their annotation lists.
train_videos, test_videos = 'ucf_action/train/', 'ucf_action/test/'
train_list, test_list = 'ucf_action/train.txt', 'ucf_action/test.txt'

# Top-level dataset fields of the config.
cfg.dataset_type = 'VideoDataset'
cfg.data_root = train_videos
cfg.data_root_val = test_videos
cfg.ann_file_train = train_list
cfg.ann_file_val = test_list
cfg.ann_file_test = test_list

# Per-split dataset settings. Validation and test both point at the test split.
for split_name, ann_file, video_dir in (
        ('test', test_list, test_videos),
        ('train', train_list, train_videos),
        ('val', test_list, test_videos)):
    split_cfg = cfg.data[split_name]
    split_cfg.type = 'VideoDataset'
    split_cfg.ann_file = ann_file
    split_cfg.data_prefix = video_dir

# Flag used to determine whether this is omnisource training.
cfg.setdefault('omnisource', False)
# The classification head predicts 13 classes for this dataset.
cfg.model.cls_head.num_classes = 13
# Initialise from the Kinetics-400 pre-trained I3D checkpoint.
cfg.load_from = './checkpoints/i3d_r50_video_32x2x1_100e_kinetics400_rgb_20200826-e31c6f52.pth'

# Directory that receives checkpoints and logs.
cfg.work_dir = './tutorial_exps'

# Batch size, learning rate and schedule length for fine-tuning.
cfg.data.videos_per_gpu = 16
cfg.optimizer.lr = 0.001
cfg.total_epochs = 50

# Save a checkpoint every 5 epochs to reduce the storage cost.
cfg.checkpoint_config.interval = 5
# Print the log every 5 iterations to reduce the amount of log output.
cfg.log_config.interval = 5

# Fix the seed so that results are more reproducible.
cfg.seed = 0
set_random_seed(cfg.seed, deterministic=False)
cfg.gpu_ids = range(1)

# Also keep the best checkpoint found during validation.
cfg.evaluation.save_best = 'auto'

# Show the final config that will be used for training.
print(f'Config:\n{cfg.pretty_text}')


In [None]:
import os.path as osp

import mmcv
from mmaction.apis import train_model
from mmaction.datasets import build_dataset
from mmaction.models import build_model

# Training dataset, wrapped in a list as passed to train_model below.
datasets = [build_dataset(cfg.data.train)]

# Recognizer built from the model section of the config.
model = build_model(
    cfg.model,
    train_cfg=cfg.get('train_cfg'),
    test_cfg=cfg.get('test_cfg'),
)

# Make sure the output directory exists, then train on a single
# (non-distributed) process with validation enabled.
work_dir = osp.abspath(cfg.work_dir)
mmcv.mkdir_or_exist(work_dir)
train_model(model, datasets, cfg, distributed=False, validate=True)

In [27]:
from mmaction.apis import single_gpu_test
from mmaction.datasets import build_dataloader
from mmcv.parallel import MMDataParallel

# Build a test dataloader: one video per batch, no shuffling, so that
# outputs[i] lines up with the i-th entry of the test dataset.
dataset = build_dataset(cfg.data.test, dict(test_mode=True))
data_loader = build_dataloader(
        dataset,
        videos_per_gpu=1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=False,
        shuffle=False)

# Wrap the model only once, so re-running this cell does not nest wrappers.
if not isinstance(model, MMDataParallel):
    model = MMDataParallel(model, device_ids=[0])
outputs = single_gpu_test(model, data_loader)

# Evaluate with the metrics configured in cfg.evaluation. A copy is used so
# that cfg.evaluation itself is left untouched and this cell can be re-run.
# Keys that configure the training-time evaluation hook rather than the
# metrics are dropped before calling dataset.evaluate (same key list as
# MMAction2's tools/test.py — NOTE(review): confirm against the installed version).
eval_config = dict(cfg.evaluation)
for hook_only_key in ('interval', 'tmpdir', 'start', 'gpu_collect',
                      'save_best', 'rule', 'by_epoch', 'broadcast_bn_buffers'):
    eval_config.pop(hook_only_key, None)
eval_res = dataset.evaluate(outputs, **eval_config)
for name, val in eval_res.items():
    print(f'{name}: {val:.04f}')

[                                                  ] 0/33, elapsed: 0s, ETA:



[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 33/33, 0.6 task/s, elapsed: 57s, ETA:     0s
Evaluating top_k_accuracy ...

top1_acc	0.8788
top5_acc	1.0000

Evaluating mean_class_accuracy ...

mean_acc	0.8692
top1_acc: 0.8788
top5_acc: 1.0000
mean_class_accuracy: 0.8692


In [36]:
# Human-readable class names; the list position is used as the class index
# when a prediction is mapped back to a label.
actual_labels = [
    'Golf-Swing-Back',
    'Golf-Swing-Front',
    'Kicking-Front',
    'Riding-Horse',
    'Diving-Side',
    'Swing-Bench',
    'Kicking-Side',
    'Golf-Swing-Side',
    'Walk-Front',
    'Run-Side',
    'Lifting',
    'Swing-SideAngle',
    'SkateBoarding-Front',
]

In [37]:
# Result from example video
from IPython.display import HTML
from base64 import b64encode


# Predicted class for test sample 10: the index of the highest score in
# outputs[10] is mapped to its label name.
print(actual_labels[outputs[10].argmax()])


# Read the clip inside a context manager so the file handle is closed as soon
# as the bytes are in memory.
# NOTE(review): the clip path is hard-coded; confirm that it is the video
# behind sample 10 of the test annotation list.
with open("ucf_action/test/2527-12_70526.mp4", 'rb') as video_file:
    mp4 = video_file.read()

# Embed the video in the notebook as a base64 data URL.
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
HTML("""
<video width=400 controls>
      <source src="%s" type="video/mp4">
</video>
""" % data_url)




Swing-Bench


Testing from a saved model checkpoint

In [None]:
from mmcv import Config
from mmcv.runner import set_random_seed

# Start from the I3D Kinetics-400 (video input) config shipped with MMAction2.
cfg = Config.fromfile('./configs/recognition/i3d/i3d_r50_video_32x2x1_100e_kinetics400_rgb.py')

# Locations of the data: video folders and their annotation lists.
train_videos, test_videos = 'ucf_action/train/', 'ucf_action/test/'
train_list, test_list = 'ucf_action/train.txt', 'ucf_action/test.txt'

# Top-level dataset fields of the config.
cfg.dataset_type = 'VideoDataset'
cfg.data_root = train_videos
cfg.data_root_val = test_videos
cfg.ann_file_train = train_list
cfg.ann_file_val = test_list
cfg.ann_file_test = test_list

# Per-split dataset settings. Validation and test both point at the test split.
for split_name, ann_file, video_dir in (
        ('test', test_list, test_videos),
        ('train', train_list, train_videos),
        ('val', test_list, test_videos)):
    split_cfg = cfg.data[split_name]
    split_cfg.type = 'VideoDataset'
    split_cfg.ann_file = ann_file
    split_cfg.data_prefix = video_dir

# Flag used to determine whether this is omnisource training.
cfg.setdefault('omnisource', False)
# The classification head predicts 13 classes for this dataset.
cfg.model.cls_head.num_classes = 13
# Checkpoint to evaluate. It lives in the work dir set below — presumably the
# best checkpoint saved by an earlier training run; the epoch number in the
# file name must match what that run actually produced.
cfg.load_from = './tutorial_exps/best_top1_acc_epoch_35.pth'

# Directory that receives checkpoints and logs.
cfg.work_dir = './tutorial_exps'

# Batch size, learning rate and schedule length (same values as for training).
cfg.data.videos_per_gpu = 16
cfg.optimizer.lr = 0.001
cfg.total_epochs = 50

# Save a checkpoint every 5 epochs to reduce the storage cost.
cfg.checkpoint_config.interval = 5
# Print the log every 5 iterations to reduce the amount of log output.
cfg.log_config.interval = 5

# Fix the seed so that results are more reproducible.
cfg.seed = 0
set_random_seed(cfg.seed, deterministic=False)
cfg.gpu_ids = range(1)

# Also keep the best checkpoint found during validation.
cfg.evaluation.save_best = 'auto'

# Show the final config.
print(f'Config:\n{cfg.pretty_text}')

In [1]:
import os.path as osp

import mmcv
from mmaction.apis import single_gpu_test, train_model
from mmaction.datasets import build_dataloader, build_dataset
from mmaction.models import build_model
from mmcv.parallel import MMDataParallel
from mmcv.runner import load_checkpoint

# Build the training dataset. It is not needed for the evaluation below and is
# kept only so that `datasets` stays defined for any later cell that uses it.
datasets = [build_dataset(cfg.data.train)]

# Build the recognizer
model = build_model(cfg.model, train_cfg=cfg.get('train_cfg'), test_cfg=cfg.get('test_cfg'))

# Load the saved weights into the freshly built model. Nothing else in this
# cell reads cfg.load_from, so without this step the evaluation would not use
# the checkpoint at all. Loading onto the CPU first; the wrapper below is
# constructed for GPU 0.
load_checkpoint(model, cfg.load_from, map_location='cpu')

# Build a test dataloader: one video per batch, no shuffling, so that
# outputs[i] lines up with the i-th entry of the test dataset.
dataset = build_dataset(cfg.data.test, dict(test_mode=True))
data_loader = build_dataloader(
        dataset,
        videos_per_gpu=1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=False,
        shuffle=False)
model = MMDataParallel(model, device_ids=[0])
outputs = single_gpu_test(model, data_loader)

# Evaluate with the metrics configured in cfg.evaluation. A copy is used so
# that cfg.evaluation itself is left untouched and this cell can be re-run.
# Keys that configure the training-time evaluation hook rather than the
# metrics are dropped before calling dataset.evaluate (same key list as
# MMAction2's tools/test.py — NOTE(review): confirm against the installed version).
eval_config = dict(cfg.evaluation)
for hook_only_key in ('interval', 'tmpdir', 'start', 'gpu_collect',
                      'save_best', 'rule', 'by_epoch', 'broadcast_bn_buffers'):
    eval_config.pop(hook_only_key, None)
eval_res = dataset.evaluate(outputs, **eval_config)
for name, val in eval_res.items():
    print(f'{name}: {val:.04f}')

ModuleNotFoundError: No module named 'mmaction'