In [1]:
# Check PyTorch installation
import torch, torchvision

# Check MMAction2 installation
import mmaction

# Check MMCV installation
from mmcv.ops import get_compiling_cuda_version, get_compiler_version

# Train

In [2]:
# Root directory of the tiny Kinetics subset used in this notebook; it holds
# the train/val annotation lists and the `train/` and `val/` video folders.
data_path = '../data'

In [3]:
!tree $data_path

../data
|-- kinetics_tiny_train_video.txt
|-- kinetics_tiny_val_video.txt
|-- train
|   |-- 27_CSXByd3s.mp4
|   |-- 34XczvTaRiI.mp4
|   |-- A-wiliK50Zw.mp4
|   |-- D32_1gwq35E.mp4
|   |-- D92m0HsHjcQ.mp4
|   |-- DbX8mPslRXg.mp4
|   |-- FMlSTTpN3VY.mp4
|   |-- IyfILH9lBRo.mp4
|   |-- LvcFDgCAXQs.mp4
|   |-- O46YA8tI530.mp4
|   |-- P5M-hAts7MQ.mp4
|   |-- PnOe3GZRVX8.mp4
|   |-- R8HXQkdgKWA.mp4
|   |-- RqnKtCEoEcA.mp4
|   |-- T_TMNGzVrDk.mp4
|   |-- TkkZPZHbAKA.mp4
|   |-- WWP5HZJsg-o.mp4
|   |-- WaS0qwP46Us.mp4
|   |-- Wh_YPQdH1Zg.mp4
|   |-- ZQV4U2KQ370.mp4
|   |-- h10B9SVE-nk.mp4
|   |-- h2YqqUhnR34.mp4
|   |-- iRuyZSKhHRg.mp4
|   |-- kFC3KY2bOP8.mp4
|   |-- oMrZaozOvdQ.mp4
|   |-- oXy-e_P_cAI.mp4
|   |-- phDqGd0NKoo.mp4
|   |-- soEcZZsBmDs.mp4
|   |-- xGY2dP0YUjA.mp4
|   `-- yLC9CtWU5ws.mp4
`-- val
    |-- 0pVGiAU6XEA.mp4
    |-- AQrbRSnRt8M.mp4
    |-- IcLztCtvhb8.mp4
    |-- SU_x2LQqSLs.mp4
    |-- b6Q_b7vgc7Q.mp4
    |-- ddvJ6-faICE.mp4
    |-- ik4BW3-SCts.mp4
    |-- jqRrH30V0k4.

In [4]:
# Annotation list for the training split: one "<video file> <label>" pair per line.
train_video_txt_path = f'{data_path}/kinetics_tiny_train_video.txt'

In [5]:
!cat $train_video_txt_path

D32_1gwq35E.mp4 0
iRuyZSKhHRg.mp4 1
oXy-e_P_cAI.mp4 0
34XczvTaRiI.mp4 1
h2YqqUhnR34.mp4 0
O46YA8tI530.mp4 0
kFC3KY2bOP8.mp4 1
WWP5HZJsg-o.mp4 1
phDqGd0NKoo.mp4 1
yLC9CtWU5ws.mp4 0
27_CSXByd3s.mp4 1
IyfILH9lBRo.mp4 1
T_TMNGzVrDk.mp4 1
TkkZPZHbAKA.mp4 0
PnOe3GZRVX8.mp4 1
soEcZZsBmDs.mp4 1
FMlSTTpN3VY.mp4 1
WaS0qwP46Us.mp4 0
A-wiliK50Zw.mp4 1
oMrZaozOvdQ.mp4 1
ZQV4U2KQ370.mp4 0
DbX8mPslRXg.mp4 1
h10B9SVE-nk.mp4 1
P5M-hAts7MQ.mp4 0
R8HXQkdgKWA.mp4 0
D92m0HsHjcQ.mp4 0
RqnKtCEoEcA.mp4 0
LvcFDgCAXQs.mp4 0
xGY2dP0YUjA.mp4 0
Wh_YPQdH1Zg.mp4 0


In [6]:
from mmcv import Config

# Load the stock TSN recipe as the starting point; the following cell
# overrides the dataset, head and schedule fields for the tiny dataset.
cfg = Config.fromfile(
    './configs/recognition/tsn/tsn_r50_video_1x1x8_100e_kinetics400_rgb.py'
)

In [7]:
from mmcv.runner import set_random_seed

# ---------------------------------------------------------------------------
# Dataset: switch every split to the video-based tiny Kinetics subset.
# The top-level fields are set once and then reused for the per-split entries
# so the two can never drift apart.
# ---------------------------------------------------------------------------
cfg.dataset_type = 'VideoDataset'
cfg.data_root = data_path + '/train/'
cfg.data_root_val = data_path + '/val/'
cfg.ann_file_train = data_path + '/kinetics_tiny_train_video.txt'
cfg.ann_file_val = data_path + '/kinetics_tiny_val_video.txt'
# No separate test split is shipped, so testing reuses the validation list.
cfg.ann_file_test = data_path + '/kinetics_tiny_val_video.txt'

cfg.data.test.type = cfg.dataset_type
cfg.data.test.ann_file = cfg.ann_file_test
cfg.data.test.data_prefix = cfg.data_root_val

cfg.data.train.type = cfg.dataset_type
cfg.data.train.ann_file = cfg.ann_file_train
cfg.data.train.data_prefix = cfg.data_root

cfg.data.val.type = cfg.dataset_type
cfg.data.val.ann_file = cfg.ann_file_val
cfg.data.val.data_prefix = cfg.data_root_val

# ---------------------------------------------------------------------------
# Model
# ---------------------------------------------------------------------------
# The flag is used to determine whether it is omnisource training
cfg.setdefault('omnisource', False)
# Modify num classes of the model in cls_head (the annotation file only
# contains labels 0 and 1).
cfg.model.cls_head.num_classes = 2
# We can use the pre-trained TSN model. Its 400-class classification layer
# does not fit the 2-class head, so "size mismatch" messages for
# cls_head.fc_cls when the checkpoint is loaded are expected.
cfg.load_from = './checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth'

# Set up working dir to save files and logs.
cfg.work_dir = './tutorial_exps'

# ---------------------------------------------------------------------------
# Schedule
# ---------------------------------------------------------------------------
# The original learning rate (LR) is set for 8-GPU training at the base
# per-GPU batch size. Here we train on a single GPU AND shrink the per-GPU
# batch 16x, so the LR is reduced by both factors (linear scaling rule):
# /8 for the GPU count and by the batch-size ratio for the smaller batch.
# The batch size is clamped to at least 1 so a small base value cannot
# produce an unusable batch of 0, and the LR uses the batch ratio that was
# actually applied (identical to "/ 16" whenever the base is a multiple of 16).
base_videos_per_gpu = cfg.data.videos_per_gpu
cfg.data.videos_per_gpu = max(1, base_videos_per_gpu // 16)
cfg.optimizer.lr = cfg.optimizer.lr / 8 * cfg.data.videos_per_gpu / base_videos_per_gpu
# Note: the inherited step schedule (see `lr_config` in the printout below)
# only decays at epochs 40 and 80, so with 30 epochs the LR stays constant.
cfg.total_epochs = 30

# We can set the checkpoint saving interval to reduce the storage cost
cfg.checkpoint_config.interval = 10
# We can set the log print interval to reduce the number of log lines printed
cfg.log_config.interval = 5

# ---------------------------------------------------------------------------
# Reproducibility / runtime
# ---------------------------------------------------------------------------
# Set seed thus the results are more reproducible; the runtime seed is taken
# from cfg.seed so the recorded value and the applied value always agree.
cfg.seed = 0
set_random_seed(cfg.seed, deterministic=False)
# Train on a single GPU (id 0).
cfg.gpu_ids = range(1)


# We can initialize the logger for training and have a look
# at the final config used for training
print(f'Config:\n{cfg.pretty_text}')

Config:
model = dict(
    type='Recognizer2D',
    backbone=dict(
        type='ResNet',
        pretrained='torchvision://resnet50',
        depth=50,
        norm_eval=False),
    cls_head=dict(
        type='TSNHead',
        num_classes=2,
        in_channels=2048,
        spatial_type='avg',
        consensus=dict(type='AvgConsensus', dim=1),
        dropout_ratio=0.4,
        init_std=0.01),
    train_cfg=None,
    test_cfg=dict(average_clips=None))
optimizer = dict(type='SGD', lr=7.8125e-05, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2))
lr_config = dict(policy='step', step=[40, 80])
total_epochs = 30
checkpoint_config = dict(interval=10)
log_config = dict(interval=5, hooks=[dict(type='TextLoggerHook')])
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = './checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth'
resume_from = None
workflow = [('train', 1)]
dataset_type = 'VideoDataset'
data_root =

In [8]:
import os.path as osp

import mmcv
from mmaction.apis import train_model
from mmaction.datasets import build_dataset
from mmaction.models import build_model

# Training split, wrapped in a list because that is the form handed to
# train_model below.
datasets = [
    build_dataset(cfg.data.train),
]

# Recognizer described by cfg.model; train/test settings are looked up with
# .get() so a config without those top-level keys simply passes None.
model = build_model(
    cfg.model,
    train_cfg=cfg.get('train_cfg'),
    test_cfg=cfg.get('test_cfg'),
)

# Ensure the output directory exists, then launch single-process training
# with periodic validation enabled.
mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
train_model(
    model,
    datasets,
    cfg,
    distributed=False,
    validate=True,
)

Use load_from_torchvision loader


2021-03-23 02:37:10,389 - mmaction - INFO - These parameters in pretrained checkpoint are not loaded: {'fc.bias', 'fc.weight'}
2021-03-23 02:37:13,295 - mmaction - INFO - load checkpoint from ./checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth
2021-03-23 02:37:13,297 - mmaction - INFO - Use load_from_local loader

size mismatch for cls_head.fc_cls.weight: copying a param with shape torch.Size([400, 2048]) from checkpoint, the shape in current model is torch.Size([2, 2048]).
size mismatch for cls_head.fc_cls.bias: copying a param with shape torch.Size([400]) from checkpoint, the shape in current model is torch.Size([2]).
2021-03-23 02:37:13,426 - mmaction - INFO - Start running, host: root@654045d5cfa8, work_dir: /mlsteam/lab/tutorial_exps
2021-03-23 02:37:13,427 - mmaction - INFO - workflow: [('train', 1)], max: 30 epochs
2021-03-23 02:37:18,301 - mmaction - INFO - Epoch [1][5/15]	lr: 7.813e-05, eta: 0:07:09, time: 0.965, data_time: 0.765, memory: 1654, top1_acc: 0.7

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 10/10, 6.5 task/s, elapsed: 2s, ETA:     0s

2021-03-23 02:37:43,651 - mmaction - INFO - Evaluating top_k_accuracy ...
2021-03-23 02:37:43,653 - mmaction - INFO - 
top1_acc	0.7000
top5_acc	1.0000
2021-03-23 02:37:43,656 - mmaction - INFO - Evaluating mean_class_accuracy ...
2021-03-23 02:37:43,669 - mmaction - INFO - 
mean_acc	0.7000
2021-03-23 02:37:44,068 - mmaction - INFO - Now best checkpoint is saved as best_top1_acc_epoch_5.pth.
2021-03-23 02:37:44,069 - mmaction - INFO - Best top1_acc is 0.7000 at 5 epoch.
2021-03-23 02:37:44,073 - mmaction - INFO - Epoch(val) [5][15]	top1_acc: 0.7000, top5_acc: 1.0000, mean_class_accuracy: 0.7000
2021-03-23 02:37:48,858 - mmaction - INFO - Epoch [6][5/15]	lr: 7.813e-05, eta: 0:02:31, time: 0.925, data_time: 0.790, memory: 1654, top1_acc: 0.5000, top5_acc: 1.0000, loss_cls: 0.6696, loss: 0.6696, grad_norm: 11.0202
2021-03-23 02:37:49,545 - mmaction - INFO - Epoch [6][10/15]	lr: 7.813e-05, eta: 0:02:24, time: 0.137, data_time: 0.003, memory: 1654, top1_acc: 0.6000, top5_acc: 1.0000, loss_cl

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 10/10, 3.1 task/s, elapsed: 3s, ETA:     0s

2021-03-23 02:38:15,935 - mmaction - INFO - Evaluating top_k_accuracy ...
2021-03-23 02:38:15,936 - mmaction - INFO - 
top1_acc	0.9000
top5_acc	1.0000
2021-03-23 02:38:15,939 - mmaction - INFO - Evaluating mean_class_accuracy ...
2021-03-23 02:38:15,940 - mmaction - INFO - 
mean_acc	0.9000
2021-03-23 02:38:16,295 - mmaction - INFO - Now best checkpoint is saved as best_top1_acc_epoch_10.pth.
2021-03-23 02:38:16,296 - mmaction - INFO - Best top1_acc is 0.9000 at 10 epoch.
2021-03-23 02:38:16,300 - mmaction - INFO - Epoch(val) [10][15]	top1_acc: 0.9000, top5_acc: 1.0000, mean_class_accuracy: 0.9000
2021-03-23 02:38:20,976 - mmaction - INFO - Epoch [11][5/15]	lr: 7.813e-05, eta: 0:01:55, time: 0.894, data_time: 0.741, memory: 1654, top1_acc: 0.8000, top5_acc: 1.0000, loss_cls: 0.5356, loss: 0.5356, grad_norm: 8.8047
2021-03-23 02:38:21,875 - mmaction - INFO - Epoch [11][10/15]	lr: 7.813e-05, eta: 0:01:51, time: 0.180, data_time: 0.035, memory: 1654, top1_acc: 0.8000, top5_acc: 1.0000, los

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 10/10, 6.8 task/s, elapsed: 1s, ETA:     0s

2021-03-23 02:38:45,961 - mmaction - INFO - Evaluating top_k_accuracy ...
2021-03-23 02:38:45,962 - mmaction - INFO - 
top1_acc	0.8000
top5_acc	1.0000
2021-03-23 02:38:45,964 - mmaction - INFO - Evaluating mean_class_accuracy ...
2021-03-23 02:38:45,965 - mmaction - INFO - 
mean_acc	0.8000
2021-03-23 02:38:45,967 - mmaction - INFO - Epoch(val) [15][15]	top1_acc: 0.8000, top5_acc: 1.0000, mean_class_accuracy: 0.8000
2021-03-23 02:38:49,669 - mmaction - INFO - Epoch [16][5/15]	lr: 7.813e-05, eta: 0:01:23, time: 0.739, data_time: 0.602, memory: 1654, top1_acc: 0.8000, top5_acc: 1.0000, loss_cls: 0.5428, loss: 0.5428, grad_norm: 10.0096
2021-03-23 02:38:50,757 - mmaction - INFO - Epoch [16][10/15]	lr: 7.813e-05, eta: 0:01:20, time: 0.218, data_time: 0.082, memory: 1654, top1_acc: 0.7000, top5_acc: 1.0000, loss_cls: 0.6095, loss: 0.6095, grad_norm: 11.1860
2021-03-23 02:38:51,325 - mmaction - INFO - Epoch [16][15/15]	lr: 7.813e-05, eta: 0:01:17, time: 0.114, data_time: 0.002, memory: 1654, 

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 10/10, 3.1 task/s, elapsed: 3s, ETA:     0s

2021-03-23 02:39:17,041 - mmaction - INFO - Evaluating top_k_accuracy ...
2021-03-23 02:39:17,042 - mmaction - INFO - 
top1_acc	0.9000
top5_acc	1.0000
2021-03-23 02:39:17,045 - mmaction - INFO - Evaluating mean_class_accuracy ...
2021-03-23 02:39:17,046 - mmaction - INFO - 
mean_acc	0.9000
2021-03-23 02:39:17,050 - mmaction - INFO - Epoch(val) [20][15]	top1_acc: 0.9000, top5_acc: 1.0000, mean_class_accuracy: 0.9000
2021-03-23 02:39:20,954 - mmaction - INFO - Epoch [21][5/15]	lr: 7.813e-05, eta: 0:00:54, time: 0.770, data_time: 0.612, memory: 1654, top1_acc: 0.8000, top5_acc: 1.0000, loss_cls: 0.4516, loss: 0.4516, grad_norm: 8.3854
2021-03-23 02:39:21,979 - mmaction - INFO - Epoch [21][10/15]	lr: 7.813e-05, eta: 0:00:52, time: 0.205, data_time: 0.073, memory: 1654, top1_acc: 0.8000, top5_acc: 1.0000, loss_cls: 0.4543, loss: 0.4543, grad_norm: 9.1092
2021-03-23 02:39:22,526 - mmaction - INFO - Epoch [21][15/15]	lr: 7.813e-05, eta: 0:00:49, time: 0.109, data_time: 0.001, memory: 1654, to

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 10/10, 6.4 task/s, elapsed: 2s, ETA:     0s

2021-03-23 02:39:46,030 - mmaction - INFO - Evaluating top_k_accuracy ...
2021-03-23 02:39:46,032 - mmaction - INFO - 
top1_acc	1.0000
top5_acc	1.0000
2021-03-23 02:39:46,034 - mmaction - INFO - Evaluating mean_class_accuracy ...
2021-03-23 02:39:46,035 - mmaction - INFO - 
mean_acc	1.0000
2021-03-23 02:39:46,344 - mmaction - INFO - Now best checkpoint is saved as best_top1_acc_epoch_25.pth.
2021-03-23 02:39:46,345 - mmaction - INFO - Best top1_acc is 1.0000 at 25 epoch.
2021-03-23 02:39:46,349 - mmaction - INFO - Epoch(val) [25][15]	top1_acc: 1.0000, top5_acc: 1.0000, mean_class_accuracy: 1.0000
2021-03-23 02:39:50,580 - mmaction - INFO - Epoch [26][5/15]	lr: 7.813e-05, eta: 0:00:26, time: 0.821, data_time: 0.688, memory: 1654, top1_acc: 0.9000, top5_acc: 1.0000, loss_cls: 0.3757, loss: 0.3757, grad_norm: 7.9559
2021-03-23 02:39:51,615 - mmaction - INFO - Epoch [26][10/15]	lr: 7.813e-05, eta: 0:00:24, time: 0.207, data_time: 0.077, memory: 1654, top1_acc: 0.5000, top5_acc: 1.0000, los

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 10/10, 5.3 task/s, elapsed: 2s, ETA:     0s

2021-03-23 02:40:16,297 - mmaction - INFO - Evaluating top_k_accuracy ...
2021-03-23 02:40:16,298 - mmaction - INFO - 
top1_acc	1.0000
top5_acc	1.0000
2021-03-23 02:40:16,302 - mmaction - INFO - Evaluating mean_class_accuracy ...
2021-03-23 02:40:16,303 - mmaction - INFO - 
mean_acc	1.0000
2021-03-23 02:40:16,305 - mmaction - INFO - Epoch(val) [30][15]	top1_acc: 1.0000, top5_acc: 1.0000, mean_class_accuracy: 1.0000
