In [1]:
# Verify the PyTorch installation and whether a CUDA device is available
import torch
import torchvision
print(torch.__version__, torch.cuda.is_available())

# Verify the MMAction2 installation
import mmaction
print(mmaction.__version__)

# Verify the mmcv installation: print, one per line, what its two
# version helpers return (CUDA version first, then compiler version)
from mmcv.ops import get_compiler_version, get_compiling_cuda_version
for _version_fn in (get_compiling_cuda_version, get_compiler_version):
    print(_version_fn())

1.10.0+cu113 True
0.15.0
11.3
GCC 7.3


In [5]:
from mmaction.apis import init_recognizer
from mmaction.apis import inference_recognizer

# Checkpoint file downloaded in an earlier step
checkpoint = 'checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth'
# Config file that matches the TSN checkpoint above
config = 'configs/recognition/tsn/tsn_r50_video_inference_1x1x3_100e_kinetics400_rgb.py'

# Echo the config path, then initialize the recognizer on the first CUDA device
print(config)
model = init_recognizer(config, checkpoint, device='cuda:0')

configs/recognition/tsn/tsn_r50_video_inference_1x1x3_100e_kinetics400_rgb.py
load checkpoint from local path: checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth


In [6]:
# Pick the demo video and its label map, then run the recognizer on it
video, label = 'demo/demo.mp4', 'demo/label_map_k400.txt'
results = inference_recognizer(model, video, label)

In [7]:
# Preview the input video inline by embedding it as a base64 data URL
from IPython.display import HTML
from base64 import b64encode

# Read the file inside a context manager so the handle is closed as soon as
# the bytes are in memory (the bare open(...).read() left it open).
with open(video, 'rb') as video_file:
    mp4 = video_file.read()
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
# The HTML object is the cell's last expression, so the notebook renders it
HTML("""
<video width=400 controls>
      <source src="%s" type="video/mp4">
</video>
""" % data_url)

In [8]:
# Print each returned prediction as "<label>:  <score>"
for prediction in results:
    action_name, score = prediction[0], prediction[1]
    print(f'{action_name}: ', score)

arm wrestling:  29.62016
rock scissors paper:  10.755016
shaking hands:  9.909008
clapping:  9.190468
massaging feet:  8.304269


In [1]:
# Load the TSN config file into cfg; the following cells modify it
from mmcv import Config

tsn_cfg_path = './configs/recognition/tsn/tsn_r50_video_1x1x8_100e_kinetics400_rgb.py'
cfg = Config.fromfile(tsn_cfg_path)

In [2]:
from mmcv import Config
from mmcv.runner import set_random_seed

# Dataset locations, named once so every path is spelled in a single place
train_root = 'kinetics400_tiny/train/'
val_root = 'kinetics400_tiny/val/'
train_ann = 'kinetics400_tiny/kinetics_tiny_train_video.txt'
val_ann = 'kinetics400_tiny/kinetics_tiny_val_video.txt'

# Change the dataset type and the top-level file paths
cfg.dataset_type = 'VideoDataset'
cfg.data_root = train_root
cfg.data_root_val = val_root
cfg.ann_file_train = train_ann
cfg.ann_file_val = val_ann
cfg.ann_file_test = val_ann

# Apply the same three settings to every split; the test split reuses the
# validation annotation file and video directory.
for split_cfg, split_ann, split_prefix in (
        (cfg.data.test, val_ann, val_root),
        (cfg.data.train, train_ann, train_root),
        (cfg.data.val, val_ann, val_root)):
    split_cfg.type = 'VideoDataset'
    split_cfg.ann_file = split_ann
    split_cfg.data_prefix = split_prefix

# Records whether omnisource training is used (False unless already set)
cfg.setdefault('omnisource', False)
# Set the number of classes in cls_head to 2
cfg.model.cls_head.num_classes = 2
# Start from the pretrained TSN checkpoint
cfg.load_from = './checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth'

# Set the working directory
cfg.work_dir = './tutorial_exps'

# Single-GPU training: shrink the per-GPU batch by 16x and the learning rate
# by 8 * 16. Both are derived from a freshly loaded copy of the config file
# rather than from cfg itself, so re-running this cell does not divide the
# already-reduced values a second time (which would drive the batch size to 0).
base_cfg = Config.fromfile(cfg.filename)
cfg.data.videos_per_gpu = base_cfg.data.videos_per_gpu // 16
cfg.optimizer.lr = base_cfg.optimizer.lr / 8 / 16
cfg.total_epochs = 30

# Save checkpoints less often to reduce storage use
cfg.checkpoint_config.interval = 10
# Set the logging interval (in iterations) to control how much is printed
cfg.log_config.interval = 5

# Fix the random seed so results are reproducible
cfg.seed = 0
set_random_seed(0, deterministic=False)
cfg.gpu_ids = range(1)

# Print the full resolved configuration
print(f'Config:\n{cfg.pretty_text}')

Config:
model = dict(
    type='Recognizer2D',
    backbone=dict(
        type='ResNet',
        pretrained='torchvision://resnet50',
        depth=50,
        norm_eval=False),
    cls_head=dict(
        type='TSNHead',
        num_classes=2,
        in_channels=2048,
        spatial_type='avg',
        consensus=dict(type='AvgConsensus', dim=1),
        dropout_ratio=0.4,
        init_std=0.01),
    train_cfg=None,
    test_cfg=dict(average_clips=None))
optimizer = dict(type='SGD', lr=7.8125e-05, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2))
lr_config = dict(policy='step', step=[40, 80])
total_epochs = 30
checkpoint_config = dict(interval=10)
log_config = dict(interval=5, hooks=[dict(type='TextLoggerHook')])
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = './checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth'
resume_from = None
workflow = [('train', 1)]
dataset_type = 'VideoDataset'
data_root =

In [3]:
import os.path as osp

import mmcv

from mmaction.apis import train_model
from mmaction.datasets import build_dataset
from mmaction.models import build_model

# Build the training dataset; it is handed to train_model as a one-element list
train_dataset = build_dataset(cfg.data.train)
datasets = [train_dataset]

# Build the action recognition model from the model section of the config
model = build_model(
    cfg.model,
    train_cfg=cfg.get('train_cfg'),
    test_cfg=cfg.get('test_cfg'))

# Make sure the working directory exists, then train with validation enabled
work_dir_abs = osp.abspath(cfg.work_dir)
mmcv.mkdir_or_exist(work_dir_abs)
train_model(model, datasets, cfg, distributed=False, validate=True)

load checkpoint from torchvision path: torchvision://resnet50


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth


  0%|          | 0.00/97.8M [00:00<?, ?B/s]

2023-03-07 10:48:21,727 - mmaction - INFO - These parameters in pretrained checkpoint are not loaded: {'fc.weight', 'fc.bias'}
2023-03-07 10:48:28,799 - mmaction - INFO - load checkpoint from local path: ./checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth

size mismatch for cls_head.fc_cls.weight: copying a param with shape torch.Size([400, 2048]) from checkpoint, the shape in current model is torch.Size([2, 2048]).
size mismatch for cls_head.fc_cls.bias: copying a param with shape torch.Size([400]) from checkpoint, the shape in current model is torch.Size([2]).
2023-03-07 10:48:28,920 - mmaction - INFO - Start running, host: root@autodl-container-80fd118e52-a05a309f, work_dir: /root/Video-Swin-Transformer-master/tutorial_exps
2023-03-07 10:48:28,920 - mmaction - INFO - Hooks will be executed in the following order:
before_run:
(VERY_HIGH   ) StepLrUpdaterHook                  
(NORMAL      ) CheckpointHook                     
(NORMAL      ) EvalHook                

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 10/10, 7.6 task/s, elapsed: 1s, ETA:     0s

2023-03-07 10:48:51,368 - mmaction - INFO - Evaluating top_k_accuracy ...
2023-03-07 10:48:51,369 - mmaction - INFO - 
top1_acc	0.7000
top5_acc	1.0000
2023-03-07 10:48:51,370 - mmaction - INFO - Evaluating mean_class_accuracy ...
2023-03-07 10:48:51,371 - mmaction - INFO - 
mean_acc	0.7000
2023-03-07 10:48:51,857 - mmaction - INFO - Now best checkpoint is saved as best_top1_acc_epoch_5.pth.
2023-03-07 10:48:51,858 - mmaction - INFO - Best top1_acc is 0.7000 at 5 epoch.
2023-03-07 10:48:51,858 - mmaction - INFO - Epoch(val) [5][5]	top1_acc: 0.7000, top5_acc: 1.0000, mean_class_accuracy: 0.7000
2023-03-07 10:48:55,113 - mmaction - INFO - Epoch [6][5/15]	lr: 7.813e-05, eta: 0:01:48, time: 0.650, data_time: 0.586, memory: 1621, top1_acc: 0.7000, top5_acc: 1.0000, loss_cls: 0.6538, loss: 0.6538, grad_norm: 12.5854
2023-03-07 10:48:55,463 - mmaction - INFO - Epoch [6][10/15]	lr: 7.813e-05, eta: 0:01:42, time: 0.070, data_time: 0.011, memory: 1621, top1_acc: 0.4000, top5_acc: 1.0000, loss_cls

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 10/10, 7.1 task/s, elapsed: 1s, ETA:     0s

2023-03-07 10:49:14,194 - mmaction - INFO - Evaluating top_k_accuracy ...
2023-03-07 10:49:14,195 - mmaction - INFO - 
top1_acc	0.7000
top5_acc	1.0000
2023-03-07 10:49:14,195 - mmaction - INFO - Evaluating mean_class_accuracy ...
2023-03-07 10:49:14,196 - mmaction - INFO - 
mean_acc	0.7000
2023-03-07 10:49:14,197 - mmaction - INFO - Epoch(val) [10][5]	top1_acc: 0.7000, top5_acc: 1.0000, mean_class_accuracy: 0.7000
2023-03-07 10:49:17,473 - mmaction - INFO - Epoch [11][5/15]	lr: 7.813e-05, eta: 0:01:21, time: 0.655, data_time: 0.587, memory: 1621, top1_acc: 1.0000, top5_acc: 1.0000, loss_cls: 0.5091, loss: 0.5091, grad_norm: 8.6306
2023-03-07 10:49:17,795 - mmaction - INFO - Epoch [11][10/15]	lr: 7.813e-05, eta: 0:01:18, time: 0.064, data_time: 0.001, memory: 1621, top1_acc: 0.9000, top5_acc: 1.0000, loss_cls: 0.6282, loss: 0.6282, grad_norm: 11.6421
2023-03-07 10:49:18,133 - mmaction - INFO - Epoch [11][15/15]	lr: 7.813e-05, eta: 0:01:15, time: 0.068, data_time: 0.008, memory: 1621, to

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 10/10, 7.0 task/s, elapsed: 1s, ETA:     0s

2023-03-07 10:49:36,106 - mmaction - INFO - Evaluating top_k_accuracy ...
2023-03-07 10:49:36,107 - mmaction - INFO - 
top1_acc	0.8000
top5_acc	1.0000
2023-03-07 10:49:36,107 - mmaction - INFO - Evaluating mean_class_accuracy ...
2023-03-07 10:49:36,108 - mmaction - INFO - 
mean_acc	0.8000
2023-03-07 10:49:36,124 - mmaction - INFO - The previous best checkpoint /root/Video-Swin-Transformer-master/tutorial_exps/best_top1_acc_epoch_5.pth was removed
2023-03-07 10:49:36,625 - mmaction - INFO - Now best checkpoint is saved as best_top1_acc_epoch_15.pth.
2023-03-07 10:49:36,628 - mmaction - INFO - Best top1_acc is 0.8000 at 15 epoch.
2023-03-07 10:49:36,629 - mmaction - INFO - Epoch(val) [15][5]	top1_acc: 0.8000, top5_acc: 1.0000, mean_class_accuracy: 0.8000
2023-03-07 10:49:39,832 - mmaction - INFO - Epoch [16][5/15]	lr: 7.813e-05, eta: 0:00:59, time: 0.640, data_time: 0.577, memory: 1621, top1_acc: 0.7000, top5_acc: 1.0000, loss_cls: 0.5964, loss: 0.5964, grad_norm: 11.0554
2023-03-07 10:

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 10/10, 7.2 task/s, elapsed: 1s, ETA:     0s

2023-03-07 10:49:58,698 - mmaction - INFO - Evaluating top_k_accuracy ...
2023-03-07 10:49:58,699 - mmaction - INFO - 
top1_acc	0.8000
top5_acc	1.0000
2023-03-07 10:49:58,699 - mmaction - INFO - Evaluating mean_class_accuracy ...
2023-03-07 10:49:58,700 - mmaction - INFO - 
mean_acc	0.8000
2023-03-07 10:49:58,701 - mmaction - INFO - Epoch(val) [20][5]	top1_acc: 0.8000, top5_acc: 1.0000, mean_class_accuracy: 0.8000
2023-03-07 10:50:02,009 - mmaction - INFO - Epoch [21][5/15]	lr: 7.813e-05, eta: 0:00:38, time: 0.661, data_time: 0.592, memory: 1621, top1_acc: 0.7000, top5_acc: 1.0000, loss_cls: 0.4784, loss: 0.4784, grad_norm: 9.8553
2023-03-07 10:50:02,307 - mmaction - INFO - Epoch [21][10/15]	lr: 7.813e-05, eta: 0:00:37, time: 0.060, data_time: 0.001, memory: 1621, top1_acc: 0.7000, top5_acc: 1.0000, loss_cls: 0.5168, loss: 0.5168, grad_norm: 9.9363
2023-03-07 10:50:02,609 - mmaction - INFO - Epoch [21][15/15]	lr: 7.813e-05, eta: 0:00:35, time: 0.060, data_time: 0.000, memory: 1621, top

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 10/10, 7.5 task/s, elapsed: 1s, ETA:     0s

2023-03-07 10:50:20,562 - mmaction - INFO - Evaluating top_k_accuracy ...
2023-03-07 10:50:20,564 - mmaction - INFO - 
top1_acc	0.8000
top5_acc	1.0000
2023-03-07 10:50:20,564 - mmaction - INFO - Evaluating mean_class_accuracy ...
2023-03-07 10:50:20,565 - mmaction - INFO - 
mean_acc	0.8000
2023-03-07 10:50:20,565 - mmaction - INFO - Epoch(val) [25][5]	top1_acc: 0.8000, top5_acc: 1.0000, mean_class_accuracy: 0.8000
2023-03-07 10:50:23,752 - mmaction - INFO - Epoch [26][5/15]	lr: 7.813e-05, eta: 0:00:18, time: 0.636, data_time: 0.575, memory: 1621, top1_acc: 1.0000, top5_acc: 1.0000, loss_cls: 0.3687, loss: 0.3687, grad_norm: 7.4849
2023-03-07 10:50:24,169 - mmaction - INFO - Epoch [26][10/15]	lr: 7.813e-05, eta: 0:00:17, time: 0.083, data_time: 0.024, memory: 1621, top1_acc: 0.6000, top5_acc: 1.0000, loss_cls: 0.5224, loss: 0.5224, grad_norm: 10.6556
2023-03-07 10:50:24,481 - mmaction - INFO - Epoch [26][15/15]	lr: 7.813e-05, eta: 0:00:15, time: 0.062, data_time: 0.000, memory: 1621, to

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 10/10, 7.2 task/s, elapsed: 1s, ETA:     0s

2023-03-07 10:50:42,922 - mmaction - INFO - Evaluating top_k_accuracy ...
2023-03-07 10:50:42,923 - mmaction - INFO - 
top1_acc	0.8000
top5_acc	1.0000
2023-03-07 10:50:42,924 - mmaction - INFO - Evaluating mean_class_accuracy ...
2023-03-07 10:50:42,924 - mmaction - INFO - 
mean_acc	0.8000
2023-03-07 10:50:42,925 - mmaction - INFO - Epoch(val) [30][5]	top1_acc: 0.8000, top5_acc: 1.0000, mean_class_accuracy: 0.8000


In [4]:
from mmaction.apis import single_gpu_test
from mmaction.datasets import build_dataloader
from mmcv.parallel import MMDataParallel

# Build the test dataset and an unshuffled, one-video-per-batch loader
dataset = build_dataset(cfg.data.test, dict(test_mode=True))
data_loader = build_dataloader(
        dataset,
        videos_per_gpu=1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=False,
        shuffle=False)
# Wrap the model only once, so re-running this cell does not nest
# MMDataParallel wrappers around an already-wrapped model.
if not isinstance(model, MMDataParallel):
    model = MMDataParallel(model, device_ids=[0])
outputs = single_gpu_test(model, data_loader)

# Evaluate the trained recognizer on the test set.
# Work on a copy of cfg.evaluation: popping 'interval' from the shared config
# object mutated cfg and made a second run of this cell raise KeyError.
eval_config = dict(cfg.evaluation)
# 'interval' is dropped before the remaining entries are forwarded as
# keyword arguments to dataset.evaluate.
eval_config.pop('interval', None)
eval_res = dataset.evaluate(outputs, **eval_config)
for name, val in eval_res.items():
    print(f'{name}: {val:.04f}')

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 10/10, 5.0 task/s, elapsed: 2s, ETA:     0s
Evaluating top_k_accuracy ...

top1_acc	0.9000
top5_acc	1.0000

Evaluating mean_class_accuracy ...

mean_acc	0.9000
top1_acc: 0.9000
top5_acc: 1.0000
mean_class_accuracy: 0.9000
