In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory. Printing every path is intentionally
# disabled; `pass` keeps the inner loop body syntactically valid (a loop whose
# body contains only a comment does not parse).
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        #print(os.path.join(dirname, filename))
        pass

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
root_dir = "https://dl.fbaipublicfiles.com/pytorchvideo/model_zoo"

# Model name -> checkpoint URL. Each URL is `root_dir` joined with the
# checkpoint's relative path; insertion order matches the listing below.
checkpoint_paths = {
    model_name: f"{root_dir}/{relative_path}"
    for model_name, relative_path in (
        ("slowfast_r50", "kinetics/SLOWFAST_8x8_R50.pyth"),
        ("slowfast_r50_detection", "ava/SLOWFAST_8x8_R50_DETECTION.pyth"),
        ("slowfast_r101", "kinetics/SLOWFAST_8x8_R101.pyth"),
        ("slowfast_16x8_r101_50_50", "kinetics/SLOWFAST_16x8_R101_50_50.pyth"),
    )
}



In [None]:
!pwd

In [None]:
!nvidia-smi

In [None]:
!pip install pytorchvideo

## Train

In [None]:
import argparse
import math
import os
import random

import numpy as np
import pandas as pd
import torch
from pytorchvideo.data import make_clip_sampler, labeled_video_dataset
from pytorchvideo.models import create_slowfast
from torch.backends import cudnn
from torch.nn import CrossEntropyLoss
from torch.optim import Adam, SGD
from torch.utils.data import DataLoader
from tqdm import tqdm

#from utils import train_transform, test_transform, clip_duration, num_classes

from pytorchvideo.transforms import ApplyTransformToKey, UniformTemporalSubsample, RandomShortSideScale, \
    ShortSideScale, Normalize
from torch import nn
from torchvision.transforms import Compose, Lambda, RandomCrop, RandomHorizontalFlip, CenterCrop

# --- Preprocessing configuration -------------------------------------------
side_size = 256                 # passed to ShortSideScale in the transforms below
max_size = 256                  # not referenced elsewhere in this cell
mean = [0.45, 0.45, 0.45]       # per-channel values passed to Normalize
std = [0.225, 0.225, 0.225]
crop_size = 100                 # not referenced elsewhere in this cell
num_frames = 32                 # passed to UniformTemporalSubsample
sampling_rate = 1
frames_per_second = 32/6
# With the values above this evaluates to roughly 6 (seconds per sampled clip).
clip_duration = (num_frames * sampling_rate) / frames_per_second
num_classes = 3
checkpoint_path = '/kaggle/working/SLOWFAST_8x8_R50.pyth'  # not referenced elsewhere in this cell

# --- Run configuration -------------------------------------------------------
data_root = "/kaggle/input/prevention/all"
batch_size = 6
epochs = 50
save_root = '/kaggle/working/CheckPoints/Batch_2_sgd_lr001'

# for reproducibility
random.seed(1)
np.random.seed(1)
torch.manual_seed(1)
cudnn.deterministic = True
# Benchmark mode lets cuDNN auto-tune and pick possibly different convolution
# algorithms from run to run, which defeats the purpose of the seeds above,
# so it is switched off here.
cudnn.benchmark = False




class PackPathway(nn.Module):
    """
    Pack a frames tensor into a two-element list ``[slow, fast]``.

    The fast entry is the input tensor itself. The slow entry keeps
    ``frames.shape[1] // alpha`` evenly spaced positions along dimension 1
    (the temporal axis, going by the original comment).
    """

    def __init__(self, alpha=4):
        super().__init__()
        # Ratio between the fast and slow lengths along dimension 1.
        self.alpha = alpha

    def forward(self, frames):
        length = frames.shape[1]
        # Evenly spaced positions from the first to the last index,
        # truncated to integers by .long().
        keep = torch.linspace(0, length - 1, length // self.alpha).long()
        return [frames.index_select(1, keep), frames]


def _build_video_transform():
    """Build the preprocessing applied to the "video" entry of each sample.

    The steps run in this order: UniformTemporalSubsample(num_frames),
    division by 255.0, Normalize(mean, std), ShortSideScale(side_size) and
    finally PackPathway().
    """
    steps = [
        UniformTemporalSubsample(num_frames),
        Lambda(lambda x: x / 255.0),
        Normalize(mean, std),
        ShortSideScale(size=side_size),
        PackPathway(),
    ]
    return ApplyTransformToKey(key="video", transform=Compose(steps))


# Training and evaluation use the same steps, built as two independent objects.
train_transform = _build_video_transform()

test_transform = _build_video_transform()



# train for one epoch
def train(model, data_loader, train_optimizer):
    """Run one optimisation pass over ``data_loader``.

    Reads the module-level names ``train_data``, ``batch_size``,
    ``loss_criterion``, ``epoch`` and ``epochs``, and moves every batch to
    the GPU with ``.cuda()``.

    Args:
        model: network called with the list of tensors in ``batch['video']``.
        data_loader: iterable of dict batches with 'video' and 'label' entries.
        train_optimizer: optimizer stepped once per batch.

    Returns:
        ``(mean loss per sample, accuracy as a fraction)``.

    Raises:
        ZeroDivisionError: if ``data_loader`` yields no batches.
    """
    model.train()
    loss_sum = 0.0
    hits = 0
    seen = 0
    expected_batches = math.ceil(train_data.num_videos / batch_size)
    progress = tqdm(data_loader, total=expected_batches, dynamic_ncols=True)
    for sample in progress:
        pathways = [clip.cuda() for clip in sample['video']]
        target = sample['label'].cuda()
        n_samples = pathways[0].size(0)

        train_optimizer.zero_grad()
        logits = model(pathways)
        batch_loss = loss_criterion(logits, target)
        # Weight by the batch size so the running value is a per-sample mean.
        loss_sum += batch_loss.item() * n_samples
        hits += (logits.argmax(dim=-1) == target).sum().item()
        batch_loss.backward()
        train_optimizer.step()

        seen += n_samples
        progress.set_description('Train Epoch: [{}/{}] Loss: {:.4f} Acc: {:.2f}%'
                                 .format(epoch, epochs, loss_sum / seen, hits * 100 / seen))

    return loss_sum / seen, hits / seen


# test for one epoch
def val(model, data_loader):
    """Evaluate ``model`` over ``data_loader`` without gradient tracking.

    Reads the module-level names ``test_data``, ``batch_size``, ``epoch`` and
    ``epochs``, and moves every batch to the GPU with ``.cuda()``.

    The second metric counts a sample as correct when its label is among the
    two highest-scoring classes, i.e. it is a top-2 accuracy. It used to be
    displayed as "Top-5" although ``k=2`` was always used; the progress text
    now says "Top-2". The training loop in this cell still stores the value
    under the 'top-5' results key.

    Args:
        model: network called with the list of tensors in ``batch['video']``.
        data_loader: iterable of dict batches with 'video' and 'label' entries.

    Returns:
        ``(top-1 accuracy, top-2 accuracy)``, both as fractions.

    Raises:
        ZeroDivisionError: if ``data_loader`` yields no batches.
    """
    model.eval()
    top1_hits, topk_hits, seen = 0, 0, 0
    with torch.no_grad():
        expected_batches = math.ceil(test_data.num_videos / batch_size)
        test_bar = tqdm(data_loader, total=expected_batches, dynamic_ncols=True)
        for batch in test_bar:
            video, label = [i.cuda() for i in batch['video']], batch['label'].cuda()
            pred = model(video)
            # Never ask topk for more entries than the model produces.
            k = min(2, pred.size(-1))
            top1_hits += (torch.eq(pred.argmax(dim=-1), label)).sum().item()
            topk_hits += torch.any(torch.eq(pred.topk(k=k, dim=-1).indices, label.unsqueeze(dim=-1)),
                                   dim=-1).sum().item()
            seen += video[0].size(0)
            test_bar.set_description('Test Epoch: [{}/{}] | Top-1:{:.2f}% | Top-2:{:.2f}%'
                                     .format(epoch, epochs, top1_hits * 100 / seen,
                                             topk_hits * 100 / seen))
    return top1_hits / seen, topk_hits / seen

'''
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Train Model')
    # common args
    parser.add_argument('--data_root', default='data', type=str, help='Datasets root path')
    parser.add_argument('--batch_size', default=8, type=int, help='Number of videos in each mini-batch')
    parser.add_argument('--epochs', default=10, type=int, help='Number of epochs over the model to train')
    parser.add_argument('--save_root', default='result', type=str, help='Result saved root path')
    
# args parse
args = parser.parse_args()
'''


# data prepare
# Worker processes per loader: at most 8, but never more than the CPUs the OS
# reports. Asking for more workers than the machine offers makes DataLoader
# warn about excessive worker creation and can slow loading down or freeze it.
num_workers = min(8, os.cpu_count() or 1)

# Training clips are sampled at random positions; evaluation takes one fixed
# clip per video. Audio is not decoded for either split.
train_data = labeled_video_dataset('{}/train'.format(data_root), make_clip_sampler('random', clip_duration),
                                   transform=train_transform, decode_audio=False)
test_data = labeled_video_dataset('{}/test'.format(data_root),
                                  make_clip_sampler('constant_clips_per_video', clip_duration, 1),
                                  transform=test_transform, decode_audio=False)
train_loader = DataLoader(train_data, batch_size=batch_size, num_workers=num_workers)
test_loader = DataLoader(test_data, batch_size=batch_size, num_workers=num_workers)


#------------------------------------------------------------------------------------------------------------

# model define, loss setup and optimizer config
#slow_fast = create_slowfast(model_num_class=num_classes).cuda()


# Fetch the pretrained SlowFast-R50 from the PyTorchVideo hub, then move it to the GPU.
# NOTE(review): the classification head is used exactly as loaded; presumably it
# still has the pretrained class count rather than `num_classes` - confirm.
slow_fast = torch.hub.load('facebookresearch/pytorchvideo:main', model='slowfast_r50', pretrained=True)
slow_fast = slow_fast.cuda()


#------------------------------------------------------------------------------------------------------------


loss_criterion = CrossEntropyLoss()
# optimizer = Adam(slow_fast.parameters(), lr=1e-1)
optimizer = SGD(slow_fast.parameters(), momentum=0.9, lr=0.001)

# training loop
# Per-epoch history; every list gains one entry per completed epoch.
results = {metric: [] for metric in ('loss', 'acc', 'top-1', 'top-5')}
if not os.path.exists(save_root):
    os.makedirs(save_root)
best_acc = 0.0
# `epoch` must stay a module-level name: train() and val() read it for their progress text.
for epoch in range(1, epochs + 1):
    train_loss, train_acc = train(slow_fast, train_loader, optimizer)
    results['loss'].append(train_loss)
    results['acc'].append(train_acc * 100)
    top_1, top_5 = val(slow_fast, test_loader)
    results['top-1'].append(top_1 * 100)
    results['top-5'].append(top_5 * 100)
    # save statistics (the CSV is rewritten in full after every epoch)
    data_frame = pd.DataFrame(data=results, index=range(1, epoch + 1))
    data_frame.to_csv('{}/metrics.csv'.format(save_root), index_label='epoch')

    # Only a strictly better top-1 score overwrites the stored weights.
    if top_1 <= best_acc:
        continue
    best_acc = top_1
    torch.save(slow_fast.state_dict(), '{}/slow_fast.pth'.format(save_root))

## Continue training (继续训练)

In [None]:
!pip install pytorchvideo

In [None]:
import argparse
import math
import os
import random

import numpy as np
import pandas as pd
import torch
from pytorchvideo.data import make_clip_sampler, labeled_video_dataset
from pytorchvideo.models import create_slowfast
from torch.backends import cudnn
from torch.nn import CrossEntropyLoss
from torch.optim import Adam, SGD
from torch.utils.data import DataLoader
from tqdm import tqdm

#from utils import train_transform, test_transform, clip_duration, num_classes

from pytorchvideo.transforms import ApplyTransformToKey, UniformTemporalSubsample, RandomShortSideScale, \
    ShortSideScale, Normalize
from torch import nn
from torchvision.transforms import Compose, Lambda, RandomCrop, RandomHorizontalFlip, CenterCrop

# --- Preprocessing configuration -------------------------------------------
side_size = 256                 # passed to ShortSideScale in the transforms below
max_size = 256                  # not referenced elsewhere in this cell
mean = [0.45, 0.45, 0.45]       # per-channel values passed to Normalize
std = [0.225, 0.225, 0.225]
crop_size = 100                 # not referenced elsewhere in this cell
num_frames = 32                 # passed to UniformTemporalSubsample
sampling_rate = 1
frames_per_second = 32/6
# With the values above this evaluates to roughly 6 (seconds per sampled clip).
clip_duration = (num_frames * sampling_rate) / frames_per_second
num_classes = 3
checkpoint_path = '/kaggle/working/SLOWFAST_8x8_R50.pyth'  # not referenced elsewhere in this cell

# --- Run configuration -------------------------------------------------------
data_root = "/kaggle/input/prevention/all"
batch_size = 6
epochs = 50
save_root = '/kaggle/working/CheckPoints/Batch_2_sgd_lr001'

# for reproducibility
random.seed(1)
np.random.seed(1)
torch.manual_seed(1)
cudnn.deterministic = True
# Benchmark mode lets cuDNN auto-tune and pick possibly different convolution
# algorithms from run to run, which defeats the purpose of the seeds above,
# so it is switched off here.
cudnn.benchmark = False




class PackPathway(nn.Module):
    """
    Pack a frames tensor into a two-element list ``[slow, fast]``.

    The fast entry is the input tensor itself. The slow entry keeps
    ``frames.shape[1] // alpha`` evenly spaced positions along dimension 1
    (the temporal axis, going by the original comment).
    """

    def __init__(self, alpha=4):
        super().__init__()
        # Ratio between the fast and slow lengths along dimension 1.
        self.alpha = alpha

    def forward(self, frames):
        length = frames.shape[1]
        # Evenly spaced positions from the first to the last index,
        # truncated to integers by .long().
        keep = torch.linspace(0, length - 1, length // self.alpha).long()
        return [frames.index_select(1, keep), frames]


def _build_video_transform():
    """Build the preprocessing applied to the "video" entry of each sample.

    The steps run in this order: UniformTemporalSubsample(num_frames),
    division by 255.0, Normalize(mean, std), ShortSideScale(side_size) and
    finally PackPathway().
    """
    steps = [
        UniformTemporalSubsample(num_frames),
        Lambda(lambda x: x / 255.0),
        Normalize(mean, std),
        ShortSideScale(size=side_size),
        PackPathway(),
    ]
    return ApplyTransformToKey(key="video", transform=Compose(steps))


# Training and evaluation use the same steps, built as two independent objects.
train_transform = _build_video_transform()

test_transform = _build_video_transform()



# train for one epoch
def train(model, data_loader, train_optimizer):
    """Run one optimisation pass over ``data_loader``.

    Reads the module-level names ``train_data``, ``batch_size``,
    ``loss_criterion``, ``epoch`` and ``epochs``, and moves every batch to
    the GPU with ``.cuda()``.

    Args:
        model: network called with the list of tensors in ``batch['video']``.
        data_loader: iterable of dict batches with 'video' and 'label' entries.
        train_optimizer: optimizer stepped once per batch.

    Returns:
        ``(mean loss per sample, accuracy as a fraction)``.

    Raises:
        ZeroDivisionError: if ``data_loader`` yields no batches.
    """
    model.train()
    loss_sum = 0.0
    hits = 0
    seen = 0
    expected_batches = math.ceil(train_data.num_videos / batch_size)
    progress = tqdm(data_loader, total=expected_batches, dynamic_ncols=True)
    for sample in progress:
        pathways = [clip.cuda() for clip in sample['video']]
        target = sample['label'].cuda()
        n_samples = pathways[0].size(0)

        train_optimizer.zero_grad()
        logits = model(pathways)
        batch_loss = loss_criterion(logits, target)
        # Weight by the batch size so the running value is a per-sample mean.
        loss_sum += batch_loss.item() * n_samples
        hits += (logits.argmax(dim=-1) == target).sum().item()
        batch_loss.backward()
        train_optimizer.step()

        seen += n_samples
        progress.set_description('Train Epoch: [{}/{}] Loss: {:.4f} Acc: {:.2f}%'
                                 .format(epoch, epochs, loss_sum / seen, hits * 100 / seen))

    return loss_sum / seen, hits / seen


# test for one epoch
def val(model, data_loader):
    """Evaluate ``model`` over ``data_loader`` without gradient tracking.

    Reads the module-level names ``test_data``, ``batch_size``, ``epoch`` and
    ``epochs``, and moves every batch to the GPU with ``.cuda()``.

    The second metric counts a sample as correct when its label is among the
    two highest-scoring classes, i.e. it is a top-2 accuracy. It used to be
    displayed as "Top-5" although ``k=2`` was always used; the progress text
    now says "Top-2". The training loop in this cell still stores the value
    under the 'top-5' results key.

    Args:
        model: network called with the list of tensors in ``batch['video']``.
        data_loader: iterable of dict batches with 'video' and 'label' entries.

    Returns:
        ``(top-1 accuracy, top-2 accuracy)``, both as fractions.

    Raises:
        ZeroDivisionError: if ``data_loader`` yields no batches.
    """
    model.eval()
    top1_hits, topk_hits, seen = 0, 0, 0
    with torch.no_grad():
        expected_batches = math.ceil(test_data.num_videos / batch_size)
        test_bar = tqdm(data_loader, total=expected_batches, dynamic_ncols=True)
        for batch in test_bar:
            video, label = [i.cuda() for i in batch['video']], batch['label'].cuda()
            pred = model(video)
            # Never ask topk for more entries than the model produces.
            k = min(2, pred.size(-1))
            top1_hits += (torch.eq(pred.argmax(dim=-1), label)).sum().item()
            topk_hits += torch.any(torch.eq(pred.topk(k=k, dim=-1).indices, label.unsqueeze(dim=-1)),
                                   dim=-1).sum().item()
            seen += video[0].size(0)
            test_bar.set_description('Test Epoch: [{}/{}] | Top-1:{:.2f}% | Top-2:{:.2f}%'
                                     .format(epoch, epochs, top1_hits * 100 / seen,
                                             topk_hits * 100 / seen))
    return top1_hits / seen, topk_hits / seen

'''
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Train Model')
    # common args
    parser.add_argument('--data_root', default='data', type=str, help='Datasets root path')
    parser.add_argument('--batch_size', default=8, type=int, help='Number of videos in each mini-batch')
    parser.add_argument('--epochs', default=10, type=int, help='Number of epochs over the model to train')
    parser.add_argument('--save_root', default='result', type=str, help='Result saved root path')
    parser.add_argument("--learning_rate", default=3e-2, type=float,help="The initial learning rate for SGD.")
    parser.add_argument("--weight_decay", default=0, type=float,help="Weight deay if we apply some.")
# args parse
args = parser.parse_args()
'''


# data prepare
# Worker processes per loader: at most 8, but never more than the CPUs the OS
# reports. Asking for more workers than the machine offers makes DataLoader
# warn about excessive worker creation and can slow loading down or freeze it.
num_workers = min(8, os.cpu_count() or 1)

# Training clips are sampled at random positions; evaluation takes one fixed
# clip per video. Audio is not decoded for either split.
train_data = labeled_video_dataset('{}/train'.format(data_root), make_clip_sampler('random', clip_duration),
                                   transform=train_transform, decode_audio=False)
test_data = labeled_video_dataset('{}/test'.format(data_root),
                                  make_clip_sampler('constant_clips_per_video', clip_duration, 1),
                                  transform=test_transform, decode_audio=False)
train_loader = DataLoader(train_data, batch_size=batch_size, num_workers=num_workers)
test_loader = DataLoader(test_data, batch_size=batch_size, num_workers=num_workers)


#------------------------------------------------------------------------------------------------------------

# model define, loss setup and optimizer config
#slow_fast = create_slowfast(model_num_class=num_classes).cuda()


slow_fast = torch.hub.load('facebookresearch/pytorchvideo:main', model='slowfast_r50', pretrained=True).cuda()

#slow_fast.load_state_dict(torch.load('/kaggle/working/CheckPoints/Batch_2_sgd_lr001/slow_fast.pth', 'cuda'))

# Replace the final projection with a freshly initialised layer sized for this
# task. The input width is read from the layer being replaced and the output
# width from `num_classes`, so the head follows the configuration instead of
# repeating the literals 2304 and 3.
head = slow_fast.blocks[6]
head.proj = torch.nn.Linear(in_features=head.proj.in_features, out_features=num_classes, bias=True).cuda()

#------------------------------------------------------------------------------------------------------------


loss_criterion = CrossEntropyLoss()
# optimizer = Adam(slow_fast.parameters(), lr=1e-1)
#optimizer = SGD(slow_fast.parameters(), lr=0.001, momentum=0.9,weight_decay=0.0001)


# optimizer = SGD([{'params':slow_fast.parameters(),'lr':args.learning_rate},{'params':model.head.parameters(),'lr':args.learning_rate}],
#                 lr=args.learning_rate,momentum=0.9,weight_decay=args.weight_decay)

# Differential learning rates: the new projection layer trains at 1e-3, every
# other group at 1e-4.
# NOTE(review): the dropout and output_pool groups presumably hold no
# parameters and are then no-ops - confirm before relying on them.
optimizer = SGD([{'params':slow_fast.blocks[0:6].parameters(),'lr':0.0001},
                 {'params':slow_fast.blocks[6].dropout.parameters(),'lr':0.0001},
                 {'params':slow_fast.blocks[6].proj.parameters(),'lr':0.001},
                 {'params':slow_fast.blocks[6].output_pool.parameters(),'lr':0.0001}], 
                lr=0.0001,momentum=0.9,weight_decay=0.0001)



print(optimizer)

##---------------------------------------------------------------------------------------------------------
# training loop
# Per-epoch history; every list gains one entry per completed epoch.
results = {metric: [] for metric in ('loss', 'acc', 'top-1', 'top-5')}
if not os.path.exists(save_root):
    os.makedirs(save_root)
best_acc = 0.0
# `epoch` must stay a module-level name: train() and val() read it for their progress text.
for epoch in range(1, epochs + 1):
    train_loss, train_acc = train(slow_fast, train_loader, optimizer)
    results['loss'].append(train_loss)
    results['acc'].append(train_acc * 100)
    top_1, top_5 = val(slow_fast, test_loader)
    results['top-1'].append(top_1 * 100)
    results['top-5'].append(top_5 * 100)
    # save statistics (the CSV is rewritten in full after every epoch)
    data_frame = pd.DataFrame(data=results, index=range(1, epoch + 1))
    data_frame.to_csv('{}/metrics.csv'.format(save_root), index_label='epoch')

    # Only a strictly better top-1 score overwrites the stored weights.
    if top_1 <= best_acc:
        continue
    best_acc = top_1
    torch.save(slow_fast.state_dict(), '{}/slow_fast.pth'.format(save_root))

In [None]:
optimizer

In [None]:
slow_fast.blocks[6]

In [None]:
# Swap in a freshly initialised 3-way projection layer. It is moved to the GPU
# because the model itself was moved there with .cuda() when it was loaded; a
# CPU-resident layer inside a GPU model fails on the next forward pass.
# Note: running this cell discards whatever the current projection has learned.
slow_fast.blocks[6].proj = torch.nn.Linear(in_features=2304, out_features=3, bias=True).cuda()

## train 1

224*224

/opt/conda/lib/python3.7/site-packages/torch/utils/data/dataloader.py:481: UserWarning: This DataLoader will create 8 worker processes in total. Our suggested max number of worker in current system is 2, which is smaller than what this DataLoader is going to create. Please be aware that excessive worker creation might get DataLoader running slow or even freeze, lower the worker number to avoid potential slowness/freeze if necessary.
  cpuset_checked))
  
  
Train Epoch: [1/50] Loss: 1.1127  **Acc: 41.06%**: : 1312it [34:25,  1.57s/it]                        
Test Epoch: [1/50] |  **Top-1:39.55%**  | Top-5:74.07%: 100%|██████████| 560/560 [13:27<00:00,  1.44s/it]
Train Epoch: [2/50] Loss: 1.0590  **Acc: 46.50%**: : 1312it [34:30,  1.58s/it]                        
Test Epoch: [2/50] |  **Top-1:40.89%**  | Top-5:71.98%: 100%|██████████| 560/560 [13:35<00:00,  1.46s/it]
Train Epoch: [3/50] Loss: 1.0099  **Acc: 49.37%**: : 1312it [34:23,  1.57s/it]
Test Epoch: [3/50] |  **Top-1:43.43%**  | Top-5:71.56%: 100%|██████████| 560/560 [13:38<00:00,  1.46s/it]
Train Epoch: [4/50] Loss: 0.9751  **Acc: 51.94%**  : : 1312it [34:44,  1.59s/it]                        
Test Epoch: [4/50] |  **Top-1:42.83%**  | Top-5:71.59%: 100%|██████████| 560/560 [13:15<00:00,  1.42s/it]
Train Epoch: [5/50] Loss: 0.9386  **Acc: 54.57%**  : : 1312it [33:56,  1.55s/it]
Test Epoch: [5/50] |  **Top-1:44.05%**  | Top-5:74.22%: 100%|██████████| 560/560 [13:32<00:00,  1.45s/it]
Train Epoch: [6/50] Loss: 0.9006  **Acc: 57.53%**  : : 1312it [33:42,  1.54s/it]                        
Test Epoch: [6/50] |  **Top-1:35.74%**  | Top-5:73.23%: 100%|██████████| 560/560 [13:43<00:00,  1.47s/it]
Train Epoch: [7/50] Loss: 0.8706  **Acc: 58.79%**  : : 1312it [34:28,  1.58s/it]                        
Test Epoch: [7/50] |  **Top-1:41.04%**  | Top-5:72.10%: 100%|██████████| 560/560 [13:56<00:00,  1.49s/it] 
Train Epoch: [8/50] Loss: 0.8357  **Acc: 61.38%**  : : 1312it [34:41,  1.59s/it]                        
Test Epoch: [8/50] |  **Top-1:39.49%**  | Top-5:73.35%: 100%|██████████| 560/560 [13:49<00:00,  1.48s/it]
Train Epoch: [9/50] Loss: 0.7987  **Acc: 62.91%**  : : 1312it [34:04,  1.56s/it]                        
Test Epoch: [9/50] |  **Top-1:42.56%**  | Top-5:70.64%: 100%|██████████| 560/560 [13:33<00:00,  1.45s/it] 
Train Epoch: [10/50] Loss: 0.7658 **Acc: 66.39%**:   : 1312it [34:24,  1.57s/it]                        
Test Epoch: [10/50] | **Top-1:41.13%**  | Top-5:75.02%: 100%|██████████| 560/560 [13:27<00:00,  1.44s/it]
Train Epoch: [11/50] Loss: 0.7220 **Acc: 67.77%**  : : 1312it [34:26,  1.58s/it]                        
Test Epoch: [11/50] | **Top-1:35.89%**  | Top-5:74.22%: 100%|██████████| 560/560 [13:47<00:00,  1.48s/it] 
Train Epoch: [12/50] Loss: 0.7307 **Acc: 68.45%**  :  32%|███▏      | 421/1308 [11:22<23:50,  1.61s/it]

## train
***with pretrained weights***  - 224*224


Train Epoch: [1/50] Loss: 0.5032 Acc: 81.64%: : 1312it [34:36,  1.58s/it]                        
Test Epoch: [1/50] | Top-1:45.01% | Top-5:74.93%: 100%|██████████| 560/560 [13:47<00:00,  1.48s/it]
Train Epoch: [2/50] Loss: 0.1541 Acc: 94.38%:   7%|▋         | 92/1308 [02:40<31:32,  1.56s/it]  


## Test

In [None]:
import argparse
import json

import torch
from pytorchvideo.data.encoded_video import EncodedVideo
from pytorchvideo.models import create_slowfast

from utils import num_classes, clip_duration, test_transform

if __name__ == '__main__':
    # Classify a single video file with a saved model and print the names of
    # its highest-scoring classes.
    parser = argparse.ArgumentParser(description='Test Model')
    parser.add_argument('--model_path', default='result/slow_fast.pth', type=str, help='Model path')
    parser.add_argument('--video_path', default='data/test/applauding/_V-dzjftmCQ_000023_000033.mp4', type=str,
                        help='Video path')

    # parse_known_args() ignores arguments this parser does not define. Inside
    # a notebook kernel sys.argv typically carries the kernel's own launcher
    # flags, which parse_args() would reject and exit on.
    opt, _unknown_args = parser.parse_known_args()
    model_path, video_path = opt.model_path, opt.video_path
    # NOTE(review): the weights are loaded into a create_slowfast() model;
    # confirm the checkpoint was saved from a model with the same layout.
    slow_fast = create_slowfast(model_num_class=num_classes)
    slow_fast.load_state_dict(torch.load(model_path, 'cpu'))
    slow_fast = slow_fast.cuda().eval()
    with open('result/kinetics_classnames.json', 'r') as f:
        kinetics_classnames = json.load(f)

    # create an id to label name mapping (the JSON maps name -> id)
    # NOTE(review): these are Kinetics class names; confirm they match the
    # classes the saved model was trained on.
    kinetics_id_to_classname = {
        class_id: str(class_name).replace('"', "")
        for class_name, class_id in kinetics_classnames.items()
    }

    video = EncodedVideo.from_path(video_path, decode_audio=False)
    video_data = video.get_clip(start_sec=0, end_sec=clip_duration)
    video_data = test_transform(video_data)
    # [None, ...] adds a leading batch dimension of size 1 to each pathway.
    inputs = [i.cuda()[None, ...] for i in video_data['video']]
    # Inference only: no gradients are needed.
    with torch.no_grad():
        pred = slow_fast(inputs)

    # get the predicted classes; topk raises if k exceeds the number of
    # outputs, so cap it for models with fewer than five classes
    top_k = min(5, pred.shape[-1])
    pred_classes = pred.topk(k=top_k).indices
    pred_class_names = [kinetics_id_to_classname[int(i)] for i in pred_classes[0]]
    print('predicted labels: {}'.format(pred_class_names))

## Confusion Matrix

In [None]:
!pip install pytorchvideo

In [None]:
import argparse
import math
import os
import random

import numpy as np
import pandas as pd
import torch
from pytorchvideo.data import make_clip_sampler, labeled_video_dataset
from pytorchvideo.models import create_slowfast
from torch.backends import cudnn
from torch.nn import CrossEntropyLoss
from torch.optim import Adam, SGD
from torch.utils.data import DataLoader
from tqdm import tqdm

#from utils import train_transform, test_transform, clip_duration, num_classes

from pytorchvideo.transforms import ApplyTransformToKey, UniformTemporalSubsample, RandomShortSideScale, \
    ShortSideScale, Normalize
from torch import nn
from torchvision.transforms import Compose, Lambda, RandomCrop, RandomHorizontalFlip, CenterCrop

# --- Preprocessing configuration -------------------------------------------
side_size = 256                 # passed to ShortSideScale in the transforms below
max_size = 256                  # not referenced elsewhere in this cell
mean = [0.45, 0.45, 0.45]       # per-channel values passed to Normalize
std = [0.225, 0.225, 0.225]
crop_size = 100                 # not referenced elsewhere in this cell
num_frames = 32                 # passed to UniformTemporalSubsample
sampling_rate = 1
frames_per_second = 32/6
# With the values above this evaluates to roughly 6 (seconds per sampled clip).
clip_duration = (num_frames * sampling_rate) / frames_per_second
num_classes = 3
checkpoint_path = '/kaggle/working/SLOWFAST_8x8_R50.pyth'  # not referenced elsewhere in this cell

# --- Run configuration -------------------------------------------------------
data_root = "/kaggle/input/prevention/all"
batch_size = 6
epochs = 1                      # a single train + evaluation pass in this cell
save_root = '/kaggle/working/CheckPoints/Batch_2_sgd_lr001'

# for reproducibility
random.seed(1)
np.random.seed(1)
torch.manual_seed(1)
cudnn.deterministic = True
# Benchmark mode lets cuDNN auto-tune and pick possibly different convolution
# algorithms from run to run, which defeats the purpose of the seeds above,
# so it is switched off here.
cudnn.benchmark = False




class PackPathway(nn.Module):
    """
    Pack a frames tensor into a two-element list ``[slow, fast]``.

    The fast entry is the input tensor itself. The slow entry keeps
    ``frames.shape[1] // alpha`` evenly spaced positions along dimension 1
    (the temporal axis, going by the original comment).
    """

    def __init__(self, alpha=4):
        super().__init__()
        # Ratio between the fast and slow lengths along dimension 1.
        self.alpha = alpha

    def forward(self, frames):
        length = frames.shape[1]
        # Evenly spaced positions from the first to the last index,
        # truncated to integers by .long().
        keep = torch.linspace(0, length - 1, length // self.alpha).long()
        return [frames.index_select(1, keep), frames]


def _build_video_transform():
    """Build the preprocessing applied to the "video" entry of each sample.

    The steps run in this order: UniformTemporalSubsample(num_frames),
    division by 255.0, Normalize(mean, std), ShortSideScale(side_size) and
    finally PackPathway().
    """
    steps = [
        UniformTemporalSubsample(num_frames),
        Lambda(lambda x: x / 255.0),
        Normalize(mean, std),
        ShortSideScale(size=side_size),
        PackPathway(),
    ]
    return ApplyTransformToKey(key="video", transform=Compose(steps))


# Training and evaluation use the same steps, built as two independent objects.
train_transform = _build_video_transform()

test_transform = _build_video_transform()



# train for one epoch
def train(model, data_loader, train_optimizer):
    """Run one optimisation pass over ``data_loader``.

    Reads the module-level names ``train_data``, ``batch_size``,
    ``loss_criterion``, ``epoch`` and ``epochs``, and moves every batch to
    the GPU with ``.cuda()``.

    Args:
        model: network called with the list of tensors in ``batch['video']``.
        data_loader: iterable of dict batches with 'video' and 'label' entries.
        train_optimizer: optimizer stepped once per batch.

    Returns:
        ``(mean loss per sample, accuracy as a fraction)``.

    Raises:
        ZeroDivisionError: if ``data_loader`` yields no batches.
    """
    model.train()
    loss_sum = 0.0
    hits = 0
    seen = 0
    expected_batches = math.ceil(train_data.num_videos / batch_size)
    progress = tqdm(data_loader, total=expected_batches, dynamic_ncols=True)
    for sample in progress:
        pathways = [clip.cuda() for clip in sample['video']]
        target = sample['label'].cuda()
        n_samples = pathways[0].size(0)

        train_optimizer.zero_grad()
        logits = model(pathways)
        batch_loss = loss_criterion(logits, target)
        # Weight by the batch size so the running value is a per-sample mean.
        loss_sum += batch_loss.item() * n_samples
        hits += (logits.argmax(dim=-1) == target).sum().item()
        batch_loss.backward()
        train_optimizer.step()

        seen += n_samples
        progress.set_description('Train Epoch: [{}/{}] Loss: {:.4f} Acc: {:.2f}%'
                                 .format(epoch, epochs, loss_sum / seen, hits * 100 / seen))

    return loss_sum / seen, hits / seen


# test for one epoch
def val(model, data_loader):
    """Evaluate ``model`` over ``data_loader`` and record every prediction.

    Reads the module-level names ``test_data``, ``batch_size``, ``epoch`` and
    ``epochs``, moves every batch to the GPU with ``.cuda()``, and appends to
    the module-level lists ``pred_result`` (arg-max class per sample) and
    ``ground_truth`` (label per sample), both as CPU tensors.

    The second metric counts a sample as correct when its label is among the
    two highest-scoring classes, i.e. it is a top-2 accuracy. It used to be
    displayed as "Top-5" although ``k=2`` was always used; the progress text
    now says "Top-2". The loop in this cell still stores the value under the
    'top-5' results key.

    Args:
        model: network called with the list of tensors in ``batch['video']``.
        data_loader: iterable of dict batches with 'video' and 'label' entries.

    Returns:
        ``(top-1 accuracy, top-2 accuracy)``, both as fractions.

    Raises:
        ZeroDivisionError: if ``data_loader`` yields no batches.
    """
    model.eval()
    top1_hits, topk_hits, seen = 0, 0, 0
    with torch.no_grad():
        expected_batches = math.ceil(test_data.num_videos / batch_size)
        test_bar = tqdm(data_loader, total=expected_batches, dynamic_ncols=True)
        for batch in test_bar:
            video, label = [i.cuda() for i in batch['video']], batch['label'].cuda()

            pred = model(video)
            top1 = pred.argmax(dim=-1)

            # Collected for the confusion matrix built in a later cell;
            # extend() stores one zero-dimensional tensor per sample.
            pred_result.extend(top1.cpu())
            ground_truth.extend(label.cpu())

            # Never ask topk for more entries than the model produces.
            k = min(2, pred.size(-1))
            top1_hits += (torch.eq(top1, label)).sum().item()
            topk_hits += torch.any(torch.eq(pred.topk(k=k, dim=-1).indices, label.unsqueeze(dim=-1)),
                                   dim=-1).sum().item()
            seen += video[0].size(0)
            test_bar.set_description('Test Epoch: [{}/{}] | Top-1:{:.2f}% | Top-2:{:.2f}%'
                                     .format(epoch, epochs, top1_hits * 100 / seen,
                                             topk_hits * 100 / seen))
    return top1_hits / seen, topk_hits / seen

'''
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Train Model')
    # common args
    parser.add_argument('--data_root', default='data', type=str, help='Datasets root path')
    parser.add_argument('--batch_size', default=8, type=int, help='Number of videos in each mini-batch')
    parser.add_argument('--epochs', default=10, type=int, help='Number of epochs over the model to train')
    parser.add_argument('--save_root', default='result', type=str, help='Result saved root path')
    
# args parse
args = parser.parse_args()
'''


# data prepare
# Worker processes per loader: at most 8, but never more than the CPUs the OS
# reports. Asking for more workers than the machine offers makes DataLoader
# warn about excessive worker creation and can slow loading down or freeze it.
num_workers = min(8, os.cpu_count() or 1)

# Training clips are sampled at random positions; evaluation takes one fixed
# clip per video. Audio is not decoded for either split.
train_data = labeled_video_dataset('{}/train'.format(data_root), make_clip_sampler('random', clip_duration),
                                   transform=train_transform, decode_audio=False)
test_data = labeled_video_dataset('{}/test'.format(data_root),
                                  make_clip_sampler('constant_clips_per_video', clip_duration, 1),
                                  transform=test_transform, decode_audio=False)
train_loader = DataLoader(train_data, batch_size=batch_size, num_workers=num_workers)
test_loader = DataLoader(test_data, batch_size=batch_size, num_workers=num_workers)


#------------------------------------------------------------------------------------------------------------

# model define, loss setup and optimizer config
#slow_fast = create_slowfast(model_num_class=num_classes).cuda()


# Build the hub model on the GPU, then overwrite its weights with the saved checkpoint.
slow_fast = torch.hub.load('facebookresearch/pytorchvideo:main', model='slowfast_r50', pretrained=True)
slow_fast = slow_fast.cuda()

saved_weights = torch.load('/kaggle/input/pretrained-weight/slow_fast.pth', 'cuda')
slow_fast.load_state_dict(saved_weights)

#------------------------------------------------------------------------------------------------------------


loss_criterion = CrossEntropyLoss()
# optimizer = Adam(slow_fast.parameters(), lr=1e-1)
optimizer = SGD(slow_fast.parameters(), momentum=0.9, lr=0.001)

# training loop
# Per-epoch history; every list gains one entry per completed epoch.
results = {metric: [] for metric in ('loss', 'acc', 'top-1', 'top-5')}

# Filled by val(): one entry per evaluated sample.
pred_result = []
ground_truth = []

if not os.path.exists(save_root):
    os.makedirs(save_root)
best_acc = 0.0
# NOTE(review): the loaded model is trained for `epochs` passes before it is
# evaluated, so the collected predictions come from the updated weights -
# confirm this is intended for the confusion matrix.
# `epoch` must stay a module-level name: train() and val() read it for their progress text.
for epoch in range(1, epochs + 1):
    train_loss, train_acc = train(slow_fast, train_loader, optimizer)
    results['loss'].append(train_loss)
    results['acc'].append(train_acc * 100)
    top_1, top_5 = val(slow_fast, test_loader)
    results['top-1'].append(top_1 * 100)
    results['top-5'].append(top_5 * 100)

    #pred_result.extend(pred)
    #ground_truth.extend(label)


    # save statistics (the CSV is rewritten in full after every epoch)
    data_frame = pd.DataFrame(data=results, index=range(1, epoch + 1))
    data_frame.to_csv('{}/metrics.csv'.format(save_root), index_label='epoch')

    # Only a strictly better top-1 score overwrites the stored weights.
    if top_1 <= best_acc:
        continue
    best_acc = top_1
    torch.save(slow_fast.state_dict(), '{}/slow_fast.pth'.format(save_root))

In [None]:
len(ground_truth), len(pred_result)

In [None]:
#ground_truth[i for i in range(len(ground_truth))].numpy()


# Convert the collected per-sample tensors to NumPy values:
# `a` holds the true labels, `b` the predicted classes (same order).
a = [truth.numpy() for truth in ground_truth]
b = [guess.numpy() for guess in pred_result]

#a.numpy()
#ground_truth[i for i in range(3355)].numpy()

In [None]:
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sn
import pandas as pd

# NOTE(review): assumes dataset label ids 0, 1, 2 correspond to these names in
# this order - confirm against how the dataset assigns ids to class folders.
classes = ('non', 'left', 'right')

# Build confusion matrix. Fixing `labels` keeps the matrix at
# len(classes) x len(classes) even when a class never occurs in the data.
cf_matrix = confusion_matrix(a, b, labels=list(range(len(classes))))

# Normalise each row by the number of true samples of that class, so every
# cell is the fraction of that class predicted as the column class. The former
# `cf_matrix / np.sum(cf_matrix) * 3` only equals this when all classes have
# exactly the same number of samples.
row_totals = cf_matrix.sum(axis=1, keepdims=True)
row_totals[row_totals == 0] = 1  # a class without samples keeps an all-zero row
df_cm = pd.DataFrame(cf_matrix / row_totals, index = list(classes),
                     columns = list(classes))
plt.figure(figsize = (3,3))
sn.heatmap(df_cm, annot=True)

In [None]:
/home/k/kai/checkpoint
/home/k/kai/data/all

slowfast_diff_lr.py