In [1]:
import os
import sys
import json
import numpy as np
import torch
from torch import nn
from torch import optim

from opts import parse_opts
from mean import get_mean, get_std
from spatial_transforms import (
    Compose, Normalize, Scale, CenterCrop, CornerCrop, MultiScaleCornerCrop,
    MultiScaleRandomCrop, RandomHorizontalFlip, ToTensor)
from temporal_transforms import LoopPadding, TemporalRandomCrop
from target_transforms import ClassLabel, VideoID
from target_transforms import Compose as TargetCompose
from dataset import get_training_set, get_validation_set, get_test_set
from utils import Logger
from train import train_epoch
from validation import val_epoch
import collections
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from joblib import dump, load
from sklearn import preprocessing
from scipy import stats
from sklearn.metrics import accuracy_score

from models import *

In [2]:
# Limit the CUDA devices visible to this process to physical GPUs 3 and 4.
os.environ["CUDA_VISIBLE_DEVICES"] = "3,4"

In [3]:
# Hyperparameters and runtime options, bundled into one attribute-access
# dictionary (`opt.batch_size`, `opt.result_path`, ...). The same object is
# passed to final_test further down in this notebook.

import easydict

opt = easydict.EasyDict(dict(
    # locations and dataset
    root_path='',
    video_path='video_kinetics_jpg',
    annotation_path='kinetics.json',
    result_path='results',
    dataset='ucf101-music',       # alternative: 'ucf101'
    n_classes=9,                  # alternative: 101
    n_finetune_classes=9,         # alternative: 101
    # clip geometry and cropping
    sample_size=112,
    sample_duration=16,
    initial_scale=1.0,
    n_scales=5,
    scale_step=0.84089641525,     # numerically 2 ** -0.25
    train_crop='corner',
    # optimiser settings
    learning_rate=0.1,
    momentum=0.9,
    dampening=0.9,
    weight_decay=0.001,
    # input normalisation
    mean_dataset='activitynet',
    no_mean_norm=False,
    std_norm=False,
    nesterov=False,
    optimizer='sgd',
    lr_patience=10,
    batch_size=32,
    n_epochs=20,
    begin_epoch=1,
    n_val_samples=3,
    resume_path='',
    pretrain_path='',
    ft_begin_index=5,
    # run-mode switches
    no_train=False,
    no_val=False,
    test=False,
    test_subset='val',
    scale_in_test=1.0,
    crop_position_in_test='c',
    no_softmax_in_test=False,
    no_cuda=False,
    n_threads=4,
    checkpoint=2,
    no_hflip=False,
    norm_value=1,
    # architecture description
    model='resnet',
    model_depth=101,
    resnet_shortcut='B',
    wide_resnet_k=2,
    resnext_cardinality=32,
    manual_seed=1,
))

# Derived architecture tag "<model>-<depth>"; evaluates to 'resnet-101' here.
# NOTE(review): the network built later in this notebook comes from
# models.resnext, so this tag may not describe it -- confirm whether anything
# downstream reads opt.model / opt.arch.
opt.arch = '-'.join((opt.model, str(opt.model_depth)))

In [4]:
# Instantiate the network through models.resnext.resnet101.
from models import resnext

model_name = 'resnext-101-kinetics'

# Constructor arguments, collected in one place. The values repeat what the
# `opt` cell holds (n_classes=9, resnet_shortcut='B', resnext_cardinality=32,
# sample_size=112, sample_duration=16).
resnext_kwargs = dict(
    num_classes=9,
    shortcut_type='B',
    cardinality=32,
    sample_size=112,
    sample_duration=16,
)
model = resnext.resnet101(**resnext_kwargs)

  m.weight = nn.init.kaiming_normal(m.weight, mode='fan_out')


In [5]:
# Load the fine-tuned weights from the saved checkpoint into `model`.
#
# The model is moved to the GPU and wrapped in nn.DataParallel so that its
# parameter names line up with the checkpoint's keys (the alternative would be
# stripping the `module.` prefix from the checkpoint keys instead).
#
# The isinstance guard makes this cell safe to re-run: without it a second
# execution would wrap the already-wrapped model again, and the checkpoint
# keys would then no longer match.
if not isinstance(model, nn.DataParallel):
    model = nn.DataParallel(model.cuda(), device_ids=None)

# NOTE(review): torch.load unpickles the file -- only load checkpoints from a
# trusted source.
pretrain = torch.load('./results/save_14.pth')

# Left as the last bare expression so the notebook displays the key-matching
# report returned by load_state_dict.
model.load_state_dict(pretrain['state_dict'])

<All keys matched successfully>

In [6]:
# Build the evaluation dataset and DataLoader from the pre-extracted video
# frames and the annotation JSON.
from datasets.ucf101 import UCF101

# NOTE(review): absolute, machine-specific location -- adjust when running
# elsewhere.
root_path = '/data/qq/CSCE689/video/'
video_path = root_path + 'UCF-music/'  # 'UCF-101-jpg/'
annotation_path = root_path + 'ucfTrainTestlist/ucf101_01_music.json'

sample_size = 112      # res3d
sample_duration = 16   # for res3d
norm_value = 1

# NOTE(review): the mean is taken for 'kinetics' here, while
# opt.mean_dataset is 'activitynet' -- confirm which one is intended.
mean = get_mean(norm_value, dataset='kinetics')
# `std` is computed but not passed to the normaliser below, which is given a
# unit std of [1, 1, 1] (opt.std_norm is False).
std = get_std(norm_value)
norm_method = Normalize(mean, [1, 1, 1])

batch_size = 32
n_threads = 6

# Per-frame pipeline: rescale, centre ('c') crop, tensor conversion, then
# normalisation.
spatial_transform = Compose([
    Scale(sample_size),
    CornerCrop(sample_size, 'c'),
    ToTensor(norm_value),
    norm_method,
])

temporal_transform = LoopPadding(sample_duration)
target_transform = ClassLabel()  # VideoID()

# get test data
# The clip length is passed through the `sample_duration` variable (previously
# a second hard-coded 16) so it cannot drift from the LoopPadding setting above.
# NOTE(review): the positional 0 is presumably the per-video sample count --
# confirm against datasets/ucf101.py.
test_data = UCF101(
    video_path,
    annotation_path,
    'validation',
    0,
    spatial_transform=spatial_transform,
    temporal_transform=temporal_transform,
    target_transform=target_transform,
    sample_duration=sample_duration)

# wrap test data; shuffle stays off so batches come out in dataset order
test_loader = torch.utils.data.DataLoader(
    test_data,
    batch_size=batch_size,
    shuffle=False,
    num_workers=n_threads,
    pin_memory=False)  # True

dataset loading [0/356]


In [7]:
# Run the evaluation pass over the test loader and keep what final_test returns.
from testing import final_test

# The captured output of this cell shows final_test wrapping its inputs as
# `Variable(inputs, volatile=True)`. On PyTorch >= 0.4 the `volatile` flag has
# no effect, so gradients would be tracked during inference. Disabling autograd
# here restores the intended behaviour (less memory, no graph construction)
# without changing the predictions.
# NOTE(review): confirm that final_test puts the model into eval mode.
with torch.no_grad():
    all_output_buffer = final_test(
        test_loader, model, opt, test_data.class_names)

test


  inputs = Variable(inputs, volatile=True)
  outputs = F.softmax(outputs)


[1/186]	Time 6.708 (6.708)	Data 1.445 (1.445)	
[2/186]	Time 0.237 (3.472)	Data 0.016 (0.731)	
[3/186]	Time 0.153 (2.366)	Data 0.023 (0.495)	
[4/186]	Time 0.151 (1.812)	Data 0.016 (0.375)	
[5/186]	Time 0.145 (1.479)	Data 0.012 (0.302)	
[6/186]	Time 0.143 (1.256)	Data 0.009 (0.253)	
[7/186]	Time 0.144 (1.097)	Data 0.011 (0.219)	
[8/186]	Time 0.146 (0.978)	Data 0.016 (0.193)	
[9/186]	Time 0.152 (0.886)	Data 0.016 (0.174)	
[10/186]	Time 0.138 (0.812)	Data 0.009 (0.157)	
[11/186]	Time 0.142 (0.751)	Data 0.009 (0.144)	
[12/186]	Time 0.149 (0.701)	Data 0.012 (0.133)	
[13/186]	Time 0.162 (0.659)	Data 0.019 (0.124)	
[14/186]	Time 0.159 (0.623)	Data 0.015 (0.116)	
[15/186]	Time 0.170 (0.593)	Data 0.019 (0.110)	
[16/186]	Time 0.149 (0.565)	Data 0.015 (0.104)	
[17/186]	Time 0.153 (0.541)	Data 0.016 (0.099)	
[18/186]	Time 0.142 (0.519)	Data 0.011 (0.094)	
[19/186]	Time 0.146 (0.499)	Data 0.011 (0.089)	
[20/186]	Time 0.149 (0.482)	Data 0.014 (0.086)	
[21/186]	Time 0.156 (0.466)	Data 0.014 (0.082)	
[

[171/186]	Time 0.149 (0.229)	Data 0.014 (0.059)	
[172/186]	Time 0.154 (0.228)	Data 0.021 (0.059)	
[173/186]	Time 0.152 (0.228)	Data 0.019 (0.059)	
[174/186]	Time 0.149 (0.227)	Data 0.009 (0.059)	
[175/186]	Time 0.379 (0.228)	Data 0.241 (0.060)	
[176/186]	Time 0.156 (0.228)	Data 0.022 (0.059)	
[177/186]	Time 0.157 (0.227)	Data 0.009 (0.059)	
[178/186]	Time 0.156 (0.227)	Data 0.022 (0.059)	
[179/186]	Time 0.160 (0.227)	Data 0.021 (0.059)	
[180/186]	Time 0.151 (0.226)	Data 0.008 (0.058)	
[181/186]	Time 0.427 (0.227)	Data 0.288 (0.060)	
[182/186]	Time 0.157 (0.227)	Data 0.023 (0.060)	
[183/186]	Time 0.155 (0.226)	Data 0.011 (0.059)	
[184/186]	Time 0.146 (0.226)	Data 0.012 (0.059)	
[185/186]	Time 0.150 (0.226)	Data 0.020 (0.059)	
[186/186]	Time 0.145 (0.225)	Data 0.016 (0.059)	


In [None]:
# Read 'val.json' from the results directory back into `qq`.
val_json_path = os.path.join(opt.result_path, 'val.json')
with open(val_json_path, 'r') as val_json_file:
    qq = json.load(val_json_file)