In [1]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

import os
import random
import numpy as np
from tqdm import tqdm
from distutils import util

from temporal_transform import TemporalRandomCrop
from resnet3d import generate_model
from ucf101 import get_ucf_dataset
from hmdb51 import get_hmdb_dataset
from logger import Logger

In [2]:
# Experiment setup: logger/config, RNG seeds, and the spatial/temporal sampling transforms.
logger = Logger(show = True, html_output = True, config_file = "config.txt")

# Seed every RNG this notebook has imported so that re-runs are comparable.
torch.manual_seed(1)
random.seed(1)
np.random.seed(1)

# Every frame is resized to a square of side `frame_resize` before being turned into a tensor.
frame_size = logger.config_dict['frame_resize']
spatial_transform = transforms.Compose([transforms.Resize((frame_size, frame_size)), transforms.ToTensor()])
logger.log("Frames resized to {}x{}".format(frame_size, frame_size))

# The sampling method is encoded as "rand<N>": N consecutive frames taken from a random start index.
sampling_method_str = logger.config_dict['sampling_method']
if "rand" in sampling_method_str:
    crop_size = int(sampling_method_str.replace("rand", ""))
    sampling_method = TemporalRandomCrop(size = crop_size)
    logger.log("Sampling strategy selecting {} consecutives frames from a random index".format(
      crop_size))
else:
    # Fail here rather than leaving `sampling_method` unbound (or stale from an earlier
    # kernel run) for the dataset cell to trip over.
    raise ValueError("Unsupported sampling_method '{}': expected 'rand<N>'".format(sampling_method_str))

2020.05.29-18:56:08 Initialize the logger
2020.05.29-18:56:08 Create logs folder logs
2020.05.29-18:56:08 Create log file logs\2020-05-29_18_56_08_log0.html
2020.05.29-18:56:08 Read config file config.txt
2020.05.29-18:56:08 Create models folder models
2020.05.29-18:56:08 Create output folder output
2020.05.29-18:56:08 Create data folder data
[2020.05.29-18:56:08] Frames resized to 56x56
[2020.05.29-18:56:08] Sampling strategy selecting 16 consecutives frames from a random index


In [3]:
# Build the train/validation datasets for the configured benchmark and wrap them in loaders.
video_path = os.path.join(logger.data_folder,
                          logger.config_dict['video_folder'], logger.config_dict['frame_folder'])
annotation_path = logger.get_data_file(logger.config_dict['annotation_file'])
batch_size  = logger.config_dict['batch_size']
num_workers = logger.config_dict['num_workers']
dataset_type = logger.config_dict['dataset_type']

# One factory per supported benchmark; both are called with the same arguments.
dataset_factories = {"ucf101": get_ucf_dataset, "hmdb51": get_hmdb_dataset}
if dataset_type not in dataset_factories:
    # Without this check an unknown value would leave train_dataset/test_dataset unbound
    # (or silently reuse datasets from an earlier kernel run).
    raise ValueError("Unsupported dataset_type '{}': expected one of {}".format(
        dataset_type, sorted(dataset_factories)))
build_dataset = dataset_factories[dataset_type]

# Options shared by both splits: no extra temporal transform, no SimCLR augmentation.
dataset_kwargs = dict(temporal_transform = "None", stack_clip = True,
                      is_simclr_transform = False, apply_same_per_clip = True)

train_dataset = build_dataset(video_path, annotation_path, "training", sampling_method,
                              spatial_transform, **dataset_kwargs)
test_dataset = build_dataset(video_path, annotation_path, "validation", sampling_method,
                             spatial_transform, **dataset_kwargs)

# Loaders are only used for a single feature-extraction pass, so order is kept
# (shuffle = False) and no clip is dropped (drop_last = False).
loader_kwargs = dict(batch_size = batch_size, shuffle = False, drop_last = False,
                     num_workers = num_workers)

train_loader  = DataLoader(train_dataset, **loader_kwargs)
logger.log("{} Train data loaded with {} clips".format(dataset_type.upper(), len(train_dataset)))

test_loader = DataLoader(test_dataset, **loader_kwargs)
logger.log("{} Test data loaded with {} clips".format(dataset_type.upper(), len(test_dataset)))

dataset loading [0/9537]
dataset loading [1000/9537]
dataset loading [2000/9537]
dataset loading [3000/9537]
dataset loading [4000/9537]
dataset loading [5000/9537]
dataset loading [6000/9537]
dataset loading [7000/9537]
dataset loading [8000/9537]
dataset loading [9000/9537]
[2020.05.29-18:56:19] UCF101 Train data loaded with 9537 clips
dataset loading [0/3783]
dataset loading [1000/3783]
dataset loading [2000/3783]
dataset loading [3000/3783]
[2020.05.29-18:56:21] UCF101 Test data loaded with 3783 clips


In [4]:
# Instantiate the 3D ResNet with the SimCLR projection head and log its architecture.
base_model, output_dim = (logger.config_dict[key] for key in ('base_convnet', 'simclr_out_dim'))

# NOTE(review): model_depth is fixed at 18 here, while the name that gets logged comes from
# the `base_convnet` config entry -- confirm the two are meant to stay in sync.
model = generate_model(
    model_depth = 18,
    add_projection_layers = True,
    projection_dim = output_dim,
)
logger.log("Model {} 3D simCLR loaded {}".format(base_model, model))

[2020.05.29-18:56:22] Model resnet18 3D simCLR loaded ResNet(
  (conv1): Conv3d(3, 64, kernel_size=(7, 7, 7), stride=(1, 2, 2), padding=(3, 3, 3), bias=False)
  (bn1): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool3d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv3d(64, 64, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1), bias=False)
      (bn1): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv3d(64, 64, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1), bias=False)
      (bn2): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv3d(64, 64, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1), bias=False)
      (bn1): BatchNorm3d(64, eps=1e-05

In [6]:
# Load pretrained weights into `model`: either Kinetics-pretrained backbone weights
# (projection head left at its initial values) or a SimCLR checkpoint produced by this project.
checkpoint_epoch = logger.config_dict['model_checkpoint_epoch']
checkpoint_file  = logger.config_dict['model_checkpoint_file']

# NOTE(review): distutils was removed from the standard library in Python 3.12;
# this flag parsing needs a replacement before upgrading the interpreter.
use_kinet = bool(util.strtobool(logger.config_dict['use_kinet']))

# SECURITY: torch.load unpickles the file -- only load checkpoints from a trusted source.
if use_kinet:
    logger.log("Loading kinetics pretrained model weights at epoch {} from {}".format(
      checkpoint_epoch, checkpoint_file))
    # map_location = "cpu" lets a checkpoint saved on a GPU be opened on a CPU-only machine;
    # load_state_dict copies the tensors onto whatever device the model lives on.
    checkpoint = torch.load(logger.get_model_file(checkpoint_file), map_location = "cpu")
    msg = model.load_state_dict(checkpoint['state_dict'], strict = False)
    # Only the projection-head layers may be absent from the Kinetics checkpoint.
    assert set(msg.missing_keys) == {"fc1.weight", "fc1.bias", "fc2.weight", "fc2.bias"}
    checkpoint_epoch = 0
    # No SimCLR training batch size exists on this path; bind the name so later cells can read it.
    training_batch_size = None
else:
    # The second "_"-separated token of the filename is "<batch size>b", e.g. "256b".
    training_batch_size = int(checkpoint_file.split("_")[1].replace("b", ""))
    logger.log("Loading simCLR model weights at epoch {} with {} training batchsize from {}".format(
        checkpoint_epoch, training_batch_size, checkpoint_file))
    checkpoint = torch.load(logger.get_model_file(checkpoint_file), map_location = "cpu")
    state_dict = checkpoint['model']

    try:
        model.load_state_dict(state_dict)
    except RuntimeError:
        # load_state_dict raises RuntimeError on key mismatch; retry with the model wrapped in
        # DataParallel, as the original fallback did. Other errors now propagate
        # instead of being swallowed.
        model = nn.DataParallel(model)
        model.load_state_dict(state_dict)

    temporal_transform_str = logger.config_dict['temporal_transform_type']
    step = logger.config_dict['temporal_transform_step']

    if temporal_transform_str != "None":
        logger.log("Using {} as temporal transform with step {}".format(temporal_transform_str, step))

[2020.05.29-18:56:26] Loading model weights at epoch 100 with 256 training batchsize from resnet18_256b_ucf101_56f_rand16_3d_e100_l175.80_2020-05-29_15_13_22.ptm


<All keys matched successfully>

In [5]:
# Use CUDA when it is available; `gpu_avail` is read again by the feature-extraction cells.
gpu_avail = torch.cuda.is_available()
logger.log("GPU available: {}".format(gpu_avail))
model = model.cuda() if gpu_avail else model

[2020.05.29-18:56:23] GPU available: True


In [7]:
# Run the model in eval mode over the training loader and collect one feature vector
# and one label per clip.
model = model.eval()
logger.log("Generating SimCLR features from training data ...")
X_train_feature = []
y_train = []

with torch.no_grad():
    # Wrap the loader itself (not enumerate(loader)) so tqdm can read len(train_loader)
    # and show a real progress bar with an ETA.
    for inputs, labels in tqdm(train_loader, desc = "train features"):
        if gpu_avail:
            inputs = inputs.cuda()

        # The model returns a pair; only the first element is kept as the clip feature.
        features, _ = model(inputs)
        X_train_feature.extend(features.detach().cpu().numpy())
        y_train.extend(labels.numpy())

X_train_feature = np.array(X_train_feature)
logger.log("Train features of shape: {}".format(X_train_feature.shape), show_time = True)

# Saving the features to disk is disabled. It used to be a trailing triple-quoted string,
# which Jupyter echoes as the cell output; it is kept here as plain comments instead.
# TODO: `temporal_shift` and `time_crop_type` are not defined in the visible cells --
# define them (or drop them from the filename) before re-enabling.
# train_feats_filename  = base_model + "_" + str(training_batch_size) + "b_" + str(frame_size) + "f_" + str(temporal_shift)
# train_feats_filename += "s_" + "x_train_feats" + "_" + dataset_type + "_" + time_crop_type + "_3d.npy"
#
# train_feats_filename = logger.get_output_file(train_feats_filename)
# logger.log("Saving training data simCLR features at {}".format(train_feats_filename))
# np.save(train_feats_filename, X_train_feature)

[2020.05.29-18:56:27] Generating SimCLR features from training data ...


149it [06:59,  2.81s/it]

[2020.05.29-19:03:31] Train features of shape: (9536, 512) [423.30s]
[2020.05.29-19:03:31] Saving training data simCLR features at output\resnet18_256b_56f_x_train_feats_ucf101_rand16_3d.npy





In [8]:
# Run the model in eval mode over the test loader and collect one feature vector
# and one label per clip.
model = model.eval()
logger.log("Generating SimCLR features from testing data ...")
X_test_feature = []
y_test = []

with torch.no_grad():
    # Wrap the loader itself (not enumerate(loader)) so tqdm can read len(test_loader)
    # and show a real progress bar with an ETA.
    for inputs, labels in tqdm(test_loader, desc = "test features"):
        if gpu_avail:
            inputs = inputs.cuda()

        # The model returns a pair; only the first element is kept as the clip feature.
        features, _ = model(inputs)
        X_test_feature.extend(features.detach().cpu().numpy())
        y_test.extend(labels.numpy())

X_test_feature = np.array(X_test_feature)
logger.log("Test features of shape: {}".format(X_test_feature.shape), show_time = True)

# Saving the features to disk is disabled. It used to be a trailing triple-quoted string,
# which Jupyter echoes as the cell output; it is kept here as plain comments instead.
# TODO: `temporal_shift` and `time_crop_type` are not defined in the visible cells --
# define them (or drop them from the filename) before re-enabling.
# test_feats_filename  = base_model + "_" + str(training_batch_size) + "b_" + str(frame_size) + "f_" + str(temporal_shift)
# test_feats_filename += "s_" + "x_test_feats" + "_" + dataset_type + "_" + time_crop_type + "_3d.npy"
#
# test_feats_filename = logger.get_output_file(test_feats_filename)
# logger.log("Saving testing data simCLR features at {}".format(test_feats_filename))
# np.save(test_feats_filename, X_test_feature)

[2020.05.29-19:03:31] Generating SimCLR features from testing data ...


59it [03:20,  3.39s/it]

[2020.05.29-19:07:05] Test features of shape: (3776, 512) [214.36s]
[2020.05.29-19:07:05] Saving testing data simCLR features at output\resnet18_256b_56f_x_test_feats_ucf101_rand16_3d.npy





In [9]:
from sklearn.linear_model import LogisticRegression
from sklearn import preprocessing

# Linear evaluation: standardise the extracted features with statistics computed on the
# training set, then fit a logistic-regression classifier on the standardised features.
scaler = preprocessing.StandardScaler().fit(X_train_feature)
clf = LogisticRegression(C = 1.0, solver = 'lbfgs', max_iter = 2000, random_state = 0, n_jobs = -1)

logger.log("Start training LogisticRegression on SimCLR features ...")
X_train_scaled = scaler.transform(X_train_feature)
clf.fit(X_train_scaled, y_train)
logger.log("Finished training", show_time = True)

[2020.05.29-19:07:12] Start training LogisticRegression on SimCLR features ...
[2020.05.29-19:09:27] Finished training [134.66s]


In [10]:
# Report classifier accuracy on the train and test features.
# The SimCLR path parses training_batch_size from the checkpoint filename, so it must not be
# overwritten here; only the Kinetics-pretrained path has no batch size to report.
if use_kinet:
    training_batch_size = None
logger.log("SimCLR with {} batchsize feature evaluation on {}".format(training_batch_size, dataset_type))

# Both splits are standardised with the scaler fitted on the training features.
train_score = clf.score(scaler.transform(X_train_feature), y_train)
test_score = clf.score(scaler.transform(X_test_feature), y_test)
logger.log("Train score: {:.4f}".format(train_score))
logger.log("Test score: {:.4f}".format(test_score))

[2020.05.29-19:09:27] SimCLR with 256 batchsize feature evaluation
[2020.05.29-19:09:27] Train score: 0.996
[2020.05.29-19:09:27] Test score: 0.329
