In [None]:
import os
import random
from distutils import util

import numpy as np
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from sklearn import preprocessing
from sklearn.linear_model import LogisticRegression
from torch.utils.data import DataLoader
from tqdm import tqdm

from temporal_transform import TemporalRandomCrop
from resnet3d import generate_model
from ucf101 import get_ucf_dataset
from hmdb51 import get_hmdb_dataset
from logger import Logger

In [None]:
# Experiment logger; every setting used below is read from its `config_dict`.
logger = Logger(show = True, html_output = True, config_file = "config.txt")

# Seed every RNG this notebook imports so repeated runs sample the same clips.
torch.manual_seed(1)
random.seed(1)
np.random.seed(1)

# Per-frame preprocessing: resize to a square of `frame_size`, then convert to a tensor.
frame_size = logger.config_dict['frame_resize']
spatial_transform = transforms.Compose([transforms.Resize((frame_size, frame_size)), transforms.ToTensor()])
logger.log("Frames resized to {}x{}".format(frame_size, frame_size))

# The only sampling strategy handled here is "rand<N>": N frames taken by TemporalRandomCrop.
sampling_method_str = logger.config_dict['sampling_method']
if "rand" not in sampling_method_str:
    # Fail here instead of with a NameError on `sampling_method` in the dataset cell.
    raise ValueError(
        "Unsupported sampling_method {!r}: expected a value of the form 'rand<N>'".format(
            sampling_method_str))

crop_size = int(sampling_method_str.replace("rand", ""))
sampling_method = TemporalRandomCrop(size = crop_size)
logger.log("Sampling strategy selecting {} consecutives frames from a random index".format(
  crop_size))

In [None]:
# Locations and loader settings, all taken from the logger's configuration.
video_path = os.path.join(logger.data_folder, 
                          logger.config_dict['video_folder'], logger.config_dict['frame_folder'])
annotation_path = logger.get_data_file(logger.config_dict['annotation_file'])
batch_size  = logger.config_dict['batch_size']
num_workers = logger.config_dict['num_workers']
dataset_type = logger.config_dict['dataset_type']

# Map each supported dataset name to its factory; both are called with the same arguments.
dataset_builders = {"ucf101": get_ucf_dataset, "hmdb51": get_hmdb_dataset}
if dataset_type not in dataset_builders:
    # Fail here instead of with a NameError on `train_dataset` further down.
    raise ValueError("Unsupported dataset_type {!r}: expected one of {}".format(
        dataset_type, sorted(dataset_builders)))
build_dataset = dataset_builders[dataset_type]

# Options shared by the training and validation splits.
dataset_kwargs = dict(temporal_transform = "None",
                      stack_clip = True, is_simclr_transform = False,
                      apply_same_per_clip = True)

train_dataset = build_dataset(video_path, annotation_path, "training", sampling_method,
                              spatial_transform, **dataset_kwargs)
test_dataset = build_dataset(video_path, annotation_path, "validation", sampling_method,
                             spatial_transform, **dataset_kwargs)

# No shuffling and no dropped batches: every clip is visited once, in dataset order.
train_loader  = DataLoader(train_dataset, batch_size = batch_size, shuffle = False, drop_last = False, 
                           num_workers = num_workers)
logger.log("{} Train data loaded with {} clips".format(dataset_type.upper(), len(train_dataset)))

test_loader = DataLoader(test_dataset, batch_size = batch_size, shuffle = False, drop_last = False,
                         num_workers = num_workers)
logger.log("{} Test data loaded with {} clips".format(dataset_type.upper(), len(test_dataset)))

In [None]:
# Projection size and backbone label come from the config; the label is only used for logging
# and (in later cells) commented-out file naming.
output_dim = logger.config_dict['simclr_out_dim']
base_model = logger.config_dict['base_convnet']

# NOTE(review): the depth is fixed at 18 here regardless of `base_convnet` - confirm they agree.
model = generate_model(
    projection_dim = output_dim,
    add_projection_layers = True,
    model_depth = 18,
)
logger.log("Model {} 3D simCLR loaded {}".format(base_model, model))

In [None]:
# Load pretrained weights into `model`, either from a kinetics checkpoint or a SimCLR one,
# depending on the `use_kinet` config flag.
checkpoint_epoch = logger.config_dict['model_checkpoint_epoch']
checkpoint_file  = logger.config_dict['model_checkpoint_file']

use_kinet = bool(util.strtobool(logger.config_dict['use_kinet']))

if use_kinet:
    logger.log("Loading kinetics pretrained model weights at epoch {} from {}".format(
      checkpoint_epoch, checkpoint_file))
    # Load onto the CPU so a GPU-saved checkpoint also opens on a CPU-only host;
    # the model is moved to the GPU in a later cell when one is available.
    checkpoint = torch.load(logger.get_model_file(checkpoint_file), map_location = "cpu")
    msg = model.load_state_dict(checkpoint['state_dict'], strict = False)
    # With strict = False only the two fc layers are allowed to be absent from the checkpoint.
    expected_missing = {"fc1.weight", "fc1.bias", "fc2.weight", "fc2.bias"}
    assert set(msg.missing_keys) == expected_missing, \
        "Unexpected missing keys when loading checkpoint: {}".format(sorted(msg.missing_keys))
    checkpoint_epoch = 0
    # No SimCLR training batch size exists for this branch; bind the name so later cells can read it.
    training_batch_size = None
else:    
    # The batch size is parsed from the second "_"-separated token of the file name, minus "b".
    training_batch_size = int(checkpoint_file.split("_")[1].replace("b", ""))
    logger.log("Loading simCLR model weights at epoch {} with {} training batchsize from {}".format(
        checkpoint_epoch, training_batch_size, checkpoint_file))
    checkpoint = torch.load(logger.get_model_file(checkpoint_file), map_location = "cpu")

    try:
        model.load_state_dict(checkpoint['model'])
    except RuntimeError:
        # load_state_dict raises RuntimeError on a key mismatch. Retry with the model wrapped in
        # DataParallel, whose parameter names carry a "module." prefix - presumably the
        # checkpoint was saved from a wrapped model. Any other error now propagates unchanged.
        model = nn.DataParallel(model)
        model.load_state_dict(checkpoint['model'])

    temporal_transform_str = logger.config_dict['temporal_transform_type']
    step = logger.config_dict['temporal_transform_step']

    if temporal_transform_str != "None":
        logger.log("Using {} as temporal transform with step {}".format(temporal_transform_str, step))

In [None]:
# Record whether CUDA is usable; later cells reuse `gpu_avail` to place inputs.
gpu_avail = torch.cuda.is_available()
logger.log("GPU available: {}".format(gpu_avail))

# Move the model to the GPU only when one is present; otherwise leave it untouched.
model = model.cuda() if gpu_avail else model

In [None]:
# Run the model in eval mode over the training loader and collect one feature row per clip.
model = model.eval()
logger.log("Generating SimCLR features from training data ...")
X_train_feature = []
y_train = []

with torch.no_grad():
    # Wrap the loader itself (not enumerate(...)) so tqdm can read its length and show a total/ETA.
    for inputs, labels in tqdm(train_loader):
        if gpu_avail:
            inputs = inputs.cuda()

        # The model returns a pair; only the first element is kept as the feature batch.
        features, _ = model(inputs)
        X_train_feature.extend(features.detach().cpu().numpy())
        y_train.extend(labels.numpy())

X_train_feature = np.array(X_train_feature)
logger.log("Train features of shape: {}".format(X_train_feature.shape), show_time = True)

# Disabled: saving the training features to disk. Kept as comments (rather than a bare string
# literal, which the notebook would echo as cell output).
# TODO: `temporal_shift` and `time_crop_type` are not defined in the visible cells; define them
# before re-enabling.
# train_feats_filename  = base_model + "_" + str(training_batch_size) + "b_" + str(frame_size) + "f_" + str(temporal_shift)
# train_feats_filename += "s_" + "x_train_feats" + "_" + dataset_type + "_" + time_crop_type + "_3d.npy"
#
# train_feats_filename = logger.get_output_file(train_feats_filename)
# logger.log("Saving training data simCLR features at {}".format(train_feats_filename))
# np.save(train_feats_filename, X_train_feature)

In [None]:
# Run the model in eval mode over the test loader and collect one feature row per clip.
model = model.eval()
logger.log("Generating SimCLR features from testing data ...")
X_test_feature = []
y_test = []

with torch.no_grad():
    # Wrap the loader itself (not enumerate(...)) so tqdm can read its length and show a total/ETA.
    for inputs, labels in tqdm(test_loader):
        if gpu_avail:
            inputs = inputs.cuda()

        # The model returns a pair; only the first element is kept as the feature batch.
        features, _ = model(inputs)
        X_test_feature.extend(features.detach().cpu().numpy())
        y_test.extend(labels.numpy())

X_test_feature = np.array(X_test_feature)
logger.log("Test features of shape: {}".format(X_test_feature.shape), show_time = True)

# Disabled: saving the testing features to disk. Kept as comments (rather than a bare string
# literal, which the notebook would echo as cell output).
# TODO: `temporal_shift` and `time_crop_type` are not defined in the visible cells; define them
# before re-enabling.
# test_feats_filename  = base_model + "_" + str(training_batch_size) + "b_" + str(frame_size) + "f_" + str(temporal_shift) 
# test_feats_filename += "s_" + "x_test_feats" + "_" + dataset_type + "_" + time_crop_type + "_3d.npy"
#
# test_feats_filename = logger.get_output_file(test_feats_filename)
# logger.log("Saving testing data simCLR features at {}".format(test_feats_filename))
# np.save(test_feats_filename, X_test_feature)

In [None]:
# Linear evaluation: standardise the extracted features, then fit a logistic-regression probe.
# `LogisticRegression` and `preprocessing` are imported in the notebook's top import cell so a
# missing scikit-learn is reported before the expensive feature extraction runs.
scaler = preprocessing.StandardScaler()
# Fit the scaler and scale the training features in one pass; `scaler` stays fitted for later cells.
X_train_scaled = scaler.fit_transform(X_train_feature)

clf = LogisticRegression(random_state=0, max_iter=2000, solver='lbfgs', C=1.0, n_jobs = -1)

logger.log("Start training LogisticRegression on SimCLR features ...")
clf.fit(X_train_scaled, y_train)
logger.log("Finished training", show_time = True)

In [None]:
# Report the probe's accuracy on both splits.
# Keep the batch size parsed from the SimCLR checkpoint name when an earlier cell bound it;
# fall back to None (e.g. kinetics weights) instead of always overwriting it.
training_batch_size = globals().get("training_batch_size")
logger.log("SimCLR with {} batchsize feature evaluation on {}".format(training_batch_size, dataset_type))

# Scale each split with the already-fitted scaler before scoring.
train_score = clf.score(scaler.transform(X_train_feature), y_train)
test_score = clf.score(scaler.transform(X_test_feature), y_test)
logger.log("Train score: {:.4f}".format(train_score))
logger.log("Test score: {:.4f}".format(test_score))