In [None]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

import os
import random
import numpy as np
from tqdm import tqdm
import numpy as np

from temporal_transform import TemporalRandomCrop, TemporalCenterCrop
from resnet3d import generate_model
from ucf101 import get_ucf_dataset
from hmdb51 import get_hmdb_dataset
from logger import Logger

In [None]:
# Experiment logger; it also exposes the run configuration through `config_dict`.
logger = Logger(show = True, html_output = True)

# Seed every RNG this notebook imports (torch, random, numpy) so that
# re-running from a fresh kernel starts from the same random state.
SEED = 1
torch.manual_seed(SEED)
random.seed(SEED)
np.random.seed(SEED)

# Per-channel normalisation applied after ToTensor
# (values match the commonly used ImageNet mean/std).
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

# Spatial pipeline: resize each frame to a square, convert to tensor, normalise.
frame_size = logger.config_dict['frame_resize']
spatial_transform = transforms.Compose([transforms.Resize((frame_size, frame_size)),
                                        transforms.ToTensor(), normalize])
logger.log("Frames resized to {}x{}".format(frame_size, frame_size))

# Temporal sampling: the config value is expected to look like "rand<N>",
# where <N> is the number of consecutive frames to crop.
sampling_method_str = logger.config_dict['sampling_method']
if "rand" in sampling_method_str:
    crop_size = int(sampling_method_str.replace("rand", ""))
    sampling_method = TemporalRandomCrop(size = crop_size)
    logger.log("Sampling strategy selecting {} consecutives frames from a random index".format(
      crop_size))
else:
    # Fail here with a clear message instead of a NameError on
    # `sampling_method` in a later cell.
    raise ValueError(
        "Unsupported sampling_method {!r}: expected a value of the form 'rand<N>'".format(
            sampling_method_str))

In [None]:
# Resolve data locations and loader settings from the experiment configuration.
video_path = os.path.join(logger.data_folder,
                          logger.config_dict['video_folder'], logger.config_dict['frame_folder'])
annotation_path = logger.get_data_file(logger.config_dict['annotation_file'])
batch_size  = logger.config_dict['batch_size']
num_workers = logger.config_dict['num_workers']
dataset_type = logger.config_dict['dataset_type']

# Map each supported dataset name to its factory; both factories are called
# with the same arguments, so the train/validation construction is shared.
dataset_builders = {"ucf101": get_ucf_dataset, "hmdb51": get_hmdb_dataset}
if dataset_type not in dataset_builders:
    # Without this check an unknown name would leave train_dataset/test_dataset
    # undefined (or silently reuse stale kernel state).
    raise ValueError("Unsupported dataset_type {!r}: expected one of {}".format(
        dataset_type, sorted(dataset_builders)))
build_dataset = dataset_builders[dataset_type]

# Keyword arguments common to both splits.
dataset_kwargs = dict(temporal_transform = "None",
                      stack_clip = True, is_moco_transform = False,
                      apply_same_per_clip = True)
train_dataset = build_dataset(video_path, annotation_path, "training", sampling_method,
                              spatial_transform, **dataset_kwargs)
test_dataset = build_dataset(video_path, annotation_path, "validation", sampling_method,
                             spatial_transform, **dataset_kwargs)

# Both loaders iterate in dataset order and keep the final partial batch:
# they are only used for feature extraction, not for gradient-based training.
loader_kwargs = dict(batch_size = batch_size, shuffle = False, drop_last = False,
                     num_workers = num_workers)

logger.log("Batch size {}".format(batch_size))
train_loader  = DataLoader(train_dataset, **loader_kwargs)
logger.log("{} Train data loaded with {} clips".format(dataset_type.upper(), len(train_dataset)))

test_loader = DataLoader(test_dataset, **loader_kwargs)
logger.log("{} Test data loaded with {} clips".format(dataset_type.upper(), len(test_dataset)))

In [None]:
# Architecture settings come from the experiment configuration.
num_classes = logger.config_dict['num_classes']
base_model = logger.config_dict['base_convnet']

# NOTE(review): the depth is fixed at 18 regardless of `base_convnet`;
# confirm the two agree for the checkpoint being evaluated.
model = generate_model(n_classes = num_classes, model_depth = 18)

model_loaded_message = "Model {} 3D simCLR loaded {}".format(base_model, model)
logger.log(model_loaded_message)

In [None]:
# Checkpoint location and the epoch it was saved at, both from the config.
checkpoint_file  = logger.config_dict['model_checkpoint_file']
checkpoint_epoch = logger.config_dict['model_checkpoint_epoch']

# Deserialize onto the CPU; the model is moved to the GPU later if one is available.
# NOTE(review): depending on the torch version, torch.load may unpickle arbitrary
# objects -- only load checkpoint files from a trusted source.
checkpoint_path = logger.get_model_file(checkpoint_file)
checkpoint = torch.load(checkpoint_path, map_location="cpu")

loading_message = "Loading MOCO model weights at epoch {} from {}".format(
    checkpoint_epoch, checkpoint_file)
logger.log(loading_message)

In [None]:
# Build the backbone weights in a NEW dict instead of editing
# checkpoint['state_dict'] in place. The in-place version deleted the original
# keys, so running the cell a second time produced an empty dict and a failing
# assertion; this version gives the same result on every run.
encoder_prefix = "module.encoder_q."
state_dict = {}
for key, value in checkpoint['state_dict'].items():
    # retain only encoder_q up to before the embedding layer
    if key.startswith('module.encoder_q') and not key.startswith('module.encoder_q.fc'):
        # remove prefix so the key matches the bare model's parameter names
        state_dict[key[len(encoder_prefix):]] = value

# strict=False because the fc weights were dropped on purpose above; the check
# below makes sure those are the ONLY parameters that were not restored.
msg = model.load_state_dict(state_dict, strict=False)
assert set(msg.missing_keys) == {"fc.weight", "fc.bias"}, (
    "Unexpected missing keys when loading encoder weights: {}".format(msg.missing_keys))

In [None]:
# Freeze the backbone: every parameter except the final fc layer's weight
# and bias stops tracking gradients.
for name, param in model.named_parameters():
    if name in ('fc.weight', 'fc.bias'):
        continue
    param.requires_grad = False

In [None]:
# Replace the classification head with a pass-through layer so the forward
# pass returns whatever the network feeds into `fc`.
model.fc = nn.Identity()

# Run on the GPU when one is present, otherwise stay on the CPU.
gpu_avail = torch.cuda.is_available()
logger.log("GPU available: {}".format(gpu_avail))
model = model.cuda() if gpu_avail else model

In [None]:
# Extract one feature vector per training clip with the model in eval mode.
model = model.eval()
logger.log("Generating MOCO features from training data ...")
X_train_feature = []
y_train = []

# tqdm wraps the loader directly (not enumerate(...)) so it can read the
# loader's length and show a total; the batch index was never used.
# Autograd is switched off because this is inference only.
with torch.no_grad():
    for inputs, labels in tqdm(train_loader):
        if gpu_avail:
            inputs = inputs.cuda()

        features = model(inputs)
        # extend() appends one row per clip in the batch
        X_train_feature.extend(features.cpu().numpy())
        y_train.extend(labels.numpy())


X_train_feature = np.array(X_train_feature)
logger.log("Train features of shape: {}".format(X_train_feature.shape), show_time = True)

In [None]:
# Extract one feature vector per test clip with the model in eval mode.
model = model.eval()
logger.log("Generating MOCO features from testing data ...")
X_test_feature = []
y_test = []

# tqdm wraps the loader directly (not enumerate(...)) so it can read the
# loader's length and show a total; the batch index was never used.
# Autograd is switched off because this is inference only.
with torch.no_grad():
    for inputs, labels in tqdm(test_loader):
        if gpu_avail:
            inputs = inputs.cuda()

        features = model(inputs)
        # extend() appends one row per clip in the batch
        X_test_feature.extend(features.cpu().numpy())
        y_test.extend(labels.numpy())

X_test_feature = np.array(X_test_feature)
logger.log("Test features of shape: {}".format(X_test_feature.shape), show_time = True)

In [None]:
from sklearn import preprocessing
from sklearn.linear_model import LogisticRegression

# Standardise the features using statistics estimated on the training split only.
scaler = preprocessing.StandardScaler()
scaler.fit(X_train_feature)

# Linear classifier trained on top of the frozen features.
clf = LogisticRegression(C=1.0, solver='lbfgs', max_iter=1000, random_state=0, n_jobs = -1)

logger.log("Start training LogisticRegression on MOCO features ...")
X_train_scaled = scaler.transform(X_train_feature)
clf.fit(X_train_scaled, y_train)
logger.log("Finished training", show_time = True)

In [None]:
# Report the classifier's score on both splits, scaling each with the
# scaler fitted on the training features.
logger.log("MOCO feature evaluation on {}".format(dataset_type))

train_score = clf.score(scaler.transform(X_train_feature), y_train)
logger.log("Train score: {:.3f}".format(train_score))

test_score = clf.score(scaler.transform(X_test_feature), y_test)
logger.log("Test score: {:.3f}".format(test_score))