In [None]:
from math import sqrt
import numpy as np
from scipy import spatial
from sklearn.preprocessing import normalize
import sklearn.metrics.pairwise as smp
 
def cosine_distance(row1, row2):
    """Return the cosine similarity between two vectors.

    Despite the function name, the value returned is ``1 - d`` where ``d`` is
    SciPy's cosine distance, i.e. the cosine *similarity*: larger values mean
    the two vectors point in more similar directions.

    Args:
        row1: First 1-D vector (any array-like accepted by SciPy).
        row2: Second 1-D vector of the same length.

    Returns:
        The cosine similarity of ``row1`` and ``row2``.
    """
    dissimilarity = spatial.distance.cosine(row1, row2)
    return 1 - dissimilarity

def euclidean_distance(row1, row2):
    """Return the Euclidean (L2) distance between two vectors.

    Args:
        row1: First vector (array-like).
        row2: Second vector with a shape compatible with ``row1``.

    Returns:
        The L2 norm of the element-wise difference ``row1 - row2``.
    """
    first = np.array(row1)
    second = np.array(row2)
    difference = first - second
    return np.linalg.norm(difference)
 
def get_neighbors(train, test_row, distance, num_neighbors, largest_first=True):
    """Return the indices of the rows of ``train`` that rank closest to ``test_row``.

    Every row of ``train`` is scored with ``distance(test_row, row)`` and the
    row indices are returned in ranked order.

    Args:
        train: Iterable of candidate rows.
        test_row: The query row.
        distance: Callable ``distance(test_row, train_row)`` returning a
            comparable score.
        num_neighbors: Maximum number of indices to return.
        largest_first: Ranking direction. ``True`` (the default, and the
            historical behaviour) puts the largest scores first, which is
            right for similarity scores such as the value returned by
            ``cosine_distance`` in this notebook. Pass ``False`` for true
            distances such as ``euclidean_distance``, where the smallest
            score is the nearest neighbour.

    Returns:
        A list of at most ``num_neighbors`` integer indices into ``train``.
        Rows with equal scores keep their original relative order.
    """
    scores = [distance(test_row, train_row) for train_row in train]
    # sorted() is stable even with reverse=True, so ties stay in input order.
    ranked_idxs = sorted(range(len(scores)), key=scores.__getitem__, reverse=largest_first)
    return ranked_idxs[:num_neighbors]

In [None]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

import os
import random
import numpy as np
from tqdm import tqdm
from distutils import util

from temporal_transform import TemporalRandomCrop, TemporalCenterCrop
from resnet3d import generate_model
from ucf101 import get_ucf_dataset
from hmdb51 import get_hmdb_dataset
from logger import Logger

# Project logger; it also exposes the values read from config.txt through
# `logger.config_dict`, which the rest of the notebook uses.
logger = Logger(show = True, html_output = True, config_file = "config.txt")

# Seed the torch and Python RNGs with a fixed value.
random.seed(1)
torch.manual_seed(1)

# Every frame is resized to a square of side `frame_size` and converted to a tensor.
frame_size = logger.config_dict['frame_resize']
resize_to_square = transforms.Resize((frame_size, frame_size))
spatial_transform = transforms.Compose([resize_to_square, transforms.ToTensor()])
logger.log("Frames resized to {}x{}".format(frame_size, frame_size))

# Temporal sampling: the config value is expected to look like "rand<N>",
# where N is the number of consecutive frames taken from each video.
sampling_method_str = logger.config_dict['sampling_method']
if "rand" in sampling_method_str:
    crop_size = int(sampling_method_str.replace("rand", ""))
    # NOTE(review): although the config says "rand", this notebook has only
    # ever *effectively* used TemporalCenterCrop (a TemporalRandomCrop was
    # built and immediately overwritten). The centre crop is kept so results
    # do not change; swap in TemporalRandomCrop(size = crop_size) here if
    # random sampling is really what is wanted.
    sampling_method = TemporalCenterCrop(size = crop_size)
    logger.log("Sampling strategy selecting {} consecutive frames with TemporalCenterCrop".format(
      crop_size))
else:
    # Fail here with a clear message instead of a NameError further down
    # when `sampling_method` is first used.
    raise ValueError("Unsupported sampling_method {!r}: expected a value containing 'rand'".format(
      sampling_method_str))

# Pull the data locations and loader settings out of the parsed configuration.
config = logger.config_dict
frame_dir_parts = (config['data_folder'], config['video_folder'], config['frame_folder'])
video_path = os.path.join(*frame_dir_parts)
annotation_path = logger.get_data_file(config['annotation_file'])
batch_size = config['batch_size']
num_workers = config['num_workers']
dataset_type = config['dataset_type']

# Both dataset modules are called with exactly the same arguments, so select
# the factory by name and build the two splits with shared options.
dataset_factories = {"ucf101": get_ucf_dataset, "hmdb51": get_hmdb_dataset}
if dataset_type not in dataset_factories:
    # Previously an unknown value fell through silently and only surfaced
    # later as a NameError on `train_dataset`.
    raise ValueError("Unsupported dataset_type {!r}; expected one of {}".format(
        dataset_type, sorted(dataset_factories)))
get_dataset = dataset_factories[dataset_type]

# NOTE(review): temporal_transform is the *string* "None", not the None
# object; kept as-is — confirm this is what the dataset factories expect.
dataset_options = dict(temporal_transform = "None", stack_clip = True,
                       is_simclr_transform = False, apply_same_per_clip = True)

train_dataset = get_dataset(video_path, annotation_path, "training", sampling_method,
                            spatial_transform, **dataset_options)
test_dataset = get_dataset(video_path, annotation_path, "validation", sampling_method,
                           spatial_transform, **dataset_options)

# Both loaders iterate in dataset order (no shuffling) and keep the last,
# possibly smaller, batch; later cells rely on that ordering to map a
# dataset index to a position inside a batch.
loader_options = dict(batch_size = batch_size, shuffle = False, drop_last = False,
                      num_workers = num_workers)

train_loader = DataLoader(train_dataset, **loader_options)
logger.log("{} Train data loaded with {} clips".format(dataset_type.upper(), len(train_dataset)))

test_loader = DataLoader(test_dataset, **loader_options)
logger.log("{} Test data loaded with {} clips".format(dataset_type.upper(), len(test_dataset)))

In [None]:
# Load the two saved arrays that the neighbour search below runs on.
# Presumably one row of features per test clip, in test_loader order — TODO confirm.
kinet_test, videoSimCLR_test = (
    np.load(logger.get_data_file(feature_file))
    for feature_file in ("kinet_ucf01_test.npy", "ucf01_test.npy")
)

In [None]:
# Rank the test rows against one query row in each feature space and keep
# the top matches (ranked by cosine similarity, highest first).
QUERY_INDEX = 2666
NUM_NEIGHBORS = 7

nei = get_neighbors(videoSimCLR_test, videoSimCLR_test[QUERY_INDEX], cosine_distance, NUM_NEIGHBORS)
kinet_nei = get_neighbors(kinet_test, kinet_test[QUERY_INDEX], cosine_distance, NUM_NEIGHBORS)
print(nei)
print(kinet_nei)

In [None]:
# Fetch the clip tensor for every index in `nei` from the test loader.
# Because test_loader is unshuffled with a fixed batch size, dataset index k
# lives in batch k // batch_size at offset k % batch_size.
#nei = kinet_nei
wanted_by_batch = {}
for clip_idx in nei:
    batch_idx, offset = divmod(clip_idx, batch_size)
    wanted_by_batch.setdefault(batch_idx, []).append((clip_idx, offset))
# Last batch that holds a requested clip; nothing after it needs loading.
last_wanted_batch = max(wanted_by_batch, default = -1)

videos = {}
with torch.no_grad():
    for i, (inputs, _labels) in enumerate(tqdm(test_loader)):
        for clip_idx, offset in wanted_by_batch.get(i, ()):
            videos[clip_idx] = inputs[offset]
        if i >= last_wanted_batch:
            # Every requested clip has been collected; stop scanning the loader.
            break

In [None]:
from torchvision.utils import save_image

# Position within `nei` of the clip whose frames are exported.
idx = 0

# save_image does not create missing directories, so make sure the target exists.
os.makedirs("output", exist_ok = True)

# Swap the first two axes so that indexing the first axis yields one image.
# Presumably the clip is (channels, frames, H, W) -> (frames, channels, H, W) — TODO confirm.
clip_frames = videos[nei[idx]].permute(1, 0, 2, 3)

# Export 4 frames, taking every 4th one (frames 0, 4, 8 and 12).
# NOTE(review): the file prefix is "kinet" even when `nei` holds the
# videoSimCLR neighbours — confirm the prefix matches the feature space used.
for i in range(4):
  frame_no = i*4
  save_image(clip_frames[frame_no], "output/kinet" + "f" + str(frame_no) + "_" + str(nei[idx]) + ".png")

In [None]:
# Load the saved label array for the test split
# (presumably one label per test clip, aligned with the feature arrays — TODO confirm).
y_test_path = logger.get_data_file("y_test_labels.npy")
y_test = np.load(y_test_path)