In [None]:
# Load database

In [None]:
%cd /content/drive/MyDrive/video-fingerprinting/

/content/drive/MyDrive/video-fingerprinting


In [None]:
!pip install scann
!pip install efficientnet_pytorch

In [None]:
import numpy as np
import cv2
from os import path as osp
import glob
from PIL import Image
import scann
import time
import torch
import pickle
from tqdm import tqdm
from torchvision import datasets, models, transforms

In [None]:
# Preparing database for model

In [None]:
# Preprocessing applied to each frame before it is passed to the model:
# resize to 224x224, convert the PIL image to a tensor, then normalize every
# channel with the mean/std values commonly used for ImageNet-pretrained models.
data_transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [None]:
# Making fingerprints

In [None]:
def extract_fp(name, frame_step=30):
    """Compute frame-level fingerprints for one video.

    Every ``frame_step``-th frame of the video is preprocessed with the
    notebook-level ``data_transform`` and passed through the notebook-level
    ``model`` on the GPU; the model outputs are collected as fingerprints.

    Args:
        name: Video file name without extension. The first file matching
            ``vcdb-dataset/<name>.*`` is used.
        frame_step: Sampling interval in frames; with the default of 30,
            frames number 30, 60, 90, ... are fingerprinted.

    Returns:
        np.ndarray stacking one model output per sampled frame along the
        first axis. The array is empty when no frame was sampled (video
        shorter than ``frame_step`` frames, or the file could not be opened).

    Raises:
        ValueError: if ``frame_step`` is smaller than 1.
        FileNotFoundError: if no file matches ``vcdb-dataset/<name>.*``.
    """
    if frame_step < 1:
        raise ValueError(f"frame_step must be >= 1, got {frame_step}")

    matches = glob.glob(osp.join('vcdb-dataset', name + '.*'))
    if not matches:
        raise FileNotFoundError(
            f"no video file found for {name!r} in 'vcdb-dataset'")
    video_pth = matches[0]

    # Inference only: make sure BatchNorm/dropout layers are not left in
    # training mode, otherwise the outputs are noisy and the running
    # statistics of the network drift with every frame.
    model.eval()

    cap = cv2.VideoCapture(video_pth)
    video_fp = []
    frame_idx = 0
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            frame_idx += 1
            if frame_idx % frame_step != 0:
                continue
            # OpenCV decodes frames as BGR, while PIL and the normalization
            # constants of the pretrained model assume RGB channel order.
            # NOTE: fingerprints saved before this conversion was added are
            # not comparable with newly extracted ones.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            batch = data_transform(Image.fromarray(rgb_frame)).unsqueeze(0)
            with torch.no_grad():
                fp = model(batch.cuda())
            video_fp.append(fp.cpu().numpy())
    finally:
        # Always free the decoder, even if preprocessing or the model raised.
        cap.release()
    return np.array(video_fp)

In [None]:
# EfficientNet model

In [None]:
# model = models.vgg16(pretrained=True).cuda()
from efficientnet_pytorch import EfficientNet
# Load the pretrained EfficientNet-B0 on the GPU and switch it to evaluation
# mode: the network is only used as a fixed feature extractor, so BatchNorm
# must use its stored statistics and dropout must be disabled.
model = EfficientNet.from_pretrained('efficientnet-b0').cuda().eval()

In [None]:
# Training

In [None]:
# Load the dataset description. NOTE(security): pickle.load can execute
# arbitrary code, so this file must come from a trusted source.
with open('ndvr-dml/datasets/vcdb.pickle', 'rb') as f:
    vcdb_info = pickle.load(f)

result_vl = []  # video-level fingerprints: mean over the sampled frames
result_fl = []  # frame-level fingerprints: one stack of frames per video
for name in tqdm(vcdb_info['index'][:528]):
    fp = extract_fp(name)
    if fp.size == 0:
        # An empty fingerprint would silently turn into a NaN mean and break
        # the normalization / search steps later on.
        raise ValueError(f"no frames could be fingerprinted for video {name!r}")
    result_vl.append(fp.mean(0))
    result_fl.append(fp)
result_vl = np.array(result_vl)

# Videos differ in length, so the per-video frame stacks are ragged. Recent
# NumPy versions refuse to build an array from ragged input implicitly, so an
# object array with one entry per video is created explicitly.
frame_level = np.empty(len(result_fl), dtype=object)
for idx, video_fp in enumerate(result_fl):
    frame_level[idx] = video_fp
result_fl = frame_level

with open('fps_efficient_b0.npy', 'wb') as f:
    np.save(f, result_vl)
# The object array is stored pickled; read it back with
# np.load(..., allow_pickle=True).
with open('fps_efficient_b0-fl.npy', 'wb') as f:
    np.save(f, result_fl)

In [None]:
# Read the saved video-level fingerprints and drop all singleton axes.
dataset = np.squeeze(np.load('fps_efficient_b0.npy'))

In [None]:
# Data normalization

In [None]:
# Scale every row of the fingerprint matrix to unit L2 norm.
normalized_dataset = dataset / np.linalg.norm(dataset, axis=1, keepdims=True)

In [None]:
# ScaNN search algorithm

In [None]:
# Build the ScaNN index over the normalized fingerprints using dot-product
# similarity and returning 528 neighbours per query. The tree stage has as many
# leaves as it searches (528 of 528), so no partition is skipped at query time.
searcher = (
    scann.scann_ops_pybind.builder(normalized_dataset, 528, "dot_product")
    .tree(num_leaves=528, num_leaves_to_search=528, training_sample_size=528)
    .score_ah(2, anisotropic_quantization_threshold=0.2)
    .reorder(528)
    .build()
)

In [None]:
# Run one batched query containing every fingerprint and report the wall-clock
# time of the search call only.
# NOTE(review): the queries are the raw `dataset` rows, while the index was
# built from `normalized_dataset`, so the returned dot-product scores scale
# with each query's norm. Confirm this is intended before changing it; the
# score threshold applied afterwards presumably relies on this scale.
start = time.time()
neighbors, distances = searcher.search_batched(dataset)
end = time.time()
print("Time:", end - start)

Time: 0.28555917739868164


In [None]:
# Finding similar videos

In [None]:
# Predicted copy matrix: our_labels[q, v] == 1 when video v was returned for
# query q with a score above the threshold.
our_labels = np.zeros((528, 528))
for query, rank in np.ndindex(528, 528):
    # The index uses dot-product similarity, so a larger score means a closer
    # match. The threshold of 4 is kept low so that positive videos are not missed.
    if distances[query, rank] > 4:
        our_labels[query, neighbors[query, rank]] = 1

In [None]:
# Ground-truth copy matrix: gt_labels[i, j] == 1 when videos i and j form an
# annotated copy pair; every video is also marked as a copy of itself.
# (glob and numpy are already imported in the notebook's import cell.)
gt_labels = np.eye(528)

# Resolve video names with a dictionary instead of scanning the index list
# twice per annotation line. setdefault keeps the first position of a name,
# which is what list.index() would return.
name_to_idx = {}
for idx, video_name in enumerate(vcdb_info['index']):
    name_to_idx.setdefault(video_name, idx)

ann_pths = glob.glob('annotation/*.*')
for ann_pth in ann_pths:
    with open(ann_pth, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                # Tolerate blank or trailing empty lines in annotation files.
                continue
            pair = line.split(',')
            # The first two fields are file names; the part before the first
            # dot is the key used in vcdb_info['index'].
            i = name_to_idx[pair[0].split('.')[0]]
            j = name_to_idx[pair[1].split('.')[0]]
            # NOTE(review): only [i, j] is set, not [j, i] - confirm whether
            # the annotation files list each pair in both directions.
            gt_labels[i, j] = 1

In [None]:
# Percentage of matrix entries on which prediction and ground truth agree.
accuracy = 100 * np.equal(gt_labels, our_labels).sum() / gt_labels.size
accuracy

39.20992596418733

In [None]:
# Recall in percent: share of ground-truth positive pairs that were also
# predicted as copies.
posetives = np.where(gt_labels == 1)
recall = sum(our_labels[row, col] for row, col in zip(posetives[0], posetives[1]))
recall = recall / len(posetives[0]) * 100
recall

78.74592833876221

In [None]:
# Many videos are marked as copies -> next, find the real copies -> find the overlap by searching more thoroughly at the frame level

In [None]:
...

In [None]:
#added
for i in 