### Global imports

In [None]:
import os
from datetime import datetime
from math import floor
import torch
import wandb as wb
from packages.video_utils import H264Extractor, Video
from packages.constants import GOP_SIZE, FRAME_HEIGHT, FRAME_WIDTH, DATASET_ROOT, N_GOPS_FROM_DIFFERENT_DEVICE, N_GOPS_FROM_SAME_DEVICE
from packages.dataset import VisionGOPDataset, GopPairDataset
from packages.common import create_custom_logger
from packages.network import H4vdmNet

### Initialize environment (dataset root check, logger, device)

In [None]:
# Fail fast when the dataset root is missing. FileNotFoundError is more
# specific than a bare Exception and is still an Exception subclass, so any
# broad handler keeps working.
if not os.path.exists(DATASET_ROOT):
    raise FileNotFoundError(f'Dataset root does not exist: {DATASET_ROOT}')

# Notebook-wide logger.
log = create_custom_logger('h4vdm.ipynb')

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
log.info(f'Using device: {device}')

### Load GOP dataset

Remember to delete `dataset.json` before adding new devices or videos.

In [None]:
# Resolve the H.264 extractor binary relative to the current working directory
# (os.path.abspath anchors a relative path at os.getcwd()).
bin_path = os.path.abspath(os.path.join('h264-extractor', 'bin'))
h264_ext_bin = os.path.join(bin_path, 'h264dec_ext_info')

# Register one shared extractor on the Video class; its cache lives in the dataset root.
h264_extractor = H264Extractor(bin_filename=h264_ext_bin, cache_dir=DATASET_ROOT)
Video.set_h264_extractor(h264_extractor)

# Describe the dataset without building or downloading anything at construction time.
dataset = VisionGOPDataset(
    root_path=DATASET_ROOT,
    devices=[],
    media_types=['videos'],
    properties=['flat'],
    extensions=['mp4'],
    gop_size=GOP_SIZE,
    frame_width=FRAME_WIDTH,
    frame_height=FRAME_HEIGHT,
    gops_per_video=4,
    build_on_init=False,
    force_rebuild=False,
    download_on_init=False,
    ignore_local_dataset=False,
    shuffle=False,
)

# Try to load a previously saved dataset description and report the outcome.
# NOTE(review): the "Building..." message is not followed by any build call in
# this cell - confirm whether load() builds the dataset itself.
is_loaded = dataset.load()
if is_loaded:
    log.info('Dataset was loaded.')
else:
    log.info('Dataset was not loaded. Building...')

print(f'Dataset length: {len(dataset)}')

### Create training and testing datasets

In [None]:
from random import shuffle

devices = list(dataset.get_devices())
print(f'All devices: {devices}')

# Splits 1-4: reshuffle each time, first half for testing, second half for training.
# Slices are copies, so later in-place shuffles do not alter earlier splits.
half = len(devices) // 2

shuffle(devices)
testing_set_1_devices, training_set_1_devices = devices[:half], devices[half:]

shuffle(devices)
testing_set_2_devices, training_set_2_devices = devices[:half], devices[half:]

shuffle(devices)
testing_set_3_devices, training_set_3_devices = devices[:half], devices[half:]

shuffle(devices)
testing_set_4_devices, training_set_4_devices = devices[:half], devices[half:]

# Splits 5-7: a single shuffle, then each third of the list is held out in turn
# (the last third also receives the remainder of the integer division).
shuffle(devices)
third = len(devices) // 3

testing_set_5_devices, training_set_5_devices = devices[:third], devices[third:]

testing_set_6_devices = devices[third:2 * third]
training_set_6_devices = devices[:third] + devices[2 * third:]

testing_set_7_devices, training_set_7_devices = devices[2 * third:], devices[:2 * third]

training_set_devices = [
    training_set_1_devices,
    training_set_2_devices,
    training_set_3_devices,
    training_set_4_devices,
    training_set_5_devices,
    training_set_6_devices,
    training_set_7_devices,
]
testing_set_devices = [
    testing_set_1_devices,
    testing_set_2_devices,
    testing_set_3_devices,
    testing_set_4_devices,
    testing_set_5_devices,
    testing_set_6_devices,
    testing_set_7_devices,
]

# One epoch per (training, testing) split.
assert len(training_set_devices) == len(testing_set_devices)
n_epochs = len(training_set_devices)

for split_number, (split_training, split_testing) in enumerate(
        zip(training_set_devices, testing_set_devices), start=1):
    print(f'Training set {split_number} devices: {split_training}')
    print(f'Testing set {split_number} devices: {split_testing}')

Build all GOPs so that the cache can be cleaned

In [None]:
# Disabled cell: when uncommented, it walks every video of every device,
# requests its GOPs, then cleans the extractor cache and drops the references
# before moving on to the next video.
# for device in dataset.get_devices():
#     for video_metadata in dataset.dataset[device]:
#         video = dataset._get_video_from_metadata(video_metadata)
#         gops = video.get_gops()

#         Video.h264_extractor.clean_cache()
#         video = None
#         gops = None

### Define network parameters and functions

In [None]:
# --- Hyper-parameters -------------------------------------------------------
BATCH_SIZE = 72
LEARNING_RATE = 8e-6
LEARNING_RATE_DECAY_FACTOR = 0.97
VALIDATION_PERCENTAGE = 12.5  # 1/8
TEST_PERCENTAGE = 40
WARM_UP_EPOCHS = 5
START_LINEAR_LEARNING_RATE_COEFFICIENT = 1e-9  # basically zero
END_LINEAR_LEARNING_RATE_COEFFICIENT = 1

# --- Loss -------------------------------------------------------------------
# Binary cross-entropy on a similarity score already squashed into [0, 1].
compute_loss = torch.nn.BCELoss()
# compute_loss = torch.nn.BCEWithLogitsLoss()

# Sanity check: print the loss for the four (prediction, label) corner cases.
print(f'Same / Same: {compute_loss(torch.tensor([1.0]), torch.tensor([1.0]))}')
print(f'Same / Different: {compute_loss(torch.tensor([1.0]), torch.tensor([0.0]))}')
print(f'Different / Different: {compute_loss(torch.tensor([0.0]), torch.tensor([0.0]))}')
print(f'Different / Same: {compute_loss(torch.tensor([0.0]), torch.tensor([1.0]))}')

# --- Model ------------------------------------------------------------------
# Module.to() returns the module itself, so creation and placement can be chained.
net = H4vdmNet().to(device)

# --- Optimizer and learning-rate schedules ------------------------------------
optimizer = torch.optim.Adam(net.parameters(), lr=LEARNING_RATE)
# Linear warm-up from START to END coefficient over WARM_UP_EPOCHS scheduler steps.
linear_scheduler = torch.optim.lr_scheduler.LinearLR(
    optimizer,
    start_factor=START_LINEAR_LEARNING_RATE_COEFFICIENT,
    end_factor=END_LINEAR_LEARNING_RATE_COEFFICIENT,
    total_iters=WARM_UP_EPOCHS,
)
# Exponential decay by LEARNING_RATE_DECAY_FACTOR per scheduler step.
exponential_scheduler = torch.optim.lr_scheduler.ExponentialLR(
    optimizer,
    gamma=LEARNING_RATE_DECAY_FACTOR,
)

def compute_similarity(gop1_features, gop2_features):
    """Return a similarity score derived from the distance between two feature tensors.

    The score is ``1 - tanh(||gop1_features - gop2_features||_2)``, where the
    2-norm is taken over all elements of the difference. It is therefore a
    0-dim tensor in ``(0, 1]`` that equals 1 when the inputs are identical and
    approaches 0 as they move apart.

    Args:
        gop1_features: feature tensor of the first GOP.
        gop2_features: feature tensor of the second GOP (broadcast-compatible
            with ``gop1_features``).

    Returns:
        A 0-dim tensor on the same device and with the same dtype as the inputs.
    """
    diff = torch.subtract(gop1_features, gop2_features)
    norm = torch.norm(diff, 2)
    tanh = torch.tanh(norm)
    # Subtract from a Python scalar rather than torch.ones(...): the latter
    # always allocates a CPU float32 tensor regardless of the input's device
    # and dtype.
    return 1.0 - tanh

def train_one_step(model, gop1, gop2, label, optimizer):
    """Run one optimisation step on a single GOP pair.

    Both GOPs go through the same ``model``; the similarity of the two feature
    tensors is compared with ``label`` using the notebook-level ``compute_loss``,
    and the resulting gradients are applied with ``optimizer``.

    Relies on the notebook globals ``device``, ``compute_similarity`` and
    ``compute_loss``.

    Args:
        model: network called as ``model(gop, debug=False, device=device)``.
        gop1: first GOP of the pair.
        gop2: second GOP of the pair.
        label: target similarity for the pair (converted to a float64 tensor).
        optimizer: optimizer holding the parameters of ``model``.

    Returns:
        The loss tensor of this step (still attached to the autograd graph).
    """
    optimizer.zero_grad()

    gop1_features = model(gop1, debug=False, device=device).to(device)
    gop2_features = model(gop2, debug=False, device=device).to(device)

    # Tensor.to() is not in-place: the original call discarded its result, so
    # the moved tensor is now kept. The score is cast to float64 to match the label.
    similarity = compute_similarity(gop1_features, gop2_features).double().to(device)

    # Python's float maps to torch.float64, so no extra .double() cast is needed.
    label = torch.tensor(label, dtype=torch.float64, requires_grad=False, device=device)

    # The loss is computed from tensors already on `device`; no transfer is required.
    loss = compute_loss(similarity, label)

    loss.backward()
    optimizer.step()

    return loss

def train_one_epoch(model, dataset, optimizer, scheduler):
    """Train ``model`` on every pair of ``dataset`` once, then step ``scheduler``.

    The current learning rate is logged to wandb before training starts; after
    each pair the step loss and the running sum of losses are logged as well.

    Args:
        model: network to train; put into training mode here.
        dataset: indexable collection yielding ``(gop1, gop2, label)`` items.
        optimizer: optimizer passed through to ``train_one_step``.
        scheduler: learning-rate scheduler, stepped once at the end of the epoch.

    Returns:
        The sum of the per-pair losses over the whole epoch.
    """
    model.train()
    wb.log({"learning-rate": scheduler.get_last_lr()[0]})

    running_loss = 0
    for pair_index in range(len(dataset)):
        first_gop, second_gop, target = dataset[pair_index]
        step_loss = train_one_step(model, first_gop, second_gop, target, optimizer).item()
        running_loss += step_loss
        # Kept as two separate log calls, exactly like the original.
        wb.log({"instantaneous-loss": step_loss})
        wb.log({"total-loss": running_loss})

    # Report the learning rate before and after the scheduler update on one line.
    previous_lr = scheduler.get_last_lr()[0]
    print(f'Updating learning rate from {previous_lr}', end='')
    scheduler.step()
    updated_lr = scheduler.get_last_lr()[0]
    print(f' to {updated_lr}')

    return running_loss



def test(dataset):
    """Placeholder for the evaluation routine: currently does nothing and returns None."""
    pass

### Training and testing loop

In [None]:
# Local overrides of the values imported from packages.constants.
N_GOPS_FROM_DIFFERENT_DEVICE = 5
N_GOPS_FROM_SAME_DEVICE = 5

# Hyper-parameters recorded with the wandb run.
run_config = {
    "learning-rate": LEARNING_RATE,
    "learning-rate-decay-factor": LEARNING_RATE_DECAY_FACTOR,
    "start-linear-learning-rate-coefficient": START_LINEAR_LEARNING_RATE_COEFFICIENT,
    "end-linear-learning-rate-coefficient": END_LINEAR_LEARNING_RATE_COEFFICIENT,
    "n-warmup-epochs": WARM_UP_EPOCHS,
    "n_epochs": n_epochs,
    "n-gops-from-same-device": N_GOPS_FROM_SAME_DEVICE,
    "n-gops-from-different-device": N_GOPS_FROM_DIFFERENT_DEVICE,
    "validation-percentage": VALIDATION_PERCENTAGE,
    "test-percentage": TEST_PERCENTAGE,
    "batch-size": BATCH_SIZE,
}
wb.init(project='h4vdm', config=run_config)

# Warm-up phase: one device split per epoch, driven by the linear scheduler.
# NOTE(review): only the linear scheduler is stepped here; exponential_scheduler
# is not used in this cell - confirm whether a post-warm-up phase is intended.
# NOTE(review): training_set_devices is indexed by the warm-up epoch, so
# WARM_UP_EPOCHS must not exceed the number of splits.
for epoch in range(WARM_UP_EPOCHS):
    epoch_number = epoch + 1
    print(f'Warmup epoch {epoch_number}/{WARM_UP_EPOCHS}')

    print(f'Loading training set {epoch_number}/{n_epochs}')
    training_set = GopPairDataset(
        dataset,
        N_GOPS_FROM_SAME_DEVICE,
        N_GOPS_FROM_DIFFERENT_DEVICE,
        consider_devices=training_set_devices[epoch],
        shuffle=True,
    )

    print(f'Training epoch {epoch_number}/{n_epochs}')
    train_one_epoch(net, training_set, optimizer, linear_scheduler)

    # Evaluation is not wired in yet; the draft below is kept for reference.
    # print(f'Loading testing set {i+1}/{n_epochs}')
    # testing_set = GopPairDataset(dataset, N_GOPS_FROM_SAME_DEVICE, N_GOPS_FROM_DIFFERENT_DEVICE, consider_devices=testing_set_devices[i], shuffle=True)
    # testing_set = testing_set[:len(training_set)*(1-TEST_PERCENTAGE/100)] # reduce size by removing TEST_PERCENTAGE % of the dataset
    # validation_set = testing_set[:floor(len(training_set)*(1-VALIDATION_PERCENTAGE/100))] # VALIDATION_PERCENTAGE % of the training set is used for validation, works because the dataset is shuffled
    # testing_set = testing_set[floor(len(training_set)*(1-VALIDATION_PERCENTAGE/100)):]
    # net.eval()
    # test(testing_set)
    print(f'Epoch {epoch_number}/{n_epochs} done')
    print('')

print('Done')


In [None]:
# Save the whole trained module under models/<timestamp>_h4vdm.pth.
# The time part uses '-' instead of ':' because colons are not valid in Windows
# file names, and the checkpoint loaded later in this notebook follows the
# same '%H-%M' pattern.
os.makedirs('models', exist_ok=True)  # torch.save does not create missing directories
filename = os.path.join('models', datetime.today().strftime('%Y-%m-%d_%H-%M') + '_h4vdm.pth')
print(f'Saving model to {filename}')
torch.save(net, filename)

In [None]:
# Reload a saved network and compare one GOP pair as a smoke test.
# map_location places the checkpoint on the active device, so a model saved
# on a GPU can still be loaded on a CPU-only machine.
# NOTE(review): the checkpoint is read from the working directory, while the
# save cell writes under 'models/' - confirm the intended path.
# NOTE(review): this file stores a whole pickled module; recent PyTorch
# releases may require weights_only=False to load it - confirm against the
# installed version.
net = torch.load('2024-02-22_19-38_h4vdm.pth', map_location=device)
net.eval()

# NOTE(review): `pair_dataset` is not defined in the visible cells - confirm it
# is created elsewhere before running this cell.
gop1, gop2, label = pair_dataset[10]

# Inference only: disable autograd so no graph is recorded.
with torch.no_grad():
    gop1_features = net(gop1, debug=False, device=device)
    gop2_features = net(gop2, debug=False, device=device)
    similarity = compute_similarity(gop1_features, gop2_features)

print(f'Gop1: {gop1.video_name} Gop2: {gop2.video_name} Label: {label} Similarity: {similarity}')