In [None]:
import os
import cv2
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from functools import partial
plt.style.use('default')

import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision.transforms import ToTensor

from utils.dataset import VideoDataset, MyConcatDataset, VideoDatasetRNN
from utils.models import TrackNetV2MSE, TrackNetV2NLL, TrackNetV2RNN
from utils.training import train_model

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

# IPython autoreload extension in mode 2, so edits to imported modules are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Batching and collating for RNN DataLoader

In [None]:
# Report how many frames the test video contains.
video_path = "../datasets/test_standard/video.mp4"
cap = cv2.VideoCapture(video_path)
try:
    # A capture that failed to open does not raise on its own; querying it
    # would just print a meaningless count, so fail loudly instead.
    if not cap.isOpened():
        raise FileNotFoundError(f"Could not open video: {video_path}")
    print(cap.get(cv2.CAP_PROP_FRAME_COUNT))
finally:
    # Always free the capture handle, even when the check above raises.
    cap.release()

In [None]:
# Sequence length shared by the dataset, the collate function and the model below.
sequence_length = 4

# Keyword arguments forwarded to VideoDatasetRNN.
dataset_params = {
    "image_size": (360, 640),
    "sequence_length": sequence_length,
    "sigma": 5,
    "drop_duplicate_frames": False,
    "one_output_frame": True,
    "transform": ToTensor(),
    "target_transform": ToTensor(),
    "grayscale": False,
}

# Wrap the single debug dataset in the project's concat container.
dataset = MyConcatDataset(
    [
        VideoDatasetRNN(root="../datasets/debug/", **dataset_params),
    ]
)

In [None]:
# Check whether the wrapper object exposes a `datasets` attribute.
hasattr(dataset, "datasets")

In [None]:
from train_configurations.utils import BatchSamplerRNN, collate_fn_rnn

# Pre-bind the collate options; all three probabilities are set to 0 here.
collate_fn = partial(
    collate_fn_rnn,
    sequence_length=sequence_length,
    ground_truth_probability=0,
    clear_probability=0,
    total_clear_probability=0,
)

# The project batch sampler decides which samples form each batch.
batch_sampler = BatchSamplerRNN(
    data_source=dataset,
    batch_size=1,
    drop_last=True,
)

dataloader = DataLoader(
    dataset=dataset,
    batch_sampler=batch_sampler,
    collate_fn=collate_fn,
)

In [None]:
# NOTE(review): this rebinds `dataloader`, replacing the batch-sampler/collate
# loader built in the previous cell with one that uses the default collation.
# The two inference loops further down unpack different batch layouts (four
# items vs. two), so only one of them matches whichever loader was created
# last; running every cell top to bottom cannot satisfy both.
dataloader = DataLoader(dataset, batch_size=1)

In [None]:
# `os` is already imported in the first cell; this re-import is redundant but harmless.
import os
# Scratch check: strip the directory part from a results path.
os.path.basename("cc/phase_3_results.pdf")

In [None]:
# Checkpoint restored below.
# Alternative: "checkpoints/tracknet_v2_rnn_360_640/checkpoint_0004_best.ckpt"
checkpoint_path = "checkpoints/tracknet_v2_rnn_360_640/phase_3_0/checkpoint_0002_best.ckpt"

model = TrackNetV2RNN(one_output_frame=True, sequence_length=sequence_length)
model.load(checkpoint_path)

# Switch to evaluation mode and move to the selected device; the trailing
# semicolon keeps the notebook from echoing the module repr.
model.eval()
model.to(device);

In [None]:
it = iter(dataloader)

# Discard leading batches before inference (the count is currently zero).
for step in range(0):
    next(it)

# Push ten batches through the model. Only the last batch's tensors stay bound
# afterwards (`input`, `out`, ...), which is what later cells read.
for step in range(10):
    batch = next(it)
    input, deleted_frames, use_gt, labels = batch
    input = input.float()
    with torch.no_grad():
        prediction = model(
            input.to(device),
            deleted_frames.to(device),
            use_gt.to(device),
        )
        out = prediction.cpu()

In [None]:
# Same ten-batch loop for batches that are plain (input, labels) pairs;
# continues consuming the iterator `it` created earlier.
for step in range(10):
    batch = next(it)
    input, labels = batch
    input = input.float()
    with torch.no_grad():
        prediction = model(input.to(device))
        out = prediction.cpu()

In [None]:
# Copy the model's `internal_state` tensor to the CPU so it can be inspected.
internal_state = model.internal_state.cpu()
print(internal_state.shape)

# Display the slice at index [0, 2] (presumably batch 0, channel 2 - TODO confirm).
state_slice = internal_state[0][2]
plt.imshow(state_slice)

In [None]:
# Target figure size in pixels; matplotlib wants inches, hence the division by dpi.
w, h, dpi = 1280, 720, 100

fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(w/dpi, h/dpi), dpi=dpi)
axs = axs.ravel()

# Panels 0-2: channels 0-2 of the first batch element, one per panel.
for idx, ax in enumerate(axs[:3]):
    ax.imshow(input[0, idx], cmap='gray')

# Panel 3: the last three channels as a channels-last image, with the first
# output map drawn semi-transparently on top.
last_channels = np.transpose(input[0, -3:].numpy(), (1, 2, 0))
axs[-1].imshow(last_channels)
axs[-1].imshow(out[0, 0], cmap='gray', alpha=0.7)

for ax in axs:
    ax.set_axis_off()

fig.tight_layout()
plt.show()

# Visualize some activations and kernels because why not

In [None]:
# Rebuild the network for the visualizations below. This rebinds `model` and,
# unlike the earlier cell, does not move it to `device`.
checkpoint_path = 'checkpoints/tracknet_v2_rnn_360_640/phase_3_0/checkpoint_0002_best.ckpt'

model = TrackNetV2RNN(sequence_length=4)
model.load(checkpoint_path)
model.eval()

# Last expression of the cell: echo the module structure.
model

In [None]:
# Dataset settings for the visualization section (no `one_output_frame` here).
dataset_params = {
    "image_size": (360, 640),
    "sequence_length": 4,
    "sigma": 5,
    "drop_duplicate_frames": False,
    "transform": ToTensor(),
    "target_transform": ToTensor(),
    "grayscale": False,
}

# Rebinds `dataset` to a bare VideoDatasetRNN (no concat wrapper).
dataset = VideoDatasetRNN(
    root="../datasets/prova/",
    **dataset_params,
)

In [None]:
# NOTE(review): `counter` is not referenced anywhere else in the visible part of this notebook.
counter = 0

def get_encoding_layer(desired_block=1, subblock=0, net=None):
    """Collect the layers leading up to one ReLU of the network, in traversal order.

    The network's top-level children are walked in order. Children at odd
    positions are appended as-is; for every child, the layers nested two levels
    down (child -> element -> layer) are appended one by one. Collection stops
    right after the first ``torch.nn.ReLU`` found in element ``subblock`` of
    the child at position ``2 * desired_block``.

    Args:
        desired_block: Index of the target block; it is looked up at top-level
            position ``2 * desired_block``.
        subblock: Index of the element inside the target block whose ReLU ends
            the collection.
        net: Module to traverse. Defaults to the notebook-level ``model`` so
            existing two-argument calls keep working.

    Returns:
        A list of modules. If the requested ReLU is never reached, the list
        holds every layer collected over the full traversal.
    """
    if net is None:
        net = model

    target_index = 2 * desired_block
    layers = []
    for i, block in enumerate(net.children()):
        if i % 2 == 1:
            layers.append(block)
        for j, block_element in enumerate(block.children()):
            for layer in block_element.children():
                layers.append(layer)
                # Test the layer that was just appended and return at once.
                # Re-checking a loop variable after the loops, as the old
                # break chain did, can read a stale layer from an earlier
                # block or fail when no layer has been seen yet.
                if i == target_index and j == subblock and type(layer) is torch.nn.ReLU:
                    return layers
    return layers

def compute_activations(layers, input):
    """Feed one sample through ``layers`` in order and return the result as a NumPy array.

    Args:
        layers: Iterable of callables applied one after another.
        input: A single un-batched tensor; a leading batch axis of size 1 is
            added before the first layer.

    Returns:
        The final activation as a NumPy array with the added batch axis removed.
    """
    activation = input.unsqueeze(dim=0)
    with torch.no_grad():
        for layer in layers:
            activation = layer(activation)

    # Drop only the batch axis added above. A bare squeeze() would also remove
    # a single-channel or size-1 spatial axis and change the array's rank.
    # detach()/cpu() keep the NumPy conversion valid for any input tensor.
    return activation.squeeze(dim=0).detach().cpu().numpy()

In [None]:
# Fetch the sample at index 50 and cast its frames to float32 for the model.
sample = dataset[50]
frames, labels = sample
frames = frames.float()

In [None]:
# Two 16:9 panels side by side, 300 px tall.
w, h, dpi = 300*2*16/9, 300, 100

fig, axs = plt.subplots(ncols=2, figsize=(w/dpi, h/dpi), dpi=dpi)
frame_ax, truth_ax = axs

# Left: last three channels of the stacked input, rearranged to channels-last.
last_frame = np.transpose(frames[-3:].numpy(), (1, 2, 0))
frame_ax.imshow(last_frame)
frame_ax.set_title("Input frame (last in sequence)")

# Right: first map of the label tensor.
truth_ax.imshow(labels[0])
truth_ax.set_title("Ground truth")

fig.tight_layout(pad=0.2)
plt.show()

In [None]:
# Blend the clean frames with Gaussian noise at increasing fractions and record
# the maximum of the model output for each fraction.
noise_part = np.linspace(0, 1, 10)
c = []

# Dedicated seeded generator: the sweep is reproducible from run to run and
# does not disturb the global RNG state.
noise_rng = torch.Generator().manual_seed(0)

for n in noise_part:
    with torch.no_grad():
        noise = torch.randn(frames.shape, generator=noise_rng)
        f = (1-n)*frames + n*noise
        out = model(f.unsqueeze(dim=0)).squeeze().numpy()
    c.append(out.max())

plt.plot(noise_part, c)
plt.xlabel("Noise fraction")
plt.ylabel("Maximum model output")
plt.show()

In [None]:
# Model output for one fixed noise fraction.
n = 0.07

# Seeded generator so the displayed map is the same on every run.
noise_rng = torch.Generator().manual_seed(0)

with torch.no_grad():
    noise = torch.randn(frames.shape, generator=noise_rng)
    f = (1-n)*frames + n*noise
    out = model(f.unsqueeze(dim=0)).squeeze().numpy()

plt.imshow(out)
plt.colorbar()
plt.title(f"Model output at noise fraction {n}")
plt.show()

In [None]:
# Zero the first three channels in place. A broadcast scalar avoids hard-coding
# the 360x640 image size, so this keeps working if `image_size` changes.
# NOTE: this mutates `frames`; re-run the cell that loads the sample to restore it.
frames[:3] = 0

In [None]:
# Which ReLU to stop at: block index and sub-block index.
block = 2
subblock = 1

# Gather the layers up to that point, then run the sample through them.
encoding_layers = get_encoding_layer(desired_block=block, subblock=subblock)
activations = compute_activations(encoding_layers, frames)

activations.shape

In [None]:
# A map counts as dead when its maximum over the last two axes is exactly zero.
channel_peaks = activations.max(axis=(1,2))
dead_activations, = np.nonzero(channel_peaks == 0)
print(f"Of {activations.shape[0]} activations, {dead_activations.size} are dead and {activations.shape[0]-dead_activations.size} are not.")

In [None]:
# Draw an 8x8 grid of activation maps, starting at map index `i_0`.
height_pixels = 1080
top_adjust = 1

w, h, dpi = height_pixels*16/9*top_adjust, height_pixels, 100
fig, axs = plt.subplots(nrows=8, ncols=8, figsize=(w/dpi, h/dpi), dpi=dpi)

i_0 = 0

# zip stops at the shorter sequence, so a layer with fewer than i_0 + 64 maps
# leaves the remaining panels empty instead of raising IndexError.
for ax, activation in zip(axs.ravel(), activations[i_0:]):
    ax.imshow(activation, cmap='gray')
    # ax.set_title(i)

# Hide the axes on every panel, including any left empty.
for ax in axs.ravel():
    ax.set_axis_off()

#fig.suptitle(f"Activations in encoding block {block}, subblock {subblock}")

fig.tight_layout(pad=0.5)
fig.subplots_adjust(top=top_adjust)

#fig.savefig(f"{block}_{subblock}.png")

plt.show()

In [None]:
# List the parameter/buffer names in the state dict; the kernel plot below looks tensors up by these keys.
model.state_dict().keys()

In [None]:
# Output-channel index of the filter to inspect.
k = 4

# Read the state dict once and pull out the layer's weights and biases.
state = model.state_dict()
kernels = state['vgg_conv1.1.0.weight'].numpy()
biases = state['vgg_conv1.1.0.bias'].numpy()

print(kernels.shape)
print(biases[k])

selected = kernels[k]
min_val, max_val = selected.min(), selected.max()
print(min_val, max_val)

# Make the colour range symmetric around zero so the diverging colormap is
# centred: use the larger magnitude for both limits.
max_val = max(max_val, -min_val)
min_val = -max_val

w, h, dpi = 800, 800, 100
fig, axs = plt.subplots(nrows=8, ncols=8, figsize=(w/dpi, h/dpi), dpi=dpi)

# One panel per input-channel slice of filter k.
for i, ax in enumerate(axs.ravel()):
    ax.imshow(selected[i], cmap='RdBu', vmin=min_val, vmax=max_val)
    ax.set_axis_off()

#fig.suptitle(f"Kernel {k}, bias = {biases[k]:.2g}")
fig.tight_layout(pad=0.2)
plt.show()
