# Downloading and formatting data

This is a guide that will download and process the data used in https://arxiv.org/abs/1607.06854.
Once the data is downloaded and processed, there is no need to rerun the code below the import
statements in the next cell until the next titled section.

In [None]:
# reload edited modules automatically, so changes made to the project
# files are picked up without restarting the kernel
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
from pycuda import gpuarray, compiler
from collections import OrderedDict
import requests as req
import bs4
import shutil
import os
import h5py

In [None]:
# function for downloading files from a link

def download_file(url, path=''):
    """Download ``url`` and save it under the last component of the URL.

    url  -- address of the file; the text after the final '/' is used as
            the local file name
    path -- prefix prepended verbatim to that file name (include the
            trailing separator when it names a directory)

    Returns the local file name that was written.

    Raises the requests HTTP error if the server answers with an error
    status, so that an error page is never saved in place of the file.
    """
    local_filename = path + url.split('/')[-1]
    r = req.get(url, stream=True)
    try:
        # check the status before creating the file on disk
        r.raise_for_status()
        # r.raw bypasses content decoding unless told otherwise; without
        # this a gzip/deflate encoded reply would be stored still encoded
        r.raw.decode_content = True
        with open(local_filename, 'wb') as f:
            shutil.copyfileobj(r.raw, f)
    finally:
        # a streamed response keeps its connection until explicitly closed
        r.close()

    return local_filename

In [None]:
# downloading every file linked under PVM_set/ on the project page into a
# folder called PVM_zipped_set inside the chosen download path

# change this to wherever the data should live; expanduser is needed
# because Python does not expand '~' by itself
download_path = os.path.expanduser('~/Downloads/')
root = download_path + 'PVM_zipped_set/'
# test the directory itself (and create missing parents) so the cell can
# be rerun and also works when download_path does not exist yet
if not os.path.isdir(root):
    os.makedirs(root)

webpage = 'http://pvm.braincorporation.net/'
r = req.get(webpage)
# stop here if the index page could not be fetched
r.raise_for_status()
html_doc = r.content
soup = bs4.BeautifulSoup(html_doc, 'html.parser')

for link in soup.find_all('a'):
    link_ext = link.get('href')
    # anchors without an href give None, which cannot be searched with "in"
    if link_ext and 'PVM_set/' in link_ext:
        print(download_file(webpage + link_ext, path=root))

In [None]:
# listing the names of the zipped folders currently stored in root
dirlist = os.listdir(root)

In [None]:
# renaming some of the face and stop sets to be testing data
# you can choose other arrangements, e.g. all odd/even videos
# being part of the testing set

for zip_file in dirlist:
    # last_train is the highest video number that stays in the training set
    if 'face' in zip_file:
        prefix, last_train = 'face', 10
    elif 'stop' in zip_file:
        prefix, last_train = 'stop', 20
    else:
        continue

    number = zip_file[4:6]
    if not number.isdigit():
        # no video number at the expected position, e.g. a file that was
        # already renamed to "<prefix>_test_NN.zip" by an earlier run
        continue

    count = int(number)
    if count > last_train:
        try:
            os.rename(root + zip_file,
                      root + prefix + '_test_' + str(count) + '.zip')
        except OSError as rename_error:
            # keep going with the other files, but say what was skipped
            print('could not rename ' + zip_file + ': ' + str(rename_error))

In [None]:
# listing the zipped folders again so dirlist reflects the new names
dirlist = os.listdir(root)

In [None]:
# unzipping and splitting training and testing data
import zipfile

test_path = download_path + 'PVM_test_set/'
train_path = download_path + 'PVM_train_set/'

# both paths end in '/', so their last '/'-separated component is empty;
# ask the file system whether the directory exists instead, which keeps
# this cell safe to rerun
for out_dir in (test_path, train_path):
    if not os.path.isdir(out_dir):
        os.mkdir(out_dir)

for dir_ in dirlist:
    archive = root + dir_
    if not zipfile.is_zipfile(archive):
        # anything else that ended up in the folder is reported and skipped
        print('skipping ' + archive + ': not a zip archive')
        continue
    with zipfile.ZipFile(archive, "r") as zip_ref:
        # file name without the '.zip' extension
        base_name = dir_[:-4]
        if 'test' in base_name:
            zip_ref.extractall(test_path)
        else:
            zip_ref.extractall(train_path)

In [None]:
# hdf5_raw_data resizes the frames of the multiple videos and saves them
# in an hdf5 file; that format is great for large sets of data because it
# can be manipulated directly from storage
from FormattingFiles import hdf5_raw_data

new_size = (96, 96)
train_filename = download_path + 'PVM_train_set.hdf5'
test_filename = download_path + 'PVM_test_set.hdf5'

# training set first, then testing set, both with the same frame size
for frames_dir, hdf5_name in ((train_path, train_filename),
                              (test_path, test_filename)):
    hdf5_raw_data(frames_dir, hdf5_name, new_size, img_dir='img')

# Creating a PVM instance

After you have run the previous cell you should have all the files you need. You
will not need to rerun anything from above besides the first cell of import statements.

In [None]:
# To pick a GPU other than the default (device 0), uncomment the next line
# and set the device number before pycuda.autoinit is imported below.
# os.environ['CUDA_DEVICE'] = '1' 

# autoinit automatically initializes a CUDA context
import pycuda.autoinit

from PVM_PyCUDA import OnTheFlyPVM

In [None]:
# Parameters for the PVM. They are set to match the paper, although this
# PVM will not be a tracker.
n_color = 3
input_edge = 6
hidden_size = 49
structure = [16, 8, 4, 3, 2, 1]
output_sizes = [0] * len(structure)

# number of values in one square input patch, over all colour channels
input_size = n_color * input_edge ** 2

# patch edge times the first entry of structure
edge_n_pixels = structure[0] * input_edge

In [None]:
# importing two functions for mapping and unmapping an image into a
# one dimensional array
from FormattingFiles import flatten_image, unflatten_image
# importing a function to give a connection dictionary
from RectangularGridConstructor import make_connections

In [None]:
# To initialize any instance of a PVM you need to specify how it is
# connected. This can be as general as you want in principle, because the
# connectivity is defined in a dictionary. The function make_connections
# is one way to construct a layered hierarchy of rectangular grids with
# nearest neighbor lateral connections, as was done in the paper.
connect_dict = make_connections(structure, input_size, hidden_size,
                                output_sizes, context_from_top_0_0=True)

# dim is a tuple (height, width, number of colors); the number of colors
# is taken from n_color so it always agrees with input_size
dim = (edge_n_pixels, edge_n_pixels, n_color)
input_shape = (input_edge, input_edge)
# every pixel position holds its own flat index, so passing this array
# through the two mapping functions gives index maps usable on any frame
basic_index = np.arange(np.prod(dim)).reshape(dim)
flat_map = flatten_image(basic_index, input_shape)
rev_flat_map = unflatten_image(basic_index.flatten(), dim, input_shape)

In [None]:
# you don't need to rerun the download and formatting cells once the hdf5
# files have been saved; just point download_path at the folder holding them
download_path = '/media/sdb/'  # press tab for autocomplete results
train_filename = download_path + 'PVM_train_set.hdf5'
test_filename = download_path + 'PVM_test_set.hdf5'

# both files are opened read-only and stay open for the cells below
train_data = h5py.File(train_filename, 'r')
test_data_reformat_nontracker =  h5py.File(test_filename, 'r')

In [None]:
# build the PVM from the connection dictionary and the flattening index map
# NOTE(review): norm=255. presumably rescales 8-bit pixel values - confirm in PVM_PyCUDA
pvm = OnTheFlyPVM(connect_dict, flat_map, norm=255.)

In [None]:
# placeholder: choose the path and filename passed to load_parameters and train below
fname = '/path/to/files'

In [None]:
# run this only if the model has already been trained and its parameters
# were saved under fname
pvm.load_parameters(fname)

In [None]:
# one learning rate per training frame: 1,000,000 frames at a rate of 0.01
learning_rate_list = [0.01] * 1000000 
# Change the length of this list to change the amount of time trained.
# If I remember correctly 5 million frames at a training rate of 0.01 gives
# good results comparable to the original paper.

# This trains the model on train_data with the training schedule given by
# learning_rate_list. print_every says how often the results of training
# are printed. With save_every_print set to True, the model parameters and
# a plot of the MSE averaged over the number of frames given in interval
# are saved as well; the parameters, plot and connections go into three
# different files with the name given in filename.
pvm.train(train_data, learning_rate_list,
          print_every=100000, save_every_print=True, 
          filename=fname, interval=100000)

# Animating PVM predictions and errors

In [None]:
import matplotlib.pyplot as plt
from matplotlib import animation
%matplotlib tk

fig = plt.figure(figsize=(15, 5))
ax1 = fig.add_subplot(131)
ax2 = fig.add_subplot(132)
ax3 = fig.add_subplot(133)


unflattened_idx_array = rev_flat_map # not a copy
L_y = edge_n_pixels
L_x = edge_n_pixels

def gen_func():
    """Yield an ``(image, prediction, error)`` triple for every test frame.

    Goes through each entry of ``test_data_reformat_nontracker`` in turn
    and resets the PVM state before the first frame of every entry. Each
    frame is passed to ``pvm.forward``; the frame is then yielded together
    with the first ``pvm.L_input`` values of ``pvm.pred`` and ``pvm.err``,
    each fetched with ``.get()``.
    """
    for _name, video in test_data_reformat_nontracker.items():
        # unpacking four values requires each entry to be 4-dimensional
        n_frame, _height, _width, _n_colors = video.shape

        pvm.reset_state()
        for frame_idx in range(n_frame):
            image = video[frame_idx, ...]
            pvm.forward(image)
            prediction = pvm.pred[:pvm.L_input].get()
            error = pvm.err[:pvm.L_input].get()
            yield image, prediction, error
                
def update(vals):
    """Show the next frame, prediction and error in the three panels.

    vals -- an ``(image, pred, err)`` triple as yielded by ``gen_func``

    Returns the three image artists ``(im1, im2, im3)``, which is what
    FuncAnimation needs in order to redraw them when blitting.
    """
    image, pred, err = vals
    # put the flat error back into image layout and measure its distance
    # from 0.5
    mag_err = abs(err[unflattened_idx_array] - 0.5)

    # Reuse the artists that were created when the animation was set up.
    # Calling imshow here would add one more image to each axes on every
    # frame, so memory use and drawing time would keep growing.
    # NOTE(review): assumes the frames are 3-channel images, for which
    # imshow applies no per-frame colour scaling - confirm if that changes.
    im1.set_data(image)
    im2.set_data(pred[unflattened_idx_array])
    im3.set_data(mag_err)

    return im1, im2, im3

# take the first frame from a fresh generator to create the three images
vals = next(gen_func())
image, pred, err = vals
# flat error rearranged into image layout, then its distance from 0.5
reordered_err = err[unflattened_idx_array]
mag_err = abs(reordered_err - 0.5)

# left panel: the input frame
im1 = ax1.imshow(image, animated=True)

# middle panel: the prediction rearranged into image layout
im2 = ax2.imshow(pred[unflattened_idx_array], animated=True)

# right panel: the error magnitude
im3 = ax3.imshow(mag_err, animated=True)
# frames=gen_func lets the animation start its own generator;
# save_count is 30 * 600 = 18000 frames, i.e. 10 minutes at the 30 fps
# used by the commented-out ani.save call below
ani = animation.FuncAnimation(fig, update, frames=gen_func,
                              interval=5, blit=True, save_count=30*(10*60))

# This takes too long so avoid saving video if you don't have to
# ani.save('SaveVideo.mp4',
#          writer='ffmpeg', fps=30, bitrate=-1,
#          extra_args=['-vcodec', 'libx264'])

plt.show()

In [None]:
# A relatively new method for quick visualization
pvm.quick_animate(test_data_reformat_nontracker, scale=5)