In [3]:
import torch
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
from skimage.draw import random_shapes
import cv2
import pandas as pd
import random

In [19]:
# ---- Dictionary size and training schedule --------------------------------
numTransforms = 15   # number of transforms in the dictionary phi
trainSteps = 1000    # number of outer training iterations
batchSize = 30       # sequences drawn per training step

# ---- Penalty weights applied to the transform coefficients beta ------------
sparsePen = 1        # weight on sign(beta) (sparsity term)
derivPen = 0.5       # weight on the temporal-difference term

# ---- Smoothed bump function ------------------------------------------------
# Variance of the normal density used in the smoothed bump function. The
# argument of the density is r^2, hence the target value (.01)^2.
epsilon = 0.0001

# ---- Step sizes ------------------------------------------------------------
eta = 0.01    # learning rate of phi, the dictionary of transforms
eta2 = 0.05   # learning rate of beta, the transform coefficients
# NOTE: the original labels read "eta3 = center, eta4 = radii", but InferLocal
# hands eta3 to InferRadius2 and eta4 to InferCenter. Both are 0.1 here, so the
# mismatch has no numerical effect with these settings.
eta3 = 0.1
eta4 = 0.1

# ---- Iteration counts ------------------------------------------------------
numInference = 50   # arbitrary
betaStep = 15       # FOR TRAINED TRANSFORMS: mini cycle for beta
attStep = 10        # FOR TRAINED TRANSFORMS: mini cycle for center/radii

In [1]:
#Rationale: since movies of multiple objects, with overlap, will just be sum of two images with values capped at 255,
#want transforms to reflect this ceiling like behavior or else prediction might be bad. 
#Will use a smooth squashing function, tanh.

def squash(x):
    """Smooth ceiling nonlinearity: return 1.3 * tanh(x), applied elementwise.

    Works on scalars and numpy arrays alike (delegates to np.tanh).
    """
    return 1.3 * np.tanh(x)

def squash_deriv(x):
    """Derivative of squash: 1.3 * (1 - tanh(x)^2), applied elementwise."""
    tanh_x = np.tanh(x)
    return 1.3 * (1 - tanh_x ** 2)

In [3]:
#Training algorithm, image sequences with one shape. No need to infer center/radii yet.
def timeDiff(beta):
    """Subgradient of the temporal total-variation penalty on beta.

    The penalty is P(beta) = sum_t |beta[:, t+1, :] - beta[:, t, :]|, and the
    returned array is dP/dbeta, i.e.

        sign(beta[t] - beta[t-1]) - sign(beta[t+1] - beta[t])

    with the missing neighbour at either end of the sequence contributing 0.

    Callers subtract ``derivPen * timeDiff(beta)`` inside an additive update
    (``beta += step * dBeta``), next to ``- sparsePen * np.sign(beta)``. For
    that to shrink frame-to-frame changes, this function has to return the
    subgradient itself. The previous version returned its negative, which
    made the update reward temporal variation.

    Parameters
    ----------
    beta : ndarray, shape (B, T, K)
        B sequences, T time steps (axis 1), K transforms.

    Returns
    -------
    ndarray, shape (B, T, K)
    """
    step = np.diff(beta, axis=1)  # beta[t+1] - beta[t], shape (B, T-1, K)
    pad = np.zeros((beta.shape[0], 1, beta.shape[2]))

    backward = np.concatenate((pad, step), axis=1)  # beta[t] - beta[t-1]; 0 at t = 0
    forward = np.concatenate((step, pad), axis=1)   # beta[t+1] - beta[t]; 0 at t = T-1

    return np.sign(backward) - np.sign(forward)

def inferBeta(Input, Output, phi, sparsePen, derivPen, eta2, numInference):
    """Infer transform coefficients beta for a batch of sequences, with phi fixed.

    Frame t of ``Output`` is modelled as ``squash(sum_k beta[b,t,k] * phi[:,:,k] @ Input[b,t])``.
    Starting from beta = 0, ``numInference`` additive updates are applied using
    the squared-error gradient minus the two penalty terms.

    Parameters
    ----------
    Input : ndarray, shape (B, T, n2)
        Frames to predict from (flattened images).
    Output : ndarray, shape (B, T, n2)
        Frames to be predicted; Output[:, t] is the successor of Input[:, t].
    phi : ndarray, shape (n2, n2, K)
        Dictionary of K transforms.
    sparsePen : float
        Weight on ``np.sign(beta)``.
    derivPen : float
        Weight on ``timeDiff(beta)``.
    eta2 : float
        Step size.
    numInference : int
        Number of update steps.

    Returns
    -------
    ndarray, shape (B, T, K)
    """
    beta = np.zeros((Input.shape[0], Input.shape[1], phi.shape[2]))  # shape btk

    for _ in range(numInference):
        predictions = np.einsum('btk,ijk,btj->bti', beta, phi, Input)
        # Back-propagate the residual through the squashing nonlinearity.
        # squash_deriv(predictions) lives on the OUTPUT pixel axis i, so it
        # must multiply the error elementwise. Contracting it over the input
        # axis j only type-checks because phi happens to be square.
        backprop = (Output - squash(predictions)) * squash_deriv(predictions)
        data_term = np.einsum('bti,ijk,btj->btk', backprop, phi, Input)

        dBeta = data_term - sparsePen * np.sign(beta) - derivPen * timeDiff(beta)
        beta += eta2 * dBeta

    return beta
    
def learnTransform(Input, Output, phi, beta, eta, batchSize, numInference):
    """Update the transform dictionary phi with beta held fixed.

    Runs ``numInference`` gradient steps on the same squared-error objective
    that inferBeta uses, i.e. the residual is ``Output - squash(prediction)``
    and is back-propagated through ``squash_deriv``. The earlier version used
    the un-squashed linear prediction here, so the beta step and the phi step
    were optimising two different models.

    Parameters
    ----------
    Input, Output : ndarray, shape (B, T, n2)
        Source frames and the frames to be predicted.
    phi : ndarray, shape (n2, n2, K)
        Dictionary of transforms. Updated IN PLACE and also returned.
    beta : ndarray, shape (B, T, K)
        Transform coefficients.
    eta : float
        Step size (divided by ``batchSize`` before use).
    batchSize : int
        Normaliser for the summed gradient.
    numInference : int
        Number of gradient steps.

    Returns
    -------
    ndarray, shape (n2, n2, K)
        The same array object as ``phi``.
    """
    step_size = eta / batchSize

    for _ in range(numInference):
        predictions = np.einsum('btk,ijk,btj->bti', beta, phi, Input)
        # Residual pushed back through the tanh squashing (output-pixel axis i).
        backprop = (Output - squash(predictions)) * squash_deriv(predictions)
        dPhi = np.einsum('bti,btj,btk->ijk', backprop, Input, beta)
        phi += step_size * dPhi

    return phi
    
    

In [5]:
#Training algorithm for image sequences containing a single shape.

def Training1(dataset, trainSteps, batchSize, sparsePen, derivPen, eta, eta2, numInference, numTransforms):
    """Learn a dictionary of transforms phi from single-shape image sequences.

    Each step draws a random batch (with replacement), infers beta with phi
    fixed, updates phi with beta fixed, then rescales every transform so its
    largest absolute entry is 1.

    Parameters
    ----------
    dataset : ndarray, shape (N, T, n2)
        N sequences of T flattened frames. At most the first 10 frames of
        each sequence are used (frames 0..8 predict frames 1..9); shorter
        sequences are used in full.
    trainSteps : int
        Number of training iterations.
    batchSize : int
        Sequences per batch.
    sparsePen, derivPen, eta2, numInference
        Forwarded to inferBeta.
    eta
        Forwarded to learnTransform (together with batchSize, numInference).
    numTransforms : int
        Number of transforms K.

    Returns
    -------
    ndarray, shape (n2, n2, K)
    """
    # initialize phi uniformly in [0, 1)
    dim = dataset.shape[2]
    phi = np.random.rand(dim, dim, numTransforms)

    # The original hard-coded a 10-frame window (:9 / 1:10). Clamp it to the
    # actual sequence length so shorter movies yield matching Input/Output
    # lengths instead of a shape error.
    n_frames = min(dataset.shape[1], 10)

    for _ in range(trainSteps):
        picks = np.random.randint(0, dataset.shape[0], batchSize)  # random batch
        data_batch = dataset[picks]
        Input = data_batch[:, :n_frames - 1, :]   # will predict next frame from these
        Output = data_batch[:, 1:n_frames, :]     # the frames to be predicted

        # Find beta, fix it, then optimize phi
        beta = inferBeta(Input, Output, phi, sparsePen, derivPen, eta2, numInference)
        phi = learnTransform(Input, Output, phi, beta, eta, batchSize, numInference)

        # Rescale each transform so max |entry| == 1. An all-zero transform is
        # left untouched rather than turned into NaN by 0/0.
        peak = np.abs(phi).max(axis=(0, 1), keepdims=True)
        peak[peak == 0] = 1
        phi = phi / peak

    return phi
    

In [20]:
def normalMatrix(movie_length, dim, center, radius2, epsilon):
    """Normal-density bump concentrated on the circle ||x - center||^2 = radius2.

    For every transform k, time t and pixel (i, j) this evaluates

        N(radius2[t,k] - ||(i,j) - center[:,t,k]||^2 ; mean 0, variance epsilon)

    ``epsilon`` is the VARIANCE of the density: the prefactor is
    1/sqrt(2*pi*epsilon) and the exponent is -x^2 / (2*epsilon). The earlier
    version divided the exponent by 2*epsilon**2, which is inconsistent with
    that prefactor.

    Parameters
    ----------
    movie_length : int
        Number of time steps T to evaluate.
    dim : int
        Side length d of the square image (integral floats are accepted).
    center : ndarray, shape (2, T, K)
        Row/column coordinates of each window centre.
    radius2 : ndarray, shape (T, K)
        Squared radii.
    epsilon : float
        Variance of the normal density.

    Returns
    -------
    ndarray, shape (K, T, d, d)
    """
    dim = int(dim)
    center = np.asarray(center, dtype=float)
    radius2 = np.asarray(radius2, dtype=float)

    grid = np.arange(dim, dtype=float)
    # Bring (T, K) parameter arrays to (K, T, 1, 1) so they broadcast over pixels.
    c_row = center[0, :movie_length, :].T[:, :, None, None]
    c_col = center[1, :movie_length, :].T[:, :, None, None]
    r2 = radius2[:movie_length, :].T[:, :, None, None]

    dist2 = (grid[None, None, :, None] - c_row) ** 2 + (grid[None, None, None, :] - c_col) ** 2
    gap = r2 - dist2  # zero exactly on the window boundary

    return np.exp(-gap ** 2 / (2 * epsilon)) / np.sqrt(2 * np.pi * epsilon)

def center_ParDeriv(movie_length, dim, center, radius2, epsilon):
    """Smoothed partial derivative of the circular window w.r.t. its centre.

    For every transform k, time t and pixel x = (i, j) the result is the
    2-vector

        2 * (x - center[:,t,k]) * N(radius2[t,k] - ||x - center[:,t,k]||^2 ; 0, epsilon)

    where N is the normal density with variance ``epsilon``.

    Fixes relative to the earlier version:
    * the offset term is now filled for EVERY (k, t). Previously the fill loop
      ran after the k/t loops had finished, so only the last (k, t) slice was
      written and the rest of the np.empty buffer was uninitialised memory;
    * the exponent uses 2*epsilon (variance), matching the 1/sqrt(2*pi*epsilon)
      prefactor, instead of 2*epsilon**2.

    Parameters
    ----------
    movie_length : int
        Number of time steps T.
    dim : int
        Side length d of the square image (integral floats are accepted).
    center : ndarray, shape (2, T, K)
    radius2 : ndarray, shape (T, K)
        Squared radii.
    epsilon : float
        Variance of the normal density.

    Returns
    -------
    ndarray, shape (2, K, T, d, d)
        Axis 0 selects the row / column component of the centre.
    """
    dim = int(dim)
    center = np.asarray(center, dtype=float)
    radius2 = np.asarray(radius2, dtype=float)

    grid = np.arange(dim, dtype=float)
    # (T, K) -> (K, T, 1, 1) so parameters broadcast across the pixel grid.
    c_row = center[0, :movie_length, :].T[:, :, None, None]
    c_col = center[1, :movie_length, :].T[:, :, None, None]
    r2 = radius2[:movie_length, :].T[:, :, None, None]

    d_row = grid[None, None, :, None] - c_row  # i - c_row, shape (K, T, d, 1)
    d_col = grid[None, None, None, :] - c_col  # j - c_col, shape (K, T, 1, d)

    gap = r2 - (d_row ** 2 + d_col ** 2)
    bump = np.exp(-gap ** 2 / (2 * epsilon)) / np.sqrt(2 * np.pi * epsilon)  # (K, T, d, d)

    centerPD = np.empty((2,) + bump.shape)
    centerPD[0] = 2 * d_row * bump
    centerPD[1] = 2 * d_col * bump
    return centerPD

In [16]:
def localImage(Input2, movie_length, dim, center, radius2, epsilon):
    """Mask each frame with a circular window, one window per transform and time step.

    A pixel (i, j) is kept (multiplied by 1) for transform k at time t when

        ||(i, j) - center[:, t, k]||^2 <= (1 + epsilon) * radius2[t, k]

    and zeroed otherwise. The test is written without dividing by radius2 so
    that a zero squared radius keeps only a pixel lying exactly on the centre
    (instead of producing NaN from 0/0) and a negative squared radius gives an
    empty window (instead of selecting the whole image).

    Parameters
    ----------
    Input2 : ndarray, shape (T, d, d)
        Frames as 2-D images.
    movie_length : int
        Number of time steps T.
    dim : int
        Side length d of the square image (integral floats are accepted).
    center : ndarray, shape (2, T, K)
    radius2 : ndarray, shape (T, K)
        Squared radii.
    epsilon : float
        Relative slack on the window boundary.

    Returns
    -------
    ndarray of float, shape (K, T, d, d)
    """
    dim = int(dim)
    center = np.asarray(center, dtype=float)
    radius2 = np.asarray(radius2, dtype=float)

    grid = np.arange(dim, dtype=float)
    # (T, K) -> (K, T, 1, 1) so parameters broadcast across the pixel grid.
    c_row = center[0, :movie_length, :].T[:, :, None, None]
    c_col = center[1, :movie_length, :].T[:, :, None, None]
    r2 = radius2[:movie_length, :].T[:, :, None, None]

    dist2 = (grid[None, None, :, None] - c_row) ** 2 + (grid[None, None, None, :] - c_col) ** 2
    inside = (dist2 <= (1 + epsilon) * r2).astype(float)  # (K, T, d, d)

    # Input2 (T, d, d) broadcasts against the leading transform axis.
    return np.multiply(inside, Input2)

In [17]:
def InferRadius2(Input2, center, Output, radius2, phi, beta, eta3, epsilon, attStep):
    """Refine the squared radii of the attention windows for one movie.

    Performs ``attStep`` additive updates of ``radius2`` using the gradient of
    the squared prediction error, with the hard window replaced by the smooth
    bump from normalMatrix when differentiating.

    The sequence length and image side are taken from ``Input2`` itself. The
    earlier version read ``movie_length``, ``dim`` and ``Input`` from names
    that are neither parameters nor defined at module level.

    Parameters
    ----------
    Input2 : ndarray, shape (T, d, d)
        Source frames as square 2-D images.
    center : ndarray, shape (2, T, K)
    Output : ndarray, shape (T, d*d)
        Frames to be predicted (flattened).
    radius2 : ndarray, shape (T, K)
        Squared radii. Updated IN PLACE and also returned.
    phi : ndarray, shape (d*d, d*d, K)
    beta : ndarray, shape (T, K)
    eta3 : float
        Step size.
    epsilon : float
        Passed to localImage and normalMatrix.
    attStep : int
        Number of update steps.

    Returns
    -------
    ndarray, shape (T, K)
    """
    movie_length, dim = Input2.shape[0], Input2.shape[1]
    flat_shape = (center.shape[2], movie_length, dim * dim)  # (K, T, n2)

    for _ in range(attStep):
        # localized images, flattened per frame
        localimg = localImage(Input2, movie_length, dim, center, radius2, epsilon).reshape(flat_shape)

        predictions = np.einsum('tk,ijk,ktj->ti', beta, phi, localimg)
        # squash_deriv(predictions) is indexed by the OUTPUT pixel i, so it is
        # folded into the residual rather than contracted over input pixels j.
        backprop = (Output - squash(predictions)) * squash_deriv(predictions)

        # ParDeriv: derivative of the localized image w.r.t. radius2, flattened
        ParDeriv = np.multiply(normalMatrix(movie_length, dim, center, radius2, epsilon), Input2).reshape(flat_shape)

        dR = np.einsum('ti,tk,ijk,ktj->tk', backprop, beta, phi, ParDeriv)
        radius2 += eta3 * dR

    return radius2

In [21]:
def InferCenter(Input2, center, Output, radius2, phi, beta, eta4, epsilon, attStep=None):
    """Refine the centres of the attention windows for one movie.

    Performs ``attStep`` additive updates of ``center`` using the gradient of
    the squared prediction error, with the hard window differentiated via the
    smooth surrogate from center_ParDeriv.

    Fixes relative to the earlier version:
    * ``movie_length``, ``dim`` and the flattened frame size are derived from
      ``Input2`` instead of being read from undefined names;
    * the window derivative is multiplied by the image (the localized image is
      window * Input2, so its derivative is d(window) * Input2), mirroring
      what InferRadius2 does for the radius;
    * the residual is back-propagated through squash_deriv, which was computed
      but never used.

    Parameters
    ----------
    Input2 : ndarray, shape (T, d, d)
        Source frames as square 2-D images.
    center : ndarray, shape (2, T, K)
        Window centres. Updated IN PLACE and also returned.
    Output : ndarray, shape (T, d*d)
        Frames to be predicted (flattened).
    radius2 : ndarray, shape (T, K)
        Squared radii.
    phi : ndarray, shape (d*d, d*d, K)
    beta : ndarray, shape (T, K)
    eta4 : float
        Step size.
    epsilon : float
        Passed to localImage and center_ParDeriv.
    attStep : int, optional
        Number of update steps. When omitted, the module-level ``attStep`` is
        used, which is what this function always did before the parameter
        existed.

    Returns
    -------
    ndarray, shape (2, T, K)
    """
    if attStep is None:
        attStep = globals()["attStep"]  # backward-compatible fallback

    movie_length, dim = Input2.shape[0], Input2.shape[1]
    n_transforms = center.shape[2]
    n_pixels = dim * dim

    for _ in range(attStep):
        # localized images, flattened per frame
        localimg = localImage(Input2, movie_length, dim, center, radius2, epsilon).reshape((n_transforms, movie_length, n_pixels))

        predictions = np.einsum('tk,ijk,ktj->ti', beta, phi, localimg)
        backprop = (Output - squash(predictions)) * squash_deriv(predictions)

        # centerPD: derivative of the localized image w.r.t. each centre
        # component. (2, K, T, d, d) * (T, d, d) broadcasts over c and k.
        window_pd = center_ParDeriv(movie_length, dim, center, radius2, epsilon)
        centerPD = np.multiply(window_pd, Input2).reshape((2, n_transforms, movie_length, n_pixels))

        dC = np.einsum('ti,tk,ijk,cktj->ctk', backprop, beta, phi, centerPD)
        center += eta4 * dC

    return center

In [22]:
def InferBeta2(Input2, center, Output, radius2, phi ,beta, eta, epsilon, betaStep):
    """Refine the transform coefficients beta for one movie, with windows fixed.

    Performs ``betaStep`` additive updates of ``beta`` on the squared
    prediction error of the localized model, minus the sparsity and
    temporal-difference terms.

    Fixes relative to the earlier version:
    * ``movie_length``, ``dim`` and the flattened frame size are derived from
      ``Input2`` instead of being read from undefined names;
    * squash_deriv(predictions) is applied on the output-pixel axis (it was
      contracted over the input-pixel axis).

    NOTE: ``sparsePen``, ``derivPen`` and the step size ``eta2`` are still read
    from module level, exactly as before; the ``eta`` argument is accepted for
    interface compatibility but is not used.

    Parameters
    ----------
    Input2 : ndarray, shape (T, d, d)
        Source frames as square 2-D images.
    center : ndarray, shape (2, T, K)
    Output : ndarray, shape (T, d*d)
        Frames to be predicted (flattened).
    radius2 : ndarray, shape (T, K)
        Squared radii.
    phi : ndarray, shape (d*d, d*d, K)
    beta : ndarray, shape (T, K)
        Coefficients. Updated IN PLACE and also returned.
    eta : float
        Unused (see note above).
    epsilon : float
        Passed to localImage.
    betaStep : int
        Number of update steps.

    Returns
    -------
    ndarray, shape (T, K)
    """
    movie_length, dim = Input2.shape[0], Input2.shape[1]
    flat_shape = (center.shape[2], movie_length, dim * dim)  # (K, T, n2)

    for _ in range(betaStep):
        # localized images, flattened per frame
        localimg = localImage(Input2, movie_length, dim, center, radius2, epsilon).reshape(flat_shape)

        predictions = np.einsum('tk,ijk,ktj->ti', beta, phi, localimg)
        backprop = (Output - squash(predictions)) * squash_deriv(predictions)
        data_term = np.einsum('ti,ijk,ktj->tk', backprop, phi, localimg)

        # timeDiff expects a leading batch axis: add one, then strip it again.
        smooth_term = timeDiff(beta[np.newaxis])[0]

        dBeta = data_term - sparsePen * np.sign(beta) - derivPen * smooth_term
        beta += eta2 * dBeta

    return beta

In [2]:
def InferLocal(movie_seq, phi, sparsePen, derivPen, eta2, eta3, eta4, betaStep, attStep, numInference):
    """Infer beta, window centres and squared radii for a single movie, then plot.

    Alternates InferBeta2 -> InferRadius2 -> InferCenter for ``numInference``
    rounds, then shows ground-truth frames (top row) against the model's
    predictions (bottom row).

    ***RADII ARE SQUARED, to make computing the derivative easier.***

    Fixes relative to the earlier version:
    * the plotting loop was over-indented, which made the whole cell a syntax
      error;
    * ``dim`` is now an int (np.sqrt returns a float, which reshape rejects);
    * the plot reshapes to (dim, dim) instead of a hard-coded (32, 32);
    * the initial squared radius is dim^2 / 2, the squared distance from the
      image centre to the farthest corner, so the starting window really is
      the entire image (17^2 left the corners of a 32x32 frame outside);
    * InferBeta2 receives ``eta2`` rather than the unrelated module-level
      ``eta``;
    * plotted predictions go through ``squash``, matching the model whose
      error the inference steps minimise;
    * the unused ``numInference2`` local is gone.

    NOTE: ``epsilon`` is read from module level. ``sparsePen`` and ``derivPen``
    are accepted here but are not forwarded: the InferBeta2 signature has no
    slot for them.

    Parameters
    ----------
    movie_seq : ndarray, shape (T+1, n2)
        One movie; n2 must be a perfect square (flattened d x d frames).
    phi : ndarray, shape (n2, n2, K)
        Trained dictionary of transforms.
    sparsePen, derivPen : float
        See note above.
    eta2, eta3, eta4 : float
        Step sizes handed to InferBeta2, InferRadius2 and InferCenter.
    betaStep, attStep : int
        Inner-loop lengths for InferBeta2 / InferRadius2.
    numInference : int
        Number of outer alternation rounds.

    Returns
    -------
    tuple (beta, radius2, center)
        Shapes (T, K), (T, K) and (2, T, K).

    Raises
    ------
    ValueError
        If the frame size is not a perfect square.
    """
    n_pixels = movie_seq.shape[1]
    dim = int(round(np.sqrt(n_pixels)))
    if dim * dim != n_pixels:
        raise ValueError("movie_seq frames must be flattened square images; got %d pixels" % n_pixels)

    Input = movie_seq[:-1]   # frames 0 .. T-1
    Output = movie_seq[1:]   # frames 1 .. T
    Input2 = Input.reshape((Input.shape[0], dim, dim))  # 2-D frames make region-of-interest indices natural
    movie_length = Input2.shape[0]
    n_transforms = phi.shape[2]

    beta = np.zeros((movie_length, n_transforms))                    # shape tk
    center = (dim / 2) * np.ones((2, movie_length, n_transforms))    # shape 2tk, start at image centre
    radius2 = (dim ** 2 / 2) * np.ones((movie_length, n_transforms)) # shape tk, window covers whole image

    for _ in range(numInference):
        beta = InferBeta2(Input2, center, Output, radius2, phi, beta, eta2, epsilon, betaStep)
        radius2 = InferRadius2(Input2, center, Output, radius2, phi, beta, eta3, epsilon, attStep)
        center = InferCenter(Input2, center, Output, radius2, phi, beta, eta4, epsilon)

    finalImg = localImage(Input2, movie_length, dim, center, radius2, epsilon).reshape((n_transforms, movie_length, n_pixels))
    final_predictions = squash(np.einsum('tk,ijk,ktj->ti', beta, phi, finalImg))

    # Top row: ground truth. Bottom row: predictions.
    if movie_length > 0:
        fig, axes = plt.subplots(2, movie_length, figsize=(20, 6), squeeze=False)
        for t in range(movie_length):
            axes[0, t].imshow(Output[t].reshape((dim, dim)), cmap="Greys")
            axes[1, t].imshow(final_predictions[t].reshape((dim, dim)), cmap="Greys")

    return (beta, radius2, center)
