# Touring the latent space, but smoother

Yeah so this needs to be smoother. Had previous experiments where the weights were perturbed in one direction, which produces the intended smooth transition effect. The key here is that the direction of travel needs to be fixed for a time duration so that it is sufficiently smooth.

In [1]:
import torch.nn as nn
import torch
from PIL import Image
import cv2
from tqdm import tqdm

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import random
%matplotlib inline

## Travelling from point to point

An idea comes to mind. Starting from a point in the latent space, bounded at min and max values, a random point in the latent space is picked, and a path is generated for the next N steps required to move in that direction until the point is arrived at, of which then a new point can be picked.

In [6]:
# Conceptually we start with two points, start and end
start = np.random.uniform(low=-1, high=1, size=(2,))
end = np.random.uniform(low=-1, high=1, size=(2,))

In [7]:
start, end

(array([0.41542216, 0.98174289]), array([-0.0688291 ,  0.29505787]))

In [9]:
# Calculate distance because we want to determine steps and direction
dist = np.linalg.norm(end - start)

In [19]:
# Divide by stepsize to find number of steps to move
# Doesn't need to be exact, just need the actual stepsize to be somewhat close
# Which is why we're not bothering with traversing exact distances
stepsize = 0.01
stepcount = int(dist / stepsize)

In [20]:
# And then we pass to np.linspace to generate the steps
# If np.arange supports array-like input, we don't need to bother w/
# distance to get consistent step sizes
steps = np.linspace(start, end, stepcount)

In [21]:
steps.shape

(84, 2)

In [54]:
def explore_ls(
    dims=3, 
    init_coord=np.array([0, 0, 0]), 
    iterations=1000, 
    stepsize=0.01, 
    min_coord=np.array([-1, -1, -1]), 
    max_coord=np.array([1, 1, 1]), 
):
    """
    Explore latent space by travelling to randomly selected point.
    
    Input
    -----
    dims: int
    init_coord: iterable
    iterations: int
    stepsize: int
    min_coord: iterable
    max_coord: iterable
    
    Output
    ------
    pos_arr: np.ndarray
    path_lenghts: List[Int]
    """
    # Check to ensure all inputs have the correct dimensions
    assert len(init_coord) == dims
    assert len(min_coord) == dims
    assert len(max_coord) == dims
    
    # Transform all to numpy array
    if not isinstance(init_coord, np.ndarray):
        init_coord = np.array(init_coord)
        
    if not isinstance(min_coord, np.ndarray):
        min_coord = np.array(min_coord)
        
    if not isinstance(max_coord, np.ndarray):
        max_coord = np.array(max_coord)
    
    # Initialize list to store positions
    pos_list = []
    current_iterations = 0
    path_lengths = []
    current_point = init_coord
    
    # Iterate to find the probability thresholds
    while current_iterations < iterations:
        
        # Find next point
        next_point = np.random.uniform(low=min_coord, high=max_coord, size=(dims,))
        
        # Calculate distance because we want to determine steps and direction
        dist = np.linalg.norm(next_point - current_point)
        stepcount = int(dist / stepsize)
        steps = np.linspace(current_point, next_point, stepcount)
        
        # Update
        current_iterations += len(steps)
        path_lengths.append(len(steps))
        current_point = next_point
        pos_list.append(steps)
        
    # Convert to array
    pos_arr = np.concatenate(pos_list, axis=0)
    
    return pos_arr[:iterations], path_lengths

In [56]:
pos, path_lengths = explore_ls(dims=3, init_coord=[0, 0, 0], iterations=10000, 
              stepsize=0.1, min_coord=[-1, -1, -1], max_coord=[1, 1, 1])

## Code from previous notebook with added polish

In [44]:
class Net(nn.Module):
    """
    Describes a generative CPPN that takes `x`, `y`, and optionally distance to origin as input,
    and outputs 3-channel / 1-channel pixel intensity.
    """

    def __init__(
        self,
        num_hidden_layers=4,
        num_neurons=8,
        latent_len=3,
        include_bias=True,
        include_dist_to_origin=True,
        rgb=True,
    ):
        """
        Initializes the CPPN.

        Inputs
        ------
        num_hidden_layers: int
            Number of hidden layers in the network.

        num_neurons: int
            Number of neurons in each hidden layer.

        latent_len: int
            Length of latent vector

        include_bias: bool
            If True, includes bias term in input layer.

        include_dist_to_origin: bool
            If True, includes distance to origin as one of the inputs.

        rgb: bool
            If True, produces 3-channel output. Else, produces 1-channel output.

        Output
        ------
        None
        """
        super(Net, self).__init__()

        # Input layer
        if include_dist_to_origin:
            layers = [
                nn.Linear(3 + latent_len, num_neurons, bias=include_bias),
                nn.Tanh(),
            ]
        else:
            layers = [
                nn.Linear(2 + latent_len, num_neurons, bias=include_bias),
                nn.Tanh(),
            ]

        # Hidden layers
        layers.extend(
            num_hidden_layers
            * [
                nn.Linear(num_neurons, num_neurons, bias=False),
                nn.Tanh(),
            ]
        )

        # Output layer
        if rgb:
            layers.extend([nn.Linear(num_neurons, 3, bias=False), nn.Sigmoid()])
        else:
            layers.extend([nn.Linear(num_neurons, 1, bias=False), nn.Sigmoid()])

        # Assign layers to self.layers
        self.layers = nn.Sequential(*layers)

        # Run weight init
        self.init_weights()

    def forward(self, loc_vec, latent_vec):
        """
        `forward` function for the generative network.

        Input
        -----
        loc_vec, latent_vec: torch.Tensor
            Location vector and latent vector.
            Location vector should have shape (N, 2) or shape (N, 3).
            Latent vector should have shape (N, `latent_len`)

        Output
        ------
        x: torch.Tensor
        """
        x = torch.cat([loc_vec, latent_vec], dim=1)
        x = self.layers(x)
        return x

    def _init_weights(self, m):
        """
        Function to apply to the generative network (literally with `Net.apply()`) to initialize
        network weights properly. Required as the default initialization is for deep learning
        training, while we're only interested in starting all layers with a normal distribution.

        Ref: https://stackoverflow.com/questions/49433936/how-to-initialize-weights-in-pytorch

        Input
        -----
        m: nn.Modules (I think)

        Output
        ------
        None
        """
        if type(m) == nn.Linear:
            nn.init.normal_(m.weight, mean=0, std=1)

    def init_weights(self):
        """
        Initializes the weights of the network.

        Input
        -----
        None

        Output
        ------
        None
        """
        self.apply(self._init_weights)

In [45]:
def create_input(img_width, img_height, include_dist_to_origin=True):
    """
    Creates the input for the generative net.

    Input
    -----
    img_width, img_height: int
    include_dist_to_origin: bool

    Output
    ------
    input_arr: np.ndarray
        Should have shape (img_width * img_height, 2)
    """
    # Create vectors of xs and ys
    xs = np.linspace(start=-1, stop=1, num=img_width)
    ys = np.linspace(start=-1, stop=1, num=img_height)

    # Use np.meshgrid to create a mesh grid
    xv, yv = np.meshgrid(xs, ys)
    input_arr = np.stack((xv, yv), axis=2)

    if include_dist_to_origin:
        dist_to_origin = np.sum(np.square(input_arr), axis=2, keepdims=True)
        input_arr = np.concatenate([input_arr, dist_to_origin], axis=2)
        input_arr = input_arr.reshape(img_width * img_height, 3)
    else:
        input_arr = input_arr.reshape(img_width * img_height, 2)

    return input_arr


In [46]:
def generate_one_art(
    net, latent_vec, input_config={"img_width": 320, "img_height": 320}
):
    """
    Wrapper function to generate a single image output from the given network.

    Input
    -----
    net: Net
    latent_vec: torch.Tensor
    input_config: dict
        Dict of parameters to be passed to `create_input` as kwargs.

    Output
    ------
    net_output: np.ndarray
        Should have shape (y, x, 3) or (y, x, 1)
    """
    # Create input to net, and convert from ndarray to torch.FloatTensor
    net_input = torch.tensor(create_input(**input_config)).float()

    # Create input array from latent_vec, and convert from ndarray to torch.FloatTensor
    latent_vec = np.expand_dims(latent_vec, axis=0)
    latent_vec = np.repeat(latent_vec, repeats=net_input.shape[0], axis=0)
    latent_vec = torch.tensor(latent_vec).float()

    assert net_input.shape == latent_vec.shape

    # Run input through net
    net_output = net(net_input, latent_vec).detach().numpy()

    # Reshape into (y, x, 3) for plotting in PIL
    net_output = net_output.reshape(
        input_config["img_height"], input_config["img_width"], -1
    )

    # Re-format to color output
    # Scale to range 0 to 255, and set type to int
    net_output = (net_output * 255).astype(np.uint8)
    return net_output

## Explore latent space with random walk

In [59]:
total_seconds = 10
fps = 24
iterations = total_seconds * fps

latent_arr, path_lengths = explore_ls(
    iterations=iterations, min_coord=[-2, -2, -2], max_coord=[2, 2, 2], stepsize=0.05
)
print(path_lengths)
net = Net(num_hidden_layers=2, num_neurons=64)

imgs = []

for i in tqdm(range(iterations)):
    out = generate_one_art(net, latent_vec=latent_arr[i], input_config={"img_width": 640, "img_height": 320})
    imgs.append(out)

  0%|          | 0/240 [00:00<?, ?it/s]

[44, 63, 60, 46, 82]


100%|██████████| 240/240 [01:12<00:00,  3.32it/s]


Save to video:

In [60]:
video = cv2.VideoWriter("../output/tour-latent-space.avi", cv2.VideoWriter_fourcc(*'XVID'), 24, (640, 320))
for image in imgs:
    video.write(image)

Oh yeah this is a lot better. 

The shift in direction is a bit abrupt, probably possible to incorporate some kind of deceleration and acceleration while transitioning between points. Even better would be to fit splines, though from preliminary Googling, `scipy` has implementations only for 1D and 2D splines. Went off on a tangent reading about motion control profiles, decided to keep it simple.

In [61]:
np.linspace(0, 10)

array([ 0.        ,  0.20408163,  0.40816327,  0.6122449 ,  0.81632653,
        1.02040816,  1.2244898 ,  1.42857143,  1.63265306,  1.83673469,
        2.04081633,  2.24489796,  2.44897959,  2.65306122,  2.85714286,
        3.06122449,  3.26530612,  3.46938776,  3.67346939,  3.87755102,
        4.08163265,  4.28571429,  4.48979592,  4.69387755,  4.89795918,
        5.10204082,  5.30612245,  5.51020408,  5.71428571,  5.91836735,
        6.12244898,  6.32653061,  6.53061224,  6.73469388,  6.93877551,
        7.14285714,  7.34693878,  7.55102041,  7.75510204,  7.95918367,
        8.16326531,  8.36734694,  8.57142857,  8.7755102 ,  8.97959184,
        9.18367347,  9.3877551 ,  9.59183673,  9.79591837, 10.        ])

In [68]:
np.logspace(np.log10(10), np.log10(5), 10)

array([10.        ,  9.25874712,  8.57243983,  7.93700526,  7.34867246,
        6.80395   ,  6.29960525,  5.8326452 ,  5.40029869,  5.        ])

In [86]:
def explore_ls(
    dims=3, 
    init_coord=np.array([0, 0, 0]), 
    iterations=1000, 
    stepsize=0.01, 
    min_coord=np.array([-1, -1, -1]), 
    max_coord=np.array([1, 1, 1]), 
    smooth_start_stop=True
):
    """
    Explore latent space by travelling to a randomly selected point. 
    This process is repeated until the number of steps as specified by
    `iterations` is exceeded, of which the steps are truncated and 
    returned together with the path lengths.
    
    Input
    -----
    dims: int
    init_coord: iterable
    iterations: int
    stepsize: int
    min_coord: iterable
    max_coord: iterable
    smooth_start_stop: bool
    
    Output
    ------
    pos_arr: np.ndarray
    path_lengths: List[Int]
    """
    # Check to ensure all inputs have the correct dimensions
    assert len(init_coord) == dims
    assert len(min_coord) == dims
    assert len(max_coord) == dims
    
    # Transform all to numpy array
    if not isinstance(init_coord, np.ndarray):
        init_coord = np.array(init_coord)
        
    if not isinstance(min_coord, np.ndarray):
        min_coord = np.array(min_coord)
        
    if not isinstance(max_coord, np.ndarray):
        max_coord = np.array(max_coord)
    
    # Initialize list to store positions
    pos_list = []
    current_iterations = 0
    path_lengths = []
    current_point = init_coord
    
    # Iterate to find the probability thresholds
    while current_iterations < iterations:
        
        # Find next point
        next_point = np.random.uniform(low=min_coord, high=max_coord, size=(dims,))
        
        # Calculate distance because we want to determine steps and direction
        dist = np.linalg.norm(next_point - current_point)
        stepcount = int(dist / stepsize)
        steps = np.linspace(current_point, next_point, stepcount)
        
        # Smooth decel and accel
        if smooth_start_stop is True:
            # Decel final 20% of path
            # 80% to 90% runs at 1.5x steps
            # 90% to 100% runs at 3x steps
            decel_point1 = int(stepcount * 0.8)
            decel_point2 = int(stepcount * 0.9)
            decel_path1 = np.linspace(
                steps[decel_point1],
                steps[decel_point2], 
                int(1.5 * (decel_point2 - decel_point1))
            )
            decel_path2 = np.linspace(
                steps[decel_point2],
                steps[-1], 
                int(3 * (stepcount - decel_point2))
            )

            # Accel first 10% of path
            # 0% to 10% stretched into 3x steps
            # 10% to 20% stretched into 1.5x steps
            # Pretty much reverse of above
            accel_point1 = int(stepcount * 0.1)
            accel_point2 = int(stepcount * 0.2)
            accel_path1 = np.linspace(
                steps[0],
                steps[accel_point1], 
                int(3 * (accel_point1 - 0))
            )
            accel_path2 = np.linspace(
                steps[accel_point1],
                steps[accel_point2], 
                int(1.5 * (accel_point2 - accel_point1))
            )

            steps = np.concatenate([
                accel_path1, accel_path2,
                steps[accel_point2:decel_point1],
                decel_path1, decel_path2
            ])
            
        
        # Update
        current_iterations += len(steps)
        path_lengths.append(len(steps))
        current_point = next_point
        pos_list.append(steps)
        
    # Convert to array
    pos_arr = np.concatenate(pos_list, axis=0)
    
    return pos_arr[:iterations], path_lengths

In [88]:
total_seconds = 10
fps = 24
iterations = total_seconds * fps

latent_arr, path_lengths = explore_ls(
    iterations=iterations, min_coord=[-2, -2, -2], max_coord=[2, 2, 2], stepsize=0.03
)
print(path_lengths)
net = Net(num_hidden_layers=2, num_neurons=64)

imgs = []

for i in tqdm(range(iterations)):
    out = generate_one_art(net, latent_vec=latent_arr[i], input_config={"img_width": 640, "img_height": 320})
    imgs.append(out)

  0%|          | 0/240 [00:00<?, ?it/s]

[70, 71, 114]


100%|██████████| 240/240 [01:09<00:00,  3.48it/s]


In [89]:
video = cv2.VideoWriter("../output/tour-latent-space.avi", cv2.VideoWriter_fourcc(*'XVID'), 24, (640, 320))
for image in imgs:
    video.write(image)

Hmm this is a slight improvement, would really still prefer the path to be a smooth spline. Either way, this is good enough.