In [1]:
import json
import numpy as np
import cv2
import math
import glob, os, random

The purpose of this notebook is to scale the YouTube videos. The unscaled YouTube data are stored on the p3-1 instance in `/home/ec2-user/bopbot_data/yt_data.json`.

In [2]:
#Load the data. [This takes a few minutes.]
with open('yt_data.json', 'r') as fp:
    yt_data = json.load(fp)
len(yt_data)

95

In [3]:
#Convert loaded data to numpy.
for key in yt_data:
    yt_data[key] = np.array(yt_data[key])

In [4]:
yt_data['yt0000'].shape

(4352, 28)

### Clean and scale

In [5]:
def remove_zeros(sequence):
    '''Takes a 3d array of size Nx300x28 that represents a sequence of 
    300-frame clips from a single video. The last clip is padded with zeros
    to reach length 300. This function removes the zeros so the remaining 
    frames can be scaled and centered.'''
    #Reshape to 2dim array and pull out the zeros frames.
    sequence = sequence.reshape(-1,28)
    zero_frames = []
    for frame in range(sequence.shape[0]):
        if np.all(sequence[frame] == np.zeros(28)):
            zero_frames.append(frame)
    sequence = np.delete(sequence,zero_frames, axis=0)
    return sequence

In [6]:
def scale_sequence(sequence):
    '''Takes an input dance sequence as 2d numpy array of size Nx28.
    Scales the array so that all frames are within a 500x256 canvas
    when rendered. Centers the array on the canvas. Returns the
    scaled array.'''
    #Get the range of the y-axis and scale to 95% of the 256 canvas.
    sequence = sequence.reshape(-1,14,2)
    seq_min = np.array([500,256])
    seq_max = np.array([0,0])
    for frame in sequence:
        frame_min = np.min(frame, axis=0)
        frame_max = np.max(frame, axis=0)
        if frame_min[0] < seq_min[0]:
            seq_min[0] = frame_min[0]
        if frame_min[1] < seq_min[1]:
            seq_min[1] = frame_min[1]
        if frame_max[0] > seq_max[0]:
            seq_max[0] = frame_max[0]
        if frame_max[1] > seq_max[1]:
            seq_max[1] = frame_max[1]
    y_range = seq_max[1]-seq_min[1]
    x_range = seq_max[0]-seq_min[0]
    #Scale the y values to expand/contract to 95% of the canvas.
    #Get a random draw for this video from 95+/-1
    draw = random.uniform(0.94,0.96)
    N = 256*draw/y_range
    sequence = sequence*N

    #Find the center point of the new x-range.
    x_center = (x_range*N)/2
    #Shift the x values left/right so center of range falls at center of canvas (250).
    shift_x = 250-(x_center+(seq_min[0]*N))
    #Pull out the x-axis and y-axis values separately.
    x_vals = sequence[:,:,0]
    y_vals = sequence[:,:,1]
    #Shift the x values.
    shifted_x_vals = x_vals+shift_x
    #Put the shifted x values back into the array.
    sequence = np.dstack([shifted_x_vals,y_vals]) 

    #Determine whether any of the scaled/shifted x values are out of range.
    new_min = np.array([500,256])
    new_max = np.array([0,0])
    for frame in sequence:
        frame_min = np.min(frame, axis=0)
        frame_max = np.max(frame, axis=0)
        if frame_min[0] < new_min[0]:
            new_min[0] = frame_min[0]
        if frame_min[1] < new_min[1]:
            new_min[1] = frame_min[1]
        if frame_max[0] > new_max[0]:
            new_max[0] = frame_max[0]
        if frame_max[1] > new_max[1]:
            new_max[1] = frame_max[1]

    #Scale a second time if any x values are out of range.
    M=1
    if ((new_min[0] < 0) or (new_max[0] > 500)):
        #Calculate the scaling factor needed to get within range.
        if 0-new_min[0] > new_max[0]-500:
            M = (500*draw)/(500-new_min[0])
        else:
            M = (500*draw)/(new_max[0])
        sequence = sequence*M
    #Find the center point of the new y-range.
    y_center = (y_range*N*M)/2
    #Shift the y values up/down so center of range falls at center of canvas (128).
    #(This needs to happen after the second scaling.)
    shift_y = 128-(y_center+(new_min[1]*M))
    #Pull out the x-axis and y-axis values separately.
    x_vals = sequence[:,:,0]
    y_vals = sequence[:,:,1]
    #Shift the y values.
    shifted_y_vals = y_vals+shift_y
    #Put the shifted axes back together.
    sequence = np.dstack([x_vals,shifted_y_vals]) 

    return sequence

In [8]:
new_yt_dict = {}
for key in yt_data:
    new_seq = remove_zeros(yt_data[key])
    new_yt_dict[key] = scale_sequence(new_seq).tolist()

MemoryError: Unable to allocate 52.9 MiB for an array with shape (495157, 14) and data type float64

### Save new data

Save the cleaned and scaled data.

In [None]:
with open('../bopbot_data/yt_scaled.json', 'w') as fp:
    json.dump(new_yt_dict, fp)

### Viz

Visualize the out of range sequences.

In [13]:
for key in out_of_range_dict:
    video = d2m_dict_data[key]
    for i in range(video.shape[0]):
        frame_num = f'{i:05d}'
        img = vis_single(video[i], f'rendered/outfile_{frame_num}.jpg')
    images = glob.glob("rendered/*.jpg")
    make_video(images, outvid = f'rendered/{key}.avi',fps=20)    

In [8]:
# Function from D2M takes a numpy array, "pose" of the 14 D2M keypoints,
# saves a rendered figure as "outfile".
def vis_single(pose, outfile):
  colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0], \
          [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], \
          [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85]]

  # find connection in the specified sequence, center 29 is in the position 15
  limbSeq = [[2,3], [2,6], [3,4], [4,5], [6,7], [7,8], [2,9], [9,10], \
           [10,11], [2,12], [12,13], [13,14], [2,1], [1,15], [15,17], \
           [1,16], [16,18], [3,17], [6,18]]

  neglect = [14,15,16,17]

  for t in range(1):
    #break
    canvas = np.ones((256,500,3), np.uint8)*255
    canvas[0] = 1
    canvas[255] = 1
    for i in range(256):
        canvas[i][0] = 1
        canvas[i][499] = 1

    thisPeak = pose
    for i in range(18):
      if i in neglect:
        continue
      if thisPeak[i,0] == -1:
        continue
      cv2.circle(canvas, tuple(thisPeak[i,0:2].astype(int)), 4, colors[i], thickness=-1)

    for i in range(17):
      limbid = np.array(limbSeq[i])-1
      if limbid[0] in neglect or limbid[1] in neglect:
        continue
      X = thisPeak[[limbid[0],limbid[1]], 1]
      Y = thisPeak[[limbid[0],limbid[1]], 0]
      if X[0] == -1 or Y[0]==-1 or X[1]==-1 or Y[1]==-1:
        continue
      stickwidth = 4
      cur_canvas = canvas.copy()
      mX = np.mean(X)
      mY = np.mean(Y)
      length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
      angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
      polygon = cv2.ellipse2Poly((int(mY),int(mX)), (int(length/2), stickwidth), int(angle), 0, 360, 1)
      cv2.fillConvexPoly(cur_canvas, polygon, colors[i])
      canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0)
    cv2.imwrite(outfile,canvas)
    return canvas


In [11]:
# Taken from Stephanie's notebook.
def make_video(images, outvid=None, fps=5, size=None,
               is_color=True, format='MP42'):
    """
    Create a video from a list of images.
 
    @param      outvid      output video
    @param      images      list of images to use in the video
    @param      fps         frame per second
    @param      size        size of each frame
    @param      is_color    color
    @param      format      see http://www.fourcc.org/codecs.php
    @return                 see http://opencv-python-tutroals.readthedocs.org/en/latest/py_tutorials/py_gui/py_video_display/py_video_display.html
 
    The function relies on http://opencv-python-tutroals.readthedocs.org/en/latest/.
    By default, the video will have the size of the first image.
    It will resize every image to this size before adding them to the video.
    MODIFIED FROM: http://www.xavierdupre.fr/blog/2016-03-30_nojs.html
    """
    from cv2 import VideoWriter, VideoWriter_fourcc, imread, resize
    fourcc = VideoWriter_fourcc(*format)
    vid = None
    for image in images:
        #print(image)
        if not os.path.exists(image):
            raise FileNotFoundError(image)
        img = imread(image)
        if vid is None:
            if size is None:
                size = img.shape[1], img.shape[0]
            vid = VideoWriter(outvid, fourcc, float(fps), size, is_color)
        if size[0] != img.shape[1] and size[1] != img.shape[0]:
            img = resize(img, size)
        vid.write(img)
        os.remove(image)
    vid.release()
    return vid