# Import Libraries

In [1]:
import numpy as np
import cv2
import glob
import cvxpy as cp
import pickle

# Global Variables

In [2]:
# Root folder of the dataset; the video and track paths used later are built from it by string concatenation.
DATASET_FOLDER = "../theatre_dataset/"
# Name shared by the video's sub-folder and its .mp4 file (see the VideoCapture path in the loading cell).
VIDEO_FOLDER = "braunfels_1"
# Factor applied to a view's half-height (fs_t) to obtain its half-width in the cropping constraints.
ASPECT_RATIO = 0.95 # 4/3 for carol_2, 0.95 for braunfels_1

# Load files

In [3]:
# Read only the metadata of the input video; no frame is decoded in this cell.
video_input_path = DATASET_FOLDER + VIDEO_FOLDER + "/" + VIDEO_FOLDER + ".mp4"
file_video_input = cv2.VideoCapture(video_input_path)
try:
    # cv2.VideoCapture does not raise when the file is missing or unreadable: every property
    # would silently read as 0 and the failure would only surface in later cells.
    if not file_video_input.isOpened():
        raise IOError("Error: Could not open video " + video_input_path)

    # FOURCC packed into an integer, least significant byte first (828601953 decodes to "avc1").
    # The original author flagged this value as "not working"; the reason is not visible here.
    VIDEO_INPUT_CODEC = int(file_video_input.get(cv2.CAP_PROP_FOURCC)) # not working
    # (width, height) in pixels
    VIDEO_INPUT_FRAME_SIZE = (
        int(file_video_input.get(cv2.CAP_PROP_FRAME_WIDTH)),
        int(file_video_input.get(cv2.CAP_PROP_FRAME_HEIGHT)),
    )
    VIDEO_INPUT_FPS = file_video_input.get(cv2.CAP_PROP_FPS)
    VIDEO_INPUT_FRAMES_COUNT = int(file_video_input.get(cv2.CAP_PROP_FRAME_COUNT))
finally:
    # Always give the capture handle back, even when a property read fails.
    file_video_input.release()

In [4]:
# Show the metadata read above: codec FOURCC (as int), (width, height), frames per second, frame count.
VIDEO_INPUT_CODEC, VIDEO_INPUT_FRAME_SIZE, VIDEO_INPUT_FPS, VIDEO_INPUT_FRAMES_COUNT

(828601953, (3840, 2160), 29.97002997002997, 2278)

In [5]:
# Bounding boxes of every actor: one column per track file, one row per frame
# (dimensions = frames count x no of actors, dtype int16).
# Track columns are read as x1, y1, x2, y2. Every list starts with a (frames count x 0) array so that
# the concatenated result keeps the expected row count and dtype even when no track file is found.
track_columns = [
    [np.empty(shape=(VIDEO_INPUT_FRAMES_COUNT, 0), dtype=np.int16)]
    for _ in range(4)
]
for track_path in sorted(glob.glob(DATASET_FOLDER + VIDEO_FOLDER + "/tracks/*.txt")):
    # for carol_2 the tracks are comma separated: np.loadtxt(track_path, delimiter=',')
    track = np.rint(np.loadtxt(track_path)).astype(np.int16) # for braunfels_1
    # Repeat the last row once so the track gains one extra frame.
    track = np.concatenate((track, track[[-1], :]), axis=0)
    for column_index, collected in enumerate(track_columns):
        collected.append(track[:, [column_index]])

bx1_t_orig, by1_t_orig, bx2_t_orig, by2_t_orig = (
    np.concatenate(collected, axis=1) for collected in track_columns
)

# Per-frame, per-actor box summaries (all frames count x no of actors)
x_t = np.floor_divide(bx1_t_orig + bx2_t_orig, 2) # horizontal midpoint of the bbox
y_t = np.floor_divide(by1_t_orig + by2_t_orig, 2) # vertical midpoint of the bbox
s_t = np.floor_divide(by2_t_orig - by1_t_orig, 2) # half of the bbox height
w_t = np.floor_divide(bx2_t_orig - bx1_t_orig, 2) # half of the bbox width

# For every frame, actor indices ordered by increasing horizontal midpoint.
actors_left_to_right = np.argsort(x_t, axis=1)

# Layout selection algorithm

In [6]:
# Overlap cost between horizontally adjacent actors (dimensions = frames count x no of actors - 1):
# ASPECT_RATIO * (sum of the two half-heights) minus the distance between the two midpoints.
frame_rows = np.arange(VIDEO_INPUT_FRAMES_COUNT)[:, np.newaxis]
s_left_to_right = s_t[frame_rows, actors_left_to_right]
x_left_to_right = x_t[frame_rows, actors_left_to_right]
overlap_cost_left_to_right = ASPECT_RATIO * (s_left_to_right[:, :-1] + s_left_to_right[:, 1:]) \
    - np.abs(x_left_to_right[:, :-1] - x_left_to_right[:, 1:])

# One row per layout (4 x frames count); each row is a signed combination of the two neighbour costs.
left_pair_cost = overlap_cost_left_to_right[:, 0]
right_pair_cost = overlap_cost_left_to_right[:, 1]
graph_nodes = np.stack((
    left_pair_cost + right_pair_cost,
    -left_pair_cost + right_pair_cost,
    left_pair_cost - right_pair_cost,
    -left_pair_cost - right_pair_cost,
), axis=0)

# Forward pass: dynamic programming over frames.
lamb = 5
# Cost of switching between layouts; np.inf forbids a direct switch between layouts 1 and 2.
graph_cost = lamb * np.array([[0, 1, 1, 2], [1, 0, np.inf, 1], [1, np.inf, 0, 1], [2, 1, 1, 0]])
# Last axis: [0] = minimum accumulated cost, [1] = best previous layout (used to trace back)
graph_costs = np.zeros(shape=graph_nodes.shape + (2,))
graph_costs[:, 0, 0] = graph_nodes[:, 0]
graph_costs[:, 0, 1] = -1 # the first frame has no predecessor
layout_ids = np.arange(graph_costs.shape[0])
for frame in range(1, VIDEO_INPUT_FRAMES_COUNT):
    previous_costs = graph_costs[:, frame - 1, 0]
    # Row j holds previous_costs[k] + graph_cost[j, k]; argmin along k picks the best predecessor of layout j.
    best_previous = (previous_costs + graph_cost).argmin(axis=1)
    graph_costs[:, frame, 1] = best_previous
    graph_costs[:, frame, 0] = previous_costs[best_previous] + graph_nodes[:, frame] + graph_cost[layout_ids, best_previous]

# Backward pass: start from the cheapest final layout and follow the stored predecessors.
selected_layouts = np.full(VIDEO_INPUT_FRAMES_COUNT, -1, dtype=np.int8)
selected_layouts[-1] = graph_costs[:, -1, 0].argmin()
for frame in reversed(range(VIDEO_INPUT_FRAMES_COUNT - 1)):
    selected_layouts[frame] = graph_costs[selected_layouts[frame + 1], frame + 1, 1]

# Persist the per-frame layout choice next to the video.
with open(DATASET_FOLDER + VIDEO_FOLDER + "/selected_layouts.pkl", "wb") as f:
    pickle.dump(selected_layouts, f)
selected_layouts

array([0, 0, 0, ..., 0, 0, 0], dtype=int8)

# Layout transition

In [7]:
# Frames after which the layout changes, grouped by the kind of change. Each list stores i-1,
# i.e. the last frame that still uses the previous layout.
layout_transition_1_to_2, layout_transition_1_to_3 = [], []
layout_transition_2_to_1, layout_transition_2_to_3 = [], []
layout_transition_3_to_1, layout_transition_3_to_2 = [], []

# Layout id -> group used in the list names (layouts 1 and 2 share group 2).
layout_group = {0: 1, 1: 2, 2: 2, 3: 3}
transition_lists = {
    (1, 2): layout_transition_1_to_2,
    (1, 3): layout_transition_1_to_3,
    (2, 1): layout_transition_2_to_1,
    (2, 3): layout_transition_2_to_3,
    (3, 1): layout_transition_3_to_1,
    (3, 2): layout_transition_3_to_2,
}
for i in range(1, VIDEO_INPUT_FRAMES_COUNT):
    previous_layout, current_layout = selected_layouts[i - 1], selected_layouts[i]
    if current_layout == previous_layout:
        continue
    transition = (layout_group.get(int(previous_layout)), layout_group.get(int(current_layout)))
    # Unknown layout ids and a direct switch between layouts 1 and 2 are rejected.
    if transition not in transition_lists:
        raise Exception("Error: Invalid layout")
    transition_lists[transition].append(i - 1)

# Split screen optimization

In [8]:
# index = frame number, value = list of views (fx_t, fy_t, fs_t); every frame starts with no view
virtual_camera_positions = [[] for _ in range(VIDEO_INPUT_FRAMES_COUNT)]

## Checking the views of order == 3

In [9]:
# Optimise one view (fx_t, fy_t, fs_t) per frame for the frames whose selected layout is 3,
# i.e. a single view that contains every actor.
#   d_epsilon  : squared distance of the view to the mean actor position and the joint half-height
#   m1_epsilon : L1 norm of the first difference of the view over consecutive layout-3 frames
#   m2_epsilon : L1 norm of the second difference
#   m3_epsilon : L1 norm of the third difference
d_epsilon = cp.Constant(0)
m1_epsilon = cp.Constant(0)
m2_epsilon = cp.Constant(0)
m3_epsilon = cp.Constant(0)
cvxpy_variables = dict() # key = frame number, value = [cvxpy variables fx_t, fy_t, fs_t]
list_constraints = []
for i in range(VIDEO_INPUT_FRAMES_COUNT):
    if selected_layouts[i] != 3:
        continue

    temp_x_t = x_t[i, :].mean()
    temp_y_t = y_t[i, :].mean()
    temp_s_t = (by2_t_orig[i, :].max() - by1_t_orig[i, :].min()) // 2
    cvxpy_variables[i] = cp.Variable(shape = 3) # fx_t, fy_t, fs_t
    d_epsilon += cp.square(cvxpy_variables[i][0] - temp_x_t) + cp.square(cvxpy_variables[i][1] - temp_y_t) + cp.square(cvxpy_variables[i][2] - temp_s_t)

    # The view must stay inside the frame and contain the boxes of the outermost actors.
    temp_actors_top_to_bottom = y_t[i, :].argsort()
    list_constraints.extend([
        # left edge: inside the frame and left of the leftmost actor's box
        1 <= cvxpy_variables[i][0] - ASPECT_RATIO * cvxpy_variables[i][2],
        cvxpy_variables[i][0] - ASPECT_RATIO * cvxpy_variables[i][2] <= x_t[i, actors_left_to_right[i, 0]] - w_t[i, actors_left_to_right[i, 0]],

        # right edge: right of the rightmost actor's box and inside the frame
        x_t[i, actors_left_to_right[i, -1]] + w_t[i, actors_left_to_right[i, -1]] <= cvxpy_variables[i][0] + ASPECT_RATIO * cvxpy_variables[i][2],
        cvxpy_variables[i][0] + ASPECT_RATIO * cvxpy_variables[i][2] <= VIDEO_INPUT_FRAME_SIZE[0] - 1,

        # top edge: inside the frame and above the topmost actor's box
        1 <= cvxpy_variables[i][1] - cvxpy_variables[i][2],
        cvxpy_variables[i][1] - cvxpy_variables[i][2] <= y_t[i, temp_actors_top_to_bottom[0]] - s_t[i, temp_actors_top_to_bottom[0]],

        # bottom edge: below the bottommost actor's box and inside the frame
        y_t[i, temp_actors_top_to_bottom[-1]] + s_t[i, temp_actors_top_to_bottom[-1]] <= cvxpy_variables[i][1] + cvxpy_variables[i][2],
        cvxpy_variables[i][1] + cvxpy_variables[i][2] <= VIDEO_INPUT_FRAME_SIZE[1] - 1
    ])

    # Smoothness terms are only added across runs of consecutive layout-3 frames,
    # because only those frames own a variable in this problem.
    if i-1 >= 0 and selected_layouts[i-1] == 3:
        m1_epsilon += cp.sum(cp.abs(cvxpy_variables[i] - cvxpy_variables[i-1]))

    if i-2 >= 0 and selected_layouts[i-1] == 3 and selected_layouts[i-2] == 3:
        m2_epsilon += cp.sum(cp.abs(cvxpy_variables[i] - 2*cvxpy_variables[i-1] + cvxpy_variables[i-2]))

    if i-3 >= 0 and selected_layouts[i-1] == 3 and selected_layouts[i-2] == 3 and selected_layouts[i-3] == 3:
        # Third difference: coefficients 1, -3, 3, -1. They sum to zero, so a static view costs nothing.
        m3_epsilon += cp.sum(cp.abs(cvxpy_variables[i] - 3*cvxpy_variables[i-1] + 3*cvxpy_variables[i-2] - cvxpy_variables[i-3]))

# Nothing to solve when no frame uses layout 3.
if len(list_constraints) != 0:
    opt_problem = cp.Problem(cp.Minimize(d_epsilon + 10*m1_epsilon + 10*m2_epsilon + 10*m3_epsilon), list_constraints)
    opt_problem.solve(max_iter=50000)
    # NOTE(review): the solver status is not checked; presumably variables.value is None when the solve fails.
    for index, variables in cvxpy_variables.items():
        virtual_camera_positions[index].append(variables.value)

## Checking the views of order == 2

In [10]:
# Build (but do not solve yet) the problem for the two-actor views:
#   layout 2 -> view around the right-hand actors (actors_left_to_right[i, 1:])
#   layout 1 -> view around the left-hand actors  (actors_left_to_right[i, :-1])
# One variable (fx_t, fy_t, fs_t) is created per such frame.
#   d_epsilon  : squared distance of the view to the mean position / joint half-height of its actors
#   m1_epsilon, m2_epsilon, m3_epsilon : L1 norms of the first, second and third differences of the
#   view over consecutive frames that share the same layout
list_constraints = []
d_epsilon = cp.Constant(0)
m1_epsilon = cp.Constant(0)
m2_epsilon = cp.Constant(0)
m3_epsilon = cp.Constant(0)
cvxpy_variables = dict() # key = frame number, value = [cvxpy variables fx_t, fy_t, fs_t]
for i in range(VIDEO_INPUT_FRAMES_COUNT):
    temp_layout = selected_layouts[i]
    if temp_layout == 2:
        temp_actors = actors_left_to_right[i, 1:]
    elif temp_layout == 1:
        temp_actors = actors_left_to_right[i, :-1]
    else:
        continue

    temp_x_t = x_t[i, temp_actors].mean()
    temp_y_t = y_t[i, temp_actors].mean()
    temp_s_t = (by2_t_orig[i, temp_actors].max() - by1_t_orig[i, temp_actors].min()) // 2
    cvxpy_variables[i] = cp.Variable(shape = 3) # fx_t, fy_t, fs_t
    d_epsilon += cp.square(cvxpy_variables[i][0] - temp_x_t) + cp.square(cvxpy_variables[i][1] - temp_y_t) + cp.square(cvxpy_variables[i][2] - temp_s_t)

    # Positions inside temp_actors, ordered top to bottom. The order must be computed on the same
    # actor subset it indexes, otherwise the wrong actor is used as vertical bound.
    temp_actors_top_to_bottom = y_t[i, temp_actors].argsort()
    temp_top_actor = temp_actors[temp_actors_top_to_bottom[0]]
    temp_bottom_actor = temp_actors[temp_actors_top_to_bottom[-1]]
    # temp_actors is already ordered left to right (with three actors this matches the original indices).
    temp_left_actor, temp_right_actor = temp_actors[0], temp_actors[-1]
    list_constraints.extend([
        # left edge: inside the frame and left of the leftmost framed actor's box
        1 <= cvxpy_variables[i][0] - ASPECT_RATIO * cvxpy_variables[i][2],
        cvxpy_variables[i][0] - ASPECT_RATIO * cvxpy_variables[i][2] <= x_t[i, temp_left_actor] - w_t[i, temp_left_actor],

        # right edge: right of the rightmost framed actor's box and inside the frame
        x_t[i, temp_right_actor] + w_t[i, temp_right_actor] <= cvxpy_variables[i][0] + ASPECT_RATIO * cvxpy_variables[i][2],
        cvxpy_variables[i][0] + ASPECT_RATIO * cvxpy_variables[i][2] <= VIDEO_INPUT_FRAME_SIZE[0] - 1,

        # top edge: inside the frame and above the topmost framed actor's box
        1 <= cvxpy_variables[i][1] - cvxpy_variables[i][2],
        cvxpy_variables[i][1] - cvxpy_variables[i][2] <= y_t[i, temp_top_actor] - s_t[i, temp_top_actor],

        # bottom edge: below the bottommost framed actor's box and inside the frame
        y_t[i, temp_bottom_actor] + s_t[i, temp_bottom_actor] <= cvxpy_variables[i][1] + cvxpy_variables[i][2],
        cvxpy_variables[i][1] + cvxpy_variables[i][2] <= VIDEO_INPUT_FRAME_SIZE[1] - 1
    ])

    # Smoothness terms only link consecutive frames that use the same layout as frame i.
    if i-1 >= 0 and selected_layouts[i-1] == temp_layout:
        m1_epsilon += cp.sum(cp.abs(cvxpy_variables[i] - cvxpy_variables[i-1]))

    if i-2 >= 0 and selected_layouts[i-1] == temp_layout and selected_layouts[i-2] == temp_layout:
        m2_epsilon += cp.sum(cp.abs(cvxpy_variables[i] - 2*cvxpy_variables[i-1] + cvxpy_variables[i-2]))

    if i-3 >= 0 and selected_layouts[i-1] == temp_layout and selected_layouts[i-2] == temp_layout and selected_layouts[i-3] == temp_layout:
        # Third difference: coefficients 1, -3, 3, -1. They sum to zero, so a static view costs nothing.
        m3_epsilon += cp.sum(cp.abs(cvxpy_variables[i] - 3*cvxpy_variables[i-1] + 3*cvxpy_variables[i-2] - cvxpy_variables[i-3]))

In [11]:
# Tie each two-actor view to the already solved three-actor view on the other side of a layout change.
# Every entry is (transition frames, offset of the two-actor frame, offset of the three-actor frame),
# both offsets being relative to the stored transition frame i.
for temp_frames, temp_offset_two, temp_offset_three in (
    (layout_transition_2_to_3, 0, 1),
    (layout_transition_3_to_2, 1, 0),
):
    for i in temp_frames:
        temp_layout = selected_layouts[i + temp_offset_two]
        if temp_layout != 1 and temp_layout != 2:
            raise Exception("Error: Invalid layout")

        temp_view = cvxpy_variables[i + temp_offset_two]
        if temp_layout == 1:
            # layout 1: the right edge of the view is pinned
            temp_edge = temp_view[0] + ASPECT_RATIO * temp_view[2]
        else:
            # layout 2: the left edge of the view is pinned
            temp_edge = temp_view[0] - ASPECT_RATIO * temp_view[2]

        temp_target = virtual_camera_positions[i + temp_offset_three][0]
        list_constraints.extend([
            temp_edge == temp_target[0],
            temp_view[1] == temp_target[1], temp_view[2] == temp_target[2]
        ])

In [12]:
# Solve the problem assembled so far and store one optimised view per frame.
# An empty constraint list means that no variable was created, so there is nothing to solve.
if list_constraints:
    temp_smoothness = 10*m1_epsilon + 10*m2_epsilon + 10*m3_epsilon
    opt_problem = cp.Problem(cp.Minimize(d_epsilon + temp_smoothness), list_constraints)
    opt_problem.solve(max_iter=50000)
    for frame_index in cvxpy_variables:
        virtual_camera_positions[frame_index].append(cvxpy_variables[frame_index].value)

## Checking the views of order == 1

In [13]:
list_constraints = []
d_epsilon = cp.Constant(0)
m1_epsilon = cp.Constant(0)
m2_epsilon = cp.Constant(0)
m3_epsilon = cp.Constant(0)
cvxpy_variables = dict() # key = frame number, value = [cvxpy variables fx_t, fy_t, fs_t]
for i in range(VIDEO_INPUT_FRAMES_COUNT):
    if selected_layouts[i] == 0:
        temp_x_t_1, temp_x_t_2, temp_x_t_3 = x_t[i, actors_left_to_right[i, 0]], x_t[i, actors_left_to_right[i, 1]], x_t[i, actors_left_to_right[i, 2]]
        temp_y_t_1, temp_y_t_2, temp_y_t_3 = y_t[i, actors_left_to_right[i, 0]], y_t[i, actors_left_to_right[i, 1]], y_t[i, actors_left_to_right[i, 2]]
        temp_s_t = (by2_t_orig[i, actors_left_to_right[i]] - by1_t_orig[i, actors_left_to_right[i]]) // 2
        cvxpy_variables[i] = cp.Variable(shape = 9) # fx_t1, fy_t1, fs_t1 (left), fx_t2, fy_t2, fs_t2 (middle), fx_t3, fy_t3, fs_t3 (right)
        d_epsilon += cp.square(cvxpy_variables[i][0] - temp_x_t_1) + cp.square(cvxpy_variables[i][1] - temp_y_t_1) + cp.square(cvxpy_variables[i][2] - temp_s_t[0])
        d_epsilon += cp.square(cvxpy_variables[i][3] - temp_x_t_2) + cp.square(cvxpy_variables[i][4] - temp_y_t_2) + cp.square(cvxpy_variables[i][5] - temp_s_t[1])
        d_epsilon += cp.square(cvxpy_variables[i][6] - temp_x_t_3) + cp.square(cvxpy_variables[i][7] - temp_y_t_3) + cp.square(cvxpy_variables[i][8] - temp_s_t[2])

        list_constraints.extend([
            1 <= cvxpy_variables[i][0] - ASPECT_RATIO * cvxpy_variables[i][2],
            cvxpy_variables[i][0] - ASPECT_RATIO * cvxpy_variables[i][2] <= x_t[i, actors_left_to_right[i, 0]] - w_t[i, actors_left_to_right[i, 0]],

            x_t[i, actors_left_to_right[i, 0]] + w_t[i, actors_left_to_right[i, 0]] <= cvxpy_variables[i][0] + ASPECT_RATIO * cvxpy_variables[i][2],
            cvxpy_variables[i][0] + ASPECT_RATIO * cvxpy_variables[i][2] <= VIDEO_INPUT_FRAME_SIZE[0] - 1,

            1 <= cvxpy_variables[i][1] - cvxpy_variables[i][2],
            cvxpy_variables[i][1] - cvxpy_variables[i][2] <= y_t[i, actors_left_to_right[i, 0]] - s_t[i, actors_left_to_right[i, 0]],

            y_t[i, actors_left_to_right[i, 0]] + s_t[i, actors_left_to_right[i, 0]] <= cvxpy_variables[i][1] + cvxpy_variables[i][2],
            cvxpy_variables[i][1] + cvxpy_variables[i][2] <= VIDEO_INPUT_FRAME_SIZE[1] - 1
        ])
        list_constraints.extend([
            1 <= cvxpy_variables[i][3] - ASPECT_RATIO * cvxpy_variables[i][5],
            cvxpy_variables[i][3] - ASPECT_RATIO * cvxpy_variables[i][5] <= x_t[i, actors_left_to_right[i, 1]] - w_t[i, actors_left_to_right[i, 1]],

            x_t[i, actors_left_to_right[i, 1]] + w_t[i, actors_left_to_right[i, 1]] <= cvxpy_variables[i][3] + ASPECT_RATIO * cvxpy_variables[i][5],
            cvxpy_variables[i][3] + ASPECT_RATIO * cvxpy_variables[i][5] <= VIDEO_INPUT_FRAME_SIZE[0] - 1,

            1 <= cvxpy_variables[i][4] - cvxpy_variables[i][5],
            cvxpy_variables[i][4] - cvxpy_variables[i][5] <= y_t[i, actors_left_to_right[i, 1]] - s_t[i, actors_left_to_right[i, 1]],

            y_t[i, actors_left_to_right[i, 1]] + s_t[i, actors_left_to_right[i, 1]] <= cvxpy_variables[i][4] + cvxpy_variables[i][5],
            cvxpy_variables[i][4] + cvxpy_variables[i][5] <= VIDEO_INPUT_FRAME_SIZE[1] - 1
        ])
        list_constraints.extend([
            1 <= cvxpy_variables[i][6] - ASPECT_RATIO * cvxpy_variables[i][8],
            cvxpy_variables[i][6] - ASPECT_RATIO * cvxpy_variables[i][8] <= x_t[i, actors_left_to_right[i, 2]] - w_t[i, actors_left_to_right[i, 2]],

            x_t[i, actors_left_to_right[i, 2]] + w_t[i, actors_left_to_right[i, 2]] <= cvxpy_variables[i][6] + ASPECT_RATIO * cvxpy_variables[i][8],
            cvxpy_variables[i][6] + ASPECT_RATIO * cvxpy_variables[i][8] <= VIDEO_INPUT_FRAME_SIZE[0] - 1,

            1 <= cvxpy_variables[i][7] - cvxpy_variables[i][8],
            cvxpy_variables[i][7] - cvxpy_variables[i][8] <= y_t[i, actors_left_to_right[i, 2]] - s_t[i, actors_left_to_right[i, 2]],

            y_t[i, actors_left_to_right[i, 2]] + s_t[i, actors_left_to_right[i, 2]] <= cvxpy_variables[i][7] + cvxpy_variables[i][8],
            cvxpy_variables[i][7] + cvxpy_variables[i][8] <= VIDEO_INPUT_FRAME_SIZE[1] - 1
        ])

        if i-1 >= 0:
            if selected_layouts[i-1] == 0: m1_epsilon += cp.sum(cp.abs(cvxpy_variables[i] - cvxpy_variables[i-1]))
            elif selected_layouts[i-1] == 1: m1_epsilon += cp.sum(cp.abs(cvxpy_variables[i][-3:] - cvxpy_variables[i-1]))
            elif selected_layouts[i-1] == 2: m1_epsilon += cp.sum(cp.abs(cvxpy_variables[i][:3] - cvxpy_variables[i-1]))

        if i-2 >= 0:
            if selected_layouts[i-1] == 0 and selected_layouts[i-2] == 0:
                m2_epsilon += cp.abs(cvxpy_variables[i][0] - 2*cvxpy_variables[i-1][0] + cvxpy_variables[i-2][0]) + \
                    cp.abs(cvxpy_variables[i][1] - 2*cvxpy_variables[i-1][1] + cvxpy_variables[i-2][1]) + \
                        cp.abs(cvxpy_variables[i][2] - 2*cvxpy_variables[i-1][2] + cvxpy_variables[i-2][2])
                m2_epsilon += cp.abs(cvxpy_variables[i][3] - 2*cvxpy_variables[i-1][3] + cvxpy_variables[i-2][3]) + \
                    cp.abs(cvxpy_variables[i][4] - 2*cvxpy_variables[i-1][4] + cvxpy_variables[i-2][4]) + \
                        cp.abs(cvxpy_variables[i][5] - 2*cvxpy_variables[i-1][5] + cvxpy_variables[i-2][5])
                m2_epsilon += cp.abs(cvxpy_variables[i][6] - 2*cvxpy_variables[i-1][6] + cvxpy_variables[i-2][6]) + \
                    cp.abs(cvxpy_variables[i][7] - 2*cvxpy_variables[i-1][7] + cvxpy_variables[i-2][7]) + \
                        cp.abs(cvxpy_variables[i][8] - 2*cvxpy_variables[i-1][8] + cvxpy_variables[i-2][8])
            elif selected_layouts[i-1] == 2 and selected_layouts[i-2] == 2:
                m2_epsilon += cp.abs(cvxpy_variables[i][0] - 2*cvxpy_variables[i-1][0] + cvxpy_variables[i-2][0]) + \
                    cp.abs(cvxpy_variables[i][1] - 2*cvxpy_variables[i-1][1] + cvxpy_variables[i-2][1]) + \
                        cp.abs(cvxpy_variables[i][2] - 2*cvxpy_variables[i-1][2] + cvxpy_variables[i-2][2])
            elif selected_layouts[i-1] == 1 and selected_layouts[i-2] == 1:
                m2_epsilon += cp.abs(cvxpy_variables[i][6] - 2*cvxpy_variables[i-1][0] + cvxpy_variables[i-2][0]) + \
                    cp.abs(cvxpy_variables[i][7] - 2*cvxpy_variables[i-1][1] + cvxpy_variables[i-2][1]) + \
                        cp.abs(cvxpy_variables[i][8] - 2*cvxpy_variables[i-1][2] + cvxpy_variables[i-2][2])
            elif selected_layouts[i-1] == 0 and selected_layouts[i-2] == 1:
                m2_epsilon += cp.abs(cvxpy_variables[i][6] - 2*cvxpy_variables[i-1][6] + cvxpy_variables[i-2][0]) + \
                    cp.abs(cvxpy_variables[i][7] - 2*cvxpy_variables[i-1][7] + cvxpy_variables[i-2][1]) + \
                        cp.abs(cvxpy_variables[i][8] - 2*cvxpy_variables[i-1][8] + cvxpy_variables[i-2][2])
            elif selected_layouts[i-1] == 0 and selected_layouts[i-2] == 2:
                m2_epsilon += cp.abs(cvxpy_variables[i][0] - 2*cvxpy_variables[i-1][0] + cvxpy_variables[i-2][0]) + \
                    cp.abs(cvxpy_variables[i][1] - 2*cvxpy_variables[i-1][1] + cvxpy_variables[i-2][1]) + \
                        cp.abs(cvxpy_variables[i][2] - 2*cvxpy_variables[i-1][2] + cvxpy_variables[i-2][2])

        if i-3 >= 0:
            if selected_layouts[i-1] == 0 and selected_layouts[i-2] == 0 and selected_layouts[i-3] == 0:
                m3_epsilon += cp.abs(cvxpy_variables[i][0] - 3*cvxpy_variables[i-1][0] + 3*cvxpy_variables[i-2][0] - 3*cvxpy_variables[i-3][0]) + \
                    cp.abs(cvxpy_variables[i][1] - 3*cvxpy_variables[i-1][1] + 3*cvxpy_variables[i-2][1] - 3*cvxpy_variables[i-3][1]) + \
                        cp.abs(cvxpy_variables[i][2] - 3*cvxpy_variables[i-1][2] + 3*cvxpy_variables[i-2][2] - 3*cvxpy_variables[i-3][2])
                m3_epsilon += cp.abs(cvxpy_variables[i][3] - 3*cvxpy_variables[i-1][3] + 3*cvxpy_variables[i-2][3] - 3*cvxpy_variables[i-3][3]) + \
                    cp.abs(cvxpy_variables[i][4] - 3*cvxpy_variables[i-1][4] + 3*cvxpy_variables[i-2][4] - 3*cvxpy_variables[i-3][4]) + \
                        cp.abs(cvxpy_variables[i][5] - 3*cvxpy_variables[i-1][5] + 3*cvxpy_variables[i-2][5] - 3*cvxpy_variables[i-3][5])
                m3_epsilon += cp.abs(cvxpy_variables[i][6] - 3*cvxpy_variables[i-1][6] + 3*cvxpy_variables[i-2][6] - 3*cvxpy_variables[i-3][6]) + \
                    cp.abs(cvxpy_variables[i][7] - 3*cvxpy_variables[i-1][7] + 3*cvxpy_variables[i-2][7] - 3*cvxpy_variables[i-3][7]) + \
                        cp.abs(cvxpy_variables[i][8] - 3*cvxpy_variables[i-1][8] + 3*cvxpy_variables[i-2][8] - 3*cvxpy_variables[i-3][8])
            elif selected_layouts[i-1] == 2 and selected_layouts[i-2] == 2 and selected_layouts[i-3] == 2:
                m3_epsilon += cp.abs(cvxpy_variables[i][0] - 3*cvxpy_variables[i-1][0] + 3*cvxpy_variables[i-2][0] - 3*cvxpy_variables[i-3][0]) + \
                    cp.abs(cvxpy_variables[i][1] - 3*cvxpy_variables[i-1][1] + 3*cvxpy_variables[i-2][1] - 3*cvxpy_variables[i-3][1]) + \
                        cp.abs(cvxpy_variables[i][2] - 3*cvxpy_variables[i-1][2] + 3*cvxpy_variables[i-2][2] - 3*cvxpy_variables[i-3][2])
            elif selected_layouts[i-1] == 1 and selected_layouts[i-2] == 1 and selected_layouts[i-3] == 1:
                m3_epsilon += cp.abs(cvxpy_variables[i][6] - 3*cvxpy_variables[i-1][0] + 3*cvxpy_variables[i-2][0] - 3*cvxpy_variables[i-3][0]) + \
                    cp.abs(cvxpy_variables[i][7] - 3*cvxpy_variables[i-1][1] + 3*cvxpy_variables[i-2][1] - 3*cvxpy_variables[i-3][1]) + \
                        cp.abs(cvxpy_variables[i][8] - 3*cvxpy_variables[i-1][2] + 3*cvxpy_variables[i-2][2] - 3*cvxpy_variables[i-3][2])
            elif selected_layouts[i-1] == 0 and selected_layouts[i-2] == 1 and selected_layouts[i-3] == 1:
                m3_epsilon += cp.abs(cvxpy_variables[i][6] - 3*cvxpy_variables[i-1][6] + 3*cvxpy_variables[i-2][0] - 3*cvxpy_variables[i-3][0]) + \
                    cp.abs(cvxpy_variables[i][7] - 3*cvxpy_variables[i-1][7] + 3*cvxpy_variables[i-2][1] - 3*cvxpy_variables[i-3][1]) + \
                        cp.abs(cvxpy_variables[i][8] - 3*cvxpy_variables[i-1][8] + 3*cvxpy_variables[i-2][2] - 3*cvxpy_variables[i-3][2])
            elif selected_layouts[i-1] == 0 and selected_layouts[i-2] == 0 and selected_layouts[i-3] == 1:
                m3_epsilon += cp.abs(cvxpy_variables[i][6] - 3*cvxpy_variables[i-1][6] + 3*cvxpy_variables[i-2][6] - 3*cvxpy_variables[i-3][0]) + \
                    cp.abs(cvxpy_variables[i][7] - 3*cvxpy_variables[i-1][7] + 3*cvxpy_variables[i-2][7] - 3*cvxpy_variables[i-3][1]) + \
                        cp.abs(cvxpy_variables[i][8] - 3*cvxpy_variables[i-1][8] + 3*cvxpy_variables[i-2][8] - 3*cvxpy_variables[i-3][2])
            elif selected_layouts[i-1] == 0 and selected_layouts[i-2] == 2 and selected_layouts[i-3] == 2:
                m3_epsilon += cp.abs(cvxpy_variables[i][0] - 3*cvxpy_variables[i-1][0] + 3*cvxpy_variables[i-2][0] - 3*cvxpy_variables[i-3][0]) + \
                    cp.abs(cvxpy_variables[i][1] - 3*cvxpy_variables[i-1][1] + 3*cvxpy_variables[i-2][1] - 3*cvxpy_variables[i-3][1]) + \
                        cp.abs(cvxpy_variables[i][2] - 3*cvxpy_variables[i-1][2] + 3*cvxpy_variables[i-2][2] - 3*cvxpy_variables[i-3][2])
            elif selected_layouts[i-1] == 0 and selected_layouts[i-2] == 0 and selected_layouts[i-3] == 2:
                m3_epsilon += cp.abs(cvxpy_variables[i][0] - 3*cvxpy_variables[i-1][0] + 3*cvxpy_variables[i-2][0] - 3*cvxpy_variables[i-3][0]) + \
                    cp.abs(cvxpy_variables[i][1] - 3*cvxpy_variables[i-1][1] + 3*cvxpy_variables[i-2][1] - 3*cvxpy_variables[i-3][1]) + \
                        cp.abs(cvxpy_variables[i][2] - 3*cvxpy_variables[i-1][2] + 3*cvxpy_variables[i-2][2] - 3*cvxpy_variables[i-3][2])

    elif selected_layouts[i] == 1:
        temp_x_t = x_t[i, actors_left_to_right[i, 2]]
        temp_y_t = y_t[i, actors_left_to_right[i, 2]]
        temp_s_t = s_t[i, actors_left_to_right[i, 2]]
        cvxpy_variables[i] = cp.Variable(shape = 3) # fx_t, fy_t, fs_t
        d_epsilon += cp.square(cvxpy_variables[i][0] - temp_x_t) + cp.square(cvxpy_variables[i][1] - temp_y_t) + cp.square(cvxpy_variables[i][2] - temp_s_t)
        
        list_constraints.extend([
            1 <= cvxpy_variables[i][0] - ASPECT_RATIO * cvxpy_variables[i][2],
            cvxpy_variables[i][0] - ASPECT_RATIO * cvxpy_variables[i][2] <= x_t[i, actors_left_to_right[i, 2]] - w_t[i, actors_left_to_right[i, 2]],

            x_t[i, actors_left_to_right[i, 2]] + w_t[i, actors_left_to_right[i, 2]] <= cvxpy_variables[i][0] + ASPECT_RATIO * cvxpy_variables[i][2],
            cvxpy_variables[i][0] + ASPECT_RATIO * cvxpy_variables[i][2] <= VIDEO_INPUT_FRAME_SIZE[0] - 1,

            1 <= cvxpy_variables[i][1] - cvxpy_variables[i][2],
            cvxpy_variables[i][1] - cvxpy_variables[i][2] <= y_t[i, actors_left_to_right[i, 2]] - s_t[i, actors_left_to_right[i, 2]],

            y_t[i, actors_left_to_right[i, 2]] + s_t[i, actors_left_to_right[i, 2]] <= cvxpy_variables[i][1] + cvxpy_variables[i][2],
            cvxpy_variables[i][1] + cvxpy_variables[i][2] <= VIDEO_INPUT_FRAME_SIZE[1] - 1
        ])

        # Smoothness penalties for this frame's 3-vector camera, measured against the
        # same camera in earlier frames. When an earlier frame used layout 0 the camera
        # is read from entries 6..8 of that frame's 9-vector; when it used layout 1 the
        # frame's own 3-vector is used. Histories not listed below add no penalty.

        # First-order difference: |c_t - c_{t-1}|
        if i-1 >= 0:
            if selected_layouts[i-1] == 0: m1_epsilon += cp.sum(cp.abs(cvxpy_variables[i] - cvxpy_variables[i-1][6:]))
            elif selected_layouts[i-1] == 1: m1_epsilon += cp.sum(cp.abs(cvxpy_variables[i] - cvxpy_variables[i-1]))

        # Second-order difference: |c_t - 2 c_{t-1} + c_{t-2}|
        if i-2 >= 0:
            if selected_layouts[i-1] == 0 and selected_layouts[i-2] == 0:
                m2_epsilon += cp.sum(cp.abs(cvxpy_variables[i] - 2*cvxpy_variables[i-1][6:] + cvxpy_variables[i-2][6:]))
            elif selected_layouts[i-1] == 1 and selected_layouts[i-2] == 1:
                m2_epsilon += cp.sum(cp.abs(cvxpy_variables[i] - 2*cvxpy_variables[i-1] + cvxpy_variables[i-2]))
            elif selected_layouts[i-1] == 1 and selected_layouts[i-2] == 0:
                m2_epsilon += cp.sum(cp.abs(cvxpy_variables[i] - 2*cvxpy_variables[i-1] + cvxpy_variables[i-2][6:]))

        # Third-order difference: |c_t - 3 c_{t-1} + 3 c_{t-2} - c_{t-3}|.
        # The oldest sample has coefficient -1 (not -3): the stencil must sum to zero so
        # that a camera which does not move is not penalised.
        # NOTE(review): any other branch of this cell that builds m3_epsilon should use
        # the same 1, -3, 3, -1 stencil -- confirm.
        if i-3 >= 0:
            if selected_layouts[i-1] == 0 and selected_layouts[i-2] == 0 and selected_layouts[i-3] == 0:
                m3_epsilon += cp.sum(cp.abs(cvxpy_variables[i] - 3*cvxpy_variables[i-1][6:] + 3*cvxpy_variables[i-2][6:] - cvxpy_variables[i-3][6:]))
            elif selected_layouts[i-1] == 1 and selected_layouts[i-2] == 1 and selected_layouts[i-3] == 1:
                m3_epsilon += cp.sum(cp.abs(cvxpy_variables[i] - 3*cvxpy_variables[i-1] + 3*cvxpy_variables[i-2] - cvxpy_variables[i-3]))
            elif selected_layouts[i-1] == 1 and selected_layouts[i-2] == 0 and selected_layouts[i-3] == 0:
                m3_epsilon += cp.sum(cp.abs(cvxpy_variables[i] - 3*cvxpy_variables[i-1] + 3*cvxpy_variables[i-2][6:] - cvxpy_variables[i-3][6:]))
            elif selected_layouts[i-1] == 1 and selected_layouts[i-2] == 1 and selected_layouts[i-3] == 0:
                m3_epsilon += cp.sum(cp.abs(cvxpy_variables[i] - 3*cvxpy_variables[i-1] + 3*cvxpy_variables[i-2] - cvxpy_variables[i-3][6:]))

    elif selected_layouts[i] == 2:
        temp_x_t = x_t[i, actors_left_to_right[i, 0]]
        temp_y_t = y_t[i, actors_left_to_right[i, 0]]
        temp_s_t = s_t[i, actors_left_to_right[i, 0]]
        cvxpy_variables[i] = cp.Variable(shape = 3) # fx_t, fy_t, fs_t
        d_epsilon += cp.square(cvxpy_variables[i][0] - temp_x_t) + cp.square(cvxpy_variables[i][1] - temp_y_t) + cp.square(cvxpy_variables[i][2] - temp_s_t)

        list_constraints.extend([
            1 <= cvxpy_variables[i][0] - ASPECT_RATIO * cvxpy_variables[i][2],
            cvxpy_variables[i][0] - ASPECT_RATIO * cvxpy_variables[i][2] <= x_t[i, actors_left_to_right[i, 0]] - w_t[i, actors_left_to_right[i, 0]],

            x_t[i, actors_left_to_right[i, 0]] + w_t[i, actors_left_to_right[i, 0]] <= cvxpy_variables[i][0] + ASPECT_RATIO * cvxpy_variables[i][2],
            cvxpy_variables[i][0] + ASPECT_RATIO * cvxpy_variables[i][2] <= VIDEO_INPUT_FRAME_SIZE[0] - 1,

            1 <= cvxpy_variables[i][1] - cvxpy_variables[i][2],
            cvxpy_variables[i][1] - cvxpy_variables[i][2] <= y_t[i, actors_left_to_right[i, 0]] - s_t[i, actors_left_to_right[i, 0]],

            y_t[i, actors_left_to_right[i, 0]] + s_t[i, actors_left_to_right[i, 0]] <= cvxpy_variables[i][1] + cvxpy_variables[i][2],
            cvxpy_variables[i][1] + cvxpy_variables[i][2] <= VIDEO_INPUT_FRAME_SIZE[1] - 1
        ])

        if i-1 >= 0:
            if selected_layouts[i-1] == 0: m1_epsilon += cp.sum(cp.abs(cvxpy_variables[i] - cvxpy_variables[i-1][:3]))
            elif selected_layouts[i-1] == 2: m1_epsilon += cp.sum(cp.abs(cvxpy_variables[i] - cvxpy_variables[i-1]))

        if i-2 >= 0:
            if selected_layouts[i-1] == 0 and selected_layouts[i-2] == 0:
                m2_epsilon += cp.abs(cvxpy_variables[i][0] - 2*cvxpy_variables[i-1][0] + cvxpy_variables[i-2][0]) + \
                    cp.abs(cvxpy_variables[i][1] - 2*cvxpy_variables[i-1][1] + cvxpy_variables[i-2][1]) + \
                        cp.abs(cvxpy_variables[i][2] - 2*cvxpy_variables[i-1][2] + cvxpy_variables[i-2][2])
            elif selected_layouts[i-1] == 2 and selected_layouts[i-2] == 2:
                m2_epsilon += cp.abs(cvxpy_variables[i][0] - 2*cvxpy_variables[i-1][0] + cvxpy_variables[i-2][0]) + \
                    cp.abs(cvxpy_variables[i][1] - 2*cvxpy_variables[i-1][1] + cvxpy_variables[i-2][1]) + \
                        cp.abs(cvxpy_variables[i][2] - 2*cvxpy_variables[i-1][2] + cvxpy_variables[i-2][2])
            elif selected_layouts[i-1] == 2 and selected_layouts[i-2] == 0:
                m2_epsilon += cp.abs(cvxpy_variables[i][0] - 2*cvxpy_variables[i-1][0] + cvxpy_variables[i-2][0]) + \
                    cp.abs(cvxpy_variables[i][1] - 2*cvxpy_variables[i-1][1] + cvxpy_variables[i-2][1]) + \
                        cp.abs(cvxpy_variables[i][2] - 2*cvxpy_variables[i-1][2] + cvxpy_variables[i-2][2])
        
        if i-3 >= 0:
            if selected_layouts[i-1] == 0 and selected_layouts[i-2] == 0 and selected_layouts[i-3] == 0:
                m3_epsilon += cp.abs(cvxpy_variables[i][0] - 3*cvxpy_variables[i-1][0] + 3*cvxpy_variables[i-2][0] - 3*cvxpy_variables[i-3][0]) + \
                    cp.abs(cvxpy_variables[i][1] - 3*cvxpy_variables[i-1][1] + 3*cvxpy_variables[i-2][1] - 3*cvxpy_variables[i-3][1]) + \
                        cp.abs(cvxpy_variables[i][2] - 3*cvxpy_variables[i-1][2] + 3*cvxpy_variables[i-2][2] - 3*cvxpy_variables[i-3][2])
            elif selected_layouts[i-1] == 2 and selected_layouts[i-2] == 2 and selected_layouts[i-3] == 2:
                m3_epsilon += cp.abs(cvxpy_variables[i][0] - 3*cvxpy_variables[i-1][0] + 3*cvxpy_variables[i-2][0] - 3*cvxpy_variables[i-3][0]) + \
                    cp.abs(cvxpy_variables[i][1] - 3*cvxpy_variables[i-1][1] + 3*cvxpy_variables[i-2][1] - 3*cvxpy_variables[i-3][1]) + \
                        cp.abs(cvxpy_variables[i][2] - 3*cvxpy_variables[i-1][2] + 3*cvxpy_variables[i-2][2] - 3*cvxpy_variables[i-3][2])
            elif selected_layouts[i-1] == 2 and selected_layouts[i-2] == 0 and selected_layouts[i-3] == 0:
                m3_epsilon += cp.abs(cvxpy_variables[i][0] - 3*cvxpy_variables[i-1][0] + 3*cvxpy_variables[i-2][0] - 3*cvxpy_variables[i-3][0]) + \
                    cp.abs(cvxpy_variables[i][1] - 3*cvxpy_variables[i-1][1] + 3*cvxpy_variables[i-2][1] - 3*cvxpy_variables[i-3][1]) + \
                        cp.abs(cvxpy_variables[i][2] - 3*cvxpy_variables[i-1][2] + 3*cvxpy_variables[i-2][2] - 3*cvxpy_variables[i-3][2])
            elif selected_layouts[i-1] == 2 and selected_layouts[i-2] == 2 and selected_layouts[i-3] == 0:
                m3_epsilon += cp.abs(cvxpy_variables[i][0] - 3*cvxpy_variables[i-1][0] + 3*cvxpy_variables[i-2][0] - 3*cvxpy_variables[i-3][0]) + \
                    cp.abs(cvxpy_variables[i][1] - 3*cvxpy_variables[i-1][1] + 3*cvxpy_variables[i-2][1] - 3*cvxpy_variables[i-3][1]) + \
                        cp.abs(cvxpy_variables[i][2] - 3*cvxpy_variables[i-1][2] + 3*cvxpy_variables[i-2][2] - 3*cvxpy_variables[i-3][2])

In [14]:
for i in layout_transition_1_to_2:
    # Frame i holds three cameras in one 9-vector (entries 0-2, 3-5 and 6-8).
    # The layout of frame i+1 decides which neighbouring pair is tied to the first
    # camera of frame i+1: entries 0-5 for layout 1, entries 3-8 for layout 2.
    following_layout = selected_layouts[i+1]
    if following_layout == 1: pair_start = 0
    elif following_layout == 2: pair_start = 3
    else: raise Exception("Error: Invalid layout")

    split_cams = cvxpy_variables[i]
    joint_cam = virtual_camera_positions[i+1][0]
    # A disabled alternative in the original pinned the two centres directly at
    # joint x -/+ ASPECT_RATIO * joint size instead of constraining the edges.
    list_constraints.extend([
        # right edge of the first camera and left edge of the second meet at joint x
        split_cams[pair_start] + ASPECT_RATIO * split_cams[pair_start + 2] == joint_cam[0],
        split_cams[pair_start + 3] - ASPECT_RATIO * split_cams[pair_start + 5] == joint_cam[0],
        # both cameras take the joint camera's y and size
        split_cams[pair_start + 1] == joint_cam[1], split_cams[pair_start + 4] == joint_cam[1],
        split_cams[pair_start + 2] == joint_cam[2], split_cams[pair_start + 5] == joint_cam[2]
    ])
for i in layout_transition_1_to_3:
    # Frame i holds three cameras in one 9-vector; frame i+1 supplies a single
    # reference camera. The three cameras are made to touch edge to edge, with the
    # middle one centred on the reference x and all three sharing its y and size.
    three_cams = cvxpy_variables[i]
    single_cam = virtual_camera_positions[i+1][0]
    list_constraints.extend(
        [
            three_cams[0] + ASPECT_RATIO * three_cams[2] == three_cams[3] - ASPECT_RATIO * three_cams[5],
            three_cams[3] + ASPECT_RATIO * three_cams[5] == three_cams[6] - ASPECT_RATIO * three_cams[8],
            three_cams[3] == single_cam[0],
        ]
        + [three_cams[y_index] == single_cam[1] for y_index in (1, 4, 7)]
        + [three_cams[s_index] == single_cam[2] for s_index in (2, 5, 8)]
    )
for i in layout_transition_2_to_1:
    # Mirror of the 1 -> 2 transition: frame i+1 holds three cameras in one 9-vector,
    # and the layout of frame i decides which neighbouring pair is tied to the first
    # camera of frame i: entries 0-5 for layout 1, entries 3-8 for layout 2.
    preceding_layout = selected_layouts[i]
    if preceding_layout == 1: pair_start = 0
    elif preceding_layout == 2: pair_start = 3
    else: raise Exception("Error: Invalid layout")

    split_cams = cvxpy_variables[i+1]
    joint_cam = virtual_camera_positions[i][0]
    # A disabled alternative in the original pinned the two centres directly at
    # joint x -/+ ASPECT_RATIO * joint size instead of constraining the edges.
    list_constraints.extend([
        # right edge of the first camera and left edge of the second meet at joint x
        split_cams[pair_start] + ASPECT_RATIO * split_cams[pair_start + 2] == joint_cam[0],
        split_cams[pair_start + 3] - ASPECT_RATIO * split_cams[pair_start + 5] == joint_cam[0],
        # both cameras take the joint camera's y and size
        split_cams[pair_start + 1] == joint_cam[1], split_cams[pair_start + 4] == joint_cam[1],
        split_cams[pair_start + 2] == joint_cam[2], split_cams[pair_start + 5] == joint_cam[2]
    ])
for i in layout_transition_3_to_1:
    # Mirror of the 1 -> 3 transition: frame i+1 holds three cameras in one 9-vector
    # and frame i supplies the single reference camera. The three cameras touch edge
    # to edge, the middle one is centred on the reference x, and all share its y and size.
    three_cams = cvxpy_variables[i+1]
    single_cam = virtual_camera_positions[i][0]
    list_constraints.extend(
        [
            three_cams[0] + ASPECT_RATIO * three_cams[2] == three_cams[3] - ASPECT_RATIO * three_cams[5],
            three_cams[3] + ASPECT_RATIO * three_cams[5] == three_cams[6] - ASPECT_RATIO * three_cams[8],
            three_cams[3] == single_cam[0],
        ]
        + [three_cams[y_index] == single_cam[1] for y_index in (1, 4, 7)]
        + [three_cams[s_index] == single_cam[2] for s_index in (2, 5, 8)]
    )
for i in layout_transition_2_to_3:
    # Frame i optimises one 3-vector camera (x, y, size) and frame i+1 supplies a
    # reference camera. One vertical edge of the optimised camera is pinned to the
    # reference x, and its y and size are pinned to the reference values.
    if selected_layouts[i] == 1:
        list_constraints.extend([
            # Left edge = x - ASPECT_RATIO * size. The size is entry 2; the original
            # used entry 1 (the y coordinate) here, unlike every other edge expression.
            cvxpy_variables[i][0] - ASPECT_RATIO * cvxpy_variables[i][2] == virtual_camera_positions[i+1][0][0],
            cvxpy_variables[i][1] == virtual_camera_positions[i+1][0][1], cvxpy_variables[i][2] == virtual_camera_positions[i+1][0][2]
        ])
    elif selected_layouts[i] == 2:
        list_constraints.extend([
            # Right edge = x + ASPECT_RATIO * size.
            cvxpy_variables[i][0] + ASPECT_RATIO * cvxpy_variables[i][2] == virtual_camera_positions[i+1][0][0],
            cvxpy_variables[i][1] == virtual_camera_positions[i+1][0][1], cvxpy_variables[i][2] == virtual_camera_positions[i+1][0][2]
        ])
    else: raise Exception("Error: Invalid layout")
for i in layout_transition_3_to_2:
    # Frame i+1 optimises one 3-vector camera (x, y, size) and frame i supplies a
    # reference camera. One vertical edge of the optimised camera is pinned to the
    # reference x, and its y and size are pinned to the reference values.
    if selected_layouts[i+1] == 1:
        list_constraints.extend([
            # Left edge = x - ASPECT_RATIO * size. The size is entry 2; the original
            # used entry 1 (the y coordinate) here, unlike every other edge expression.
            cvxpy_variables[i+1][0] - ASPECT_RATIO * cvxpy_variables[i+1][2] == virtual_camera_positions[i][0][0],
            cvxpy_variables[i+1][1] == virtual_camera_positions[i][0][1], cvxpy_variables[i+1][2] == virtual_camera_positions[i][0][2]
        ])
    elif selected_layouts[i+1] == 2:
        list_constraints.extend([
            # Right edge = x + ASPECT_RATIO * size.
            cvxpy_variables[i+1][0] + ASPECT_RATIO * cvxpy_variables[i+1][2] == virtual_camera_positions[i][0][0],
            cvxpy_variables[i+1][1] == virtual_camera_positions[i][0][1], cvxpy_variables[i+1][2] == virtual_camera_positions[i][0][2]
        ])
    else: raise Exception("Error: Invalid layout")

In [15]:
if len(list_constraints) != 0:
    # Minimise the data term plus the first-, second- and third-order smoothness
    # terms (each weighted by 10), subject to all collected constraints.
    opt_problem = cp.Problem(cp.Minimize(d_epsilon + 10*m1_epsilon + 10*m2_epsilon + 10*m3_epsilon), list_constraints)
    opt_problem.solve(max_iter=50000)

    # When the solver returns no solution (e.g. the problem is infeasible) every
    # Variable.value is None. Fail here, before anything is appended, rather than
    # storing None placeholders that are pickled and only break in a later cell.
    unsolved_frames = [index for index, variables in cvxpy_variables.items() if variables.value is None]
    if len(unsolved_frames) != 0:
        raise Exception("Error: no solution for %d frame(s) (solver status: %s)" % (len(unsolved_frames), opt_problem.status))

    # NOTE: this appends, so re-running the cell without rebuilding
    # virtual_camera_positions adds a second copy of each solution.
    for index, variables in cvxpy_variables.items():
        virtual_camera_positions[index].append(variables.value)

## Save the intermediate output

In [16]:
# Persist the per-frame camera positions next to the input video.
pickle_path = DATASET_FOLDER + VIDEO_FOLDER + "/virtual_camera_positions.pkl"
with open(pickle_path, 'wb') as pickle_file:
    pickle.dump(virtual_camera_positions, pickle_file)

# Save the results

## Check the validity of the results

In [17]:
# Every frame must hold the number of cameras, and the vector length per camera,
# that its layout prescribes: layout 0 -> one 9-vector, layouts 1 and 2 -> two
# 3-vectors, layout 3 -> one 3-vector.
for i in range(VIDEO_INPUT_FRAMES_COUNT):
    frame_layout = selected_layouts[i]
    if frame_layout == 0: expected_lengths = [9]
    elif frame_layout in [1,2]: expected_lengths = [3, 3]
    elif frame_layout == 3: expected_lengths = [3]
    else: raise Exception("Error: Invalid layout")

    frame_cameras = virtual_camera_positions[i]
    # Check the camera count first, then each length in order, stopping at the first mismatch.
    if len(frame_cameras) != len(expected_lengths) or any(
        len(camera) != expected for camera, expected in zip(frame_cameras, expected_lengths)
    ):
        raise Exception("Error: frame no. %d: %s" % (i, virtual_camera_positions[i]))

In [18]:
# Dump the camera vectors of every frame, one frame per line group.
for frame_index, frame_cameras in enumerate(virtual_camera_positions):
    print(frame_index, frame_cameras)

0 [array([1051.        , 1108.26315789,  134.73684211, 1621.57636365,
       1072.82209156,  135.18277105, 2036.26359325, 1087.4926293 ,
        135.51130212])]
1 [array([1054.        , 1104.26315789,  134.73684211, 1619.41902327,
       1072.82208664,  135.17792227, 2037.75173277, 1087.49904875,
        135.5268148 ])]
2 [array([1057.        , 1100.26315789,  134.73684211, 1618.58554579,
       1071.7045347 ,  135.1730905 , 2039.24130128, 1087.50626997,
        135.53488361])]
3 [array([1060.        , 1098.26315789,  134.73684211, 1618.5854285 ,
       1070.58697761,  135.17321876, 2037.76038826, 1087.51635507,
        135.53740991])]
4 [array([1083.        , 1096.26315789,  134.73684211, 1618.5851959 ,
       1069.82648351,  135.17354157, 2039.31201188, 1087.53581583,
        135.46087501])]
5 [array([1098.        , 1092.26315789,  134.73684211, 1618.58464132,
       1069.06600778,  135.17406588, 2039.38981517, 1087.55601936,
        135.37979799])]
6 [array([1104.        , 1088.2631

## Load the .pkl file

In [19]:
# Reload the camera positions written by the save step above.
# pickle can execute arbitrary code on load: only open files this notebook produced.
pickle_path = DATASET_FOLDER + VIDEO_FOLDER + "/virtual_camera_positions.pkl"
with open(pickle_path, 'rb') as pickle_file:
    virtual_camera_positions = pickle.load(pickle_file)

## Save the video

In [20]:
def _draw_camera_box(img_frame, camera, color):
    """Draw the 1-pixel outline of one virtual camera onto img_frame, in place.

    camera is a 3-element sequence (centre x, centre y, half-height). The values are
    rounded to integers first; the half-width is ASPECT_RATIO times the rounded
    half-height. color is a 3-element list of channel values.
    """
    center_x, center_y, half_height = (int(value) for value in np.rint(camera).astype(int))
    half_width = ASPECT_RATIO * half_height
    top_left = (int(np.rint(center_x - half_width)), center_y - half_height)
    bottom_right = (int(np.rint(center_x + half_width)), center_y + half_height)
    cv2.rectangle(img_frame, top_left, bottom_right, color, 1)


file_video_input = cv2.VideoCapture(DATASET_FOLDER + VIDEO_FOLDER + "/" + VIDEO_FOLDER + ".mp4")
file_video_output = cv2.VideoWriter(DATASET_FOLDER + VIDEO_FOLDER + "/" + 'output_video.mp4', cv2.VideoWriter_fourcc(*'mp4v'), VIDEO_INPUT_FPS, VIDEO_INPUT_FRAME_SIZE)
random_colors = np.random.randint(256, size = (3, 3)).tolist()
try:
    for i in range(VIDEO_INPUT_FRAMES_COUNT):
        success, img_frame = file_video_input.read()
        if not success:
            # The reported frame count can exceed the number of decodable frames;
            # stop cleanly instead of passing None to the drawing calls.
            print("Warning: could not read frame no. %d, stopping early" % i)
            break
        if selected_layouts[i] == 0:
            # One 9-vector holding three cameras, each drawn in its own colour.
            three_cameras = virtual_camera_positions[i][0]
            for camera_index in range(3):
                _draw_camera_box(img_frame, three_cameras[3*camera_index : 3*camera_index + 3], random_colors[camera_index])
        elif selected_layouts[i] in [1, 2]:
            # Two 3-vector cameras. NOTE(review): both use random_colors[0], as in the
            # original -- confirm whether distinct colours were intended.
            _draw_camera_box(img_frame, virtual_camera_positions[i][0], random_colors[0])
            _draw_camera_box(img_frame, virtual_camera_positions[i][1], random_colors[0])
        elif selected_layouts[i] == 3:
            _draw_camera_box(img_frame, virtual_camera_positions[i][0], random_colors[0])
        file_video_output.write(img_frame)
finally:
    # Release both handles even if drawing or writing fails, so the output file is finalised.
    file_video_input.release()
    file_video_output.release()