# Vision

The purpose of this notebook is to create images that will make the workflow of moving apples easy to do. 
The final product will be a Streamlit site where I can go forward and backwards and see frames and the transitions between them.

Requirements: Original frame from the video, the frame downsampled to circles with coordinates of every apple, and a diff frame with the added apples in green and removed apples in red.

Also, add travelling salesman problem to the diffed frames to find the best path to move the apples, draw arrows for those

In [1]:
import cv2
import numpy as np
from pathlib import Path
import os

In [2]:
badapple_path = Path(".").resolve().parent / "badapple-small.mp4"

cap = cv2.VideoCapture(str(badapple_path))

width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(cap.get(cv2.CAP_PROP_FPS))
n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
print(f"width: {width}, height: {height}, fps: {fps}, n_frames: {n_frames}")

square_side = 30
black_threshold = 50

width: 960, height: 720, fps: 30, n_frames: 6572


# Store real_frames from the original video


In [3]:
ret, frame = cap.read()
index = 0
while ret:
    cv2.imwrite(f"real_frames/{index}.jpg", frame)
    ret, frame = cap.read()
    index += 1

### Generate circle, downsampled frames with coordinates of apples labeled

In [14]:
# reset cap
cap = cv2.VideoCapture(str(badapple_path))
index = 0
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
    # convert to grayscale
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    if index % 100 == 0 or index == n_frames - 1:
        print(f"frame {index}/{n_frames}", end="\r")
    out_frame = np.ones((height, width), dtype=np.uint8) * 255

    current_frame_pixels = set()
    for i in range(0, width - 1, square_side):
        for j in range(0, height - 1, square_side):
            section = frame[j: j + square_side, i: i + square_side]
            # if its dark, add dark circle with white coordinate text
            if (np.mean(section)) < black_threshold:
                cv2.circle(out_frame, (i+square_side//2, j+square_side//2), square_side//2, 0, -1)
                text_color = 255
            else:
                text_color = 0
            # otherwise, black coordinate text
            # put first coordinate higher, second coordinate lower
            
            cv2.putText(out_frame, f"{j//square_side+1}", (i+square_side//3-1, j + 22), cv2.FONT_HERSHEY_SIMPLEX, 0.3, text_color, 1, cv2.LINE_AA)
            cv2.putText(out_frame, f"{i//square_side+1}", (i+square_side//3-1, j + 12), cv2.FONT_HERSHEY_SIMPLEX, 0.3, text_color, 1, cv2.LINE_AA)
    cv2.imwrite(f"circle_frames/{index}.jpg", out_frame)
    index += 1

frame 6571/6572

### Generate diff frames with additions in green and removals in red

In [3]:
# reset cap
cap = cv2.VideoCapture(str(badapple_path))
index = 0
last_frame_pixels = set()

while cap.isOpened():
    ret, frame = cap.read()

    if not ret:
        break
    # convert to grayscale
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    if index % 100 == 0 or index == n_frames - 1:
        print(f"frame {index}/{n_frames}", end="\r")
        
    out_frame = np.ones((height, width, 3), dtype=np.uint8) * 255

    current_frame_pixels = set()
    for i in range(0, width - 1, square_side):
        for j in range(0, height - 1, square_side):
            section = frame[j: j + square_side, i: i + square_side]
             # three cases we care about: a pixel is persisted, a pixel is added, a pixel is removed
            circle_colour = None
            if (np.mean(section)) < 50:
                current_frame_pixels.add((i, j))
                # black pixel
                if (i, j) in last_frame_pixels:
                    # persisted from previous frame. Black circle, white text
                    circle_colour = (0, 0, 0)
                    text_color = (255, 255, 255)
                else:
                    # new black dot. Green circle, black text
                    circle_colour = (0, 255, 0)
                    text_color = (0, 0 ,0)
            else:
                # white pixel
                if (i, j) in last_frame_pixels:
                    # removed black dot. Red circle, black text
                    circle_colour = (0, 0, 255)
                    text_color = (0, 0, 0)
                else:
                    # persisted nothing. Just use black text
                    text_color = (0, 0 ,0)
                    
            if circle_colour is not None:
                cv2.circle(out_frame, (i+square_side//2, j+square_side//2), square_side//2, circle_colour, -1)
            cv2.putText(out_frame, f"{j//square_side+1}", (i+square_side//3-1, j + 22), cv2.FONT_HERSHEY_SIMPLEX, 0.3, text_color, 1, cv2.LINE_AA)
            cv2.putText(out_frame, f"{i//square_side+1}", (i+square_side//3-1, j + 12), cv2.FONT_HERSHEY_SIMPLEX, 0.3, text_color, 1, cv2.LINE_AA)
    cv2.imwrite(f"diff_frames/{index}.jpg", out_frame)

    last_frame_pixels = current_frame_pixels
    index += 1

frame 6571/6572

# Additions and removals in different colours but also arrows using TSP algorithm

In [52]:
# travelling salesman solving function
from python_tsp.heuristics import solve_tsp_simulated_annealing
from python_tsp.heuristics import solve_tsp_local_search
from python_tsp.exact import solve_tsp_dynamic_programming
from python_tsp.distances import euclidean_distance_matrix
import numpy as np

def closest_to_origin_index(points):
    # given a list of points, return the point closest to (0, 0)
    points_arr = np.array(points)
    closest_to_origin = np.argmin(np.linalg.norm(points_arr, axis=1))
    return closest_to_origin

def tsp_solver(points):
    # given a list of points, return a list of location tuples representing the path to take
    # points is a list of (x, y) tuples, this is done on cartesian grid
    # start closest to top right, because laptop will be in bottom right so top left is the least convenient to end on
    points_arr = np.array(points)
    # sort points_arr by distance from the origin actually
    points_arr = points_arr[np.argsort(np.linalg.norm(points_arr, axis=1))]
    # return points_arr
    # find the path from that point to the rest of the points
    # make nxn distance matrix
    num_points = len(points)
    distance_matrix = euclidean_distance_matrix(points_arr)
    # make distance array where the distances from closest_to_origin to the rest of the points are 0
    distance_matrix[0, :] = 0
    # permutation, distance = solve_tsp_local_search(distance_matrix)
    permutation, distance = solve_tsp_dynamic_programming(distance_matrix)
    route = []
    for i in range(num_points):
        route.append(points[permutation[i]])
    return route

In [53]:
# reset cap
cap = cv2.VideoCapture(str(badapple_path))
index = 0
last_frame_pixels = set()

while cap.isOpened():
    ret, frame = cap.read()

    if not ret:
        break
    # convert to grayscale
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    if index % 100 == 0 or index == n_frames - 1:
        print(f"frame {index}/{n_frames}", end="\r")
        
    out_frame = np.ones((height, width, 3), dtype=np.uint8) * 255

    current_frame_pixels = set()
    pixels_that_change = set()
    for i in range(0, width - 1, square_side):
        for j in range(0, height - 1, square_side):
            section = frame[j: j + square_side, i: i + square_side]
             # three cases we care about: a pixel is persisted, a pixel is added, a pixel is removed
            text_color = None
            circle_colour = None
            if (np.mean(section)) < 50:
                current_frame_pixels.add((i, j))
                # black pixel
                if (i, j) in last_frame_pixels:
                    # persisted from previous frame. Black circle, white text
                    circle_colour = (0, 0, 0)
                    text_color = (255, 255, 255)
                else:
                    # new black dot. Green circle, no text because tsp markers will be used
                    circle_colour = (0, 255, 0)
                    pixels_that_change.add((i, j))
            else:
                # white pixel
                if (i, j) in last_frame_pixels:
                    # removed black dot. Red circle, no text because tsp markers will be used
                    circle_colour = (0, 0, 255)
                    pixels_that_change.add((i, j))
                else:
                    # persisted nothing. Just use black text
                    text_color = (0, 0 ,0)
                    
            if circle_colour is not None:
                cv2.circle(out_frame, (i+square_side//2, j+square_side//2), square_side//2, circle_colour, -1)
            if text_color is not None:
                cv2.putText(out_frame, f"{j//square_side+1}", (i+square_side//3-1, j + 22), cv2.FONT_HERSHEY_SIMPLEX, 0.3, text_color, 1, cv2.LINE_AA)
                cv2.putText(out_frame, f"{i//square_side+1}", (i+square_side//3-1, j + 12), cv2.FONT_HERSHEY_SIMPLEX, 0.3, text_color, 1, cv2.LINE_AA)
    change_list = list(pixels_that_change)
    change_length = len(change_list)
    print(f"frame {index}/{n_frames}, change_length: {change_length}")
    if change_length == 1:
        # one change, no arrows
        pass
    elif change_length == 2: # only two changes, don't run TSP, label the closest one 1 and the other 2
        start = closest_to_origin_index(change_list)
        change_arr = np.array(change_list) # only used so I can add square_side//2 to the coordinates without having to index the tuple
        cv2.putText(out_frame, f"1", (change_arr[start][0]+square_side//3-1, change_arr[start][1] + 18), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 0), 1, cv2.LINE_AA)
        cv2.putText(out_frame, f"2", (change_arr[1-start][0]+square_side//3-1, change_arr[1-start][1] + 18), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 0), 1, cv2.LINE_AA)
    elif index != 0 and len(pixels_that_change) != 0:
        route = tsp_solver(change_list)
        for i, point in enumerate(route):
            cv2.putText(out_frame, f"{i+1}", (point[0]+square_side//3-1, point[1] + 18), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 0), 1, cv2.LINE_AA)
            
    cv2.imwrite(f"diff_frames_tsp/{index}.jpg", out_frame)

    last_frame_pixels = current_frame_pixels
    index += 1

frame 0/6572, change_length: 768
frame 1/6572, change_length: 0
frame 2/6572, change_length: 0
frame 3/6572, change_length: 0
frame 4/6572, change_length: 0
frame 5/6572, change_length: 0
frame 6/6572, change_length: 0
frame 7/6572, change_length: 0
frame 8/6572, change_length: 0
frame 9/6572, change_length: 0
frame 10/6572, change_length: 0
frame 11/6572, change_length: 0
frame 12/6572, change_length: 0
frame 13/6572, change_length: 0
frame 14/6572, change_length: 0
frame 15/6572, change_length: 0
frame 16/6572, change_length: 0
frame 17/6572, change_length: 0
frame 18/6572, change_length: 0
frame 19/6572, change_length: 0
frame 20/6572, change_length: 0
frame 21/6572, change_length: 0
frame 22/6572, change_length: 0
frame 23/6572, change_length: 0
frame 24/6572, change_length: 0
frame 25/6572, change_length: 0
frame 26/6572, change_length: 0
frame 27/6572, change_length: 0
frame 28/6572, change_length: 0
frame 29/6572, change_length: 0
frame 30/6572, change_length: 0
frame 31/6572, c

KeyboardInterrupt: 