In [19]:
import os
import cv2
import numpy as np
import csv

def extract_frames(video_path, sampling_frequency):
    """Sample frames from a video at a fixed rate, starting with frame 0.

    Sample number ``k`` is read from frame ``int(k * fps / sampling_frequency)``,
    where ``fps`` is the frame rate reported by OpenCV for the file, so
    ``sampling_frequency`` is the number of samples taken per second of video.

    Args:
        video_path: Path of the video file to read.
        sampling_frequency: Samples per second of video; must be positive.

    Returns:
        A ``(frames, count)`` tuple: the list of sampled frames in
        chronological order and the number of frames in that list.

    Raises:
        FileNotFoundError: If ``video_path`` does not exist.
        ValueError: If ``sampling_frequency`` is not positive.
    """
    if not os.path.exists(video_path):
        # Raise instead of calling exit(), which would terminate the whole
        # interpreter (or notebook kernel) from inside a helper.
        raise FileNotFoundError(f"{video_path}: File not found.")
    if sampling_frequency <= 0:
        raise ValueError("sampling_frequency must be positive.")

    cap = cv2.VideoCapture(video_path)
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)

        frames = []
        count = 0
        while True:
            # Seek before every read so that sample 0 really is frame 0 and
            # sample k is the frame number the CSV output reports for it.
            cap.set(cv2.CAP_PROP_POS_FRAMES, int((count * fps) / sampling_frequency))
            ret, frame = cap.read()
            if not ret:
                break  # End of video (or the read failed)
            frames.append(frame)
            count += 1
    finally:
        # Release the capture even if seeking or reading raises.
        cap.release()

    return frames, count

def median_cut_quantize(img, img_arr):
    """Recolor every pixel of one bucket with the bucket's average color.

    Args:
        img: Image array indexed as ``img[row][col]``; modified in place.
        img_arr: 2-D integer array with one row per pixel of the bucket.
            Columns 0-2 hold the three color channels and columns 3-4 hold
            the pixel's row and column index in ``img``.
    """
    if len(img_arr) == 0:
        # Nothing to recolor; also avoids taking the mean of an empty slice.
        return

    mean_color = [np.mean(img_arr[:, channel]) for channel in range(3)]

    # One integer-array assignment writes the average to every listed pixel,
    # replacing a Python-level loop over the bucket.
    img[img_arr[:, 3], img_arr[:, 4]] = mean_color
        
def split_into_buckets(img, img_arr, depth):
    """Recursively halve a bucket of pixels along its widest color channel.

    Args:
        img: Image array that is recolored in place once ``depth`` reaches 0.
        img_arr: 2-D array with one row per pixel: columns 0-2 are the color
            channels, columns 3-4 the pixel's row and column index in ``img``.
        depth: Number of further splits to perform; at 0 the bucket is
            handed to ``median_cut_quantize``.
    """
    # An empty bucket has nothing to split or recolor.
    if len(img_arr) == 0:
        print("what")
        return

    # Recursion bottomed out: paint this bucket with its average color.
    if depth == 0:
        median_cut_quantize(img, img_arr)
        return

    first_range, second_range, third_range = (
        np.max(img_arr[:, channel]) - np.min(img_arr[:, channel])
        for channel in range(3)
    )

    # Pick the channel with the largest value range. On ties channel 1 wins,
    # then channel 2, and channel 0 is the fallback.
    if second_range >= first_range and second_range >= third_range:
        split_channel = 1
    elif third_range >= first_range and third_range >= second_range:
        split_channel = 2
    else:
        split_channel = 0

    # Order the pixels by the chosen channel and cut at the median position.
    ordering = img_arr[:, split_channel].argsort()
    ordered = img_arr[ordering]
    cut = (len(ordered) + 1) // 2

    lower_half = ordered[:cut]
    upper_half = ordered[cut:]
    split_into_buckets(img, lower_half, depth - 1)
    split_into_buckets(img, upper_half, depth - 1)

def flatten_img_array(img):
    """Flatten an image into one row per pixel, tagged with its coordinates.

    Args:
        img: Image indexed as ``img[row][col][channel]`` with at least three
            channels; only the first three channels are used.

    Returns:
        A 2-D array with one row per pixel, in row-major order, laid out as
        ``[channel0, channel1, channel2, row_index, col_index]``. An image
        with no pixels yields an empty array of shape ``(0, 5)``.
    """
    pixels = np.asarray(img)
    if pixels.size == 0:
        return np.empty((0, 5), dtype=int)

    height, width = pixels.shape[:2]
    # Build the coordinate columns in one shot instead of appending a small
    # Python list per pixel, which is prohibitively slow on full frames.
    row_idx, col_idx = np.indices((height, width))
    return np.column_stack(
        (pixels[:, :, :3].reshape(-1, 3), row_idx.ravel(), col_idx.ravel())
    )

def median_cut_quantize_image(img, depth=4):
    """Quantize the colors of ``img`` in place using median cut.

    Args:
        img: Image array to quantize; modified in place.
        depth: Number of recursive median splits. The pixels are divided
            into at most ``2 ** depth`` buckets, each recolored with its
            average color, so the default of 4 gives up to 16 colors.

    Returns:
        Whatever ``split_into_buckets`` returns (it has no return value, so
        ``None``).
    """
    flattened_img_array = flatten_img_array(img)
    return split_into_buckets(img, flattened_img_array, depth)

def median_cut_quantize_video(frames):
    """Run ``median_cut_quantize_image`` on each frame of ``frames`` in turn."""
    for single_frame in frames:
        median_cut_quantize_image(single_frame)

def remove_all_backgrounds(frames_array, background_color, null_color, height, width):
    """Replace every background-colored pixel with ``null_color``, in place.

    Args:
        frames_array: Iterable of NumPy image arrays indexed as
            ``frame[row][col]``; each frame is modified in place.
        background_color: Color whose exact matches are replaced.
        null_color: Color written over the matching pixels.
        height: Number of rows of each frame to process.
        width: Number of columns of each frame to process.
    """
    background = np.asarray(background_color)
    replacement = np.asarray(null_color)
    for frame in frames_array:
        # Basic slicing returns a view, so writes land in the original frame.
        region = frame[:height, :width]
        # A pixel matches only when all of its channels equal the background.
        is_background = (region == background).all(axis=-1)
        region[is_background] = replacement

def subtract_two_masks(curr_frame, next_frame, height, width, null_color):
    """Blank out the pixels of ``curr_frame`` that are unchanged in ``next_frame``.

    Args:
        curr_frame: NumPy image array, modified in place.
        next_frame: NumPy image array compared against; not modified.
        height: Number of rows to process.
        width: Number of columns to process.
        null_color: Color written to every pixel whose channels are all
            equal in both frames.
    """
    # Basic slicing returns a view, so writes land in the original frame.
    curr_region = curr_frame[:height, :width]
    next_region = next_frame[:height, :width]
    unchanged = (curr_region == next_region).all(axis=-1)
    curr_region[unchanged] = np.asarray(null_color)

def subtract_all_masks(frames_array, len, height, width, null_color):
    """Subtract each frame's successor from it, for all adjacent frame pairs.

    Frames ``0 .. len - 2`` are each passed to ``subtract_two_masks`` together
    with the frame that follows them; the last frame is left as it is.

    Note: the ``len`` parameter (the number of frames) shadows the builtin of
    the same name inside this function.
    """
    pair_count = len - 1
    for position in range(pair_count):
        subtract_two_masks(
            frames_array[position],
            frames_array[position + 1],
            height,
            width,
            null_color,
        )

def count_remaining(frames_array, output_array, height, width, null_color):
    """Count, per frame, the pixels that are not ``null_color``.

    Args:
        frames_array: Iterable of NumPy image arrays indexed as
            ``frame[row][col]``.
        output_array: List that receives one integer count per frame, in
            frame order.
        height: Number of rows of each frame to inspect.
        width: Number of columns of each frame to inspect.
        null_color: Color that marks a pixel as removed; a pixel is counted
            unless all of its channels equal this color.
    """
    null = np.asarray(null_color)
    for frame in frames_array:
        region = frame[:height, :width]
        # A pixel remains when at least one channel differs from null_color.
        remaining = (region != null).any(axis=-1)
        output_array.append(int(np.count_nonzero(remaining)))

def output_to_csv(array, output_csv, fps, sampling_frequency=None):
    """Append one CSV row per sampled frame to ``output_csv``.

    Each row holds the sample index, the source frame number
    ``int(index * fps / sampling_frequency)`` and the value. The file is
    opened in append mode and no header row is written.

    Args:
        array: Iterable of values, one per sampled frame, in sample order.
        output_csv: Path of the CSV file to append to.
        fps: Frame rate of the source video.
        sampling_frequency: Samples per second used when the frames were
            extracted. When omitted, the module-level ``sampling_frequency``
            is used, which is what this function always read before the
            parameter existed.

    Raises:
        NameError: If ``sampling_frequency`` is omitted and no module-level
            ``sampling_frequency`` is defined.
    """
    if sampling_frequency is None:
        # Backward compatibility for three-argument callers that rely on the
        # module-level setting.
        try:
            sampling_frequency = globals()["sampling_frequency"]
        except KeyError:
            raise NameError("name 'sampling_frequency' is not defined") from None

    with open(output_csv, 'a', newline='') as csvfile:
        fieldnames = ['index', 'frame', 'value']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        for index, value in enumerate(array):
            writer.writerow({'index' : index, 'frame' : int((index * fps) / sampling_frequency), 'value' : value})

def save_frames(frames, output_dir):
    """Write every frame to ``output_dir`` as ``scene_<index>.jpg``.

    The directory is created when it does not exist yet, and the frames are
    then saved into it.

    Args:
        frames: Iterable of images accepted by ``cv2.imwrite``.
        output_dir: Directory that receives the image files.
    """
    # Create the directory if needed and carry on; calling exit() here used
    # to stop the program before a single frame had been written.
    os.makedirs(output_dir, exist_ok=True)
    for index, frame in enumerate(frames):
        img_file = os.path.join(output_dir, f"scene_{index}.jpg")
        cv2.imwrite(img_file, frame)

def run_mask_segmentation(video_path, output_csv, output_dir, sampling_frequency):
    """Run the mask-segmentation pipeline on one video.

    Steps: sample frames, quantize their colors, replace background pixels
    with a null color, blank the pixels that do not change between adjacent
    samples, count the pixels left in each sample, append the counts to a
    CSV file and save the processed frames as images.

    Args:
        video_path: Path of the input video.
        output_csv: Path of the CSV file the per-frame counts are appended to.
        output_dir: Directory that receives the processed frames.
        sampling_frequency: Samples per second of video.

    Raises:
        IOError: If the first frame of the video cannot be read.
    """
    # Load video
    cap = cv2.VideoCapture(video_path)
    try:
        # Get video properties
        fps = cap.get(cv2.CAP_PROP_FPS)
        ret, frame = cap.read()
    finally:
        cap.release()

    if not ret:
        # Without this check a failed read surfaces as an AttributeError on
        # ``None.shape`` below.
        raise IOError(f"{video_path}: could not read the first frame.")
    height, width = frame.shape[:2]

    print("height: ", height)
    print("width: ", width)

    # frames NumPy array
    print("Extracting frames")
    frames, count = extract_frames(video_path, sampling_frequency)
    frames_array = np.array(frames)
    print("Finished extracting frames")

    # Assuming white background
    background_color = [253, 253, 253] 
    null_color = [50, 205, 50]

    print("Quantizing frames")
    median_cut_quantize_video(frames_array)
    print("Finished quantizing frames")

    # Removing background pixels, creating masks
    print("Removing backgrounds")
    remove_all_backgrounds(frames_array, background_color, null_color, height, width)
    print("Finished reomving backgrounds")

    # Subtracting adjacent frames
    print("Subtracting masks")
    subtract_all_masks(frames_array, count, height, width, null_color)
    print("Finished subtracting masks")

    # Checking emptiness of frame
    print("Counting remaining pixels")
    frame_pixel_count = []
    count_remaining(frames_array, frame_pixel_count, height, width, null_color)
    print("Finished counting remaining pixels")

    # Output to csv
    print("Outputting to csv")
    # Pass the rate actually used for extraction so the CSV frame numbers
    # cannot drift from a differently-valued module-level setting.
    output_to_csv(frame_pixel_count, output_csv, fps, sampling_frequency)
    print("Finished outputting to csv")

    # frames_array to image for visualization
    print("Saving frames")
    save_frames(frames_array, output_dir)
    print("Finished saving frames")

# Video, output csv, output dir path
video_path = 'data/videos/test9_cs412-21s.mp4'
output_csv = 'output/txt/annotations.csv'
output_dir = "output/image/maskSeg"
# Keep this as a module-level name: output_to_csv refers to
# `sampling_frequency` from module scope.
sampling_frequency = 1  # samples per second of video (frame step = fps / sampling_frequency)

# Run the whole pipeline: the CSV is appended to and the frames are written as images.
run_mask_segmentation(video_path, output_csv, output_dir, sampling_frequency)

height:  1440
width:  2560
Extracting frames
Finished extracting frames
Quantizing frames
Finished quantizing frames
Removing backgrounds
Finished reomving backgrounds
Subtracting masks
Finished subtracting masks
Counting remaining pixels
Finished counting remaining pixels
Outputting to csv
Finished outputting to csv
Saving frames
Finished saving frames
