In [19]:
import os
import cv2
import numpy as np
import csv

# Video and output csv path
video_path = 'data/videos/IMG_9917.mp4'
output_csv = 'output/txt/annotations.csv'

if not os.path.exists(video_path):
        print(f"{video_path}: File not found.")


# Load video
cap = cv2.VideoCapture(video_path)
sampling_frequency = 1  # seconds between samples

# Get video properties
fps = cap.get(cv2.CAP_PROP_FPS)
ret, frame = cap.read()
height, width, channels = frame.shape

# frames Numpy array 
frames = []

# Extracting frames, chronologically forwards
count = 0
frame_index = 0
while frame_index <= cap.get(cv2.CAP_PROP_FRAME_COUNT) :
    ret, frame = cap.read()
    if not ret:
        break  # End of video
    frames.append(frame)
    
    # Increment frame_index
    count += 1
    cap.set(cv2.CAP_PROP_POS_FRAMES, int((count * fps) / sampling_frequency))

cap.release()

# Convert to NumPy array
frames_array = np.array(frames)

# Assuming white background
background_color = [255, 255, 255] 

# Removing background pixels, creating masks
for frame in frames_array :
    for row in range(height) :
        for col in range(width) :
            if (frame[row][col] == background_color).all() :
                frame[row][col] = [-1, -1, -1] # could add a transparency channel instead of setting to invalid value

# Helper function
def subtract_pixels(pixel1, pixel2) :
    for index in range(3) : # 3 color channels
        if (pixel1[index] <= pixel2[index]) :
            pixel1[index] = -1
            continue
        pixel1[index] -= pixel1[index]
    return pixel1

# Subtracting adjacent frames
for index in range(count - 1) : 
    curr_frame = frames[index]
    next_frame = frames[index + 1]
    for row in range(height) :
        for col in range(width) : 
            if ((next_frame[row][col] == background_color).all() or (curr_frame[row][col] == background_color).all()) :
                continue
            curr_frame[row][col] = subtract_pixels(curr_frame[row][col], next_frame[row][col]) # so, the thing is, i don't know what i'm doing

# Checking emptiness of frame
frame_pixel_count = [0] * count 
for index in range(count) : 
    curr_frame = frames_array[index]
    for row in range(height) :
        for col in range(width) : 
            if ((curr_frame[row][col] == background_color).all()) :
                continue
            frame_pixel_count[index] += 1

# Output to csv
with open(output_csv, 'a', newline='') as csvfile:
    fieldnames = ['index', 'frame', 'value']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    index = 0
    for value in frame_pixel_count :
        writer.writerow({'index' : index, 'frame' : int((index * fps) / sampling_frequency), 'value' : value})
        index += 1
            

For the old behavior, usually:
    np.array(value).astype(dtype)
will give the desired result (the cast overflows).
  frame[row][col] = [-1, -1, -1] # could add a transparency channel instead of setting to invalid value
For the old behavior, usually:
    np.array(value).astype(dtype)
will give the desired result (the cast overflows).
  pixel1[index] = -1
