Adapted from https://github.com/shreyas-bk/u2netdemo

 U-2-NET Paper: [U2-Net: Going Deeper with Nested U-Structure for Salient Object Detection](https://arxiv.org/abs/2005.09007)

 Original Repo: [U-2-Net Github repo](https://github.com/NathanUA/U-2-Net)

References: X. Qin, Z. Zhang, C. Huang, M. Dehghan, O. R. Zaiane, and M. Jagersand, “U2-net: Going deeper with nested u-structure for salient object
detection,” Pattern Recognition, vol. 106, p. 107404, 2020

In [1]:
import os
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import img_to_array
import numpy as np
from PIL import Image as Img
import cv2
from skimage.transform import resize
import shutil

Converting frames to videos and videos to frames

In [2]:
def clear_directory(directory):
    if os.path.exists(directory):
        shutil.rmtree(directory)
    os.makedirs(directory)

def video_to_frames(video_path, frames_dir, num_frames=100):
    # Clear the frames directory
    clear_directory(frames_dir)

    # Capture the video
    cap = cv2.VideoCapture(video_path)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_indices = [int(i * total_frames / num_frames) for i in range(num_frames)]
    
    count = 0
    frame_id = 0

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        if count in frame_indices:
            frame_filename = os.path.join(frames_dir, f'frame_{frame_id:04d}.png')
            cv2.imwrite(frame_filename, frame)
            frame_id += 1
        count += 1

    cap.release()
    print(f"Extracted {frame_id} frames.")


def frames_to_video(frames_dir, video_path, fps=30):
    # List all frames sorted by name
    frames = sorted([os.path.join(frames_dir, img) for img in os.listdir(frames_dir) if img.endswith(".png")])

    if not frames:
        print("No frames found in the directory.")
        return

    # Read the first frame to get the width and height
    frame = cv2.imread(frames[0])
    height, width, layers = frame.shape

    # Define the codec and create a VideoWriter object
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # You can also use 'XVID', 'MJPG', etc.
    video = cv2.VideoWriter(video_path, fourcc, fps, (width, height))

    for frame in frames:
        img = cv2.imread(frame)
        video.write(img)

    video.release()
    print(f"Video created at {video_path}")

# Example usage
video_to_frames('input_vids/cat.mp4', 'images')


Extracted 100 frames.


In [3]:
!python -W ignore u2net_test.py

Traceback (most recent call last):
  File "u2net_test.py", line 23, in <module>
    from model import U2NET # full size version 173.6 MB
ModuleNotFoundError: No module named 'model'


In [17]:
image_dir = os.path.join(os.getcwd(), 'images')
names = [name[:-4] for name in os.listdir(image_dir)]
names

['frame_0028',
 'frame_0000',
 'frame_0014',
 'frame_0015',
 'frame_0001',
 'frame_0029',
 'frame_0017',
 'frame_0003',
 'frame_0002',
 'frame_0016',
 'frame_0012',
 'frame_0006',
 'frame_0007',
 'frame_0013',
 'frame_0005',
 'frame_0011',
 'frame_0039',
 'frame_0038',
 'frame_0010',
 'frame_0004',
 'frame_0063',
 'frame_0077',
 'frame_0088',
 'frame_0089',
 'frame_0076',
 'frame_0062',
 'frame_0048',
 'frame_0074',
 'frame_0060',
 'frame_0061',
 'frame_0075',
 'frame_0049',
 'frame_0071',
 'frame_0065',
 'frame_0059',
 'frame_0058',
 'frame_0064',
 'frame_0070',
 'frame_0066',
 'frame_0072',
 'frame_0099',
 'frame_0098',
 'frame_0073',
 'frame_0067',
 'frame_0042',
 'frame_0056',
 'frame_0081',
 'frame_0095',
 'frame_0094',
 'frame_0080',
 'frame_0057',
 'frame_0043',
 'frame_0069',
 'frame_0055',
 'frame_0041',
 'frame_0096',
 'frame_0082',
 'frame_0083',
 'frame_0097',
 'frame_0040',
 'frame_0054',
 'frame_0068',
 'frame_0050',
 'frame_0044',
 'frame_0078',
 'frame_0093',
 'frame_00

In [19]:
def process_img(name):
    if not os.path.exists("processed_images"):
        os.makedirs("processed_images")

    RESCALE = 255
    output = load_img( 'results/'+name+'.png')
    input = load_img( 'images/'+name+'.png')
    inp_img = img_to_array(input)
    inp_img /= RESCALE

    out_img = img_to_array(output)
    out_img /= RESCALE

    THRESHOLD = 0.9

    # refine the output
    out_img[out_img > THRESHOLD] = 1
    out_img[out_img <= THRESHOLD] = 0

    LAYER = 2
    out_layer = out_img[:,:,LAYER]

    x_starts = [np.where(out_layer[i]==1)[0][0] if len(np.where(out_layer[i]==1)[0])!=0 else out_layer.shape[0]+1 for i in range(out_layer.shape[0])]
    x_ends = [np.where(out_layer[i]==1)[0][-1] if len(np.where(out_layer[i]==1)[0])!=0 else 0 for i in range(out_layer.shape[0])]
    y_starts = [np.where(out_layer.T[i]==1)[0][0] if len(np.where(out_layer.T[i]==1)[0])!=0 else out_layer.T.shape[0]+1 for i in range(out_layer.T.shape[0])]
    y_ends = [np.where(out_layer.T[i]==1)[0][-1] if len(np.where(out_layer.T[i]==1)[0])!=0 else 0 for i in range(out_layer.T.shape[0])]

    # get the starting and ending coordinated for the box
    startx = min(x_starts)
    endx = max(x_ends)
    starty = min(y_starts)
    endy = max(y_ends)

    # show the resulting coordinates
    start = (startx,starty)
    end = (endx,endy)


    COLOR = (0, 0, 0)
    THICKNESS = 4
    box_img = inp_img.copy()
    box_img *= RESCALE
    box_img = cv2.rectangle(box_img, start, end, COLOR, THICKNESS)
    box_img_scaled = Img.fromarray(box_img.astype('uint8'), 'RGB')

    SAL_SHIFT = 75
    sal_img = inp_img.copy()
    sal_img*=RESCALE
    add_layer = out_img.copy()
    add_layer[add_layer==1] = SAL_SHIFT
    sal_img[:,:,LAYER] += add_layer[:,:,LAYER]
    sal_img_scaled = Img.fromarray(sal_img.astype('uint8'), 'RGB')

    sal_img_scaled.save('processed_images/' + name + '.png')
    print(name)

    
for name in names:

    process_img(name)

frame_0028
frame_0000
frame_0014
frame_0015
frame_0001
frame_0029
frame_0017
frame_0003
frame_0002
frame_0016
frame_0012
frame_0006
frame_0007
frame_0013
frame_0005
frame_0011
frame_0039
frame_0038
frame_0010
frame_0004
frame_0063
frame_0077
frame_0088
frame_0089
frame_0076
frame_0062
frame_0048
frame_0074
frame_0060
frame_0061
frame_0075
frame_0049
frame_0071
frame_0065
frame_0059
frame_0058
frame_0064
frame_0070
frame_0066
frame_0072
frame_0099
frame_0098
frame_0073
frame_0067
frame_0042
frame_0056
frame_0081
frame_0095
frame_0094
frame_0080
frame_0057
frame_0043
frame_0069
frame_0055
frame_0041
frame_0096
frame_0082
frame_0083
frame_0097
frame_0040
frame_0054
frame_0068
frame_0050
frame_0044
frame_0078
frame_0093
frame_0087
frame_0086
frame_0092
frame_0079
frame_0045
frame_0051
frame_0047
frame_0053
frame_0084
frame_0090
frame_0091
frame_0085
frame_0052
frame_0046
frame_0009
frame_0021
frame_0035
frame_0034
frame_0020
frame_0008
frame_0036
frame_0022
frame_0023
frame_0037
frame_0033

In [24]:
frames_to_video('processed_images', 'output_vids/cat_salient.mp4', fps=30)


Video created at output_vids/cat_salient.mp4
