In [None]:
import os
import torch
from PIL import Image
import imageio
import numpy as np
from torchvision.transforms.functional import to_pil_image
from torchvision.transforms import ToTensor
from moviepy.editor import concatenate_videoclips, VideoFileClip
import cv2

### HELPERS ###
def image_to_tensor(file_path):
    # Open the image using PIL
    img = Image.open(file_path)

    # Apply the ToTensor transform
    tensor = ToTensor()(img)

    return tensor

def tensor_to_avi(tensor, output_path, fps=30):
    ''' THIS IS FOR AVI ITS BUGGY SO FORGET IT EVEN THOUGH ITS LOSSLESS'''
    assert output_path.endswith(".avi"), "Output path must end with '.avi' so that it's lossless"

    # Create a list to store numpy arrays representing the images
    images = []

    # Convert each frame in the tensor to a PIL image and then to a numpy array
    for i in range(tensor.shape[0]):
        img = to_pil_image(tensor[i])
        images.append(np.array(img))

    # Save the list of numpy arrays as a lossless video using the FFV1 codec
    imageio.mimwrite(output_path, images, fps=fps, codec='ffv1')

def tensor_to_mp4(video_tensor, output_path):
    # Convert the tensor to a numpy array
    video_np = video_tensor.numpy()

    # Rescale the pixel values to the range [0, 255]
    video_np = (255 * video_np).astype('uint8')

    # Transpose the tensor to match the shape expected by imageio (frames, height, width, channels)
    video_np = video_np.transpose((0, 2, 3, 1))

    # Write the frames to an MP4 file using imageio
    with imageio.get_writer(output_path, fps=30) as writer:
        for frame in video_np:
            writer.append_data(frame)

def normalize_tensor(tensor):
    # Ensures values are bounded between 0 and 1
    return (tensor - tensor.min()) / (tensor.max() - tensor.min())

def concat_video_files(file1, file2, output_file):
    assert os.path.exists(file1)
    assert os.path.exists(file2)
    
    # Load the video files
    clip1 = VideoFileClip(file1)
    clip2 = VideoFileClip(file2)

    # Concatenate the videos
    final_clip = concatenate_videoclips([clip1, clip2])

    # Write the concatenated video to the output file
    final_clip.write_videofile(output_file)

def save_frames(frames, video_fn, fps=30):
    writer = imageio.get_writer(video_fn,fps=fps)
    for frame in frames:
        writer.append_data((frame).astype('uint8'))
    writer.close()

def detensorize(frames):
    if len(frames.shape) == 5:
        N, T, C, H, W = frames.shape
        frames = frames.view((N * T, C, H, W))
    return frames.cpu().squeeze(0).permute((0, 2, 3, 1)).numpy() * 255

def tensorize(frames, device_id=0):
    frames = torch.from_numpy(frames) / 255.
    # B (T C H W) 
    return frames.permute((0, 3, 1, 2)).to(f'cuda:{device_id}')

def load_frames_imageio(video_fn, start=0, stop=float('inf')):
    reader = imageio.get_reader(video_fn)
    frames = []
    for i, frame in enumerate(reader):
        if i == stop:
            break
        if i >= start:
            frames.append(frame)
    fps = reader.get_meta_data()['fps']
    reader.close()
    frames = np.stack(frames)
    return frames, fps

def concat_videos(*videos):
    return np.concatenate(videos, axis=0)



: 

In [3]:
### DAVIS ###
segmentation_folder = "/data/katop1234/mae_testing_data/DAVIS/Annotations/"
RGB_folder = "/data/katop1234/mae_testing_data/DAVIS/JPEGImages/"
output_dir = "/data/katop1234/mae_testing_data/DAVIS/final_temporal_videos/"
videos_list = []

for video_label in os.listdir(RGB_folder):
    videos_list.append(video_label)

### Temporal ###
for video_label in videos_list:
    if video_label + ".mp4" not in os.listdir(output_dir):

        print("Using label", video_label)
        segmented_video_folder = os.path.join(segmentation_folder, video_label)
        RGB_video_folder = os.path.join(RGB_folder, video_label)
        assert(len(os.listdir(segmented_video_folder)) == len(os.listdir(RGB_video_folder))), "Number of files in segmented vs RGB folder must be equal"
        
        RGB_frames = []
        segmented_frames = []
        home_dir = os.getcwd()
        
        rgb_images = [os.path.join(RGB_video_folder, f) for f in sorted(os.listdir(RGB_video_folder))]
        RGB_frames = np.stack([imageio.imread(image_file) for image_file in rgb_images], axis=0)
        
        segmented_images = [os.path.join(segmented_video_folder, f) for f in sorted(os.listdir(segmented_video_folder))]

        segmented_frames = []
        for image_file in segmented_images:
            seg_image_np_array = imageio.imread(image_file)
            if len(seg_image_np_array.shape) == 2:
                
                print("all black seg file at", image_file, "got shape and array", seg_image_np_array.shape, seg_image_np_array)
                seg_image_np_array = np.repeat(seg_image_np_array[:, :, np.newaxis], 4, axis=2)

            # Note, I removed the 4th channel for transparency. It doesn't matter since it's a black background anyway. 
            segmented_frames.append(seg_image_np_array[:, :, :3])
        segmented_frames = np.stack(segmented_frames, axis=0)

        final_video_path = os.path.join(output_dir, video_label + ".mp4")

        print("rgb shape", RGB_frames.shape)
        print("seg shape", segmented_frames.shape)

        concated_videos = concat_videos(RGB_frames, segmented_frames)
        save_frames(concated_videos, final_video_path, 30)

### Spatial ###
output_dir = "/data/katop1234/mae_testing_data/DAVIS/final_spatiotemporal_videos/"
for video_label in videos_list:
    if video_label + "_spatial.mp4" not in os.listdir(output_dir):

        print("Using label", video_label)
        segmented_video_folder = os.path.join(segmentation_folder, video_label)
        RGB_video_folder = os.path.join(RGB_folder, video_label)
        assert(len(os.listdir(segmented_video_folder)) == len(os.listdir(RGB_video_folder))), "Number of files in segmented vs RGB folder must be equal"
        
        RGB_frames = []
        segmented_frames = []
        home_dir = os.getcwd()
        
        rgb_images = [os.path.join(RGB_video_folder, f) for f in sorted(os.listdir(RGB_video_folder))]
        RGB_frames = np.stack([imageio.imread(image_file) for image_file in rgb_images], axis=0)
        
        segmented_images = [os.path.join(segmented_video_folder, f) for f in sorted(os.listdir(segmented_video_folder))]

        segmented_frames = []
        for image_file in segmented_images:
            seg_image_np_array = imageio.imread(image_file)
            if len(seg_image_np_array.shape) == 2:
                
                print("all black seg file at", image_file, "got shape and array", seg_image_np_array.shape, seg_image_np_array)
                seg_image_np_array = np.repeat(seg_image_np_array[:, :, np.newaxis], 3, axis=2)

            segmented_frames.append(seg_image_np_array[:, :, :3])
        segmented_frames = np.stack(segmented_frames, axis=0)

        final_video_path = os.path.join(output_dir, video_label + "_spatial.mp4")

        print("rgb shape", RGB_frames.shape)
        print("seg shape", segmented_frames.shape)

        # Spatial concatenation
        concated_videos = np.concatenate((RGB_frames, segmented_frames), axis=1)
        save_frames(concated_videos, final_video_path, 30)


In [29]:
### SegTrack ###
segmentation_folder = "/data/katop1234/mae_testing_data/segtrack/SegTrackv2/GroundTruth/"
RGB_folder = "/data/katop1234/mae_testing_data/segtrack/SegTrackv2/JPEGImages/"
output_dir = "/data/katop1234/mae_testing_data/segtrack/final_videos/"
videos_list = []

bad_labels = ["penguin", "cheetah"]
# both has .bmp idk wat to do with that

for video_label in os.listdir(RGB_folder):
    videos_list.append(video_label)

### Temporal ###
for video_label in videos_list:
    if video_label + ".mp4" not in os.listdir(output_dir) and video_label not in bad_labels:

        print("Using label", video_label)
        segmented_video_folder = os.path.join(segmentation_folder, video_label)
        RGB_video_folder = os.path.join(RGB_folder, video_label)
        assert(len(os.listdir(segmented_video_folder)) == len(os.listdir(RGB_video_folder))), "Number of files in segmented vs RGB folder must be equal"
        
        RGB_frames = []
        segmented_frames = []
        home_dir = os.getcwd()
        
        rgb_images = [os.path.join(RGB_video_folder, f) for f in sorted(os.listdir(RGB_video_folder))]
        RGB_frames = np.stack([imageio.imread(image_file) for image_file in rgb_images], axis=0)

        segmented_images = [os.path.join(segmented_video_folder, f) for f in sorted(os.listdir(segmented_video_folder))]

        segmented_frames = []
        for image_file in segmented_images:
            seg_image_np_array = imageio.imread(image_file)
            segmented_frames.append(seg_image_np_array)
        segmented_frames = np.stack(segmented_frames, axis=0)

        final_video_path = os.path.join(output_dir, video_label + ".mp4")

        print("rgb shape", RGB_frames.shape)
        print("seg shape", segmented_frames.shape)

        concated_videos = concat_videos(RGB_frames, segmented_frames)
        save_frames(concated_videos, final_video_path, 30)

### Spatial ###
output_dir = "/data/katop1234/mae_testing_data/segtrack/final_videos/"
for video_label in videos_list:
    if video_label + "_spatial.mp4" not in os.listdir(output_dir) and video_label not in bad_labels:

        print("Using label", video_label)
        segmented_video_folder = os.path.join(segmentation_folder, video_label)
        RGB_video_folder = os.path.join(RGB_folder, video_label)
        assert(len(os.listdir(segmented_video_folder)) == len(os.listdir(RGB_video_folder))), "Number of files in segmented vs RGB folder must be equal"
        
        RGB_frames = []
        segmented_frames = []
        home_dir = os.getcwd()
        
        rgb_images = [os.path.join(RGB_video_folder, f) for f in sorted(os.listdir(RGB_video_folder))]
        RGB_frames = np.stack([imageio.imread(image_file) for image_file in rgb_images], axis=0)
        
        segmented_images = [os.path.join(segmented_video_folder, f) for f in sorted(os.listdir(segmented_video_folder))]

        segmented_frames = []
        for image_file in segmented_images:
            seg_image_np_array = imageio.imread(image_file)
            if len(seg_image_np_array.shape) == 2:
                
                print("all black seg file at", image_file, "got shape and array", seg_image_np_array.shape, seg_image_np_array)
                seg_image_np_array = np.repeat(seg_image_np_array[:, :, np.newaxis], 3, axis=2)

            segmented_frames.append(seg_image_np_array[:, :, :3])
        segmented_frames = np.stack(segmented_frames, axis=0)

        final_video_path = os.path.join(output_dir, video_label + "_spatial.mp4")

        print("rgb shape", RGB_frames.shape)
        print("seg shape", segmented_frames.shape)

        # Spatial concatenation
        concated_videos = np.concatenate((RGB_frames, segmented_frames), axis=1)
        save_frames(concated_videos, final_video_path, 30)

Using label soldier
rgb shape (32, 224, 528, 3)
seg shape (32, 224, 528, 3)




Using label birdfall
rgb shape (30, 327, 259, 3)
seg shape (30, 327, 259, 3)




Using label monkey
rgb shape (31, 270, 480, 3)
seg shape (31, 270, 480, 3)
Using label frog
rgb shape (279, 264, 480, 3)
seg shape (279, 264, 480, 3)
Using label girl
rgb shape (21, 320, 400, 3)
seg shape (21, 320, 400, 3)
Using label drift
rgb shape (74, 360, 640, 3)
seg shape (74, 360, 640, 3)
Using label bird_of_paradise
rgb shape (98, 360, 640, 3)
seg shape (98, 360, 640, 3)
Using label bmx
rgb shape (36, 360, 640, 3)
seg shape (36, 360, 640, 3)
Using label parachute




rgb shape (51, 352, 414, 3)
seg shape (51, 352, 414, 3)




Using label worm
rgb shape (243, 264, 480, 3)
seg shape (243, 264, 480, 3)
Using label monkeydog
rgb shape (71, 240, 320, 3)
seg shape (71, 240, 320, 3)
Using label hummingbird
rgb shape (28, 360, 640, 3)
seg shape (28, 360, 640, 3)
