<a href="https://colab.research.google.com/github/steinhaug/stable-diffusion/blob/main/tool/batch_depth_anything_frames.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!git clone https://github.com/LiheYoung/Depth-Anything
%cd Depth-Anything
!pip install -r requirements.txt
depth_anything_init = False

In [None]:
# Mount gDrive
from pathlib import Path

inip = Path("/content/drive/MyDrive")
if not inip.is_dir():
    # Drive is not mounted yet; mount it through the Colab helper.
    from google.colab import drive
    drive.mount('/content/drive')

gdrive_save_folder = "/content/drive/MyDrive/depth_frames"
# Make sure the target folder exists: the copy step opens files inside it for
# writing and would otherwise fail with FileNotFoundError.
Path(gdrive_save_folder).mkdir(parents=True, exist_ok=True)

In [None]:
#@title Notebook functions

import os, sys
from tqdm import tqdm

# Module-level aliases for the two boolean results.
_B = True
_A = False


def return__isValidDir(directory):
    """Return True when `directory` is an existing directory, otherwise False."""
    return _B if os.path.isdir(directory) else _A

def return__fileCount(directory_path):
    """Count every file found below `directory_path`, subfolders included."""
    return sum(len(filenames) for _root, _dirs, filenames in os.walk(directory_path))

def resettable_progress_meter(maximum, progress):
    """Draw a single-line text progress bar on stdout.

    Each call starts with a carriage return, so it overwrites the bar drawn by
    the previous call. A newline is written once `progress` reaches `maximum`.

    Args:
        maximum: Total number of steps. Zero or a negative value is drawn as a
            full bar instead of raising ZeroDivisionError.
        progress: Number of steps completed so far. The drawn ratio is clamped
            to 0..1 so the bar never grows past its fixed width.
    """
    bar_length = 40

    if maximum <= 0:
        # Nothing to do counts as finished.
        progress_ratio = 1.0
    else:
        progress_ratio = min(max(progress / maximum, 0.0), 1.0)
    filled = int(bar_length * progress_ratio)

    sys.stdout.write("\r[{}{}] {}%".format("=" * filled, " " * (bar_length - filled), int(progress_ratio * 100)))
    sys.stdout.flush()

    # End the line when the work is complete so later output starts on a fresh line.
    if progress == maximum or maximum <= 0:
        sys.stdout.write("\n")
        sys.stdout.flush()

def return__folderName(directory_path, verify_folder=False):
    """Return the last component of `directory_path`.

    When `verify_folder` is true the path must be an existing directory;
    if it is not, None is returned instead of a name.
    """
    if verify_folder and not os.path.isdir(directory_path):
        return None

    normalized = os.path.normpath(directory_path)
    return os.path.basename(normalized)

def move_file(source_file, destination_file, copyOnly=False):
    """Copy `source_file` to `destination_file` in 1 MB chunks with a progress bar.

    Args:
        source_file: Path of the file to read.
        destination_file: Path to write. Missing parent folders are created.
        copyOnly: When False (default) the source is deleted after the copy,
            which makes the operation a move. When True the source is kept.

    Raises:
        ValueError: If source and destination are the same file; opening the
            destination for writing would truncate the source before it is read.
    """
    file_size = os.path.getsize(source_file)

    if os.path.exists(destination_file) and os.path.samefile(source_file, destination_file):
        raise ValueError(f"Source and destination are the same file: '{source_file}'")

    destination_dir = os.path.dirname(destination_file)
    if destination_dir:
        os.makedirs(destination_dir, exist_ok=True)

    chunk_size = 1024 * 1024  # 1 MB
    action = "Copying file" if copyOnly else "Moving file"

    with open(source_file, "rb") as src_file, \
         open(destination_file, "wb") as dest_file, \
         tqdm(total=file_size, unit="B", unit_scale=True, desc=action, ncols=80) as progress:

        while True:
            chunk = src_file.read(chunk_size)

            if not chunk:
                break

            dest_file.write(chunk)
            progress.update(len(chunk))

    if not copyOnly:
        # The source is removed only after the destination has been written and closed.
        os.remove(source_file)
        print(f"File '{source_file}' moved to '{destination_file}'.")
    else:
        print(f"File '{source_file}' copied to '{destination_file}'.")

def copy_file(source_file, destination_file):
    """Copy a file and keep the source by calling move_file in copy-only mode."""
    result = move_file(source_file, destination_file, copyOnly=True)
    return result

In [None]:
# Input video that the frames are extracted from.
video_file = "/content/video1.mp4"
# Folder the extracted frames are written to; the depth frames go to a sibling
# folder with the same name plus a "_depth" suffix.
frames_directory = "/content/frames/video1"

# Process video, copy ZIPs of frames and depth frames into gDrive

Run all cells at once, or run them manually one by one, in order.

## 1.0 Extract frames and crop frames

In [None]:
#@title . 1.1 Extract frames
if not return__isValidDir(frames_directory):
    os.makedirs(frames_directory)

!ffmpeg -i {video_file} -vf "scale=910:512" {frames_directory}/c01_%04d.png

total_progress = return__fileCount(frames_directory)
clear_output();print(f'{total_progress} frames extracted into {frames_directory}')

In [None]:
#@title . 1.2 Crop frames

from PIL import Image
import os
import sys

def process_images(input_directory, output_directory, target_width=768, target_height=512):
    """Center-crop every image in `input_directory` and save it to `output_directory`.

    Files are matched by extension (.png, .jpg, .jpeg, .gif, .bmp) and keep
    their file name. An image smaller than the target on an axis keeps its full
    extent on that axis.

    NOTE: the "2.2 Process frames" cell defines another function with this same
    name, which replaces this one once that cell has run.

    Args:
        input_directory: Folder to read images from.
        output_directory: Folder to write cropped images to; created if missing.
        target_width: Width of the crop window in pixels.
        target_height: Height of the crop window in pixels.
    """
    os.makedirs(output_directory, exist_ok=True)

    # Only the files processed here drive the progress bar, so it reaches 100%
    # regardless of what else is stored in the folder.
    image_files = [f for f in os.listdir(input_directory) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp'))]
    total = len(image_files)

    for n_progress, image_file in enumerate(image_files, start=1):
        input_path = os.path.join(input_directory, image_file)
        output_path = os.path.join(output_directory, image_file)

        with Image.open(input_path) as img:
            original_width, original_height = img.size

            # Crop window centred in the image and clipped to the image bounds.
            left = max(0, (original_width - target_width) // 2)
            top = max(0, (original_height - target_height) // 2)
            right = min(original_width, left + target_width)
            bottom = min(original_height, top + target_height)

            cropped_img = img.crop((left, top, right, bottom))
            cropped_img.save(output_path)

        resettable_progress_meter(total, n_progress)

from IPython.display import clear_output  # not a builtin; needed for the call below

# Input and output folder are the same, so the frames are cropped in place.
process_images(frames_directory, frames_directory)

clear_output();print('Frames cropped into 768x512')

## 2.0 Create depth maps

In [7]:
#@title . 2.1 Initialise Depth Anything
import matplotlib.pyplot as plt

import cv2
import numpy as np
import os
from PIL import Image
import torch
import torch.nn.functional as F
from torchvision.transforms import Compose
import tempfile
from gradio_imageslider import ImageSlider

from depth_anything.dpt import DepthAnything
from depth_anything.util.transform import Resize, NormalizeImage, PrepareForNet

# Build the model and the preprocessing pipeline only once per session. The flag
# is looked up defensively because it is defined in the setup cell, which may
# not have been run in this kernel.
if not globals().get('depth_anything_init', False):
    DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
    # eval() puts the module in inference mode (e.g. disables dropout).
    model = DepthAnything.from_pretrained('LiheYoung/depth_anything_vitl14').to(DEVICE).eval()

    # Preprocessing applied to every frame before it is passed to the model.
    transform = Compose([
            Resize(
                width=518,
                height=518,
                resize_target=False,
                keep_aspect_ratio=True,
                ensure_multiple_of=14,
                resize_method='lower_bound',
                image_interpolation_method=cv2.INTER_CUBIC,
            ),
            NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            PrepareForNet(),
    ])

    depth_anything_init = True


def predict_depth(model, image):
    """Call `model` on `image` and hand back whatever the model returns."""
    prediction = model(image)
    return prediction

def create_depth_map(image):
    """Compute a colour-mapped depth image for one frame.

    Uses the module-level `transform`, `model` and `DEVICE` set up by the
    initialisation code of this cell.

    Args:
        image: Frame as an array in OpenCV channel order; it is converted with
            cv2.COLOR_BGR2RGB before inference.

    Returns:
        uint8 array with the same height and width as `image`, coloured with
        cv2.COLORMAP_INFERNO and with the channel axis reversed (BGR -> RGB).
    """
    h, w = image.shape[:2]

    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) / 255.0
    image = transform({'image': image})['image']
    image = torch.from_numpy(image).unsqueeze(0).to(DEVICE)

    with torch.no_grad():
        depth = predict_depth(model, image)

    # Resize the prediction back to the size of the input frame.
    depth = F.interpolate(depth[None], (h, w), mode='bilinear', align_corners=False)[0, 0]

    # Stretch to 0..255; a constant prediction has zero range and would
    # otherwise divide by zero, so it is mapped to all zeros.
    depth_min = depth.min()
    depth_range = depth.max() - depth_min
    if depth_range > 0:
        depth = (depth - depth_min) / depth_range * 255.0
    else:
        depth = torch.zeros_like(depth)
    depth = depth.cpu().numpy().astype(np.uint8)
    colored_depth = cv2.applyColorMap(depth, cv2.COLORMAP_INFERNO)[:, :, ::-1]

    return colored_depth

# Load image as numpy.ndarray
def load_image(image_path):
    """Read the image at `image_path` with OpenCV and return it as a NumPy array.

    Raises:
        FileNotFoundError: If cv2.imread returned None for the path.
    """
    loaded = cv2.imread(image_path)

    if loaded is not None:
        return np.array(loaded)

    raise FileNotFoundError(f"Image not found at path: {image_path}")


In [None]:
#@title . 2.2 Process frames
import cv2, os
import numpy as np
from PIL import Image

def process_images(input_directory, output_directory):
    """Create a depth-map image for every image in `input_directory`.

    Files are matched by extension (.png, .jpg, .jpeg, .gif, .bmp). Each depth
    map is saved to `output_directory` under the same file name as its source.

    NOTE: this re-uses the name of the cropping function from the
    "1.2 Crop frames" cell and replaces it once this cell has run.

    Args:
        input_directory: Folder holding the source frames.
        output_directory: Folder for the depth frames; created if missing.
    """
    os.makedirs(output_directory, exist_ok=True)

    # Only the files processed here drive the progress bar, so this function
    # does not depend on a global counter set by another cell.
    image_files = [f for f in os.listdir(input_directory) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp'))]
    total = len(image_files)

    for n_progress, image_file in enumerate(image_files, start=1):
        input_path = os.path.join(input_directory, image_file)
        output_path = os.path.join(output_directory, image_file)

        image_data = load_image(input_path)
        depth_data = create_depth_map(image_data)
        Image.fromarray(depth_data).save(output_path)

        resettable_progress_meter(total, n_progress)

from IPython.display import clear_output  # not a builtin; needed for the call below

# Depth frames go to a sibling folder named after the frames folder plus "_depth".
process_images(frames_directory, f"{frames_directory}_depth")
clear_output();print('Depth frames completed!')


## 3.0 Zip it up

In [None]:
#@title . 3.1 Create zip files of frames and frames_depth folders
import os
import zipfile

def zip_directory(directory_path, zip_file_path):
    """Write every file below `directory_path` into a new zip at `zip_file_path`.

    Each entry is stored under its path relative to `directory_path`.
    """
    archive = zipfile.ZipFile(zip_file_path, 'w')
    with archive:
        for folder, _subfolders, filenames in os.walk(directory_path):
            for filename in filenames:
                absolute = os.path.join(folder, filename)
                relative = os.path.relpath(absolute, directory_path)
                archive.write(absolute, arcname=relative)

# The archives are named after the last component of the frames folder.
batch_name = return__folderName(frames_directory)

# One archive for the frames and one for the depth frames.
for source_dir, suffix in ((frames_directory, "frames"), (f"{frames_directory}_depth", "frames_depth")):
    zip_name = f"{batch_name}_{suffix}.zip"
    zip_directory(source_dir, f"/content/{zip_name}")
    print(f"Zip file '{zip_name}' created successfully.")

In [None]:
#@title . 3.2 Copy zips into gDrive
# Copy both archives from /content into the gDrive save folder.
for zip_name in (f"{batch_name}_frames.zip", f"{batch_name}_frames_depth.zip"):
    copy_file(f"/content/{zip_name}", f"{gdrive_save_folder}/{zip_name}")

## END

In [8]:
#@title Inference working

# Use the hand-placed sample when it exists; otherwise fall back to the frame of
# the same name in the extracted-frames folder, because no cell in this notebook
# writes /content/c01_0100.png.
sample_path = "/content/c01_0100.png"
if not os.path.isfile(sample_path):
    sample_path = os.path.join(frames_directory, "c01_0100.png")

image_data = load_image(sample_path)
depth_data = create_depth_map(image_data)
final_image = Image.fromarray(depth_data)
final_image.save("/content/1.png")

plt.imshow(depth_data)
plt.show()