<a href="https://colab.research.google.com/github/steinhaug/stable-diffusion/blob/main/tool/batch_depth_anything_frames.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#@title Batch Depth-Anything frames
#@markdown ![Visitors](https://api.visitorbadge.io/api/combined?path=https%3A%2F%2Fgithub.com%2Fsteinhaug%2Fstable-diffusion%2Fblob%2Fmain%2Fbatch_depth_anything_frames.ipynb&countColor=%23263759&style=flat)
#@markdown <a href="https://github.com/steinhaug/" target="_blank"><img alt="Open Github profile" src="https://img.shields.io/badge/Steinhaug-Profile-black?logo=github"></a>

#@markdown [![Buy me a beer](https://raw.githubusercontent.com/steinhaug/stable-diffusion/main/assets/buy-me-a-beer.png ) ](https://steinhaug.com/donate/)


# Installer blocks

Make sure every block has finished running. You may need to restart the runtime after downgrading torch; if so, simply continue running the remaining blocks afterwards.

In [None]:
# Clone the Depth-Anything repository into /content and install its requirements.
%cd /content
!git clone https://github.com/LiheYoung/Depth-Anything
%cd Depth-Anything
# NOTE(review): the packages in the next two installs are not imported by any visible
# notebook cell; presumably they resolve dependency warnings in the Colab runtime — confirm.
!pip install kaleido cohere openai tiktoken
!pip install fastapi python-multipart uvicorn
!pip install -r requirements.txt

In [10]:
# Downgrade torch, xformers requires v2.1.0
# NOTE(review): the pip output recorded for this cell reports that xformers 0.0.23.post1
# requires torch==2.1.2, which contradicts the note above — confirm which pin is needed.
!pip install torch==2.1.0

Collecting torch==2.1.0
  Downloading torch-2.1.0-cp310-cp310-manylinux1_x86_64.whl (670.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m670.2/670.2 MB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: torch
  Attempting uninstall: torch
    Found existing installation: torch 2.1.2
    Uninstalling torch-2.1.2:
      Successfully uninstalled torch-2.1.2
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
xformers 0.0.23.post1 requires torch==2.1.2, but you have torch 2.1.0 which is incompatible.[0m[31m
[0mSuccessfully installed torch-2.1.0


In [3]:
import torch
def check_torch_version():
    """Return the version string reported by the imported torch module."""
    installed_version = torch.__version__
    return installed_version


# Show which torch build this kernel actually picked up after the (re)install.
torch_version = check_torch_version()
print(f"Installed PyTorch version: {torch_version}")

Installed PyTorch version: 2.1.0+cu121


In [None]:
# Install xformers (see the xFormers timing in the Addendum), pytorch_lightning and numpy.
# NOTE(review): `--pre` lets pip consider pre-release torch builds; how that interacts with
# the torch==2.1.0 pin from the earlier cell is not evident from this notebook — confirm.
!pip install --pre torch
!pip install xformers pytorch_lightning numpy

# Initialize

In [17]:
#@title Mount gDrive and load Notebook functions
from IPython.display import display, Markdown, clear_output
from pathlib import Path
# Mount Google Drive only when it is not already available in this runtime.
inip = Path("/content/drive/MyDrive")
if not inip.is_dir():
    from google.colab import drive
    drive.mount('/content/drive')

# Drive folder that receives the generated zip archives. It is created up front so the
# copy step does not fail with FileNotFoundError on a Drive that lacks the folder.
gdrive_save_folder = "/content/drive/MyDrive/depth_frames"
Path(gdrive_save_folder).mkdir(parents=True, exist_ok=True)

# Guard flag: the Depth Anything model and transform are only built while this is False.
depth_anything_init = False

import os, sys
from tqdm import tqdm

_B=True
_A=False
def return__isValidDir(directory):
    """Return True when `directory` is an existing directory, otherwise False."""
    return _B if os.path.isdir(directory) else _A

def return__fileCount(directory_path):
    """Count the files found under `directory_path`, including all sub-directories.

    Returns 0 when the path yields nothing to walk.
    """
    return sum(len(filenames) for _folder, _subdirs, filenames in os.walk(directory_path))

def resettable_progress_meter(maximum, progress):
    """Draw a one-line text progress bar on stdout, redrawing it in place.

    Args:
        maximum: Total number of steps. When it is zero or negative there is
            nothing to measure and the function returns without printing.
        progress: Number of steps completed so far. Values outside
            0..maximum are clamped so the bar never over- or underflows.

    A newline is written once `progress` reaches `maximum`, so later output
    starts on a fresh line.
    """
    if maximum <= 0:
        # Empty batch: avoid dividing by zero below.
        return

    bar_length = 40
    progress_ratio = min(max(progress / maximum, 0.0), 1.0)
    bar = int(bar_length * progress_ratio)

    # "\r" moves back to the start of the line so each call overwrites the previous bar.
    sys.stdout.write("\r[{}{}] {}%".format("=" * bar, " " * (bar_length - bar), int(progress_ratio * 100)))
    sys.stdout.flush()

    if progress >= maximum:
        sys.stdout.write("\n")
        sys.stdout.flush()

def return__folderName(directory_path, verify_folder=False):
    """Return the last path component of `directory_path`.

    Trailing separators are ignored because the path is normalised first.
    With `verify_folder` set, None is returned unless the path is an
    existing directory.
    """
    if verify_folder and not os.path.isdir(directory_path):
        return None  # Invalid path while verification was requested
    return os.path.basename(os.path.normpath(directory_path))

def move_file(source_file, destination_file, copyOnly=False):
    """Copy `source_file` to `destination_file` in 1 MB chunks with a progress bar.

    Args:
        source_file: Path of the file to read.
        destination_file: Path to write. Its parent folder is created when missing.
        copyOnly: When False (default) the source is deleted after the copy,
            making this a move. When True the source is kept.

    Raises:
        ValueError: If both paths refer to the same file. Opening the
            destination for writing would otherwise truncate the source.
        OSError: Propagated from the underlying file operations
            (for example FileNotFoundError for a missing source).
    """
    file_size = os.path.getsize(source_file)

    if os.path.exists(destination_file) and os.path.samefile(source_file, destination_file):
        raise ValueError(f"Source and destination are the same file: '{source_file}'")

    destination_dir = os.path.dirname(destination_file)
    if destination_dir:
        os.makedirs(destination_dir, exist_ok=True)

    chunk_size = 1024 * 1024  # 1 MB
    # Label the bar with what is actually happening.
    action = "Copying file" if copyOnly else "Moving file"

    with open(source_file, "rb") as src_file, \
         open(destination_file, "wb") as dest_file, \
         tqdm(total=file_size, unit="B", unit_scale=True, desc=action, ncols=80) as progress:

        while True:
            chunk = src_file.read(chunk_size)

            if not chunk:
                break

            dest_file.write(chunk)
            progress.update(len(chunk))

    if not copyOnly:
        # Remove the source only after the copy loop finished and both files are closed.
        os.remove(source_file)
        print(f"File '{source_file}' moved to '{destination_file}'.")
    else:
        print(f"File '{source_file}' copied to '{destination_file}'.")

def copy_file(source_file, destination_file):
    """Copy `source_file` to `destination_file`, leaving the source in place.

    Thin wrapper that delegates to move_file() with copyOnly enabled and
    returns whatever that call returns.
    """
    return move_file(source_file, destination_file, copyOnly=True)

In [7]:
# Input video to process.
video_file = "/content/video1.mkv.mp4"
# Folder that receives the extracted frames; depth frames go to "<this path>_depth".
frames_directory = "/content/frames/video1mkv_"

# Process video, copy ZIPs of frames and depth frames into gDrive

Run all cells or do them manually

## 1.0 Extract frames and crop frames

In [8]:
#@title . 1.1 Extract frames
if not return__isValidDir(frames_directory):
    os.makedirs(frames_directory)

!ffmpeg -i {video_file} -vf "scale=910:512" {frames_directory}/c01_%04d.png

total_progress = return__fileCount(frames_directory)
clear_output();print(f'{total_progress} frames extracted into {frames_directory}')

1199 frames extracted into /content/frames/video1mkv_


In [9]:
#@title . 1.2 Crop frames

import cv2
import os
import sys

def process_images(input_directory, output_directory, target_width=768, target_height=512):
    """Center-crop every image in `input_directory` and write it to `output_directory`.

    Args:
        input_directory: Folder scanned (non-recursively) for .png/.jpg/.jpeg/.gif/.bmp files.
        output_directory: Folder for the cropped images (created when missing).
            File names are kept, so passing the input folder overwrites in place.
        target_width: Width of the crop window in pixels.
        target_height: Height of the crop window in pixels.

    Images smaller than the target in a dimension are left uncropped in that
    dimension. Processing stops at the first file OpenCV cannot read, after
    printing an error. Returns None.
    """
    os.makedirs(output_directory, exist_ok=True)

    image_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.bmp')
    image_files = [f for f in os.listdir(input_directory) if f.lower().endswith(image_extensions)]

    # Measure progress against the images handled here, not against a global set by
    # another cell, so the bar is correct and the function works on a fresh kernel.
    total_images = len(image_files)

    for n_progress, image_file in enumerate(image_files, start=1):
        input_path = os.path.join(input_directory, image_file)
        output_path = os.path.join(output_directory, image_file)

        img = cv2.imread(input_path)
        if img is None:
            print(f"Error: Unable to read the image at {input_path}")
            return

        height, width = img.shape[:2]
        # Offsets that center the crop window; clamped at 0 for undersized images.
        crop_x = max(0, (width - target_width) // 2)
        crop_y = max(0, (height - target_height) // 2)
        cropped_img = img[crop_y:crop_y + target_height, crop_x:crop_x + target_width]
        cv2.imwrite(output_path, cropped_img)

        resettable_progress_meter(total_images, n_progress)

# Crop in place: the frames folder is used as both input and output directory.
process_images(frames_directory, frames_directory)

# Replace the progress output with a short summary line.
clear_output()
print(f'Frames cropped into 768x512')

Frames cropped into 768x512


## 2.0 Create depth maps

In [13]:
#@title . 2.1 Initialise Depth Anything
%cd /content/Depth-Anything

import matplotlib.pyplot as plt

import cv2
import numpy as np
import os
from PIL import Image
import torch
import torch.nn.functional as F
from torchvision.transforms import Compose
import tempfile
from gradio_imageslider import ImageSlider

from depth_anything.dpt import DepthAnything
from depth_anything.util.transform import Resize, NormalizeImage, PrepareForNet

# One-time setup: the flag keeps the model and transform from being rebuilt on re-runs.
if not depth_anything_init:
    # Use the GPU when CUDA is available, otherwise fall back to the CPU.
    DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
    # Load the pretrained 'LiheYoung/depth_anything_vitl14' weights and move the model to DEVICE.
    model = DepthAnything.from_pretrained('LiheYoung/depth_anything_vitl14').to(DEVICE)

    # Preprocessing pipeline applied to each frame before inference.
    # NOTE(review): the Resize options and mean/std values are assumed to mirror the upstream
    # Depth-Anything example (mean/std look like the ImageNet statistics) — confirm.
    transform = Compose([
            Resize(
                width=518,
                height=518,
                resize_target=False,
                keep_aspect_ratio=True,
                ensure_multiple_of=14,
                resize_method='lower_bound',
                image_interpolation_method=cv2.INTER_CUBIC,
            ),
            NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            PrepareForNet(),
    ])

    depth_anything_init = True


def predict_depth(model, image):
    """Call `model` on `image` and return the result unchanged."""
    prediction = model(image)
    return prediction

def create_depth_map(image):
    """Compute a colour-mapped depth image for one frame.

    Args:
        image: Frame as a numpy array in OpenCV's BGR channel order
            (it is converted with cv2.COLOR_BGR2RGB before inference).

    Returns:
        A uint8 array with the same height and width as `image`: the depth
        prediction normalised to 0..255, coloured with COLORMAP_INFERNO and
        with the channel axis reversed (applyColorMap output flipped to RGB order).

    Uses the module-level `transform`, `model` and `DEVICE` created by the
    initialisation code. No temporary files are written: the earlier raw-depth
    dump was never returned to the caller and leaked one file per frame.
    """
    h, w = image.shape[:2]

    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) / 255.0
    net_input = transform({'image': rgb})['image']
    net_input = torch.from_numpy(net_input).unsqueeze(0).to(DEVICE)

    with torch.no_grad():
        depth = predict_depth(model, net_input)

    # Resize the prediction back to the original frame size.
    depth = F.interpolate(depth[None], (h, w), mode='bilinear', align_corners=False)[0, 0]

    depth_min = depth.min()
    depth_range = depth.max() - depth_min
    if depth_range > 0:
        depth = (depth - depth_min) / depth_range * 255.0
    else:
        # Completely flat prediction: avoid 0/0 (NaN) and emit a uniform map.
        depth = torch.zeros_like(depth)

    depth = depth.cpu().numpy().astype(np.uint8)
    return cv2.applyColorMap(depth, cv2.COLORMAP_INFERNO)[:, :, ::-1]

# Load image as numpy.ndarray
def load_image(image_path):
    """Read the image at `image_path` with OpenCV and return it as a numpy array.

    Raises:
        FileNotFoundError: When cv2.imread() returns None for the path.
    """
    loaded = cv2.imread(image_path)

    # Success path first: hand back an array copy of what OpenCV decoded.
    if loaded is not None:
        return np.array(loaded)

    raise FileNotFoundError(f"Image not found at path: {image_path}")


/content/Depth-Anything


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

In [14]:
#@title . 2.2 Process frames
import cv2, os
import numpy as np
from PIL import Image

def process_images(input_directory, output_directory):
    """Create a depth image for every image in `input_directory`.

    Args:
        input_directory: Folder scanned (non-recursively) for .png/.jpg/.jpeg/.gif/.bmp files.
        output_directory: Folder for the depth images (created when missing).
            Each result is saved under the same file name as its source.

    Relies on load_image() and create_depth_map(). Returns None.
    """
    os.makedirs(output_directory, exist_ok=True)

    image_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.bmp')
    image_files = [f for f in os.listdir(input_directory) if f.lower().endswith(image_extensions)]

    # Measure progress against the images handled here, not against a global set by
    # another cell, so the bar is correct and the function works on a fresh kernel.
    total_images = len(image_files)

    for n_progress, image_file in enumerate(image_files, start=1):
        input_path = os.path.join(input_directory, image_file)
        output_path = os.path.join(output_directory, image_file)

        image_data = load_image(input_path)
        depth_data = create_depth_map(image_data)
        final_image = Image.fromarray(depth_data)
        final_image.save(output_path)

        resettable_progress_meter(total_images, n_progress)

# Depth frames are written next to the frames folder, into "<frames_directory>_depth".
process_images(frames_directory, f"{frames_directory}_depth")

# Replace the progress output with a short summary line.
clear_output()
print('Depth frames completed!')


Depth frames completed!


## 3.0 zip it up

In [15]:
#@title . 3.1 Create zip files of frames and frames_depth folders
import os
import zipfile

def zip_directory(directory_path, zip_file_path):
    """Write every file below `directory_path` into a new zip at `zip_file_path`.

    Entries are stored with paths relative to `directory_path`; an existing
    archive at the target path is overwritten.
    """
    archive = zipfile.ZipFile(zip_file_path, 'w')
    with archive:
        for folder, _subdirs, filenames in os.walk(directory_path):
            for filename in filenames:
                absolute_path = os.path.join(folder, filename)
                relative_name = os.path.relpath(absolute_path, directory_path)
                archive.write(absolute_path, arcname=relative_name)

# Name the archives after the last path component of the frames folder.
batch_name = return__folderName(frames_directory)

# Archive the extracted frames, then the depth frames, into /content.
zip_directory(frames_directory, f"/content/{batch_name}_frames.zip")
print(f"Zip file '{batch_name}_frames.zip' created successfully.")
zip_directory(f"{frames_directory}_depth", f"/content/{batch_name}_frames_depth.zip")
print(f"Zip file '{batch_name}_frames_depth.zip' created successfully.")

Zip file 'video1mkv__frames.zip' created successfully.
Zip file 'video1mkv__frames_depth.zip' created successfully.


In [16]:
#@title . 3.2 Copy zips into gDrive
# Copy (not move) both archives into the Drive folder; the originals stay in /content.
copy_file(f"/content/{batch_name}_frames.zip", f"{gdrive_save_folder}/{batch_name}_frames.zip")
copy_file(f"/content/{batch_name}_frames_depth.zip", f"{gdrive_save_folder}/{batch_name}_frames_depth.zip")

Moving file: 100%|████████████████████████████| 528M/528M [00:02<00:00, 259MB/s]


File '/content/video1mkv__frames.zip' copied to '/content/drive/MyDrive/depth_frames/video1mkv__frames.zip'.


Moving file: 100%|████████████████████████████| 114M/114M [00:00<00:00, 277MB/s]


File '/content/video1mkv__frames_depth.zip' copied to '/content/drive/MyDrive/depth_frames/video1mkv__frames_depth.zip'.


# Addendum


<h1>Some benchmarking</h1>

Image count: 1100 images  

__Cropping images__  
PIL image: 198s  
OpenCV: 46s  
OpenCV is approximately 4.3 X faster  

__Calculating depth images__  
Without xFormers: 1100s  
With xFormers: 900s  
Installing xFormers gives a 1.2 X speedup in processing  


In [None]:
#@title code reference: Complete working Inference for calculating depth map

%cd /content/Depth-Anything
import matplotlib.pyplot as plt
import cv2
import numpy as np
import os
from PIL import Image
import torch
import torch.nn.functional as F
from torchvision.transforms import Compose
import tempfile
from gradio_imageslider import ImageSlider

from depth_anything.dpt import DepthAnything
from depth_anything.util.transform import Resize, NormalizeImage, PrepareForNet

# One-time setup: the flag keeps the model and transform from being rebuilt on re-runs.
if not depth_anything_init:
    # Use the GPU when CUDA is available, otherwise fall back to the CPU.
    DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
    # Load the pretrained 'LiheYoung/depth_anything_vitl14' weights and move the model to DEVICE.
    model = DepthAnything.from_pretrained('LiheYoung/depth_anything_vitl14').to(DEVICE)

    # Preprocessing pipeline applied to each frame before inference.
    # NOTE(review): the Resize options and mean/std values are assumed to mirror the upstream
    # Depth-Anything example (mean/std look like the ImageNet statistics) — confirm.
    transform = Compose([
            Resize(
                width=518,
                height=518,
                resize_target=False,
                keep_aspect_ratio=True,
                ensure_multiple_of=14,
                resize_method='lower_bound',
                image_interpolation_method=cv2.INTER_CUBIC,
            ),
            NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            PrepareForNet(),
    ])

    depth_anything_init = True


def predict_depth(model, image):
    """Call `model` on `image` and return the result unchanged."""
    prediction = model(image)
    return prediction

def create_depth_map(image):
    """Compute a colour-mapped depth image for one frame.

    Args:
        image: Frame as a numpy array in OpenCV's BGR channel order
            (it is converted with cv2.COLOR_BGR2RGB before inference).

    Returns:
        A uint8 array with the same height and width as `image`: the depth
        prediction normalised to 0..255, coloured with COLORMAP_INFERNO and
        with the channel axis reversed (applyColorMap output flipped to RGB order).

    Uses the module-level `transform`, `model` and `DEVICE` created by the
    initialisation code. No temporary files are written: the earlier raw-depth
    dump was never returned to the caller and leaked one file per frame.
    """
    h, w = image.shape[:2]

    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) / 255.0
    net_input = transform({'image': rgb})['image']
    net_input = torch.from_numpy(net_input).unsqueeze(0).to(DEVICE)

    with torch.no_grad():
        depth = predict_depth(model, net_input)

    # Resize the prediction back to the original frame size.
    depth = F.interpolate(depth[None], (h, w), mode='bilinear', align_corners=False)[0, 0]

    depth_min = depth.min()
    depth_range = depth.max() - depth_min
    if depth_range > 0:
        depth = (depth - depth_min) / depth_range * 255.0
    else:
        # Completely flat prediction: avoid 0/0 (NaN) and emit a uniform map.
        depth = torch.zeros_like(depth)

    depth = depth.cpu().numpy().astype(np.uint8)
    return cv2.applyColorMap(depth, cv2.COLORMAP_INFERNO)[:, :, ::-1]

# Load image as numpy.ndarray
def load_image(image_path):
    """Read the image at `image_path` with OpenCV and return it as a numpy array.

    Raises:
        FileNotFoundError: When cv2.imread() returns None for the path.
    """
    loaded = cv2.imread(image_path)

    # Success path first: hand back an array copy of what OpenCV decoded.
    if loaded is not None:
        return np.array(loaded)

    raise FileNotFoundError(f"Image not found at path: {image_path}")

# Single-image example: read /content/input.png, compute its depth map,
# save it as /content/output.png and preview it inline.
image_data = load_image("/content/input.png")
depth_data = create_depth_map(image_data)
final_image = Image.fromarray(depth_data)
final_image.save(f"/content/output.png")
plt.imshow(depth_data)