In [32]:
%load_ext autoreload
%autoreload 2

import os
import torch
import matplotlib.pyplot as plt
import numpy as np
from lib.models.latent_encoder import LatentEncoder
from lib.models.deepsdf import DeepSDF
from torchvision.transforms import v2
from lib.utils.config import load_config
from lib.data.metainfo import MetaInfo
import hydra
import glob
import cv2

from lib.utils.config import load_config
from lib.data.sampler import ChunkSampler
from lib.data.metainfo import MetaInfo

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Extract Frames from Raw Video

In [44]:
# capture all .mov videos in raw_video directory
video_folder = "/home/korth/sketch2shape/demo_video"
video_files = glob.glob(f"{video_folder}/*.mov")
frames_folder = f"{video_folder}/video_frames"
frames = []

num_frames_to_extract = 100  # Set the number of frames to extract
for video_file in video_files:
    # Open the video file
    video = cv2.VideoCapture(video_file)
    
    # Get the total number of frames in the video
    total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
    
    # Calculate the frame interval
    frame_interval = max(total_frames // num_frames_to_extract, 1)
    
    # Read frames at the specified interval
    frame_count = 0
    while video.isOpened() and frame_count < total_frames:
        # Read the current frame
        ret, frame = video.read()
        
        # If the frame was read successfully
        if ret:
            # Add the frame to the list of frames
            if frame_count % frame_interval == 0:
                frames.append(frame)
            frame_count += 1
        else:
            # Break the loop if the video is completed
            break
    
    # Release the video file
    video.release()

print(f"Extracted {len(frames)} frames from {len(video_files)} videos")

# create folder if it does not exist (in python)
os.makedirs(frames_folder, exist_ok=True)

# save the frames to disk
for i, frame in enumerate(frames):
    cv2.imwrite(f"{frames_folder}/frame_{i:03}.png", frame)

print(f"Saved {len(frames)} frames to disk at '{frames_folder}'")

Extracted 102 frames from 1 videos
Saved 102 frames to disk at '/home/korth/sketch2shape/demo_video/video_frames'


# Load Models

In [45]:
cfg = load_config(
    "train_loss", ["+experiment/train_loss=latent_encoder_shapenet_chair_4096"]
)
metainfo = MetaInfo(cfg.data.data_dir, split="val_latent")
metainfo.load_snn()
sampler = ChunkSampler(metainfo.snn_labels, chunk_size=1)
max([i for i in sampler]), len([i for i in sampler])
loss_ckpt_path = "/home/borth/sketch2shape/logs/train_latent_encoder/runs/2024-02-15_22-31-15/checkpoints/epoch_019.ckpt"
shape_view_id = 11
shape_k = 16

cfg = load_config("optimize_sketch", ["+dataset=shapenet_chair_4096"])
metainfo = MetaInfo(cfg.data.data_dir)

cfg.loss_ckpt_path = loss_ckpt_path
cfg.model.shape_k = shape_k
cfg.model.shape_view_id = shape_view_id
cfg.model.shape_init = True
cfg.model.obj_id = metainfo.obj_ids[0]
model = hydra.utils.instantiate(cfg.model).to("cuda")

model.deepsdf.create_camera()

100%|██████████| 4096/4096 [00:20<00:00, 195.55it/s]


In [46]:
transform = v2.Compose([
    v2.ToTensor(),
    v2.Resize((256, 256)),
    # v2.CenterCrop(256),
    v2.Normalize(mean=[0.5], std=[0.5])
])

def load_handrawn(img_path):
    # load img, convert to torch tensor and resize to 256x256
    img1 = plt.imread(img_path)
    img1 = img1 / 255.0

    return transform(img1).to(torch.float32)

def load_sketch(img_path):
    # load img, convert to torch tensor and resize to 256x256
    img1 = plt.imread(img_path)
    return transform(img1).to(torch.float32)



In [47]:
# Process each frame and extract normals by rendering the latents
visualize = False

original_images = []
normal_images = []

for path in sorted(glob.glob(frames_folder + "/*.png")):
    if "handrawn" in path:
        img = load_handrawn(path)
    else:
        img = load_sketch(path)
    with torch.no_grad():
        latent = model.loss(img.unsqueeze(0).cuda())
        normals = model.deepsdf.capture_camera_frame(latent.squeeze())
    
    # Save original and normal images into separate lists
    original_images.append(plt.imread(path))
    normal_images.append(normals.cpu().numpy())
    
    if visualize:
        # Print original image, preprocessed image, and normals
        plt.figure()
        plt.subplot(1, 3, 1)
        plt.title("Original")
        plt.imshow(plt.imread(path))
        plt.axis("off")
        
        plt.subplot(1, 3, 2)
        plt.title("Preprocessed")
        plt.imshow(img.cpu().numpy().transpose(1, 2, 0))
        plt.axis('off')
        
        plt.subplot(1, 3, 3)
        plt.title("Normals")
        plt.imshow(normals.cpu().numpy())
        plt.axis("off")

        plt.show()



# Individual Videos

In [43]:
import cv2

# Define the output video filenames
original_video_filename = f"{video_folder}/original_video.mp4"
normal_video_filename = f"{video_folder}/normal_video.mp4"

# Define the video codec and frame rate
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
fps = 10

res_original = original_images[0].shape[0]

# Create the video writers
original_video_writer = cv2.VideoWriter(original_video_filename, fourcc, fps, (res_original, res_original))
normal_video_writer = cv2.VideoWriter(normal_video_filename, fourcc, fps, (256, 256))

# Write the frames to the videos
for idx, frame in enumerate(original_images):
    original_video_writer.write((frame*255).astype(np.uint8))

for frame in normal_images:
    normal_video_writer.write((frame*255).astype(np.uint8))

# Release the video writers
original_video_writer.release()
normal_video_writer.release()

print("Videos created successfully.")


Videos created successfully.


# Side by Side Video

In [42]:
# Define the output video filename
side_by_side_video_filename = f"{video_folder}/side_by_side_video.mp4"

# Create the video writer
side_by_side_video_writer = cv2.VideoWriter(side_by_side_video_filename, fourcc, 5, (392, 196))

# Write the frames to the video
for idx, (original_frame, normal_frame) in enumerate(zip(original_images, normal_images)):
    # Resize the frames to have the same height
    original_frame_resized = cv2.resize(original_frame, (196, 196))
    normal_frame_resized = cv2.resize(normal_frame, (196, 196))
    
    # Concatenate the frames side by side
    side_by_side_frame = cv2.hconcat([original_frame_resized, normal_frame_resized])
    
    # Write the side by side frame to the video
    side_by_side_video_writer.write((side_by_side_frame*255).astype(np.uint8))

# Release the video writer
side_by_side_video_writer.release()

print("Side by side video created successfully.")


Side by side video created successfully.
