In [None]:
!pip install -q torch torchvision opencv-python diffusers transformers accelerate xformers

# **1- Imports**

In [None]:
import cv2
import numpy as np
import torch
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel
from PIL import Image
import logging
import os
from google.colab import files
from IPython.display import display, HTML

# **2- Logging Setup**

In [None]:
# Set up logging.
# basicConfig() does nothing when the root logger already has handlers, which
# notebook hosts may attach before user code runs; in that case INFO-level
# progress messages would be dropped. force=True replaces existing handlers so
# the requested level actually takes effect.
logging.basicConfig(level=logging.INFO, force=True)
logger = logging.getLogger(__name__)

# **3- Main Converter Class**

In [None]:
class AnimationVideoConverter:
    """Restyle a video frame by frame with Stable Diffusion guided by a Canny ControlNet.

    Each processed frame is resized to ``FRAME_SIZE``, reduced to a Canny edge
    map, and passed to the pipeline together with ``style_prompt``. The
    generated images are written to an ``mp4v``-encoded output video.

    Args:
        style_prompt: Text prompt given to the diffusion pipeline for every frame.
        skip_frames: Number of input frames skipped after each processed frame
            (0 processes every frame). Must be >= 0.

    Raises:
        ValueError: If ``skip_frames`` is negative.
    """

    # (width, height) every frame is resized to, and the size of the output video.
    FRAME_SIZE = (512, 512)
    # Lower and upper thresholds passed to cv2.Canny.
    CANNY_THRESHOLDS = (100, 200)
    # Sampling settings passed to the pipeline on every call.
    NUM_INFERENCE_STEPS = 15
    GUIDANCE_SCALE = 7.0
    # The generator is re-seeded with this value for every frame, so each frame
    # is sampled from an identically seeded generator.
    SEED = 42
    # Frame rate assumed when the input does not report a positive one.
    FALLBACK_FPS = 30.0

    def __init__(self, style_prompt="Studio Ghibli style", skip_frames=1):
        if skip_frames < 0:
            # A negative value would make the frame stride zero or negative and
            # break both the frame selection and the output frame rate.
            raise ValueError(f"skip_frames must be >= 0, got {skip_frames}")
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        # Half precision is only used on GPU; diffusers warns that float16
        # pipelines fail or are extremely slow when run on CPU.
        self.dtype = torch.float16 if self.device == "cuda" else torch.float32
        self.style_prompt = style_prompt
        self.skip_frames = skip_frames
        self._load_models()

    def _load_models(self):
        """Load the ControlNet and Stable Diffusion pipeline onto ``self.device``."""
        logger.info("Loading Stable Diffusion and ControlNet models...")
        self.controlnet = ControlNetModel.from_pretrained(
            "lllyasviel/control_v11p_sd15_canny",
            torch_dtype=self.dtype
        )
        self.pipe = StableDiffusionControlNetPipeline.from_pretrained(
            "runwayml/stable-diffusion-v1-5",
            controlnet=self.controlnet,
            torch_dtype=self.dtype,
            safety_checker=None
        ).to(self.device)
        self.pipe.enable_attention_slicing()
        if self.device == "cuda":
            try:
                self.pipe.enable_xformers_memory_efficient_attention()
            except Exception as e:
                # xformers is an optional optimization; a missing or
                # incompatible build should not prevent the pipeline from running.
                logger.warning(f"xformers attention unavailable, continuing without it: {e}")

    def stylize_frame(self, frame):
        """Stylize a single frame.

        Args:
            frame: Image array in BGR channel order (as returned by
                ``cv2.VideoCapture.read``).

        Returns:
            The first image produced by the pipeline, or ``None`` if any step
            failed (the error is logged so the caller can skip the frame).
        """
        try:
            frame = cv2.resize(frame, self.FRAME_SIZE)
            rgb_image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            low, high = self.CANNY_THRESHOLDS
            canny_edges = Image.fromarray(cv2.Canny(rgb_image, low, high))
            result = self.pipe(
                self.style_prompt,
                image=canny_edges,
                num_inference_steps=self.NUM_INFERENCE_STEPS,
                guidance_scale=self.GUIDANCE_SCALE,
                generator=torch.Generator(device=self.device).manual_seed(self.SEED)
            )
            return result.images[0]
        except Exception as e:
            # Best effort: one bad frame should not abort the whole conversion.
            logger.error(f"Failed to process frame: {e}")
            return None

    def convert(self, input_video, output_video):
        """Stylize ``input_video`` and write the result to ``output_video``.

        Every ``skip_frames + 1``-th frame is stylized; the output frame rate is
        divided by the same factor so the clip keeps its original duration.
        Frames that fail to stylize are dropped.

        Args:
            input_video: Path of the video to read.
            output_video: Path of the ``mp4v`` video to write.

        Returns:
            ``True`` if at least one frame was written and the output file
            exists, ``False`` otherwise.

        Raises:
            RuntimeError: If ``input_video`` cannot be opened.
        """
        cap = cv2.VideoCapture(input_video)
        if not cap.isOpened():
            cap.release()
            raise RuntimeError(f"Could not open video file: {input_video}")

        stride = self.skip_frames + 1
        writer = None
        written_frames = 0
        try:
            fps = cap.get(cv2.CAP_PROP_FPS)
            if not fps > 0:
                # A zero (or NaN) frame rate would produce an unusable writer.
                logger.warning(
                    f"Input reports no valid FPS; assuming {self.FALLBACK_FPS}"
                )
                fps = self.FALLBACK_FPS
            writer = cv2.VideoWriter(
                output_video,
                cv2.VideoWriter_fourcc(*'mp4v'),
                fps / stride,
                self.FRAME_SIZE
            )
            if not writer.isOpened():
                logger.error(f"Could not open video writer for: {output_video}")
                return False

            frame_idx = 0
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                if frame_idx % stride == 0:
                    stylized = self.stylize_frame(frame)
                    if stylized is not None:
                        stylized_np = np.array(stylized)
                        writer.write(cv2.cvtColor(stylized_np, cv2.COLOR_RGB2BGR))
                        written_frames += 1
                        logger.info(f"Stylized frame {written_frames}")
                frame_idx += 1
                # Periodically return cached GPU memory; irrelevant on CPU.
                if self.device == "cuda" and frame_idx % 10 == 0:
                    torch.cuda.empty_cache()
        finally:
            # Release handles even if reading or stylization raised.
            cap.release()
            if writer is not None:
                writer.release()

        if written_frames == 0:
            # A container with no frames is not a usable result.
            logger.error("No frames were stylized; output video is empty.")
            return False
        return os.path.exists(output_video)

# **4- Side by Side Comparison**

In [None]:
def create_side_by_side(original_path, stylized_path, output_path='comparison.mp4'):
    """Write a labelled side-by-side comparison of two videos.

    Both inputs are scaled to the smaller of the two heights (keeping their
    aspect ratios), labelled "Original" and "Stylized", and stacked
    horizontally. The output runs at the lower of the two frame rates.

    When the frame rates differ (for example when the stylized clip was written
    by ``AnimationVideoConverter.convert`` with ``skip_frames > 0``, which drops
    frames and lowers the frame rate), extra frames are skipped on the faster
    stream so that both panels show the same moment instead of drifting apart.

    Args:
        original_path: Path of the source video (left panel).
        stylized_path: Path of the stylized video (right panel).
        output_path: Path of the ``mp4v`` comparison video to write.

    Returns:
        ``True`` if at least one frame was written and the output file exists,
        ``False`` otherwise (including when an input cannot be opened).
    """
    log = logging.getLogger(__name__)
    cap_o = cv2.VideoCapture(original_path)
    cap_s = cv2.VideoCapture(stylized_path)
    writer = None
    frames_written = 0
    try:
        if not (cap_o.isOpened() and cap_s.isOpened()):
            log.error(f"Could not open input videos: {original_path}, {stylized_path}")
            return False

        h_o = int(cap_o.get(cv2.CAP_PROP_FRAME_HEIGHT))
        h_s = int(cap_s.get(cv2.CAP_PROP_FRAME_HEIGHT))
        if h_o <= 0 or h_s <= 0:
            # Guards the aspect-ratio divisions below.
            log.error("An input video reports a non-positive frame height.")
            return False
        h = min(h_o, h_s)
        w_o = int(cap_o.get(cv2.CAP_PROP_FRAME_WIDTH) * (h / h_o))
        w_s = int(cap_s.get(cv2.CAP_PROP_FRAME_WIDTH) * (h / h_s))
        if w_o <= 0 or w_s <= 0:
            log.error("An input video reports a non-positive frame width.")
            return False

        fps_o = cap_o.get(cv2.CAP_PROP_FPS)
        fps_s = cap_s.get(cv2.CAP_PROP_FPS)
        if fps_o > 0 and fps_s > 0:
            fps = min(fps_o, fps_s)
            # Number of additional frames to discard per output frame on each
            # stream so the faster one advances at the slower one's pace.
            extra_o = max(1, round(fps_o / fps_s)) - 1
            extra_s = max(1, round(fps_s / fps_o)) - 1
        else:
            # Without two valid rates the ratio is unknown: pair frames one to
            # one and use whichever rate is usable (or a conventional default).
            fps = max(fps_o, fps_s) if max(fps_o, fps_s) > 0 else 30.0
            extra_o = extra_s = 0

        writer = cv2.VideoWriter(
            output_path,
            cv2.VideoWriter_fourcc(*'mp4v'),
            fps,
            (w_o + w_s, h)
        )
        if not writer.isOpened():
            log.error(f"Could not open video writer for: {output_path}")
            return False

        while True:
            ret_o, f_o = cap_o.read()
            ret_s, f_s = cap_s.read()
            if not ret_o or not ret_s:
                break
            f_o = cv2.resize(f_o, (w_o, h))
            f_s = cv2.resize(f_s, (w_s, h))
            cv2.putText(f_o, "Original", (10,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255,0), 2)
            cv2.putText(f_s, "Stylized", (10,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255,255), 2)
            writer.write(np.hstack((f_o, f_s)))
            frames_written += 1
            # grab() advances the stream without decoding the skipped frame.
            for _ in range(extra_o):
                cap_o.grab()
            for _ in range(extra_s):
                cap_s.grab()
    finally:
        # Release handles even if reading, resizing or writing raised.
        cap_o.release()
        cap_s.release()
        if writer is not None:
            writer.release()
    return frames_written > 0 and os.path.exists(output_path)

# **5- Main Execution**

In [None]:
if __name__ == "__main__":
    # End-to-end run: upload a video, stylize it, build the side-by-side
    # comparison, then preview and download the result.
    print("Upload your video file:")
    uploaded = files.upload()
    if not uploaded:
        # An empty result (e.g. a dismissed upload dialog) would otherwise
        # surface as an opaque IndexError on the next line.
        raise RuntimeError("No file was uploaded; nothing to convert.")
    # Only the first uploaded file is processed.
    input_video = next(iter(uploaded))
    stylized_video = "stylized.mp4"
    converter = AnimationVideoConverter()
    if converter.convert(input_video, stylized_video):
        print("✅ Stylized video created!")
        comparison_video = "comparison.mp4"
        if create_side_by_side(input_video, stylized_video, comparison_video):
            print("🎬 Comparison video ready!")
            # NOTE(review): the preview points at a relative file path; confirm it
            # resolves in the notebook front end. The download below does not
            # depend on it.
            display(HTML(f"""
            <div style="margin: 20px; text-align: center;">
                <h3>Original vs Stylized</h3>
                <video width="800" controls>
                    <source src="{comparison_video}" type="video/mp4">
                </video>
            </div>
            """))
            files.download(comparison_video)
        else:
            print("Failed to create comparison video.")
    else:
        print("Failed to process stylized video.")