In [None]:
!pip install ultralytics


Use YOLO to detect persons. As a library solution, it is more accurate.

In [11]:
import cv2
import numpy as np
from ultralytics import YOLO
from moviepy.editor import VideoFileClip
from moviepy.video.io.ImageSequenceClip import ImageSequenceClip
import mediapipe as mp


class SketchDanceEditor:
    """Render a dance video as a sketch with a replaceable background.

    For every frame the pipeline
      1. segments persons with a YOLO segmentation model,
      2. composites the person pixels over the configured background,
      3. optionally applies a pencil-sketch filter to the composite,
      4. optionally outlines the person mask,
      5. optionally blends a cartoon image over detected faces.

    Colour convention: frames inside the pipeline are RGB, because that is the
    order moviepy reads and writes. ``background_image`` and ``face_image``
    are expected in OpenCV's BGR order (as returned by ``cv2.imread``) and are
    converted to RGB at the point of use.
    """

    PERSON_CLASS_ID = 0        # class index the loaded YOLO model uses for 'person'
    FACE_BLEND_ALPHA = 0.9     # weight of the cartoon face when blended over the frame
    EDGE_COLOR = (255, 0, 0)   # outline colour; red, since pipeline frames are RGB

    def __init__(
            self,
            input_path,
            output_path,
            background_mode="color",      # "color", "image", "none"
            background_color=(210, 210, 210),
            background_image=None,
            clipsec=None,
            show_personEdge=False,
            sketch_persons=True,
            face_mode="none",             # "none", "image"
            face_image=None
        ):
        """Store the configuration and load the detection models.

        Args:
            input_path: Path of the video to read.
            output_path: Path of the video to write.
            background_mode: "color" (solid colour), "image" (use
                ``background_image``) or "none" (keep the original frame).
            background_color: 3-tuple used for the "color" mode; it is written
                straight into the RGB frame.
            background_image: BGR image array (e.g. from ``cv2.imread``) used
                for the "image" mode.
            clipsec: Optional ``(start, end)`` tuple passed to ``subclip``;
                a falsy value processes the whole video.
            show_personEdge: Draw the outline of the person mask.
            sketch_persons: Apply the pencil-sketch filter to the composite.
            face_mode: "image" blends ``face_image`` over detected faces;
                any other value leaves faces untouched.
            face_image: BGR image array blended over faces in "image" mode.
        """
        self.face_mode = face_mode
        self.input_path = input_path
        self.output_path = output_path
        self.background_mode = background_mode
        self.background_color = background_color
        self.background_image = background_image
        self.clipsec = clipsec
        self.show_personEdge = show_personEdge
        self.sketch_persons = sketch_persons

        # YOLO segmentation model. "yolov8s-seg.pt" is the smaller/faster
        # variant; "yolov8x-seg.pt" trades speed for accuracy.
        self.detector = YOLO("yolov8x-seg.pt")

        # Face detection (only used when face_mode == "image").
        self.cartoon_face = face_image
        self.face_detector = mp.solutions.face_detection.FaceDetection(
            model_selection=1,
            min_detection_confidence=0.6
        )

    @staticmethod
    def _clip_box(x, y, box_w, box_h, frame_w, frame_h):
        """Clip a pixel box to the frame; return (x1, y1, x2, y2) or None if empty."""
        x1 = max(0, x)
        y1 = max(0, y)
        # The far corner is derived from the *unclamped* origin so that a box
        # hanging over the top/left edge is cut off rather than shifted inwards.
        x2 = min(frame_w, x + box_w)
        y2 = min(frame_h, y + box_h)
        if x2 <= x1 or y2 <= y1:
            return None
        return x1, y1, x2, y2

    def swap_face_with_cartoon(self, frame):
        """Blend the cartoon face over every face detected in ``frame``.

        Args:
            frame: RGB uint8 frame; it is modified in place.

        Returns:
            The same ``frame`` object, with faces blended (unchanged when no
            face is detected).

        Raises:
            ValueError: If no face image was provided.
        """
        if self.cartoon_face is None:
            raise ValueError(
                "swap_face_with_cartoon requires a face_image, but none was provided"
            )

        # Pipeline frames are already RGB, which is what MediaPipe expects,
        # so no channel swap is applied before detection.
        results = self.face_detector.process(np.ascontiguousarray(frame))
        if not results or not results.detections:
            return frame

        h, w = frame.shape[:2]
        # The cartoon image is BGR (cv2.imread); bring it into the frame's order.
        cartoon_rgb = cv2.cvtColor(self.cartoon_face, cv2.COLOR_BGR2RGB)

        for detection in results.detections:
            # Bounding box is normalised to [0, 1] relative to the frame size.
            box = detection.location_data.relative_bounding_box
            clipped = self._clip_box(
                int(box.xmin * w),
                int(box.ymin * h),
                int(box.width * w),
                int(box.height * h),
                w,
                h,
            )
            if clipped is None:
                # Box lies outside the frame or has zero area: nothing to paint
                # (cv2.resize would fail on an empty target size).
                continue
            x1, y1, x2, y2 = clipped

            resized_cartoon = cv2.resize(
                cartoon_rgb,
                (x2 - x1, y2 - y1),
                interpolation=cv2.INTER_AREA
            )

            alpha = self.FACE_BLEND_ALPHA
            frame[y1:y2, x1:x2] = cv2.addWeighted(
                frame[y1:y2, x1:x2],
                1 - alpha,
                resized_cartoon,
                alpha,
                0
            )

        return frame

    def detect_person_mask(self, frame):
        """Return a binary mask (uint8, values 0/1) covering all detected persons.

        Args:
            frame: RGB uint8 frame.

        Returns:
            Array of shape ``frame.shape[:2]``; 1 where a person was segmented.
        """
        # Ultralytics treats numpy inputs as BGR, so swap the channels of the
        # RGB pipeline frame before inference.
        bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        # retina_masks=True makes masks.data match the original image size.
        # Without it the masks have the (letterboxed) inference size, and
        # stretching them to the frame misaligns them with the persons.
        results = self.detector.predict(bgr, verbose=False, retina_masks=True)

        height, width = frame.shape[:2]
        mask = np.zeros((height, width), dtype=np.uint8)

        result = results[0]
        if getattr(result, "masks", None) is None:
            return mask

        for instance_mask, cls in zip(result.masks.data, result.boxes.cls):
            if int(cls) != self.PERSON_CLASS_ID:
                continue

            person_mask = instance_mask.cpu().numpy().astype(np.uint8)

            # Safety net: resize only if the model still returned another size.
            if person_mask.shape != (height, width):
                person_mask = cv2.resize(
                    person_mask,
                    (width, height),
                    interpolation=cv2.INTER_NEAREST
                )

            mask = np.bitwise_or(mask, person_mask)

        return mask

    def sketch_frame(self, frame, mask=None):
        """Apply a pencil-sketch (colour-dodge) filter to the whole frame.

        Args:
            frame: RGB uint8 frame.
            mask: Accepted for interface compatibility; currently unused, the
                entire frame is sketched.

        Returns:
            3-channel uint8 image whose channels all hold the sketch.
        """
        gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
        blurred_inverse = cv2.GaussianBlur(255 - gray, (21, 21), 0)
        # Dividing the gray image by the inverted blur brightens flat regions
        # and leaves dark strokes along edges.
        sketch = cv2.divide(gray, 255 - blurred_inverse, scale=256)
        return cv2.cvtColor(sketch, cv2.COLOR_GRAY2RGB)

    def get_background(self, frame, mask):
        """Build the background for ``frame`` according to ``background_mode``.

        Args:
            frame: RGB uint8 frame; only its size (and, in "none" mode, its
                content) is used.
            mask: Accepted for interface compatibility; currently unused.

        Returns:
            RGB uint8 array with the same height and width as ``frame``.

        Raises:
            ValueError: For an unknown mode, or "image" mode without an image.
        """
        h, w = frame.shape[:2]

        if self.background_mode == "none":
            return frame.copy()

        if self.background_mode == "color":
            return np.full((h, w, 3), self.background_color, dtype=np.uint8)

        if self.background_mode == "image":
            if self.background_image is None:
                raise ValueError("background_mode='image' but no background_image provided")
            resized = cv2.resize(self.background_image, (w, h))
            # The image is BGR (cv2.imread); convert so it matches the RGB frames.
            return cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)

        raise ValueError(f"Unknown background_mode: {self.background_mode}")

    def _render_frame(self, frame):
        """Run the full per-frame pipeline on one RGB frame and return the result."""
        mask = self.detect_person_mask(frame)
        background = self.get_background(frame, mask)

        # Keep person pixels from the frame, everything else from the background.
        composite = np.where(mask[..., None] > 0, frame, background)

        if self.sketch_persons:
            composite = self.sketch_frame(composite, mask)

        if self.show_personEdge:
            edges = cv2.Canny((mask * 255).astype(np.uint8), 80, 180)
            composite[edges != 0] = self.EDGE_COLOR

        if self.face_mode == "image":
            composite = self.swap_face_with_cartoon(composite)

        return composite

    def process_video(self):
        """Process the input video and write the result to ``output_path``.

        Frames are transformed lazily while the output is encoded, so memory
        use does not grow with the clip length. The output has no audio track.
        """
        source = VideoFileClip(self.input_path)
        try:
            clip = source.subclip(*self.clipsec) if self.clipsec else source
            rendered = clip.fl_image(self._render_frame)
            rendered.write_videofile(
                self.output_path,
                fps=clip.fps,
                codec="libx264",
                audio=False,
            )
        finally:
            # Release the ffmpeg reader even if processing or encoding fails.
            source.close()


In [None]:
# Set up the face prompt and the background image, then render the video.

# Text prompt describing the desired replacement face (not used in this cell).
prompt_face = """
Replace face with a generic, unrecognizable synthetic face of a young adult male/female with neutral expression
"""

bg_path = "./backstage/ballroom.png"  # or "dancefloor.jpg"
bg = cv2.imread(bg_path)

# cv2.imread signals a missing/unreadable file by returning None. Raise instead
# of calling exit(), which would shut down the notebook kernel.
if bg is None:
    raise FileNotFoundError(f"❌ Image not found or failed to load: {bg_path}")
print("✅ Image loaded successfully.")


# Plain light-gray 512x512 placeholder image (not used in this cell).
img = np.full((512, 512, 3), 220, dtype=np.uint8)
pil_bg = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

editor = SketchDanceEditor(
    input_path="./solus/tango1.mp4",
    output_path="output_tango1_v20_ballroom.mp4",
    clipsec=(0, 15),                 # process only the first 15 seconds
    background_mode="image",         # "color", "image", "none"
    background_color=(210, 210, 210),
    background_image=bg,
    show_personEdge=False,
    sketch_persons=True,
    face_mode="none",
    face_image=None
)
editor.process_video()


✅ Image loaded successfully.
Moviepy - Building video output_tango1_v19_ocean.mp4.
Moviepy - Writing video output_tango1_v19_ocean.mp4



                                                               

Moviepy - Done !
Moviepy - video ready output_tango1_v19_ocean.mp4
