<a href="https://colab.research.google.com/github/rishabh-2005/IITISoC-ML-05/blob/main/object_inpainting/user_click.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the runtime dependencies with %pip (not !pip) so they land in the environment
# of the kernel that is executing this notebook.
# TODO: pin versions once a known-good set has been recorded.
%pip install -q ultralytics opencv-python-headless diffusers torch torchvision ffmpeg-python fastapi uvicorn python-multipart pyngrok supervision
# Start from /content because later cells address the checkout as /content/ProPainter,
# and clone only when it is missing so this cell can be re-run without failing.
%cd /content
![ -d ProPainter ] || git clone https://github.com/sczhou/ProPainter.git
%cd ProPainter
%pip install -r requirements.txt


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m31.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m81.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m65.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m44.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m15.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
from ultralytics import YOLO
import cv2, numpy as np, os, subprocess, shutil

def get_track_id_at_frame(model, video_path, click_x, click_y, frame_number):
    """Return the tracker ID of the object under a click on one frame of a video.

    Frames are read sequentially from the start of ``video_path`` until frame
    ``frame_number`` (0-based) is reached; only that frame is passed to
    ``model.track(frame, persist=True)``. The first detection whose bounding box
    contains ``(click_x, click_y)`` wins.

    Args:
        model: Object exposing ``track(frame, persist=True)`` whose first result has
            ``boxes.xyxy`` and ``boxes.id`` (a YOLO model in this notebook).
        video_path: Path handed to ``cv2.VideoCapture``.
        click_x: X pixel coordinate of the click, in the frame's own resolution.
        click_y: Y pixel coordinate of the click, in the frame's own resolution.
        frame_number: 0-based index of the frame the click refers to.

    Returns:
        The integer track ID, or ``None`` when the frame cannot be read, no box contains
        the click, or the tracker reported no IDs for this frame.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        frame = None
        for _ in range(frame_number + 1):
            ok, frame = cap.read()
            if not ok:
                # Video ended (or failed to open) before the requested frame.
                return None
    finally:
        # Release the capture even if reading raises.
        cap.release()

    if frame is None:
        # Negative frame_number: nothing was read.
        return None

    result = model.track(frame, persist=True)[0]
    track_ids = result.boxes.id
    if track_ids is None:
        # Detections without tracker IDs cannot be followed across frames; indexing
        # into None here used to raise TypeError.
        return None

    for box, track_id in zip(result.boxes.xyxy, track_ids):
        x1, y1, x2, y2 = map(int, box.tolist())
        if x1 <= click_x <= x2 and y1 <= click_y <= y2:
            return int(track_id)
    return None

def generate_mask_for_id(model, video_path, target_id, mask_folder):
    """Write one binary mask PNG per video frame for the object with ``target_id``.

    Every frame of ``video_path`` is passed to ``model.track(frame, persist=True)``.
    If a detection with track ID ``target_id`` has a segmentation mask, that mask is
    resized to the frame size (nearest neighbour) and written with values 0/255;
    otherwise an all-zero mask is written. Files are named
    ``{mask_folder}/frame_NNNN.png`` with a 0-based, zero-padded frame index.

    Args:
        model: Object exposing ``track(frame, persist=True)`` whose first result has
            ``masks`` (``None`` or an object with ``.data``) and ``boxes.id``.
        video_path: Path handed to ``cv2.VideoCapture``.
        target_id: Integer track ID to extract.
        mask_folder: Output directory; created if it does not exist.
    """
    os.makedirs(mask_folder, exist_ok=True)
    cap = cv2.VideoCapture(video_path)
    try:
        idx = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            height, width = frame.shape[:2]
            result = model.track(frame, persist=True)[0]
            mask = np.zeros((height, width), dtype=np.uint8)

            track_ids = result.boxes.id
            # Masks without tracker IDs cannot be matched to target_id. Previously the
            # IDs fell back to [] and indexing them per mask raised IndexError.
            if result.masks is not None and track_ids is not None:
                for seg_mask, track_id in zip(result.masks.data, track_ids):
                    if int(track_id) == target_id:
                        raw = seg_mask.cpu().numpy().astype(np.uint8)
                        # cv2.resize takes (width, height); nearest keeps the mask binary.
                        resized = cv2.resize(raw, (width, height), interpolation=cv2.INTER_NEAREST)
                        mask = resized * 255
                        break

            cv2.imwrite(f"{mask_folder}/frame_{idx:04d}.png", mask)
            idx += 1
    finally:
        # Release the capture even if tracking or writing raises.
        cap.release()

import subprocess
import shutil
import os
import cv2
from ultralytics import YOLO

def has_audio_stream(video_path):
    """Report whether ffprobe lists at least one audio stream for ``video_path``.

    ffprobe is asked to show only audio streams with logging limited to errors, so any
    bytes on its stdout mean an audio stream was listed.
    """
    probe_cmd = [
        "ffprobe",
        "-i", video_path,
        "-show_streams",
        "-select_streams", "a",
        "-loglevel", "error",
    ]
    probe = subprocess.run(probe_cmd, capture_output=True)
    return len(probe.stdout) > 0



def _fit_longest_side(width, height, longest=640, fallback=(640, 360)):
    """Scale ``(width, height)`` so the longer side equals ``longest``, keeping aspect ratio.

    Returns ``fallback`` when either dimension is not positive (video could not be probed).
    """
    if width <= 0 or height <= 0:
        return fallback
    scale = longest / max(width, height)
    return max(1, round(width * scale)), max(1, round(height * scale))


def run_pipeline(input_path, x, y, frame, output_path="/content/outputs/final_output_with_audio.mp4"):
    """Remove the object clicked at ``(x, y)`` on frame ``frame`` from a video.

    Steps:
      1. Load ``yolov8x-seg.pt`` and find the track ID under the click.
      2. Write one mask PNG per frame for that track into a scratch folder.
      3. Run ProPainter's ``inference_propainter.py`` on the video and the masks.
      4. If the input has an audio stream, mux it onto the inpainted video;
         otherwise copy the inpainted video to ``output_path``.

    Args:
        input_path: Path of the source video.
        x: X pixel coordinate of the click on frame ``frame``.
        y: Y pixel coordinate of the click on frame ``frame``.
        frame: 0-based index of the clicked frame.
        output_path: Where the final video is written.

    Returns:
        ``output_path``.

    Raises:
        Exception: No tracked object was found under the click.
        FileNotFoundError: ProPainter finished but its output video is missing.
        subprocess.CalledProcessError: ProPainter or ffmpeg exited with an error.

    Side effects: changes the process working directory to ``/content`` and writes
    under ``/content/outputs``.

    NOTE(review): the same model (and so the same persisted tracker state) is used for
    the click lookup and for the full mask pass that restarts at frame 0. Whether IDs
    stay consistent for clicks on frames other than 0 should be confirmed.
    """
    import sys  # local import: only used to launch ProPainter with this interpreter

    model = YOLO("yolov8x-seg.pt")
    mask_folder = "/content/yolo_seg_masks_object"
    outputs_root = "/content/outputs"
    shutil.rmtree(mask_folder, ignore_errors=True)

    try:
        print("🔍 Getting object track ID...")
        target_id = get_track_id_at_frame(model, input_path, x, y, frame)
        if target_id is None:
            raise Exception("Target object not found at given coordinate/frame")

        print("🎭 Generating masks...")
        generate_mask_for_id(model, input_path, target_id, mask_folder)

        # Derive the processing size from the real frame size so that non-16:9 videos
        # are not squeezed into 640x360; a 16:9 input still yields exactly 640x360.
        cap = cv2.VideoCapture(input_path)
        try:
            w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        finally:
            cap.release()
        out_w, out_h = _fit_longest_side(w, h)

        print("🖌️ Running ProPainter...")
        subprocess.run([
            sys.executable, "/content/ProPainter/inference_propainter.py",
            "--video", input_path,
            "--mask", mask_folder,
            "--output", outputs_root,
            "--width", str(out_w), "--height", str(out_h),
            "--subvideo_length", "40"
        ], check=True)

        os.chdir("/content")
        # ProPainter is expected to write <output>/<video name without extension>/
        # inpaint_out.mp4; for /content/input.mp4 this is the path that was
        # previously hard-coded. NOTE(review): confirm for non-mp4/mov/avi inputs.
        video_name = os.path.splitext(os.path.basename(input_path))[0]
        inpainted_path = os.path.join(outputs_root, video_name, "inpaint_out.mp4")
        if not os.path.exists(inpainted_path):
            raise FileNotFoundError(f"ProPainter output not found: {inpainted_path}")

        os.makedirs(os.path.dirname(output_path) or ".", exist_ok=True)

        if has_audio_stream(input_path):
            print("🔊 Extracting and adding audio...")
            # Take the audio straight from the source and encode it to AAC. The
            # earlier stream copy into a .aac file failed for non-AAC source audio.
            subprocess.run([
                "ffmpeg", "-y", "-i", inpainted_path, "-i", input_path,
                "-map", "0:v:0", "-map", "1:a:0",
                "-c:v", "copy", "-c:a", "aac", "-shortest", output_path
            ], check=True)
        else:
            print("⚠️ No audio stream found. Skipping audio operations.")
            shutil.copy(inpainted_path, output_path)
    finally:
        # Scratch masks are removed on failure as well as on success.
        shutil.rmtree(mask_folder, ignore_errors=True)

    return output_path


Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
⚠️ No audio stream found; skipping audio extraction.


In [None]:
from fastapi import FastAPI, UploadFile, File, Form, HTTPException
from fastapi.responses import FileResponse
from pyngrok import ngrok
import threading
import uvicorn
import requests

app = FastAPI()

@app.post("/remove/")
async def remove_object(
    video_url: str = Form(...),
    x: int = Form(...),
    y: int = Form(...),
    frame: int = Form(...)
):
    """Download a video, remove the object clicked at ``(x, y)`` on ``frame``, return the result.

    Form fields:
        video_url: URL the video is downloaded from.
        x, y: Pixel coordinates of the click.
        frame: Index of the clicked frame.

    Returns:
        A ``FileResponse`` with the video produced by ``run_pipeline``.

    Raises:
        HTTPException(400): The video could not be downloaded.
        HTTPException(500): Any failure while processing the video.

    NOTE(review): the download and ``run_pipeline`` are blocking calls inside an
    ``async`` handler, so the event loop is held for the whole request. The pipeline
    uses fixed paths (``/content/input.mp4`` here), so parallel requests would need
    per-request paths or a lock before this work is moved off the loop.

    NOTE(security): ``video_url`` is fetched as given; restrict allowed hosts before
    exposing this endpoint beyond a demo.
    """
    try:
        print(" Downloading video from URL:", video_url)
        input_path = "/content/input.mp4"
        try:
            # Stream to disk in chunks rather than holding the whole video in memory;
            # the timeout keeps an unresponsive host from hanging the request forever.
            with requests.get(video_url, stream=True, timeout=(10, 300)) as response:
                if response.status_code != 200:
                    raise HTTPException(status_code=400, detail="Video download failed.")
                with open(input_path, "wb") as f:
                    for chunk in response.iter_content(chunk_size=1 << 20):
                        f.write(chunk)
        except requests.RequestException as e:
            # Bad URL, refused connection, timeout: a problem with the client's input.
            raise HTTPException(status_code=400, detail="Video download failed.") from e

        print(f" Click at ({x}, {y}) on frame {frame}")
        output_path = run_pipeline(input_path, x, y, frame)

        print("✅ Returning inpainted video with audio.")
        return FileResponse(output_path, media_type="video/mp4", filename="inpainted_with_audio.mp4")

    except HTTPException:
        # Keep deliberate HTTP errors (the 400 above) instead of rewrapping them as 500.
        raise
    except Exception as e:
        print(" ERROR:", str(e))
        raise HTTPException(status_code=500, detail=f"Processing failed: {str(e)}")





In [None]:
# The ngrok authtoken is a credential, so it is read from the NGROK_AUTHTOKEN
# environment variable (or prompted for) rather than written into the notebook.
# Any token that was previously committed in this cell should be revoked.
import os
from getpass import getpass

ngrok.set_auth_token(os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: "))

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [None]:
%cd /content/

/content


In [None]:


SERVER_HOST = "0.0.0.0"
SERVER_PORT = 8000


def run():
    """Serve ``app`` with uvicorn on SERVER_HOST:SERVER_PORT; blocks while serving."""
    uvicorn.run(app, host=SERVER_HOST, port=SERVER_PORT)


# uvicorn.run blocks, so it runs on its own thread and this cell can go on to open
# the tunnel.
thread = threading.Thread(target=run)
thread.start()

# Expose the local server port through ngrok and show the resulting tunnel.
public_url = ngrok.connect(SERVER_PORT)
print("🚀 Public URL:", public_url)


INFO:     Started server process [509]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


🚀 Public URL: NgrokTunnel: "https://50fd-35-185-240-43.ngrok-free.app" -> "http://localhost:8000"


In [None]:

# Delete /content/outputs (the directory passed to ProPainter as --output) and
# everything in it, so results of an earlier run are not picked up by mistake.
!rm -rf /content/outputs

In [None]:
# !curl -X POST https://d43a-34-105-35-253.ngrok-free.app/remove/ \
#   -F "video_url=https://res.cloudinary.com/dvdykz9el/video/upload/v1750874160/butterfly_ku8qyt.mp4" \
#   -F "x=984" \
#   -F "y=523" \
#   -F "frame=0" \
#   --output inpainted_with_audio.mp4


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0🌐 Downloading video from URL: https://res.cloudinary.com/dvdykz9el/video/upload/v1750874160/butterfly_ku8qyt.mp4
🎯 Click at (984, 523) on frame 0
Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8x-seg.pt to 'yolov8x-seg.pt'...
100   507    0     0  100   507      0    414  0:00:01  0:00:01 --:--:--   414

100%|██████████| 137M/137M [00:00<00:00, 293MB/s]


100   507    0     0  100   507      0    228  0:00:02  0:00:02 --:--:--   228🔍 Getting object track ID...

100   507    0     0  100   507      0    157  0:00:03  0:00:03 --:--:--   1570: 320x640 1 bird, 1 orange, 110.2ms
Speed: 2.2ms preprocess, 110.2ms inference, 13.4ms postprocess per image at shape (1, 3, 320, 640)
🎭 Generating masks...

0: 320x640 1 bird, 1 orange, 39.1ms
Speed: 2.8ms preprocess, 39.1ms inference, 4.6ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 bird, 1 orange, 39.5ms
Speed: 2.9ms preprocess, 39.5ms inference, 2.3ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 bird, 1 orange, 39.2ms
Speed: 3.1ms preprocess, 39.2ms inference, 2.3ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 bird, 1 orange, 39.1ms
Speed: 3.2ms preprocess, 39.1ms inference, 2.3ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 bird, 1 orange, 33.7ms
Speed: 3.7ms preprocess, 33.7ms inference, 2.4ms postprocess per image at shape (1

In [None]:
# Debug aid: recursively list /content/outputs/input/, where the inpainted video
# (inpaint_out.mp4) for /content/input.mp4 is expected to appear.
!ls -R /content/outputs/input/




/content/outputs/input/:


In [None]:
import shutil

# Scratch directories left behind by a pipeline run; missing ones are ignored.
for stale_dir in ("/content/outputs", "/content/yolo_seg_masks_object"):
    shutil.rmtree(stale_dir, ignore_errors=True)

# Single files left behind by a run: the downloaded input and the extracted audio.
for stale_file in ("/content/input.mp4", "/content/original_audio.aac"):
    if os.path.exists(stale_file):
        os.remove(stale_file)
