<a href="https://colab.research.google.com/github/rishabh-2005/IITISoC-ML-05/blob/main/features/object_inpainting/User_click.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q ultralytics opencv-python-headless diffusers torch torchvision ffmpeg-python fastapi uvicorn python-multipart pyngrok supervision
!git clone https://github.com/sczhou/ProPainter.git
%cd ProPainter
!pip install -r requirements.txt


In [None]:
from ultralytics import YOLO
import cv2, numpy as np, os, subprocess, shutil

def get_track_id_at_frame(model, video_path, click_x, click_y, frame_number):
    """Return the tracker ID of the object under a click on one video frame.

    Frames are read sequentially until ``frame_number`` is reached; only that
    frame is passed to ``model.track``. The first detection whose bounding box
    contains ``(click_x, click_y)`` (box edges inclusive) is selected.

    Args:
        model: Object exposing ``track(frame, persist=True)`` that returns a
            sequence whose first element has ``boxes.xyxy`` and ``boxes.id``.
        video_path: Path passed to ``cv2.VideoCapture``.
        click_x: Click x coordinate, in the same coordinate system as
            ``boxes.xyxy``.
        click_y: Click y coordinate, in the same coordinate system as
            ``boxes.xyxy``.
        frame_number: Zero-based index of the frame the click refers to.

    Returns:
        The track ID as ``int``, or ``None`` when the frame cannot be read
        (including a negative ``frame_number``), when the tracker assigned no
        IDs on that frame, or when no box contains the click.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        frame = None
        for _ in range(frame_number + 1):
            ok, frame = cap.read()
            if not ok:
                # The video ended (or could not be opened) before the target frame.
                return None
    finally:
        # Always free the capture handle, even if reading raises.
        cap.release()

    if frame is None:
        # Negative frame_number: the loop above never ran.
        return None

    # NOTE(review): only this single frame is fed to the tracker, and
    # persist=True keeps the tracker state on `model`. Whether the ID returned
    # here matches the IDs produced when the whole video is tracked afterwards
    # for frame_number > 0 should be confirmed.
    detections = model.track(frame, persist=True)[0].boxes
    if detections is None or detections.id is None:
        # No boxes, or the tracker attached no IDs: nothing to select.
        return None

    for box, track_id in zip(detections.xyxy, detections.id):
        x1, y1, x2, y2 = map(int, box.tolist())
        if x1 <= click_x <= x2 and y1 <= click_y <= y2:
            return int(track_id)
    return None

def generate_mask_for_id(model, video_path, target_id, mask_folder):
    """Write one mask PNG per video frame for the object with ``target_id``.

    Every frame of the video is passed to ``model.track``. For each frame a
    single-channel ``uint8`` image of the frame's height and width is written
    to ``{mask_folder}/frame_{index:04d}.png``: pixels are 255 where the first
    segmentation mask whose track ID equals ``target_id`` is non-zero, and the
    image is all zeros when the object is not found on that frame.

    Args:
        model: Object exposing ``track(frame, persist=True)`` that returns a
            sequence whose first element has ``masks`` (with ``.data``) and
            ``boxes.id``.
        video_path: Path passed to ``cv2.VideoCapture``.
        target_id: Track ID to extract.
        mask_folder: Output directory; created if it does not exist.

    Raises:
        OSError: If a mask image cannot be written.
    """
    os.makedirs(mask_folder, exist_ok=True)
    cap = cv2.VideoCapture(video_path)
    try:
        idx = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            height, width = frame.shape[:2]
            mask = np.zeros((height, width), dtype=np.uint8)

            result = model.track(frame, persist=True)[0]
            ids = result.boxes.id if result.boxes is not None else None
            # Masks can be present while the tracker attached no IDs; in that
            # case there is nothing to match, so the frame keeps an empty mask.
            if result.masks is not None and ids is not None:
                # zip pairs each mask with its ID and cannot index past the
                # shorter of the two sequences.
                for seg_mask, track_id in zip(result.masks.data, ids):
                    if int(track_id) == target_id:
                        m = seg_mask.cpu().numpy().astype(np.uint8)
                        # The model's mask may not have the frame's size;
                        # nearest-neighbour keeps it strictly binary.
                        m = cv2.resize(m, (width, height), interpolation=cv2.INTER_NEAREST)
                        mask = m * 255
                        break

            out_path = f"{mask_folder}/frame_{idx:04d}.png"
            # cv2.imwrite reports failure through its return value only.
            if not cv2.imwrite(out_path, mask):
                raise OSError(f"Could not write mask image: {out_path}")
            idx += 1
    finally:
        # Always free the capture handle, even if tracking or writing raises.
        cap.release()

def run_pipeline(input_path, x, y, frame, output_path="/content/outputs/final_output_with_audio.mp4",
                 width=640, height=360, subvideo_length=10):
    """Remove the clicked object from a video and return the result's path.

    Steps:
      1. Load the ``yolov8x-seg.pt`` model and resolve the track ID of the
         object under ``(x, y)`` on frame ``frame``.
      2. Write one mask image per frame for that object into
         ``/content/yolo_seg_masks_object`` (emptied first).
      3. Run ProPainter's ``inference_propainter.py`` on the video and masks,
         writing into ``/content/outputs``.
      4. Mux the inpainted video with the audio of the input video (if the
         input has an audio stream) into ``output_path``.

    Args:
        input_path: Path of the source video.
        x: Click x coordinate used to pick the object.
        y: Click y coordinate used to pick the object.
        frame: Zero-based index of the frame the click refers to.
        output_path: Destination of the final video.
        width: Value passed to ProPainter's ``--width``.
        height: Value passed to ProPainter's ``--height``.
        subvideo_length: Value passed to ProPainter's ``--subvideo_length``.

    Returns:
        ``output_path``.

    Raises:
        ValueError: No tracked object was found at the given coordinate/frame.
        FileNotFoundError: ProPainter finished but its output video is missing.
        subprocess.CalledProcessError: ProPainter or ffmpeg exited non-zero.
    """
    import sys  # local import: only used to launch ProPainter with the kernel's interpreter

    propainter_dir = "/content/ProPainter"
    results_dir = "/content/outputs"
    mask_folder = "/content/yolo_seg_masks_object"

    # The ProPainter subprocess runs from its own directory, so make the
    # input path independent of the current working directory.
    input_path = os.path.abspath(input_path)

    model = YOLO("yolov8x-seg.pt")
    # Start from an empty mask folder so masks of a previous run never mix in.
    shutil.rmtree(mask_folder, ignore_errors=True)

    target_id = get_track_id_at_frame(model, input_path, x, y, frame)
    if target_id is None:
        raise ValueError("Target object not found at given coordinate/frame")

    generate_mask_for_id(model, input_path, target_id, mask_folder)

    # An argument list (no shell) keeps paths with spaces or shell characters
    # intact, and check=True stops the pipeline when inpainting fails instead
    # of muxing a missing or stale file. cwd pins the script to its checkout
    # so any relative paths it uses resolve the same way on every call.
    subprocess.run(
        [sys.executable, os.path.join(propainter_dir, "inference_propainter.py"),
         "--video", input_path,
         "--mask", mask_folder,
         "--output", results_dir,
         "--width", str(width), "--height", str(height),
         "--subvideo_length", str(subvideo_length)],
        cwd=propainter_dir, check=True)

    # NOTE(review): assumes ProPainter saves its result as
    # <output>/<input file stem>/inpaint_out.mp4 — confirm against
    # inference_propainter.py. For "/content/input.mp4" this resolves to
    # "/content/outputs/input/inpaint_out.mp4".
    video_name = os.path.splitext(os.path.basename(input_path))[0]
    inpainted_path = os.path.join(results_dir, video_name, "inpaint_out.mp4")
    if not os.path.isfile(inpainted_path):
        raise FileNotFoundError(f"ProPainter output not found: {inpainted_path}")

    os.makedirs(os.path.dirname(output_path) or ".", exist_ok=True)

    # Take video from the inpainted file and audio straight from the source.
    # The trailing "?" makes the audio mapping optional, so videos without an
    # audio track still succeed; audio is re-encoded to AAC, so the source
    # audio codec does not matter.
    subprocess.run(
        ["ffmpeg", "-y", "-i", inpainted_path, "-i", input_path,
         "-map", "0:v:0", "-map", "1:a:0?",
         "-c:v", "copy", "-c:a", "aac", output_path],
        check=True)
    return output_path

In [None]:
from fastapi import FastAPI, UploadFile, File, Form, HTTPException
from fastapi.responses import FileResponse
from pyngrok import ngrok
import threading
import uvicorn
import requests

# FastAPI application instance: the POST /remove/ route is registered on it,
# and it is the object handed to uvicorn when the server is started.
app = FastAPI()

@app.post("/remove/")
async def remove_object(
    video_url: str = Form(...),
    x: int = Form(...),
    y: int = Form(...),
    frame: int = Form(...)
):
    """Download a video, remove the clicked object from it, and return the result.

    Form fields:
        video_url: URL the source video is downloaded from.
        x: Click x coordinate used to pick the object.
        y: Click y coordinate used to pick the object.
        frame: Index of the frame the click refers to.

    Returns:
        A ``FileResponse`` with the processed MP4.

    Raises:
        HTTPException: 400 when the video cannot be downloaded; 500 when any
            later processing step fails.
    """
    input_path = "/content/input.mp4"

    try:
        try:
            # Stream the body to disk in chunks so a large video is never held
            # in memory as a whole; the timeout keeps a stalled server from
            # hanging the request, and `with` closes the connection.
            with requests.get(video_url, stream=True, timeout=30) as response:
                if response.status_code != 200:
                    raise HTTPException(status_code=400, detail="Video download failed.")
                with open(input_path, "wb") as f:
                    for chunk in response.iter_content(chunk_size=1 << 20):
                        f.write(chunk)
        except requests.RequestException as e:
            # Bad URL, connection error, timeout, truncated body: the caller's
            # input is at fault, so report it the same way as a non-200 reply.
            raise HTTPException(status_code=400, detail="Video download failed.") from e

        # NOTE(review): run_pipeline is synchronous, so this coroutine holds
        # the event loop until it finishes. The input, mask and output paths
        # are fixed, so overlapping runs would overwrite each other's files —
        # confirm before moving this call to a worker thread.
        output_path = run_pipeline(input_path, x, y, frame)

        return FileResponse(output_path, media_type="video/mp4", filename="inpainted_with_audio.mp4")

    except HTTPException:
        # Deliberate HTTP errors (such as the 400 above) must keep their
        # status code instead of being rewrapped as a 500 below.
        raise
    except Exception as e:
        print(" ERROR:", str(e))
        raise HTTPException(status_code=500, detail=f"Processing failed: {str(e)}")

In [None]:
# Register the ngrok authtoken without writing the secret into the notebook:
# use the NGROK_AUTHTOKEN environment variable when it is set, otherwise ask
# for the token with a hidden prompt.
import os
from getpass import getpass

ngrok.set_auth_token(os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: "))

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [None]:


PORT = 3000  # single value shared by the uvicorn server and the ngrok tunnel


def run():
    """Serve the FastAPI app with uvicorn on all interfaces, port ``PORT``."""
    uvicorn.run(app, host="0.0.0.0", port=PORT)


# Start the server only when no server thread from an earlier run of this cell
# is still alive: a second uvicorn on the same port cannot bind and exits with
# "[Errno 98] ... address already in use". A thread that is already running
# keeps serving the `app` object it was started with.
_previous_thread = globals().get("thread")
if _previous_thread is None or not _previous_thread.is_alive():
    # daemon=True so the server thread never keeps the interpreter from exiting.
    thread = threading.Thread(target=run, daemon=True)
    thread.start()

public_url = ngrok.connect(PORT)
print("Public URL:", public_url)


INFO:     Started server process [816]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
ERROR:    [Errno 98] error while attempting to bind on address ('0.0.0.0', 3000): address already in use
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.


🚀 Public URL: NgrokTunnel: "https://16550d4c6910.ngrok-free.app" -> "http://localhost:3000"


In [None]:
# Delete the video file that the /remove/ handler downloads to /content/input.mp4.
!rm -rf /content/input.mp4

In [None]:
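# Example request. The ngrok host below is session-specific; replace it with
# the public URL printed when the tunnel is opened.
# !curl -X POST https://16550d4c6910.ngrok-free.app/remove/ \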
#   -F "video_url=https://res.cloudinary.com/dvdykz9el/video/upload/v1750874160/butterfly_ku8qyt.mp4" \
#   -F "x=984" \
#   -F "y=523" \
#   -F "frame=0" \
#   --output inpainted_with_audio.mp4

In [None]:
# List the directory where the pipeline expects ProPainter's result
# (/content/outputs/input/inpaint_out.mp4) to check that it was written.
!ls -R /content/outputs/input/




/content/outputs/input/:
