In [11]:
!pip install opencv-python-headless torch diffusers transformers pillow tqdm

# Installing the required libraries:
# OpenCV handles video input/output, frame extraction, and basic image manipulation (e.g., resizing, drawing text).
# PyTorch loads and executes the diffusion model on the input frames.
# The diffusers library loads the pre-trained Stable Diffusion inpainting model and performs inpainting on the video frames.
# transformers is included as one of the typical dependencies needed when working with Hugging Face's model ecosystem; this script itself never imports it directly.
# Pillow converts images between formats (e.g., OpenCV images to PIL format for the diffusion model) and draws the masks used for inpainting.
# tqdm gives visual feedback while the video frames are processed by showing the progress of the batch processing.



In [12]:
import cv2
import numpy as np

#Here I am actually creating the dummy video for input
def create_demovideo(file_path, width=640, height=480, num_frames=10, fps=5):
    """Write a short synthetic test clip to ``file_path``.

    Every frame is a plain white image with its 1-based frame number drawn in
    red (BGR ``(0, 0, 255)``) near the centre, so the clip shows the numbers
    ``1..num_frames`` in quick succession.

    Args:
        file_path: Destination path of the video file.
        width: Frame width in pixels.
        height: Frame height in pixels.
        num_frames: Number of frames to write.
        fps: Frame rate handed to the video writer.

    Raises:
        IOError: If the video writer could not be opened for ``file_path``.
    """
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    video = cv2.VideoWriter(file_path, fourcc, fps, (width, height))
    try:
        # A writer that failed to open would otherwise drop every frame silently.
        if not video.isOpened():
            raise IOError(f"Could not open video writer for {file_path!r}")
        for i in range(num_frames):
            # White canvas; the frame number is drawn on top of it.
            frame = np.full((height, width, 3), 255, dtype=np.uint8)
            cv2.putText(frame, str(i + 1), (width // 2 - 20, height // 2), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 255), 5, cv2.LINE_AA)
            video.write(frame)
    finally:
        # Always release the writer, even if drawing or writing a frame fails.
        video.release()

# Name of the demo input file; calling create_demovideo() with it below writes the clip to disk.
input_video_path = 'simple_demo_sd_video.mp4'
create_demovideo(input_video_path)


In [13]:
def extract_frames(video_path):
    """Read every frame of the video at ``video_path`` into a list.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.

    Returns:
        A list with one element per frame, in the order the frames were read.
        The list is empty when the capture cannot be opened or yields no frame.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        while cap.isOpened():
            # cap.read() returns (success_flag, frame); the flag turns false
            # once no further frame can be read.
            ok, frame = cap.read()
            if not ok:
                break
            frames.append(frame)
    finally:
        # Release the capture even if reading raises, so the handle never leaks.
        cap.release()
    return frames

# Releasing the VideoCapture object once reading is finished is crucial to avoid resource leaks, especially when dealing with multiple video files or long-running applications.
# Finally, we return the frames list, which holds every frame of the video as an individual element.

In [14]:
import torch
from diffusers import StableDiffusionInpaintPipeline
from PIL import Image, ImageDraw

# Load the pre-trained Stable Diffusion inpainting pipeline used for every frame.
# NOTE(review): confirm this repository id is still available on the Hugging Face Hub.
model_name = "runwayml/stable-diffusion-inpainting"
# Pick the device first and derive the dtype from it: half precision is only
# requested when the model actually runs on CUDA; the CPU fallback uses float32.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
torch_dtype = torch.float16 if device == 'cuda' else torch.float32
pipe = StableDiffusionInpaintPipeline.from_pretrained(model_name, torch_dtype=torch_dtype)
pipe = pipe.to(device)

def inpaint_and_resize(image, pipe, prompt="", output_size=(1280, 720)):
    """Inpaint the left and right quarters of a frame and resize the result.

    The frame is resized to 512x512, the outer quarters on both sides are
    masked and regenerated by ``pipe``, the untouched middle half of the
    resized frame is pasted back, and the image is resized to ``output_size``.

    Args:
        image: BGR frame as a NumPy array (it is converted with
            ``cv2.COLOR_BGR2RGB`` before being given to the pipeline).
        pipe: Callable invoked as ``pipe(prompt=..., image=..., mask_image=...)``
            whose result exposes the generated image as ``.images[0]``.
        prompt: Text prompt passed to ``pipe``; defaults to the empty prompt.
        output_size: ``(width, height)`` of the returned image.

    Returns:
        The processed frame as a BGR NumPy array of size ``output_size``.

    Raises:
        ValueError: If ``image`` is ``None``.
    """
    if image is None:
        raise ValueError("inpaint_and_resize() received no frame (image is None)")

    side = 512                     # square working resolution fed to the pipeline
    left_edge = side // 4          # 128: first column that is kept
    right_edge = side - left_edge  # 384: first column of the right mask

    resized_image = cv2.resize(image, (side, side))
    pil_image = Image.fromarray(cv2.cvtColor(resized_image, cv2.COLOR_BGR2RGB))

    # Black mask = keep, white = inpaint.
    mask = Image.new('L', (side, side), 0)
    draw = ImageDraw.Draw(mask)
    # Pillow rectangles include both corner points, so the corners are given
    # as the last masked pixel; the mask then lines up exactly with the
    # columns [left_edge, right_edge) that are restored below.
    draw.rectangle([(0, 0), (left_edge - 1, side - 1)], fill=255)           # left quarter
    draw.rectangle([(right_edge, 0), (side - 1, side - 1)], fill=255)      # right quarter
    inpainted_image = pipe(prompt=prompt, image=pil_image, mask_image=mask).images[0]

    inpainted_image_cv = cv2.cvtColor(np.array(inpainted_image), cv2.COLOR_RGB2BGR)

    # Put the original pixels back into the unmasked middle so only the sides
    # come from the model.
    inpainted_image_cv[:, left_edge:right_edge] = resized_image[:, left_edge:right_edge]
    hd_image = cv2.resize(inpainted_image_cv, tuple(output_size))
    return hd_image


safety_checker/model.safetensors not found


Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

An error occurred while trying to fetch /root/.cache/huggingface/hub/models--runwayml--stable-diffusion-inpainting/snapshots/51388a731f57604945fddd703ecb5c50e8e7b49d/vae: Error no file named diffusion_pytorch_model.safetensors found in directory /root/.cache/huggingface/hub/models--runwayml--stable-diffusion-inpainting/snapshots/51388a731f57604945fddd703ecb5c50e8e7b49d/vae.
Defaulting to unsafe serialization. Pass `allow_pickle=False` to raise an error instead.
An error occurred while trying to fetch /root/.cache/huggingface/hub/models--runwayml--stable-diffusion-inpainting/snapshots/51388a731f57604945fddd703ecb5c50e8e7b49d/unet: Error no file named diffusion_pytorch_model.safetensors found in directory /root/.cache/huggingface/hub/models--runwayml--stable-diffusion-inpainting/snapshots/51388a731f57604945fddd703ecb5c50e8e7b49d/unet.
Defaulting to unsafe serialization. Pass `allow_pickle=False` to raise an error instead.


In [7]:
from concurrent.futures import ThreadPoolExecutor
#  Here we are importing ThreadPoolExecutor which is a high-level interface for asynchronously executing function calls using threads.
from tqdm import tqdm
# It is simply a library for showing progress bars in python loops

def process_frame(frame, pipe):
    """Inpaint and upscale a single frame.

    Thin wrapper that hands ``frame`` and ``pipe`` to ``inpaint_and_resize``
    and returns whatever that call produces.
    """
    processed = inpaint_and_resize(frame, pipe)
    return processed
# Takes a frame and the pipe as inputs and returns the processed frame produced by inpaint_and_resize.

def process_frames(frames, pipe, number_of_workers=4):
    """Run ``process_frame`` on every frame and return the results in input order.

    Args:
        frames: List of video frames to be processed.
        pipe: The inpainting pipeline used to process each frame.
        number_of_workers: Kept only so existing callers keep working; it no
            longer starts worker threads (see below).

    Returns:
        A list with the processed frame for each input frame, in the same order.
    """
    # The frames are processed one at a time on purpose. All frames share the
    # single `pipe` object, and the diffusers documentation warns that a
    # pipeline's scheduler is not thread-safe, so calling the same pipeline
    # from several threads at once can corrupt its denoising state.
    return [process_frame(frame, pipe) for frame in tqdm(frames, total=len(frames))]


# We pass in frames, pipe and number_of_workers: frames is the list of video frames to be processed and pipe is the inpainting pipeline that processes each frame.
# number_of_workers used to be the number of threads for parallel processing; it is still accepted but the frames are now handled sequentially.

# process_frame is applied to each frame in the frames list, one after another, because the shared pipeline must not be called from several threads at once.
# The pipe argument is passed to process_frame along with each frame.

# tqdm wraps the loop with a progress bar, displaying the progress of frame processing.

# We return the list of processed frames after applying inpaint_and_resize to each frame.

In [15]:
def savevideo(frames, output_path, fps):
    """Write ``frames`` to ``output_path`` as an 'mp4v'-encoded video.

    Args:
        frames: Non-empty sequence of frames; the height and width of the
            first frame define the size of the video.
        output_path: Destination path of the video file.
        fps: Frame rate handed to the video writer.

    Raises:
        ValueError: If ``frames`` is empty.
        IOError: If the video writer could not be opened for ``output_path``.
    """
    if len(frames) == 0:
        raise ValueError("savevideo() needs at least one frame to write")

    # Only height and width are needed; the channel count is ignored.
    height, width, _ = frames[0].shape
    out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))
    try:
        # A writer that failed to open would otherwise drop every frame silently.
        if not out.isOpened():
            raise IOError(f"Could not open video writer for {output_path!r}")
        for frame in frames:
            out.write(frame)
    finally:
        # Releasing finalizes the file; do it even if writing fails.
        out.release()

# We get the frame height and width from the shape of the first frame in the frames list.
# Only the height and width are needed; the third value of the shape, the number of channels, is ignored.
# Next we create a video writer object that saves the video file with the MPEG-4 ('mp4v') codec, using the given fps together with that width and height.
# Then we loop through the whole frames list, writing each frame to the video file through the video writer object.
# Finally we release the video writer object, which finalizes and saves the video file. This is an important step to ensure that the video file is properly closed and the data is written to disk.

In [16]:
def convertvideo(input_video_path, output_video_path, batch_size=2):
    """Process every frame of the input video and save the result as a new video.

    The frame rate is read from the input file, all frames are extracted,
    passed through ``process_frames`` in batches (using the module-level
    ``pipe``), and the processed frames are written with ``savevideo`` at the
    same frame rate.

    Args:
        input_video_path: Path of the video to convert.
        output_video_path: Path the processed video is written to.
        batch_size: Number of frames handed to ``process_frames`` at a time.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1, or if no frame could
            be read from ``input_video_path``.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    # Open the file once just to read its frame rate.
    cap = cv2.VideoCapture(input_video_path)
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
    finally:
        cap.release()

    frames = extract_frames(input_video_path)
    if not frames:
        # Fail here with a clear message instead of later inside savevideo.
        raise ValueError(f"No frames could be read from {input_video_path!r}")

    processed_frames = []
    for i in tqdm(range(0, len(frames), batch_size), desc="Processing Batches"):
        batch = frames[i:i + batch_size]
        processed_batch = process_frames(batch, pipe)
        processed_frames.extend(processed_batch)

    savevideo(processed_frames, output_video_path, fps)


# Here we open the video file, read its frames per second, and then release the capture object.
# Then we extract the frames and use a batch size of 2, which means that we work on 2 frames at a time.
# Each batch is processed while a progress bar is displayed, and the processed frames are then saved to the output path by calling the savevideo function.
# Overall, this converts the video from SD to HD while preserving the context.


if __name__ == "__main__":
    # Source clip and destination file for the converted video.
    input_video_path, output_video_path = 'simple_demo_sd_video.mp4', 'output_hd_demo_video.mp4'
    convertvideo(input_video_path, output_video_path)


Processing Batches:   0%|          | 0/5 [00:00<?, ?it/s]
  0%|          | 0/2 [00:00<?, ?it/s][A

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]


100%|██████████| 2/2 [00:17<00:00,  8.67s/it]
Processing Batches:  20%|██        | 1/5 [00:17<01:09, 17.35s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]


100%|██████████| 2/2 [00:17<00:00,  8.58s/it]
Processing Batches:  40%|████      | 2/5 [00:34<00:51, 17.25s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]


100%|██████████| 2/2 [00:16<00:00,  8.37s/it]
Processing Batches:  60%|██████    | 3/5 [00:51<00:34, 17.02s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]


100%|██████████| 2/2 [00:16<00:00,  8.24s/it]
Processing Batches:  80%|████████  | 4/5 [01:07<00:16, 16.82s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]


100%|██████████| 2/2 [00:16<00:00,  8.29s/it]
Processing Batches: 100%|██████████| 5/5 [01:24<00:00, 16.87s/it]
