<a href="https://colab.research.google.com/github/neehasajja/waymo-opendataset/blob/main/videowithboxes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Mount Google Drive inside the Colab VM so files under /content/drive
# can be read like local files.
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import numpy as np
from PIL import Image, ImageDraw
import pandas as pd
import cv2
from base64 import b64encode
from io import BytesIO
from moviepy.editor import ImageSequenceClip
from IPython.display import HTML


# Cameras to render, matched against the `key.camera_name` column.
camera_names = [1]

# Encoded camera images, one row per (frame timestamp, camera).
df_image = pd.read_parquet('/content/drive/MyDrive/validation/camera_image/file1.parquet')

# 2D box labels for the same segment, one row per labelled box.
df_box = pd.read_parquet('/content/drive/MyDrive/validation/camera_box/file1.parquet')

# Frames to render, in playback order. Values are matched against the
# `key.frame_timestamp_micros` column of both tables.
# NOTE(review): consecutive entries are ~100 ms apart, except for a jump from
# 1557845073564307 to 1557845075364290 and a missing entry between
# 1557845075364290 and 1557845075564294 -- confirm the gaps are intentional.
frame_timestamps = [
    1557845072764279, 1557845072864262, 1557845072964277, 1557845073064260, 1557845073164288,
    1557845073264273, 1557845073364299, 1557845073464283, 1557845073564307, 1557845075364290,
    1557845075564294, 1557845075664314, 1557845075764296, 1557845075864315, 1557845075964303,
    1557845076064330, 1557845076164361, 1557845076264342, 1557845076364332, 1557845076464321,
    1557845076564307, 1557845076664327, 1557845076764312, 1557845076864333, 1557845076964322,
    1557845077064342, 1557845077164332, 1557845077264350, 1557845077364372, 1557845077464355,
    1557845077564372, 1557845077664355, 1557845077764376, 1557845077864358, 1557845077964343,
    1557845078064329, 1557845078164309, 1557845078264334, 1557845078364314, 1557845078464340,
    1557845078564319, 1557845078664302, 1557845078764330, 1557845078864318, 1557845078964337,
    1557845079064331, 1557845079164322, 1557845079264343, 1557845079364326, 1557845079464305,
    1557845079564329, 1557845079664314, 1557845079764327, 1557845079864306, 1557845079964328,
    1557845080064318, 1557845080164307, 1557845080264301, 1557845080364319, 1557845080464342,
    1557845080564368, 1557845080664363, 1557845080764382, 1557845080864362, 1557845080964377,
    1557845081064401, 1557845081164383, 1557845081264408, 1557845081364390, 1557845081464420,
    1557845081564413, 1557845081664397, 1557845081764387, 1557845081864377, 1557845081964366,
    1557845082064348, 1557845082164337, 1557845082264317, 1557845082364300, 1557845082464283,
    1557845082564267, 1557845082664245, 1557845082764275, 1557845082864295, 1557845082964287,
    1557845083064307, 1557845083164333, 1557845083264312, 1557845083364340, 1557845083464331,
    1557845083564356, 1557845083664337, 1557845083764360, 1557845083864355, 1557845083964344,
    1557845084064334, 1557845084164315, 1557845084264295, 1557845084364284, 1557845084464278,
    1557845084564266, 1557845084664255, 1557845084764281, 1557845084864267, 1557845084964250,
    1557845085064276, 1557845085164268, 1557845085264287, 1557845085364270, 1557845085464294,
]

# Build one annotated frame (as a NumPy array) per (timestamp, camera) pair.
# Each frame is the decoded camera image with every labelled 2D box for that
# timestamp/camera outlined in red.
annotated_images = []

# Columns holding the box centre and extent, in the order they are unpacked below.
box_geometry_columns = [
    '[CameraBoxComponent].box.center.x',
    '[CameraBoxComponent].box.center.y',
    '[CameraBoxComponent].box.size.x',
    '[CameraBoxComponent].box.size.y',
]

for frame_timestamp in frame_timestamps:
    for camera_name in camera_names:
        # Image row(s) for this timestamp/camera.
        df_image_frame = df_image[
            (df_image['key.frame_timestamp_micros'] == frame_timestamp)
            & (df_image['key.camera_name'] == camera_name)
        ]
        if df_image_frame.empty:
            # Without an image there is nothing to draw on. Report the gap and
            # continue instead of failing with an opaque IndexError on .iloc[0].
            print(f"Skipping frame {frame_timestamp} (camera {camera_name}): no image found")
            continue

        # Box rows for this timestamp/camera. This selection may legitimately be
        # empty (a frame with no labelled objects); the frame is then emitted
        # without any rectangles. No size statistics are taken from it, because
        # max() of an empty selection is NaN and int(NaN) raises ValueError.
        df_box_frame = df_box[
            (df_box['key.frame_timestamp_micros'] == frame_timestamp)
            & (df_box['key.camera_name'] == camera_name)
        ]

        # Decode the encoded image bytes. Converting to RGB guarantees every
        # frame becomes an (H, W, 3) array regardless of the stored image mode.
        img_data = df_image_frame.iloc[0]['[CameraImageComponent].image']
        pil_image = Image.open(BytesIO(img_data)).convert('RGB')
        draw = ImageDraw.Draw(pil_image)

        # Boxes are stored as centre + size; convert each to corner coordinates.
        box_rows = df_box_frame[box_geometry_columns].itertuples(index=False, name=None)
        for x_center, y_center, box_width, box_height in box_rows:
            x1 = int(x_center - box_width / 2)
            y1 = int(y_center - box_height / 2)
            x2 = int(x_center + box_width / 2)
            y2 = int(y_center + box_height / 2)
            draw.rectangle([x1, y1, x2, y2], outline='red')

        annotated_images.append(np.array(pil_image))

# Encode the annotated frames as an H.264 MP4 and embed it in the notebook.

# Fail with a clear message instead of an IndexError on annotated_images[0].
if not annotated_images:
    raise ValueError("No annotated frames were produced; nothing to encode.")

upscale_factor = 2  # output resolution relative to the first frame
video_fps = 10      # frames per second of the written video

# The first frame defines the output size. NumPy shape is (rows, cols, ...),
# while cv2.resize expects (width, height), hence the swap.
size = annotated_images[0].shape[:2]
frame_height, frame_width = size
target_size = (upscale_factor * frame_width, upscale_factor * frame_height)

# Resize every frame to the common target size so the clip has uniform frames.
resized_images = [
    cv2.resize(img, target_size, interpolation=cv2.INTER_LANCZOS4)
    for img in annotated_images
]

# Create a video clip from the resized images
clip = ImageSequenceClip(resized_images, fps=video_fps)

# Define the output path for the final video
output_path = "output.mp4"

# Save the video clip to a file
clip.write_videofile(output_path, bitrate="5000k", codec="libx264", audio=False)

# Read the encoded file back; the context manager closes the handle promptly.
with open(output_path, 'rb') as video_file:
    mp4 = video_file.read()

# Inline the video as a base64 data URL so it plays directly in the cell output.
# The HTML object must stay the last expression of the cell to be displayed.
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
HTML(f"""
<video width=400 controls>
   <source src="{data_url}" type="video/mp4">
</video>
""")

Moviepy - Building video output.mp4.
Moviepy - Writing video output.mp4





Moviepy - Done !
Moviepy - video ready output.mp4
