In [None]:
import pandas as pd
import cv2
import os
import json
import string
import random as rand
import ast
from typing import Any, List, Tuple, Union
import random 

In [None]:
# Folder holding the unzipped CVAT project (presumably the raw export - confirm).
CVAT_FOLDER_PATH = "/home/mh731nk/_data/experiments_tmp/data/revision_8/cvat_project_raw_unzip"

# Video table stored as a zip-compressed pickle. The compression is passed by keyword so the
# meaning of 'zip' is explicit at the call site.
# NOTE(security): unpickling can execute arbitrary code - only load pickles from a trusted source.
df_videos = pd.read_pickle("/home/mh731nk/_data/experiments_tmp/data/revision_8/video.pkl", compression='zip')

# Polygon annotations; with orient='index' every top-level JSON key becomes one row.
df_polygons = pd.read_json('/home/mh731nk/_data/experiments_tmp/data/revision_8/lp_polygons.json', orient='index')

# Video drawer 
Draw polygons and bounding boxes onto the original video.

In [None]:
import cv2
import numpy as np
import pandas as pd
import os

## FUNCTION TO DRAW POLYGONS ON VIDEO

# Colour per known label, in OpenCV's BGR channel order.
_LABEL_COLORS = {
    "aline": (255, 0, 0),              # Blue
    "bline": (0, 255, 0),              # Green
    "lungslidingpresent": (0, 165, 255),  # Orange
    "lungslidingabsent": (0, 0, 255),  # Red
    "lungpointpleura": (255, 255, 0),  # Cyan-ish
    "lungpoint": (0, 255, 255)       # Yellow-ish (for lungpoint vertical line)
}
# Colour used for any label that is not listed above.
_DEFAULT_COLOR = (255, 255, 255)  # White


def _find_source_video(original_videos_folder_path, video_df_row):
    """Return the path '<root>/<video_subfolder_path>/data/<first subfolder>/<name_video>'.

    Raises ValueError when the 'data' directory is missing or has no subfolder.
    """
    data_dir = os.path.join(
        original_videos_folder_path,
        video_df_row["video_subfolder_path"],
        'data'
    )
    # Only the first level is needed, so take the first os.walk() entry instead of
    # materialising a walk over the whole directory tree.
    top_level = next(os.walk(data_dir), None)
    if top_level is None or not top_level[1]:
        raise ValueError("No subfolders found in data directory.")

    video_folder = top_level[1][0]
    return os.path.join(data_dir, video_folder, video_df_row["name_video"])


def _is_missing_polygon(polygon_points):
    """Return True when a polygon cell holds no points (None, NaN/pd.NA, or an empty sequence)."""
    # pandas stores missing cells as float NaN (or pd.NA), so an `is None` test alone is not enough.
    if polygon_points is None or polygon_points is pd.NA or isinstance(polygon_points, float):
        return True
    return len(polygon_points) == 0


def _draw_annotation(frame_img, row, frame_height, polygon_col, label_col, print_bbox):
    """Draw one annotation row (polygon or lungpoint line, plus optional bounding box) onto frame_img."""
    polygon_points = row.get(polygon_col, None)
    if _is_missing_polygon(polygon_points):
        # As before, a row without polygon data is skipped entirely (its bounding box too).
        return

    label_name = str(row.get(label_col, "unknown_label"))
    color = _LABEL_COLORS.get(label_name, _DEFAULT_COLOR)

    if label_name == "lungpoint":
        # Only the x coordinate of the first point is used: a vertical line over the frame height.
        x = int(polygon_points[0][0])
        cv2.line(frame_img, (x, 0), (x, frame_height), color, thickness=2)
        # Label the line near the top of the frame.
        cv2.putText(frame_img, label_name, (x, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
    else:
        # Every other label is drawn as a closed polygon.
        pts_np = np.array(polygon_points, dtype=np.int32).reshape((-1, 1, 2))
        cv2.polylines(frame_img, [pts_np], isClosed=True, color=color, thickness=2)
        # The label is placed at the mean of the polygon's vertices.
        cX = int(np.mean([p[0] for p in polygon_points]))
        cY = int(np.mean([p[1] for p in polygon_points]))
        cv2.putText(frame_img, label_name, (cX, cY), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

    if not print_bbox:
        return

    corners = (row["bb_min_x"], row["bb_min_y"], row["bb_max_x"], row["bb_max_y"])
    if any(pd.isna(value) for value in corners):
        # int(NaN) would raise; a row without a complete bounding box simply gets no rectangle.
        return
    x1, y1, x2, y2 = (int(value) for value in corners)

    # NOTE(review): the bounding-box label is read from the fixed "label_name" column, not from
    # `label_col`. That is kept as-is because it may be intentional - confirm with the data owner.
    bbox_label = str(row.get("label_name", "unknown_label"))
    bbox_color = _LABEL_COLORS.get(bbox_label, _DEFAULT_COLOR)

    cv2.rectangle(frame_img, (x1, y1), (x2, y2), bbox_color, 2)
    # Text sits just above the box, clamped so it never goes above the frame.
    cv2.putText(
        frame_img,
        bbox_label,
        (x1, max(0, y1 - 5)),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.5,
        bbox_color,
        2
    )


def draw_polygons_on_video(
    df_all_labels,
    original_videos_folder_path,
    video_df_row,
    store_path,
    polygon_col="interp_polygon",
    label_col="label_name",
    print_bbox=True
):
    """
    Reads one video from disk, overlays annotations on each frame, and writes the result to
    '<store_path>/<video_id>.mp4'.

    Rows are matched to frames by comparing their "frame" value with a 0-based counter of the
    frames read from the video. A row labelled "lungpoint" is drawn as a vertical line at the
    x-coordinate of its first point, spanning the frame height. Every other row is drawn as a
    closed polygon with its label at the mean of the vertices. Rows whose polygon cell is
    missing (None, NaN or empty) are skipped.

    Parameters
    ----------
    df_all_labels : pd.DataFrame
        One row per annotation per frame. Needs the columns "video_id", "frame", `polygon_col`
        and `label_col`; with print_bbox=True also "bb_min_x", "bb_min_y", "bb_max_x",
        "bb_max_y" (and "label_name", which is used for the bounding-box caption).
    original_videos_folder_path : str
        Path to the folder containing original videos.
    video_df_row : dict or pd.Series
        Metadata of the video to open, with the keys "video_id", "video_subfolder_path"
        and "name_video".
    store_path : str
        Folder in which the annotated mp4 is saved. It is created when it does not exist.
    polygon_col : str
        The name of the column in df_all_labels that holds the polygon points. Default "interp_polygon".
    label_col : str
        The name of the column in df_all_labels that holds the label (e.g. "aline", "bline",
        "lungpoint", etc.). Default "label_name".
    print_bbox : bool
        When True, the bounding box of every drawn row is drawn and captioned as well.

    Raises
    ------
    ValueError
        If '<root>/<video_subfolder_path>/data' has no subfolder.
    SystemExit
        If the source video cannot be opened.
    RuntimeError
        If the output video writer cannot be opened.
    """

    video_id = video_df_row["video_id"]
    print(f"Processing {video_id}")

    # 1) Filter df_all_labels for this video and index the rows by frame once, instead of
    #    re-scanning the whole table for every frame of the video.
    df_video = df_all_labels[df_all_labels["video_id"] == video_id]
    print(f"Found {len(df_video)} annotation rows for {video_id}")
    rows_by_frame = {
        frame_no: frame_rows
        for frame_no, frame_rows in df_video.groupby("frame", sort=False)
    }

    # 2) Build the path to the original video.
    video_path = _find_source_video(original_videos_folder_path, video_df_row)

    # 3) Open the video. Everything below runs inside try/finally so the capture and the writer
    #    are released (and the output file finalised) even when drawing fails half-way.
    cap = cv2.VideoCapture(video_path)
    out_writer = None
    try:
        if not cap.isOpened():
            print(f"Error: could not open {video_path}")
            # NOTE(review): SystemExit is kept for backward compatibility; a regular exception
            # would be friendlier inside a notebook.
            raise SystemExit

        width  = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps    = cap.get(cv2.CAP_PROP_FPS)

        # 4) Create the VideoWriter. The target folder is created first, because the writer
        #    does not create it and otherwise just fails to open.
        if store_path:
            os.makedirs(store_path, exist_ok=True)
        out_path = os.path.join(store_path, f"{video_id}.mp4")
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out_writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height))
        if not out_writer.isOpened():
            raise RuntimeError(f"Could not open video writer for {out_path}")

        print(f"Video {video_path}: size={width}x{height}, fps={fps}")
        print(f"Storing annotated video to: {out_path}")

        # 5) Annotate and write frame by frame.
        frame_index = 0
        while True:
            ret, frame_img = cap.read()
            if not ret:
                break  # end of video

            frame_rows = rows_by_frame.get(frame_index)
            if frame_rows is not None:
                for _, row in frame_rows.iterrows():
                    _draw_annotation(frame_img, row, height, polygon_col, label_col, print_bbox)

            # Write the annotated frame to the output video.
            out_writer.write(frame_img)
            frame_index += 1
    finally:
        cap.release()
        if out_writer is not None:
            out_writer.release()

    print("Done drawing polygons on video.")

In [None]:
ROOT_FOLDER_PATH = "/home/mh731nk/_data/experiments_tmp/data/revision_8/cvat_project_raw_unzip"
STORE_FOLDER_PATH = "/home/mh731nk/_data/experiments_tmp/data/revision_8/over_draw_videos/02/videos"

# cv2.VideoWriter does not create missing folders; without this a fresh run would go through
# every video and write nothing.
os.makedirs(STORE_FOLDER_PATH, exist_ok=True)

# Render only the videos that have at least one row in df_polygons.
annotated_video_ids = set(df_polygons["video_id"])

for index, row in df_videos.loc[df_videos["video_id"].isin(annotated_video_ids)].iterrows():
    draw_polygons_on_video(
        df_polygons,
        ROOT_FOLDER_PATH,
        row,
        STORE_FOLDER_PATH,
        polygon_col="interp_polygon",
        label_col="polygon_label",
        print_bbox=True
    )

# Video encode
I want to play the videos in a browser, which is not possible with their original encoding, so they are re-encoded here.

In [None]:
import os
import subprocess

def encode_all_videos(source_folder, dest_folder, ffmpeg_path="/usr/bin/ffmpeg", crf=23, preset="medium"):
    """
    Re-encode all .mp4 videos in 'source_folder' using H.264 and
    store them in 'dest_folder' with the same filename.

    Parameters
    ----------
    source_folder : str
        Folder scanned (non-recursively) for files whose name ends in ".mp4" (case-insensitive).
    dest_folder : str
        Folder receiving the re-encoded files; created when missing. Existing files with the
        same name are overwritten (ffmpeg is called with "-y").
    ffmpeg_path : str
        ffmpeg executable to run. Default "/usr/bin/ffmpeg".
    crf : int or str
        Value passed to ffmpeg's "-crf" option. Default 23.
    preset : str
        Value passed to ffmpeg's "-preset" option. Default "medium".

    Returns
    -------
    None
        Progress and ffmpeg failures are reported with print(); a failing file does not stop
        the remaining files from being processed.
    """
    # Make sure the destination folder exists
    os.makedirs(dest_folder, exist_ok=True)

    # os.listdir() returns entries in arbitrary order; sorting makes runs (and logs) reproducible.
    for filename in sorted(os.listdir(source_folder)):
        input_path = os.path.join(source_folder, filename)
        # Skip other file types, and directories that merely happen to be named "*.mp4".
        if not filename.lower().endswith(".mp4") or not os.path.isfile(input_path):
            continue

        print(filename)
        output_path = os.path.join(dest_folder, filename)

        print(f"Re-encoding {input_path} -> {output_path}")

        # Build the ffmpeg command (as an argument list, so no shell quoting is involved)
        cmd = [
            ffmpeg_path,
            "-y",                # Overwrite output if exists
            "-i", input_path,    # Input file
            "-vcodec", "libx264",
            "-crf", str(crf),
            "-preset", preset,
            "-pix_fmt", "yuv420p",
            output_path
        ]

        result = subprocess.run(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True  # If using Python 3.7+, for string output
        )
        if result.returncode != 0:
            print("FFmpeg failed, see error below:")
            print(result.stderr)
        else:
            print("FFmpeg succeeded!")

    print("Done!")

In [None]:
# Input is the folder the drawing step wrote its annotated videos to;
# the H.264 re-encodes go to a sibling "videos_encoded" folder.
source_folder = "/home/mh731nk/_data/experiments_tmp/data/revision_8/over_draw_videos/02/videos"
dest_folder   = "/home/mh731nk/_data/experiments_tmp/data/revision_8/over_draw_videos/02/videos_encoded"

encode_all_videos(
    source_folder=source_folder,
    dest_folder=dest_folder,
)