<a href="https://colab.research.google.com/github/mobarakol/tutorial_notebooks/blob/main/Dataset_Preparation_Endonasal_Real.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Endonasal data preparation from PitVIS videos:

Video links:<br>
https://drive.google.com/file/d/1-aFmRseZ19OJvIEnFWNYfJdtKEw3gJmz/view?usp=sharing <br>
https://drive.google.com/file/d/1Qe_ml7kS62DWbrxRD-IUkmP-BpZNg1h-/view?usp=sharing <br>
https://drive.google.com/file/d/1n6uHQ2mPu0DTnBD_Bsn4V0eRIp2Z6Njk/view?usp=sharing

In [None]:
!gdown --id 1-aFmRseZ19OJvIEnFWNYfJdtKEw3gJmz
!gdown --id 1Qe_ml7kS62DWbrxRD-IUkmP-BpZNg1h-
!gdown --id 1n6uHQ2mPu0DTnBD_Bsn4V0eRIp2Z6Njk

/bin/sh: 1: gdown: not found


In [None]:
# List the working directory to check which inputs and output folders are present.
!ls

Annotations  Annotations.zip  imgs  masks  video_02.mp4  videos


Video-to-frame conversion (one frame is saved for roughly every second of video):

In [None]:
# global imports
import argparse
import cv2
import numpy as np
import os

# strong typing
from pathlib import Path
from typing import List


def main(pt_videos: Path, pt_images: Path):
    """Entry point: extract images from every video in {pt_videos} into {pt_images}."""
    convert_videos_to_images(
        pt_images=pt_images,
        pt_videos=pt_videos,
    )


def convert_videos_to_images(pt_videos: Path, pt_images: Path):
    """Convert every video file in {pt_videos} to images saved under {pt_images}.

    Each video gets its own sub-directory of {pt_images}, named after the video
    file without its final extension (``video_01.mp4`` -> ``video_01``).

    Args:
        pt_videos: directory that contains the video files.
        pt_images: parent directory for the per-video image folders; created if missing.
    """
    create_directory(pt=pt_images)

    ls_videos: List[str] = sorted(os.listdir(pt_videos))
    print(ls_videos)
    for str_video in ls_videos:
        pt_video: Path = pt_videos.joinpath(str_video)
        if not pt_video.is_file():
            # Sub-directories (for example notebook checkpoint folders) are not videos.
            continue
        # Path.stem drops only the last suffix, so names containing extra dots
        # ("clip.v2.mp4") keep their full name instead of collapsing to "clip".
        pt_image: Path = pt_images.joinpath(pt_video.stem)
        print(pt_video)
        create_directory(pt=pt_image)
        convert_video_to_image(pt_video=pt_video, pt_image=pt_image)


def convert_video_to_image(pt_video: Path, pt_image: Path):
    """Convert a single video from {pt_video} to images saved to {pt_image}.

    One frame is written for every ``ceil(fps)`` frames read, i.e. roughly one
    image per second of video. Images are named by a zero-padded running index
    (``0000000.png``, ``0000001.png``, ...).

    Args:
        pt_video: path of the video file to read.
        pt_image: existing directory that receives the PNG images.
    """
    video_capture = cv2.VideoCapture(str(pt_video))
    try:
        if not video_capture.isOpened():
            print(f"{pt_video} could not be opened; no images written.")
            return

        float_fps = video_capture.get(cv2.CAP_PROP_FPS)
        if not np.isfinite(float_fps) or float_fps <= 0:
            # Without a usable frame rate there is no sampling step to apply.
            print(f"{pt_video} reports an invalid frame rate ({float_fps}); no images written.")
            return

        # np.ceil returns a float; cast so the step really is an integer.
        int_frames_per_second: int = int(np.ceil(float_fps))
        print('int_frames_per_second', int_frames_per_second)

        int_frame: int = 0    # frames read so far
        count: int = 0        # index of the next sampled frame (used as file name)
        int_written: int = 0  # images actually written to disk
        while video_capture.isOpened():
            bool_success, np_frame_matrix = video_capture.read()
            if not bool_success:
                break
            if int_frame % int_frames_per_second == 0:
                pt_image_frame: Path = pt_image.joinpath(f"{count:07}.png")
                if cv2.imwrite(str(pt_image_frame), np_frame_matrix):
                    int_written += 1
                # Advance the index even if the write failed so later file names
                # keep pointing at the same sampling position.
                count += 1
            int_frame += 1
    finally:
        # Always free the decoder, including on early return or error.
        video_capture.release()

    # Report the number of images written, not the number of frames read.
    print(f"{pt_video} successfully converted to {int_written} images.")


def create_directory(pt: Path):
    """Create the directory {pt}, including missing parents, if it does not already exist.

    Args:
        pt: directory path to create.
    """
    # makedirs with exist_ok avoids the check-then-create race and also works
    # when intermediate directories are missing.
    os.makedirs(pt, exist_ok=True)


if __name__ == "__main__":
    # The parser documents the two inputs; an empty argument list is parsed on
    # purpose so the command-line arguments of the running process are ignored
    # and the defaults below are always used.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--pt_videos", type=str, help="videos path (parent directory)", default='videos')
    arg_parser.add_argument("--pt_images", type=str, help="images path (parent directory)", default='imgs')
    args = arg_parser.parse_args([])
    # Call main() directly: the former SystemExit(...) wrapper was only
    # constructed, never raised, so it had no effect.
    main(pt_videos=Path(args.pt_videos), pt_images=Path(args.pt_images))


['video_01.mp4', 'video_02.mp4', 'video_03.mp4']
videos/video_01.mp4
int_frames_per_second 24.0
videos/video_01.mp4 successfully converted to 172812 images.
videos/video_02.mp4
int_frames_per_second 24.0
videos/video_02.mp4 successfully converted to 145631 images.
videos/video_03.mp4
int_frames_per_second 24.0
videos/video_03.mp4 successfully converted to 82049 images.


Sub-grouping the clear frames into contiguous groups, split wherever blurred frames interrupt the sequence:

In [None]:
from operator import itemgetter
from itertools import groupby
import pandas as pd
from glob import glob
import shutil
import os

def consecutive_runs(frame_numbers):
    """Split a sequence of integers into lists of consecutive values.

    Within a run of consecutive numbers the difference ``position - value`` is
    constant, so grouping on that difference separates the runs. The input is
    expected to be in ascending order.

    Example: [0, 1, 2, 5, 6, 9] -> [[0, 1, 2], [5, 6], [9]]
    """
    return [
        list(map(itemgetter(1), run))
        for _, run in groupby(enumerate(frame_numbers), lambda pair: pair[0] - pair[1])
    ]


anno_root = 'Annotations'
imgs_videos = sorted(glob('imgs/video_*'))
for imgs_video_path in imgs_videos:
    # Derive the video id from the folder name ("video_01" -> "01") rather than
    # slicing the last two characters of the path.
    video_name = os.path.basename(os.path.normpath(imgs_video_path))
    video_id = video_name.split('_')[-1]
    anno_path = os.path.join(anno_root, 'annotations_{}.csv'.format(video_id))
    df = pd.read_csv(anno_path)
    # Rows with int_step == -1 are excluded; the rest count as clear frames.
    # NOTE(review): assumes int_time is ascending and matches the image file index - confirm.
    nonblur_frames = df.loc[df['int_step'] != -1, 'int_time'].tolist()
    print('number of clear frames:', len(nonblur_frames),'/', len(df))
    for group_count, group in enumerate(consecutive_runs(nonblur_frames), start=1):
        group_folder = "{}_{:02d}".format(video_name, group_count)
        group_path = os.path.join(imgs_video_path, group_folder)
        print('group_path:',group_path, anno_path)
        os.makedirs(group_path, mode = 0o777, exist_ok = True)
        skipped = 0
        for img_num in group:
            src_file = os.path.join(imgs_video_path, "{:07d}.png".format(img_num))
            try:
                shutil.move(src_file, group_path)
            except OSError:
                # Best effort: the frame may be missing or already moved by an
                # earlier run (shutil.Error is a subclass of OSError).
                skipped += 1
        if skipped:
            print('skipped', skipped, 'frames that could not be moved into', group_path)

number of clear frames: 6816 / 7202
group_path: imgs/video_01/video_01_01 Annotations/annotations_01.csv
group_path: imgs/video_01/video_01_02 Annotations/annotations_01.csv
group_path: imgs/video_01/video_01_03 Annotations/annotations_01.csv
group_path: imgs/video_01/video_01_04 Annotations/annotations_01.csv
group_path: imgs/video_01/video_01_05 Annotations/annotations_01.csv
group_path: imgs/video_01/video_01_06 Annotations/annotations_01.csv
group_path: imgs/video_01/video_01_07 Annotations/annotations_01.csv
group_path: imgs/video_01/video_01_08 Annotations/annotations_01.csv
group_path: imgs/video_01/video_01_09 Annotations/annotations_01.csv
group_path: imgs/video_01/video_01_10 Annotations/annotations_01.csv
group_path: imgs/video_01/video_01_11 Annotations/annotations_01.csv
group_path: imgs/video_01/video_01_12 Annotations/annotations_01.csv
group_path: imgs/video_01/video_01_13 Annotations/annotations_01.csv
group_path: imgs/video_01/video_01_14 Annotations/annotations_01.cs

Removing the blurred frames (the images still left directly inside each video folder after sub-grouping):

In [None]:
# PNGs that still sit directly inside imgs/video_* were not moved into a group
# folder by the sub-grouping step; delete them.
blurred_imgs_videos = sorted(glob('imgs/video_*/*.png'))
print(len(blurred_imgs_videos))
for blurred_img_path in blurred_imgs_videos:
    os.remove(blurred_img_path)

0


Generating a binary mask per image that marks the circular field of view, i.e. everything except the black padding of the limited-view endoscope:

In [None]:
from glob import glob
from PIL import Image
import matplotlib.pyplot as plt
import cv2
from skimage import morphology

# For every grouped frame, detect the circular field of view and save a mask
# (255 inside the circle, 0 outside) under masks/, mirroring the imgs/ layout.
imgs_videos = sorted(glob('imgs/video_*/video*/*.png'))
print(len(imgs_videos))
for img_path in imgs_videos:
    # Mirror the path relative to imgs/ instead of slicing a fixed number of characters.
    mask_path = os.path.join('masks', os.path.relpath(img_path, 'imgs'))
    os.makedirs(os.path.dirname(mask_path), mode = 0o777, exist_ok = True)
    # Close the file handle promptly; thousands of images are processed.
    with Image.open(img_path) as pil_img:
        img = np.array(pil_img.convert('L'))
    # Binary image: every non-black pixel becomes 255.
    mask = np.where(img != 0, 255, 0).astype(np.uint8)
    circles = cv2.HoughCircles(mask, cv2.HOUGH_GRADIENT, 4.0, 100)
    if circles is None:
        print('no mask:', img_path)
        continue
    # Use the first detected circle; convert to plain Python ints because some
    # OpenCV builds reject NumPy integer types in drawing calls.
    x, y, r = (int(value) for value in np.round(circles[0, 0]))
    mask1 = np.zeros(img.shape, np.uint8)
    # Shrink the radius by 10 px, clamped so a small circle cannot yield a negative radius.
    cv2.circle(mask1, (x, y), max(r - 10, 0), 255, -1)
    cv2.imwrite(mask_path, mask1)

15432


Blacking out everything outside the circular field of view, which removes any unwanted text overlaid on the padding (images are overwritten in place):

In [None]:
from glob import glob
from PIL import Image
import matplotlib.pyplot as plt
import cv2
from skimage import morphology

# For every grouped frame, detect the circular field of view and set all pixels
# outside it to black. Each image is overwritten in place.
imgs_videos = sorted(glob('imgs/video_*/video*/*.png'))
print(len(imgs_videos))
for img_path in imgs_videos:
    # Close the file handle promptly; thousands of images are processed.
    with Image.open(img_path) as pil_img:
        pil_rgb = pil_img.convert('RGB')
    img = np.array(pil_rgb.convert('L'))
    img_rgb = np.array(pil_rgb)
    # Binary image: every non-black pixel becomes 255.
    mask = np.where(img != 0, 255, 0).astype(np.uint8)
    circles = cv2.HoughCircles(mask, cv2.HOUGH_GRADIENT, 4.0, 100)
    if circles is None:
        print('no mask:', img_path)
        continue
    # Use the first detected circle; convert to plain Python ints because some
    # OpenCV builds reject NumPy integer types in drawing calls.
    x, y, r = (int(value) for value in np.round(circles[0, 0]))
    mask1 = np.zeros(img.shape, np.uint8)
    cv2.circle(mask1, (x, y), r, 255, -1)
    img_rgb[mask1 == 0] = 0
    # PIL delivers RGB, while cv2.imwrite expects BGR channel order.
    img_bgr = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2BGR)
    if not cv2.imwrite(img_path, img_bgr):
        print('could not write:', img_path)