# Video Preprocessing

Description

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import glob
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time
import json
import gc
from tqdm.notebook import tqdm

In [6]:
from video_analyzer import VideoAnalyzer, analyze_none_landmarks
from preprocessor import Preprocessor

In [4]:
timestamp = "04242025"
motion_version = "versionB"
pose_version = "versionB"
preprocessing_version = "v4"
path_to_root = "/home/ben/projects/SaoPauloBrazilChapter_BrazilianSignLanguage/"

In [5]:
metadata = pd.read_csv(os.path.join(
    path_to_root,
    "data",
    "raw",
    "combined",
    "target_dataset_video_metadata.csv"
    ))

In [None]:
for i, metadata_row in metadata.iterrows():
    print(f"\rProcessing video {i+1} of {len(metadata)}: {metadata_row.filename}", end="")
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
    analyzer = VideoAnalyzer(
        metadata_row,
        timestamp,
        path_to_root,
        verbose=False,
        motion_detection_version=motion_version,
        pose_detection_version=pose_version
    )
    pose_data = analyzer.pose_detect()
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    pose_result = analyzer.pose_analyze()
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0'

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0'
    motion_data = analyzer.motion_detect()
    motion_result = analyzer.motion_analyze()

    analyzer.save_analysis_info()

In [17]:
lm_folder = "/home/ben/projects/SaoPauloBrazilChapter_BrazilianSignLanguage/data/interim/RawPoseLandmarks/versionB/"

In [28]:
none_infos = []
for i, row in metadata.iterrows():
    fn = row['filename'].replace('.mp4', '.npy')
    if not os.path.exists(os.path.join(lm_folder, fn)):
        print(f"File {fn} does not exist")
        continue
    landmarks = np.load(os.path.join(lm_folder, fn), allow_pickle=True)
    none_info = analyze_none_landmarks(landmarks)
    none_info['filename'] = fn
    none_infos.append(none_info)

In [13]:
def nested_dict_to_df(nested_dict, prefix=''):
    flat_dict = {}
    
    def flatten(d, parent_key=''):
        for key, value in d.items():
            new_key = f"{parent_key}_{key}" if parent_key else key
            
            if isinstance(value, dict):
                flatten(value, new_key)
            else:
                flat_dict[new_key] = value
    
    flatten(nested_dict)
    return pd.DataFrame([flat_dict])

In [31]:
nested_dict_to_df(none_infos[0])

Unnamed: 0,overall_total_frames,overall_total_landmark_frames,overall_none_landmark_frames,overall_landmark_frame_percentage,overall_none_details,face_landmarks_total_frames,face_landmarks_none_frames,face_landmarks_frame_percentage,face_landmarks_continuous,face_landmarks_first_valid,...,right_hand_landmarks_frame_percentage,right_hand_landmarks_continuous,right_hand_landmarks_first_valid,right_hand_landmarks_last_valid,right_hand_landmarks_valid_range_total_frames,right_hand_landmarks_valid_range_none_frames,right_hand_landmarks_valid_range_frame_percentage,right_hand_landmarks_valid_range_none_details,right_hand_landmarks_none_details,filename
0,44,176,41,23.295455,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 31, 33, 34, 35,...",44,0,0.0,True,0,...,45.454545,False,10,34,25,1,4.0,[31],"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 31, 35, 36, 37,...",ajudar_ne_1.npy


In [32]:
none_dfs = [nested_dict_to_df(info) for info in none_infos]

In [35]:
none_df = pd.concat(none_dfs)

In [37]:
none_df.columns

Index(['overall_total_frames', 'overall_total_landmark_frames',
       'overall_none_landmark_frames', 'overall_landmark_frame_percentage',
       'overall_none_details', 'face_landmarks_total_frames',
       'face_landmarks_none_frames', 'face_landmarks_frame_percentage',
       'face_landmarks_continuous', 'face_landmarks_first_valid',
       'face_landmarks_last_valid', 'face_landmarks_valid_range_total_frames',
       'face_landmarks_valid_range_none_frames',
       'face_landmarks_valid_range_frame_percentage',
       'face_landmarks_valid_range_none_details',
       'face_landmarks_none_details', 'pose_landmarks_total_frames',
       'pose_landmarks_none_frames', 'pose_landmarks_frame_percentage',
       'pose_landmarks_continuous', 'pose_landmarks_first_valid',
       'pose_landmarks_last_valid', 'pose_landmarks_valid_range_total_frames',
       'pose_landmarks_valid_range_none_frames',
       'pose_landmarks_valid_range_frame_percentage',
       'pose_landmarks_valid_range_none

In [39]:
len( none_df.query('overall_landmark_frame_percentage > 0'))

130

In [40]:
none_df.query('left_hand_landmarks_frame_percentage==100.0')

Unnamed: 0,overall_total_frames,overall_total_landmark_frames,overall_none_landmark_frames,overall_landmark_frame_percentage,overall_none_details,face_landmarks_total_frames,face_landmarks_none_frames,face_landmarks_frame_percentage,face_landmarks_continuous,face_landmarks_first_valid,...,right_hand_landmarks_frame_percentage,right_hand_landmarks_continuous,right_hand_landmarks_first_valid,right_hand_landmarks_last_valid,right_hand_landmarks_valid_range_total_frames,right_hand_landmarks_valid_range_none_frames,right_hand_landmarks_valid_range_frame_percentage,right_hand_landmarks_valid_range_none_details,right_hand_landmarks_none_details,filename
0,61,244,97,39.754098,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...",61,0,0.0,True,0,...,59.016393,False,17,42,26,1,3.846154,[41],"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...",cabeça_ne_1.npy
0,39,156,62,39.74359,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...",39,0,0.0,True,0,...,58.974359,False,7,29,23,7,30.434783,"[9, 12, 13, 14, 16, 17, 27]","[0, 1, 2, 3, 4, 5, 6, 9, 12, 13, 14, 16, 17, 2...",café_ne_1.npy
0,51,204,81,39.705882,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...",51,0,0.0,True,0,...,58.823529,False,13,34,22,1,4.545455,[33],"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 33,...",comer_ne_1.npy
0,46,184,74,40.217391,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...",46,0,0.0,True,0,...,60.869565,False,17,36,20,2,10.0,"[21, 34]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...",crescer_ne_1.npy
0,33,132,49,37.121212,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...",33,0,0.0,True,0,...,48.484848,True,8,24,17,0,0.0,[],"[0, 1, 2, 3, 4, 5, 6, 7, 25, 26, 27, 28, 29, 3...",filho_ne_1.npy
0,39,156,74,47.435897,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...",39,0,0.0,True,0,...,89.74359,False,10,30,21,17,80.952381,"[11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 2...","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14,...",garganta_ne_1.npy
0,41,164,91,55.487805,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...",41,8,19.512195,True,0,...,82.926829,False,6,28,23,16,69.565217,"[9, 10, 11, 14, 15, 16, 17, 18, 19, 20, 21, 22...","[0, 1, 2, 3, 4, 5, 9, 10, 11, 14, 15, 16, 17, ...",homem_ne_1.npy
0,36,144,53,36.805556,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...",36,0,0.0,True,0,...,47.222222,False,11,30,20,1,5.0,[29],"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 29, 31, 32,...",ouvir_ne_1.npy
0,41,164,62,37.804878,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...",41,0,0.0,True,0,...,51.219512,False,9,33,25,5,20.0,"[11, 14, 15, 30, 31]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 11, 14, 15, 30, 31...",pai_ne_1.npy
0,41,164,56,34.146341,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...",41,0,0.0,True,0,...,36.585366,False,8,36,29,3,10.344828,"[17, 18, 21]","[0, 1, 2, 3, 4, 5, 6, 7, 17, 18, 21, 37, 38, 3...",sopa_ne_1.npy


In [38]:
for i, row in none_df.query('overall_landmark_frame_percentage > 0').sort_values('overall_landmark_frame_percentage', ascending=False).iterrows():
    print(row['filename'], '-',row['overall_total_frames'], 'frames -',row['overall_landmark_frame_percentage'], '%')
    print('left hand: ', row['left_hand_landmarks_frame_percentage'],'%')
    print(row['left_hand_landmarks_none_details'])
    print('right hand: ', row['right_hand_landmarks_frame_percentage'],'%')
    print(row['right_hand_landmarks_none_details'])
    print('---')
    print()


homem_ne_1.npy - 41 frames - 55.487804878048784 %
left hand:  100.0 %
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40]
right hand:  82.92682926829268 %
[0, 1, 2, 3, 4, 5, 9, 10, 11, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40]
---

garganta_ne_1.npy - 39 frames - 47.43589743589743 %
left hand:  100.0 %
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38]
right hand:  89.74358974358975 %
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 31, 32, 33, 34, 35, 36, 37, 38]
---

sorvete_ne_1.npy - 32 frames - 46.875 %
left hand:  100.0 %
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
right hand:  87.5 %
[0, 1, 2, 5, 6, 7, 8

In [36]:
none_df

Unnamed: 0,overall_total_frames,overall_total_landmark_frames,overall_none_landmark_frames,overall_landmark_frame_percentage,overall_none_details,face_landmarks_total_frames,face_landmarks_none_frames,face_landmarks_frame_percentage,face_landmarks_continuous,face_landmarks_first_valid,...,right_hand_landmarks_frame_percentage,right_hand_landmarks_continuous,right_hand_landmarks_first_valid,right_hand_landmarks_last_valid,right_hand_landmarks_valid_range_total_frames,right_hand_landmarks_valid_range_none_frames,right_hand_landmarks_valid_range_frame_percentage,right_hand_landmarks_valid_range_none_details,right_hand_landmarks_none_details,filename
0,44,176,41,23.295455,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 31, 33, 34, 35,...",44,0,0.000000,True,0,...,45.454545,False,10,34,25,1,4.000000,[31],"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 31, 35, 36, 37,...",ajudar_ne_1.npy
0,79,316,0,0.000000,[],79,0,0.000000,True,0,...,0.000000,True,0,78,79,0,0.000000,[],[],ajudar_sb_2.npy
0,115,460,0,0.000000,[],115,0,0.000000,True,0,...,0.000000,True,0,114,115,0,0.000000,[],[],ajudar_uf_3.npy
0,141,564,45,7.978723,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14,...",141,0,0.000000,True,0,...,14.184397,False,12,140,129,8,6.201550,"[16, 17, 20, 75, 81, 86, 88, 111]","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17,...",ajudar_vl_4.npy
0,108,432,25,5.787037,"[4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,...",108,0,0.000000,True,0,...,11.111111,False,0,107,108,12,11.111111,"[4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 62, 99]","[4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 62, 99]",ajudar_vl_5.npy
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,64,256,25,9.765625,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 21,...",64,0,0.000000,True,0,...,6.250000,False,0,63,64,4,6.250000,"[21, 22, 23, 45]","[21, 22, 23, 45]",vagina_sb_2.npy
0,110,440,1,0.227273,[71],110,0,0.000000,True,0,...,0.909091,False,0,109,110,1,0.909091,[71],[71],vagina_uf_3.npy
0,150,600,22,3.666667,"[29, 30, 31, 32, 35, 36, 38, 39, 41, 103, 105,...",150,0,0.000000,True,0,...,14.666667,False,0,138,139,11,7.913669,"[29, 30, 31, 32, 35, 36, 38, 39, 41, 103, 105]","[29, 30, 31, 32, 35, 36, 38, 39, 41, 103, 105,...",vagina_vl_4.npy
0,215,860,23,2.674419,"[160, 161, 162, 163, 164, 165, 166, 167, 168, ...",215,0,0.000000,True,0,...,10.697674,False,0,214,215,23,10.697674,"[160, 161, 162, 163, 164, 165, 166, 167, 168, ...","[160, 161, 162, 163, 164, 165, 166, 167, 168, ...",vagina_vl_5.npy


In [14]:
metadata[41:44]

Unnamed: 0,filename,label,data_source,sign_id,signer_number,frame_count,fps,duration_sec,width,height,needs_flip,cleaned
41,bebê_vl_6.mp4,bebê,vl,0,3,331,59.94006,5.522183,1920,1080,False,True
42,cabeça_ne_1.mp4,cabeça,ne,0,1,61,12.0,5.083333,240,176,False,True
43,cabeça_sb_2.mp4,cabeça,sb,1,1,88,29.97003,2.936267,1280,720,False,True


In [41]:
for i, metadata_row in tqdm(metadata.iterrows(), total=len(metadata)):
    gc.collect()
    
    with open(os.path.join(
        path_to_root, 
        "data", 
        "interim", 
        "Analysis",
        f"{timestamp}_motion{motion_version}_pose{pose_version}", 
        metadata_row["filename"].split(".")[0] + "_analysis_info.json"
        )) as f:
        analysis_info = json.load(f)
    
    preprocessing_params = {
        "face_width_aim": 0.155,
        "shoulders_width_aim": 0.35,
        "face_midpoint_to_shoulders_height_aim": 0.275,
        "shoulders_y_aim": 0.52,
        "use_statistic": "mean",
        "use_stationary_frames": True,
        "skip_stationary_frames": False,
        "start_frame": analysis_info['motion_analysis']['start_frame'],
        "end_frame": analysis_info['motion_analysis']['end_frame'],
    }

    preprocessor = Preprocessor(
            metadata_row,
            preprocessing_params,
            path_to_root,
            preprocess_version=preprocessing_version,
            verbose=False,
            save_intermediate=True,
        )

    preprocessor.preprocess_landmarks()
    # preprocessor.preprocess_video()
    

        
    # Force garbage collection after each video
    gc.collect()

  0%|          | 0/150 [00:00<?, ?it/s]

In [11]:
nf = [
                    0,
                    1,
                    2,
                    3,
                    4,
                    29,
                    31,
                    32,
                    33,
                    34,
                    35,
                    36
                ]

In [12]:
nf

[0, 1, 2, 3, 4, 29, 31, 32, 33, 34, 35, 36]

In [13]:
ns = []

In [17]:
            # Find sequences of consecutive None frames
none_sequences = []
seq_start = nf[0]
prev_frame = nf[0]
for frame in nf[1:] + [nf[-1] + 2]:  # Add sentinel value
    if frame != prev_frame + 1:
        # End of sequence
        none_sequences.append((seq_start, prev_frame))
        seq_start = frame
    prev_frame = frame

In [18]:
none_sequences

[(0, 4), (29, 29), (31, 36)]