In [1]:
import os
import json
import numpy as np
import torch
import random
import glob
import time
from torch import nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from scipy.spatial import distance
from tqdm import tqdm
from torch.optim.lr_scheduler import ReduceLROnPlateau
import cv2
from ultralytics import YOLO
import mediapipe as mp

In [2]:
# Prefer the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

cuda


In [None]:
# Pose landmark indices to use (e.g. nose, left shoulder, right shoulder, ...)
LANDMARKS = [0, 11, 12, 15, 16, 23, 24, 25, 26, 27, 28]  # 11 landmarks in total
DR = 'E' # drive letter that holds the project folder
# Optional feature groups appended after the landmark coordinates when a model
# input row is built; each flag is checked by the feature-building functions.
MODEL_INPUT = {
    'bbox_xyxy': True,
    'bbox_ratio': True,
    'bbox_class': True,
    'head_torso_speed': True,
}

VID_RESOLUTION = (3840, 2160)
# Number of consecutive feature rows handed to the GRU for one prediction.
SEQUENCE_LEN = 3
YOLO_PT_PATH = DR + r':\project\CVProject\best.pt'
# 22 = 11 landmarks x (x, y); every enabled flag adds its feature count
# (True is multiplied as 1, False as 0).
input_size = 22 + MODEL_INPUT['bbox_xyxy']*4 + MODEL_INPUT['bbox_ratio']*1 + MODEL_INPUT['bbox_class']*1 + MODEL_INPUT['head_torso_speed']*1
print('input_size: ', input_size)

# Adjust these paths to your own environment.
# NOTE(review): trvl_root and data_root currently point at the same folder.
out_path = DR + r':\project\CVProject\results\result_vid'
trvl_root = DR + r':\addition_yolobbox_json_6'
test_root = DR + r':\project\New_Data\Video\videos'
data_root = DR + r':\addition_yolobbox_json_6'
train_json_folder = f'{data_root}\\train'
train_json_files = [os.path.join(train_json_folder, f) for f in os.listdir(train_json_folder) if f.endswith('.json')]

# Class codes matched against the second-to-last '_' field of a video file name.
cls_filename_list = ['N', 'BY', 'FY', 'SY']

yolo_model = YOLO(YOLO_PT_PATH, verbose=False)
mp_pose = mp.solutions.pose
pose = mp_pose.Pose(static_image_mode=False, min_detection_confidence=0.5)
mp_drawing = mp.solutions.drawing_utils

# Class id -> display name
class_names = {0: 'Normal', 1: 'Danger', 2: 'Fall'}

input_size:  29


### yolo, GRU 동시 실행

In [104]:
# 기존의 train, valid에서 사용된 데이터를 제외 & 원하는 개수로 N, BY, SY, FY의 비율이 같도록 샘플 추출
def test_data_sample(test_root, trvl_data_root, test_num):
    except_data_list = list(map(lambda x: x.split('\\')[-1].replace('.json', '.mp4'), glob.glob(f'{trvl_data_root}\\*\\*')))
    print(except_data_list)
    print(len(except_data_list))
    raw_test_list = list(map(lambda x: x.split('\\')[-1], glob.glob(f'{test_root}\\*')))
    print('raw_test_list_len: ', len(raw_test_list))
    test_list = list(set(raw_test_list) - set(except_data_list))
    print('test_list len: ', len(test_list))
    test_dict = {
        x: random.sample([i for i in test_list if i.split('_')[-2] == x], test_num // 4) 
        for x in cls_filename_list
        }
    print('test_dict length: ', [f'{i}: {len(test_dict[i])}' for i in cls_filename_list])
    return test_dict

# Draw one clip per class, then flatten the per-class lists into one list of file names.
test_dict = test_data_sample(test_root, trvl_root, 4)
test_list = [clip for clips in test_dict.values() for clip in clips]
test_list

['02970_L_F_FY_C5.mp4', '02970_L_F_FY_C6.mp4', '02970_L_F_FY_C7.mp4', '02970_L_F_FY_C8.mp4', '00060_H_A_SY_C1.mp4', '00060_H_A_SY_C2.mp4', '00060_H_A_SY_C3.mp4', '00060_H_A_SY_C4.mp4', '00060_H_A_SY_C5.mp4', '00060_H_A_SY_C6.mp4', '00060_H_A_SY_C7.mp4', '00060_H_A_SY_C8.mp4', '00093_H_A_FY_C1.mp4', '00093_H_A_FY_C2.mp4', '00093_H_A_FY_C3.mp4', '00093_H_A_FY_C4.mp4', '00093_H_A_FY_C5.mp4', '00093_H_A_FY_C6.mp4', '00093_H_A_FY_C7.mp4', '00093_H_A_FY_C8.mp4', '00096_H_A_FY_C1.mp4', '00096_H_A_FY_C2.mp4', '00096_H_A_FY_C3.mp4', '00096_H_A_FY_C4.mp4', '00096_H_A_FY_C5.mp4', '00096_H_A_FY_C6.mp4', '00096_H_A_FY_C7.mp4', '00096_H_A_FY_C8.mp4', '00108_H_A_SY_C1.mp4', '00108_H_A_SY_C2.mp4', '00108_H_A_SY_C3.mp4', '00108_H_A_SY_C4.mp4', '00108_H_A_SY_C5.mp4', '00108_H_A_SY_C6.mp4', '00108_H_A_SY_C7.mp4', '00108_H_A_SY_C8.mp4', '00112_H_A_SY_C1.mp4', '00112_H_A_SY_C2.mp4', '00112_H_A_SY_C3.mp4', '00112_H_A_SY_C4.mp4', '00112_H_A_SY_C5.mp4', '00112_H_A_SY_C6.mp4', '00112_H_A_SY_C7.mp4', '00112_H_A

['01840_Y_A_N_C7.mp4',
 '02375_H_A_BY_C4.mp4',
 '00786_O_E_FY_C8.mp4',
 '01490_O_F_SY_C3.mp4']

In [105]:
# Hand-picked clip names; running this cell overrides the randomly sampled test_list.
test_list = [
 '02429_H_A_N_C5.mp4',
 '00495_H_D_N_C8.mp4',
 '02034_H_A_N_C2.mp4',
 '01136_O_E_N_C3.mp4',
 '00769_O_E_N_C1.mp4',
 '00524_H_D_BY_C4.mp4',
 '00921_O_E_BY_C1.mp4',
 '00860_O_E_BY_C6.mp4',
 '01659_Y_E_BY_C7.mp4',
 '01095_O_E_BY_C4.mp4',
 '02746_H_A_FY_C5.mp4',
 '00561_H_D_FY_C4.mp4',
 '00669_H_D_FY_C2.mp4',
 '00570_H_D_FY_C1.mp4',
 '00297_H_A_FY_C3.mp4',
 '01919_Y_E_SY_C8.mp4',
 '02463_H_A_SY_C4.mp4',
 '00022_H_A_SY_C3.mp4',
 '01809_Y_A_SY_C3.mp4',
 '02301_H_A_SY_C5.mp4'
 ] # For when one fixed test set should be reused across runs

In [None]:
# GRU model definitions
class GRUModel(torch.nn.Module):
    """Two-layer GRU (hidden size 64) followed by dropout and a 3-way linear head."""

    def __init__(self, input_size=input_size):
        super().__init__()
        self.hidden_size = 64
        self.num_layers = 2
        self.gru = nn.GRU(
            input_size=input_size,
            hidden_size=self.hidden_size,
            num_layers=self.num_layers,
            batch_first=True,
            dropout=0.5,
        )
        # The number of output classes is fixed to 3 here.
        self.fc = nn.Linear(self.hidden_size, 3)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Return class logits computed from the last time step of ``x``."""
        batch_size = x.size(0)
        initial_hidden = torch.zeros(self.num_layers, batch_size, self.hidden_size).to(x.device)
        sequence_out, _ = self.gru(x, initial_hidden)
        last_step = sequence_out[:, -1, :]
        return self.fc(self.dropout(last_step))
    
class GRU_JKH(nn.Module):
    """Stacked GRU followed by a linear classification head.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of every GRU layer.
        num_layers: Number of stacked GRU layers.
        num_classes: Number of output logits.
    """

    def __init__(self, input_size, hidden_size=128, num_layers=2, num_classes=3):
        super(GRU_JKH, self).__init__()
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return logits of shape (batch, num_classes) from the last time step."""
        # Size the initial hidden state from the GRU itself; the previous
        # hard-coded (2, batch, 128) only worked for the default arguments.
        h_0 = torch.zeros(
            self.gru.num_layers, x.size(0), self.gru.hidden_size,
            dtype=x.dtype, device=x.device,
        )
        out, _ = self.gru(x, h_0)
        out = self.fc(out[:, -1, :])  # use the output of the last time step
        return out
    
class GRU_deep(nn.Module):
    """Seven stacked 2-layer GRUs (128-256-512-256-128-64-32) with a 2-output linear head."""

    def __init__(self, input_size):
        super().__init__()
        # Widening stage
        self.gru1 = nn.GRU(input_size=input_size, hidden_size=128, num_layers=2, batch_first=True)
        self.gru2 = nn.GRU(input_size=128, hidden_size=256, num_layers=2, batch_first=True)
        self.gru3 = nn.GRU(input_size=256, hidden_size=512, num_layers=2, batch_first=True)
        self.dropout1 = nn.Dropout(0.1)
        # Narrowing stage
        self.gru4 = nn.GRU(input_size=512, hidden_size=256, num_layers=2, batch_first=True)
        self.gru5 = nn.GRU(input_size=256, hidden_size=128, num_layers=2, batch_first=True)
        self.gru6 = nn.GRU(input_size=128, hidden_size=64, num_layers=2, batch_first=True)
        self.dropout2 = nn.Dropout(0.1)
        self.gru7 = nn.GRU(input_size=64, hidden_size=32, num_layers=2, batch_first=True)
        self.fc = nn.Linear(32, 2)

    def forward(self, x):
        """Run the GRU stack and classify the last time step."""
        for layer in (self.gru1, self.gru2, self.gru3):
            x, _ = layer(x)
        x = self.dropout1(x)
        for layer in (self.gru4, self.gru5, self.gru6):
            x, _ = layer(x)
        x = self.dropout2(x)
        x, _ = self.gru7(x)
        # x is indexed as [batch, time step, hidden]; keep only the last time step.
        last_step = x[:, -1, :]
        return self.fc(last_step)


def calculate_head_upper_body_speed(keypoints, prev_keypoints, x1, y1, x2, y2):
    """Distance travelled by the head/shoulder centre between two frames.

    The centre is the mean (x, y) of landmarks 0 (head), 11 (left shoulder)
    and 12 (right shoulder). The bounding-box arguments are accepted but unused.
    """
    def upper_body_center(points):
        head = np.array([points[0, 0], points[0, 1]])
        left_shoulder = np.array([points[11, 0], points[11, 1]])
        right_shoulder = np.array([points[12, 0], points[12, 1]])
        return (head + left_shoulder + right_shoulder) / 3

    current_center = upper_body_center(keypoints)
    previous_center = upper_body_center(prev_keypoints)
    # Euclidean displacement per frame step is used as the speed.
    return distance.euclidean(current_center, previous_center)


def detect_yolo(frame):
    """Run YOLO on a frame and return the first detected box.

    Returns:
        (x1, y1, x2, y2, results, bbox_info) with integer corner coordinates,
        or six ``None`` values when nothing was detected.
    """
    results = yolo_model(frame, verbose=False)

    # Pull the xyxy boxes out of the first result, if there are any.
    boxes = None
    if results and len(results[0].boxes) > 0:
        boxes = results[0].boxes.xyxy.cpu().numpy()

    if boxes is None or len(boxes) == 0:
        print("No bounding boxes detected.")
        return (None,) * 6

    # Only the first box is used when several are detected.
    left, top, right, bottom = boxes[0]
    return int(left), int(top), int(right), int(bottom), results, boxes


def process_landmarks(landmarks):
    """Return the flattened (x, y) coordinates of the landmarks listed in LANDMARKS."""
    xy_coords = landmarks[LANDMARKS, :2]  # selected rows, first two columns
    return xy_coords.flatten()


def detect_fall_input(landmarks, prev_landmarks, x1, y1, x2, y2, width, height):
    """Build the feature row for one frame.

    The row holds the selected landmark (x, y) values followed by the feature
    groups enabled in ``MODEL_INPUT``: the box corners divided by the frame
    size, the box width/height ratio, the ratio class and the upper-body speed.

    Args:
        landmarks: Landmark array of the current frame.
        prev_landmarks: Landmark array of the previous frame, or None (speed 0).
        x1, y1, x2, y2: Bounding-box corners in pixels.
        width, height: Frame size in pixels.

    Returns:
        list of ``input_size`` numbers, or None when the row length is wrong.
    """
    if prev_landmarks is not None:
        speed = calculate_head_upper_body_speed(landmarks, prev_landmarks, x1, y1, x2, y2)
    else:
        speed = 0
    processed_landmarks = process_landmarks(landmarks)

    bbox_xyxy = [x1 / width, y1 / height, x2 / width, y2 / height]

    bbox_width = x2 - x1
    bbox_height = y2 - y1

    # A zero-height box used to yield float('inf'), which turns into NaN inside
    # the GRU and is rejected by StandardScaler. Use the finite 0 fallback that
    # bbox_ratio() also uses for degenerate boxes.
    # NOTE(review): this ratio is width/height while bbox_ratio() returns
    # height/width -- confirm which orientation the model was trained with.
    ratio = bbox_width / bbox_height if bbox_height != 0 else 0.0

    # Ratio class: 0 = N, 1 = D, 2 = F
    if ratio < 0.7:
        ratio_class = 0
    elif ratio < 0.8:
        ratio_class = 1
    else:
        ratio_class = 2

    # Assemble the row in the order the model expects.
    input_data = list(processed_landmarks)
    if MODEL_INPUT['bbox_xyxy']:
        input_data.extend(bbox_xyxy)
    if MODEL_INPUT['bbox_ratio']:
        input_data.append(ratio)
    if MODEL_INPUT['bbox_class']:
        input_data.append(ratio_class)
    if MODEL_INPUT['head_torso_speed']:
        input_data.append(speed)

    if len(input_data) != input_size:
        print(f"Warning: input_data length is {len(input_data)}, expected {input_size}")
        return None
    return input_data


def detect_fall(input_data, isfit):
    """Classify one sequence of feature rows with the global GRU model.

    When ``isfit`` is True the rows are first scaled through ``fit_test``
    against ``base_data``.

    Returns:
        (predicted class index, array of class probabilities).
    """
    features = fit_test(input_data, base_data) if isfit == True else input_data
    batch = torch.FloatTensor(features).unsqueeze(0)  # add the batch dimension

    with torch.no_grad():
        logits = gru_model(batch)

    class_probs = torch.softmax(logits, dim=1).cpu().numpy()[0]
    best_class = torch.argmax(logits).item()
    return best_class, class_probs

In [116]:
def calculate_head_upper_body_speed1(keypoints, prev_keypoints, bbox):
    """Distance travelled by the head/shoulder centre between two JSON frames.

    Each frame is a dict with 'landmark_0', 'landmark_11' and 'landmark_12'
    entries holding 'x' and 'y'. ``bbox`` is accepted but unused.
    """
    def upper_body_center(frame):
        head = np.array([frame['landmark_0']['x'], frame['landmark_0']['y']])
        left_shoulder = np.array([frame['landmark_11']['x'], frame['landmark_11']['y']])
        right_shoulder = np.array([frame['landmark_12']['x'], frame['landmark_12']['y']])
        return (head + left_shoulder + right_shoulder) / 3

    current_center = upper_body_center(keypoints)
    previous_center = upper_body_center(prev_keypoints)
    return distance.euclidean(current_center, previous_center)


def bbox_ratio(bbox):
    """Height divided by width of a box dict with 'x1', 'y1', 'x2', 'y2'; 0 for zero width."""
    width = bbox['x2'] - bbox['x1']
    height = bbox['y2'] - bbox['y1']
    if width == 0:
        return 0
    return height / width


def bbox_ratio_class(ratio):
    """Bucket a box ratio: below 0.7 -> 0 (Normal), [0.7, 0.8) -> 1, otherwise 2 (Fall)."""
    if ratio < 0.7:
        return 0
    return 1 if ratio < 0.8 else 2


def _frame_feature_row(frame, prev_frame):
    """Build one feature row from a pose-JSON frame; None if a landmark is missing."""
    row = []
    for landmark in LANDMARKS:
        key = f'landmark_{landmark}'
        if key not in frame:
            # A short row would make the scaler input ragged, so drop the frame.
            print(f"Missing landmark {landmark} in frame")
            return None
        row.extend([frame[key]['x'], frame[key]['y']])

    bbox = frame.get('bbox')
    if bbox:
        bbox_xyxy = [bbox['x1'], bbox['y1'], bbox['x2'], bbox['y2']]
        b_ratio = bbox_ratio(bbox)
        bbox_class = bbox_ratio_class(b_ratio)
    else:
        bbox_xyxy = [0, 0, 1, 1]
        # Previously left unassigned here (NameError, or a stale value from
        # the preceding frame); 0 is consistent with bbox_class 0.
        b_ratio = 0
        bbox_class = 0

    if prev_frame is not None:
        head_torso_speed = calculate_head_upper_body_speed1(frame, prev_frame, bbox)
    else:
        head_torso_speed = 0

    # Append the optional feature groups enabled in MODEL_INPUT.
    if MODEL_INPUT['bbox_xyxy']:
        row.extend(bbox_xyxy)
    if MODEL_INPUT['bbox_ratio']:
        row.append(b_ratio)
    if MODEL_INPUT['bbox_class']:
        row.append(bbox_class)
    if MODEL_INPUT['head_torso_speed']:
        row.append(head_torso_speed)
    return row


def val_dataset_for_fit(json_files):
    """Collect per-frame feature rows from pose JSON files.

    Each file is read in non-overlapping windows of ``SEQUENCE_LEN`` frames.
    Files with fewer than 90 frames are skipped, and a file that raises while
    being processed is reported and skipped (rows collected before the error
    are kept).

    Args:
        json_files: Paths of JSON files containing a 'pose_data' mapping.

    Returns:
        list of feature rows (one list of numbers per frame).

    Raises:
        ValueError: If no row could be collected at all.
    """
    sequence_length = SEQUENCE_LEN
    sequences = []

    for json_file in tqdm(json_files, desc="Processing JSON files"):
        try:
            with open(json_file, 'r') as f:
                data = json.load(f)

            if len(data['pose_data']) < 90:
                continue

            frames = list(data['pose_data'].values())

            for i in range(0, len(frames) - sequence_length + 1, sequence_length):
                rows = []
                for j in range(sequence_length):
                    # The previous frame is the one right before this frame in
                    # the file, even across window boundaries; the very first
                    # frame of the file has none (speed 0).
                    prev_frame = frames[i + j - 1] if i + j > 0 else None
                    row = _frame_feature_row(frames[i + j], prev_frame)
                    if row is not None:
                        rows.append(row)
                sequences.extend(rows)

        except Exception as e:
            print(f"Error processing file {json_file}: {e}")
            continue
    if not sequences:
        raise ValueError("No valid sequences found in the dataset")
    return sequences

def fit_test(test_input, all_landmarks):
    """Standardize ``test_input`` together with the reference rows.

    A StandardScaler is fitted on the reference rows plus the test rows, and
    the scaled test rows are returned.

    Args:
        test_input: Feature rows to scale.
        all_landmarks: Reference feature rows. This list is NOT modified; the
            previous implementation extended it in place, so the caller's
            reference data grew with every call.

    Returns:
        Array holding the scaled version of each row in ``test_input``.
    """
    rows = list(all_landmarks)  # shallow copy keeps the caller's list untouched
    rows.extend(test_input)
    n_test = len(rows) - len(all_landmarks)
    scaled = StandardScaler().fit_transform(np.array(rows))
    # print(scaled[-SEQUENCE_LEN:])
    return scaled[len(scaled) - n_test:]


### test 영상 재생 사용법
* 시작할 때 autosave 질문에 Y를 입력하면 결과 영상이 자동으로 저장됨. 그렇지 않으면 한 영상의 재생이 끝났을 때 'w'키를 눌러 저장
* 사용 가능 키:
    * q: test 종료
    * n: 현재 영상 종료. 영상 저장 여부 선택 후 다음 영상 재생
    * p: 이전 영상 재생
    * Space: 현재 영상 일시정지 / 재생
    * r: 현재 영상 처음부터 다시 재생
    * `,`(<), `.`(>): 60프레임(약 1초) 전/후로 이동
    

In [120]:
# Load the GRU classifier, then run YOLO -> MediaPipe Pose -> GRU on every test clip.
FRAME_STRIDE = 6    # only every 6th frame is processed
SEEK_FRAMES = 60    # jump size of the ',' and '.' keys
END_FRAME = 600     # frame index at which a clip counts as finished
CROP_MARGIN = 0.3   # the YOLO box is enlarged by this fraction before pose estimation
OUT_SIZE = (1920, 1080)

end_test = 0
input_list = []
gru_pt_path = DR + r':\project\CVProject\results\result_pt_KDH\mediapipe_sensordata_bbox_ratio_speed.pt'

fit_scanf = input('use StandardScaler().fit_transform()? Y/other: ')
if fit_scanf.lower() == 'y':
    isfit = True
    x_normalization = ''
    base_data = val_dataset_for_fit(train_json_files)
else:
    isfit = False
    x_normalization = 'except_normalization_'
    gru_pt_path = gru_pt_path.replace('.pt', '_except_normalization.pt')

autosave_scanf = input('autosave all videos? Y/other: ')
autosave = autosave_scanf.lower() == 'y'

gru_model = GRUModel(input_size=input_size)
print(gru_pt_path)
# The model and its inputs stay on the CPU, so map the checkpoint there too;
# this also lets a checkpoint saved on a GPU load on a CPU-only machine.
gru_model.load_state_dict(torch.load(gru_pt_path, map_location='cpu'))
gru_model.eval()

temp_path = f'{out_path}\\test_temp.mp4'

vid_idx = 0
while vid_idx < len(test_list):
    video_path = test_list[vid_idx]
    print(vid_idx, video_path)
    # Open the clip
    cap = cv2.VideoCapture(test_root + '\\' + video_path)
    vid_name = video_path.split('\\')[-1]
    if not cap.isOpened():
        print(f'Could not open video: {video_path}')
        cap.release()
        vid_idx += 1
        continue

    # Clip properties
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    window_w = int(width * 0.3)
    window_h = int(height * 0.3)

    cv2.namedWindow('Fall Detection', cv2.WINDOW_NORMAL)
    cv2.resizeWindow('Fall Detection', window_w, window_h)
    fps = cap.get(cv2.CAP_PROP_FPS)

    # Output writer; frames are sub-sampled, so the output frame rate is scaled down too.
    fourcc = cv2.VideoWriter_fourcc(*'MP4V')
    out = cv2.VideoWriter(temp_path, fourcc, max(fps // FRAME_STRIDE, 1), OUT_SIZE)

    # Per-clip state. input_list is reset so that a sequence never mixes
    # frames of two different clips.
    input_list = []
    frame_cnt = 0
    prev_landmarks = None
    label = 0
    probs = []
    stop_reason = 'finished'   # 'finished' | 'quit' | 'previous'

    # Frame loop
    while cap.isOpened():
        if frame_cnt >= END_FRAME:
            break
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_cnt)
        ret, frame = cap.read()
        if not ret:
            break

        x1, y1, x2, y2, results, bbox_info = detect_yolo(frame)
        if x1 is None:
            frame_cnt += FRAME_STRIDE
            continue

        # Enlarge the box and clamp it to this clip's own frame size.
        pad_x = (x2 - x1) * CROP_MARGIN
        pad_y = (y2 - y1) * CROP_MARGIN
        bx1 = max(int(x1 - pad_x), 0)
        by1 = max(int(y1 - pad_y), 0)
        bx2 = min(int(x2 + pad_x), width)
        by2 = min(int(y2 + pad_y), height)

        frame_bbox = frame[by1:by2, bx1:bx2]
        if frame_bbox.size == 0:
            # Degenerate box: nothing to run pose estimation on.
            frame_cnt += FRAME_STRIDE
            continue
        # NOTE(review): cv2 frames are BGR while MediaPipe documents RGB input.
        # Left unchanged because the pipeline that produced the training
        # landmarks is not visible here -- confirm before converting.
        results_pose = pose.process(frame_bbox)

        if results_pose.pose_landmarks:
            # Map crop-relative landmarks back to full-frame normalized coordinates.
            landmarks = np.array([
                [(lm.x * (bx2 - bx1) + bx1) / width, (lm.y * (by2 - by1) + by1) / height, lm.z]
                for lm in results_pose.pose_landmarks.landmark
            ])

            if prev_landmarks is not None:
                features = detect_fall_input(landmarks, prev_landmarks, x1, y1, x2, y2, width, height)
                # detect_fall_input returns None for a malformed row; skip it
                # instead of poisoning the sequence.
                if features is not None:
                    input_list.append(features)
                    if len(input_list) >= SEQUENCE_LEN:
                        label, probs = detect_fall(input_list[-SEQUENCE_LEN:], isfit)
                        probs = [float(x) for x in probs]
            else:
                label = None

            prev_landmarks = landmarks
            # Draw the landmarks (frame_bbox is a view, so this draws on frame).
            mp_drawing.draw_landmarks(
                frame_bbox, results_pose.pose_landmarks, mp_pose.POSE_CONNECTIONS,
                mp_drawing.DrawingSpec(color=(245, 117, 66), thickness=6, circle_radius=4),
                mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=6, circle_radius=4)
            )
        else:
            label = None

        # Bounding box and label overlay
        color = (0, 255, 0) if label == 0 else ((255, 255, 0) if label == 2 else (255, 0, 0))
        cv2.rectangle(frame, (x1, y1), (x2, y2), color, 5)
        class_name = class_names[label] if label is not None else 'Unknown'
        cv2.putText(frame, f'{vid_name}, frame {frame_cnt}', (int(window_w*0.15), int(window_h*0.18)), cv2.FONT_HERSHEY_SIMPLEX, 4.5, (10, 10, 10), 5)
        cv2.putText(frame, f'GRU: {class_name, [f"{x:.3}" for x in probs]}', (int(x1), int(y1) - 10), cv2.FONT_HERSHEY_SIMPLEX, 2, color, 4)

        # Write and show the frame
        out.write(cv2.resize(frame, OUT_SIZE))
        cv2.imshow('Fall Detection', frame)

        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            stop_reason = 'quit'
            end_test = 1
            break
        elif key == ord('n'):
            # End this clip now; the save choice is handled after the loop.
            break
        elif key == ord('p'):
            stop_reason = 'previous'
            break
        elif key == 0x20:
            print('paused')
            time.sleep(1)
            while (cv2.waitKey() & 0xFF) != 0x20:
                pass
            print('resume')
        elif key == ord('r'):
            frame_cnt = 0
            print('repeat')
            continue
        elif key == ord('.'):
            frame_cnt += SEEK_FRAMES
            continue
        elif key == ord(','):
            frame_cnt = max(frame_cnt - SEEK_FRAMES, 0)
            continue
        frame_cnt += FRAME_STRIDE

    # The writer must be closed before the temp file is renamed or removed.
    out.release()
    if stop_reason == 'finished':
        save_video = autosave
        if not autosave:
            print('video finished. press w to save video. Press a to autosave all video')
            wkey = cv2.waitKey() & 0xFF
            if wkey == ord('a'):
                autosave = True
            save_video = wkey in (ord('w'), ord('a'))
        if save_video:
            # os.replace overwrites a result from an earlier run; os.rename
            # raises on Windows when the destination already exists.
            os.replace(temp_path, f'{out_path}\\{x_normalization}{vid_name}')
        elif os.path.exists(temp_path):
            os.remove(temp_path)
    elif os.path.exists(temp_path):
        os.remove(temp_path)

    cap.release()
    cv2.destroyAllWindows()
    if end_test == 1:
        break
    if stop_reason == 'previous':
        vid_idx = max(vid_idx - 1, 0)
    else:
        vid_idx += 1

E:\project\CVProject\results\result_pt_KDH\mediapipe_sensordata_bbox_ratio_speed_except_normalization.pt
0 02429_H_A_N_C5.mp4
1 00495_H_D_N_C8.mp4
2 02034_H_A_N_C2.mp4
3 01136_O_E_N_C3.mp4
4 00769_O_E_N_C1.mp4
5 00524_H_D_BY_C4.mp4
6 00921_O_E_BY_C1.mp4
7 00860_O_E_BY_C6.mp4
8 01659_Y_E_BY_C7.mp4
9 01095_O_E_BY_C4.mp4
10 02746_H_A_FY_C5.mp4
11 00561_H_D_FY_C4.mp4
12 00669_H_D_FY_C2.mp4
13 00570_H_D_FY_C1.mp4
14 00297_H_A_FY_C3.mp4
15 01919_Y_E_SY_C8.mp4
16 02463_H_A_SY_C4.mp4
17 00022_H_A_SY_C3.mp4
18 01809_Y_A_SY_C3.mp4
19 02301_H_A_SY_C5.mp4
