In [37]:
import pandas as pd
import os

In [38]:

# # concatenate all data tables into one
# df_uhw = pd.read_csv('data_tables/uhw_data.csv')
# df_medway = pd.read_csv('data_tables/MFT_data.csv')
# df_lus = pd.read_csv('data_tables/LUS_data.csv')
# df_lus = df_lus.drop(columns=['Read Error'], errors='ignore')  # drop Read Error column if it exists
# df_all = pd.concat([df_uhw, df_medway, df_lus], ignore_index=True)

# # remove the 'Unnamed: 0' column (an unnamed leading column, e.g. a saved index) if present
# if 'Unnamed: 0' in df_all.columns:
#     df_all = df_all.drop(columns=['Unnamed: 0'])
    
# df_all.to_csv('data_tables/all_data.csv', index=False)




In [39]:
def crop_mft_videos(frame, video_path, crop_coords_df=None):
    """Crop an MFT frame to the scan area recorded for its video.

    The coordinate table is searched in row order; the first row whose
    'video_path' value is a substring of `video_path` supplies the crop box.

    Args:
        frame: Image array indexed as frame[row, column, ...].
        video_path: Path of the video the frame was read from.
        crop_coords_df: Optional preloaded table with the columns 'video_path',
            'x_start', 'y_start', 'x_end' and 'y_end'. When omitted, the table
            is read from 'mft_crop_coordinates.csv' on every call, so callers
            that crop many frames should load it once and pass it in.

    Returns:
        frame[y_start:y_end, x_start:x_end] for the matching row, or the
        unmodified frame when no row matches.
    """
    if crop_coords_df is None:
        crop_coords_df = pd.read_csv('mft_crop_coordinates.csv')

    for _, row in crop_coords_df.iterrows():
        path_key = row['video_path']
        # A blank cell is read as NaN (a float); `in` on a str would raise TypeError.
        if not isinstance(path_key, str) or path_key not in video_path:
            continue
        # Cast explicitly: a coordinate column that pandas parsed as float
        # cannot be used as a slice bound.
        x_1, y_1, x_2, y_2 = (
            int(row[column]) for column in ('x_start', 'y_start', 'x_end', 'y_end')
        )
        return frame[y_1:y_2, x_1:x_2]

    # No coordinates recorded for this video: return the frame uncropped
    # instead of falling through and implicitly returning None.
    return frame

In [40]:
def crop_jcuh_videos(frame):
    """Return the fixed scan-area window of a JCUH frame.

    The window covers rows 60 up to (not including) 520 and columns 300 up to
    (not including) 620 of `frame`.
    """
    row_window = slice(60, 520)
    col_window = slice(300, 620)
    return frame[row_window, col_window]

In [41]:
def crop_uhw_videos(frame, rel_x=0.2938, rel_y=0.1917, rel_w=0.4125, rel_h=0.8083):
    """Crop a UHW frame to a rectangle given as fractions of the frame size.

    Args:
        frame: Image array whose first two dimensions are (height, width).
            Both 2-D (single-channel) and 3-D (multi-channel) arrays work.
        rel_x: Left edge of the crop as a fraction of the frame width.
        rel_y: Top edge of the crop as a fraction of the frame height.
        rel_w: Crop width as a fraction of the frame width.
        rel_h: Crop height as a fraction of the frame height.

    Returns:
        The slice frame[y:y+h, x:x+w], with each bound truncated to an int.
    """
    # Only the first two dimensions are needed; unpacking exactly three values
    # would raise ValueError on a 2-D (grayscale) frame.
    height, width = frame.shape[:2]
    x = int(rel_x * width)
    y = int(rel_y * height)
    w = int(rel_w * width)
    h = int(rel_h * height)
    return frame[y:y + h, x:x + w]

In [42]:
def crop_frame(frame, video_path):
    """Apply the crop rule selected by the site tag found in `video_path`.

    The tags 'MFT', 'JCUH' and 'UHW' are tested in that order as substrings of
    the path, and the first one present decides which cropper runs. A path
    containing none of them gets the frame back untouched.
    """
    # Lambdas defer each call so that only the selected cropper is evaluated.
    site_croppers = (
        ('MFT', lambda: crop_mft_videos(frame, video_path)),
        ('JCUH', lambda: crop_jcuh_videos(frame)),
        ('UHW', lambda: crop_uhw_videos(frame)),
    )
    for site_tag, run_crop in site_croppers:
        if site_tag in video_path:
            return run_crop()
    # no cropping rule applies: return the original frame
    return frame

In [43]:
#  extract roughly `fps` frames per second (5 by default) from a video
def extract_frames(video_path, data_frame, fps=5):
    """Sample, crop and resize frames from one video.

    Args:
        video_path: Path of the video to read; also used to select the crop rule.
        data_frame: Table with a 'File Path' column. If it also has a 'neglect'
            column and the first row matching `video_path` has neglect == True,
            the video is skipped.
        fps: Target number of frames to keep per second of video.

    Returns:
        A list of frames, each cropped with crop_frame and resized with
        cv2.resize(..., (300, 450)). The list is empty for neglected videos
        and for videos from which no frame could be read.
    """
    # only extract frames for videos in data_frame where column 'neglect' is not True
    if 'neglect' in data_frame.columns:
        video_record = data_frame[data_frame['File Path'] == video_path]
        # Compare with `== True` rather than truthiness so a NaN cell is not
        # treated as "neglect".
        if not video_record.empty and video_record.iloc[0]['neglect'] == True:
            return []

    # cv2 is not among the notebook's top-level imports, so it is imported here.
    import cv2

    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        video_fps = cap.get(cv2.CAP_PROP_FPS)
        # int() truncates to 0 when the source frame rate is below the target
        # (or is reported as 0); fall back to keeping every frame instead of
        # hitting a modulo-by-zero below.
        frame_interval = int(video_fps / fps) or 1
        frame_count = 0

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            # Crop and resize only the frames that are kept; doing it for
            # every decoded frame is wasted work.
            if frame_count % frame_interval == 0:
                cropped = crop_frame(frame, video_path)
                if cropped is None:
                    # No crop was produced for this video: use the full frame
                    # rather than passing None to cv2.resize.
                    cropped = frame
                # resize frame to 300x450
                frames.append(cv2.resize(cropped, (300, 450)))
            frame_count += 1
    finally:
        # Release the capture even if cropping or resizing raises.
        cap.release()
    return frames

In [44]:
#  save extracted frames to disk
def save_frames(frames, output_dir, base_filename):
    """Write each frame to `output_dir` as a numbered PNG file.

    Files are named '<base_filename>_frame_<index>.png', with the index
    zero-padded to four digits in the order the frames are given.

    Args:
        frames: Iterable of images accepted by cv2.imwrite.
        output_dir: Destination directory; created (with parents) if missing.
            An empty string writes to the current working directory.
        base_filename: Prefix for every file name.

    Raises:
        OSError: If cv2.imwrite reports that a frame could not be written.
    """
    import cv2
    import os
    # exist_ok avoids the check-then-create race; an empty path means the
    # current directory, for which there is nothing to create.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    for i, frame in enumerate(frames):
        frame_filename = os.path.join(output_dir, f"{base_filename}_frame_{i:04d}.png")
        # cv2.imwrite can signal failure by returning False instead of raising,
        # so a dropped frame would otherwise go unnoticed.
        if not cv2.imwrite(frame_filename, frame):
            raise OSError(f"Failed to write frame image: {frame_filename}")

In [45]:
# Load the combined data table and keep its first 10 rows as a working sample
df_all = pd.read_csv('data_tables/all_data.csv')
df_sample = df_all.iloc[:10]

# For every video in the sample: extract frames at 5 fps and write them as
# PNG files named after the video file (extension removed)
for index, row in df_sample.iterrows():
    video_path = row['File Path']
    output_dir = 'output_frames'
    video_file = os.path.basename(video_path)
    base_filename = os.path.splitext(video_file)[0]
    frames = extract_frames(video_path, df_sample, fps=5)
    save_frames(frames, output_dir, base_filename)