# Extract Salient Frame Poses into a CSV File (Mimic PoseRAC's pre_train.py)

In [10]:
import numpy as np
import pandas as pd
import glob
import os
import sys
import deeplabcut
import cv2
from tqdm.auto import tqdm

In [47]:
# Notebook-wide display settings

# Arrays with more than this many elements are summarised with "..." when printed.
# 1000 is NumPy's default, so this restores the stock behaviour; raise the value
# (e.g. to sys.maxsize) if full, untruncated arrays are needed.
PRINT_THRESHOLD = 1000
np.set_printoptions(threshold=PRINT_THRESHOLD)

In [4]:
# The DeepLabCut project settings live in the config.yaml of the project folder below.
_DLC_PROJECT_DIR = os.path.join(os.getcwd(), 'examples', 'CowBytes-Single-Sadat-2024-06-30')
PATH_CONFIG_FILE = os.path.join(_DLC_PROJECT_DIR, 'config.yaml')

# Per the recorded output of this cell, this call also creates the training dataset.
deeplabcut.load_demo_data(PATH_CONFIG_FILE)

This is not an official demo dataset.
Loaded, now creating training data...
The training dataset is successfully created. Use the function 'train_network' to start training. Happy training!


In [60]:
# Input/output locations and dataset layout [EDIT HERE]
# Every path lives under <cwd>/examples/BiteCountA_pose.
_DATASET_ROOT = os.path.join(os.getcwd(), 'examples', 'BiteCountA_pose')

# Inputs: source videos and the CSV of annotated salient frame numbers
VIDEO_DIR = os.path.join(_DATASET_ROOT, 'video', 'train')
FRAMES_CSV_FILE = os.path.join(_DATASET_ROOT, 'annotation', 'pose_train.csv')

# Outputs: extracted frame images and the combined pose annotation file
EXTRACTED_DEST_DIR = os.path.join(_DATASET_ROOT, 'extracted')
POSE_DEST_DIR = os.path.join(_DATASET_ROOT, 'annotation_pose')

# Directory levels created under EXTRACTED_DEST_DIR: <subset>/<class>/<salient>
SUBSETS = ['train']
CLASSES = ['cow_bite']
SALIENTS = ['salient1', 'salient2']

In [7]:
# Build the <subset>/<class>/<salient> folder tree under EXTRACTED_DEST_DIR.
# Each level is created explicitly so that a level still exists when the
# list for the level below it is empty.
os.makedirs(EXTRACTED_DEST_DIR, exist_ok=True)
for subset_name in SUBSETS:
    subset_root = os.path.join(EXTRACTED_DEST_DIR, subset_name)
    os.makedirs(subset_root, exist_ok=True)

    for class_name in CLASSES:
        class_root = os.path.join(subset_root, class_name)
        os.makedirs(class_root, exist_ok=True)

        for salient_name in SALIENTS:
            os.makedirs(os.path.join(class_root, salient_name), exist_ok=True)

In [64]:
# Extract the annotated salient frames of every video into
# <EXTRACTED_DEST_DIR>/<subset>/<class>/<salient>/<video name>/<n>.jpg
df = pd.read_csv(FRAMES_CSV_FILE)
videos = glob.glob(os.path.join(VIDEO_DIR, '**', '*.mp4'), recursive=True)

# Images are written under the last configured subset. The previous version of this
# cell got the same value implicitly, through the `subset` loop variable leaking out
# of the directory-creation loop; naming it makes the dependency explicit.
# NOTE(review): VIDEO_DIR appears to hold the videos of a single subset -- confirm
# how videos map to subsets before adding more entries to SUBSETS.
target_subset = SUBSETS[-1]

for video in tqdm(videos, desc="Extracting frames from videos", position=0, leave=True):
    video_name = os.path.basename(video)
    video_record = df[df["name"] == video_name]

    # A video without an annotation row has no frame numbers to extract
    if video_record.empty:
        tqdm.write(f"Skipped video (no row in {FRAMES_CSV_FILE}): {video_name}")
        continue

    video_class = video_record['type'].iloc[0]

    # No directories exist for classes outside CLASSES, and cv2.imwrite fails
    # silently when the target directory is missing, so skip such videos loudly
    if video_class not in CLASSES:
        tqdm.write(f"Skipped video (class {video_class!r} not in CLASSES): {video_name}")
        continue

    # Frame numbers come from the columns whose name starts with 'L'
    video_data = video_record.filter(regex='^L').values.flatten()

    # Create video directories in salient directories
    for subset in SUBSETS:
        for salient in SALIENTS:
            video_dir = os.path.join(EXTRACTED_DEST_DIR, subset, video_class, salient, video_name)
            os.makedirs(video_dir, exist_ok=True)

    # Extract salient images to store in video directories
    cap = cv2.VideoCapture(video)
    try:
        if not cap.isOpened():
            tqdm.write(f"Skipped video (could not be opened): {video_name}")
            continue

        # Per-salient running counter used as the image file name
        salient_counts = {salient: 0 for salient in SALIENTS}
        for i, frame_num in enumerate(video_data):
            if pd.isna(frame_num):
                continue

            cap.set(cv2.CAP_PROP_POS_FRAMES, int(frame_num))
            ret, frame = cap.read()
            if not ret:
                continue

            # Annotation columns cycle through the salient poses in order
            salient_dir = SALIENTS[i % len(SALIENTS)]
            image_file = os.path.join(
                EXTRACTED_DEST_DIR, target_subset, video_class, salient_dir, video_name,
                f"{salient_counts[salient_dir]}.jpg",
            )

            # cv2.imwrite reports failure through its return value, not an exception
            if cv2.imwrite(image_file, frame):
                salient_counts[salient_dir] += 1
            else:
                tqdm.write(f"Failed to write image: {image_file}")
    finally:
        # Release the capture even if reading or writing raised
        cap.release()
    tqdm.write(f"Proccessed video: {video_name}")

Extracting frames from videos:   0%|          | 0/63 [00:00<?, ?it/s]

Proccessed video: 743a064e5f68f7acd214bbf9df289c9d6fa16e4f851e321fdfb8a07b80dde08a_2.mp4
Proccessed video: 4a639247a4b687c6ae226f986414241864b4174f8e30190b62ad4821e5c3f875_1.mp4
Proccessed video: d136ae29ffec48fd62ef8dbf80878dfece480fdbfcb72855a780cdcd03f0d08e_1.mp4
Proccessed video: 6f9416bb9789f27385491d3b308c52f2c5a441038301fcb9f5fe4df5a60b0a45_1.mp4
Proccessed video: d40739c2aa4b801a2eda2bbb8a554a6ff85e3fc331b719fb52af72bf43224086_3.mp4
Proccessed video: 4fc0428ba0f9882718fb5d7e4196369d2af69c153d42de9717a25a3919782d80_2.mp4
Proccessed video: 0739b5b782dcfd787ebd280dbe01b1118380b41a717750235d98e6d4a15165c6_1.mp4
Proccessed video: 2f4338b600baddf8b021162744427b9faaf7ec28230638ceeffadbfafd86ba44_1.mp4
Proccessed video: 9f06f158766658820031cf3f32a755a1403bcbcb696ce9febc42105f31f4fa46_2.mp4
Proccessed video: 4ff8a74c52c529eceb817a1d89e76a3ae053583c2abc491ecacbe560484b2e7e_3.mp4
Proccessed video: 9c7cbf325c3227806845c6b47c600d381965a61f9d6f0e578303bd40821681e4_1.mp4
Proccessed video: 284

In [None]:
# Run DeepLabCut pose estimation on every extracted frame folder, saving the
# predictions as CSV (save_as_csv=True).
# The generator is lazy, so each pattern is globbed only after the folders of
# the previous pattern have been analysed.
frame_dir_patterns = (
    os.path.join(EXTRACTED_DEST_DIR, subset, class_, salient, '*')
    for subset in SUBSETS
    for class_ in CLASSES
    for salient in SALIENTS
)
for pattern in frame_dir_patterns:
    for frame_dir in glob.glob(pattern):
        deeplabcut.analyze_time_lapse_frames(PATH_CONFIG_FILE, frame_dir, frametype='.jpg', save_as_csv=True)

In [58]:
# Rewrite every pose CSV in place as rows of: image_path, class, pose values
csv_files = glob.glob(os.path.join(EXTRACTED_DEST_DIR, '**', '*.csv'), recursive=True)
for csv_file in csv_files:
    # Get relative directory and class. Frame folders are laid out as
    # <subset>/<class>/<salient>/<video>, so the class is the second component.
    rel_video_dir = os.path.relpath(os.path.dirname(csv_file), EXTRACTED_DEST_DIR)
    image_class = rel_video_dir.split(os.sep)[1]

    pose_df = pd.read_csv(csv_file, header=None, index_col=None)

    # Idempotence guard: a file already converted by this cell starts with
    # "<rel_video_dir>/<image>, <class>, ...". Converting it a second time would
    # drop three real data rows and prefix the paths again, so leave it alone.
    already_converted = (
        pose_df.shape[1] > 1
        and pose_df.iloc[0, 1] == image_class
        and str(pose_df.iloc[0, 0]).startswith(rel_video_dir + os.sep)
    )
    if already_converted:
        print(f"Already processed, skipping: {csv_file}")
        continue

    # Drop the first three rows
    # (presumably DeepLabCut's scorer/bodyparts/coords header rows -- TODO confirm)
    pose_df = pose_df.drop([0, 1, 2]).reset_index(drop=True)
    pose_df.iloc[:, 1:] = pose_df.iloc[:, 1:].astype(np.float32)
    # Zero every third value column, starting at column 3
    # (presumably the likelihood of each x/y/likelihood triple -- TODO confirm)
    pose_df.iloc[:, 3::3] = 0

    # Format new rows: prefix the image name with its relative folder, add the class
    pose_df.insert(0, 'image_path', pose_df[0].apply(lambda image_file: os.path.join(rel_video_dir, image_file)))
    pose_df.insert(1, 'class', image_class)
    pose_df = pose_df.drop(columns=[0])
    pose_df.to_csv(csv_file, index=False, header=False)

In [62]:
# Combine CSV files into annotation_pose destination directory
os.makedirs(POSE_DEST_DIR, exist_ok=True)

# glob returns paths in arbitrary order; sort them so the row order of the
# combined file is reproducible across runs and machines
csv_files = sorted(glob.glob(os.path.join(EXTRACTED_DEST_DIR, '**', '*.csv'), recursive=True))

# pd.concat rejects an empty list with an opaque message; fail with a clear one instead
if not csv_files:
    raise ValueError(f"No pose CSV files found under {EXTRACTED_DEST_DIR}")

# The per-video files are read without a header row
combined_data = [pd.read_csv(csv_file, header=None, index_col=None) for csv_file in csv_files]

combined_df = pd.concat(combined_data, ignore_index=True)
combined_df.to_csv(os.path.join(POSE_DEST_DIR, 'train.csv'), index=False, header=False)

In [None]:
# Remove redundant files in video directories (everything that is not a .jpg image)
files = glob.glob(os.path.join(EXTRACTED_DEST_DIR, '**', '*.*'), recursive=True)

# Delete non-JPG files
for path in files:
    # '*.*' also matches directories whose name contains a dot, such as the
    # per-video folders named after the video file; os.remove cannot delete
    # directories, so skip them instead of reporting a spurious error for each
    if os.path.isdir(path):
        continue
    if path.endswith('.jpg'):
        continue
    try:
        os.remove(path)
        print(f"Deleted: {path}")
    except OSError as e:
        # Best effort: report the failure and continue with the remaining files
        print(f"Error deleting {path}: {e}")