In [2]:
import cv2
import pandas as pd
import os

# Define directories and CSV file
fall_dir = "originals/fall"
root_output_dir = "MMACTION"
csv_file = "Labels.csv"

# Load the fall event data
fall_data = pd.read_csv(csv_file)
print(fall_data)

     Clip Fall Start Fall End
0       1      46.65    49.15
1       2       11.9     14.2
2       3       2.65        5
3       4        0.4      1.4
4       5        0.9     3.15
..    ...        ...      ...
115   116      START      2.1
116   117      START      END
117   118       1.15        2
118   119          1        2
119   120        2.9      END

[120 rows x 3 columns]


In [3]:
import cv2
import os
import pandas as pd
import numpy as np

# Ensure output directory exists
os.makedirs(root_output_dir, exist_ok=True)

# Define processing parameters
frame_count = 50
slide_frames = 10

# Make clip directory
clip_dir = os.path.join(root_output_dir, str(frame_count))
os.makedirs(clip_dir, exist_ok=True)

# Initialize the text file for recording filenames and labels
rdfs_file_path = os.path.join(root_output_dir, f"RFDS_{str(frame_count)}.txt")
# Delete file if it exists to ensure fresh start
if os.path.exists(rdfs_file_path):
    os.remove(rdfs_file_path)

# Process each video in the fall directory
for video_file in os.listdir(fall_dir):
    if video_file.endswith(".mp4"):
        # Extract the clip number from the filename
        clip_number = int(video_file.split('_')[1])

        # Retrieve fall timing for this clip
        fall_row = fall_data[fall_data['Clip'] == clip_number]

        fall_start = fall_row.iloc[0, 1]
        fall_end = fall_row.iloc[0, 2]

        # Handle "START" and "END" values
        if fall_start == "START":
            fall_start = 0.0
        else:
            fall_start = float(fall_start)

        video_path = os.path.join(fall_dir, video_file)
        cap = cv2.VideoCapture(video_path)

        # Retrieve video properties
        fps = int(cap.get(cv2.CAP_PROP_FPS))
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frame_size = (frame_width, frame_height)
        video_duration = total_frames / fps

        if fall_end == "END":
            fall_end = float(video_duration)
        else:
            fall_end = float(fall_end)

        clip_index = 0  # Counter for clips
        start_frame = 0  # Initial start frame

        while start_frame + frame_count <= total_frames:
            # Check if the clip contains a fall
            clip_start_time = float(start_frame / fps)
            clip_end_time = float((start_frame + frame_count) / fps)
            has_fall = not (clip_end_time <= fall_start or clip_start_time >= fall_end)
            label = "FALL" if has_fall else "NO"
            label_value = 1 if has_fall else 0  # Numeric label for the text file

            output_filename = f"{clip_number:03}_{clip_index}_{label}.mp4"
            output_path = os.path.join(clip_dir, output_filename)

            # Set the starting frame
            cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)

            # Initialize the VideoWriter
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for .mp4 format
            writer = cv2.VideoWriter(output_path, fourcc, fps, frame_size)

            for _ in range(frame_count):
                ret, frame = cap.read()
                if not ret:
                    print(f"Error reading frame {start_frame + _} from video.")
                    break
                writer.write(frame)

            writer.release()

            # Write to RDFS_train.txt
            with open(rdfs_file_path, "a") as rdfs_file:  # Append mode
                rdfs_file.write(f"{output_filename} {label_value}\n")

            print(f"Saved: {output_filename}")

            # Update frame index for sliding window
            start_frame += slide_frames
            clip_index += 1
        # Process the final clip
        if start_frame < total_frames:
            remaining_frames = total_frames - start_frame
            padding_frames = frame_count - remaining_frames

            # Set the starting frame
            cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)

            # Initialize the VideoWriter
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for .mp4 format
            output_filename = f"{clip_number:03}_{clip_index}_NO.mp4"
            output_path = os.path.join(clip_dir, output_filename)
            writer = cv2.VideoWriter(output_path, fourcc, fps, frame_size)

            for _ in range(remaining_frames):
                ret, frame = cap.read()
                if not ret:
                    print(f"Error reading frame {start_frame + _} from video.")
                    break
                writer.write(frame)

            # Pad with black frames
            for _ in range(padding_frames):
                black_frame = np.zeros((frame_height, frame_width, 3), dtype=np.uint8)
                writer.write(black_frame)

            writer.release()

            # Write to RDFS_train.txt
            with open(rdfs_file_path, "a") as rdfs_file:  # Append mode
                rdfs_file.write(f"{output_filename} 0\n")

            print(f"Saved: {output_filename}")
        # Release the VideoCapture after processing the entire video
        cap.release()


Saved: 002_0_NO.mp4
Saved: 002_1_NO.mp4
Saved: 002_2_NO.mp4
Saved: 002_3_NO.mp4
Saved: 002_4_NO.mp4
Saved: 002_5_NO.mp4
Saved: 002_6_NO.mp4
Saved: 002_7_NO.mp4
Saved: 002_8_NO.mp4
Saved: 002_9_NO.mp4
Saved: 002_10_NO.mp4
Saved: 002_11_NO.mp4
Saved: 002_12_NO.mp4
Saved: 002_13_NO.mp4
Saved: 002_14_NO.mp4
Saved: 002_15_NO.mp4
Saved: 002_16_NO.mp4
Saved: 002_17_NO.mp4
Saved: 002_18_NO.mp4
Saved: 002_19_NO.mp4
Saved: 002_20_NO.mp4
Saved: 002_21_NO.mp4
Saved: 002_22_NO.mp4
Saved: 002_23_NO.mp4
Saved: 002_24_NO.mp4
Saved: 002_25_FALL.mp4
Saved: 002_26_FALL.mp4
Saved: 002_27_FALL.mp4
Saved: 002_28_FALL.mp4
Saved: 002_29_FALL.mp4
Saved: 002_30_FALL.mp4
Saved: 002_31_FALL.mp4
Saved: 002_32_FALL.mp4
Saved: 002_33_NO.mp4
Saved: 006_0_FALL.mp4
Saved: 006_1_FALL.mp4
Saved: 006_2_NO.mp4
Saved: 006_3_NO.mp4
Saved: 006_4_NO.mp4
Saved: 006_5_NO.mp4
Saved: 006_6_NO.mp4
Saved: 006_7_NO.mp4
Saved: 006_8_NO.mp4
Saved: 006_9_NO.mp4
Saved: 006_10_NO.mp4
Saved: 006_11_NO.mp4
Saved: 006_12_NO.mp4
Saved: 006_13

In [6]:
# Count files in each directory
fall_count = len([f for f in os.listdir(clip_dir) if 'FALL' in f])
no_count = len([f for f in os.listdir(clip_dir) if 'NO' in f])

print(f"Fall count: {fall_count}")
print(f"No count: {no_count}")

Fall count: 721
No count: 721


In [5]:
import os
import random

# Balance dataset
import os
import random

# Get lists of files with 'FALL' and 'NO' in their names
fall_files = [f for f in os.listdir(clip_dir) if 'FALL' in f]
no_files = [f for f in os.listdir(clip_dir) if 'NO' in f]

# Calculate the difference in lengths
diff = len(no_files) - len(fall_files)

# If there are more 'NO' files, randomly remove some
if diff > 0:
    files_to_remove = random.sample(no_files, diff)
    for file in files_to_remove:
        # Remove the file
        os.remove(os.path.join(clip_dir, file))
        print(f"Removed file: {file}")

    # Remove the corresponding annotations from RDFS_{frame_count}.txt
    annotation_file = os.path.join(root_output_dir, f"RFDS_{frame_count}.txt")
    with open(annotation_file, 'r') as f:
        lines = [line for line in f.readlines() if not any(file in line for file in files_to_remove)]
    with open(annotation_file, 'w') as f:
        f.writelines(lines)

Removed file: 016_12_NO.mp4
Removed file: 020_6_NO.mp4
Removed file: 001_106_NO.mp4
Removed file: 025_20_NO.mp4
Removed file: 014_1_NO.mp4
Removed file: 020_25_NO.mp4
Removed file: 091_5_NO.mp4
Removed file: 037_3_NO.mp4
Removed file: 001_172_NO.mp4
Removed file: 052_1_NO.mp4
Removed file: 002_18_NO.mp4
Removed file: 024_16_NO.mp4
Removed file: 005_25_NO.mp4
Removed file: 021_17_NO.mp4
Removed file: 001_134_NO.mp4
Removed file: 053_8_NO.mp4
Removed file: 071_1_NO.mp4
Removed file: 093_8_NO.mp4
Removed file: 001_4_NO.mp4
Removed file: 002_3_NO.mp4
Removed file: 081_3_NO.mp4
Removed file: 005_34_NO.mp4
Removed file: 001_26_NO.mp4
Removed file: 011_22_NO.mp4
Removed file: 027_5_NO.mp4
Removed file: 032_6_NO.mp4
Removed file: 065_27_NO.mp4
Removed file: 001_97_NO.mp4
Removed file: 001_156_NO.mp4
Removed file: 012_25_NO.mp4
Removed file: 030_23_NO.mp4
Removed file: 001_157_NO.mp4
Removed file: 023_18_NO.mp4
Removed file: 100_16_NO.mp4
Removed file: 023_12_NO.mp4
Removed file: 090_10_NO.mp4


In [7]:
import random
import shutil

# Define the split ratio
train_ratio=0.8
clip_dir = os.path.join(root_output_dir, str(frame_count))
train_dir = os.path.join(root_output_dir, f"{frame_count}_train")
validation_dir = os.path.join(root_output_dir, f"{frame_count}_validation")
train_txt_path = os.path.join(root_output_dir, f"{frame_count}_train.txt")
validation_txt_path = os.path.join(root_output_dir, f"{frame_count}_validation.txt")

# Ensure output directories exist
os.makedirs(train_dir, exist_ok=True)
os.makedirs(validation_dir, exist_ok=True)

# Load RDFS_{frame_count}.txt
rdfs_file_path = os.path.join(root_output_dir, f"RFDS_{frame_count}.txt")

with open(rdfs_file_path, "r") as rdfs_file:
    lines = rdfs_file.readlines()

random.shuffle(lines)
split_index = int(len(lines) * train_ratio)
train_lines = lines[:split_index]
validation_lines = lines[split_index:]

# Write annotations and move files
with open(train_txt_path, "w") as train_file, open(validation_txt_path, "w") as validation_file:

    # Process training files
    for line in train_lines:
        filename, label = line.strip().split()
        source_path = os.path.join(clip_dir, filename)
        target_path = os.path.join(train_dir, filename)
        shutil.move(source_path, target_path)
        train_file.write(f"{filename} {label}\n")

    # Process validation files
    for line in validation_lines:
        filename, label = line.strip().split()
        source_path = os.path.join(clip_dir, filename)
        target_path = os.path.join(validation_dir, filename)
        shutil.move(source_path, target_path)
        validation_file.write(f"{filename} {label}\n")

print(f"Split complete. Training files in {train_dir}, validation files in {validation_dir}.")
print(f"Training annotations saved to {train_txt_path}. Validation annotations saved to {validation_txt_path}.")

Split complete. Training files in MMACTION/50_train, validation files in MMACTION/50_validation.
Training annotations saved to MMACTION/50_train.txt. Validation annotations saved to MMACTION/50_validation.txt.
