In [1]:
import os

# Project root: the parent of the directory the notebook is started from.
BASE_DIR = os.path.abspath(os.path.join(os.getcwd(), '..'))

# Input videos and the two output trees, all located under the project root.
RAW_VIDEO_DIR = os.path.join(BASE_DIR, 'raw/ravdess')
PROCESSED_IMAGES_DIR = os.path.join(BASE_DIR, 'processed/images')
PROCESSED_AUDIO_DIR = os.path.join(BASE_DIR, 'processed/audio')

# Create both output roots up front; existing directories are left as they are.
for _output_root in (PROCESSED_IMAGES_DIR, PROCESSED_AUDIO_DIR):
    os.makedirs(_output_root, exist_ok=True)

In [2]:
# Two-digit emotion code (the third dash-separated field of a video file name)
# paired with the label used for output folders and file names.
emotion_map = dict([
    ('01', 'neutral'),
    ('02', 'calm'),
    ('03', 'happy'),
    ('04', 'sad'),
    ('05', 'angry'),
    ('06', 'fear'),
    ('07', 'disgust'),
    ('08', 'surprised'),
])

# Labels that are actually processed: every mapped emotion except 'calm'.
allowed_emotions = {label for label in emotion_map.values() if label != 'calm'}


In [3]:
from moviepy import VideoFileClip

def extract_audio_from_video(video_path, output_audio_path):
    """Write the audio track of a video file to ``output_audio_path``.

    The track is written with ``write_audiofile`` using the extra ffmpeg
    arguments ``-ac 1`` (one output channel) and with progress logging
    disabled. When the clip has no audio track, a message is printed and no
    file is written.

    Args:
        video_path: Path of the video file to read.
        output_audio_path: Destination path of the audio file.

    Returns:
        None.

    Raises:
        Whatever ``VideoFileClip`` or ``write_audiofile`` raise; the clip is
        closed before the exception propagates.
    """
    clip = VideoFileClip(video_path)
    try:
        audio = clip.audio
        if audio is None:
            # The message is Polish for "No audio track in <path>".
            print(f"Brak ścieżki audio w {video_path}")
            return
        audio.write_audiofile(output_audio_path, ffmpeg_params=["-ac", "1"], logger=None)
    finally:
        # Always release the clip, including when writing the audio fails;
        # otherwise a failure would leave it open for the rest of the run.
        clip.close()

In [4]:
import numpy as np
from PIL import Image

def extract_frames_from_video(video_path, output_dir, emotion, split, start_frame_index, num_frames=3):
    """Save ``num_frames`` evenly spaced frames of a video as JPEG files.

    Files are written to ``output_dir`` (created if missing) and named
    ``{emotion}-{split}-{start_frame_index:04d}-{n:02d}.jpg``, where ``n``
    counts the frames of this video starting at 1.

    Args:
        video_path: Path of the video file to read.
        output_dir: Directory that receives the JPEG files.
        emotion: Emotion label used in the file names.
        split: Split name used in the file names.
        start_frame_index: Index identifying this video in the file names.
        num_frames: Number of frames to save.

    Returns:
        ``start_frame_index + 1``, i.e. the index to use for the next video.

    Raises:
        Whatever ``VideoFileClip``, ``get_frame`` or ``Image.save`` raise; the
        clip is closed before the exception propagates.
    """
    os.makedirs(output_dir, exist_ok=True)
    clip = VideoFileClip(video_path)
    try:
        # num_frames + 2 evenly spaced points include t=0 and t=duration;
        # dropping both ends leaves only timestamps strictly inside the clip.
        frame_times = np.linspace(0, clip.duration, num_frames + 2)[1:-1]

        for idx, t in enumerate(frame_times, 1):
            img = Image.fromarray(clip.get_frame(t))
            filename = f"{emotion}-{split}-{start_frame_index:04d}-{idx:02d}.jpg"
            img.save(os.path.join(output_dir, filename))
    finally:
        # Always release the clip, including when decoding or saving fails.
        clip.close()

    return start_frame_index + 1

In [5]:
import random
from glob import glob

# Fraction of the shuffled videos assigned to each split. Only 'train' and
# 'test' are read by sort_and_extract_from_video; 'validation' receives
# whatever files remain after those two slices.
split_ratio = dict(train=0.7, test=0.2, validation=0.1)

def sort_and_extract_from_video(source_dir, processed_audio_dir, processed_images_dir, split_ratio, seed=42):
    """Split the source videos into train/test/validation and extract audio and frames.

    Videos matching ``Actor_*/*.mp4`` under ``source_dir`` are shuffled and
    sliced into splits. For every video whose emotion is in
    ``allowed_emotions``, the audio track is written to
    ``<processed_audio_dir>/<split>/<emotion>/<emotion>-<split>-NNNN-audio.wav``
    and three frames are written to ``<processed_images_dir>/<split>/<emotion>/``.
    ``NNNN`` is a counter kept per (split, emotion) pair, starting at 1.

    Args:
        source_dir: Directory containing the ``Actor_*`` folders.
        processed_audio_dir: Root directory for the extracted audio files.
        processed_images_dir: Root directory for the extracted frames.
        split_ratio: Mapping with ``'train'`` and ``'test'`` fractions. Counts
            are truncated with ``int``; validation receives the remaining files.
        seed: Seed for the shuffle. The default gives the same split on every
            run; pass ``None`` for a different split each time.

    Returns:
        None.
    """
    # glob order depends on the filesystem, so sort first: the same seed then
    # yields the same split on every machine and on every re-run.
    video_files = sorted(glob(os.path.join(source_dir, 'Actor_*', '*.mp4')))
    # A private generator keeps the shuffle reproducible without touching the
    # global random state.
    random.Random(seed).shuffle(video_files)

    total = len(video_files)
    train_count = int(total * split_ratio['train'])
    test_count = int(total * split_ratio['test'])

    splits = {
        'train': video_files[:train_count],
        'test': video_files[train_count:train_count + test_count],
        'validation': video_files[train_count + test_count:]
    }

    # One counter per (split, emotion); audio and frames of the same video
    # share the index so their file names pair up.
    sample_counters = {}

    for split, split_files in splits.items():
        for file_path in split_files:
            parts = os.path.basename(file_path).split('-')
            # Skip names that do not have at least seven dash-separated fields.
            if len(parts) < 7:
                continue

            # The third field of the file name is the emotion code.
            emotion = emotion_map.get(parts[2], 'unknown')
            if emotion not in allowed_emotions:
                continue

            key = (split, emotion)
            sample_index = sample_counters.get(key, 0) + 1
            sample_counters[key] = sample_index

            audio_target_dir = os.path.join(processed_audio_dir, split, emotion)
            os.makedirs(audio_target_dir, exist_ok=True)
            base_audio_name = f"{emotion}-{split}-{sample_index:04d}-audio"
            extract_audio_from_video(file_path, os.path.join(audio_target_dir, f"{base_audio_name}.wav"))

            image_target_dir = os.path.join(processed_images_dir, split, emotion)
            os.makedirs(image_target_dir, exist_ok=True)
            extract_frames_from_video(
                file_path,
                image_target_dir,
                emotion,
                split,
                sample_index,
                num_frames=3
            )

In [None]:
# Run the whole pipeline: split the raw videos and write audio and frames
# into the processed directories.
sort_and_extract_from_video(
    RAW_VIDEO_DIR,         # source_dir
    PROCESSED_AUDIO_DIR,   # processed_audio_dir
    PROCESSED_IMAGES_DIR,  # processed_images_dir
    split_ratio,           # split_ratio
)


{'video_found': True, 'audio_found': True, 'metadata': {'major_brand': 'M4V', 'minor_version': '1', 'compatible_brands': 'M4V mp42isom', 'creation_time': '2013-02-28T02:13:40.000000Z'}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': 'eng', 'default': True, 'size': [1280, 720], 'bitrate': 9994, 'fps': 29.97002997002997, 'codec_name': 'h264', 'profile': '(High)', 'metadata': {'Metadata': '', 'creation_time': '2013-02-28T02:13:40.000000Z', 'handler_name': 'Mainconcept MP4 Video Media Handler', 'vendor_id': '[0][0][0][0]', 'encoder': 'AVC Coding'}}, {'input_number': 0, 'stream_number': 1, 'stream_type': 'audio', 'language': 'eng', 'default': True, 'fps': 48000, 'bitrate': 189, 'metadata': {'Metadata': '', 'creation_time': '2013-02-28T02:13:40.000000Z', 'handler_name': 'Mainconcept MP4 Sound Media Handler', 'vendor_id': '[0][0][0][0]'}}], 'input_number': 0}], 'duration': 3.7, 'bitrate': 10261, 'start': 0.0, 'default_video_input_number': 0