In [None]:
import os
import random
import concurrent.futures
from pathlib import Path
import moviepy.editor as mp
import cv2
import logging
import warnings

# Quiet third-party noise: ignore every Python warning and only let
# ERROR-level (and above) records through the 'moviepy' logger.
warnings.filterwarnings("ignore")
logging.getLogger('moviepy').setLevel(logging.ERROR)

def get_random_frame(cap, max_attempts=5, read_retries=3):
    """Return one randomly chosen frame from a video capture, or ``None``.

    The frame count reported by the capture is read once. A non-positive
    count means there is nothing to seek into, so ``None`` is returned
    immediately. Otherwise up to ``max_attempts`` random frame positions are
    tried, and at each position the read is retried up to ``read_retries``
    times, seeking back to the same position between tries.

    Args:
        cap: Capture object providing ``get``, ``set`` and ``read``
            (called with a ``cv2.VideoCapture`` elsewhere in this file).
        max_attempts: How many different random positions to try.
        read_retries: How many reads to attempt at each position.

    Returns:
        The frame returned by ``cap.read()`` on the first successful read,
        or ``None`` if every attempt failed.
    """
    # The reported frame count does not depend on the attempt number, so
    # query it once instead of on every iteration.
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    if frame_count <= 0:
        return None

    for _attempt in range(max_attempts):
        position = random.randint(0, frame_count - 1)
        cap.set(cv2.CAP_PROP_POS_FRAMES, position)

        # Give the decoder several chances at this position.
        for _retry in range(read_retries):
            ok, frame = cap.read()
            if ok:
                return frame
            # Seek back to the same position before reading again.
            cap.set(cv2.CAP_PROP_POS_FRAMES, position)

    return None

def process_video(video_path, sounds_dir, images_dir):
    """Extract an audio excerpt and one random frame from a single video.

    For ``video_path`` this writes two files named after the video's stem:

    * ``images_dir/<stem>.jpg`` - a random frame resized to 128x128;
    * ``sounds_dir/<stem>.wav`` - the audio of the first 10 seconds (or the
      whole audio when the clip is not longer than 10 seconds), written with
      ``fps=44100`` and the ``pcm_s16le`` codec.

    Nothing is written when the video has no audio track or when no frame
    could be decoded, so an image is never produced without its matching
    sound. Any exception is caught and reported with ``print``; the function
    never raises and always returns ``None``.

    Args:
        video_path: ``pathlib.Path`` of the source video.
        sounds_dir: ``pathlib.Path`` of the directory for ``.wav`` output.
        images_dir: ``pathlib.Path`` of the directory for ``.jpg`` output.
    """
    try:
        clip = None
        cap = None
        try:
            # Audio: keep at most the first 10 seconds.
            audio_path = sounds_dir / f"{video_path.stem}.wav"
            clip = mp.VideoFileClip(str(video_path))

            if clip.duration > 10:
                audio = clip.subclip(0, 10).audio
            else:
                audio = clip.audio

            # A clip without an audio track has ``audio`` set to None. Bail
            # out before writing the image so no unpaired image is left.
            if audio is None:
                print(f"В файле {video_path} нет аудиодорожки")
                return

            # Image: grab a random frame through OpenCV.
            cap = cv2.VideoCapture(str(video_path))
            frame = get_random_frame(cap)
            if frame is None:
                print(f"Не удалось декодировать файл {video_path}")
                return

            resized = cv2.resize(frame, (128, 128))
            img_path = images_dir / f"{video_path.stem}.jpg"
            cv2.imwrite(str(img_path), resized)

            audio.write_audiofile(
                str(audio_path),
                fps=44100,
                codec='pcm_s16le',
                verbose=False,
                logger=None,
            )
        finally:
            # Release both handles on every path, including errors and the
            # early returns above.
            if cap is not None:
                cap.release()
            if clip is not None:
                clip.close()
    except Exception as e:
        # Best-effort batch processing: report the failure and move on.
        print(f"Ошибка при обработке {video_path}: {str(e)}")

def process_all_videos(root_dir):
    """Process every ``.mp4`` file found under ``<root_dir>/videos``.

    Creates ``<root_dir>/sounds`` and ``<root_dir>/images`` if they do not
    exist yet (``root_dir`` itself must already exist), walks the ``videos``
    subdirectory recursively collecting files whose name ends with ``.mp4``
    (case-insensitive), and hands each one to ``process_video`` on a thread
    pool, returning once all submitted tasks have finished.

    Args:
        root_dir: Path (string or path-like) of the dataset root.
    """
    base = Path(root_dir)
    sounds_dir = base / "sounds"
    images_dir = base / "images"
    videos_root = base / "videos"

    for out_dir in (sounds_dir, images_dir):
        out_dir.mkdir(exist_ok=True)

    # Collect all matching videos, at any depth below the videos directory.
    video_files = []
    for folder, _, names in os.walk(videos_root):
        video_files.extend(
            Path(folder) / name
            for name in names
            if name.lower().endswith('.mp4')
        )

    with concurrent.futures.ThreadPoolExecutor() as executor:
        pending = []
        for video_path in video_files:
            task = executor.submit(process_video, video_path, sounds_dir, images_dir)
            pending.append(task)
        concurrent.futures.wait(pending)

In [None]:
# Run the whole pipeline on the "data" directory (relative to the current
# working directory).
root_directory = "data"
process_all_videos(root_directory)