In [1]:
import os
import sys
import subprocess
from tqdm import tqdm

def convert_video_to_audio_ffmpeg(video_file, save_path, output_ext="wav"):
    """Extract the audio track of a video by running the `ffmpeg` command.

    The audio is written to ``f"{save_path}.{output_ext}"``. ffmpeg is invoked
    with ``-y``, so an existing file at that location is overwritten without
    prompting.

    Args:
        video_file: Path of the input video, passed to ffmpeg's ``-i`` option.
        save_path: Output path *without* extension.
        output_ext: Extension appended to ``save_path`` (default ``"wav"``).

    Returns:
        The exit status of the ffmpeg process as reported by
        ``subprocess.call``. ffmpeg's own output is discarded, so this status
        is the only failure signal available to the caller (0 conventionally
        means success).

    Raises:
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
    """
    output_file = f"{save_path}.{output_ext}"
    command = ["ffmpeg", "-y", "-i", video_file, output_file]
    # stderr is folded into stdout and both are dropped to keep the notebook
    # output clean; the exit status is returned instead of being thrown away.
    return subprocess.call(command,
                           stdout=subprocess.DEVNULL,
                           stderr=subprocess.STDOUT)

# Parse video info: index every video file by data part and video ID

In [5]:
# Root folder of the dataset. Videos are expected at
# <videos_dir>/Videos_<data part>/video/<data part>_<video id>.mp4
videos_dir = 'E:/video'

# Nested index: {data part (e.g. 'L01'): {video id (e.g. 'V001'): full video path}}
all_video_paths = dict()
for part in sorted(os.listdir(videos_dir)):
    data_part_path = f'{videos_dir}/{part}/video'
    # Skip stray entries (system files, archives, unrelated folders): only a
    # 'Videos_*' folder that really contains a 'video' subfolder is a data part.
    # Using the real folder name avoids rebuilding a path that may not exist.
    if not part.startswith('Videos_') or not os.path.isdir(data_part_path):
        continue
    data_part = part.split('_')[-1] # L01, L02 for ex
    video_paths_by_id = all_video_paths.setdefault(data_part, dict())

    for video_name in sorted(os.listdir(data_part_path)):
        # splitext strips only the trailing extension, and non-mp4 files are
        # ignored instead of being indexed under a bogus ID.
        stem, ext = os.path.splitext(video_name)
        if ext.lower() != '.mp4':
            continue
        video_id = stem.split('_')[-1] # V001, V002 for ex
        video_paths_by_id[video_id] = f'{data_part_path}/{video_name}'

In [6]:
# Show the nested {data part: {video id: video path}} index as the cell output.
all_video_paths

{'L01': {'V001': 'E:/video/Videos_L01/video/L01_V001.mp4',
  'V002': 'E:/video/Videos_L01/video/L01_V002.mp4',
  'V003': 'E:/video/Videos_L01/video/L01_V003.mp4',
  'V004': 'E:/video/Videos_L01/video/L01_V004.mp4',
  'V005': 'E:/video/Videos_L01/video/L01_V005.mp4',
  'V006': 'E:/video/Videos_L01/video/L01_V006.mp4',
  'V007': 'E:/video/Videos_L01/video/L01_V007.mp4',
  'V008': 'E:/video/Videos_L01/video/L01_V008.mp4',
  'V009': 'E:/video/Videos_L01/video/L01_V009.mp4',
  'V010': 'E:/video/Videos_L01/video/L01_V010.mp4',
  'V011': 'E:/video/Videos_L01/video/L01_V011.mp4',
  'V012': 'E:/video/Videos_L01/video/L01_V012.mp4',
  'V013': 'E:/video/Videos_L01/video/L01_V013.mp4',
  'V014': 'E:/video/Videos_L01/video/L01_V014.mp4',
  'V015': 'E:/video/Videos_L01/video/L01_V015.mp4',
  'V016': 'E:/video/Videos_L01/video/L01_V016.mp4',
  'V017': 'E:/video/Videos_L01/video/L01_V017.mp4',
  'V018': 'E:/video/Videos_L01/video/L01_V018.mp4',
  'V019': 'E:/video/Videos_L01/video/L01_V019.mp4',
  'V0

# Get audio: extract the audio track of every indexed video with ffmpeg

In [7]:
# Root folder for the extracted audio. Each data part gets its own subfolder,
# giving <save_dir_all>/<data part>/<video id>.<audio_ext>.
save_dir_all = 'E:/audio'
audio_ext = 'wav'
# exist_ok makes directory creation idempotent, so the cell can be re-run
# without a separate existence check.
os.makedirs(save_dir_all, exist_ok=True)

# (data part, video id, video path) of every conversion that left no output file.
failed_conversions = []

for key in tqdm(all_video_paths.keys()):
    save_dir = f'{save_dir_all}/{key}'
    os.makedirs(save_dir, exist_ok=True)

    video_paths_dict = all_video_paths[key]
    video_ids = sorted(video_paths_dict.keys())
    for video_id in tqdm(video_ids):
        video_path = video_paths_dict[video_id]
        save_path = f'{save_dir}/{video_id}'
        # The extension is passed explicitly so the check below looks for the
        # same file name the converter writes.
        convert_video_to_audio_ffmpeg(video_path, save_path, output_ext=audio_ext)
        # ffmpeg's output is discarded by the converter, so a missing output
        # file is the only failure signal visible from this cell.
        # NOTE(review): a stale file left by an earlier run can hide a failure.
        if not os.path.isfile(f'{save_path}.{audio_ext}'):
            failed_conversions.append((key, video_id, video_path))

# Report failures once at the end instead of letting them pass silently.
if failed_conversions:
    print(f'{len(failed_conversions)} video(s) produced no audio file:')
    for failed_part, failed_id, failed_path in failed_conversions:
        print(f'  {failed_part}/{failed_id}: {failed_path}')

  0%|          | 0/24 [00:00<?, ?it/s]

100%|██████████| 31/31 [09:10<00:00, 17.77s/it]
100%|██████████| 31/31 [09:12<00:00, 17.83s/it]t]
100%|██████████| 30/30 [08:35<00:00, 17.19s/it]t]
100%|██████████| 30/30 [09:26<00:00, 18.88s/it]t]
100%|██████████| 31/31 [09:09<00:00, 17.73s/it]t]
100%|██████████| 31/31 [10:35<00:00, 20.51s/it]t]
100%|██████████| 31/31 [08:49<00:00, 17.08s/it]t]
100%|██████████| 30/30 [09:34<00:00, 19.14s/it]/it]
100%|██████████| 29/29 [09:54<00:00, 20.51s/it]/it]
100%|██████████| 29/29 [10:43<00:00, 22.19s/it]/it]
100%|██████████| 30/30 [08:39<00:00, 17.32s/it]s/it]
100%|██████████| 30/30 [09:47<00:00, 19.58s/it]s/it]
100%|██████████| 30/30 [08:22<00:00, 16.74s/it]s/it]
100%|██████████| 27/27 [08:37<00:00, 19.15s/it]s/it]
100%|██████████| 31/31 [08:05<00:00, 15.68s/it]s/it]
100%|██████████| 31/31 [09:23<00:00, 18.17s/it]s/it]
100%|██████████| 28/28 [07:16<00:00, 15.60s/it]s/it]
100%|██████████| 29/29 [09:18<00:00, 19.27s/it]it]  
100%|██████████| 30/30 [07:21<00:00, 14.73s/it]it]
100%|██████████| 29/2