In [None]:
import glob
from shutil import copyfile
import os
import random
import subprocess
import sys

import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np

import config

In [None]:
# Change the current working directory to the project root.
# Only the last component of the working directory is compared with the
# expected project folder name; when it differs, the cell moves up exactly one
# level. NOTE(review): this assumes the notebook is started from a direct
# subdirectory of the project root and that paths use '/' as separator — confirm.
current_dir = os.getcwd().split('/')[-1]
if current_dir != 'skateboard_trick_classification': 
    %cd '..'

In [None]:
def delete_files_in_class_folders(root_dir):
    """Delete the files stored directly inside each class folder of ``root_dir``.

    Only entries matching ``<root_dir>/*/*`` are considered, i.e. entries one
    level below each immediate subdirectory. The class folders themselves,
    anything located directly under ``root_dir`` and hidden entries (names
    starting with a dot are not matched by the ``*`` glob pattern) are left
    untouched.

    Args:
        root_dir: Directory whose immediate subdirectories are the class
            folders to empty.
    """
    pathname = os.path.join(root_dir, "*/*")
    for filepath in glob.glob(pathname):
        # os.remove() raises on directories, which would abort the cleanup
        # half way through. Skip real directories; symlinks (even to
        # directories) are still removed, as os.remove() handles them.
        if os.path.isdir(filepath) and not os.path.islink(filepath):
            continue
        os.remove(filepath)

# Video Dataset

## Delete existing videos in training and validation directories

In [None]:
# Empty the class folders (every entry matching <dir>/*/*) of the video
# training and validation directories. The class folders themselves are kept.
delete_files_in_class_folders(config.VIDEO_TRAINING_DIR)
delete_files_in_class_folders(config.VIDEO_VALIDATION_DIR)

## Copy videos into training and validation directories

In [None]:
def copy_files(filepaths, subdir='training'):
    """Copy files from the ``training_validation`` tree into a sibling tree.

    The destination of each file is its source path with every occurrence of
    the substring ``'training_validation'`` replaced by ``subdir``. Missing
    destination folders are created so the copy also works on a fresh
    checkout where the class folders do not exist yet.

    Args:
        filepaths: Iterable of source file paths. Each path is expected to
            contain ``'training_validation'``.
        subdir: Name substituted for ``'training_validation'`` in the path.

    Raises:
        shutil.SameFileError: If a path does not contain
            ``'training_validation'``, because source and destination are
            then the same file.
    """
    for filepath in filepaths:
        new_filepath = filepath.replace('training_validation', subdir)
        # copyfile() does not create parent folders on its own.
        destination_dir = os.path.dirname(new_filepath)
        if destination_dir:
            os.makedirs(destination_dir, exist_ok=True)
        copyfile(filepath, new_filepath)

def video_training_validation_split(class_dir):
    """Randomly split the files of one class folder into training and validation.

    All entries directly inside ``class_dir`` are shuffled in place with the
    global ``random`` generator. The first
    ``int((1.0 - config.VALIDATION_SPLIT) * n)`` of them are copied with
    ``copy_files(..., subdir='training')`` and the remainder with
    ``copy_files(..., subdir='validation')``.

    Args:
        class_dir: Folder holding the files of a single class.
    """
    videos = glob.glob(os.path.join(class_dir, "*"))
    random.shuffle(videos)

    training_fraction = 1.0 - config.VALIDATION_SPLIT
    split_index = int(training_fraction * len(videos))

    # Training subset first, then validation, each into its own subdir.
    subsets = (
        ('training', videos[:split_index]),
        ('validation', videos[split_index:]),
    )
    for subdir_name, subset in subsets:
        copy_files(subset, subdir=subdir_name)

In [None]:
# Treat every entry directly under VIDEO_TRAINING_VALIDATION_DIR as one class
# folder and split each class on its own, so the training/validation ratio is
# applied per class rather than over the whole dataset.
class_pathname = os.path.join(config.VIDEO_TRAINING_VALIDATION_DIR, "*")
class_dirs = glob.glob(class_pathname)
for class_dir in class_dirs:
    video_training_validation_split(class_dir)

# Audio Dataset

## Delete existing WAV files in training, validation and test directories

In [None]:
# Empty the class folders (every entry matching <dir>/*/*) of the WAV
# training, validation and test directories.
delete_files_in_class_folders(config.WAV_TRAINING_DIR)
delete_files_in_class_folders(config.WAV_VALIDATION_DIR)
delete_files_in_class_folders(config.WAV_TEST_DIR)

## Generate WAV files

In [None]:
def create_wav_files_from_videos(video_dir, wav_dir):
    """Extract the audio track of every labelled video into a WAV file.

    Videos are looked up as ``<video_dir>/<video_label>/<file>``. The video
    label is translated to an audio label through
    ``config.VIDEO_TO_AUDIO_LABEL_MAPPING``; videos whose label has no (or an
    empty) mapping are skipped. Every remaining video is converted with
    ffmpeg to ``<wav_dir>/<audio_label>/<video_label>_<video name>.wav``,
    where the video's own extension (whatever it is) is replaced by ``.wav``.

    A failed conversion (any non-zero ffmpeg exit code) is reported on stdout
    and does not stop the loop.

    Args:
        video_dir: Root directory containing one folder per video label.
        wav_dir: Root directory receiving one folder per audio label; missing
            audio-label folders are created.

    Raises:
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
    """
    video_pathname = os.path.join(video_dir, "*/*")
    for video_filepath in glob.glob(video_pathname):
        # The name of the folder containing the video is its label.
        video_label = os.path.basename(os.path.dirname(video_filepath))
        audio_label = config.VIDEO_TO_AUDIO_LABEL_MAPPING.get(video_label)
        if not audio_label:
            continue

        # Replace the extension rather than the literal '.mp4' so that other
        # containers or upper-case extensions still yield a '.wav' name.
        video_stem, _ = os.path.splitext(os.path.basename(video_filepath))
        output_dir = os.path.join(wav_dir, audio_label)
        output_wav_path = os.path.join(output_dir, f'{video_label}_{video_stem}.wav')
        # ffmpeg does not create the output folder itself.
        os.makedirs(output_dir, exist_ok=True)

        # Pass the arguments as a list without a shell: paths containing
        # spaces or shell metacharacters are then handed to ffmpeg verbatim.
        # -ac 2: two audio channels, -y: overwrite output, -vn: drop video.
        command = ['ffmpeg', '-i', video_filepath, '-ac', '2', '-y', '-vn', output_wav_path]
        code = subprocess.call(command)
        # Any non-zero exit status is a failure, not only 1.
        if code != 0:
            print(f'Error: {video_filepath}, {output_wav_path}')

In [None]:
# Extract the audio of each video split into the matching WAV split:
# training -> training, validation -> validation, test -> test.
create_wav_files_from_videos(config.VIDEO_TRAINING_DIR, config.WAV_TRAINING_DIR)
create_wav_files_from_videos(config.VIDEO_VALIDATION_DIR, config.WAV_VALIDATION_DIR)
create_wav_files_from_videos(config.VIDEO_TEST_DIR, config.WAV_TEST_DIR)

## Delete existing spectrograms in training, validation and test directories

In [None]:
# Empty the class folders (every entry matching <dir>/*/*) of the spectrogram
# training, validation and test directories.
delete_files_in_class_folders(config.SPECTROGRAM_TRAINING_DIR)
delete_files_in_class_folders(config.SPECTROGRAM_VALIDATION_DIR)
delete_files_in_class_folders(config.SPECTROGRAM_TEST_DIR)

## Generate Mel-Spectrograms

In [None]:
def create_spectrograms_from_wav_files(wav_dir):
    """Render a borderless mel-spectrogram image for every file under ``wav_dir``.

    Files are looked up as ``<wav_dir>/<label>/<file>``. Each one is loaded at
    its native sampling rate, converted to a mel-spectrogram in decibels
    (relative to its own maximum) and saved as an image. The image path is
    the WAV path with ``'/wav/'`` replaced by ``'/spectrogram/'`` and
    ``'.wav'`` replaced by ``'.jpg'``; missing output folders are created.

    The figure is 1x1 inch and saved at 224 dpi, i.e. a 224x224 pixel image.

    Args:
        wav_dir: Root directory containing one folder of WAV files per label.
    """
    wav_pathname = os.path.join(wav_dir, "*/*")
    for wav_filepath in glob.glob(wav_pathname):
        # sr=None keeps the file's own sampling rate instead of resampling.
        clip, sample_rate = librosa.load(wav_filepath, sr=None)
        S = librosa.feature.melspectrogram(y=clip, sr=sample_rate)
        S_db = librosa.power_to_db(S, ref=np.max)

        spectrogram_filepath = wav_filepath.replace('/wav/', '/spectrogram/').replace('.wav', '.jpg')
        # savefig() does not create parent folders on its own.
        spectrogram_dir = os.path.dirname(spectrogram_filepath)
        if spectrogram_dir:
            os.makedirs(spectrogram_dir, exist_ok=True)

        fig = plt.figure(figsize=[1, 1])
        try:
            # An axes covering the whole figure with its frame switched off
            # removes the white border around the image.
            ax = plt.Axes(fig, [0., 0., 1., 1.])
            ax.set_axis_off()
            fig.add_axes(ax)
            # Draw on this axes explicitly rather than on the implicit
            # "current" axes of the pyplot state machine.
            librosa.display.specshow(S_db, ax=ax)
            fig.savefig(spectrogram_filepath, dpi=224, pad_inches=0)
        finally:
            # Always release the figure, even if rendering or saving fails,
            # so figures do not pile up in memory over a long loop.
            plt.close(fig)

In [None]:
# Render spectrograms for all three WAV splits. Only the WAV roots are passed:
# create_spectrograms_from_wav_files derives each image path from the WAV path
# by substituting '/wav/' with '/spectrogram/'.
create_spectrograms_from_wav_files(config.WAV_TRAINING_DIR)
create_spectrograms_from_wav_files(config.WAV_VALIDATION_DIR)
create_spectrograms_from_wav_files(config.WAV_TEST_DIR)