In [None]:
import librosa
import librosa.display
import subprocess

import matplotlib.pyplot as plt
import soundfile as sf

import os

In [None]:
# Based on the librosa documentation:
# https://librosa.org/doc/main/generated/librosa.load.html
# https://librosa.org/doc/main/generated/librosa.resample.html
# https://librosa.org/doc/main/generated/librosa.util.normalize.html
# https://librosa.org/doc/main/generated/librosa.to_mono.html

# writing to file https://pysoundfile.readthedocs.io/en/0.8.1/#soundfile.write

def preprocess(input_file: str, output_file: str, target_sr: int = 22050):
    """Extract the audio of a media file and store it as a mono WAV file.

    Steps, in order: ffmpeg decodes ``input_file`` to 16-bit PCM at
    ``output_file``; librosa loads that file at its own sampling rate with
    the channels kept separate; the signal is resampled to ``target_sr``;
    each channel is normalized; the channels are downmixed to mono; the
    result overwrites ``output_file``.

    Args:
        input_file: Path of the media file handed to ffmpeg.
        output_file: Path of the WAV file to write (``-y`` lets ffmpeg
            overwrite an existing file).
        target_sr: Sampling rate of the written signal.

    Returns:
        The processed mono signal that was written to ``output_file``.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    # check=True: a failed extraction must not fall through to loading a
    # missing or stale output_file further down.
    subprocess.run(
        ['ffmpeg', '-i', input_file, '-acodec', 'pcm_s16le', '-y', output_file],
        check=True,
    )

    # sr=None keeps the file's original sampling rate; mono=False keeps the
    # channels separate so they can be normalized individually.
    y, orig_sr = librosa.load(output_file, sr=None, mono=False)

    # The rates are passed by keyword: recent librosa releases no longer
    # accept them positionally.
    y_resampled = librosa.resample(y, orig_sr=orig_sr, target_sr=target_sr)

    # axis=-1 addresses the sample axis whether the signal is 1-D (single
    # channel) or 2-D (channels x samples); a hard-coded axis=1 fails on 1-D.
    # NOTE(review): normalizing per channel *before* averaging means the mono
    # result itself is not guaranteed to be normalized - confirm this is the
    # intended order.
    y_normalized = librosa.util.normalize(y_resampled, axis=-1)

    # downmix to mono
    y_mono = librosa.to_mono(y_normalized)

    # overwrite the extracted file with the processed signal
    sf.write(output_file, y_mono, target_sr)

    return y_mono
    

In [None]:
# directory handling taken from my MRE Assignment 1

def preprocessDirectory(input_directory: str, output_directory: str, target_sr: int = 22050):
    """Run ``preprocess`` on every ``.mp4`` file directly inside a directory.

    ``output_directory`` is created if it does not exist yet. Each regular
    file in ``input_directory`` whose extension is ``.mp4`` (any letter case)
    is converted to ``<name>.wav`` inside ``output_directory``. Entries of
    subdirectories are not visited. Only the directory case is handled in the
    code shown here.

    Args:
        input_directory: Directory whose entries are scanned.
        output_directory: Directory receiving the ``.wav`` files.
        target_sr: Sampling rate forwarded to ``preprocess``.
    """
    os.makedirs(output_directory, exist_ok=True)  # create the output directory if it does not exist

    accepted_extensions = ('.MP4',)  # only these extensions are processed

    # Directory input: look at its immediate entries.
    if os.path.isdir(input_directory):
        for entry in os.listdir(input_directory):
            source_path = os.path.join(input_directory, entry)
            stem, suffix = os.path.splitext(entry)
            is_wanted = os.path.isfile(source_path) and suffix.upper() in accepted_extensions
            if not is_wanted:
                continue
            target_path = os.path.join(output_directory, stem + '.wav')
            preprocess(input_file=source_path, output_file=target_path, target_sr=target_sr)