In [5]:
import numpy as np
import librosa
import subprocess
import os
import matplotlib.pyplot as plt

In [6]:
# Resamples an audio signal to a new sampling rate.
def resample(y, sr, resample_to):
    """Resample ``y`` from ``sr`` Hz to ``resample_to`` Hz.

    Args:
        y: Audio signal, passed unchanged to ``librosa.resample``.
        sr: Sampling rate of ``y``.
        resample_to: Target sampling rate.

    Returns:
        The value returned by ``librosa.resample`` for these arguments.
    """
    # The rates are passed by keyword: librosa >= 0.10 declares ``orig_sr``
    # and ``target_sr`` keyword-only, so a positional call raises TypeError
    # there. The keyword names are accepted by older releases as well.
    return librosa.resample(y, orig_sr=sr, target_sr=resample_to)

# Normalizes an audio signal with the infinity norm.
def normalize(samples):
    """Scale ``samples`` with ``librosa.util.normalize`` using ``norm=np.inf``.

    Args:
        samples: Signal handed unchanged to ``librosa.util.normalize``.

    Returns:
        The value returned by ``librosa.util.normalize``.
    """
    peak_norm = np.inf
    scaled = librosa.util.normalize(samples, norm=peak_norm)
    return scaled

# Downmixes a signal to one channel.
def downmix_to_mono(signal):
    """Return ``signal`` reduced to a single channel.

    Args:
        signal: Object exposing a ``shape`` attribute (e.g. a NumPy array).

    Returns:
        ``signal`` itself when its shape has at most one dimension, otherwise
        the result of ``librosa.to_mono(signal)``.
    """
    dimension_count = len(signal.shape)
    # A shape with at most one dimension is treated as already mono.
    if dimension_count <= 1:
        return signal
    return librosa.to_mono(signal)


In [7]:
def preprocess(input_file, output_file=None, plot=True):
    """Extract the audio of a video to WAV and run the preprocessing chain on it.

    ffmpeg writes the audio of ``input_file`` to ``output_file`` as 16-bit PCM
    (``-y`` overwrites an existing file). The WAV is then loaded at 22050 Hz,
    resampled to 11025 Hz, normalized and downmixed to mono.

    Args:
        input_file: Path of the media file handed to ffmpeg.
        output_file: Path of the WAV file to write. Defaults to
            ``../videos/processed/<input basename>_processed.wav``.
        plot: When true, plot the loaded and the processed signal.

    Returns:
        None. The processed signal is only used for the optional plot; it is
        neither returned nor written back to ``output_file``.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    if output_file is None:
        file_base = os.path.splitext(os.path.basename(input_file))[0]
        output_file = "../videos/processed/" + file_base + "_processed.wav"

    # Make sure the target directory exists before ffmpeg tries to write to it.
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Extract the wav from the video file. The command is an argument list run
    # without a shell, so paths containing spaces or shell metacharacters are
    # passed to ffmpeg verbatim. check_call raises on a failed extraction
    # instead of silently continuing with a stale or missing output file.
    subprocess.check_call(
        ["ffmpeg", "-i", input_file, "-acodec", "pcm_s16le", "-y", output_file]
    )

    # Load file
    y, sr = librosa.load(output_file, sr=22050)

    # Downsample
    new_sr = 11025
    resampled = resample(y, sr, new_sr)

    # Normalize to maximum possible amplitude
    normed = normalize(resampled)

    normed_mono = downmix_to_mono(normed)

    if plot:
        # The two signals have different sampling rates, so plot them against
        # time in seconds; on a sample-index axis they would not line up.
        fig, ax = plt.subplots()
        ax.plot(np.arange(len(y)) / float(sr), y, label="loaded")
        ax.plot(
            np.arange(len(normed_mono)) / float(new_sr),
            normed_mono,
            label="processed",
        )
        ax.set_xlabel("time [s]")
        ax.set_ylabel("amplitude")
        ax.legend()
        plt.show()

In [8]:
# Processes every matching video file in a directory.
def preprocessDirectory(dir_name, extensions=(".mp4",)):
    """Run ``preprocess`` (with plotting disabled) on each video in ``dir_name``.

    Args:
        dir_name: Directory whose entries are scanned (not recursively).
        extensions: File name suffix, or tuple of suffixes, selecting the
            entries to process. Matching ignores case, so ``.MP4`` is picked
            up as well. Defaults to ``(".mp4",)``.
    """
    if isinstance(extensions, str):
        extensions = (extensions,)
    suffixes = tuple(ext.lower() for ext in extensions)

    # os.listdir returns entries in arbitrary order; sort for a reproducible run.
    for filename in sorted(os.listdir(dir_name)):
        if filename.lower().endswith(suffixes):
            preprocess(os.path.join(dir_name, filename), plot=False)