In [3]:
from __future__ import print_function
import os
import numpy as np
import librosa
import warnings
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import display, HTML

warnings.filterwarnings("ignore")


In [4]:
def getBeat(y, sr):
    """Run librosa's beat tracker on an audio signal.

    Args:
        y: audio time series, handed unchanged to ``librosa.beat.beat_track``.
        sr: sampling rate of ``y``.

    Returns:
        The two-element tuple ``(tempo, beat_frames)`` produced by the tracker.
    """
    tracked = librosa.beat.beat_track(y=y, sr=sr)
    estimated_tempo, frames = tracked
    return estimated_tempo, frames

def getMFCC(y, sr):
    """Compute 13 MFCCs for a signal plus their per-column average.

    Args:
        y: audio time series.
        sr: sampling rate of ``y``.

    Returns:
        ``(mfcc, mfcc_averaged)``: the matrix returned by
        ``librosa.feature.mfcc`` (hop length 512, 13 coefficients) and the mean
        taken along axis 0 of that matrix, i.e. one value per column.
    """
    coefficients = librosa.feature.mfcc(y=y, sr=sr, hop_length=512, n_mfcc=13)

    # Reducing along axis 0 collapses the rows, leaving one mean per column.
    column_means = np.mean(coefficients, axis=0)

    return coefficients, column_means

def getPitchesAndMagnitudes(y, sr):
    """Run ``librosa.piptrack`` on a signal.

    Args:
        y: audio time series.
        sr: sampling rate of ``y``.

    Returns:
        The ``(pitches, magnitudes)`` pair returned by ``librosa.piptrack``.
    """
    tracked = librosa.piptrack(y=y, sr=sr)
    pitch_matrix, magnitude_matrix = tracked
    return pitch_matrix, magnitude_matrix

def getTempo(y, sr):
    """Estimate tempo from the onset envelope without aggregating over time.

    NOTE: a second ``getTempo`` is defined further down in this same cell.
    Python binds the name to the most recent ``def``, so that later definition
    is the one callers actually reach; this variant is effectively shadowed.

    Args:
        y: audio time series.
        sr: sampling rate of ``y``.

    Returns:
        The result of ``librosa.beat.tempo`` called with ``aggregate=None``
        (per librosa's documentation this disables aggregation, giving a
        time-varying tempo estimate rather than a single value).
    """
    # Pass the signal by keyword, like getFlux does: recent librosa releases
    # declare these parameters keyword-only, so a positional ``y`` is rejected.
    onset_env = librosa.onset.onset_strength(y=y, sr=sr)
    dtempo = librosa.beat.tempo(onset_envelope=onset_env, sr=sr, aggregate=None)

    return dtempo

def getCentroid(y, sr):
    """Spectral centroid of a signal, as computed by librosa.

    Args:
        y: audio time series.
        sr: sampling rate of ``y``.

    Returns:
        The value returned by ``librosa.feature.spectral_centroid``.
    """
    centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
    return centroid

def getRollOff(y, sr):
    """Spectral roll-off of a signal, as computed by librosa.

    Args:
        y: audio time series.
        sr: sampling rate of ``y``.

    Returns:
        The value returned by ``librosa.feature.spectral_rolloff``.
    """
    rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
    return rolloff

def getZeroCrossing(y):
    """Zero-crossing rate of a signal, as computed by librosa.

    Args:
        y: audio time series.

    Returns:
        The value returned by ``librosa.feature.zero_crossing_rate``.
    """
    crossing_rate = librosa.feature.zero_crossing_rate(y)
    return crossing_rate

def getMelSpec(y, sr):
    """Mel spectrogram of a signal using 128 mel bands.

    Args:
        y: audio time series.
        sr: sampling rate of ``y``.

    Returns:
        The value returned by ``librosa.feature.melspectrogram``.
    """
    return librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)


def getTempo(y, sr):
    """Estimate the tempo of a signal from its onset-strength envelope.

    This definition follows an earlier ``getTempo`` in the same cell (one that
    requested ``aggregate=None``). Being the later ``def``, it is the one the
    name resolves to at call time.

    Args:
        y: audio time series.
        sr: sampling rate of ``y``.

    Returns:
        Whatever ``librosa.beat.tempo`` returns with its default aggregation.
        The feature-extraction code in this notebook reads element ``[0]`` of it.
    """
    # Keyword arguments throughout, matching getFlux: recent librosa releases
    # declare these parameters keyword-only, so positional ``y``/``sr`` fail there.
    onset_env = librosa.onset.onset_strength(y=y, sr=sr)
    return librosa.beat.tempo(onset_envelope=onset_env, sr=sr)

#compute energy over fixed short time intervals
def getEnergy(y, frame_len=512, hop_len=256):
    """Short-time energy of a signal over (possibly overlapping) frames.

    A frame starts every ``hop_len`` samples and covers up to ``frame_len``
    samples; frames near the end of the signal are simply shorter. The energy
    of a frame is the sum of the squared magnitudes of its samples.

    Args:
        y: the signal; any sequence accepted by ``np.asarray``. Frames are cut
            along the first axis.
        frame_len: number of samples per frame (default 512).
        hop_len: distance in samples between frame starts (default 256).

    Returns:
        A float64 array with one energy value per frame; empty when ``y`` is
        empty.

    Raises:
        ValueError: if ``frame_len`` or ``hop_len`` is not positive.
    """
    if frame_len <= 0 or hop_len <= 0:
        raise ValueError("frame_len and hop_len must be positive")

    samples = np.asarray(y)

    # Square once up front instead of once per overlapping frame. Going through
    # float64 keeps the accumulation precise and avoids integer overflow;
    # |x|**2 is the same quantity as the original |x**2|.
    magnitudes = np.abs(samples).astype(np.float64)
    power = magnitudes * magnitudes

    frame_starts = range(0, len(samples), hop_len)
    return np.array([power[start:start + frame_len].sum(axis=0) for start in frame_starts])

def getFlux(y, sr):
    """Onset-strength envelope of a signal, used in this notebook as "flux".

    Args:
        y: audio time series.
        sr: sampling rate of ``y``.

    Returns:
        The value returned by ``librosa.onset.onset_strength``.
    """
    onset_envelope = librosa.onset.onset_strength(y=y, sr=sr)
    return onset_envelope

In [5]:
#this function extracts the chosen features from all preprocessed audio files in a directory and returns a numpy array
def calcFeatureMatrixForDirectory(dir_name):
    """Build one 19-value feature row for every ``.wav`` file in ``dir_name``.

    Each file is loaded with ``librosa.load`` at a sampling rate of 11025 and
    summarised by ``_extractFeatureVector``.

    Args:
        dir_name: path of the directory to scan (top level only).

    Returns:
        ``(feature_matrix, names)``. ``names`` is the list of ``.wav`` file
        names in sorted order and ``feature_matrix`` is a float array of shape
        ``(len(names), 19)`` whose row ``k`` describes ``names[k]``.
    """
    NR_OF_FEATURES = 19

    # Only .wav entries get a row; sizing the matrix from the raw directory
    # listing would leave all-zero rows for every other entry and break the
    # row <-> name correspondence. os.listdir returns entries in arbitrary
    # order, so sort to make row positions reproducible (callers group rows
    # by position).
    names = sorted(entry for entry in os.listdir(dir_name) if entry.endswith(".wav"))
    feature_matrix = np.zeros((len(names), NR_OF_FEATURES))

    for row, filename in enumerate(names):
        y, sr = librosa.load(os.path.join(dir_name, filename), sr=11025)
        feature_matrix[row, :] = _extractFeatureVector(y, sr)

    return feature_matrix, names


def _extractFeatureVector(y, sr):
    """Summarise one loaded signal as the 19 features of a feature-matrix row.

    Column order: centroid mean/std, roll-off mean/std, zero-crossing mean/std,
    tempo, energy mean/std, flux mean/std, mel-spectrogram mean/std,
    beat-frame mean/std, MFCC mean/std, pitch mean/std.
    """
    cent = getCentroid(y, sr)
    roll = getRollOff(y, sr)
    zero_crossing = getZeroCrossing(y)

    tempo = getTempo(y, sr)[0]

    energy = getEnergy(y)
    flux = getFlux(y, sr)

    # The 2-D features are first averaged along axis 0 (one value per column)
    # and the mean/std are then taken over that averaged curve.
    melSpec_combined = getMelSpec(y, sr).mean(axis=0)

    # Only the beat positions are used here. The tracker's own tempo must not
    # overwrite the ``tempo`` computed above, which is the value that belongs
    # in the "tempo" column.
    _, beat_frames = getBeat(y, sr)

    _, mfcc_averaged = getMFCC(y, sr)

    pitches, _ = getPitchesAndMagnitudes(y, sr)
    pitches_combined = pitches.mean(axis=0)

    return np.array([np.mean(cent), np.std(cent),
                     np.mean(roll), np.std(roll),
                     np.mean(zero_crossing), np.std(zero_crossing),
                     tempo,
                     np.mean(energy), np.std(energy),
                     np.mean(flux), np.std(flux),
                     np.mean(melSpec_combined), np.std(melSpec_combined),
                     np.mean(beat_frames), np.std(beat_frames),
                     np.mean(mfcc_averaged), np.std(mfcc_averaged),
                     np.mean(pitches_combined), np.std(pitches_combined)
                     ])

In [9]:
#this function combines everything required in task 2 and 3, building and visualizing the dataframes for movies and genres, drawing the graphs etc.
def processAndDisplayAudioDirectoryFeatures(dir = "../videos/processed", showDataframes=False, plot=False):
    """Extract features for a directory and optionally tabulate and plot them.

    The rows returned by ``calcFeatureMatrixForDirectory`` are grouped purely
    by position: every 4 consecutive rows form one movie (5 movies), and rows
    0-7, 8-15 and 16-19 form the THRILLER, DANCE and DOCUMENTARY genres. Groups
    for which the matrix has no rows keep an all-zero aggregate.

    Args:
        dir: directory passed to ``calcFeatureMatrixForDirectory``. (The name
            shadows the ``dir`` builtin; it is kept so keyword callers still
            work.)
        showDataframes: when true, display five dataframes: the full feature
            matrix, then movie mean, movie std, genre mean and genre std.
        plot: when true, draw one line chart per feature (one line per genre),
            save it as ``"../plots/" + feature name`` and show it.

    Returns:
        None.

    Raises:
        IndexError: if the feature matrix has more than 20 rows, which the
            fixed 5-movie layout cannot represent.
    """
    feature_matrix, file_names = calcFeatureMatrixForDirectory(dir)

    feature_names = ["centroid_mean", "centroid std", "roll-off_mean", "roll-off_std", "zero crossing_mean",
                     "zero crossing_std", "tempo", "energy mean", "energy std", "flux mean", "flux std", "melSpec_mean", "melSpec_std",
                     "beat_mean", "beat_std", "mfcc_mean", "mfcc_std", "pitch_mean", "pitch_std"]
    movie_names = ['DEVILS ADVOCATE', 'PULP FICTION', 'HOW SHE MOVE', 'LAST DANCE', 'ARCTIC TALE']
    genre_names = ['THRILLER', 'DANCE', 'DOCUMENTARY']

    # Positional layout of the rows as (start, stop) slices.
    movie_row_groups = [(start, start + 4) for start in range(0, 20, 4)]
    genre_row_groups = [(0, 8), (8, 16), (16, 20)]

    expected_rows = genre_row_groups[-1][1]
    if feature_matrix.shape[0] > expected_rows:
        # Same exception type the fixed-size aggregation used to hit, but with
        # a message that says what is actually wrong.
        raise IndexError("expected at most %d feature rows (5 movies x 4 files) but got %d"
                         % (expected_rows, feature_matrix.shape[0]))

    movie_matrix_mean = _aggregateRowGroups(feature_matrix, movie_row_groups, np.mean)
    genre_matrix_mean = _aggregateRowGroups(feature_matrix, genre_row_groups, np.mean)
    movie_matrix_std = _aggregateRowGroups(feature_matrix, movie_row_groups, np.std)
    genre_matrix_std = _aggregateRowGroups(feature_matrix, genre_row_groups, np.std)

    if showDataframes:
        mean_columns = [feat_name + ' Mean' for feat_name in feature_names]
        std_columns = [feat_name + ' Std' for feat_name in feature_names]

        dataset = _labelledFrame(feature_matrix, file_names, feature_names, 'COMPLETE FEATURE MATRIX')
        movie_dataset_mean = _labelledFrame(movie_matrix_mean, movie_names, mean_columns, 'MOVIE MEAN MATRIX')
        movie_dataset_std = _labelledFrame(movie_matrix_std, movie_names, std_columns, 'MOVIE STD MATRIX')
        genre_dataset_mean = _labelledFrame(genre_matrix_mean, genre_names, mean_columns, 'GENRE MEAN MATRIX')
        genre_dataset_std = _labelledFrame(genre_matrix_std, genre_names, std_columns, 'GENRE STD MATRIX')

        display(dataset)

        display(movie_dataset_mean)
        display(movie_dataset_std)

        display(genre_dataset_mean)
        display(genre_dataset_std)

    #plot results, save them into plots directory
    if plot:
        genre_labels = ["thriller", "dance", "documentary"]
        for n in range(feature_matrix.shape[1]):
            name = feature_names[n]

            fig, ax = plt.subplots()
            ax.set_ylabel(name)
            ax.set_xlabel('files')

            for label, (start, stop) in zip(genre_labels, genre_row_groups):
                values = feature_matrix[start:stop, n]
                # x runs 1..number of files actually present in the group
                ax.plot(np.arange(1, len(values) + 1), values, label=label)

            ax.legend()

            # Save before showing: once the figure has been shown it may
            # already be cleared, and saving afterwards can write a blank image.
            fig.savefig("../plots/" + name)
            plt.show()

            # Release the figure so 19 open figures do not pile up.
            plt.close(fig)


def _aggregateRowGroups(feature_matrix, row_groups, reducer):
    """Reduce each (start, stop) row slice column-wise; one output row per group."""
    aggregated = np.zeros((len(row_groups), feature_matrix.shape[1]))
    for group_idx, (start, stop) in enumerate(row_groups):
        rows = feature_matrix[start:stop]
        if rows.shape[0]:
            aggregated[group_idx] = reducer(rows, axis=0)
    return aggregated


def _labelledFrame(matrix, index, columns, title):
    """Wrap ``matrix`` in a DataFrame with row/column labels and a titled column axis."""
    frame = pd.DataFrame(matrix, index=index, columns=columns)
    return frame.rename_axis(title, axis=1)