# Feature Extraction Notebook

This notebook seems to extract features from the entire dataset

In [56]:
# Import the required libraries
import librosa
import matplotlib.pyplot as plt
import numpy as np
import librosa.display
import scipy.fftpack
import math
import soundfile as sf
import sklearn.preprocessing
import pandas as pd

from scipy.io import wavfile
from scipy.signal import find_peaks

from sklearn.neural_network import MLPClassifier

# Required to train and split the data
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

# Required to import the data
from os import listdir

In [33]:
# Set Directories
speech_directory_1 = '../dataset/Speech_1/'
speech_directory_2 = '../dataset/Speech_2/'
music_directory_1 = '../dataset/Music/'

# Auqire files
speech_files_1 = [speech_directory_1 + filename for filename in listdir(speech_directory_1)]
speech_files_2 = [speech_directory_2 + filename for filename in listdir(speech_directory_2)]
music_files_1 = [music_directory_1 + filename for filename in listdir(music_directory_1)]

speech_files = shuffle(np.array(speech_files_1 + speech_files_2))
music_files = shuffle(np.array(music_files_1))

# Shuffle Files

print("Number of Speech files : {}".format(len(speech_files)))
print("Number of Music files : {}".format(len(music_files)))

Number of Speech files : 7577
Number of Music files : 7997


In [39]:
# Get files from previous runs
np.savetxt('speech_files.csv', speech_files, delimiter=',',fmt="%s")
np.savetxt('music_files.csv', music_files, delimiter=',',fmt="%s")

In [40]:
speech_files = np.loadtxt('speech_files.csv', delimiter=',',dtype=str)
music_files = np.loadtxt('music_files.csv', delimiter=',',dtype=str)

print("Number of Speech files : {}".format(len(speech_files)))
print("Number of Music files : {}".format(len(music_files)))

Number of Speech files : 7577
Number of Music files : 7997


## Set the Parameters

In [75]:
# Set parameters
total_set_size = 8000
frame_freq = 50
hop_length_frac = 3/4

## Read Audio Files and Save Variance of Zero-Crossing Rates

In [42]:
def averageZeroCrossRateVar(y, sr, frame_freq):

    """
    Determines ...
    
    Arguments:
        y (list of float): list of data points of the audio signal
        sr (int): sample rate of the audio signal
    
    Returns:
        avg_var_zero_crossing (float): average variance of zero-crossing rate 
    """

    var_zero_crossings = []

    # Determine frame length
    frame_length = int (sr / frame_freq)          # number of samples a frame is composed of
    hop_length = int(frame_length*hop_length_frac)       # delay from start of one frame to start of next frame  

    # Determine Zero-Crossing Rate per Frame
    crossrate = librosa.feature.zero_crossing_rate(y,frame_length=frame_length,hop_length=hop_length)

     # Calculate Local Variance of Zero-Crossing Rate
    local_var_zero_crossing= []

    end=0
    for i in range ( 0 , int( crossrate.size/frame_freq) ):
        local_var_zero_crossing.append( np.var(crossrate[0][( frame_freq*i ):( (frame_freq)*(i+1) -1)]) )
        end = i+1
    if (frame_freq*end < crossrate.size) : local_var_zero_crossing.append( np.var(crossrate[0][( frame_freq*end ): crossrate.size ]) )

    # Calculate Average Variance of Zero-Crossing Rate
    return np.average(local_var_zero_crossing)

In [44]:
%%time

# To clear outout
from IPython.display import clear_output

all_var_zero_crossings = []
all_var_zero_crossings_entire_segment = []
all_labels = []
    
counter=0

# Extract Zero crossing Rates of Speech Files
for filename in speech_files[0:int(total_set_size/2)]:

    # Print files
    # print(filename)
    
    # Load File 
    y, sr = librosa.load(filename)

    # Calculate Average Variance of Zero-Crossing Rate
    avg_var_zero_crossing = averageZeroCrossRateVar(y,sr,frame_freq)

    # Append to list
    if (avg_var_zero_crossing!=np.nan):
        all_var_zero_crossings.append(avg_var_zero_crossing)
        all_labels.append(0)

    # Update user on progress
    counter+=1
    clear_output(wait=True)
    print("Progress : {:3.2f} %".format(counter/total_set_size*100))

# Extract Zero crossing Rates of Music Files
for filename in music_files[0:int(total_set_size/2)]:
    # Print files
    # print(filename)
    
    # Load File 
    y, sr = librosa.load(filename)

    # Calculate Average Variance of Zero-Crossing Rate
    avg_var_zero_crossing = averageZeroCrossRateVar(y,sr,frame_freq)

    # Append to list
    if (avg_var_zero_crossing!=np.nan):
        all_var_zero_crossings.append(avg_var_zero_crossing)
        all_labels.append(1)

    # Update user on progress
    counter+=1
    clear_output(wait=True)
    print("Progress : {:3.2f} %".format(counter/total_set_size*100))


np.savetxt('var_zc.csv', all_var_zero_crossings, delimiter=',')
np.savetxt('labels_zc.csv', all_labels, delimiter=',',fmt='%d')


Progress : 7.50 %


KeyboardInterrupt: 

## Read Previously Saved Variance of Zero-Crossing Rates

In [46]:
all_var_zero_crossings = np.loadtxt('var_zc.csv', delimiter=',',dtype=np.double)
all_labels = np.loadtxt('labels_zc.csv', delimiter=',')

print("Length of all_var_zero_crossings : {}".format(len(all_var_zero_crossings)))
print("Length of all_labels : {}".format(len(all_labels)))

Length of all_var_zero_crossings : 1000
Length of all_labels : 1000


## Average and Variance of the loaded Values

In [7]:
speech_entire_segment = [x for i,x in enumerate(all_var_zero_crossings_entire_segment) if all_labels[i]==0]
music_entire_segment = [x for i,x in enumerate(all_var_zero_crossings_entire_segment) if all_labels[i]==1]

speech = [x for i,x in enumerate(all_var_zero_crossings) if all_labels[i]==0]
music = [x for i,x in enumerate(all_var_zero_crossings) if all_labels[i]==1]

In [8]:
print("Average Variance of Zero-Crossing Rate Speech: {:.3E}".format( np.average(speech) ))
print("Average Variance of Zero-Crossing Rate Music: {:.3E}".format( np.average(music) ))

print()

print("Average Variance of Zero-Crossing Rate Speech (entire segment): {:.3E}".format( np.average(speech_entire_segment) ))
print("Average Variance of Zero-Crossing Rate Music (entire segment): {:.3E}".format( np.average(music_entire_segment) ))

print()

print("Variance of Variance of Zero-Crossing Rate Speech: {:.3E}".format( np.var(speech) ))
print("Variance of Variance of Zero-Crossing Rate Music: {:.3E}".format( np.var(music) ))

print()

print("Variance of Variance of Zero-Crossing Rate Speech (entire segment): {:.3E}".format( np.var(speech_entire_segment) ))
print("Variance of Variance of Zero-Crossing RateMusic (entire segment): {:.3E}".format( np.var(music_entire_segment) ))

Average Variance of Zero-Crossing Rate Speech: 8.820E-03
Average Variance of Zero-Crossing Rate Music: 2.530E-03

Average Variance of Zero-Crossing Rate Speech (entire segment): 1.050E-02
Average Variance of Zero-Crossing Rate Music (entire segment): 3.624E-03

Variance of Variance of Zero-Crossing Rate Speech: 2.155E-05
Variance of Variance of Zero-Crossing Rate Music: 1.075E-05

Variance of Variance of Zero-Crossing Rate Speech (entire segment): 3.105E-05
Variance of Variance of Zero-Crossing RateMusic (entire segment): 2.302E-05


## Variance of Onset Strength 

In [11]:
def varOnsetStrength(y, sr):
    onset = librosa.onset.onset_strength(y , sr)
    onset = onset / onset.max()
    return np.var(onset)

In [20]:
%%time

# To clear outout
from IPython.display import clear_output

counter = 0 

var_onset_strength = []

# Extract Zero crossing Rates of Speech Files
for filename in speech_files[0:int(total_set_size/2)]:
    y, sr = librosa.load(filename)

    var_onset_strength.append(varOnsetStrength(y,sr))
    
    # Update user on progress
    counter+=1
    clear_output(wait=True)
    print("Progress : {:3.2f} %".format(counter/total_set_size*100))

# Extract Zero crossing Rates of Speech Files
for filename in music_files[0:int(total_set_size/2)]:
    y, sr = librosa.load(filename)

    var_onset_strength.append(varOnsetStrength(y,sr))
    
    # Update user on progress
    counter+=1
    clear_output(wait=True)
    print("Progress : {:3.2f} %".format(counter/total_set_size*100))

np.savetxt('var_onset_detection.csv', var_onset_strength, delimiter=',')

Progress : 100.00 %
Wall time: 9min 16s


In [None]:
var_onset_detection = np.loadtxt('var_onset_detection.csv', delimiter=',',dtype=np.double)
all_labels = np.loadtxt('labels_zc.csv', delimiter=',')

print("Length of var_onset_detection : {}".format(len(var_onset_detection)))
print("Length of all_labels : {}".format(len(all_labels)))

In [47]:
onset_speech = [x for i,x in enumerate(var_onset_strength) if all_labels[i]==0]
onset_music = [x for i,x in enumerate(var_onset_strength) if all_labels[i]==1]

In [49]:
print("Average of Variance of Onset Strength Speech: {:.3E}".format( np.average(onset_speech) ))
print("Average of Variance of Onset Strength  Music: {:.3E}".format( np.average(onset_music) ))

print()

print("Variance of Variance of Onset Strength Speech: {:.3E}".format( np.var(onset_speech) ))
print("Variance of Variance of Onset Strength Music: {:.3E}".format( np.var(onset_music) ))

Average of Variance of Onset Strength Speech: 2.617E-02
Average of Variance of Onset Strength  Music: 1.572E-02

Variance of Variance of Onset Strength Speech: 7.033E-05
Variance of Variance of Onset Strength Music: 6.429E-05


## Peeks in Onset Strength 

In [54]:
def peekRateOnsetStrength(y, sr):
    onset = librosa.onset.onset_strength(y , sr)
    onset = onset / onset.max()
    peaks = find_peaks(onset)
    return len(peaks)/len(onset)

In [76]:
%%time

# To clear outout
from IPython.display import clear_output

counter = 0 

onset_strength_peak_rate = []

# Extract Zero crossing Rates of Speech Files
for filename in speech_files[0:int(total_set_size/2)]:
    y, sr = librosa.load(filename)

    onset_strength_peak_rate.append(peekRateOnsetStrength(y,sr))
    
    # Update user on progress
    counter+=1
    clear_output(wait=True)
    print("Progress : {:3.2f} %".format(counter/total_set_size*100))

# Extract Zero crossing Rates of Speech Files
for filename in music_files[0:int(total_set_size/2)]:
    y, sr = librosa.load(filename)

    onset_strength_peak_rate.append(peekRateOnsetStrength(y,sr))
    
    # Update user on progress
    counter+=1
    clear_output(wait=True)
    print("Progress : {:3.2f} %".format(counter/total_set_size*100))

np.savetxt('onset_strength_peak_rate.csv', onset_strength_peak_rate, delimiter=',')

Progress : 100.00 %
Wall time: 1h 18min 5s


In [78]:
all_labels = []

for filename in music_files[0:int(total_set_size/2)]:
    all_labels.append(0)

for filename in music_files[0:int(total_set_size/2)]:
    all_labels.append(1)

np.savetxt('labels_zc.csv', all_labels, delimiter=',',fmt='%d')

In [79]:
onset_strength_peak_rate = np.loadtxt('onset_strength_peak_rate.csv', delimiter=',',dtype=np.double)
all_labels = np.loadtxt('labels_zc.csv', delimiter=',')

print("Length of onset_strength_peak_rate : {}".format(len(onset_strength_peak_rate)))
print("Length of all_labels : {}".format(len(all_labels)))

Length of onset_strength_peak_rate : 8000
Length of all_labels : 8000


In [80]:
onset_peak_rate_speech = [x for i,x in enumerate(onset_strength_peak_rate) if all_labels[i]==0]
onset_peak_rate_music = [x for i,x in enumerate(onset_strength_peak_rate) if all_labels[i]==1]

In [81]:
print("Average of Onset Strength Peak Rate Speech: {:.3E}".format( np.average(onset_peak_rate_speech) ))
print("Average of Onset Strength Peak Rate Music: {:.3E}".format( np.average(onset_peak_rate_music) ))

print()

print("Variance of Onset Strength Peak Rate Speech: {:.3E}".format( np.var(onset_peak_rate_speech) ))
print("Variance of Onset Strength Peak Rate Music: {:.3E}".format( np.var(onset_peak_rate_music) ))

Average of Onset Strength Peak Rate Speech: 7.878E-03
Average of Onset Strength Peak Rate Music: 1.570E-03

Variance of Onset Strength Peak Rate Speech: 1.636E-05
Variance of Onset Strength Peak Rate Music: 1.996E-06
