# Feature Extraction Notebook

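This notebook extracts variance-of-zero-crossing-rate features from a subset of the speech and music audio files, saves them to CSV, and compares the feature statistics of the two classes.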

In [2]:
# Import the required libraries
import librosa
import matplotlib.pyplot as plt
import numpy as np
import librosa.display
import scipy.fftpack
import math
import soundfile as sf
import sklearn.preprocessing
import pandas as pd

from scipy.io import wavfile

from sklearn.neural_network import MLPClassifier

# Required to train and split the data
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

# Required to import the data
from os import listdir

In [3]:
# Set Directories
speech_directory_1 = '../dataset/Speech_1/'
speech_directory_2 = '../dataset/Speech_2/'
music_directory_1 = '../dataset/Music/'

# Fixed seed so that the shuffled order (and therefore the subset of files
# picked by later cells that slice the first N entries) is the same on every run.
shuffle_seed = 0

# Acquire files. listdir() returns entries in arbitrary order, so sort them
# first; otherwise the seeded shuffle below would still not be reproducible.
speech_files_1 = [speech_directory_1 + filename for filename in sorted(listdir(speech_directory_1))]
speech_files_2 = [speech_directory_2 + filename for filename in sorted(listdir(speech_directory_2))]
music_files_1 = [music_directory_1 + filename for filename in sorted(listdir(music_directory_1))]

# Shuffle Files
speech_files = shuffle(np.array(speech_files_1 + speech_files_2), random_state=shuffle_seed)
music_files = shuffle(np.array(music_files_1), random_state=shuffle_seed)

print("Number of Speech files : {}".format(len(speech_files)))
print("Number of Music files : {}".format(len(music_files)))

Number of Speech files : 7577
Number of Music files : 7997


## Read Audio Files and Save Variance of Zero-Crossing Rates

In [None]:
def getZeroCrossRateVar(y, sr, frame_freq=50, hop_length_frac=3/4):

    """
    Computes the average local variance of the zero-crossing rate of a signal.

    The zero-crossing rate is computed per frame, the per-frame rates are
    grouped into consecutive windows of `frame_freq` frames (the last window
    may be shorter), the variance of each window is taken, and the window
    variances are averaged.

    Arguments:
        y (list of float): list of data points of the audio signal
        sr (int): sample rate of the audio signal
        frame_freq (int): used twice: a frame is sr / frame_freq samples long,
            and frame_freq is also the number of frames per variance window
        hop_length_frac (float): hop length as a fraction of the frame length

    Returns:
        avg_var_zero_crossing (float): average variance of zero-crossing rate,
            or NaN when the signal yields no frames

    Raises:
        ValueError: if frame_freq is smaller than 1
    """

    window_size = int(frame_freq)
    if window_size < 1:
        raise ValueError("frame_freq must be a positive integer")

    # Determine frame length
    frame_length = int(sr / frame_freq)                   # number of samples a frame is composed of
    hop_length = int(frame_length * hop_length_frac)      # delay from start of one frame to start of next frame

    # Determine Zero-Crossing Rate per Frame (result has shape (1, number_of_frames))
    crossrate = librosa.feature.zero_crossing_rate(y, frame_length=frame_length, hop_length=hop_length)
    frame_rates = crossrate[0]

    # Calculate Local Variance of Zero-Crossing Rate.
    # Slice ends are exclusive, so `start + window_size` keeps all frames of a
    # window; stepping through range() also yields the shorter trailing window.
    local_var_zero_crossing = [
        np.var(frame_rates[start:start + window_size])
        for start in range(0, frame_rates.size, window_size)
    ]

    if not local_var_zero_crossing:
        return float('nan')

    # Calculate Average Variance of Zero-Crossing Rate
    avg_var_zero_crossing = float(np.average(local_var_zero_crossing))

    return avg_var_zero_crossing

In [5]:
%%time

# To clear output
from IPython.display import clear_output

# Set parameters
total_set_size = 1000       # number of files to process in total (half speech, half music)
frame_freq = 50             # a frame is sr / frame_freq samples long; also the number of
                            # consecutive frames grouped into one local-variance window
hop_length_frac = 3/4       # hop length as a fraction of the frame length


def _zero_cross_rate_variances(filename, frame_freq, hop_length_frac):
    """
    Loads one audio file and computes its zero-crossing-rate variance features.

    Arguments:
        filename (str): path of the audio file to load
        frame_freq (int): frame length divisor and local-variance window size
        hop_length_frac (float): hop length as a fraction of the frame length

    Returns:
        (avg_var_zero_crossing, var_entire_segment): the average of the
        per-window variances of the zero-crossing rate (NaN if the file yields
        no frames), and the variance of the zero-crossing rate over all frames
    """
    # Load File
    y, sr = librosa.load(filename)

    # Determine frame length
    frame_length = int(sr / frame_freq)                   # number of samples a frame is composed of
    hop_length = int(frame_length * hop_length_frac)      # delay from start of one frame to start of next frame

    # Determine Zero-Crossing Rate per Frame
    crossrate = librosa.feature.zero_crossing_rate(y, frame_length=frame_length, hop_length=hop_length)
    frame_rates = crossrate[0]

    # Calculate Local Variance of Zero-Crossing Rate.
    # Slice ends are exclusive, so every window keeps all `frame_freq` frames
    # (the previous `- 1` silently dropped the last frame of each window).
    local_var_zero_crossing = [
        np.var(frame_rates[start:start + frame_freq])
        for start in range(0, frame_rates.size, frame_freq)
    ]

    # Calculate Average Variance of Zero-Crossing Rate
    if local_var_zero_crossing:
        avg_var_zero_crossing = np.average(local_var_zero_crossing)
    else:
        avg_var_zero_crossing = np.nan

    return avg_var_zero_crossing, np.var(crossrate)


all_var_zero_crossings = []
all_var_zero_crossings_entire_segment = []
all_labels = []

# Label 0 = speech, label 1 = music; each class contributes half of the set
files_per_class = int(total_set_size / 2)
labelled_files = (
    [(filename, 0) for filename in speech_files[0:files_per_class]]
    + [(filename, 1) for filename in music_files[0:files_per_class]]
)

counter = 0

# Extract Zero crossing Rates of Speech Files, then of Music Files
for filename, label in labelled_files:

    avg_var_zero_crossing, var_entire_segment = _zero_cross_rate_variances(
        filename, frame_freq, hop_length_frac
    )

    # Append to list (NaN never compares equal to anything, so `!= np.nan`
    # is always True; np.isnan is the check that actually filters NaNs)
    if not np.isnan(avg_var_zero_crossing):
        all_var_zero_crossings.append(avg_var_zero_crossing)
        all_var_zero_crossings_entire_segment.append(var_entire_segment)
        all_labels.append(label)

    # Update user on progress, relative to the number of files actually
    # selected so that 100 % is reached even if a class has fewer files
    counter += 1
    clear_output(wait=True)
    print("Progress : {:3.2f} %".format(counter / len(labelled_files) * 100))


# NOTE(review): all_var_zero_crossings_entire_segment is not written to disk,
# so it only exists while this kernel session is alive.
np.savetxt('var_zc1.csv', all_var_zero_crossings, delimiter=',')
np.savetxt('labels_zc1.csv', all_labels, delimiter=',',fmt='%d')


Progress : 100.00 %
Wall time: 9min


## Read Previously Saved Variance of Zero-Crossing Rates

In [17]:
# Reload the feature values and their class labels from CSV, replacing
# whatever the variables currently hold in this kernel session.
# NOTE(review): the extraction cell writes 'var_zc1.csv' / 'labels_zc1.csv',
# while this cell reads 'var_zc.csv' / 'labels_zc.csv' -- confirm which files are intended.
all_var_zero_crossings = np.loadtxt(
    'var_zc.csv',
    dtype=np.double,
    delimiter=',',
)
all_labels = np.loadtxt(
    'labels_zc.csv',
    delimiter=',',
)

## Average and Variance of the loaded Values

In [7]:
def _values_with_label(values, label):
    """Return the entries of `values` whose position in `all_labels` holds `label`."""
    return [value for index, value in enumerate(values) if all_labels[index] == label]


# Split both feature lists by class: label 0 is speech, label 1 is music
speech_entire_segment = _values_with_label(all_var_zero_crossings_entire_segment, 0)
music_entire_segment = _values_with_label(all_var_zero_crossings_entire_segment, 1)

speech = _values_with_label(all_var_zero_crossings, 0)
music = _values_with_label(all_var_zero_crossings, 1)

In [8]:
# Each row: statistic name, function computing it, label suffix, speech values, music values.
# Rows are printed in order, separated by a blank line, speech before music.
summary_rows = [
    ("Average Variance", np.average, "", speech, music),
    ("Average Variance", np.average, " (entire segment)", speech_entire_segment, music_entire_segment),
    ("Variance of Variance", np.var, "", speech, music),
    ("Variance of Variance", np.var, " (entire segment)", speech_entire_segment, music_entire_segment),
]

for row_index, (stat_name, stat_func, suffix, speech_values, music_values) in enumerate(summary_rows):
    if row_index > 0:
        print()
    # Building every label from one template also fixes the former
    # "RateMusic" label, which was missing a space.
    for class_name, values in (("Speech", speech_values), ("Music", music_values)):
        print("{} of Zero-Crossing Rate {}{}: {:.3E}".format(stat_name, class_name, suffix, stat_func(values)))

Average Variance of Zero-Crossing Rate Speech: 8.820E-03
Average Variance of Zero-Crossing Rate Music: 2.530E-03

Average Variance of Zero-Crossing Rate Speech (entire segment): 1.050E-02
Average Variance of Zero-Crossing Rate Music (entire segment): 3.624E-03

Variance of Variance of Zero-Crossing Rate Speech: 2.155E-05
Variance of Variance of Zero-Crossing Rate Music: 1.075E-05

Variance of Variance of Zero-Crossing Rate Speech (entire segment): 3.105E-05
Variance of Variance of Zero-Crossing RateMusic (entire segment): 2.302E-05
