In [2]:
import numpy as np 
import librosa
import glob
import python_speech_features as psf
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from IPython.display import Audio
import scipy.io.wavfile as wav
from scipy import signal
from sklearn import svm
from scipy.signal import stft, fftconvolve
import pandas as pd
import sys
from sklearn.feature_selection import *
import statsmodels.api as sm
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier

In [5]:
# functions to load dataset
def clean_audio(audio , sr):
    cutoff_freq = 4000  # Hz
    nyquist_freq = 0.5 * sr  # Hz (half the sampling rate)
    order = 4
    # Calculate filter coefficients
    coeff1, coeff2 = signal.butter(order, cutoff_freq / nyquist_freq, 'low')
    # Apply filter to signal
    audio = signal.filtfilt(coeff1, coeff2, audio)
    return audio 

def get_labels(folder):
    label_string = [file[-10:-7] for file in folder]
    labels = set(label_string)
    mapping = dict( zip(labels , range(len(labels)) ))
    return np.array([mapping[element] for element in label_string])
    

def get_mfcc(folder , numFeatures): 
    MFCC = np.empty([len(folder)  , numFeatures])
    for i , file in enumerate(folder):
        audio , sr = librosa.load(file , sr = 48000)
        audio = clean_audio(audio , sr)
        mfcc = librosa.feature.mfcc(audio, sr=sr, n_mfcc=numFeatures)
        MFCC[i] = np.mean(mfcc, axis=1)
    
    return MFCC 
    
def get_centroids(folder):
    SC = np.empty([len(folder)])
    
    for i , file in enumerate(folder):
        audio , sr = librosa.load(file , sr = 48000)
        audio = clean_audio(audio , sr)
        SC[i] = np.mean(librosa.feature.spectral_centroid(audio, sr=sr))
        
    return SC

def get_bispectrum(folder): #
    window_size = 1024
    hop_size = 512
    BS = np.empty([len(folder) , 513])
    
    for i,file in enumerate(folder):
        audio , sr = librosa.load(file , sr = 48000)
        audio = clean_audio(audio , sr)
        f, t, stft_data = stft(audio, fs=sr, window='hann', nperseg=window_size, noverlap=hop_size)

        # calculate the magnitude of the STFT data
        stft_mag = np.abs(stft_data)

        # compute the bispectrum using FFT convolutions
        stft_mag_fft = np.fft.fft(stft_mag, axis=0)
        stft_mag_fft_squared = np.abs(stft_mag_fft)**2
        bisp_data_fft = np.fft.ifft(stft_mag_fft_squared, axis=0)
        bisp_data = np.real(bisp_data_fft)

        # Normalize the bispectrum data
        bisp_data /= np.max(bisp_data)
        bisp_data = np.mean(bisp_data , axis=1) #taking average of frequencies over time
        BS[i] = bisp_data
        
    return BS

def get_chromagram(folder):
    hop_len = 512
    n = 36
    chroma = np.empty([len(folder) , n])
    
    for i , file in enumerate(folder):
        audio , sr = librosa.load(file, sr=48000)
        audio = clean_audio(audio , sr)
        chromagram = librosa.feature.chroma_cqt(y=audio, sr=sr, hop_length=hop_len, n_chroma=n)
        chroma[i] = np.mean(chromagram, axis=1)
    return chroma
    
def get_spectrogram(folder):
    hop_len = 512
    n = 36
    spec = np.empty([len(folder) , n])
    
    for i , file in enumerate(folder):
        audio , sr = librosa.load(file, sr=48000)
        audio = clean_audio(audio , sr)
        mel_spec = librosa.feature.melspectrogram(audio , sr=sr , n_fft = 2048 , hop_length = hop_len , n_mels = n)
        spec[i] = np.mean(mel_spec, axis=1)
    return spec

def get_features(folder_path): #iterates over every wav file to extract features
    n_mfcc=28
    folder = glob.glob(folder_path)
    mfcc = get_mfcc(folder , n_mfcc)
    bispectrum = get_bispectrum(folder)
    chroma = get_chromagram(folder)
    spec = get_spectrogram(folder)
    centroid = get_centroids(folder)
    #load data from storage if possible
    X = np.append(mfcc , np.append(chroma , centroid.reshape(-1,1) , axis = 1) , axis = 1)
    return X

In [None]:
%%time
folder_path = 'SentimentAnalysisDataset/NoiseAudioWAV/*'
numFeatures = 28
folder = glob.glob(folder_path)
X = get_features(folder_path)
Y = get_labels(glob.glob(folder_path))


In [15]:
#used these cells if you have data in hardrive
mfcc = np.load('mfcc.npy')
bispectrum = np.load('bispectrum.npy')
chroma = np.load('chroma.npy')
mel_spec = np.load('mel_spec.npy')
centroids = np.load('centroid.npy')

In [16]:
#load data from storage if possible
X = np.append(mfcc , np.append(chroma , centroids.reshape(-1,1) , axis = 1) , axis = 1)

# Normalize the data
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Feature Selection


In [17]:
#using correlation 
def get_correlation(X , Y):
    correlation = np.empty([len(X)])
    for i , feature in enumerate(X) :
        correlation[i] = np.corrcoef(feature , Y)[0,1]
    return correlation

def get_features_corr(X , Y , k):#extracting features based on correlation
    corr = get_correlation(X.T , Y)
    corr = np.abs(corr)
    highest_corr = np.flip(np.argsort(corr))[:k]
    X = np.take(X , highest_corr , axis = 1)
    return X

In [18]:
var_thr = VarianceThreshold(threshold = 1)
# extracting features based on variance
X1 = var_thr.fit_transform(X)

#extracting features with highest correlation with Y 
X2 = get_features_corr(X , Y , 30)

#extracting features using recursive feature elimination
model = LogisticRegression(max_iter = 1000)
rfe = RFE(model, n_features_to_select=30)
X3 = rfe.fit_transform(X, Y)

# PCA
pca = PCA(n_components=30)
X4 = pca.fit_transform(X)


In [19]:
X_list = [X, X1, X2, X3, X4]

# Training

In [20]:
#train test split 
X_train , X_test , Y_train , Y_test = train_test_split(X3 , Y , train_size = 0.75 , shuffle = 1)

In [None]:
#this is code used to create a summary table of how different feature extracting methods have affected accuracy score
summary_acc = np.empty([len(X_list),5])
summary_f1 = np.empty([len(X_list) , 5])
for i , X0 in enumerate(X_list):
    accuracy=[]
    f1 = []
    
    X_train , X_test , Y_train , Y_test = train_test_split(X0 , Y , train_size = 0.75 , shuffle = 1)

    knn = KNeighborsClassifier(n_neighbors = 30)
    knn.fit(X_train , Y_train)
    prediction_knn = knn.predict(X_test)
    f1_score(Y_test , prediction_knn, average='weighted')
    accuracy.append(accuracy_score(Y_test , prediction_knn))
    f1.append(f1_score(Y_test , prediction_knn , average = 'macro') )
    
    
    log_reg = LogisticRegression(max_iter = 1000)
    log_reg.fit(X_train, Y_train)
    prediction_lr = log_reg.predict(X_test)
    accuracy.append(accuracy_score(Y_test , prediction_lr))
    f1.append(f1_score(Y_test , prediction_lr , average = 'macro') )

    
    
    svm_model = svm.SVC(kernel='rbf')
    svm_model.fit(X_train, Y_train)
    prediction_svm = svm_model.predict(X_test)
    accuracy.append(accuracy_score(Y_test , prediction_svm))
    f1.append(f1_score(Y_test , prediction_svm , average = 'macro') )

    
    gnb = GaussianNB()
    gnb.fit(X_train, Y_train)
    prediction_gnb = gnb.predict(X_test)
    accuracy.append(accuracy_score(Y_test , prediction_gnb))
    f1.append(f1_score(Y_test , prediction_gnb  , average = 'macro') )

    
    mlp = MLPClassifier(hidden_layer_sizes=(100, 100, 100), max_iter=1000)
    mlp.fit(X_train, Y_train)
    prediction_mlp = mlp.predict(X_test)
    accuracy.append(accuracy_score(Y_test , prediction_mlp))
    f1.append(f1_score(Y_test , prediction_mlp ,  average = 'macro'))

    
    summary_acc[i] = accuracy
    summary_f1[i] = f1

In [22]:
#rows indicate data after various selection techniques and columns are scores for each classifier
summary_acc


array([[0.41536808, 0.44760881, 0.48361096, 0.39011284, 0.42289092],
       [0.3707684 , 0.41106932, 0.44814616, 0.33207953, 0.38420204],
       [0.36808168, 0.42289092, 0.45351961, 0.33852767, 0.40515852],
       [0.4013971 , 0.43578721, 0.4545943 , 0.36969371, 0.40247179],
       [0.39387426, 0.40515852, 0.4615798 , 0.39871037, 0.41214401]])

In [23]:
summary_f1


array([[0.4090571 , 0.43856673, 0.47698498, 0.36059484, 0.42200913],
       [0.36742331, 0.39890981, 0.44367277, 0.31061586, 0.38298349],
       [0.35962583, 0.4141885 , 0.44384428, 0.31145215, 0.40437018],
       [0.38963205, 0.42576565, 0.44293021, 0.33844034, 0.39931523],
       [0.38377167, 0.39361662, 0.45549714, 0.39248463, 0.40908559]])