In [46]:
import librosa
import soundfile
import os, glob, pickle
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
import sys
import wave
import struct
from scipy import signal
from itertools import chain
from numpy import *
from scipy import *

In [47]:
#DataFlair - Extract features (mfcc, chroma, mel) from a sound file
def extract_feature(file_name, mfcc, chroma, mel):
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate=sound_file.samplerate
        if chroma:
            stft=np.abs(librosa.stft(X))
        result=np.array([])
        if mfcc:
            mfccs=np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
            result=np.hstack((result, mfccs))
        if chroma:
            chroma=np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)
            result=np.hstack((result, chroma))
        if mel:
            mel=np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T,axis=0)
            result=np.hstack((result, mel))
        result=np.hstack((result,decision(file_name)))
    return result

In [48]:
def readWaveFile(fileName):
    waveFile = wave.open(fileName,'r')
    channels = waveFile.getnchannels()
    sampWidth = waveFile.getsampwidth()
    framesNumber = waveFile.getnframes()
    frameRate = waveFile.getframerate()
    frames = waveFile.readframes(framesNumber)
    data = struct.unpack("%dh" %  channels*framesNumber, frames)
    oneChannelData = data[::channels]
    oneChannelData = list(chain(oneChannelData))
    framesNumber = len(oneChannelData)
    waveFile.close()
    return oneChannelData, channels, sampWidth, frameRate, framesNumber

In [49]:
def getFreq(data,framesNumber,frameRate):
    time = float(framesNumber) / frameRate
    data = data * signal.nuttall(framesNumber)
    dataFFT = fft(data)
    absFFT = abs(dataFFT)
    logAbsFFT = np.log(absFFT)
    hps = copy(logAbsFFT)
    for h in np.arange(2, 6):
        decim = signal.decimate(logAbsFFT, int(h))
        hps[:len(decim)] += decim
    start = 150
    peak = np.argmax(hps[start::])
    fundamental = ((start+peak)/time)
    return fundamental

In [50]:
a=0
b=0
def decision(file_name):
    data, channels, sampwidth, frameRate, framesNumber = readWaveFile(file_name)
    freq = getFreq(data, framesNumber, frameRate)
    global a
    global b
    if freq>200:
        a=a+1
        return [1]
    else:
        b=b+1
        return [-1]

In [51]:
#DataFlair - Emotions in the RAVDESS dataset
emotions={
  '01':'neutral',
  '02':'calm',
  '03':'happy',
  '04':'sad',
  '05':'angry',
  '06':'fearful',
  '07':'disgust',
  '08':'surprised'
}
#DataFlair - Emotions to observe
observed_emotions=['neutral', 'happy', 'sad', 'angry']

In [52]:
#DataFlair - Load the data and extract features for each sound file
def load_data(test_size=0.2):
    x,y=[],[]
    for file in glob.glob("C:\\Users\\hp\\Documents\\Study Material\\OE-DM\\Speech-Emotion-Recogniser\\speech-emotion-recognition-ravdess-data\\Actor_*\\*.wav"):
        file_name=os.path.basename(file)
        emotion=emotions[file_name.split("-")[2]]
        if emotion not in observed_emotions:
            continue
        intensity = file_name.split("-")[3]
        #if int(intensity)== 2:
        #    continue
        feature=extract_feature(file, mfcc=True, chroma=True, mel=True)
        x.append(feature)
        y.append(emotion)
    for file in glob.glob("C:\\Users\\hp\\Documents\\Study Material\\OE-DM\\Speech-Emotion-Recogniser\\speech-emotion-recognition-ravdess-data\\Actor_*\\*.wav"):
        file_name=os.path.basename(file)
        emotion=emotions[file_name.split("-")[2]]
        if emotion not in observed_emotions:
            continue
        intensity = file_name.split("-")[3]
        #if int(intensity)== 2:
        #    continue
        feature=extract_feature(file, mfcc=True, chroma=True, mel=True)
        x.append(feature)
        y.append(emotion)
    return train_test_split(np.array(x), y, test_size=test_size, train_size= 0.8, random_state=9)

In [53]:
#DataFlair - Split the dataset
x_train,x_test,y_train,y_test=load_data(test_size=0.2)

  b = a[a_slice]
  return y[sl]


In [54]:
#DataFlair - Get the shape of the training and testing datasets
print((x_train.shape[0], x_test.shape[0]))

(1075, 269)


In [55]:
#DataFlair - Get the number of features extracted
print(f'Features extracted: {x_train.shape[1]}')

Features extracted: 181


In [56]:
#DataFlair - Initialize the Multi Layer Perceptron Classifier
model=MLPClassifier(alpha=0.001, batch_size=32, epsilon=1e-08, hidden_layer_sizes=(300,), learning_rate='adaptive', max_iter=250)

In [57]:
#DataFlair - Train the model
model.fit(x_train,y_train)

MLPClassifier(alpha=0.001, batch_size=32, hidden_layer_sizes=(300,),
              learning_rate='adaptive', max_iter=250)

In [58]:
#DataFlair - Predict for the test set
y_pred=model.predict(x_test)

In [59]:
#DataFlair - Calculate the accuracy of our model
accuracy=accuracy_score(y_true=y_test, y_pred=y_pred)
#DataFlair - Print the accuracy
print("Accuracy: {:.2f}%".format(accuracy*100))

Accuracy: 91.45%


In [60]:
print(a,b)

734 610
