In [3]:
import os

import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns

# librosa is a Python library for analyzing audio and music.
# It can be used to extract the data from the audio files
import librosa 
import librosa.display
import audioread
# to play the audio files
from IPython.display import Audio
plt.style.use('seaborn-white')
import soundfile

from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

import sys

from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import confusion_matrix, classification_report

import keras
from keras.callbacks import ReduceLROnPlateau
from keras.models import Sequential
from keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, Dropout, BatchNormalization
from keras.utils import np_utils, to_categorical
from keras.callbacks import ModelCheckpoint

import warnings
if not sys.warnoptions:
    warnings.simplefilter("ignore")
warnings.filterwarnings("ignore", category=DeprecationWarning) 

In [13]:
RAV = "/Users/kokilareddy/Downloads/download/audio_speech_actors_01-24/"

In [14]:
dir = os.listdir(RAV)

males = []
females = [] 
        
for actor in dir:
       
    files = os.listdir(RAV + actor)
        
    for file in files: 
        part = file.split('.')[0]
        part = part.split("-")           
            
        temp = int(part[6])        
                
        if part[2] == '01':
            emotion = 'neutral'
        elif part[2] == '02':
            emotion = 'calm'
        elif part[2] == '03':
            emotion = 'happy'
        elif part[2] == '04':
            emotion = 'sad'
        elif part[2] == '05':
            emotion = 'angry'
        elif part[2] == '06':
            emotion = 'fear'
        elif part[2] == '07':
            emotion = 'disgust'
        elif part[2] == '08':
            emotion = 'surprise'
        else:
            emotion = 'unknown'
            
        if temp%2 == 0:
            path = (RAV + actor + '/' + file)
            #emotion = 'female_'+emotion
            females.append([emotion, path]) 
        else:
            path = (RAV + actor + '/' + file)
             #emotion = 'male_'+emotion
            males.append([emotion, path])   
    
   
RavFemales_df = pd.DataFrame(females)
RavFemales_df.columns = ['Emotion', 'Path']

RavMales_df = pd.DataFrame(males)
RavMales_df.columns = ['Emotion', 'Path']

print('RAVDESS datasets')
RavFemales_df.head()

RAVDESS datasets


Unnamed: 0,Emotion,Path
0,angry,/Users/kokilareddy/Downloads/download/audio_sp...
1,fear,/Users/kokilareddy/Downloads/download/audio_sp...
2,fear,/Users/kokilareddy/Downloads/download/audio_sp...
3,angry,/Users/kokilareddy/Downloads/download/audio_sp...
4,disgust,/Users/kokilareddy/Downloads/download/audio_sp...


In [15]:
RavFemales_df['Gender']='Female'
RavMales_df['Gender']='Male'

In [17]:
Audiodata=RavFemales_df.append(RavMales_df, ignore_index=True)

In [18]:
Audiodata

Unnamed: 0,Emotion,Path,Gender
0,angry,/Users/kokilareddy/Downloads/download/audio_sp...,Female
1,fear,/Users/kokilareddy/Downloads/download/audio_sp...,Female
2,fear,/Users/kokilareddy/Downloads/download/audio_sp...,Female
3,angry,/Users/kokilareddy/Downloads/download/audio_sp...,Female
4,disgust,/Users/kokilareddy/Downloads/download/audio_sp...,Female
...,...,...,...
1435,neutral,/Users/kokilareddy/Downloads/download/audio_sp...,Male
1436,calm,/Users/kokilareddy/Downloads/download/audio_sp...,Male
1437,calm,/Users/kokilareddy/Downloads/download/audio_sp...,Male
1438,happy,/Users/kokilareddy/Downloads/download/audio_sp...,Male


In [19]:
Audiodata.Emotion.unique() 

array(['angry', 'fear', 'disgust', 'sad', 'surprise', 'happy', 'calm',
       'neutral'], dtype=object)

In [20]:
Audiodata['Paths']=Audiodata['Path']+"@"+Audiodata['Emotion']
Paths=Audiodata['Paths']

In [21]:
def extract_features(path):
    data,sr=librosa.load(path , res_type='kaiser_fast', duration=2.5, offset=0.6)
    result = np.array([])    
    mfccs = librosa.feature.mfcc(y=data, sr=22050, n_mfcc=100)
    mfccs_processed = np.mean(mfccs.T,axis=0)
    result = np.array(mfccs_processed)    
    return result


# Load the data and  to extract features for each Audio file
def load_data(test_size=0.2):
    x,y=[],[]
    for file in Paths:
        File=file.split("@")[0]
        emotion=file.split("@")[1]
        feature=extract_features(File)
        x.append(feature)
        y.append(emotion)
    return train_test_split(np.array(x), y, test_size=test_size, random_state=15)

In [22]:
# Split the dataset
x_train,x_test,y_train,y_test=load_data(test_size=0.2)

In [23]:
# Get the shape of the training and testing datasets
print((x_train.shape[0], x_test.shape[0]))

(1152, 288)


In [24]:
# Get the number of features extracted
print(f'Features extracted: {x_train.shape[1]}')

Features extracted: 100


In [25]:
# Initialize the Multi Layer Perceptron Classifier
model=MLPClassifier(alpha=0.1, learning_rate='adaptive', max_iter= 550 , hidden_layer_sizes= 750, activation='relu')

In [26]:
# Train the model
model.fit(x_train,y_train)

MLPClassifier(alpha=0.1, hidden_layer_sizes=750, learning_rate='adaptive',
              max_iter=550)

In [27]:
# - Predict for the test set
y_pred=model.predict(x_test)

In [28]:
# - Calculate the accuracy of our model
accuracy=accuracy_score(y_test, y_pred )

# - Print the accuracy
print("Accuracy: {:.2f}%".format(accuracy*100))

Accuracy: 66.32%


In [29]:
# - Predict for the train set
y_pred=model.predict(x_train)
# - Calculate the accuracy of our model
accuracy=accuracy_score(y_train, y_pred )

# - Print the accuracy
print("Accuracy: {:.2f}%".format(accuracy*100))

Accuracy: 100.00%


In [30]:
path="/Users/kokilareddy/Downloads/download/AudioData/DC_h10.wav"
x=[]
feature=extract_features(path)
x.append(feature)
test=np.array(x)
y_pred=model.predict(test)
y_pred

array(['happy'], dtype='<U8')

In [31]:
path="/Users/kokilareddy/Downloads/download/TESS Toronto emotional speech set data/OAF_Pleasant_surprise/OAF_dab_ps.wav"
x=[]
feature=extract_features(path)
x.append(feature)
test=np.array(x)
y_pred=model.predict(test)
y_pred

array(['sad'], dtype='<U8')

In [32]:
path="/Users/kokilareddy/Downloads/download/audio_speech_actors_01-24/Actor_20/03-01-02-02-02-02-20.wav"
x=[]
feature=extract_features(path)
x.append(feature)
test=np.array(x)
y_pred=model.predict(test)
y_pred

array(['calm'], dtype='<U8')

In [278]:
import sounddevice as sd
import numpy as np
from scipy.io.wavfile import write 
# import wavio as wv
import scipy.io.wavfile as wav

fs=44100
duration = 5  # seconds
myrecording = sd.rec(duration * fs, samplerate=fs, channels=2,dtype='float64')
print("Recording Audio")
sd.wait()
print("Audio recording complete ,checking emotion")
write("recording0.wav", fs, myrecording)
path="recording0.wav"
x=[]
feature=extract_features(path)
x.append(feature)
test=np.array(x)
y_pred=model.predict(test)
y_pred

Recording Audio
Audio recording complete ,checking emotion


array(['angry'], dtype='<U8')

### 