In [None]:
import librosa
import soundfile
import os, glob, pickle
import numpy as np
from numpy import array
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.models import Sequential
from keras.layers import Dense,Dropout
from keras.utils import to_categorical

In [None]:
# Mount Google Drive at /content/drive (Colab only); the dataset paths used
# later in this notebook live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Extract features (mfcc, chroma, mel, spectral contrast, tonnetz, poly) from a sound file
def extract_feature(file_name, mfcc, chroma, mel, contrast, tonnetz, poly):
    """Build one flat feature vector for an audio file.

    Each enabled feature is computed with librosa, averaged over time, and
    appended to the result in the fixed order:
    mfcc (40 coefficients), chroma, mel, contrast, tonnetz, poly.

    Args:
        file_name: Path of the audio file, opened with ``soundfile``.
        mfcc, chroma, mel, contrast, tonnetz, poly: Truthy to include the
            corresponding feature group, falsy to skip it.

    Returns:
        1-D float ``numpy`` array with the enabled feature groups
        concatenated; empty when every flag is falsy.
    """
    # Read the samples, then release the file handle before the heavy work.
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate

    # soundfile returns (frames, channels) for multi-channel audio, which is
    # not the layout librosa expects; average the channels down to mono.
    if X.ndim > 1:
        X = X.mean(axis=1)

    # chroma, contrast and poly all work on the magnitude spectrogram, so it
    # must be computed when ANY of them is requested (poly used to be missed,
    # causing a NameError when it was enabled on its own).
    stft = None
    if chroma or contrast or poly:
        stft = np.abs(librosa.stft(X))

    def _time_mean(feature_matrix):
        # Collapse the time axis: one mean value per feature row.
        return np.mean(feature_matrix.T, axis=0)

    # Local names differ from the flag parameters so the flags are never
    # overwritten with arrays.
    parts = []
    if mfcc:
        parts.append(_time_mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40)))
    if chroma:
        parts.append(_time_mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate)))
    if mel:
        # `y` must be passed by keyword: recent librosa releases reject
        # positional audio arguments.
        parts.append(_time_mean(librosa.feature.melspectrogram(y=X, sr=sample_rate)))
    if contrast:
        parts.append(_time_mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate)))
    if tonnetz:
        harmonic = librosa.effects.harmonic(X)
        parts.append(_time_mean(librosa.feature.tonnetz(y=harmonic, sr=sample_rate)))
    if poly:
        parts.append(_time_mean(librosa.feature.poly_features(S=stft, sr=sample_rate)))

    # Leading empty float array keeps the result dtype and the "no features"
    # return value the same as the original incremental hstack.
    return np.hstack([np.array([])] + parts)

In [None]:
# Emotion codes (as they appear in the file names) mapped to readable labels.
emotions = dict(
    NEU='neutral',
    HAP='happy',
    SAD='sad',
    ANG='angry',
    FEA='fearful',
    DIS='disgust',
)

# Only these labels are kept when the dataset is loaded.
observed_emotions = ['happy', 'sad', 'angry', 'neutral']

In [None]:
# Load the data and extract features for each sound file
def load_data(test_size=0.25):
    """Extract features for every observed-emotion recording and split them.

    The emotion code is the third ``_``-separated field of each file name and
    is translated through ``emotions``. Files whose emotion is not listed in
    ``observed_emotions`` (or whose name/code is not recognised) are skipped.

    Args:
        test_size: Fraction of the samples placed in the test split.

    Returns:
        ``x_train, x_test, y_train, y_test`` as produced by
        ``train_test_split`` with ``random_state=9``.

    Raises:
        ValueError: If no usable audio file was found.
    """
    x, y = [], []
    # glob yields paths in a filesystem-dependent order; sort them so the
    # fixed random_state really produces the same split on every run.
    for file in sorted(glob.glob("/content/drive/My Drive/Actor/Actor_*//*.wav")):
        file_name = os.path.basename(file)
        fields = file_name.split("_")
        # Unknown codes and unexpectedly short names map to None, which is
        # filtered out below instead of raising KeyError/IndexError.
        emotion = emotions.get(fields[2]) if len(fields) > 2 else None
        if emotion not in observed_emotions:
            continue
        feature = extract_feature(file, mfcc=True, chroma=True, mel=True,
                                  contrast=True, tonnetz=True, poly=True)
        x.append(feature)
        y.append(emotion)

    if not x:
        # train_test_split would also raise ValueError here, but with a
        # message that does not point at the real cause.
        raise ValueError("No matching .wav files found; is the drive mounted and the dataset path correct?")

    return train_test_split(np.array(x), y, test_size=test_size, random_state=9)

In [None]:
# Extract features for the whole dataset and split it,
# holding out 25% of the samples as the test set.
x_train,x_test,y_train,y_test=load_data(test_size=0.25)

# New section

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise the features: the scaler learns its statistics from the
# training split only and is then applied unchanged to both splits.
sc = StandardScaler()
sc.fit(x_train)
x_train = sc.transform(x_train)
x_test = sc.transform(x_test)

print(type(x_train))

In [None]:
# Number of samples (rows) in the training and testing sets, in that order
print((x_train.shape[0], x_test.shape[0]))

In [None]:
# Number of features per sample (columns of the training matrix)
print(f'Features extracted: {x_train.shape[1]}')

In [None]:
# Training labels: strings -> integer ids -> one-hot rows
values = np.array(y_train)
print(values)

# Learn the label-to-integer mapping from the training labels, then apply it
label_encoder = LabelEncoder()
label_encoder.fit(values)
integer_encoded = label_encoder.transform(values)

# One-hot encode the integer ids for the softmax output layer
a = to_categorical(integer_encoded)


In [None]:
# Test labels: strings -> integer ids -> one-hot rows
values = np.array(y_test)
print(values)
# integer encode
# Reuse the encoder fitted on the training labels instead of fitting a new
# one on the test labels: a fresh fit would assign different integer ids
# (and a different one-hot width) whenever the test split lacks a class.
integer_encoded = label_encoder.transform(values)
# Pin the one-hot width to the number of training classes so `b` always has
# the same columns as the training targets.
b = to_categorical(integer_encoded, num_classes=len(label_encoder.classes_))

In [None]:
# create the model
# Take the input and output widths from the prepared data instead of
# hard-coding them, so the network still matches if the feature set or the
# list of observed emotions changes.
n_features = x_train.shape[1]
n_classes = a.shape[1]

new_model = Sequential()
# NOTE(review): this first layer has no activation (linear) — confirm intended.
new_model.add(Dense(780, input_shape=(n_features,)))
new_model.add(Dense(780, activation='relu'))
new_model.add(Dropout(0.3))
new_model.add(Dense(195, activation='relu'))
new_model.add(Dropout(0.2))
# One softmax unit per emotion class
new_model.add(Dense(n_classes, activation='softmax'))
new_model.compile(loss='categorical_crossentropy', optimizer='adagrad', metrics=['accuracy'])
# summary() prints the table itself and returns None, so wrapping it in
# print() only added a stray "None" line.
new_model.summary()

In [None]:
# Fit the network on the training split; the test split is passed as
# validation data so its loss/accuracy are reported after every epoch.
new_model.fit(
    x_train,
    a,
    validation_data=(x_test, b),
    epochs=150,
    batch_size=10,
    verbose=2,
)

In [None]:
# Predict for the test set: one row of softmax class scores per test sample
Y_newpred=new_model.predict(x_test)
print(Y_newpred)

In [None]:
# Evaluate on the test split; the model was compiled with metrics=['accuracy'],
# and scores[1] is read as the accuracy when the result is printed.
scores = new_model.evaluate(x_test, b, verbose=0)

In [None]:
# Report test accuracy and the complementary error rate as percentages
print(f'Accuracy: {scores[1]*100}% \n Error: {100 - scores[1]*100}%')

In [None]:
# Classify a single recording with the trained model.
res = extract_feature('Angry.wav', mfcc=True, chroma=True, mel=True , contrast=True , tonnetz=True , poly=True)
# Apply the same scaler that was fitted on the training features.
newpred = new_model.predict(sc.transform(np.array([res])))

a1=newpred[0]

# The model's output columns follow the label encoder's sorted class order
# (angry, happy, neutral, sad). Picking the arg-max replaces the hand-written
# comparison chain, whose last branch compared a1[1] twice and never a1[0],
# and which printed nothing at all when the top score was tied.
predicted_labels = ['Angry', 'Happy', 'Neutral', 'Sad']
print(predicted_labels[int(np.argmax(a1))])