In [69]:
from google.colab import drive
drive.mount('/content/drive')
%cd /gdrive

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/gdrive


In [70]:
!pwd

/gdrive


In [71]:
import soundfile
import numpy as np
import librosa
import glob
import os
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

In [72]:
int2emotion = {
    "01": "neutral",
    "02": "calm",
    "03": "happy",
    "04": "sad",
    "05": "angry",
    "06": "fearful",
    "07": "disgust",
    "08": "surprised"
}

AVAILABLE_EMOTIONS = {
    "angry",
    "sad",
    "neutral",
    "happy"
}

In [84]:
def extract_feature(file_name, **kwargs):
    mfcc = kwargs.get("mfcc")
    chroma = kwargs.get("chroma")
    mel = kwargs.get("mel")
    contrast = kwargs.get("contrast")
    tonnetz = kwargs.get("tonnetz")
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate
        if chroma or contrast:
            stft = np.abs(librosa.stft(X))
        result = np.array([])
        if mfcc:
            mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
            result = np.hstack((result, mfccs))
        if chroma:
            chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)
            result = np.hstack((result, chroma))
        if mel:
            mel = np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T,axis=0)
            result = np.hstack((result, mel))
        if contrast:
            contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T,axis=0)
            result = np.hstack((result, contrast))
        if tonnetz:
            tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T,axis=0)
            result = np.hstack((result, tonnetz))
    return result

In [74]:
def load_dataset(test_size=0.2):
    X, y = [], []
    for file in glob.glob("/content/drive/MyDrive/data/data/Actor_*/*.wav"):
       
        basename = os.path.basename(file)
       
        emotion = int2emotion[basename.split("-")[2]]
    
        if emotion not in AVAILABLE_EMOTIONS:
            continue
     
        features = extract_feature(file, mfcc=True, chroma=True, mel=True)
        
        
        X.append(features)
        y.append(emotion)
  
    return train_test_split(np.array(X), y, test_size=test_size, random_state=7)
    

In [75]:
X_train, X_test, y_train, y_test = load_dataset(test_size=0.25)

print("[+] Number of training samples:", X_train.shape[0])

print("[+] Number of testing samples:", X_test.shape[0])

print("[+] Number of features:", X_train.shape[1])

model_params = {
    'alpha': 0.01,
    'batch_size': 256,
    'epsilon': 1e-08, 
    'hidden_layer_sizes': (300,), 
    'learning_rate': 'adaptive', 
    'max_iter': 500, 
}

model = MLPClassifier(**model_params)


print("[*] Training the model...")
model.fit(X_train, y_train)


y_pred = model.predict(X_test)


accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

print("Accuracy: {:.2f}%".format(accuracy*100))


[+] Number of training samples: 504
[+] Number of testing samples: 168
[+] Number of features: 180
[*] Training the model...
Accuracy: 68.45%


In [76]:
from sklearn import metrics
print(metrics.classification_report(y_test, y_pred))

              precision    recall  f1-score   support

       angry       0.91      0.70      0.79        43
       happy       0.60      0.64      0.62        45
     neutral       0.64      0.48      0.55        29
         sad       0.65      0.82      0.72        51

    accuracy                           0.68       168
   macro avg       0.70      0.66      0.67       168
weighted avg       0.70      0.68      0.68       168



In [80]:
file = "/content/drive/MyDrive/data/test_file/audio1.wav"
print(file)

/content/drive/MyDrive/data/test_file/audio1.wav


In [82]:
features = extract_feature(file, mfcc=True, chroma=True, mel=True).reshape(1, -1)

In [83]:
result = model.predict(features)[0]

print("result:", result)

result: happy
