In [None]:

# Mount Google Drive at /content/drive; the dataset directory used later in
# this notebook lives under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install soundfile

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import soundfile
import numpy as np
import librosa
import glob
import os
from sklearn.model_selection import train_test_split

# RAVDESS emotion codes (as they appear in the file names) paired with the
# human-readable label used as the classification target.
int2emotion = dict(
    [
        ("01", "neutral"),
        ("02", "calm"),
        ("03", "happy"),
        ("04", "sad"),
        ("05", "angry"),
        ("06", "fearful"),
        ("07", "disgust"),
        ("08", "surprised"),
    ]
)

In [None]:
def extract_feature(file_name, **kwargs):
    """
    Extract a 1-D feature vector from the audio file `file_name`.

    Each requested feature is computed by librosa, averaged over its last
    axis (time frames) and appended to the output. The order of the output
    is fixed, independent of the order of the keyword arguments:
        - MFCC (mfcc), 40 coefficients
        - Chroma (chroma)
        - MEL Spectrogram Frequency (mel)
        - Contrast (contrast)
        - Tonnetz (tonnetz)
    e.g:
    `features = extract_feature(path, mel=True, mfcc=True)`

    Multi-channel recordings are averaged down to a single channel before
    any feature is computed, so stereo files yield vectors of the same
    length as mono files.

    Returns:
        numpy.ndarray: concatenation of the selected features; an empty
        array when no feature flag is truthy.
    """
    # Keep the boolean switches under their own names so they are never
    # overwritten by the feature arrays computed below.
    want_mfcc = kwargs.get("mfcc")
    want_chroma = kwargs.get("chroma")
    want_mel = kwargs.get("mel")
    want_contrast = kwargs.get("contrast")
    want_tonnetz = kwargs.get("tonnetz")

    # Only the read needs the file handle; leave the `with` block right away
    # so the file is closed before the (slow) feature computation starts.
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate

    # soundfile returns a (frames, channels) array for multi-channel audio,
    # which is not the 1-D signal the librosa calls below are given for mono
    # files. Downmix to mono by averaging the channels.
    if X.ndim > 1:
        X = X.mean(axis=1)

    def _time_average(feature_matrix):
        # Collapse the per-frame matrix into one value per feature row.
        return np.mean(feature_matrix.T, axis=0)

    # Chroma and spectral contrast share the same magnitude spectrogram.
    if want_chroma or want_contrast:
        stft = np.abs(librosa.stft(X))

    result = np.array([])
    if want_mfcc:
        mfccs = _time_average(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40))
        result = np.hstack((result, mfccs))
    if want_chroma:
        chroma_mean = _time_average(librosa.feature.chroma_stft(S=stft, sr=sample_rate))
        result = np.hstack((result, chroma_mean))
    if want_mel:
        mel_mean = _time_average(librosa.feature.melspectrogram(y=X, sr=sample_rate))
        result = np.hstack((result, mel_mean))
    if want_contrast:
        contrast_mean = _time_average(librosa.feature.spectral_contrast(S=stft, sr=sample_rate))
        result = np.hstack((result, contrast_mean))
    if want_tonnetz:
        # Tonnetz is computed on the harmonic component of the signal only.
        harmonic = librosa.effects.harmonic(X)
        tonnetz_mean = _time_average(librosa.feature.tonnetz(y=harmonic, sr=sample_rate))
        result = np.hstack((result, tonnetz_mean))
    return result

In [None]:
def load_data(fi, test_size=0.2, verbose=True):
    """
    Build a train/test split of speech features from a list of audio files.

    For every path in `fi` the emotion label is looked up in `int2emotion`
    using the third dash-separated field of the file's base name, and the
    MFCC + chroma + mel features are computed with `extract_feature`.

    A file that cannot be processed (unknown emotion code, unexpected file
    name, unreadable audio, ...) is reported and skipped on its own; it no
    longer silently stops the loading of all the files that follow it.

    Parameters:
        fi: iterable of audio file paths.
        test_size: forwarded to `train_test_split`.
        verbose: when true, print the base name of every file as it is
            processed.

    Returns:
        The result of `train_test_split` on the feature matrix and labels
        (X_train, X_test, y_train, y_test), using random_state=7.

    Raises:
        ValueError: if not a single file could be loaded.
    """
    X, y = [], []
    for file in fi:
        try:
            # get the base name of the audio file
            basename = os.path.basename(file)
            if verbose:
                print(basename)
            # get the emotion label
            emotion = int2emotion[basename.split("-")[2]]
            # extract speech features
            features = extract_feature(file, mfcc=True, chroma=True, mel=True)
        except Exception as exc:
            # Best effort per file: make the failure visible, keep going.
            print("[!] Skipping {!r}: {!r}".format(file, exc))
            continue
        # add to data only once both label and features are available, so
        # X and y can never get out of step
        X.append(features)
        y.append(emotion)
    if not X:
        raise ValueError("load_data: none of the given files could be loaded")
    # split the data to training and testing and return it
    return train_test_split(np.array(X), y, test_size=test_size, random_state=7)

In [None]:
# Directory containing the WAV files
directory = "/content/drive/MyDrive/audio_speech_actors_01-24/Actor_21"

# Get the file paths of all WAV files in the directory.
# glob returns paths in arbitrary, filesystem-dependent order; sort them so
# the fixed random_state used for the split gives the same train/test
# partition on every run.
file_paths = sorted(glob.glob(os.path.join(directory, "*.wav")))
if not file_paths:
    # Fail here with a clear message instead of deep inside the split.
    raise FileNotFoundError("No .wav files found in {!r}".format(directory))

X_train, X_test, y_train, y_test = load_data(file_paths, test_size=0.25)
# print some details
# number of samples in training data
print("[+] Number of training samples:", X_train.shape[0])
# number of samples in testing data
print("[+] Number of testing samples:", X_test.shape[0])
# number of features used: length of the vector returned by
# extract_feature() for each file
print("[+] Number of features:", X_train.shape[1])

03-01-02-01-02-01-21.wav
03-01-02-01-01-01-21.wav
03-01-01-01-02-01-21.wav
03-01-01-01-01-02-21.wav
03-01-02-01-01-02-21.wav
03-01-01-01-01-01-21.wav
03-01-01-01-02-02-21.wav
03-01-05-01-01-01-21.wav
03-01-02-01-02-02-21.wav
03-01-05-01-02-02-21.wav
03-01-03-02-01-01-21.wav
03-01-07-01-02-01-21.wav
03-01-02-02-02-02-21.wav
03-01-04-01-01-01-21.wav
03-01-06-02-01-02-21.wav
03-01-04-02-02-02-21.wav
03-01-07-01-01-02-21.wav
03-01-04-01-02-01-21.wav
03-01-06-01-02-02-21.wav
03-01-05-02-02-02-21.wav
03-01-07-01-01-01-21.wav
03-01-03-01-02-02-21.wav
03-01-03-02-01-02-21.wav
03-01-05-01-02-01-21.wav
03-01-07-01-02-02-21.wav
03-01-06-02-02-02-21.wav
03-01-05-02-02-01-21.wav
03-01-06-01-02-01-21.wav
03-01-07-02-01-01-21.wav
03-01-08-01-01-01-21.wav
03-01-06-01-01-02-21.wav
03-01-08-01-02-01-21.wav
03-01-04-02-02-01-21.wav
03-01-03-01-01-02-21.wav
03-01-07-02-02-01-21.wav
03-01-06-02-01-01-21.wav
03-01-04-01-02-02-21.wav
03-01-05-01-01-02-21.wav
03-01-03-01-02-01-21.wav
03-01-04-01-01-02-21.wav


In [None]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.svm import SVC

# Experiment 1: RBF-kernel SVM on the raw features.
# Stored under its own name: this is NOT the linear model, which is trained
# in the next experiment and assigned to `svm_model_linear` there.
# NOTE(review): the features are not standardized before fitting; RBF kernels
# are sensitive to feature scale - consider a StandardScaler before judging
# this kernel.
svm_model_rbf = SVC(kernel='rbf', C=1).fit(X_train, y_train)
svm_predictions = svm_model_rbf.predict(X_test)


print(accuracy_score(y_true=y_test, y_pred=svm_predictions))
# zero_division=0 reports 0.0 for classes that are never predicted (or have
# no test samples) without emitting an undefined-metric warning per average.
print(classification_report(y_test, svm_predictions, zero_division=0))
# creating a confusion matrix
print(confusion_matrix(y_test, svm_predictions))

0.2
              precision    recall  f1-score   support

       angry       0.12      1.00      0.22         1
        calm       0.29      1.00      0.44         2
     disgust       0.00      0.00      0.00         3
     fearful       0.00      0.00      0.00         2
       happy       0.00      0.00      0.00         2
         sad       0.00      0.00      0.00         3
   surprised       0.00      0.00      0.00         2

    accuracy                           0.20        15
   macro avg       0.06      0.29      0.10        15
weighted avg       0.05      0.20      0.07        15

[[1 0 0 0 0 0 0]
 [0 2 0 0 0 0 0]
 [1 2 0 0 0 0 0]
 [2 0 0 0 0 0 0]
 [1 1 0 0 0 0 0]
 [1 2 0 0 0 0 0]
 [2 0 0 0 0 0 0]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.svm import SVC

# Experiment 2: linear-kernel SVM on the same train/test split.
svm_model_linear = SVC(kernel='linear', C=1).fit(X_train, y_train)
svm_predictions = svm_model_linear.predict(X_test)


print(accuracy_score(y_true=y_test, y_pred=svm_predictions))
# zero_division=0 reports 0.0 for labels with no test samples or no
# predictions instead of emitting an undefined-metric warning for each one.
print(classification_report(y_test, svm_predictions, zero_division=0))
# creating a confusion matrix
print(confusion_matrix(y_test, svm_predictions))


0.9333333333333333
              precision    recall  f1-score   support

       angry       1.00      1.00      1.00         1
        calm       1.00      1.00      1.00         2
     disgust       1.00      1.00      1.00         3
     fearful       1.00      1.00      1.00         2
       happy       1.00      1.00      1.00         2
     neutral       0.00      0.00      0.00         0
         sad       1.00      0.67      0.80         3
   surprised       1.00      1.00      1.00         2

    accuracy                           0.93        15
   macro avg       0.88      0.83      0.85        15
weighted avg       1.00      0.93      0.96        15

[[1 0 0 0 0 0 0 0]
 [0 2 0 0 0 0 0 0]
 [0 0 3 0 0 0 0 0]
 [0 0 0 2 0 0 0 0]
 [0 0 0 0 2 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 1 2 0]
 [0 0 0 0 0 0 0 2]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
from sklearn.svm import SVC

# Experiment 3: SVC with scikit-learn's default hyper-parameters.
# The result is kept in its own variable on purpose: assigning it to
# `svm_model_linear` would replace the linear model trained earlier, and that
# name is what gets pickled and downloaded further down.
svm_model_default = SVC().fit(X_train, y_train)
svm_predictions = svm_model_default.predict(X_test)


print(accuracy_score(y_true=y_test, y_pred=svm_predictions))
# zero_division=0 reports 0.0 for classes that are never predicted without
# emitting an undefined-metric warning for each one.
print(classification_report(y_test, svm_predictions, zero_division=0))
# creating a confusion matrix
print(confusion_matrix(y_test, svm_predictions))

0.2
              precision    recall  f1-score   support

       angry       0.12      1.00      0.22         1
        calm       0.29      1.00      0.44         2
     disgust       0.00      0.00      0.00         3
     fearful       0.00      0.00      0.00         2
       happy       0.00      0.00      0.00         2
         sad       0.00      0.00      0.00         3
   surprised       0.00      0.00      0.00         2

    accuracy                           0.20        15
   macro avg       0.06      0.29      0.10        15
weighted avg       0.05      0.20      0.07        15

[[1 0 0 0 0 0 0]
 [0 2 0 0 0 0 0]
 [1 2 0 0 0 0 0]
 [2 0 0 0 0 0 0]
 [1 1 0 0 0 0 0]
 [1 2 0 0 0 0 0]
 [2 0 0 0 0 0 0]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
import pickle

# Persist the trained classifier to disk.
file = "svm_model.pkl"
# The context manager closes (and therefore flushes) the file as soon as the
# dump is done, so the complete pickle is on disk before anything reads it.
with open(file, 'wb') as model_file:
    pickle.dump(svm_model_linear, model_file)



In [None]:
# Ask Colab to download 'svm_model.pkl', the file name the model was pickled
# to earlier in this notebook.
from google.colab import files
files.download('svm_model.pkl')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Sanity check: reload the saved model and predict on the held-out samples.
# NOTE: unpickling can execute arbitrary code - only load pickle files that
# were produced by a trusted source (here: this notebook itself).
with open('svm_model.pkl', 'rb') as model_file:
    pickled_model = pickle.load(model_file)
pickled_model.predict(X_test)

array(['angry', 'angry', 'angry', 'calm', 'angry', 'calm', 'calm',
       'angry', 'calm', 'angry', 'angry', 'calm', 'calm', 'calm', 'angry'],
      dtype='<U9')