In [None]:
import os
import librosa
import numpy as np
import soundfile as sf


DATASET_PATH = "ravdess_dataset"


def load_and_preprocess_audio(file_path, target_sr=16000):
  
    audio, sr = librosa.load(file_path, sr=target_sr)


    noise_reduction = audio - librosa.effects.preemphasis(audio)


    normalized_audio = librosa.util.normalize(noise_reduction)
    
    return normalized_audio, sr


def process_dataset(dataset_path):
    audio_data = []
    for root, _, files in os.walk(dataset_path):
        for file in files:
            if file.endswith('.wav'):
                file_path = os.path.join(root, file)
                audio, sr = load_and_preprocess_audio(file_path)
                audio_data.append((file, audio, sr))
                print(f"Processed: {file}")
    return audio_data


processed_data = process_dataset(DATASET_PATH)
print(f"Total files processed: {len(processed_data)}")


Processed: 03-01-01-01-01-01-01.wav
Processed: 03-01-01-01-01-02-01.wav
Processed: 03-01-01-01-02-01-01.wav
Processed: 03-01-01-01-02-02-01.wav
Processed: 03-01-02-01-01-01-01.wav
Processed: 03-01-02-01-01-02-01.wav
Processed: 03-01-02-01-02-01-01.wav
Processed: 03-01-02-01-02-02-01.wav
Processed: 03-01-02-02-01-01-01.wav
Processed: 03-01-02-02-01-02-01.wav
Processed: 03-01-02-02-02-01-01.wav
Processed: 03-01-02-02-02-02-01.wav
Processed: 03-01-03-01-01-01-01.wav
Processed: 03-01-03-01-01-02-01.wav
Processed: 03-01-03-01-02-01-01.wav
Processed: 03-01-03-01-02-02-01.wav
Processed: 03-01-03-02-01-01-01.wav
Processed: 03-01-03-02-01-02-01.wav
Processed: 03-01-03-02-02-01-01.wav
Processed: 03-01-03-02-02-02-01.wav
Processed: 03-01-04-01-01-01-01.wav
Processed: 03-01-04-01-01-02-01.wav
Processed: 03-01-04-01-02-01-01.wav
Processed: 03-01-04-01-02-02-01.wav
Processed: 03-01-04-02-01-01-01.wav
Processed: 03-01-04-02-01-02-01.wav
Processed: 03-01-04-02-02-01-01.wav
Processed: 03-01-04-02-02-02

In [None]:
import csv


def extract_features(audio, sr):

    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)
    mfccs_mean = np.mean(mfccs.T, axis=0)


    spectral_centroid = librosa.feature.spectral_centroid(y=audio, sr=sr)
    spectral_centroid_mean = np.mean(spectral_centroid)


    zcr = librosa.feature.zero_crossing_rate(y=audio)
    zcr_mean = np.mean(zcr)


    rmse = librosa.feature.rms(y=audio)
    rmse_mean = np.mean(rmse)


    return np.hstack([mfccs_mean, spectral_centroid_mean, zcr_mean, rmse_mean])


def save_features_to_csv(processed_data, output_file='audio_features.csv'):
    header = [
        'File_Name',
        *[f'MFCC_{i+1}' for i in range(13)],
        'Spectral_Centroid', 'ZCR', 'RMSE', 'Label'
    ]

    with open(output_file, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(header)

        for file_name, audio, sr in processed_data:
            
            features = extract_features(audio, sr)

           
            label = file_name.split('-')[2]  

            
            writer.writerow([file_name, *features, label])
            print(f"Features extracted for: {file_name}")


save_features_to_csv(processed_data)
print("Feature extraction and saving completed.")


Features extracted for: 03-01-01-01-01-01-01.wav
Features extracted for: 03-01-01-01-01-02-01.wav
Features extracted for: 03-01-01-01-02-01-01.wav
Features extracted for: 03-01-01-01-02-02-01.wav
Features extracted for: 03-01-02-01-01-01-01.wav
Features extracted for: 03-01-02-01-01-02-01.wav
Features extracted for: 03-01-02-01-02-01-01.wav
Features extracted for: 03-01-02-01-02-02-01.wav
Features extracted for: 03-01-02-02-01-01-01.wav
Features extracted for: 03-01-02-02-01-02-01.wav
Features extracted for: 03-01-02-02-02-01-01.wav
Features extracted for: 03-01-02-02-02-02-01.wav
Features extracted for: 03-01-03-01-01-01-01.wav
Features extracted for: 03-01-03-01-01-02-01.wav
Features extracted for: 03-01-03-01-02-01-01.wav
Features extracted for: 03-01-03-01-02-02-01.wav
Features extracted for: 03-01-03-02-01-01-01.wav
Features extracted for: 03-01-03-02-01-02-01.wav
Features extracted for: 03-01-03-02-02-01-01.wav
Features extracted for: 03-01-03-02-02-02-01.wav
Features extracted f

In [None]:
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score


def load_data(csv_file='audio_features.csv'):
    data = pd.read_csv(csv_file)
    X = data.iloc[:, 1:-1].values 
    y = data.iloc[:, -1].values   
    return X, y


def fix_labels(y):
    return y - 1  


X, y = load_data()
y = fix_labels(y)  

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


svm_model = SVC(probability=True, kernel='rbf', C=1.0, gamma='scale')
svm_model.fit(X_train, y_train)

rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

xgb_model = xgb.XGBClassifier(objective='multi:softprob', num_class=len(np.unique(y)), random_state=42)
xgb_model.fit(X_train, y_train)

svm_probs = svm_model.predict_proba(X_train)
rf_probs = rf_model.predict_proba(X_train)
xgb_probs = xgb_model.predict_proba(X_train)

combined_features_train = np.hstack([svm_probs, rf_probs, xgb_probs])

meta_model = LogisticRegression(max_iter=1000)
meta_model.fit(combined_features_train, y_train)

svm_probs_test = svm_model.predict_proba(X_test)
rf_probs_test = rf_model.predict_proba(X_test)
xgb_probs_test = xgb_model.predict_proba(X_test)

combined_features_test = np.hstack([svm_probs_test, rf_probs_test, xgb_probs_test])

y_pred = meta_model.predict(combined_features_test)


accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy (Stacking): {accuracy * 100:.2f}%")
print("Classification Report:\n", classification_report(y_test, y_pred))


joblib.dump(meta_model, 'meta_model.joblib')
joblib.dump(scaler, 'scaler.joblib')

joblib.dump(svm_model, 'svm_model.joblib')
joblib.dump(rf_model, 'rf_model.joblib')
joblib.dump(xgb_model, 'xgb_model.joblib')

print("Models and meta-model saved.")


Model Accuracy (Stacking): 90.97%
Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.76      0.86        42
           1       0.89      0.97      0.93        69
           2       0.93      0.93      0.93        82
           3       0.83      0.93      0.88        61
           4       0.87      0.87      0.87        79
           5       0.90      0.93      0.91        80
           6       0.95      0.93      0.94        84
           7       0.95      0.90      0.92        79

    accuracy                           0.91       576
   macro avg       0.91      0.90      0.91       576
weighted avg       0.91      0.91      0.91       576

Models and meta-model saved.
