# In [3]:
import os
import librosa
import pandas as pd
import numpy as np

def extract_features(file_path):
    """Summarise one audio file as a flat vector of per-feature means.

    The file is loaded with ``librosa.load`` using its default settings, then
    five frame-level descriptors are computed and averaged over time.

    Args:
        file_path: Path of the audio file to load.

    Returns:
        A 1-D NumPy array of 17 values: the mean of each of the 13 MFCC
        coefficients, followed by the mean spectral centroid, spectral
        bandwidth, spectral rolloff and zero-crossing rate, in that order.
    """
    signal, sample_rate = librosa.load(file_path)

    # Frame-level descriptors; each call returns one row per coefficient.
    mfcc_frames = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=13)
    centroid_frames = librosa.feature.spectral_centroid(y=signal, sr=sample_rate)
    bandwidth_frames = librosa.feature.spectral_bandwidth(y=signal, sr=sample_rate)
    rolloff_frames = librosa.feature.spectral_rolloff(y=signal, sr=sample_rate)
    zcr_frames = librosa.feature.zero_crossing_rate(signal)

    # MFCCs keep one mean per coefficient; the others collapse to one scalar.
    mfcc_means = mfcc_frames.mean(axis=1)
    scalar_means = [
        np.mean(frames)
        for frames in (centroid_frames, bandwidth_frames, rolloff_frames, zcr_frames)
    ]

    return np.concatenate([mfcc_means, scalar_means])

def process_folder(folder_path, label):
    """Extract one labelled feature row per ``.wav`` file in a folder.

    Args:
        folder_path: Directory to scan (non-recursively) for entries whose
            name ends with ``.wav``.
        label: Value stored as the first element of every returned row.

    Returns:
        A list of rows, one per matching file, each of the form
        ``[label, feature_1, ..., feature_n]`` with the features kept as
        Python floats. The list is empty when no file matches.
    """
    features_list = []
    # os.listdir returns entries in arbitrary order; sort so that the row
    # order is reproducible between runs and machines.
    for file in sorted(os.listdir(folder_path)):
        if not file.endswith('.wav'):
            continue
        file_path = os.path.join(folder_path, file)
        features = extract_features(file_path)
        # Build a plain list instead of np.concatenate([[label], features]):
        # placing the string label in the same ndarray coerces every numeric
        # feature to a string, so downstream columns lose their float dtype.
        features_list.append([label, *map(float, features)])
    return features_list

# Root directory expected to contain the 'males' and 'females' sub-folders.
dataset_path = 'dataset'

# Extract labelled feature rows for each class.
male_features = process_folder(os.path.join(dataset_path, 'males'), 'male')
female_features = process_folder(os.path.join(dataset_path, 'females'), 'female')

# Male rows first, then female rows.
all_features = [*male_features, *female_features]

# Column layout: label, 13 MFCC means, then the four scalar descriptors.
mfcc_columns = [f'mfcc_{number}' for number in range(1, 14)]
spectral_columns = [
    'spectral_centroid',
    'spectral_bandwidth',
    'spectral_rolloff',
    'zero_crossing_rate',
]
columns = ['label', *mfcc_columns, *spectral_columns]

df = pd.DataFrame(all_features, columns=columns)

# Persist the table without the DataFrame index column.
df.to_csv('extracted.csv', index=False)