In [24]:
import librosa
import numpy as np
def extract_features(audio_path):
    """Build a 14-element feature vector (mean pitch + 13 mean MFCCs) for one audio file.

    Parameters
    ----------
    audio_path : str or path-like
        Audio file to analyse; passed unchanged to ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        1-D array. Element 0 is the average, over frames, of the pitch found
        in the strongest-magnitude bin of each frame (frames whose selected
        pitch is not positive are ignored; 0 if no frame qualifies).
        Elements 1..13 are the 13 MFCCs averaged over time.
    """
    # Load the audio and trim its edges with librosa's trimmer (top_db=30).
    y, sr = librosa.load(audio_path)
    y_trimmed, _ = librosa.effects.trim(y, top_db=30)

    # Pitch: for every frame take the pitch of the bin with the largest
    # magnitude. One vectorized argmax replaces the per-frame Python loop
    # that evaluated argmax twice for each frame.
    pitches, magnitudes = librosa.piptrack(y=y_trimmed, sr=sr)
    strongest_bin = magnitudes.argmax(axis=0)
    frame_pitch = pitches[strongest_bin, np.arange(pitches.shape[1])]
    voiced_pitch = frame_pitch[frame_pitch > 0]
    avg_pitch = voiced_pitch.mean() if voiced_pitch.size else 0

    # MFCCs (13 coefficients), averaged across frames.
    mfccs = librosa.feature.mfcc(y=y_trimmed, sr=sr, n_mfcc=13)
    mfcc_mean = np.mean(mfccs.T, axis=0)

    # Pitch first, then the MFCC means: downstream code reads column 0 as pitch.
    features = np.hstack([avg_pitch, mfcc_mean])

    return features

In [25]:
from sklearn.cluster import KMeans
import numpy as np
from glob import glob
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import os

# Relies on extract_features() from the previous cell to turn each audio file
# into a feature vector. Ground-truth gender labels ("Male" / "Female") are
# derived below from the actor id in each file name; they are used only to
# score the clustering, never to fit it.

# Dataset root. Defaults to the original local path; set the RAVDESS_DIR
# environment variable to run the notebook on another machine.
RAVDESS_DIR = os.environ.get('RAVDESS_DIR', 'C:/Users/saith/OneDrive/Desktop/DWDM/ravdess')


def actor_gender_from_path(audio_path):
    """Return "Male" or "Female" for an audio file, based on its file name.

    The file name is split on '-'; the seventh field (up to its first '.')
    is read as the integer actor id. Odd ids map to "Male", even ids to
    "Female".

    Raises
    ------
    ValueError
        If the file name has fewer than seven '-'-separated fields, or the
        actor field is not an integer.
    """
    # Normalise separators so Windows-style paths also parse on POSIX systems.
    filename = os.path.basename(str(audio_path).replace('\\', '/'))
    fields = filename.split('-')
    if len(fields) < 7:
        raise ValueError(f"Unexpected audio file name (need 7 '-' separated fields): {filename!r}")
    actor_id = int(fields[6].split('.')[0])
    return "Male" if actor_id % 2 != 0 else "Female"


# Sorted so the row order (and therefore the clustering run) does not depend on
# the order in which the filesystem happens to list the files.
audio_files = sorted(glob(f'{RAVDESS_DIR}/*/*.wav'))

# Ground-truth label for every file, in the same order as audio_files.
actual_genders = [actor_gender_from_path(audio) for audio in audio_files]

# Fail early with a clear message: with no input files the scaler below would
# otherwise fail with a much less helpful error.
if not audio_files:
    raise FileNotFoundError("No .wav files were found; check the RAVDESS dataset path.")

# Extract features for each audio file (one row per file, same order as audio_files)
raw_features = np.array([extract_features(audio_file) for audio_file in audio_files])

# Standardize features so every column has zero mean and unit variance.
# The unscaled matrix stays available as raw_features for inspection.
scaler = StandardScaler()
feature_data = scaler.fit_transform(raw_features)

# Apply KMeans with 2 clusters (intended to correspond to male and female).
# n_init is given explicitly so the result does not depend on which default
# the installed scikit-learn version uses.
kmeans = KMeans(n_clusters=2, n_init=10, random_state=42)
kmeans.fit(feature_data)

# Cluster index (0 or 1) assigned to each audio file
labels = kmeans.labels_

# Cluster centers (mean of the features in each cluster)
cluster_centers = kmeans.cluster_centers_

# Use domain knowledge to name the clusters: the one with the lower average
# pitch (feature column 0) is taken to be male. The features are standardized,
# but that rescaling keeps the ordering of the two centers along a column.
if cluster_centers[0][0] < cluster_centers[1][0]:
    gender_map = {0: "Male", 1: "Female"}
else:
    gender_map = {0: "Female", 1: "Male"}

# Predicted gender for each data point
predicted_genders = [gender_map[label] for label in labels]

# Score the clustering against the labels derived from the file names
accuracy = accuracy_score(actual_genders, predicted_genders)

# Output the accuracy
print(f"Accuracy of Gender Prediction: {accuracy * 100:.2f}%")


Accuracy of Gender Prediction: 78.75%
