In [3]:
import os
import librosa
import numpy as np
import csv
from sklearn.mixture import GaussianMixture

# Define the number of MFCCs globally so it's accessible throughout the script
n_mfcc = 13

def extract_mfcc_features(audio_path, n_mfcc=n_mfcc):
    try:
        audio, sample_rate = librosa.load(audio_path, sr=None)
        mfcc_features = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)
        return mfcc_features.T
    except Exception as e:
        print(f"Error processing {audio_path}: {e}")
        return None

def process_audio_files(folder_path):
    all_features = []
    all_files = []
    for file in os.listdir(folder_path):
        if file.endswith('.flac'):
            file_path = os.path.join(folder_path, file)
            mfcc_features = extract_mfcc_features(file_path)
            if mfcc_features is not None:
                all_features.append(mfcc_features)  # Append features of each file
                all_files.append(file_path)  # Keep track of the file path
    return all_features, all_files

# Define the paths to your audio folders

folder_paths = [
    '/Users/chavimangla/Downloads/dummy_data/audiomnist',
    '/Users/chavimangla/Downloads/dummy_data/ECSC',
    '/Users/chavimangla/Downloads/dummy_data/crema-d',
    '/Users/chavimangla/Downloads/dummy_data/english_children',
    '/Users/chavimangla/Downloads/dummy_data/SpeechAccentKaggle'
]

# Process each folder and collect all MFCC features
all_mfcc_features = []
all_file_names = []
for folder_path in folder_paths:
    features, file_names = process_audio_files(folder_path)
    all_mfcc_features.extend(features)
    all_file_names.extend(file_names)

# Check if we have extracted any features
if not all_mfcc_features:
    print("No features extracted. Please check your dataset.")
else:
    # Combine all features into one numpy array
    combined_features = np.vstack(all_mfcc_features)
    
  # Create a CSV file to store the extracted features
csv_file_path = 'mfcc_features.csv'
with open(csv_file_path, 'w', newline='') as csvfile:
    csvwriter = csv.writer(csvfile)
    header = ['file_name'] + [f'mfcc_{i+1}' for i in range(n_mfcc)]
    csvwriter.writerow(header)
    
    # Write the features from each file to the CSV
    for file_name, features in zip(all_file_names, all_mfcc_features):
        # Flatten the 2D feature array to 1D
        flattened_features = features.flatten()
        # Write the file name and the flattened features to the CSV
        csvwriter.writerow([file_name] + flattened_features.tolist())


    print(f"MFCC features saved to {csv_file_path}")

    # Train a Gaussian Mixture Model (GMM)
    n_components = 5  # Adjust as necessary
    gmm = GaussianMixture(n_components=n_components, covariance_type='diag', max_iter=200, random_state=0)
    gmm.fit(combined_features)
    print("GMM trained successfully!")


MFCC features saved to mfcc_features.csv
GMM trained successfully!


In [4]:
# After the GMM has been trained
train_log_likelihood = gmm.score(combined_features)
print(f"Log-likelihood of training data: {train_log_likelihood}")


Log-likelihood of training data: -56.988110957712


In [10]:
import os
import librosa
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.mixture import GaussianMixture

# Define the number of MFCCs globally
n_mfcc = 13

# Function to extract MFCC features from an audio file
def extract_mfcc_features(audio_path, n_mfcc=n_mfcc):
    try:
        audio, sample_rate = librosa.load(audio_path, sr=None)
        mfcc_features = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)
        return mfcc_features.T
    except Exception as e:
        print(f"Error processing {audio_path}: {e}")
        return None

# Function to process audio files and extract features and labels using metadata
def process_audio_files(folder_path, metadata_df):
    all_features = []
    all_labels = []
    for file in os.listdir(folder_path):
        if file.endswith('.flac'):
            file_path = os.path.join(folder_path, file)
            mfcc_features = extract_mfcc_features(file_path)
            if mfcc_features is not None:
                # Get the label from the metadata dataframe
                age_band = metadata_df.loc[metadata_df['filename'] == os.path.basename(file)]['age band'].values[0]
                all_features.append(mfcc_features)
                all_labels.append(age_band)
    return all_features, all_labels

# Paths to audio folders and corresponding metadata CSVs
audio_metadata_pairs = [
    ('/Users/chavimangla/Downloads/dummy_data/audiomnist', '/Users/chavimangla/Downloads/dummy_data/metdata/audioMNIST.csv'),
    ('/Users/chavimangla/Downloads/dummy_data/ECSC', '/Users/chavimangla/Downloads/dummy_data/metdata/ECSC_metadata (1).csv'),
    ('/Users/chavimangla/Downloads/dummy_data/crema-d', '/Users/chavimangla/Downloads/dummy_data/metdata/crema-d (1).csv'),
    ('/Users/chavimangla/Downloads/dummy_data/english_children', '/Users/chavimangla/Downloads/dummy_data/metdata/english_children_metadata (1).csv'),
    ('/Users/chavimangla/Downloads/dummy_data/SpeechAccentKaggle', '/Users/chavimangla/Downloads/dummy_data/metdata/accent_kaggle_metadata (1).csv')
]


# Process each folder and collect all MFCC features and labels
all_mfcc_features = []
all_age_bands = []
for folder_path, metadata_path in audio_metadata_pairs:
    metadata_df = pd.read_csv(metadata_path)
    features, age_bands = process_audio_files(folder_path, metadata_df)
    all_mfcc_features.extend(features)
    all_age_bands.extend(age_bands)

# Aggregate the MFCC features for each file by averaging over the frames
aggregated_mfcc_features = [np.mean(mfcc, axis=0) for mfcc in all_mfcc_features]

# Create a DataFrame from the aggregated features
combined_df = pd.DataFrame(aggregated_mfcc_features)

# Add the age bands to the DataFrame
combined_df['age_band'] = all_age_bands

# Save the combined features and age bands to a CSV file
combined_csv_path = '/Users/chavimangla/Downloads/dummy_data/combined_mfcc_features_age_band.csv'
combined_df.to_csv(combined_csv_path, index=False)
print(f"Combined MFCC features and age bands saved to {combined_csv_path}")

# Load the combined features and age bands
df = pd.read_csv(combined_csv_path)
X = df.drop(columns=['age_band']).to_numpy()
y = df['age_band'].to_numpy()

# Split the features and age bands into training and testing sets (70-30 split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Train a Gaussian Mixture Model (GMM) on the training set
gmm = GaussianMixture(n_components=5, covariance_type='diag', max_iter=200, random_state=0)
gmm.fit(X_train)

# Evaluate the GMM using the log-likelihood on the test set
log_likelihood = gmm.score(X_test)
print(f"Log-likelihood on test data: {log_likelihood}")


Combined MFCC features and age bands saved to /Users/chavimangla/Downloads/dummy_data/combined_mfcc_features_age_band.csv
Log-likelihood on test data: -48.810523741207135


In [11]:
import os
import librosa
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# Define the number of MFCCs globally
n_mfcc = 13

def extract_mfcc_features(audio_path, n_mfcc=n_mfcc):
    try:
        audio, sample_rate = librosa.load(audio_path, sr=None)
        mfcc_features = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)
        return mfcc_features.T
    except Exception as e:
        print(f"Error processing {audio_path}: {e}")
        return None

def process_audio_files(folder_path, metadata_df):
    all_features = []
    all_labels = []
    for file in os.listdir(folder_path):
        if file.endswith('.flac'):
            file_path = os.path.join(folder_path, file)
            mfcc_features = extract_mfcc_features(file_path)
            if mfcc_features is not None:
                age_band = metadata_df.loc[metadata_df['filename'] == os.path.basename(file)]['age band'].values[0]
                all_features.append(mfcc_features)
                all_labels.append(age_band)
    return all_features, all_labels

audio_metadata_pairs = [
    ('/Users/chavimangla/Downloads/dummy_data/audiomnist', '/Users/chavimangla/Downloads/dummy_data/metdata/audioMNIST.csv'),
    ('/Users/chavimangla/Downloads/dummy_data/ECSC', '/Users/chavimangla/Downloads/dummy_data/metdata/ECSC_metadata (1).csv'),
    ('/Users/chavimangla/Downloads/dummy_data/crema-d', '/Users/chavimangla/Downloads/dummy_data/metdata/crema-d (1).csv'),
    ('/Users/chavimangla/Downloads/dummy_data/english_children', '/Users/chavimangla/Downloads/dummy_data/metdata/english_children_metadata (1).csv'),
    ('/Users/chavimangla/Downloads/dummy_data/SpeechAccentKaggle', '/Users/chavimangla/Downloads/dummy_data/metdata/accent_kaggle_metadata (1).csv')
]


all_mfcc_features = []
all_age_bands = []
for folder_path, metadata_path in audio_metadata_pairs:
    metadata_df = pd.read_csv(metadata_path)
    features, age_bands = process_audio_files(folder_path, metadata_df)
    all_mfcc_features.extend(features)
    all_age_bands.extend(age_bands)

aggregated_mfcc_features = [np.mean(mfcc, axis=0) for mfcc in all_mfcc_features]

combined_df = pd.DataFrame(aggregated_mfcc_features)
combined_df['age_band'] = all_age_bands
combined_csv_path = 'combined_mfcc_features_age_band.csv'
combined_df.to_csv(combined_csv_path, index=False)
print(f"Combined MFCC features and age bands saved to {combined_csv_path}")

df = pd.read_csv(combined_csv_path)
X = df.drop(columns=['age_band']).to_numpy()
y = df['age_band'].to_numpy()

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Initialize and train the Random Forest Classifier
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train, y_train)

# Predict the age bands on the test set
y_pred = rf_classifier.predict(X_test)

# Calculate and print evaluation metrics
print(f"Accuracy: {accuracy_score(y_test, y_pred)}")
print(f"Precision: {precision_score(y_test, y_pred, average='weighted')}")
print(f"Recall: {recall_score(y_test, y_pred, average='weighted')}")
print(f"F1 Score: {f1_score(y_test, y_pred, average='weighted')}")
print("\nClassification Report:\n", classification_report(y_test, y_pred))


Combined MFCC features and age bands saved to combined_mfcc_features_age_band.csv
Accuracy: 0.6848626925653047
Precision: 0.6674772217847404
Recall: 0.6848626925653047
F1 Score: 0.6449249605559396

Classification Report:
               precision    recall  f1-score   support

     12 - 20       0.00      0.00      0.00        68
     20 - 29       0.69      0.94      0.79      3215
     30 - 39       0.66      0.47      0.55      1390
     40 - 49       0.50      0.25      0.33       485
     50 - 59       0.67      0.21      0.32       338
     60 - 69       0.89      0.20      0.33       201
     70 - 79       0.00      0.00      0.00        40
        < 12       0.94      0.84      0.89       228
       >= 80       0.00      0.00      0.00         7

    accuracy                           0.68      5972
   macro avg       0.48      0.32      0.36      5972
weighted avg       0.67      0.68      0.64      5972



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
