<a href="https://colab.research.google.com/github/nurmukhammaddev/Audio-data-augmentation/blob/main/Extract_Features_Audio_Module.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os

import librosa
import numpy as np
import pandas as pd
from scipy.stats import kurtosis, mode

In [2]:
def extract_mean_frequency(y, sr=22050):
    """Return the mean of the spectral centroid computed for ``y``.

    Args:
        y: Audio samples, forwarded to ``librosa.feature.spectral_centroid``.
        sr: Sampling rate of ``y``; defaults to 22050.

    Returns:
        The mean over every value of the spectral-centroid output.
    """
    centroid_frames = librosa.feature.spectral_centroid(y=y, sr=sr)
    return centroid_frames.mean()

In [15]:
# Sample clip used by the exploratory cells that follow. The absolute
# "/content" path is presumably the Colab workspace - adjust when running elsewhere.
audio_file = "/content/common_voice_uz_38196557.mp3"

In [16]:
# Load the sample clip: librosa.load returns the samples and the sampling rate.
data, sr = librosa.load(audio_file)

In [17]:
# Echo the loaded samples as the cell output.
data

array([-1.8018204e-13,  3.3075097e-13, -1.5452331e-13, ...,
        0.0000000e+00,  0.0000000e+00,  0.0000000e+00], dtype=float32)

In [18]:
# Pass the sampling rate returned by librosa.load explicitly instead of
# relying on the function's default happening to match it.
menafreq = extract_mean_frequency(data, sr)

In [19]:
# Echo the mean spectral centroid computed in the previous cell.
menafreq

1429.1259942249133

In [20]:
def extract_standard_deviation(y, sr):
    """Return the standard deviation of the spectral centroid of ``y``.

    Args:
        y: Audio samples, forwarded to ``librosa.feature.spectral_centroid``.
        sr: Sampling rate of ``y``.

    Returns:
        The standard deviation over every value of the spectral-centroid output.
    """
    centroid_frames = librosa.feature.spectral_centroid(y=y, sr=sr)
    return centroid_frames.std()

In [21]:
def extract_median(y, sr):
    """Return the median of the spectral centroid of ``y``.

    Args:
        y: Audio samples, forwarded to ``librosa.feature.spectral_centroid``.
        sr: Sampling rate of ``y``.

    Returns:
        A single scalar: the median over every value of the
        spectral-centroid output.
    """
    centroid_frames = librosa.feature.spectral_centroid(y=y, sr=sr)
    # Reduce to one number: returning the raw row of per-frame values would
    # make this identical to extract_centroid and is not a median.
    return np.median(centroid_frames)

In [22]:
def extract_kurtosis(y):
    """Return the kurtosis of the samples in ``y``.

    Delegates to ``scipy.stats.kurtosis`` with its default settings.

    Args:
        y: Array-like of sample values.

    Returns:
        Whatever ``scipy.stats.kurtosis`` returns for ``y``.
    """
    sample_kurtosis = kurtosis(y)
    return sample_kurtosis


In [23]:
def extract_spectral_flatness(y):
    """Return the first row of the spectral-flatness output for ``y``.

    Args:
        y: Audio samples, forwarded to ``librosa.feature.spectral_flatness``.

    Returns:
        Element 0 of the ``spectral_flatness`` result. No aggregation is
        applied, so this is the row of values rather than a single number.
    """
    flatness = librosa.feature.spectral_flatness(y=y)
    first_row = flatness[0]
    return first_row

def extract_mode(y):
    """Return the most frequent value in ``y``.

    ``scipy.stats.mode`` reduces along its default axis. Depending on the
    installed SciPy version the reduced axis is either kept (length 1) or
    dropped; this function always drops it, so a 1-D input yields a scalar
    regardless of the SciPy version.

    Args:
        y: Array-like of sample values.

    Returns:
        A scalar for 1-D input; for higher-dimensional input, the array of
        modes with the reduced axis removed.
    """
    most_common = np.asarray(mode(y)[0])
    if most_common.ndim and most_common.ndim == np.ndim(y):
        # Older SciPy keeps the reduced axis as a leading length-1 dimension.
        most_common = most_common.reshape(most_common.shape[1:])
    # Indexing with an empty tuple turns a 0-d array into a NumPy scalar and
    # leaves higher-dimensional arrays untouched.
    return most_common[()]

def extract_centroid(y, sr):
    """Return the first row of the spectral-centroid output for ``y``.

    Args:
        y: Audio samples, forwarded to ``librosa.feature.spectral_centroid``.
        sr: Sampling rate of ``y``.

    Returns:
        Element 0 of the ``spectral_centroid`` result, without aggregation.
    """
    centroid_frames = librosa.feature.spectral_centroid(y=y, sr=sr)
    first_row = centroid_frames[0]
    return first_row

def extract_peak(y, sr):
    """Return the first row of the spectral roll-off output for ``y``.

    Args:
        y: Audio samples, forwarded to ``librosa.feature.spectral_rolloff``.
        sr: Sampling rate of ``y``.

    Returns:
        Element 0 of the ``spectral_rolloff`` result, without aggregation.
    """
    rolloff_frames = librosa.feature.spectral_rolloff(y=y, sr=sr)
    first_row = rolloff_frames[0]
    return first_row

def extract_mean_function(y, sr):
    """Return the mean of the first row of the MFCC output for ``y``.

    Only row 0 of the ``librosa.feature.mfcc`` result contributes; the
    remaining rows are ignored.

    Args:
        y: Audio samples, forwarded to ``librosa.feature.mfcc``.
        sr: Sampling rate of ``y``.

    Returns:
        The mean of row 0 of the MFCC output.
    """
    mfcc_matrix = librosa.feature.mfcc(y=y, sr=sr)
    first_row = mfcc_matrix[0]
    return first_row.mean()

def extract_minimum_function(y, sr):
    """Return the smallest value anywhere in the MFCC output for ``y``.

    Args:
        y: Audio samples, forwarded to ``librosa.feature.mfcc``.
        sr: Sampling rate of ``y``.

    Returns:
        The minimum over every value of the MFCC output.
    """
    mfcc_matrix = librosa.feature.mfcc(y=y, sr=sr)
    return mfcc_matrix.min()

def extract_maximum_function(y, sr):
    """Return the largest value anywhere in the MFCC output for ``y``.

    Args:
        y: Audio samples, forwarded to ``librosa.feature.mfcc``.
        sr: Sampling rate of ``y``.

    Returns:
        The maximum over every value of the MFCC output.
    """
    mfcc_matrix = librosa.feature.mfcc(y=y, sr=sr)
    return mfcc_matrix.max()

def extract_mean_dom(y, sr):
    """Return the mean of the first row of the spectral-bandwidth output.

    Args:
        y: Audio samples, forwarded to ``librosa.feature.spectral_bandwidth``.
        sr: Sampling rate of ``y``.

    Returns:
        The mean of row 0 of the spectral-bandwidth output.
    """
    bandwidth_frames = librosa.feature.spectral_bandwidth(y=y, sr=sr)
    first_row = bandwidth_frames[0]
    return first_row.mean()

def extract_minimum_dom(y, sr):
    """Return the smallest value of the spectral-bandwidth output for ``y``.

    Args:
        y: Audio samples, forwarded to ``librosa.feature.spectral_bandwidth``.
        sr: Sampling rate of ``y``.

    Returns:
        The minimum over every value of the spectral-bandwidth output.
    """
    bandwidth_frames = librosa.feature.spectral_bandwidth(y=y, sr=sr)
    return bandwidth_frames.min()

def extract_maximum_dom(y, sr):
    """Return the largest value of the spectral-bandwidth output for ``y``.

    Args:
        y: Audio samples, forwarded to ``librosa.feature.spectral_bandwidth``.
        sr: Sampling rate of ``y``.

    Returns:
        The maximum over every value of the spectral-bandwidth output.
    """
    bandwidth_frames = librosa.feature.spectral_bandwidth(y=y, sr=sr)
    return bandwidth_frames.max()

def extract_dfrange(maxdom, mindom):
    """Return the span between ``maxdom`` and ``mindom``.

    Args:
        maxdom: Upper value of the range.
        mindom: Lower value of the range.

    Returns:
        ``maxdom - mindom``.
    """
    span = maxdom - mindom
    return span

def extract_modindx(y, sr):
    """Return the mean of the tempogram computed for ``y``.

    Args:
        y: Audio samples, forwarded to ``librosa.feature.tempogram``.
        sr: Sampling rate of ``y``.

    Returns:
        The mean over every value of the tempogram output.
    """
    tempogram = librosa.feature.tempogram(y=y, sr=sr)
    return tempogram.mean()

In [24]:
def _to_scalar(values, reducer=np.mean):
    """Collapse an array-valued feature to one number; pass scalars through."""
    return reducer(values) if np.ndim(values) else values


def extract_features(audio_file):
    """Load one audio file and compute its feature row.

    Every entry of the returned dict is a single scalar so the row can be
    written to a CSV cell without being stringified as an array. Extractors
    that yield a row of per-frame values are summarised here: ``median`` by
    the median, and ``sfm``, ``mode``, ``centroid`` and ``peakf`` by the mean.

    Args:
        audio_file: Path of the audio file, passed to ``librosa.load``.

    Returns:
        dict with the keys ``meanfreq``, ``sd``, ``median``, ``kurt``,
        ``sfm``, ``mode``, ``centroid``, ``peakf``, ``meanfun``, ``minfun``,
        ``maxfun``, ``meandom``, ``mindom``, ``maxdom``, ``dfrange`` and
        ``modindx``, in that order.
    """
    y, sr = librosa.load(audio_file)

    # Computed up front because dfrange is derived from both of them.
    mindom = extract_minimum_dom(y, sr)
    maxdom = extract_maximum_dom(y, sr)

    return {
        'meanfreq': extract_mean_frequency(y, sr),
        'sd': extract_standard_deviation(y, sr),
        'median': _to_scalar(extract_median(y, sr), np.median),
        'kurt': extract_kurtosis(y),
        'sfm': _to_scalar(extract_spectral_flatness(y)),
        'mode': _to_scalar(extract_mode(y)),
        'centroid': _to_scalar(extract_centroid(y, sr)),
        'peakf': _to_scalar(extract_peak(y, sr)),
        'meanfun': extract_mean_function(y, sr),
        'minfun': extract_minimum_function(y, sr),
        'maxfun': extract_maximum_function(y, sr),
        'meandom': extract_mean_dom(y, sr),
        'mindom': mindom,
        'maxdom': maxdom,
        'dfrange': extract_dfrange(maxdom, mindom),
        'modindx': extract_modindx(y, sr),
    }

In [25]:
def process_audio_directory(input_dir, output_file):
    """Extract features from every MP3 in a directory and save them as CSV.

    Files are processed in sorted name order so the rows of the CSV are
    reproducible between runs, and the ``.mp3`` extension is matched
    case-insensitively.

    Args:
        input_dir: Directory to scan (not recursive).
        output_file: Path of the CSV file to write, one row per audio file
            and no index column.
    """
    # os.listdir returns entries in arbitrary order, so sort them explicitly.
    mp3_names = sorted(
        name for name in os.listdir(input_dir)
        if name.lower().endswith('.mp3')
    )

    # Build all rows first and create the DataFrame once.
    features_list = [
        extract_features(os.path.join(input_dir, name)) for name in mp3_names
    ]

    df = pd.DataFrame(features_list)
    df.to_csv(output_file, index=False)

# Example usage: extract features from every .mp3 in 'clips/' and write them
# to 'output_features_new.csv'. The call runs as soon as this cell executes.
input_directory = 'clips/'
output_csv_file = 'output_features_new.csv'
process_audio_directory(input_directory, output_csv_file)
