<a href="https://colab.research.google.com/github/kdk0411/Audio_Classification_Model/blob/main/Voice_preprocessing_class.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import librosa
import os
import pathlib
from sklearn.preprocessing import LabelEncoder, StandardScaler
from keras.utils import to_categorical

class AudioClassifier:
    """Turn a folder of labelled ``.wav`` clips into fixed-size feature arrays.

    Every ``*.wav`` file directly inside ``wav_path`` is matched by file name
    against the ``Cry_Audio_File`` column of the CSV at ``csv_path``; the
    ``Label`` column of the matching row supplies the class label.

    Args:
        wav_path: Directory containing the ``.wav`` files.
        csv_path: CSV file with ``Cry_Audio_File`` and ``Label`` columns.
    """

    # Audio loading / feature-extraction settings shared by every clip.
    SAMPLE_RATE = 16000  # Hz, passed to librosa.load
    DURATION = 6         # seconds read from the start of each file
    N_MFCC = 13          # number of MFCC coefficients per frame
    # Fixed number of time frames per feature matrix (188 is the frame count
    # of a full 6 s clip at 16 kHz, assuming librosa's default hop_length=512).
    MAX_FRAMES = 188
    # Fill value for padded spectrogram frames. amplitude_to_db(ref=np.max)
    # puts the peak at 0 dB and, with librosa's default top_db=80, floors the
    # output at -80 dB, so -80 represents silence (0 would mean "loudest").
    SPEC_PAD_DB = -80.0

    def __init__(self, wav_path, csv_path):
        self.wav_path = wav_path
        self.csv_path = csv_path

    @staticmethod
    def _label_lookup(df):
        """Map each ``Cry_Audio_File`` name to its ``Label`` (first row wins)."""
        lookup = {}
        for file_name, label_value in zip(df["Cry_Audio_File"], df["Label"]):
            # setdefault keeps the first occurrence when a name is duplicated.
            lookup.setdefault(file_name, label_value)
        return lookup

    @staticmethod
    def _fit_frames(feature, n_frames, pad_value):
        """Truncate or right-pad a 2-D feature matrix to ``n_frames`` columns."""
        n_current = feature.shape[1]
        if n_current > n_frames:
            return feature[:, :n_frames]
        if n_current < n_frames:
            return np.pad(
                feature,
                ((0, 0), (0, n_frames - n_current)),
                mode="constant",
                constant_values=pad_value,
            )
        return feature

    def process_data(self):
        """Extract MFCC and dB-spectrogram features for every ``.wav`` file.

        Files are processed in sorted path order. Each feature matrix is
        truncated or padded to exactly ``MAX_FRAMES`` time frames so that
        clips shorter than ``DURATION`` seconds still stack into a regular
        array (MFCCs are padded with 0, spectrograms with ``SPEC_PAD_DB``).

        Returns:
            Tuple ``(X_mfcc, X_spec, labels)``:
            ``X_mfcc`` has shape ``(n_files, N_MFCC, MAX_FRAMES)``,
            ``X_spec`` has shape ``(n_files, n_freq_bins, MAX_FRAMES)`` and
            ``labels`` holds the raw ``Label`` value of each file. All three
            are empty arrays when the directory contains no ``.wav`` files.

        Raises:
            IndexError: If a ``.wav`` file has no row in the CSV.
        """
        data_dir = pathlib.Path(self.wav_path)
        all_wav_paths = sorted(data_dir.glob('*.wav'))

        df = pd.read_csv(self.csv_path)
        # Build the name -> label mapping once instead of scanning the whole
        # column for every audio file.
        label_by_file = self._label_lookup(df)

        X_mfcc = []
        X_spec = []
        labels = []

        for wav_file in all_wav_paths:
            file_name = wav_file.name
            if file_name not in label_by_file:
                raise IndexError(
                    f"No row with Cry_Audio_File == {file_name!r} "
                    f"in {self.csv_path!r}"
                )
            label_value = label_by_file[file_name]

            y, sr = librosa.load(
                wav_file, sr=self.SAMPLE_RATE, duration=self.DURATION
            )

            mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=self.N_MFCC)
            mfcc = self._fit_frames(mfcc, self.MAX_FRAMES, 0.0)

            # Magnitude spectrogram in dB relative to the clip's own peak.
            spec = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
            spec = self._fit_frames(spec, self.MAX_FRAMES, self.SPEC_PAD_DB)

            X_mfcc.append(mfcc)
            X_spec.append(spec)
            labels.append(label_value)

        return np.array(X_mfcc), np.array(X_spec), np.array(labels)

    def preprocess_data(self):
        """Standardise the features and one-hot encode the labels.

        Returns:
            Tuple ``(X_mfcc_scaled, X_spec_scaled, labels_encoded)``.
            ``X_mfcc_scaled`` keeps the shape returned by ``process_data``;
            ``X_spec_scaled`` gains a trailing channel axis of size 1;
            ``labels_encoded`` is the ``to_categorical`` one-hot matrix of
            the label-encoded classes.

        Raises:
            ValueError: If no ``.wav`` files were found.
            IndexError: If a ``.wav`` file has no row in the CSV.
        """
        X_mfcc, X_spec, labels = self.process_data()

        if X_mfcc.shape[0] == 0:
            # Fail with a clear message instead of an opaque reshape error.
            raise ValueError(
                f"No .wav files found in {self.wav_path!r}; "
                "nothing to preprocess."
            )

        # Add the channel axis expected by image-style (e.g. Conv2D) inputs.
        X_spec = np.expand_dims(X_spec, axis=-1)

        scaler_mfcc = StandardScaler()
        scaler_spec = StandardScaler()

        # Flatten all leading axes so the scaler sees a 2-D matrix whose
        # columns are the last axis, then restore the original shape.
        # For the MFCCs the last axis is time, so each frame position is
        # standardised separately; for the spectrogram the last axis is the
        # singleton channel, so one global mean/std is used.
        # NOTE(review): per-coefficient MFCC scaling may have been intended -
        # confirm before changing, since trained models depend on this.
        X_mfcc_scaled = scaler_mfcc.fit_transform(
            X_mfcc.reshape(-1, X_mfcc.shape[-1])
        ).reshape(X_mfcc.shape)
        X_spec_scaled = scaler_spec.fit_transform(
            X_spec.reshape(-1, X_spec.shape[-1])
        ).reshape(X_spec.shape)

        label_encoder = LabelEncoder()
        labels_encoded = label_encoder.fit_transform(labels)

        num_classes = len(label_encoder.classes_)
        labels_encoded = to_categorical(labels_encoded, num_classes=num_classes)

        return X_mfcc_scaled, X_spec_scaled, labels_encoded
