# In [None]:
import glob
import librosa
import numpy as np
import pandas as pd
from pathlib import Path

# Length of the analysed clip in seconds and number of MFCC coefficients per frame
SEGMENT_SECONDS = 83
N_MFCC = 13

# List of [file_id, mfccs] pairs, one per WAV file (labels are attached later)
features_and_labels = []

# Iterate over each WAV file in the directory. glob returns paths in arbitrary
# order, so sort them to make the processing order reproducible.
for file in sorted(glob.glob("/content/drive/MyDrive/DATA/11/train/*.wav")):
    # Load audio as mono at the file's native sampling rate (sr=None)
    y, sr = librosa.load(file, sr=None, mono=True)
    # Keep exactly the first SEGMENT_SECONDS seconds: longer recordings are
    # truncated, shorter ones are zero-padded. Without the padding a short
    # recording yields fewer MFCC frames and the per-file matrices can no
    # longer be stacked into one rectangular array.
    target_len = int(SEGMENT_SECONDS * sr)
    segment = y[:target_len]
    if len(segment) < target_len:
        segment = np.pad(segment, (0, target_len - len(segment)))
    # NOTE(review): the number of frames still depends on sr; this assumes all
    # files share one sampling rate - confirm, or load with a fixed sr.
    mfccs = librosa.feature.mfcc(y=segment, sr=sr, n_mfcc=N_MFCC)
    # The file ID is the part of the file name (without extension) before the
    # first underscore
    filename = Path(file).stem
    file_id = filename.split("_")[0]
    # Append file ID and MFCC features to the list
    features_and_labels.append([file_id, mfccs])

# Read labels from a CSV file. FileID is read as text so that it compares equal
# to the string IDs parsed from the file names even when the IDs look numeric
# (a column parsed as int would never match a str ID).
labels_df = pd.read_csv('/content/drive/MyDrive/DATA/Form.csv', dtype={'FileID': str})

# Build the FileID -> Label lookup once instead of scanning the frame for every
# file. setdefault keeps the first row of a duplicated FileID, which is the
# row a first-match lookup would pick.
label_by_id = {}
for csv_id, csv_label in zip(labels_df['FileID'].to_numpy(), labels_df['Label'].to_numpy()):
    label_by_id.setdefault(csv_id, csv_label)

# List of [file_id, mfccs, label] records
training_data = []

# Match features with labels
for file_id, mfccs in features_and_labels:
    # Fail with the offending ID instead of an opaque IndexError
    if file_id not in label_by_id:
        raise KeyError(f"No label found in Form.csv for file ID {file_id!r}")
    label = label_by_id[file_id]
    # Append file ID, MFCC features, and label to the training data
    training_data.append([file_id, mfccs, label])

# Sort training data by file ID (IDs are strings, so the order is lexicographic)
training_data.sort(key=lambda x: x[0])

# Collect the MFCC matrix (middle field of every [file_id, mfccs, label]
# record) and combine them into a single numpy array
X_train = np.array([mfcc_matrix for _, mfcc_matrix, _ in training_data])

# Define function for z-score normalization
def z_score_normalize(data, mean=None, std=None):
    """Standardize ``data`` as ``(data - mean) / std``.

    When ``mean`` or ``std`` is omitted it is computed from ``data`` by
    reducing over axes 0 and 1, so ``data`` needs at least two dimensions in
    that case and one statistic is produced per index of the remaining axes.
    Passing the statistics returned by an earlier call applies the same
    scaling to other data.

    Positions whose standard deviation is exactly zero (constant values) are
    divided by 1 instead, so they come out as ``data - mean`` rather than
    NaN/inf.

    NOTE(review): for input laid out as (files, coefficients, frames) the
    reduction over axes 0 and 1 gives per-frame statistics - confirm this is
    intended rather than per-coefficient statistics.

    Args:
        data: Array to normalize.
        mean: Precomputed mean, broadcastable against ``data``; computed from
            ``data`` when None.
        std: Precomputed standard deviation, broadcastable against ``data``;
            computed from ``data`` when None.

    Returns:
        Tuple ``(normalized_data, mean, std)``. ``mean`` and ``std`` are the
        values that were passed in or computed; ``std`` is returned unmodified
        (zeros included).
    """
    if mean is None:
        mean = np.mean(data, axis=(0, 1))
    if std is None:
        std = np.std(data, axis=(0, 1))
    # Guard against division by zero; ones_like keeps the dtype of std so the
    # result dtype is not widened by the replacement value.
    std_arr = np.asarray(std)
    safe_std = np.where(std_arr == 0, np.ones_like(std_arr), std_arr)
    normalized_data = (data - mean) / safe_std
    return normalized_data, mean, std

# Z-score the training features. No statistics are supplied, so they are
# computed from X_train itself; the returned mean and std are kept so they can
# be passed back to z_score_normalize for other data.
X_train_normalized, mean, std = z_score_normalize(X_train, mean=None, std=None)


