In [1]:
import librosa
import librosa.display
import IPython.display as ipd
import os
import numpy as np

In [109]:
import os
import random
import numpy as np
import pandas as pd
import librosa

# Collect audio files from the last level of subdirectories
def collect_audio_files_dfs(root_dir, num_files_per_dir=20, seed=None):
    """Randomly sample audio files from every leaf directory under ``root_dir``.

    A leaf directory is one that ``os.walk`` reports with no subdirectories.
    Files are selected by a ``.wav`` or ``.flac`` extension, compared without
    regard to case so that names such as ``SA1.WAV`` are not silently skipped.

    Args:
        root_dir: Root of the directory tree to walk.
        num_files_per_dir: Maximum number of files taken from each leaf
            directory; a directory holding fewer files contributes all of them.
        seed: Optional seed for a private random generator, which makes the
            selection reproducible. When ``None`` (the default) the shared
            module-level ``random`` state is used, as before.

    Returns:
        A list of file paths, each joined onto the directory it was found in.
        The list is empty when ``root_dir`` does not exist or holds no
        matching files.
    """
    audio_extensions = ('.wav', '.flac')
    rng = random if seed is None else random.Random(seed)

    selected_files = []
    for dirpath, dirnames, filenames in os.walk(root_dir):
        if dirnames:
            # Sorting in place fixes the order in which os.walk descends,
            # so a given seed always yields the same selection.
            dirnames.sort()
            continue
        audio_files = [
            os.path.join(dirpath, name)
            for name in sorted(filenames)
            if name.lower().endswith(audio_extensions)
        ]
        sample_size = min(num_files_per_dir, len(audio_files))
        selected_files.extend(rng.sample(audio_files, sample_size))
    return selected_files

# Feature extraction functions
def extract_mfcc(y, sr=16000):
    """Return the MFCCs of an audio signal averaged over time.

    Args:
        y: Audio time series, passed to ``librosa.feature.mfcc`` as ``y``.
        sr: Sampling rate of ``y`` in Hz. Defaults to 16000, the value that
            was previously hard-coded here.

    Returns:
        An array with one mean value per coefficient (13 are requested).
    """
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    # After the transpose, axis 0 runs over frames, so the mean collapses time.
    return np.mean(mfcc.T, axis=0)

def extract_delta(y, sr=16000):
    """Return the delta (first-order difference) MFCCs averaged over time.

    The MFCCs are recomputed here rather than shared with ``extract_mfcc``,
    so this function can be called on its own.

    Args:
        y: Audio time series, passed to ``librosa.feature.mfcc`` as ``y``.
        sr: Sampling rate of ``y`` in Hz. Defaults to 16000, the value that
            was previously hard-coded here.

    Returns:
        An array with one mean delta value per coefficient (13 are requested).
    """
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    delta = librosa.feature.delta(mfcc)
    # After the transpose, axis 0 runs over frames, so the mean collapses time.
    return np.mean(delta.T, axis=0)

def extract_mel_spectrogram(y, sr=16000):
    """Return the dB-scaled mel spectrogram of a signal averaged over time.

    Args:
        y: Audio time series, passed to ``librosa.feature.melspectrogram``.
        sr: Sampling rate of ``y`` in Hz. Defaults to 16000, the value that
            was previously hard-coded here.

    Returns:
        An array with one mean dB value per mel band (librosa's default
        band count is used).
    """
    mel_power = librosa.feature.melspectrogram(y=y, sr=sr)
    # ref=np.max expresses every bin in dB relative to the spectrogram's peak.
    mel_db = librosa.power_to_db(mel_power, ref=np.max)
    # After the transpose, axis 0 runs over frames, so the mean collapses time.
    return np.mean(mel_db.T, axis=0)

def extract_zero_crossing_rate(y):
    """Return the zero-crossing rate of a signal averaged over time.

    The per-frame rates come from ``librosa.feature.zero_crossing_rate``
    with its default framing; the result is their mean along the first
    axis of the transposed output.
    """
    zcr_frames = librosa.feature.zero_crossing_rate(y)
    frames_first = zcr_frames.T
    return np.mean(frames_first, axis=0)

def extract_chroma(y, sr=16000):
    """Return the STFT-based chromagram of a signal averaged over time.

    Args:
        y: Audio time series, passed to ``librosa.feature.chroma_stft``.
        sr: Sampling rate of ``y`` in Hz. Defaults to 16000, the value that
            was previously hard-coded here.

    Returns:
        An array with one mean value per chroma bin (librosa's default
        bin count is used).
    """
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    # After the transpose, axis 0 runs over frames, so the mean collapses time.
    return np.mean(chroma.T, axis=0)

def extract_cqt(y, sr=16000):
    """Return the constant-Q transform magnitudes of a signal averaged over time.

    Args:
        y: Audio time series, passed to ``librosa.cqt``.
        sr: Sampling rate of ``y`` in Hz. Defaults to 16000, the value that
            was previously hard-coded here.

    Returns:
        An array with one mean magnitude per CQT bin (librosa's default
        bin count is used).
    """
    # librosa.cqt returns complex values; only their magnitude is kept.
    cqt_magnitude = np.abs(librosa.cqt(y, sr=sr))
    # After the transpose, axis 0 runs over frames, so the mean collapses time.
    return np.mean(cqt_magnitude.T, axis=0)

# Preprocess audio and extract features
def preprocess_and_extract_features(file):
    """Load one audio file, normalise its length and compute its features.

    The file is loaded at 16 kHz, leading and trailing silence is trimmed,
    and the signal is then zero-padded at the end or cut so that it holds
    exactly five seconds of samples.

    Returns:
        A list of six arrays, in this order: MFCC, delta MFCC, mel
        spectrogram, zero-crossing rate, chroma and CQT.
    """
    signal, sample_rate = librosa.load(file, sr=16000)

    # trim() also returns the kept sample interval, which is not needed here.
    signal = librosa.effects.trim(signal)[0]

    # Force a fixed five-second length: cut the excess first, then fill
    # whatever is still missing with trailing zeros.
    target_len = 5 * sample_rate
    signal = signal[:target_len]
    shortfall = target_len - len(signal)
    if shortfall > 0:
        signal = np.pad(signal, (0, shortfall), mode='constant')

    # The order of the extractors defines the order of the returned features.
    extractors = (
        extract_mfcc,
        extract_delta,
        extract_mel_spectrogram,
        extract_zero_crossing_rate,
        extract_chroma,
        extract_cqt,
    )
    return [extract(signal) for extract in extractors]

# Process audio files and save features
def process_audio_files(audio_files, label, file_prefix):
    """Extract features for each audio file and write them to one CSV file.

    Every successfully processed file becomes a row made of its path, the
    concatenated feature values and ``label``. A file whose processing
    raises is reported on stdout and left out of the table. The table is
    written to ``<file_prefix>_audio_features.csv`` without the index.
    """
    rows = []
    for path in audio_files:
        try:
            feature_vectors = preprocess_and_extract_features(path)
            row = [path, *np.concatenate(feature_vectors), label]
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole batch.
            print(f"Error processing {path}: {e}")
        else:
            rows.append(row)

    # Column names, laid out in the same order as the concatenated features.
    headers = ['filename']
    headers.extend(f'mfcc_{i}' for i in range(13))
    headers.extend(f'delta_mfcc_{i}' for i in range(13))
    headers.extend(f'mel_spectrogram_{i}' for i in range(128))
    headers.append('zcr')
    headers.extend(f'chroma_{i}' for i in range(12))
    headers.extend(f'cqt_{i}' for i in range(84))
    headers.append('label')

    table = pd.DataFrame(rows, columns=headers)
    table.to_csv(f'{file_prefix}_audio_features.csv', index=False)

# Dataset roots: synthesised speech (labelled 1) and genuine speech (labelled 0).
fake_directory = r"C:\Users\hiran\Downloads\TIMIT-TTS\TIMIT-TTS"
real_directory = r"C:\Users\hiran\Downloads\train-clean-100\LibriSpeech\train-clean-100"

# Sample both datasets before any feature extraction starts.
fake_audio_files = collect_audio_files_dfs(fake_directory, num_files_per_dir=6)
real_audio_files = collect_audio_files_dfs(real_directory, num_files_per_dir=2)

# Each dataset is written to its own CSV, named after its prefix.
for _files, _label, _prefix in (
    (fake_audio_files, 1, 'fake'),
    (real_audio_files, 0, 'real'),
):
    process_audio_files(_files, label=_label, file_prefix=_prefix)

print("Feature extraction and saving complete!")

  return pitch_tuning(


Feature extraction and saving complete!
