In [3]:
# Print the version string of the NumPy build loaded in this kernel.
import numpy as np
print(np.__version__)


2.2.3


In [None]:
import os
import soundfile as sf

def check_channels_in_dir(directory):
    """Print the channel count of every WAV file found under ``directory``.

    The directory tree is walked recursively. Only each file's header is
    inspected (``soundfile.info``), so no audio samples are decoded. The
    extension test is case-insensitive, so ``*.WAV`` files are reported too.

    Args:
        directory: Root folder to scan.

    Returns:
        None. One line is printed per matching file: either its channel
        count or the error raised while opening it.
    """
    for root, _, files in os.walk(directory):
        for file in files:
            if not file.lower().endswith('.wav'):
                continue
            file_path = os.path.join(root, file)
            try:
                channels = sf.info(file_path).channels
                print(f"{file_path} -> Channels: {channels}")
            except Exception as e:
                # Best-effort scan: report the unreadable file and keep going.
                print(f"Error reading {file_path}: {e}")

# Path to the directory containing WAV files.
# Written as a raw string with single backslashes: doubling them inside
# r"..." would place two literal backslashes between every path component.
directory = r"C:\Users\jadit\OneDrive\Desktop\SIT\SEM-6\PBL\Channels\test"

# Call the function
check_channels_in_dir(directory)


In [1]:
import soundfile as sf

# Location of the single recording to inspect
file_path = "C:\\Users\\jadit\\Downloads\\3 (11).wav"

# Decode the file; soundfile hands back the samples and the sample rate
data, samplerate = sf.read(file_path)

# A 1-D sample array is treated as mono; otherwise the size of axis 1 is
# taken as the channel count
channels = 1 if data.ndim <= 1 else data.shape[1]

print(f"Number of channels: {channels}")


Number of channels: 2


In [None]:
import os
import numpy as np
import pandas as pd
import soundfile as sf
from concurrent.futures import ThreadPoolExecutor

def compute_tdoa(audio, sr):
    """Estimate the time difference of arrival between the first two channels.

    The delay is the lag at which the cross-correlation of row 0 and row 1
    peaks. The correlation is evaluated through the FFT, which keeps long
    recordings tractable; the lag layout and sign convention are the same as
    ``np.correlate(row0, row1, mode="full")``.

    Args:
        audio: Array indexed as ``audio[channel, sample]``. Only rows 0 and 1
            are used.
        sr: Sample rate, in samples per second.

    Returns:
        The delay in seconds (positive when the signal in row 0 lags the one
        in row 1), or ``None`` when ``audio`` has fewer than two dimensions,
        fewer than two rows, or no samples.
    """
    audio = np.asarray(audio)
    if audio.ndim < 2 or audio.shape[0] < 2 or audio.shape[1] == 0:
        return None

    channel_1 = audio[0, :]
    channel_2 = audio[1, :]
    n_samples = len(channel_1)

    # Zero-pad to a power of two strictly larger than the 2n-1 linear lags so
    # the circular correlation produced by the FFT does not wrap around.
    fft_size = 1 << (2 * n_samples - 1).bit_length()
    spectrum_1 = np.fft.rfft(channel_1, fft_size)
    spectrum_2 = np.fft.rfft(channel_2, fft_size)
    circular = np.fft.irfft(spectrum_1 * np.conj(spectrum_2), fft_size)

    # Negative lags sit at the tail of the circular result; move them to the
    # front so index 0 is lag -(n-1) and index n-1 is lag 0.
    correlation = np.concatenate(
        (circular[fft_size - (n_samples - 1):], circular[:n_samples])
    )
    delay = np.argmax(correlation) - (n_samples - 1)
    tdoa = delay / sr
    return tdoa

def _mel_cepstrum(magnitude, freqs, sr, n_coeffs=5, n_filters=26):
    """Return the first ``n_coeffs`` mel-frequency cepstral coefficients of one spectrum.

    Args:
        magnitude: Magnitude spectrum (one value per entry of ``freqs``).
        freqs: Ascending bin frequencies in Hz.
        sr: Sample rate in Hz; the filterbank spans 0 .. sr/2.
        n_coeffs: Number of coefficients to return.
        n_filters: Number of triangular mel filters.

    Returns:
        1-D array of length ``n_coeffs`` (unnormalised DCT-II of the log
        filterbank energies, starting with coefficient 0).
    """
    power = np.square(magnitude)

    def hz_to_mel(hz):
        return 2595.0 * np.log10(1.0 + hz / 700.0)

    def mel_to_hz(mel):
        return 700.0 * (10.0 ** (mel / 2595.0) - 1.0)

    # Filter edges are equally spaced on the mel scale.
    edges = mel_to_hz(np.linspace(0.0, hz_to_mel(sr / 2.0), n_filters + 2))

    band_energy = np.zeros(n_filters)
    for i in range(n_filters):
        low, centre, high = edges[i], edges[i + 1], edges[i + 2]
        # Only the bins inside [low, high) can have a non-zero weight.
        start, stop = np.searchsorted(freqs, [low, high])
        band = freqs[start:stop]
        weights = np.minimum((band - low) / (centre - low),
                             (high - band) / (high - centre))
        band_energy[i] = np.dot(weights, power[start:stop])

    log_energy = np.log(band_energy + 1e-10)
    k = np.arange(n_coeffs)[:, None]
    n = np.arange(n_filters)[None, :]
    dct_basis = np.cos(np.pi * k * (2 * n + 1) / (2.0 * n_filters))
    return dct_basis @ log_energy


def extract_features(file_path):
    """Compute one row of summary audio features for a file.

    The file is read with ``soundfile``. The spectral, energy and cepstral
    features are computed from the first channel only; ``TDOA`` uses the
    first two channels and is ``None`` for single-channel files.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        A dict with the keys ``Filename``, ``TDOA``, ``Zero_Crossing_Rate``,
        ``Spectral_Centroid``, ``Spectral_Bandwidth``, ``Spectral_Contrast``,
        ``Spectral_Rolloff``, ``Entropy_Energy``, ``Short_Time_Energy`` and
        ``MFCC_1`` .. ``MFCC_5``; or ``None`` when the first channel is
        entirely zero or when any step raises (the error is printed).
    """
    try:
        audio, sr = sf.read(file_path, always_2d=True)
        y = audio[:, 0]

        if np.all(y == 0):  # Skip silent files
            print(f"Skipping silent file: {file_path}")
            return None

        # Single-channel files have no inter-channel delay to measure.
        tdoa = compute_tdoa(audio.T, sr) if audio.shape[1] > 1 else None

        #  Zero Crossing Rate
        # |diff(sign)| is 2 for a +/- flip, so halve it to count each
        # crossing once; the result is crossings per sample transition.
        zcr = np.mean(np.abs(np.diff(np.sign(y)))) / 2 if len(y) > 1 else np.nan

        #  Spectral Features (guarded against an empty/invalid spectrum)
        fft = np.abs(np.fft.rfft(y))
        total_magnitude = np.sum(fft)
        if total_magnitude > 0:
            freqs = np.fft.rfftfreq(len(y), d=1/sr)
            spectral_centroid = np.sum(freqs * fft) / total_magnitude
            spectral_bandwidth = np.sqrt(
                np.sum((freqs - spectral_centroid) ** 2 * fft) / total_magnitude
            )
            # First bin at which the cumulative magnitude reaches 85 %.
            rolloff_bin = np.argmax(np.cumsum(fft) >= 0.85 * total_magnitude)
            spectral_rolloff = freqs[rolloff_bin]
            # Mean log-magnitude, reported under the existing column name.
            spectral_contrast = np.mean(np.log(fft + 1e-10))
            #  MFCC Calculation: deterministic mel filterbank + DCT of the
            #  whole-signal spectrum.
            mfccs = _mel_cepstrum(fft, freqs, sr, n_coeffs=5)
        else:
            spectral_centroid = np.nan
            spectral_bandwidth = np.nan
            spectral_rolloff = np.nan
            spectral_contrast = np.nan
            mfccs = [np.nan] * 5

        #  Entropy of Energy (guarded against zero total energy)
        energy = np.square(y)
        total_energy = np.sum(energy)
        if total_energy > 0:
            norm_energy = energy / total_energy
            entropy_energy = -np.sum(norm_energy * np.log2(norm_energy + 1e-10))
        else:
            entropy_energy = np.nan

        #  Short-Time Energy
        frame_size = 1024
        hop_size = 512
        if len(y) >= frame_size:
            # "+ 1" keeps the final full frame, including the case where the
            # signal is exactly one frame long.
            frame_starts = range(0, len(y) - frame_size + 1, hop_size)
            ste = np.mean([np.sum(y[i:i + frame_size] ** 2) for i in frame_starts])
        else:
            ste = np.nan

        return {
            "Filename": os.path.basename(file_path),
            "TDOA": tdoa,
            "Zero_Crossing_Rate": zcr,
            "Spectral_Centroid": spectral_centroid,
            "Spectral_Bandwidth": spectral_bandwidth,
            "Spectral_Contrast": spectral_contrast,
            "Spectral_Rolloff": spectral_rolloff,
            "Entropy_Energy": entropy_energy,
            "Short_Time_Energy": ste,
            "MFCC_1": mfccs[0],
            "MFCC_2": mfccs[1],
            "MFCC_3": mfccs[2],
            "MFCC_4": mfccs[3],
            "MFCC_5": mfccs[4]
        }
    except Exception as e:
        # Best-effort batch processing: report the failure and drop the file.
        print(f"Error processing {file_path}: {e}")
        return None

#  Parallel Processing for Faster Execution
def process_directory(directory, output_file, max_workers=8):
    """Extract features from every WAV file under ``directory`` into one Excel sheet.

    Files are discovered recursively (extension match is case-insensitive)
    and handed to ``extract_features`` on a thread pool. Files for which
    ``extract_features`` returns ``None`` are left out of the output.

    Args:
        directory: Root folder to scan for ``.wav`` files.
        output_file: Path of the Excel workbook to write.
        max_workers: Number of worker threads.

    Returns:
        None. The feature table is written to ``output_file``.
    """
    wav_paths = [
        os.path.join(root, name)
        for root, _, names in os.walk(directory)
        for name in names if name.lower().endswith('.wav')
    ]

    print(f"Found {len(wav_paths)} files. Processing with {max_workers} threads...")

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # executor.map yields results in input order.
        all_data = [
            result
            for result in executor.map(extract_features, wav_paths)
            if result is not None
        ]

    # Write the table in one call. Writing row-chunks at startrow=i placed
    # each later chunk on top of the previous chunk's last record, because
    # the header already occupies row 0.
    pd.DataFrame(all_data).to_excel(output_file, index=False)

    print(f"Saved results to {output_file}")

#  Paths and Execution
# The input path is a raw string with single backslashes: doubling them
# inside r"..." would place two literal backslashes between path components.
input_directory = r"C:\Users\jadit\OneDrive\Desktop\SIT\SEM-6\PBL\Channels\test"
output_file = "BGG1_features_output.xlsx"

#  Run with 8 Threads
process_directory(input_directory, output_file, max_workers=8)


Found 851 files. Processing with 8 threads...


In [5]:
import pandas as pd

# Source workbook and destination CSV
input_file = 'BGG1_features_output.xlsx'
output_file = 'BGG_features_output.csv'

# Load the workbook into a DataFrame
df = pd.read_excel(io=input_file)

# Write the same table out as CSV, leaving the index column out
df.to_csv(path_or_buf=output_file, index=False)

print(f"Converted '{input_file}' to '{output_file}'")


Converted 'BGG1_features_output.xlsx' to 'BGG_features_output.csv'
