In [1]:
import librosa
import numpy as np
import os
import pandas as pd
from tqdm import tqdm

# Directory whose entries are listed and scanned for ".wav" files by the
# processing loop. The path is relative, so it is resolved against the
# kernel's current working directory.
AUDIO_DIR = "../../../work/pi_vcpartridge_umass_edu/ytb_wavs/"

In [2]:
# Classification threshold on a file's mean zero-crossing rate: a file whose
# mean ZCR is strictly greater than this value is labelled "Speech", anything
# else is labelled "Music". This may need tuning based on your dataset.
ZCR_THRESHOLD = 0.05  # Adjust based on experiments

# Function to compute Zero-Crossing Rate for a file
def compute_zcr(file_path):
    """Return the mean zero-crossing rate of one audio file.

    Parameters
    ----------
    file_path : str
        Path of the audio file to load with ``librosa.load``.

    Returns
    -------
    The mean of the values returned by
    ``librosa.feature.zero_crossing_rate``, or ``None`` if anything
    went wrong while loading or analysing the file. On failure a message
    naming the file and the error is printed instead of raising, so one bad
    file does not abort a batch run.
    """
    try:
        # sr=None asks librosa to keep the file's native sampling rate; the
        # rate itself is not needed for the ZCR computation.
        samples, _ = librosa.load(file_path, sr=None)
        per_frame_zcr = librosa.feature.zero_crossing_rate(samples)
        mean_zcr = np.mean(per_frame_zcr)
    except Exception as err:
        print(f"Error processing {file_path}: {err}")
        return None
    return mean_zcr

In [3]:
# Process all WAV files
# os.listdir returns entries in arbitrary order, so sort them to make the CSV
# row order reproducible between runs. Filtering happens before the list is
# handed to tqdm so the progress bar counts only the audio files that will be
# analysed. The extension check is case-insensitive so ".WAV" is not skipped.
wav_files = sorted(
    name for name in os.listdir(AUDIO_DIR) if name.lower().endswith(".wav")
)

results = []
failed_files = []
for file_name in tqdm(wav_files):
    file_path = os.path.join(AUDIO_DIR, file_name)
    zcr_value = compute_zcr(file_path)

    # compute_zcr returns None when the file could not be processed; keep
    # track of those files instead of dropping them without a trace.
    if zcr_value is None:
        failed_files.append(file_name)
        continue

    classification = "Speech" if zcr_value > ZCR_THRESHOLD else "Music"
    results.append((file_name, zcr_value, classification))

# Save results to a CSV file
df = pd.DataFrame(results, columns=["File", "ZCR", "Classification"])
df.to_csv("classification_results.csv", index=False)

print("Classification complete! Results saved to classification_results.csv.")
if failed_files:
    # Per-file errors are easy to miss among progress-bar output, so give a
    # single summary of how many files are absent from the CSV.
    print(f"Skipped {len(failed_files)} file(s) that could not be processed.")


100%|██████████| 2654/2654 [10:05<00:00,  4.38it/s] 

Classification complete! Results saved to classification_results.csv.



