In [1]:
import librosa
import numpy as np
import os
import pandas as pd
from tqdm import tqdm

# Directory holding the .wav files to classify. The path is relative, so it is
# resolved against the kernel's current working directory.
AUDIO_DIR = "../../../work/pi_vcpartridge_umass_edu/ytb_wavs/"

In [2]:
# Decision threshold on a file's mean zero-crossing rate (ZCR): the processing
# loop labels a file "Speech" when its ZCR is strictly above this value and
# "Music" otherwise. The value is dataset-dependent and may need tuning.
ZCR_THRESHOLD = 0.05  # Adjust based on experiments

def compute_zcr(file_path):
    """Return the mean zero-crossing rate (ZCR) of a single audio file.

    Args:
        file_path: Path of the audio file to analyse.

    Returns:
        The mean of the values returned by
        ``librosa.feature.zero_crossing_rate`` for the loaded signal, or
        ``None`` if loading or analysing the file raised an exception (the
        error is printed instead of propagated, so a batch run can continue).
    """
    try:
        # sr=None asks librosa to keep the file's own sampling rate.
        samples, _sample_rate = librosa.load(file_path, sr=None)
        crossing_rates = librosa.feature.zero_crossing_rate(samples)
        mean_rate = np.mean(crossing_rates)
    except Exception as e:
        print(f"Error processing {file_path}: {e}")
        return None
    return mean_rate

In [3]:
# Process all WAV files.
# os.listdir returns entries in arbitrary order, so the names are sorted to
# make the row order of the CSV reproducible across runs and filesystems.
# Filtering before handing the list to tqdm makes the progress bar count only
# the files that are actually analysed.
wav_file_names = sorted(
    name for name in os.listdir(AUDIO_DIR) if name.endswith(".wav")
)

results = []
for file_name in tqdm(wav_file_names):
    file_path = os.path.join(AUDIO_DIR, file_name)
    zcr_value = compute_zcr(file_path)

    # compute_zcr returns None for files it could not process; those files are
    # left out of the results table.
    if zcr_value is not None:
        classification = "Speech" if zcr_value > ZCR_THRESHOLD else "Music"
        results.append((file_name, zcr_value, classification))

# Save results to a CSV file (index=False: the row index is not written).
df = pd.DataFrame(results, columns=["File", "ZCR", "Classification"])
df.to_csv("classification_results.csv", index=False)

print("Classification complete! Results saved to classification_results.csv.")


100%|██████████| 2654/2654 [10:05<00:00,  4.38it/s] 

Classification complete! Results saved to classification_results.csv.





In [13]:
# Reload the first-pass results from the CSV written by the processing loop, so
# the cells below work from the saved file rather than the in-memory frame.
df = pd.read_csv("classification_results.csv")
df.head()

Unnamed: 0,File,ZCR,Classification
0,AAHiZ-c88ec.wav,0.147625,Speech
1,AEsRr-ZnzNc.wav,0.090394,Speech
2,AJpzk-aFZPU.wav,0.232535,Speech
3,AONXX-h9SdI.wav,0.143601,Speech
4,AOZIY-AwsjM.wav,0.106159,Speech


In [14]:
# Summary statistics (count, mean, std, quartiles, min/max) of the per-file ZCR values.
df["ZCR"].describe()

count    2654.000000
mean        0.123274
std         0.044659
min         0.008866
25%         0.093755
50%         0.118163
75%         0.146710
max         0.390265
Name: ZCR, dtype: float64

In [15]:
# Threshold used for the re-labelled ("tuned") results; it replaces the 0.05
# used in the first pass.
TUNED_ZCR_THRESHOLD = 0.07

# Strip only the trailing ".wav" extension. The dot is escaped, the pattern is
# anchored to the end of the name, and regex=True is stated explicitly: the
# default of `regex` differs between pandas versions, and under regex matching
# a bare ".wav" treats "." as a wildcard and would delete any "<char>wav" run
# anywhere inside a file ID.
df["File"] = df["File"].str.replace(r"\.wav$", "", regex=True)
df["Classification"] = df["ZCR"].apply(
    lambda zcr: "Speech" if zcr > TUNED_ZCR_THRESHOLD else "Music"
)

In [16]:
# Write the current frame to its own file (without the row index), leaving the
# first-pass classification_results.csv untouched.
df.to_csv("classification_results_tuned.csv", index=False)

In [2]:
# Load a results table that, per the displayed header, also carries
# SpectralCentroid, SpectralFlux and RMSEnergy columns next to ZCR.
# NOTE(review): clustered_results.csv is not written by any cell visible in
# this notebook - presumably produced elsewhere; confirm its provenance.
clustered_df = pd.read_csv("clustered_results.csv")
clustered_df.head()

Unnamed: 0,File,ZCR,SpectralCentroid,SpectralFlux,RMSEnergy,Classification
0,AAHiZ-c88ec,0.147625,2020.684248,0.061404,0.060953,Speech
1,AEsRr-ZnzNc,0.090394,1459.411075,-0.286998,0.119854,Music
2,AJpzk-aFZPU,0.232535,2547.312383,0.63512,0.091512,Speech
3,AONXX-h9SdI,0.143601,2209.112949,0.005477,0.107071,Speech
4,AOZIY-AwsjM,0.106159,1218.740176,-0.102051,0.036883,Music


In [4]:
# Count how many files carry each Classification label.
clustered_df["Classification"].value_counts()

Classification
Music     1587
Speech    1067
Name: count, dtype: int64