# In [None]:  (Jupyter cell prompt left over from the notebook export)
import tensorflow as tf
import numpy as np
import soundfile as sf
import resampy
import tensorflow_hub as hub
import pandas as pd
from pathlib import Path

# Load the YAMNet audio classifier from TensorFlow Hub.
yamnet_model = hub.load('https://tfhub.dev/google/yamnet/1')

# Fetch the CSV that maps YAMNet class indices to display names.
labels_url = 'https://raw.githubusercontent.com/tensorflow/models/master/research/audioset/yamnet/yamnet_class_map.csv'
labels_path = tf.keras.utils.get_file('yamnet_class_map.csv', labels_url)

# Display names, in the row order of the class-map file.
class_names = pd.read_csv(labels_path)['display_name'].tolist()

# Positions of every class whose display name is exactly 'Speech'.
voice_class_indices = [
    idx for idx, label in enumerate(class_names) if label == 'Speech'
]

def detect_voice_in_wav(file_path):
    """Return the peak voice score YAMNet assigns to one audio file.

    The file is read with ``soundfile``, mixed down to a single channel,
    resampled to 16 kHz when its sample rate differs, and passed through the
    module-level ``yamnet_model``. For every frame the scores of the classes
    listed in ``voice_class_indices`` are averaged; the largest of those
    per-frame averages is returned.

    Args:
        file_path: Location of the audio file, handed to ``sf.read``.

    Returns:
        The maximum per-frame voice score found in the file.
    """
    target_rate = 16000  # rate the waveform is converted to before inference

    audio_data, sample_rate = sf.read(file_path)

    # sf.read yields a (frames, channels) array for multi-channel files.
    # Average the channels first so the resampler below works along the time
    # axis and the model receives a 1-D waveform.
    if audio_data.ndim > 1:
        audio_data = audio_data.mean(axis=1)

    if sample_rate != target_rate:
        audio_data = resampy.resample(audio_data, sample_rate, target_rate)

    waveform = audio_data.astype(np.float32)
    waveform_tensor = tf.convert_to_tensor(waveform)

    # The first element of the model output is indexed as (frame, class).
    yamnet_outputs = yamnet_model(waveform_tensor)
    scores = yamnet_outputs[0].numpy()

    # Average over the selected voice classes per frame, then take the peak.
    voice_scores = np.mean(scores[:, voice_class_indices], axis=-1)

    return np.max(voice_scores)

if __name__ == "__main__":
    # Folder that is scanned (non-recursively) for ``*.wav`` files.
    wav_dir = Path("./test_denoised")
    # CSV file that receives one row per scanned file.
    output_csv = "./voice_denoised_yamnet.csv"

    results = []
    # Sort the paths so rows are written in the same order on every run;
    # glob itself gives no ordering guarantee.
    for wav_file in sorted(wav_dir.glob("*.wav")):
        max_voice_score = detect_voice_in_wav(wav_file)
        file_id = wav_file.stem  # file name without the ".wav" suffix
        results.append({"file_id": file_id, "max_voice_score": max_voice_score})

    # Naming the columns keeps the header row even when no file was found.
    df = pd.DataFrame(results, columns=["file_id", "max_voice_score"])
    df.to_csv(output_csv, index=False)

    print(f"Results saved to: {output_csv}")