# Notebook B: HeAR Feature Extraction
## Generate embeddings using the HeAR model

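This notebook downloads the HeAR model from the Hugging Face Hub and loads it directly as a TensorFlow SavedModel (via `tf.saved_model.load`) to ensure compatibility. It then extracts one embedding per preprocessed 2-second, 16 kHz audio clip and saves the embeddings for each dataset.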

In [1]:
# Install dependencies
%pip install huggingface_hub librosa tensorflow

import os
from pathlib import Path
import numpy as np
import librosa
import tensorflow as tf
from tqdm.notebook import tqdm
import json
from huggingface_hub import snapshot_download

# Root folder that holds every dataset. It can be overridden with the
# DATASETS_ROOT environment variable so the notebook also runs on machines
# without this drive layout; the original Windows path remains the default.
DATASETS_ROOT = Path(os.environ.get("DATASETS_ROOT", r"D:\datasets"))
PROCESSED_ROOT = DATASETS_ROOT / 'processed'   # input: preprocessed .wav clips, one folder per dataset
EMBEDDINGS_DIR = DATASETS_ROOT / 'embeddings'  # output: .npz files and the summary JSON
EMBEDDINGS_DIR.mkdir(parents=True, exist_ok=True)

TARGET_SR = 16000
N_SAMPLES = 32000  # 2 seconds at 16kHz
# Written to embeddings_summary.json as metadata. The HeAR model card documents
# 512-dimensional embeddings (the previous value here was 768).
# NOTE(review): confirm against the shape of the saved embeddings arrays.
EMBEDDING_DIM = 512

print(f"Embeddings output: {EMBEDDINGS_DIR}")
print(f"TensorFlow version: {tf.__version__}")

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.
Embeddings output: D:\datasets\embeddings
TensorFlow version: 2.20.0


In [2]:
# Load HeAR Model directly as a SavedModel
#
# NOTE(review): the recorded run of this cell failed under TensorFlow 2.20.0 with
# "... there are 0 MetaGraphs in the SavedModel with tag sets: []". The root
# cause is not visible from this notebook; before relying on the cells below,
# confirm that the downloaded snapshot contains a complete saved_model.pb and
# that the installed TensorFlow build can parse it.
print("Downloading HeAR model from Hugging Face Hub...")

try:
    # Downloads (or reuses from the local cache) the model repository snapshot
    model_path = snapshot_download(repo_id="google/hear", repo_type="model")
    print(f"Model path: {model_path}")

    print("Loading SavedModel...")
    hear_model = tf.saved_model.load(model_path)

    # Fail with an explicit message instead of a bare KeyError when the
    # expected signature is not exported by the loaded model.
    available_signatures = list(hear_model.signatures.keys())
    if "serving_default" not in available_signatures:
        raise KeyError(
            f"'serving_default' signature not found; available signatures: {available_signatures}"
        )

    # Callable used by the embedding-extraction cells further down
    inference_fn = hear_model.signatures["serving_default"]
    print("✓ Model loaded successfully using tf.saved_model.load")
    print(f"Signatures: {available_signatures}")
except Exception as e:
    print(f"⚠ Failed to load model: {e}")
    # Bare `raise` re-raises the active exception with its original traceback
    raise

Downloading HeAR model from Hugging Face Hub...


Fetching 24 files:   0%|          | 0/24 [00:00<?, ?it/s]

Model path: C:\Users\PC\.cache\huggingface\hub\models--google--hear\snapshots\9b2eb2853c426676255cc6ac5804b7f1fe8e563f
Loading SavedModel...
⚠ Failed to load model: Importing a SavedModel with `tf.saved_model.load` requires a `tags=` argument if there is more than one MetaGraph. Got `tags=None`, but there are 0 MetaGraphs in the SavedModel with tag sets: []. Pass a `tags=` argument to load this SavedModel.


ValueError: Importing a SavedModel with `tf.saved_model.load` requires a `tags=` argument if there is more than one MetaGraph. Got `tags=None`, but there are 0 MetaGraphs in the SavedModel with tag sets: []. Pass a `tags=` argument to load this SavedModel.

In [None]:
def load_audio_for_hear(file_path):
    """Read an audio file and return a fixed-length float32 waveform.

    The file is decoded by librosa as mono audio resampled to TARGET_SR, then
    forced to exactly N_SAMPLES samples: shorter clips are zero-padded at the
    end, longer clips are cut off after N_SAMPLES.

    Args:
        file_path: Path (or path-like) of the audio file to read.

    Returns:
        1-D np.float32 array of length N_SAMPLES.
    """
    # Per the original author's note, HeAR expects audio normalized to [-1, 1]
    waveform, _sr = librosa.load(str(file_path), sr=TARGET_SR, mono=True)

    n_missing = N_SAMPLES - len(waveform)
    if n_missing > 0:
        # Too short: append trailing zeros up to the target length
        waveform = np.pad(waveform, (0, n_missing), 'constant')
    else:
        # Long enough: keep only the leading N_SAMPLES samples
        waveform = waveform[:N_SAMPLES]

    return waveform.astype(np.float32)

def extract_embeddings_batch(audio_batch):
    """Run one batch of clips through the model's serving_default signature.

    Args:
        audio_batch: Sequence of waveforms; it is converted to a single tensor
            with ``tf.convert_to_tensor`` before inference.

    Returns:
        NumPy array read from the ``'output_0'`` entry of the signature's
        output mapping, or from the mapping's first entry when that key is
        absent.
    """
    model_input = tf.convert_to_tensor(audio_batch)

    # The signature is called with the keyword input 'x' and its result is
    # indexed like a dict.
    outputs = inference_fn(x=model_input)

    # Prefer 'output_0'; otherwise fall back to whichever key comes first.
    result_key = 'output_0' if 'output_0' in outputs else list(outputs.keys())[0]
    return outputs[result_key].numpy()

print("✓ Embedding functions ready")

In [None]:
def process_dataset_embeddings(dataset_name, batch_size=32):
    """Extract embeddings for every .wav file of one dataset and save them.

    Files are read from ``PROCESSED_ROOT / dataset_name`` in sorted order and
    processed in batches. A file that cannot be loaded is reported and skipped;
    a batch whose inference fails (or returns an unexpected number of rows) is
    reported and skipped as a whole. The result is written to
    ``EMBEDDINGS_DIR / f"{dataset_name}_embeddings.npz"`` with two arrays:
    ``embeddings`` and ``file_names`` (file stems, row-aligned with
    ``embeddings``).

    Args:
        dataset_name: Name of the dataset sub-folder under PROCESSED_ROOT.
        batch_size: Number of files sent to the model per inference call;
            must be at least 1.

    Returns:
        None if the dataset folder is missing or contains no .wav files,
        0 if no embedding could be extracted, otherwise the number of
        embeddings saved.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    input_dir = PROCESSED_ROOT / dataset_name
    if not input_dir.exists():
        print(f"⚠ {dataset_name}: Not found")
        return None

    wav_files = sorted(input_dir.glob("*.wav"))
    if not wav_files:
        # Previously a silent return; say why nothing was processed.
        print(f"⚠ {dataset_name}: No .wav files in {input_dir}")
        return None

    print(f"\nProcessing {dataset_name}: {len(wav_files)} files")

    embeddings_list = []
    file_names = []

    # Process in batches
    for i in tqdm(range(0, len(wav_files), batch_size), desc=f"Extracting {dataset_name}"):
        batch_files = wav_files[i:i+batch_size]

        # Load the batch, keeping names only for the files that loaded
        batch_audio = []
        batch_names = []
        for f in batch_files:
            try:
                audio = load_audio_for_hear(f)
            except Exception as e:
                print(f"Error loading {f}: {e}")
                continue
            batch_audio.append(audio)
            batch_names.append(f.stem)

        if not batch_audio:
            continue

        try:
            batch_embeddings = np.asarray(extract_embeddings_batch(batch_audio))
            # Guard the row alignment between embeddings and file names: a
            # mismatch here would silently mislabel every later row.
            if batch_embeddings.shape[0] != len(batch_names):
                raise ValueError(
                    f"model returned {batch_embeddings.shape[0]} rows "
                    f"for {len(batch_names)} clips"
                )
        except Exception as e:
            print(f"Error batch {i}: {e}")
            continue

        embeddings_list.append(batch_embeddings)
        file_names.extend(batch_names)

    if not embeddings_list:
        print(f"⚠ {dataset_name}: No embeddings extracted")
        return 0

    embeddings = np.vstack(embeddings_list)

    n_skipped = len(wav_files) - len(file_names)
    if n_skipped:
        print(f"⚠ {dataset_name}: {n_skipped} of {len(wav_files)} files skipped due to errors")

    output_path = EMBEDDINGS_DIR / f"{dataset_name}_embeddings.npz"
    # Store names as an explicit string array, row-aligned with `embeddings`
    np.savez_compressed(output_path, embeddings=embeddings, file_names=np.array(file_names, dtype=str))
    print(f"✓ {dataset_name}: {embeddings.shape[0]} embeddings saved")
    return embeddings.shape[0]

In [None]:
# Run the extraction for each dataset and keep the per-dataset result
# (None, 0 or the number of embeddings saved)
datasets = ['coughvid', 'parkinsons', 'respiratory_sounds', 'coswara']
results = {}

for name in datasets:
    results[name] = process_dataset_embeddings(name)

# Store the run metadata next to the embeddings
summary = {
    'embedding_dim': EMBEDDING_DIM,
    'sample_rate': TARGET_SR,
    'datasets': results,
}
with open(EMBEDDINGS_DIR / 'embeddings_summary.json', 'w') as f:
    f.write(json.dumps(summary, indent=2))

# Datasets that returned None or 0 contribute nothing to the total
total_embeddings = sum(filter(None, results.values()))
print(f"\nTotal embeddings: {total_embeddings}")