## Overlay Ambient Sound on Sample Office Segments

In [2]:
import os
from IPython.display import Audio, display

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from scipy.spatial.distance import cdist
from sklearn.metrics import silhouette_score

import umap.umap_ as umap

from pydub import AudioSegment
from pydub.effects import low_pass_filter, high_pass_filter

In [32]:
# Feature tables (CSV), one per domain.
office_features_path = "../features/office/extracted-features.csv"
nature_features_path = "../features/nature/extracted-features.csv"

# Directories the audio segments are read from, one per domain.
office_segments_dir = "../data/segments/office/"
nature_segments_dir = "../data/segments/nature/"

# Destination for the augmented audio; created up front (no error if it
# already exists) so that later exports do not fail on a missing folder.
augmented_output_dir = "../data/transformation-strategies/augmented_overlay/"
os.makedirs(augmented_output_dir, exist_ok=True)

In [5]:
# Read the feature table of each domain into its own DataFrame.
office_df, nature_df = (
    pd.read_csv(csv_path)
    for csv_path in (office_features_path, nature_features_path)
)

# Print the (rows, columns) shape of each table as a quick sanity check.
for caption, frame in (
    ("Office features:", office_df),
    ("Nature features:", nature_df),
):
    print(caption, frame.shape)

Office features: (644, 9)
Nature features: (2379, 10)


In [6]:
#### Define the files to augment and the overlay parameters
# Every entry must be its own string: a missing comma makes Python silently
# concatenate two adjacent literals into one non-existent filename.
interesting_files = ["segment_139.wav", "segment_397.wav", "segment_356.wav",
                     "segment_051.wav", "segment_050.wav", "segment_170.wav",
                     "segment_034.wav", "segment_087.wav"]

# Human-readable label for each entry of interesting_files (same order).
file_qualities = ["Loudest", "Softest", "Brightest & Noisiest",
                   "Dullest", "Smoothest", "Normal",
                   "Normal", "Normal"]

# Parameters
window_ms = 300       # Analysis window used to measure the local level
step_ms = 150         # Hop between windows (50% overlap of analysis windows)
threshold_db = -30    # Overlay only where the office window is above this dBFS
highpass_hz = 300     # Cut-off passed to high_pass_filter for the ambient track
lowpass_hz = 7000     # Cut-off passed to low_pass_filter for the ambient track
ambient_gain_offset = -3  # dB softer than office

# === Prepare Cluster Mapping ===
file_to_cluster = dict(zip(office_df["filename"], office_df["cluster"]))

# === Process Each File ===
# NOTE(review): cluster_match_df is not defined in this cell; it must already
# exist in the notebook session with the columns "cluster_id",
# "ambient_source" and "ambient_filename" that are read below.
for office_filename in interesting_files:
    cluster_id = file_to_cluster.get(office_filename)
    if cluster_id is None:
        print(f"Skipping {office_filename} (no cluster info)")
        continue

    # Guard against clusters without a matched ambient file: .iloc[0] on an
    # empty selection would raise IndexError and abort the whole loop.
    cluster_matches = cluster_match_df[cluster_match_df["cluster_id"] == cluster_id]
    if cluster_matches.empty:
        print(f"Skipping {office_filename} (no ambient match for cluster {cluster_id})")
        continue

    match_row = cluster_matches.iloc[0]
    ambient_source = match_row["ambient_source"]
    ambient_filename = match_row["ambient_filename"]

    office_path = os.path.join(office_segments_dir, office_filename)
    ambient_path = os.path.join(nature_segments_dir, ambient_source, ambient_filename)

    try:
        office_audio = AudioSegment.from_wav(office_path)
        ambient_audio = AudioSegment.from_wav(ambient_path)

        if len(ambient_audio) == 0:
            raise ValueError(f"ambient file {ambient_path} is empty")

        # Match length: loop the ambient track until it is at least as long
        # as the office track, then trim it to exactly the same duration.
        ambient_audio *= (len(office_audio) // len(ambient_audio)) + 1
        ambient_audio = ambient_audio[:len(office_audio)]

        # Band-limit the ambient track. Its absolute level is irrelevant
        # here because every overlaid window is re-levelled against the
        # office signal below.
        ambient_audio = high_pass_filter(ambient_audio, highpass_hz)
        ambient_audio = low_pass_filter(ambient_audio, lowpass_hz)

        # Start from the untouched office audio and only ADD ambient to it.
        # overlay() mixes (sums) signals, so assembling the result from
        # overlapping office windows would add the office signal twice in
        # every overlapped region and leave the tail of the file silent.
        output = office_audio

        # Walk the whole file in hops of step_ms. Slices near the end are
        # simply shorter, so the tail and files shorter than one window are
        # still processed.
        for i in range(0, len(office_audio), step_ms):
            slice_office = office_audio[i:i + window_ms]
            slice_ambient = ambient_audio[i:i + window_ms]

            # Quiet office windows are left as they are.
            if slice_office.dBFS <= threshold_db:
                continue

            # RMS is an amplitude measure, so the ratio converts to dB with
            # 20*log10. "or 1" avoids log10(0) / division by zero on
            # digitally silent windows.
            gain_db = (
                20 * np.log10((slice_office.rms or 1) / (slice_ambient.rms or 1))
                + ambient_gain_offset
            )

            # The level is measured over the full window, but only one hop
            # of ambient is written so consecutive windows never stack.
            output = output.overlay(slice_ambient[:step_ms] + gain_db, position=i)

        out_name = f"match_{office_filename}"
        out_path = os.path.join(augmented_output_dir, out_name)
        output.export(out_path, format="wav")

    except Exception as e:
        # Best-effort batch: report the failure and move on to the next file.
        print(f"❌ Error processing {office_filename}: {e}")

source
nature-5     1739
nature-7      136
nature-6      122
nature-8      121
nature-9      120
nature-10      72
nature-3       26
nature-1       17
nature-2       16
nature-4       10
Name: count, dtype: int64

In [None]:
from IPython.display import Audio, HTML, display
import os

# Segments to audition, paired by position with a short label for each.
interesting_files = [
    "segment_139.wav", "segment_397.wav", "segment_356.wav",
    "segment_051.wav", "segment_050.wav", "segment_170.wav",
    "segment_034.wav", "segment_087.wav",
]
file_qualities = [
    "Loudest", "Softest", "Brightest & Noisiest",
    "Dullest", "Smoothest", "Normal",
    "Normal", "Normal",
]

# Folder of the original segments and folder of their augmented versions.
original_path = "../data/segments/office/"
augmented_path = "../data/transformation-strategies/augmented_overlay/"

# One <tr> per segment: label cell, original player, augmented player.
table_rows = []
for fname, quality in zip(interesting_files, file_qualities):
    orig_file = os.path.join(original_path, fname)
    aug_file = os.path.join(augmented_path, f"match_{fname}")

    # Report and skip any segment for which either file is absent on disk.
    if not (os.path.exists(orig_file) and os.path.exists(aug_file)):
        print(f"⚠️ Missing file: {fname}")
        continue

    # Render the two players first so the row template stays readable.
    orig_player = Audio(orig_file)._repr_html_()
    aug_player = Audio(aug_file)._repr_html_()
    row = f"""
        <tr>
            <td><b>{fname}</b><br><i>{quality}</i></td>
            <td>{orig_player}</td>
            <td>{aug_player}</td>
        </tr>
        """
    table_rows.append(row)

# Wrap the collected rows in the table skeleton.
rows_html = ''.join(table_rows)
html = f"""
<table>
    <thead>
        <tr>
            <th>File & Quality</th>
            <th>Original</th>
            <th>Augmented</th>
        </tr>
    </thead>
    <tbody>
        {rows_html}
    </tbody>
</table>
"""

# Show the comparison table in the notebook output.
display(HTML(html))
