In [21]:
import pandas as pd
import os

# Locations of the UrbanSound8K metadata table and its audio folders
BASE_DIR = os.path.join("UrbanSound8K", "UrbanSound8K")
CSV_PATH = os.path.join(BASE_DIR, "metadata", "UrbanSound8K.csv")
AUDIO_DIR = os.path.join(BASE_DIR, "audio")

# Read the metadata table
urban_df = pd.read_csv(CSV_PATH)

# Each clip is addressed as <AUDIO_DIR>/fold<fold>/<slice_file_name>
urban_df['path'] = [
    os.path.join(AUDIO_DIR, f"fold{fold}", file_name)
    for fold, file_name in zip(urban_df['fold'], urban_df['slice_file_name'])
]

# Call the label column 'category', tag every row with its dataset of origin,
# and keep only the three columns used downstream.
urban_df = (
    urban_df
    .rename(columns={'class': 'category'})
    .assign(source='UrbanSound8K')
    .loc[:, ['path', 'category', 'source']]
)

# Persist the trimmed table
urban_df.to_csv("UrbanSound8K_metadata.csv", index=False)
print("[✓] Metadata saved to UrbanSound8K_metadata.csv")


[✓] Metadata saved to UrbanSound8K_metadata.csv


In [22]:
import pandas as pd
import os

# Locations of the ESC-50 metadata table and its audio folder
ESC_BASE = os.path.join("ESC-50-master", "ESC-50-master")
CSV_PATH = os.path.join(ESC_BASE, "meta", "esc50.csv")
AUDIO_DIR = os.path.join(ESC_BASE, "audio")

# Read the metadata table
esc_df = pd.read_csv(CSV_PATH)

# Each clip is addressed as <AUDIO_DIR>/<filename>
esc_df['path'] = [os.path.join(AUDIO_DIR, file_name) for file_name in esc_df['filename']]

# The label column is already called 'category', so no rename is needed; only
# the dataset-of-origin tag is added before trimming to the same three columns
# used for UrbanSound8K.
esc_df['source'] = 'ESC-50'
esc_df = esc_df[['path', 'category', 'source']]

# Persist the trimmed table
esc_df.to_csv("ESC50_metadata.csv", index=False)
print("[✓] ESC-50 metadata saved to ESC50_metadata.csv")


[✓] ESC-50 metadata saved to ESC50_metadata.csv


In [24]:
# Align UrbanSound8K label names with the ESC-50 ones. "dog_bark" is the only
# label whose name changes; replace() leaves every value without an entry
# untouched, so labels that keep their name need no mapping.
urban_df['category'] = urban_df['category'].replace({"dog_bark": "dog"})


In [26]:
# Distinct label names present in each dataset
esc_labels = set(esc_df['category'].unique())
urban_labels = set(urban_df['category'].unique())

# UrbanSound8K labels with no identically named ESC-50 label, in alphabetical order
extra_classes = sorted(urban_labels.difference(esc_labels))
print("Labels in UrbanSound8K not found in ESC-50:", extra_classes)


Labels in UrbanSound8K not found in ESC-50: ['air_conditioner', 'children_playing', 'drilling', 'engine_idling', 'gun_shot', 'jackhammer', 'street_music']


In [27]:
import pandas as pd

# Read the UrbanSound8K metadata CSV
urban_df = pd.read_csv("UrbanSound8K_metadata.csv")

# UrbanSound8K label -> ESC-50 label; labels without an entry are left unchanged
rename_map = {"dog_bark": "dog"}

# Swap in the renamed labels, keeping the column in place
urban_df = urban_df.assign(category=urban_df['category'].replace(rename_map))

# Write the relabelled table to its own file
urban_df.to_csv("UrbanSound8K_renamed.csv", index=False)
print("[✓] Renamed UrbanSound8K categories saved to UrbanSound8K_renamed.csv")


[✓] Renamed UrbanSound8K categories saved to UrbanSound8K_renamed.csv


In [28]:
import pandas as pd

# Read both per-dataset tables: ESC-50 as written, UrbanSound8K after relabelling
esc_df = pd.read_csv("ESC50_metadata.csv")
urban_df = pd.read_csv("UrbanSound8K_renamed.csv")

# Stack ESC-50 rows first, then UrbanSound8K rows, under a fresh 0..n-1 index
frames = [esc_df, urban_df]
merged_df = pd.concat(frames, ignore_index=True)

# Persist the combined table
merged_df.to_csv("merged_ESC50_UrbanSound8K.csv", index=False)
print("[✓] Full merged dataset saved to merged_ESC50_UrbanSound8K.csv")


[✓] Full merged dataset saved to merged_ESC50_UrbanSound8K.csv


In [29]:
import pandas as pd

# Read both per-dataset tables from disk
esc_df = pd.read_csv("ESC50_metadata.csv")
urban_df = pd.read_csv("UrbanSound8K_renamed.csv")  # relabelled UrbanSound8K table

# Stack ESC-50 rows first, then UrbanSound8K rows, under a fresh 0..n-1 index
frames = [esc_df, urban_df]
merged_df = pd.concat(frames, ignore_index=True)

# Number of clips per label, listed alphabetically by label
per_label = merged_df['category'].value_counts()
label_counts = per_label.sort_index()
print(label_counts)


category
air_conditioner     1000
airplane              40
breathing             40
brushing_teeth        40
can_opening           40
car_horn             469
cat                   40
chainsaw              40
children_playing    1000
chirping_birds        40
church_bells          40
clapping              40
clock_alarm           40
clock_tick            40
coughing              40
cow                   40
crackling_fire        40
crickets              40
crow                  40
crying_baby           40
dog                 1040
door_wood_creaks      40
door_wood_knock       40
drilling            1000
drinking_sipping      40
engine                40
engine_idling       1000
fireworks             40
footsteps             40
frog                  40
glass_breaking        40
gun_shot             374
hand_saw              40
helicopter            40
hen                   40
insects               40
jackhammer          1000
keyboard_typing       40
laughing              40
mouse_click     

In [30]:
# Number of distinct labels in the merged table
n_unique_labels = merged_df['category'].nunique()
print("\nTotal unique labels:", n_unique_labels)


Total unique labels: 57


In [33]:
# Write the merged table to disk without the row index
merged_csv_path = "merged_ESC50_UrbanSound8K.csv"
merged_df.to_csv(merged_csv_path, index=False)
print("\n✅ Merged dataset saved to 'merged_ESC50_UrbanSound8K.csv'")



✅ Merged dataset saved to 'merged_ESC50_UrbanSound8K.csv'


In [34]:
import tensorflow_hub as hub
import tensorflow as tf

# TF Hub handle of the YAMNet model
yamnet_model_handle = "https://tfhub.dev/google/yamnet/1"

# Load the model. A failure is reported and then re-raised: without the
# re-raise, `yamnet_model` would stay undefined (or keep a stale value from an
# earlier run), and the cells that use it would fail later with a less helpful error.
try:
    yamnet_model = hub.load(yamnet_model_handle)
except Exception as e:
    print("Failed to load model:", e)
    raise
else:
    print("YAMNet model loaded successfully!")


YAMNet model loaded successfully!


In [39]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.utils.class_weight import compute_class_weight
import numpy as np

# Read the merged metadata table
df = pd.read_csv("merged_ESC50_UrbanSound8K.csv")

# Give every label name an integer id
label_encoder = LabelEncoder()
df['class_id'] = label_encoder.fit_transform(df['category'])

# Label name -> integer id lookup (printed here, not written to disk)
label_names = label_encoder.classes_
label_map = dict(zip(label_names, label_encoder.transform(label_names)))
print("🔠 Label map:", label_map)

# "balanced" class weights over the ids that occur in the table
present_ids = np.unique(df['class_id'])
class_weights = compute_class_weight(
    class_weight="balanced",
    classes=present_ids,
    y=df['class_id'],
)

# Class id -> weight lookup
class_weights_dict = dict(zip(present_ids, class_weights))
print("\n⚖️ Class weights:", class_weights_dict)

# Persist the table with the added class_id column
df.to_csv("final_merged_with_class_ids.csv", index=False)
print("\n✅ Saved: final_merged_with_class_ids.csv")


🔠 Label map: {'air_conditioner': 0, 'airplane': 1, 'breathing': 2, 'brushing_teeth': 3, 'can_opening': 4, 'car_horn': 5, 'cat': 6, 'chainsaw': 7, 'children_playing': 8, 'chirping_birds': 9, 'church_bells': 10, 'clapping': 11, 'clock_alarm': 12, 'clock_tick': 13, 'coughing': 14, 'cow': 15, 'crackling_fire': 16, 'crickets': 17, 'crow': 18, 'crying_baby': 19, 'dog': 20, 'door_wood_creaks': 21, 'door_wood_knock': 22, 'drilling': 23, 'drinking_sipping': 24, 'engine': 25, 'engine_idling': 26, 'fireworks': 27, 'footsteps': 28, 'frog': 29, 'glass_breaking': 30, 'gun_shot': 31, 'hand_saw': 32, 'helicopter': 33, 'hen': 34, 'insects': 35, 'jackhammer': 36, 'keyboard_typing': 37, 'laughing': 38, 'mouse_click': 39, 'pig': 40, 'pouring_water': 41, 'rain': 42, 'rooster': 43, 'sea_waves': 44, 'sheep': 45, 'siren': 46, 'sneezing': 47, 'snoring': 48, 'street_music': 49, 'thunderstorm': 50, 'toilet_flush': 51, 'train': 52, 'vacuum_cleaner': 53, 'washing_machine': 54, 'water_drops': 55, 'wind': 56}

⚖️ Cl

In [40]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import librosa
from tqdm import tqdm
import tensorflow_hub as hub

# Load YAMNet from TF Hub
yamnet_model = hub.load('https://tfhub.dev/google/yamnet/1')

# Metadata table with one row per clip (file path plus numeric class id)
df = pd.read_csv("final_merged_with_class_ids.csv")
X = []
y = []

# Build one feature vector per clip by averaging the model's per-frame
# embeddings. A clip that raises is reported and skipped, so X and y always
# stay the same length.
for file_path, label in tqdm(zip(df['path'], df['class_id']), total=len(df)):
    try:
        # Decode as mono audio resampled to 16 kHz
        samples, _ = librosa.load(file_path, sr=16000, mono=True)
        audio = tf.convert_to_tensor(samples, dtype=tf.float32)
        # The embeddings are the second of the model's three outputs
        _, frame_embeddings, _ = yamnet_model(audio)
        X.append(tf.reduce_mean(frame_embeddings, axis=0).numpy())
        y.append(label)
    except Exception as e:
        print(f"Failed: {file_path} → {e}")

X = np.array(X)
y = np.array(y)
print("✅ Done:", X.shape, y.shape)


100%|██████████| 10732/10732 [05:08<00:00, 34.81it/s]

✅ Done: (10732, 1024) (10732,)





In [41]:
# Every clip is represented by exactly this many embedding frames
MAX_FRAMES = 100
X_seq = []
y_seq = []

# Build one fixed-length frame sequence per clip. A clip that raises is
# reported and skipped, so X_seq and y_seq always stay the same length.
for file_path, label in tqdm(zip(df['path'], df['class_id']), total=len(df)):
    try:
        # Decode as mono audio resampled to 16 kHz
        samples, _ = librosa.load(file_path, sr=16000, mono=True)
        audio = tf.convert_to_tensor(samples, dtype=tf.float32)
        # The embeddings are the second of the model's three outputs
        _, frame_embeddings, _ = yamnet_model(audio)
        # Drop frames beyond MAX_FRAMES, then zero-pad the end of shorter clips
        emb_np = frame_embeddings.numpy()[:MAX_FRAMES]
        missing_frames = MAX_FRAMES - emb_np.shape[0]
        if missing_frames > 0:
            emb_np = np.pad(emb_np, ((0, missing_frames), (0, 0)), mode='constant')
        X_seq.append(emb_np)
        y_seq.append(label)
    except Exception as e:
        print(f"Failed: {file_path} → {e}")

X_seq = np.array(X_seq)
y_seq = np.array(y_seq)
print("✅ Done:", X_seq.shape, y_seq.shape)


100%|██████████| 10732/10732 [04:38<00:00, 38.48it/s]


✅ Done: (10732, 100, 1024) (10732,)


In [42]:
# Write the mean-pooled features first, then the fixed-length sequence
# features, each alongside its label array.
for npy_name, array in (
    ("X_mean.npy", X),
    ("y_mean.npy", y),
    ("X_seq.npy", X_seq),
    ("y_seq.npy", y_seq),
):
    np.save(npy_name, array)

print("✅ Saved: X_mean.npy, y_mean.npy, X_seq.npy, y_seq.npy")

✅ Saved: X_mean.npy, y_mean.npy, X_seq.npy, y_seq.npy
