In [1]:
# Mount Google Drive into the Colab runtime at /content/drive so the project
# folder stored on Drive can be reached from this notebook.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Create the project folder on the mounted Drive (-p: no error if it already
# exists), then make it the working directory so the relative paths used in
# the following cells resolve inside it.
!mkdir -p /content/drive/MyDrive/EE641/Project/
%cd /content/drive/MyDrive/EE641/Project/

/content/drive/MyDrive/EE641/Project


In [3]:
import os
import librosa
import numpy as np
import pandas as pd
from tqdm import tqdm
import wave

In [4]:
# Load the event metadata table; later cells read its 'clip_id', 'class',
# 'start_time' and 'end_time' columns.
file_path = 'metadata.csv'
data = pd.read_csv(file_path)
# Folder holding the audio clips, which are named composite_clip_<clip_id>.wav.
audio_dir = 'UrbanSound8K_Composite/'

In [5]:
# Audio framing constants. `file_path`, `data` and `audio_dir` are already
# defined by the preceding cell, so they are not assigned again here:
# repeating them only re-read metadata.csv a second time.
SAMPLE_RATE = 22050                        # sample rate every clip must have, in Hz
AUDIO_DURATION = 60                        # length each clip is padded/truncated to, in seconds
FRAME_SIZE = SAMPLE_RATE * AUDIO_DURATION  # number of samples kept per clip

In [6]:
# Sorted list of the distinct event classes found in the metadata. A class's
# position in this list is the column it occupies in the label matrices.
class_labels = data['class'].drop_duplicates().tolist()
class_labels.sort()
class_labels

['air_conditioner',
 'car_horn',
 'children_playing',
 'dog_bark',
 'drilling',
 'engine_idling',
 'gun_shot',
 'jackhammer',
 'siren',
 'street_music']

In [7]:
def read_wave_file(file_path, frame_size=FRAME_SIZE):
    """Read a 16-bit PCM WAV file as a fixed-length, single-channel array.

    Only the first channel of a multi-channel file is kept. The signal is
    zero-padded at the end, or truncated, to exactly ``frame_size`` samples.

    Args:
        file_path: Path of the WAV file to open.
        frame_size: Number of samples in the returned array.

    Returns:
        ``(audio, sample_rate)`` where ``audio`` is a 1-D 16-bit integer array
        of length ``frame_size`` and ``sample_rate`` is the rate stored in the
        file header. ``(None, None)`` when the file cannot be read or is not
        16-bit PCM; the reason is printed instead of raised so that a batch
        run can skip the file and continue.
    """
    try:
        with wave.open(file_path, 'rb') as wf:
            sample_rate = wf.getframerate()
            n_channels = wf.getnchannels()
            sample_width = wf.getsampwidth()

            # The decoding below hard-codes 2-byte samples; any other width
            # would be silently reinterpreted into garbage, so reject it.
            if sample_width != 2:
                raise ValueError(
                    f"unsupported sample width of {sample_width} bytes (expected 2)"
                )

            # Frames past frame_size are discarded anyway, so don't read them.
            frames_needed = min(wf.getnframes(), frame_size)
            audio_data = wf.readframes(frames_needed)

            # WAV PCM samples are little-endian regardless of host byte order.
            audio = np.frombuffer(audio_data, dtype='<i2')

            # Samples are interleaved per frame; stepping by the channel count
            # keeps the first channel only.
            if n_channels > 1:
                audio = audio[::n_channels]

            if len(audio) < frame_size:
                audio = np.pad(audio, (0, frame_size - len(audio)), 'constant')
            else:
                audio = audio[:frame_size]

            return audio, sample_rate
    except Exception as e:
        # Deliberate best-effort: report the problem and let the caller skip.
        print(f"Error reading {file_path}: {e}")
        return None, None

In [8]:
def build_label_matrix(clip_id, frame_size, sample_rate, df, class_names=None):
    """Build the per-second multi-hot label matrix for one clip.

    The clip is divided into ``frame_size // sample_rate`` rows, i.e. one row
    per ``sample_rate`` samples (one second of audio). A cell is set to 1 when
    an event of that class overlaps that second.

    Args:
        clip_id: Value matched against ``df['clip_id']`` to select the events.
        frame_size: Clip length in samples.
        sample_rate: Samples per second, which is also the samples per row.
        df: Event table with 'clip_id', 'class', 'start_time' and 'end_time'
            columns. Times are interpreted as seconds from the clip start.
        class_names: Ordered class names defining the matrix columns. Defaults
            to the notebook-level ``class_labels`` list.

    Returns:
        Integer array of shape ``(frame_size // sample_rate, len(class_names))``.

    Raises:
        ValueError: if an event's class is not present in ``class_names``.
    """
    if class_names is None:
        class_names = class_labels
    class_names = list(class_names)

    num_frames = frame_size // sample_rate
    label_matrix = np.zeros((num_frames, len(class_names)), dtype=int)
    events = df[df['clip_id'] == clip_id]

    for _, event in events.iterrows():
        # Rows are one second long, so a time in seconds maps directly to a
        # row index. Scaling by sample_rate would give a *sample* index, which
        # lies far beyond the last row and turns the slice assignment below
        # into a silent no-op (an all-zero matrix).
        first_row = int(np.floor(event['start_time']))
        stop_row = int(np.ceil(event['end_time']))

        # Clamp to the matrix so negative or overlong times cannot wrap
        # around through negative slice indices.
        first_row = min(max(first_row, 0), num_frames)
        stop_row = min(max(stop_row, 0), num_frames)

        class_idx = class_names.index(event['class'])
        label_matrix[first_row:stop_row, class_idx] = 1

    return label_matrix

In [9]:
# Accumulators filled by the extraction loop: one waveform and one label
# matrix per successfully read clip.
features, labels = [], []

In [10]:

# Start from empty accumulators so this cell can be re-run safely. Without the
# reset, a second run appends a duplicate of every clip, or fails outright
# once `features` / `labels` have been converted to NumPy arrays further down.
features = []
labels = []

for clip_id in tqdm(data['clip_id'].unique()):
    audio_file = os.path.join(audio_dir, f"composite_clip_{clip_id}.wav")
    audio, sr = read_wave_file(audio_file)

    # read_wave_file returns (None, None) on failure. Clips recorded at a
    # different rate are skipped as well, because FRAME_SIZE and the label
    # matrix are both derived from SAMPLE_RATE.
    if audio is None or sr != SAMPLE_RATE:
        print(f"Skipping file {audio_file} due to error or mismatched sample rate.")
        continue

    label_matrix = build_label_matrix(clip_id, FRAME_SIZE, SAMPLE_RATE, data)

    # Appended together so features[i] and labels[i] describe the same clip.
    features.append(audio)
    labels.append(label_matrix)

100%|██████████| 5000/5000 [23:45<00:00,  3.51it/s]


In [11]:
# Convert the per-clip lists into single NumPy arrays with the clips along
# the first axis. Note that this rebinds the names from list to ndarray.
features = np.array(features)
labels = np.array(labels)

In [12]:
# Persist both arrays as .npy files in the current working directory.
# NOTE(review): "orginal" in the filenames looks like a typo for "original";
# left unchanged because anything that loads these files may rely on the
# exact names — confirm before renaming.
np.save('features_orginal_wav.npy', features)
np.save('labels_orginal_wav.npy', labels)

In [13]:
# Sanity check: both arrays should have the same number of clips on axis 0.
print(f"Features shape: {features.shape}, Labels shape: {labels.shape}")

Features shape: (5000, 1323000), Labels shape: (5000, 60, 10)


In [14]:
# Display the waveform array as a quick visual check (NumPy abbreviates the
# repr of large arrays, so only the corners are shown).
features

array([[   22,    44,   -12, ...,     0,     0,     0],
       [  377,  -407,  -111, ...,     0,     0,     0],
       [-1251, -1379,  -248, ...,     0,     0,     0],
       ...,
       [ 1416,  1976,  1392, ...,     0,     0,     0],
       [ -828, -1153, -1044, ...,     0,     0,     0],
       [  144,   280,   248, ...,     0,     0,     0]], dtype=int16)