In [2]:
import librosa
import numpy as np
import os
import torch
from pathlib import Path

np.set_printoptions(suppress=True)

In [6]:
dataset = 'training/college_basketball'
# sample_rate = librosa.load("training/navy_vs_american_2_26/ads/ad [2025-03-12 114540].wav")[1]
# print(sample_rate)

sample_rate = 16000

In [5]:
# 0: broadcast
# 1: advertisement

ads_tensor = []
broadcast_tensor = []

for recording in os.listdir(dataset):
    ads = os.path.join(dataset, recording, 'ads')
    broadcast = os.path.join(dataset, recording, 'broadcast')
    # misc = os.path.join(dataset, recording, 'misc')

    # load audio into numpy arrays shape (clip, sample, (aplitude value, label))
    ads_labeled = []
    broadcast_labeled = []
    for ad in os.listdir(ads):
        audio, sr = librosa.load(os.path.join(ads, ad), sr=sample_rate, mono=True)
        labels = np.ones_like(audio)
        ads_labeled.append(np.stack((audio, labels), axis=1))

    for segment in os.listdir(broadcast):
        audio, sr = librosa.load(os.path.join(broadcast, segment), sr=sample_rate, mono=True)
        labels = np.zeros_like(audio)
        broadcast_labeled.append(np.stack((audio, labels), axis=1))

    ads_tensor.extend(ads_labeled)
    broadcast_tensor.extend(broadcast_labeled)

print(f"{sum(len(x) for x in ads_tensor)//sr} seconds of advertising over {len(ads_tensor)} ads")
print(f"{sum(len(x) for x in broadcast_tensor)//sr} seconds of actual broadcasting over {len(broadcast_tensor)} segments")

FileNotFoundError: [Errno 2] No such file or directory: 'training/college_basketball/ads/ads'

In [5]:
# split a segment into k-second clips

clip_duration = 1.0
samples_per_clip = int(sample_rate * clip_duration)
all_clips = []
for ad_segment in ads_tensor:
    total_chunks = int(len(ad_segment) // samples_per_clip)

    clip = [
        ad_segment[i * samples_per_clip : (i + 1) * samples_per_clip]
        for i in range(total_chunks)
    ]

    all_clips.extend(clip)

for broadcast_segment in broadcast_tensor:
    total_chunks = int(len(broadcast_segment) // samples_per_clip)

    clip = [
        broadcast_segment[i * samples_per_clip : (i + 1) * samples_per_clip]
        for i in range(total_chunks)
    ]

    all_clips.extend(clip)


In [6]:
clips_np = np.stack([clip.astype(np.float32, copy=False) for clip in all_clips])

out_path = Path(f"clips_{sample_rate}Hz_{clip_duration}s.npz")
np.savez_compressed(
    out_path,
    clips=clips_np,               # (N, samples_per_clip)
    sample_rate=np.int32(sample_rate),
    clip_duration=np.float32(clip_duration),
)

print(f"Saved {clips_np.shape[0]} clips to {out_path.resolve()}")

Saved 3906 clips to /Users/lense/Documents/projects/entertainment/clips_16000Hz_1.0s.npz


In [7]:
print(clips_np.shape)
print(clips_np[0].shape)
print(clips_np[0][0])

(3906, 16000, 2)
(16000, 2)
[-0.00000413  1.        ]
