In [4]:
pip install requests pydub


Note: you may need to restart the kernel to use updated packages.


In [10]:
import os
import time
import csv
import requests
from datetime import datetime

# Dictionary of online radio streams
stream_sources = {
    "Radio Paradise": "http://stream.radioparadise.com/aac-320",
    "Mixcloud": "https://www.mixcloud.com/",
    "KRSC-FM": "http://www.rsukrscfm.rsu.edu:8000/rsuradio128k",
    "Listen2myradio": "http://www.listen2myradio.com/",
    "NPR Live": "http://npr-ice.streamguys1.com/live.mp3",
    "KEXP Radio": "http://live-aacplus-64.kexp.org/kexp64.aac",
    "Radio Swiss Jazz": "http://stream.srg-ssr.ch/m/rsj/aacp_96",
    "FIP Radio": "http://icecast.radiofrance.fr/fip-hifi.aac",
    "SomaFM Groove Salad": "http://ice1.somafm.com/groovesalad-128-mp3",
    "WNYC FM": "http://fm939.wnyc.org/wnycfm",
    "WFMU 91.1 FM": "http://stream0.wfmu.org/freeform-128k",
}

# Recording settings
clip_duration = 60
max_recordings = 30  # number of clips 

# Directory for storing recordings
dataset_folder = "Radio_Audio_Collection"
os.makedirs(dataset_folder, exist_ok=True)

# CSV file for metadata storage
metadata_file = os.path.join(dataset_folder, "recordings_info.csv")

with open(metadata_file, mode='w', newline='', encoding='utf-8') as csv_f:
    csv_writer = csv.writer(csv_f)
    csv_writer.writerow(["Station", "File_Name", "Timestamp", "Duration_sec"])

    file_index = 0

    while file_index < max_recordings:
        for station, url in stream_sources.items():
            if file_index >= max_recordings:
                break

            # Generate filename with timestamp
            current_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
            output_file = f"{station}_{current_time}_{file_index+1}.mp3"
            save_path = os.path.join(dataset_folder, output_file)

            print(f"Recording {clip_duration}s from {station}...")

            try:
                # Open a connection to the stream
                response = requests.get(url, stream=True, timeout=10)
                response.raise_for_status()  

                start_time = time.time()

                with open(save_path, "wb") as audio_file:
                    for chunk in response.iter_content(chunk_size=4096):
                        if time.time() - start_time > clip_duration:
                            break
                        audio_file.write(chunk)

                print(f" Saved: {save_path}")

                # Log the metadata
                csv_writer.writerow([station, output_file, current_time, clip_duration])
                file_index += 1

            except requests.exceptions.RequestException as net_err:
                print(f" Network error capturing {station}: {net_err}")

            except Exception as error:
                print(f" Error processing {station}: {error}")

            time.sleep(2)  # Short delay before next recording

print("\n Audio collection process completed successfully!")


Recording 60s from Radio Paradise...
 Saved: Radio_Audio_Collection\Radio Paradise_2025-03-09_23-14-39_1.mp3
Recording 60s from Mixcloud...
 Saved: Radio_Audio_Collection\Mixcloud_2025-03-09_23-15-41_2.mp3
Recording 60s from KRSC-FM...
 Saved: Radio_Audio_Collection\KRSC-FM_2025-03-09_23-15-49_3.mp3
Recording 60s from Listen2myradio...
 Saved: Radio_Audio_Collection\Listen2myradio_2025-03-09_23-16-52_4.mp3
Recording 60s from NPR Live...
 Saved: Radio_Audio_Collection\NPR Live_2025-03-09_23-16-55_5.mp3
Recording 60s from KEXP Radio...
 Saved: Radio_Audio_Collection\KEXP Radio_2025-03-09_23-17-58_6.mp3
Recording 60s from Radio Swiss Jazz...
 Saved: Radio_Audio_Collection\Radio Swiss Jazz_2025-03-09_23-19-03_7.mp3
Recording 60s from FIP Radio...
 Saved: Radio_Audio_Collection\FIP Radio_2025-03-09_23-20-06_8.mp3
Recording 60s from SomaFM Groove Salad...
 Saved: Radio_Audio_Collection\SomaFM Groove Salad_2025-03-09_23-21-09_9.mp3
Recording 60s from WNYC FM...
 Saved: Radio_Audio_Collection\