In [1]:
import json
import os
import random
import librosa
import glob
import numpy as np
import pandas as pd
import os
import csv
import json
import torch.nn as nn
import torch
import torchaudio
import torchaudio.transforms as T
from sklearn.model_selection import train_test_split
from pydub import AudioSegment
from pydub.generators import WhiteNoise
from tqdm import tqdm
import warnings

warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=UserWarning)

In [2]:
# Load the run configuration (the "params" and "paths" sections used below).
# JSON is UTF-8 by specification, so the encoding is pinned explicitly instead
# of depending on the platform's locale default.
with open("utils/config.json", encoding="utf-8") as file:
    config = json.load(file)

In [3]:
# Convenience handles on the two configuration sections.
params = config['params']
paths = config['paths']

# Echo both sections so the run settings are recorded in the notebook output.
for heading, section in (("Params:", params), ("\nPaths:", paths)):
    print(heading)
    for name, setting in section.items():
        print(f"{name}: {setting}")


Params:
seed: 42
num_mixes: 100000
min_num_background_segments: 1
max_num_background_segments: 1
min_num_event_segments: 1
max_num_event_segments: 1
min_len_mixture: 10000
max_len_mixture: 10000
allow_same_class_background_overlap: False
max_foreground_overlaps: 2
allow_same_class_foreground_overlap: False
frame_rate: 16000
channel: 1

Paths:
data_path: audio_sources
dataset_path: audio_sources/dataset
background_segments_path: audio_sources/dataset/background
event_segments_path: audio_sources/dataset/event
output_path: audio_sources/train_set


In [4]:
# Make sure every configured directory exists. Entries that point at
# .csv / .json files are not directories and are skipped.
for path in config["paths"].values():
    if not path.endswith(('.csv', '.json')):
        try:
            os.makedirs(path, exist_ok=True)
        except Exception as e:
            # Report the failure and keep going with the remaining paths.
            print(f"Errore durante la creazione della cartella {path}: {e}")
        else:
            print(f"Cartella {path} creata o già esistente.")

Cartella audio_sources creata o già esistente.
Cartella audio_sources/dataset creata o già esistente.
Cartella audio_sources/dataset/background creata o già esistente.
Cartella audio_sources/dataset/event creata o già esistente.
Cartella audio_sources/train_set creata o già esistente.


In [5]:
def seed_everything(seed=42):
    """Seed every random number generator this notebook imports.

    Seeds Python's ``random`` module, NumPy's legacy global generator and
    PyTorch's generators, and exports ``PYTHONHASHSEED``.

    Args:
        seed: Integer seed shared by all generators (default 42).
    """
    random.seed(seed)
    # Hash randomization is fixed when an interpreter starts, so this only
    # influences Python processes launched after this call, not this kernel.
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    # torch is imported by this notebook, so its RNG is seeded as well.
    torch.manual_seed(seed)

# Seed the random generators with the configured value.
seed_everything(seed=config["params"]["seed"])

# Dataset directory
dataset_dir = config["paths"]["dataset_path"]

In [6]:
def get_random_segments(segments_paths_list, num_segments_list):
    """Randomly pick audio files from class-organised directories.

    Each root directory is expected to be laid out as ``root/<class>/<file>``.
    For every ``(root, count)`` pair, ``count`` files are drawn: first a class
    sub-directory is chosen uniformly, then a file inside it. Draws are
    independent, so the same file can be returned more than once.

    Args:
        segments_paths_list: Root directories to sample from.
        num_segments_list: Number of files to draw from the matching root.

    Returns:
        List of ``root/<class>/<file>`` paths, in draw order.

    Raises:
        IndexError: If a root or a chosen class directory has no visible entries.
        OSError: If a directory cannot be listed.
    """
    listing_cache = {}

    def _visible_entries(directory):
        # os.listdir() returns names in arbitrary order, which would make the
        # seeded random.choice() below pick different files on different
        # machines; sorting restores reproducibility. Hidden entries
        # (.DS_Store, .ipynb_checkpoints, ...) are never classes or clips.
        # Listings are cached for the duration of this call only.
        if directory not in listing_cache:
            listing_cache[directory] = sorted(
                name for name in os.listdir(directory) if not name.startswith(".")
            )
        return listing_cache[directory]

    random_segments = []
    for segments_paths, num_segments in zip(segments_paths_list, num_segments_list):
        for _ in range(num_segments):
            selected_class = random.choice(_visible_entries(segments_paths))
            class_dir = os.path.join(segments_paths, selected_class)
            selected_segment = random.choice(_visible_entries(class_dir))
            random_segments.append(os.path.join(class_dir, selected_segment))
    return random_segments

# START OF MIXTURE CREATION
# Loop-invariant settings are read once instead of on every iteration.
MIX_PARAMS = config["params"]
MIX_PATHS = config["paths"]
FRAME_RATE = MIX_PARAMS["frame_rate"]
NUM_CHANNELS = MIX_PARAMS["channel"]
EVENT_GAIN_DB = 10       # boost applied to every event clip before mixing
EVENT_PROBABILITY = 1    # chance that a mixture contains events (1 = always)


def _silent_track(duration_ms):
    """Return `duration_ms` ms of silence in the configured channel count and frame rate."""
    return AudioSegment.silent(duration=duration_ms).set_channels(NUM_CHANNELS).set_frame_rate(FRAME_RATE)


def _place_segments(track, segment_paths, len_mixture, gain_db=None):
    """Overlay each clip in `segment_paths` onto `track` at a random offset.

    Args:
        track: Base AudioSegment the clips are overlaid on.
        segment_paths: Paths of the audio files to place.
        len_mixture: Length of the mixture in milliseconds.
        gain_db: Optional gain (dB) applied to each clip right after loading.

    Returns:
        Tuple ``(track, info)`` where ``info`` holds one dict per clip with
        the keys "Path", "Class", "Start" and "End" (milliseconds).
    """
    info = []
    for segment_path in segment_paths:
        segment_audio = AudioSegment.from_file(segment_path)
        if gain_db is not None:
            segment_audio = segment_audio.apply_gain(gain_db)
        # overlay() brings both operands to the higher frame rate / channel
        # count of the two, so an unconverted clip could silently change the
        # format of the exported files. Convert the clip to the configured format.
        segment_audio = segment_audio.set_frame_rate(FRAME_RATE).set_channels(NUM_CHANNELS)
        # A clip longer than the mixture would make randint() below receive an
        # empty range and raise ValueError; keep only the part that fits.
        if len(segment_audio) > len_mixture:
            segment_audio = segment_audio[:len_mixture]
        start = random.randint(0, max(0, len_mixture - len(segment_audio)))
        end = start + len(segment_audio)
        track = track.overlay(segment_audio, position=start)
        info.append({
            "Path": segment_path,
            # The class label is the name of the clip's parent directory.
            "Class": os.path.basename(os.path.dirname(segment_path)),
            "Start": start,
            "End": end
        })
    return track, info


for num_mix in tqdm(range(MIX_PARAMS["num_mixes"]), desc="Processing mixes"):
    len_mixture = random.randint(MIX_PARAMS['min_len_mixture'], MIX_PARAMS['max_len_mixture'])

    # White noise (currently disabled)
    #noise_audio = WhiteNoise().to_audio_segment(duration=len_mixture).set_frame_rate(config["params"]["frame_rate"]).set_channels(config["params"]["channel"])
    #noise_audio = noise_audio.apply_gain(random.randint(-50, -30) - noise_audio.dBFS)

    # Silent base (+ optional noise overlay, currently disabled)
    mixture_audio = _silent_track(len_mixture)
    #mixture_audio = mixture_audio.overlay(noise_audio)

    # Background track
    num_background_segments = random.randint(MIX_PARAMS["min_num_background_segments"], MIX_PARAMS["max_num_background_segments"])
    random_background = get_random_segments([MIX_PATHS["background_segments_path"]], [num_background_segments])
    background_audio, background_info = _place_segments(_silent_track(len_mixture), random_background, len_mixture)

    # Event track (stays silent when no event is included)
    event_info = []
    include_event = random.random() < EVENT_PROBABILITY  # always True while the probability is 1
    event_audio = _silent_track(len_mixture)

    if include_event:
        num_event_segments = random.randint(MIX_PARAMS["min_num_event_segments"], MIX_PARAMS["max_num_event_segments"])
        random_event = get_random_segments([MIX_PATHS["event_segments_path"]], [num_event_segments])
        event_audio, event_info = _place_segments(event_audio, random_event, len_mixture, gain_db=EVENT_GAIN_DB)
        mixture_audio = mixture_audio.overlay(event_audio)

    # Overlay the background on top of the events
    mixture_audio = mixture_audio.overlay(background_audio)

    # Final gain (currently disabled)
    #mixture_audio = mixture_audio.apply_gain(random.randint(-20, -15) - mixture_audio.dBFS)

    # Save the three stems and the annotation file in a per-mixture folder
    mix_dir = os.path.join(MIX_PATHS["output_path"], f"mix_{num_mix}")
    os.makedirs(mix_dir, exist_ok=True)

    background_audio.export(os.path.join(mix_dir, "background.wav"), format="wav")
    event_audio.export(os.path.join(mix_dir, "event.wav"), format="wav")
    mixture_audio.export(os.path.join(mix_dir, "mixture.wav"), format="wav")

    mixture_info = {
        "background_info": background_info,
        # An empty event list is stored as null rather than [].
        "event_info": event_info if event_info else None
    }
    with open(os.path.join(mix_dir, "mixture_info.json"), 'w') as f:
        json.dump(mixture_info, f, indent=4)


Processing mixes: 100%|██████████| 100000/100000 [54:17<00:00, 30.70it/s]  


In [12]:
# Optional one-off cleanup, kept disabled on purpose: when uncommented it
# DELETES from disk every audio file in the folder below that is longer
# than 10 seconds.
#
# import os
# from pydub import AudioSegment
# 
# # Path of the folder containing the audio files
# audio_folder = "audio_sources/dataset/event/1_voice"
# 
# # Maximum duration threshold (10 seconds, in milliseconds)
# max_duration_ms = 10 * 1000
# 
# # Supported audio extensions
# supported_extensions = (".wav", ".mp3", ".flac", ".ogg", ".m4a")
# 
# # Check that the folder exists
# if not os.path.exists(audio_folder):
#     print(f"❌ Cartella non trovata: {audio_folder}")
# else:
#     for filename in os.listdir(audio_folder):
#         if filename.lower().endswith(supported_extensions):
#             file_path = os.path.join(audio_folder, filename)
#             try:
#                 audio = AudioSegment.from_file(file_path)
#                 if len(audio) > max_duration_ms:
#                     print(f"🗑️ Cancellato '{filename}' (durata: {len(audio) / 1000:.2f} secondi)")
#                     os.remove(file_path)
#             except Exception as e:
#                 print(f"⚠️ Errore nel file '{filename}': {e}")
