# In [None]:
# Importar librerías y módulos necesarios
import sys
import os
import numpy as np
import matplotlib.pyplot as plt

# Añadir el directorio raíz del proyecto al sys.path
project_root = os.path.abspath('..')
sys.path.append(project_root)

from src.sound_signal import Signal
from src.composite_signal import CompositeSignal
from src.rhythm_signal import RhythmSignal
from src.utils import (
    calculate_note_frequencies,
    plot_spectrogram,
    normalize_signal
)

import pandas as pd
import seaborn as sns
import random

# Parameters shared by the generators below
REF_FREQ = 440.0
SAMPLE_RATE = 44_100
DURATION_SEG = 5.0
AMPLITUDE = 1.0
PHASE = 0.0
OCTAVE_RANGE = (3, 5)  # Octave range restricted to audible frequencies

# Available keys (written with sharps only)
TONALIDADES = [
    'C', 'C#', 'D', 'D#', 'E', 'F',
    'F#', 'G', 'G#', 'A', 'A#', 'B',
]

# Base data directory; each kind of generated output gets its own subfolder
data_dir = os.path.join(project_root, 'data')

output_dir_tones = os.path.join(data_dir, 'tones')                    # single tones
output_dir_chords = os.path.join(data_dir, 'chords')                  # chords
output_dir_melodies = os.path.join(data_dir, 'melodies')              # single-note melodies
output_dir_chord_melodies = os.path.join(data_dir, 'chord_melodies')  # chord melodies
output_dir_superposed = os.path.join(data_dir, 'superposed')          # superposed signals

# Create every output folder up front (no error if it already exists)
for _output_dir in (
    output_dir_tones,
    output_dir_chords,
    output_dir_melodies,
    output_dir_chord_melodies,
    output_dir_superposed,
):
    os.makedirs(_output_dir, exist_ok=True)
del _output_dir  # do not leave the loop variable behind as a module-level name

# Compute the note frequencies for all octaves in OCTAVE_RANGE
note_freqs = calculate_note_frequencies(octave_range=OCTAVE_RANGE, ref_freq=REF_FREQ)

# Show a few sample frequencies taken from different octaves
print("Algunas frecuencias de ejemplo de diferentes octavas:")
examples = [
    tuple(f"{pitch}{octave}" for octave in (3, 4, 5))
    for pitch in ('C', 'E', 'G', 'A')
]
for notes in examples:
    for note in notes:
        freq = note_freqs.get(note)
        # A missing (or falsy) frequency is reported instead of being formatted
        print(f"{note}: {freq:.3f} Hz" if freq else f"{note}: Nota no encontrada.")
    print("---")

# Scale and mode definitions: semitone offsets that are added to the tonic
SCALES = dict(
    major=[0, 2, 4, 5, 7, 9, 11],
    minor_natural=[0, 2, 3, 5, 7, 8, 10],
    dorian=[0, 2, 3, 5, 7, 9, 10],
    phrygian=[0, 1, 3, 5, 7, 8, 10],
    lydian=[0, 2, 4, 6, 7, 9, 11],
    mixolydian=[0, 2, 4, 5, 7, 9, 10],
    locrian=[0, 1, 3, 5, 6, 8, 10],
    pentatonic_major=[0, 2, 4, 7, 9],
    pentatonic_minor=[0, 3, 5, 7, 10],
    blues=[0, 3, 5, 6, 7, 10],
)

# Helper lookups used to compute note names: semitone index <-> pitch name
SEMITONE_NOTES = dict(enumerate(
    ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
))
NOTE_SEMITONES = {name: semitone for semitone, name in SEMITONE_NOTES.items()}

def get_scale_notes(tonic, scale_name, note_frequencies):
    """
    Return the note names of a scale for every octave in OCTAVE_RANGE.

    Args:
        tonic: Pitch name of the scale root; must be a key of NOTE_SEMITONES.
        scale_name: Key of SCALES selecting the interval pattern.
        note_frequencies: Container of known note names (e.g. 'A4'); candidate
            notes that are not in it are left out.

    Returns:
        Note names such as 'G3', listed octave by octave and, inside each
        octave, in the order of the scale intervals. The octave number is not
        advanced when an interval wraps past B, so the list is not necessarily
        in ascending pitch order. An empty list is returned (after printing a
        message) when the scale is not defined or the tonic is not valid.
    """
    steps = SCALES.get(scale_name)
    if not steps:
        print(f"Escala {scale_name} no definida.")
        return []

    if tonic not in NOTE_SEMITONES:
        print(f"Tónica {tonic} no válida.")
        return []

    root = NOTE_SEMITONES[tonic]
    # The pitch names of the scale are the same in every octave: compute them once
    pitch_names = [SEMITONE_NOTES[(root + step) % 12] for step in steps]

    candidates = (
        f"{pitch}{octave}"
        for octave in range(OCTAVE_RANGE[0], OCTAVE_RANGE[1] + 1)
        for pitch in pitch_names
    )
    # Keep only the notes the caller actually has a frequency entry for
    return [candidate for candidate in candidates if candidate in note_frequencies]

def get_note_octave(note):
    """
    Extract the octave number from a note name such as 'C#4'.

    Args:
        note: Text made of a letter A-G, an optional '#', and one or more digits.

    Returns:
        The digits converted to int, or None when `note` does not have that form.
    """
    import re
    parsed = re.match(r'^([A-G]#?)(\d+)$', note)
    return int(parsed.group(2)) if parsed else None

def generate_and_save_tone(note: str, note_frequencies: dict,
                           amplitude: float = AMPLITUDE, phase: float = PHASE,
                           duration: float = DURATION_SEG, sample_rate: int = SAMPLE_RATE,
                           output_dir: str = output_dir_tones) -> None:
    """
    Generate a single tone for `note` and save it as `<output_dir>/<note>.wav`.

    Args:
        note: Note name used for the frequency lookup and for the file name.
        note_frequencies: Mapping from note name to frequency.
        amplitude: Amplitude passed to Signal.
        phase: Phase passed to Signal.
        duration: Passed to Signal as `duration_seg`.
        sample_rate: Sample rate passed to Signal.
        output_dir: Folder that receives the WAV file.
    """
    freq = note_frequencies.get(note)
    if not freq:
        # Unknown note (or falsy frequency): report it and write nothing
        print(f'Frecuencia para la nota {note} no encontrada.')
        return

    tone = Signal(amplitude=amplitude, freq=freq, phase=phase,
                  sample_rate=sample_rate, duration_seg=duration)
    tone.generate_signal()

    filename = os.path.join(output_dir, f'{note}.wav')
    tone.save_wav(filename=filename)
    print(f'Tono {note} generado y guardado en {filename}')
        
def generate_and_save_chord(chord_name: str, notes: list, note_frequencies: dict,
                            amplitude: float = AMPLITUDE, phase: float = PHASE,
                            duration: float = DURATION_SEG, sample_rate: int = SAMPLE_RATE,
                            output_dir: str = output_dir_chords) -> None:
    """
    Generate a chord from a list of notes and save its WAV file and spectrogram.

    Every note that has a (truthy) entry in `note_frequencies` is added to a
    CompositeSignal as one component, all with the same amplitude and phase.
    Notes without a frequency are reported and left out.

    Args:
        chord_name: Name of the chord; used as the file name stem.
        notes: Note names that make up the chord.
        note_frequencies: Mapping from note name to frequency.
        amplitude: Amplitude of every component.
        phase: Phase of every component.
        duration: Passed to CompositeSignal as `duration_seg`.
        sample_rate: Sample rate passed to CompositeSignal.
        output_dir: Folder that receives `<chord_name>.wav` and
            `<chord_name>_spectrogram.png`.
    """
    composite = CompositeSignal(sample_rate=sample_rate, duration_seg=duration)
    for note in notes:
        freq = note_frequencies.get(note)
        if freq:
            composite.add_component(amplitude=amplitude, freq=freq, phase=phase)
        else:
            print(f'Frecuencia para la nota {note} no encontrada.')
    composite.build_signal()

    # Save the WAV file
    filename = os.path.join(output_dir, f'{chord_name}.wav')
    composite.save_wav(filename=filename)

    # Derive the PNG path from the file extension only: str.replace('.wav', ...)
    # would also rewrite any '.wav' that appears in the directory or chord name.
    spectrogram_path = os.path.splitext(filename)[0] + '_spectrogram.png'
    plot_spectrogram(composite.signal, composite.sample_rate, save_path=spectrogram_path)

    print(f'Acorde {chord_name} generado y guardado en {filename}')

def generate_diatonic_chords(tonic: str, scale_name: str, note_frequencies: dict):
    """
    Build three-note chords by stacking alternate notes of a scale.

    One chord is produced for every entry of the list returned by
    get_scale_notes (which spans all octaves), not only for the degrees of a
    single octave.

    Args:
        tonic: Pitch name of the scale root.
        scale_name: Name of the scale, as understood by get_scale_notes.
        note_frequencies: Forwarded to get_scale_notes.

    Returns:
        Dict mapping '<tonic>_<scale_name>_Chord_<n>' (n starting at 1) to the
        notes at positions n-1, n+1 and n+3 of the scale-note list, wrapping
        around the end of the list. Empty dict when the scale has no notes.
    """
    scale_notes = get_scale_notes(tonic, scale_name, note_frequencies)
    if not scale_notes:
        return {}

    total = len(scale_notes)
    return {
        f"{tonic}_{scale_name}_Chord_{number}": [
            # Triads: take every other scale note starting at this position
            scale_notes[(number - 1 + offset) % total] for offset in (0, 2, 4)
        ]
        for number in range(1, total + 1)
    }

def generate_and_save_melody(tonic: str, scale_name: str, note_frequencies: dict,
                             num_notes: int = 16, amplitude: float = AMPLITUDE,
                             phase: float = PHASE, duration: float = DURATION_SEG,
                             sample_rate: int = SAMPLE_RATE, output_dir: str = output_dir_melodies) -> None:
    """
    Generate a random single-note melody in the given key and scale.

    Up to two octaves are sampled from OCTAVE_RANGE and the scale notes are
    restricted to them; `num_notes` notes are then drawn at random (with
    repetition) and added as segments of a RhythmSignal whose `unit_time` is
    `duration / num_notes`. The WAV file and its spectrogram are written to
    `output_dir`.

    Args:
        tonic: Pitch name of the scale root.
        scale_name: Name of the scale, as understood by get_scale_notes.
        note_frequencies: Mapping from note name to frequency.
        num_notes: Number of notes to draw; must be positive.
        amplitude: Amplitude of every note.
        phase: Phase of every note.
        duration: Passed to RhythmSignal as `duration_seg`.
        sample_rate: Sample rate passed to RhythmSignal.
        output_dir: Folder that receives
            `melody_<tonic>_<scale_name>_<random 0-9999>.wav` and the matching
            `_spectrogram.png`.

    Nothing is written when the scale has no notes or `num_notes` is not
    positive; a message is printed in the latter case.
    """
    scale_notes = get_scale_notes(tonic, scale_name, note_frequencies)
    if not scale_notes:
        return

    if num_notes <= 0:
        # Avoids a ZeroDivisionError when computing unit_time below
        print(f'El número de notas debe ser positivo (recibido: {num_notes}).')
        return

    # Pick the octaves the melody is allowed to use
    available_octaves = range(OCTAVE_RANGE[0], OCTAVE_RANGE[1] + 1)
    melody_octaves = random.sample(list(available_octaves), k=min(2, len(available_octaves)))
    melody_notes = [note for note in scale_notes if get_note_octave(note) in melody_octaves]
    if not melody_notes:
        # Possible when note_frequencies covers fewer octaves than OCTAVE_RANGE:
        # fall back to the whole scale instead of failing in random.choice.
        melody_notes = scale_notes

    unit_time = duration / num_notes
    melody = RhythmSignal(sample_rate=sample_rate, duration_seg=duration, unit_time=unit_time)

    for _ in range(num_notes):
        note = random.choice(melody_notes)
        freq = note_frequencies.get(note)
        if freq:
            segment_components = [{'amplitude': amplitude, 'freq': freq, 'phase': phase}]
            melody.add_segment(segment_components, label=note)
        else:
            print(f'Frecuencia para la nota {note} no encontrada.')

    melody.build_signal()

    # Save the WAV file.
    # NOTE(review): the random suffix is not checked against existing files, so
    # two melodies may end up with the same name — confirm this is acceptable.
    filename = os.path.join(output_dir, f'melody_{tonic}_{scale_name}_{random.randint(0, 9999)}.wav')
    melody.save_wav(filename=filename)

    # Derive the PNG path from the file extension only: str.replace('.wav', ...)
    # would also rewrite any '.wav' that appears earlier in the path.
    spectrogram_path = os.path.splitext(filename)[0] + '_spectrogram.png'
    plot_spectrogram(melody.signal, melody.sample_rate, save_path=spectrogram_path)

    print(f'Melodía de notas simples generada en tonalidad {tonic} {scale_name} y guardada en {filename}')

def generate_and_save_chord_progression(tonic: str, scale_name: str, note_frequencies: dict,
                                        progression: list = None, num_chords: int = 8,
                                        amplitude: float = AMPLITUDE, phase: float = PHASE,
                                        duration: float = DURATION_SEG, sample_rate: int = SAMPLE_RATE,
                                        output_dir: str = output_dir_chord_melodies) -> None:
    """
    Generate a chord progression in the given key and scale.

    The chords come from generate_diatonic_chords. Each chord of the
    progression becomes one segment of a RhythmSignal; the WAV file and its
    spectrogram are written to `output_dir`.

    Args:
        tonic: Pitch name of the scale root.
        scale_name: Name of the scale, as understood by generate_diatonic_chords.
        note_frequencies: Mapping from note name to frequency.
        progression: 1-based chord indices to play, in order. Indices outside
            the available range are replaced by 1. When None, `num_chords`
            indices are drawn at random.
        num_chords: Number of random chords; only used when `progression` is None.
        amplitude: Amplitude of every chord component.
        phase: Phase of every chord component.
        duration: Passed to RhythmSignal as `duration_seg`.
        sample_rate: Sample rate passed to RhythmSignal.
        output_dir: Folder that receives
            `chord_melody_<tonic>_<scale_name>_<random 0-9999>.wav` and the
            matching `_spectrogram.png`.

    Nothing is written when no chords are available or the progression is
    empty; a message is printed in the latter case.
    """
    chords = generate_diatonic_chords(tonic, scale_name, note_frequencies)
    if not chords:
        return

    # Without an explicit progression, draw a random one
    if progression is None:
        chord_indices = list(range(1, len(chords) + 1))
        progression = [random.choice(chord_indices) for _ in range(num_chords)]
    else:
        # Keep valid indices; anything out of range falls back to the first chord
        progression = [i if 1 <= i <= len(chords) else 1 for i in progression]

    if not progression:
        # Also covers num_chords <= 0, which would otherwise divide by zero below
        print('La progresión de acordes está vacía; no se genera ninguna señal.')
        return

    # One time slot per chord actually played. Using num_chords here would give
    # the wrong slot length whenever an explicit progression has another length.
    unit_time = duration / len(progression)
    chord_melody = RhythmSignal(sample_rate=sample_rate, duration_seg=duration, unit_time=unit_time)

    chord_names = list(chords.keys())
    for idx in progression:
        chord_name = chord_names[idx - 1]
        chord_notes = chords[chord_name]
        segment_components = []
        for note in chord_notes:
            freq = note_frequencies.get(note)
            if freq:
                segment_components.append({'amplitude': amplitude, 'freq': freq, 'phase': phase})
            else:
                print(f'Frecuencia para la nota {note} no encontrada.')
        chord_melody.add_segment(segment_components, label=chord_name)

    chord_melody.build_signal()

    # Save the WAV file
    filename = os.path.join(output_dir, f'chord_melody_{tonic}_{scale_name}_{random.randint(0, 9999)}.wav')
    chord_melody.save_wav(filename=filename)

    # Derive the PNG path from the file extension only: str.replace('.wav', ...)
    # would also rewrite any '.wav' that appears earlier in the path.
    spectrogram_path = os.path.splitext(filename)[0] + '_spectrogram.png'
    plot_spectrogram(chord_melody.signal, chord_melody.sample_rate, save_path=spectrogram_path)

    print(f'Progresión de acordes generada en tonalidad {tonic} {scale_name} y guardada en {filename}')

def generate_and_save_superposed_signal(melody_filename: str, chord_melody_filename: str,
                                        output_dir: str = output_dir_superposed) -> None:
    """
    Superpose a single-note melody and a chord progression and save the result.

    Both inputs are read from the module-level folders `output_dir_melodies`
    and `output_dir_chord_melodies`. They are truncated to the shorter length,
    added as float32, passed through normalize_signal and written as int16.

    Args:
        melody_filename: File name (not path) of the melody WAV.
        chord_melody_filename: File name (not path) of the chord-progression WAV.
        output_dir: Folder that receives
            `superposed_<melody stem>_<chords stem>.wav` and the matching
            `_spectrogram.png`.

    Nothing is written (a message is printed) when either file is missing or
    the two sample rates differ.
    """
    melody_path = os.path.join(output_dir_melodies, melody_filename)
    chord_melody_path = os.path.join(output_dir_chord_melodies, chord_melody_filename)

    # Make sure both input files exist
    if not os.path.isfile(melody_path):
        print(f'Melodía {melody_filename} no encontrada.')
        return
    if not os.path.isfile(chord_melody_path):
        print(f'Progresión de acordes {chord_melody_filename} no encontrada.')
        return

    # Load the signals
    from scipy.io import wavfile
    sample_rate_melody, melody_data = wavfile.read(melody_path)
    sample_rate_chords, chords_data = wavfile.read(chord_melody_path)

    if sample_rate_melody != sample_rate_chords:
        print("Las tasas de muestreo de la melodía y la progresión de acordes no coinciden.")
        return

    # Trim both signals to the same length
    min_length = min(len(melody_data), len(chords_data))
    melody_data = melody_data[:min_length]
    chords_data = chords_data[:min_length]

    # Work in float so the sum cannot overflow the integer sample type
    melody_data = melody_data.astype(np.float32)
    chords_data = chords_data.astype(np.float32)

    # Superpose the signals and normalize
    superposed_signal = melody_data + chords_data
    superposed_signal = normalize_signal(superposed_signal)

    # Back to int16. normalize_signal is presumably bounded to [-1, 1] (TODO
    # confirm); clipping makes out-of-range values saturate instead of wrapping.
    superposed_signal_int16 = np.clip(superposed_signal * 32767, -32768, 32767).astype(np.int16)

    # Build the output name from the file stems; splitext does not assume a
    # four-character extension the way slicing with [:-4] does.
    melody_stem = os.path.splitext(melody_filename)[0]
    chords_stem = os.path.splitext(chord_melody_filename)[0]
    filename_superposed = os.path.join(output_dir, f'superposed_{melody_stem}_{chords_stem}.wav')
    wavfile.write(filename_superposed, sample_rate_melody, superposed_signal_int16)

    # Derive the PNG path from the file extension only: str.replace('.wav', ...)
    # would also rewrite any '.wav' that appears earlier in the path.
    spectrogram_path_superposed = os.path.splitext(filename_superposed)[0] + '_spectrogram.png'
    plot_spectrogram(superposed_signal, sample_rate_melody, save_path=spectrogram_path_superposed)

    print(f'Señal superpuesta generada y guardada en {filename_superposed}')

def generate_dataset(note_frequencies: dict, melodies_per_scale: int = 5,
                     progressions_per_scale: int = 5, num_superposed: int = 50):
    """
    Generate tones, chords, melodies, chord progressions and superposed signals.

    Steps, in order:
      1. one tone per note in `note_frequencies`;
      2. the chords of the major scale for each key in a fixed list;
      3. `melodies_per_scale` random melodies per key and scale;
      4. `progressions_per_scale` random chord progressions per key for the
         major and natural-minor scales;
      5. `num_superposed` superpositions of a random melody and a random
         progression, chosen among the WAV files found in
         `output_dir_melodies` and `output_dir_chord_melodies`.

    Args:
        note_frequencies: Mapping from note name to frequency.
        melodies_per_scale: Melodies generated per (key, scale) pair.
        progressions_per_scale: Progressions generated per (key, scale) pair.
        num_superposed: Number of superposed signals to generate.
    """
    # Single tones
    print("Generando tonos simples...")
    for note in note_frequencies:
        generate_and_save_tone(note=note, note_frequencies=note_frequencies)

    # Chords
    tonalidades = ['C', 'G', 'D', 'A', 'E', 'F', 'B']
    print("Generando acordes diatónicos...")
    for tonic in tonalidades:
        chords = generate_diatonic_chords(tonic=tonic, scale_name='major', note_frequencies=note_frequencies)
        for chord_name, notes in chords.items():
            generate_and_save_chord(chord_name=chord_name, notes=notes, note_frequencies=note_frequencies)

    # Single-note melodies
    escalas = ['major', 'minor_natural', 'dorian', 'mixolydian', 'lydian']
    print("Generando melodías de notas simples...")
    for tonic in tonalidades:
        for scale_name in escalas:
            for _ in range(melodies_per_scale):
                generate_and_save_melody(tonic=tonic, scale_name=scale_name, note_frequencies=note_frequencies)

    # Chord progressions
    print("Generando progresiones de acordes...")
    for tonic in tonalidades:
        for scale_name in ['major', 'minor_natural']:
            for _ in range(progressions_per_scale):
                generate_and_save_chord_progression(tonic=tonic, scale_name=scale_name, note_frequencies=note_frequencies)

    # Superposed signals
    print("Generando señales superpuestas...")
    melody_files = [f for f in os.listdir(output_dir_melodies) if f.endswith('.wav')]
    chord_melody_files = [f for f in os.listdir(output_dir_chord_melodies) if f.endswith('.wav')]
    if not melody_files or not chord_melody_files:
        # random.choice raises IndexError on an empty list; skip the step instead
        print("No hay melodías o progresiones de acordes disponibles para superponer.")
        return
    for _ in range(num_superposed):
        melody_file = random.choice(melody_files)
        chord_melody_file = random.choice(chord_melody_files)
        generate_and_save_superposed_signal(melody_filename=melody_file, chord_melody_filename=chord_melody_file)

# Run the dataset generation with the frequencies computed above
generate_dataset(note_frequencies=note_freqs)
