In [1]:
import os

# Move the working directory one level up from the notebook's own folder.
# The starting directory is captured only the first time this cell runs, so
# re-running the cell is idempotent instead of climbing one more level on
# every execution (which would break later relative imports and file loads).
if "NOTEBOOK_DIR" not in globals():
    NOTEBOOK_DIR = os.getcwd()
os.chdir(os.path.join(NOTEBOOK_DIR, os.pardir))

In [2]:
import sounddevice as sd
from Utils import load_parameters_file, from_matrix_to_preset, NUM_PARAMETERS, MSE, denormalize_preset, normalize_preset, pretty_print
import time
from Synth import Synth
import numpy as np
import cma
import sys
import matplotlib.pyplot as plt
import time
from multiprocessing import Pool
from parallelEvaluation import evaluate_presets
from globals import SAMPLE_RATE, DURATION, PROCESSORS

In [3]:
def render_presets(presets):
    """Run ``presets`` through a ``Synth`` and return its processed audio.

    The synthesizer is configured with the module-level ``SAMPLE_RATE`` and
    ``DURATION`` constants.

    Args:
        presets: Preset collection handed to ``Synth`` unchanged.

    Returns:
        Whatever ``Synth.process_audio()`` returns for these presets.
    """
    synth_config = dict(
        sample_rate=SAMPLE_RATE,
        duration=DURATION,
        presets=presets,
    )
    return Synth(**synth_config).process_audio()

In [4]:
if __name__ == '__main__':
    # Reference features that candidate presets are scored against
    # (presumably MFCCs, going by the variable names -- confirm in evaluate_presets).
    target_mfcc = evaluate_presets(load_parameters_file('target.json'))
    target_mfcc_2 = evaluate_presets(load_parameters_file('target2.json'))
    # print(MSE(target_mfcc, target_mfcc_2))
    # sys.exit()

    # Initial mean and step size for CMA-ES; the search space is bounded to [0, 1].
    x0 = np.zeros(NUM_PARAMETERS)
    sigma0 = 0.5

    # Build the evolution strategy
    es = cma.CMAEvolutionStrategy(x0, sigma0, {
        'popsize': 200,
        'maxiter': 10000,
        'bounds': [0, 1],
        'verb_disp': 0,    # Do not print to the console on every iteration
        'verb_log': 0,     # Do not write log files to disk (outcmaes...)
        'verb_time': 0,    # Do not show timing statistics
        'verb_filenameprefix': 'temp_'  # Optional: if anything is written, use this prefix
    })

    gen = 1

    with Pool(PROCESSORS) as pool:
        while not es.stop():
            # Sample a new population. Keep the exact vectors returned by ask()
            # for tell(): the float32 copy below has rounded values, so it no
            # longer equals what the strategy sampled.
            candidates = es.ask()
            solutions = np.array(candidates, dtype=np.float32)

            # array_split tolerates a population size that is not a multiple of
            # PROCESSORS (np.split raises ValueError in that case); empty chunks
            # are dropped so workers never receive a zero-row matrix.
            solutions_splitted = [
                chunk for chunk in np.array_split(solutions, PROCESSORS) if len(chunk)
            ]
            presets_splitted = [
                denormalize_preset(from_matrix_to_preset(chunk))
                for chunk in solutions_splitted
            ]

            # pool.map preserves chunk order, so concatenating restores the
            # original candidate order.
            solutions_evaluated = pool.map(evaluate_presets, presets_splitted)
            solutions_evaluated = np.concatenate(solutions_evaluated)
            fitnesses = MSE(solutions_evaluated, target_mfcc)

            best_idx = np.argmin(fitnesses)          # index of the best fitness
            best_solution = solutions[best_idx]      # matching solution
            best_fitness = fitnesses[best_idx]       # matching fitness value

            print("Gen", gen, "Mejor fitness:", best_fitness)

            es.tell(candidates, fitnesses)  # feed the fitness values back to the algorithm
            # es.logger.add()  # optional, for internal logging
            # es.disp()        # print the iteration status

            gen += 1

    # Best solution found over the whole run
    best_solution = es.result.xbest
    print("Mejor individuo:", best_solution)



Gen 1 Mejor fitness: 1.1406111005715844




Gen 2 Mejor fitness: 1.167954528778323
Gen 3 Mejor fitness: 1.0482979321959651
Gen 4 Mejor fitness: 1.0378655312119578
Gen 5 Mejor fitness: 1.0194427184110624
Gen 6 Mejor fitness: 1.0983217963357765
Gen 7 Mejor fitness: 1.32875784696018
Gen 8 Mejor fitness: 1.2124963104188908
Gen 9 Mejor fitness: 1.3296717152997657
Gen 10 Mejor fitness: 0.34157575977810256
Gen 11 Mejor fitness: 1.2871420050496416
Gen 12 Mejor fitness: 0.9999999770888561
Gen 13 Mejor fitness: 1.0819734966905172
Gen 14 Mejor fitness: 0.801455653931575
Gen 15 Mejor fitness: 0.8144854406790741
Gen 16 Mejor fitness: 1.185515499917015
Gen 17 Mejor fitness: 0.4761610353815367
Gen 18 Mejor fitness: 0.9574437826950852
Gen 19 Mejor fitness: 0.8826611676543868
Gen 20 Mejor fitness: 0.875156700937232
Gen 21 Mejor fitness: 1.023774707095504
Gen 22 Mejor fitness: 1.1740342592970665
Gen 23 Mejor fitness: 1.0123391664468617
Gen 24 Mejor fitness: 0.7362525696669618
Gen 25 Mejor fitness: 0.9190658397507165
Gen 26 Mejor fitness: 0.407719

In [13]:
def _play_blocking(clip):
    """Play ``clip`` through sounddevice at SAMPLE_RATE and wait until it ends."""
    sd.play(clip, SAMPLE_RATE)
    sd.wait()


# Add a leading axis to the best individual before converting it to a preset.
solution = np.expand_dims(best_solution, axis=0)

# Score the best individual against the target features.
predicted_features = evaluate_presets(
    denormalize_preset(from_matrix_to_preset(solution))
)
error = MSE(predicted_features, target_mfcc)
print('Error', error)

# Listen to the target first ...
target_audio = render_presets(load_parameters_file('target.json'))
_play_blocking(target_audio[0])

# plt.plot(target_audio[0])
# plt.show()

# ... then to the audio rendered from the predicted preset.
predicted_audio = render_presets(
    denormalize_preset(from_matrix_to_preset(solution))
)
_play_blocking(predicted_audio[0])

# plt.plot(predicted_audio[0])
# plt.show()

Error [0.2755944]


In [8]:
# Convert the solution matrix to a preset, denormalize it, and print the result.
predicted_preset = from_matrix_to_preset(solution)
denormalized_predicted_preset = denormalize_preset(predicted_preset)
pretty_print(denormalized_predicted_preset)

{
  lfo1_rate:   [
    8.916680335998535
  ]
  lfo1_shape:   [
    3.716047763824463
  ]
  lfo2_rate:   [
    12.662152290344238
  ]
  lfo2_shape:   [
    0.6832464337348938
  ]
  osc1_shape:   [
    0.8632935881614685
  ]
  osc1_phase:   [
    0.9606972932815552
  ]
  osc1_volume:   [
    0.9910008311271667
  ]
  osc1_freq:   [
    442.5393981933594
  ]
  osc1_vdepth:   [
    0.7356216907501221
  ]
  osc1_pdepth:   [
    0.8726149201393127
  ]
  osc2_shape:   [
    1.604212760925293
  ]
  osc2_phase:   [
    0.3302754759788513
  ]
  osc2_volume:   [
    0.31016805768013
  ]
  osc2_freq:   [
    19908.98046875
  ]
  osc2_vdepth:   [
    0.613635778427124
  ]
  osc2_pdepth:   [
    0.6521085500717163
  ]
  osc3_shape:   [
    2.659223794937134
  ]
  osc3_phase:   [
    0.3552340865135193
  ]
  osc3_volume:   [
    0.7842036485671997
  ]
  osc3_freq:   [
    10896.1953125
  ]
  osc3_vdepth:   [
    0.6546485424041748
  ]
  osc3_pdepth:   [
    0.5312458276748657
  ]
  osc4_shape:   [
   