<a href="https://colab.research.google.com/github/sbs80/py-drums/blob/master/Kick.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Kick Synthesizer
A simple kick drum synthesizer written in Python with automatic parameter matching. A genetic algorithm is used to tune the synthesizer parameters in an attempt to match the output to an uploaded audio file.

The genetic algorithm is adapted from the following guide: https://towardsdatascience.com/genetic-algorithm-implementation-in-python-5ab67bb124a6. The code from that guide has since been expanded into the Python library PyGAD: https://pypi.org/project/pygad.

# Install and import packages

In [None]:
#@title Install pyo package

!pip install pyo

In [None]:
#@title Import packages

import librosa
import librosa.feature
import numpy as np
import IPython.display as ipd
import random

from pyo import *

# Define kick synthesizer engine

In [None]:
#@title Kick Synth
 
def kick_synth(args):
  """Render one kick drum hit to 'synth_kick.wav' with an offline pyo server.

  The sound is the sum of a sine oscillator with a decaying pitch sweep and a
  low pass filtered click. Nothing is returned; the result is the file written
  to the current working directory.

  Args:
    args: indexable sequence of numbers; positions 0..13 are read:
      0  overall server gain
      1  base oscillator frequency (multiplied by 100)
      2  amplitude envelope attack (divided by 1000)
      3  amplitude envelope decay
      4  pitch envelope decay
      5  click envelope attack (divided by 1000)
      6  click envelope decay (divided by 100)
      7  amplitude envelope exponent (multiplied by 2)
      8  pitch envelope exponent (multiplied by 2)
      9  pitch sweep depth (multiplied by 100)
      10 oscillator phase
      11 oscillator gain
      12 click filter cutoff (multiplied by 10000)
      13 click filter Q (multiplied by 20)
      Any further entries are ignored.
  """
  # Creates and boots the pyo server.
  # Initialize the Server in offline mode.
  s = Server(duplex=0, nchnls=1, audio="offline")
  s.setVerbosity(0)
  s.boot()

  # Everything after boot() runs under try/finally so the server is always
  # shut down, even when building or rendering the graph raises. Otherwise a
  # failed call would leave a booted server behind for the next call.
  try:
    # Controls the overall gain of the synthesizer
    s.amp = args[0]

    # Output file duration
    dur = 1.1

    # Set recording parameters
    s.recordOptions(dur=dur,
                    filename='synth_kick.wav',
                    fileformat=0,
                    sampletype=0)

    # Synthesis consists of a frequency modulated sine wave generator and a low pass filtered click generator.
    # Frequency of sine wave generators:
    ifreq2 = args[1]*100.0

    # Define synthesis envelopes with adjustable gains and decays:
    env2 = Adsr(attack=args[2]/1000.0, decay=args[3], sustain=0.0, release=0.0, dur=dur)
    fenv2 = Adsr(attack=0, decay=args[4], sustain=0.0, release=0.0, dur=dur)

    # Click:
    tr = Adsr(attack=args[5]/1000.0, decay=args[6]/100.0, sustain=0.0, release=0.0, dur=dur).play(delay = 0.1)

    env2.setExp(args[7]*2)
    fenv2.setExp(args[8]*2)

    # All generators start after the same 0.1 delay so they stay aligned
    env2.play(delay = 0.1)
    fenv2.play(delay = 0.1)

    # Sine oscillator: the pitch envelope is added on top of the base frequency
    osc2 = Sine(freq=(fenv2*args[9]*100.0)+ifreq2, phase=args[10], mul=env2*args[11]).play(delay=0.1)

    # low pass filter
    filt = Biquad(tr, args[12]*10000.0, q=args[13]*20, type=0)

    (filt+osc2).out()

    # Start rendering
    s.start()
  finally:
    # Cleanup for the next pass
    s.shutdown()

# Match to target sound using a genetic algorithm

In [None]:
#@title Upload target kick sound file

# Colab-only module: this cell cannot run outside Google Colab.
from google.colab import files
# Ask the user for the target kick sample.
# NOTE(review): presumably returns a mapping keyed by uploaded file name - confirm against the Colab docs.
target_upload = files.upload()
# Keep only the first key; it is used below as the path of the target audio file.
target = next(iter(target_upload))
# Last expression of the cell, so the notebook shows an audio player for the target.
ipd.Audio(target)

In [None]:
#@title Extract features of target sound and define fitness function

n_fft = 1024 #@param {type:"number"}
hop_length = 256 #@param {type:"integer"}
n_seconds = 1 #@param {type:"integer"}

# Load target sound (sr=None keeps the sample rate of the file)
y,fs = librosa.core.load(target, sr=None)

# Trim any silence at the start
y = np.trim_zeros(y, trim='f')

# Pad with n_fft/2 zeros for the FFT. A one-element pad width applies to both
# ends; the trailing part is cut or extended again by fix_length below.
y = np.pad(y, (int(n_fft/2), ), 'constant')

# Fix length of target kick sound to n_seconds seconds.
# `size` is passed by keyword (current librosa only accepts it that way) and
# as an integer sample count.
n_samples = round(fs*n_seconds)
y = librosa.util.fix_length(y, size=n_samples)

# Calculate Spectrogram of target kick sound
S = librosa.core.stft(y, n_fft=n_fft, hop_length=hop_length, window='hann')
# Only the magnitude is compared by the fitness function; the phase is unused
S_target, phase = librosa.magphase(S)

# Spectral Centroid (not currently used):
# C_target = librosa.feature.spectral_centroid(S=S_target)

# Perform a fitness calculation for an array of synthesizer parameters
def fitness_calc(input):
  """Score each synthesizer parameter set against the target spectrogram.

  Every row of `input` is rendered with kick_synth(), loaded back from
  'synth_kick.wav' and run through the same trim / pad / fix-length / STFT
  chain as the target sound. The score is the negative mean squared error
  between the two magnitude spectrograms, so larger is better.

  Reads the module-level names fs, n_fft, hop_length, n_seconds and S_target.

  Args:
    input: 2-D array with one parameter set per row. (The name shadows the
      builtin input(); it is kept so existing callers are unaffected.)

  Returns:
    1-D float array with one fitness value per row of `input`.
  """
  # Number of samples every candidate is forced to, so that its spectrogram
  # has the same number of frames as S_target
  n_target_samples = round(fs*n_seconds)

  fitness = np.zeros(input.shape[0])

  for ch in range(input.shape[0]):
    # Synthesize and load kick sound. Loading at the target's sample rate `fs`
    # keeps both spectrograms the same shape even when the rendered file uses
    # a different rate than the uploaded target.
    kick_synth(input[ch].tolist())
    y_synth, _ = librosa.core.load('synth_kick.wav', sr=fs)

    # Trim any silence at the start
    y_synth = np.trim_zeros(y_synth, trim='f')

    # Pad with n_fft/2 zeros for the FFT (applied to both ends)
    y_synth = np.pad(y_synth, (int(n_fft/2), ), 'constant')

    # Force length to n_seconds seconds (`size` must be passed by keyword)
    y_synth = librosa.util.fix_length(y_synth, size=n_target_samples)

    # Calculate Spectrogram of synthesized kick sound
    S = librosa.core.stft(y_synth, n_fft=n_fft, hop_length=hop_length, window='hann')
    S_synth, _ = librosa.magphase(S)

    # Calculate mean squared error of the synthesized kick compared to the "real" kick
    mse = ((S_synth - S_target)**2).mean()

    # Fitness is defined as the negative of the mean squared error
    fitness[ch] = -mse

  return fitness

In [None]:
#@title Define genetic algorithm functions
# Probability that mutation() perturbs any given gene of an offspring
mute_probability = 0.2 #@param {type:"slider", min:0, max:1, step:0.01}
# Largest absolute offset mutation() may add to a gene (drawn uniformly from [-value, value))
mute_max_val = 0.86 #@param {type:"slider", min:0, max:1, step:0.01}
# Probability that crossover() copies a gene from the first parent rather than the second
parent_1_probability = 0.6 #@param {type:"slider", min:0, max:1, step:0.01}

def calc_pop_fitness(pop, parents_fitness, generation, num_parents):
  """Return the fitness of every parameter set in the population.

  In the first generation every row is evaluated. In later generations the
  first `num_parents` rows are taken to be the surviving parents, so their
  known scores are reused and only the remaining rows are synthesized.

  Args:
    pop: 2-D array, one parameter set per row.
    parents_fitness: scores of the first `num_parents` rows of `pop`
      (ignored when generation == 0).
    generation: zero-based generation counter.
    num_parents: number of leading rows of `pop` that are parents.

  Returns:
    1-D array with one fitness value per row of `pop`.
  """
  if generation == 0:
    return fitness_calc(pop)

  # If not the first generation, don't bother recalculating fitness for the "parents".
  # The split point comes from the num_parents argument rather than from a
  # notebook-level variable, so the function only depends on its inputs.
  fitness = np.empty(pop.shape[0])
  fitness[:num_parents] = parents_fitness
  fitness[num_parents:] = fitness_calc(pop[num_parents:, :])
  return fitness

def select_mating_pool(pop, fitness, num_parents):
  """Select the fittest rows of `pop` as parents of the next generation.

  Parents are returned best first; rows with equal fitness keep their
  population order. The caller's `fitness` array is left untouched.

  Args:
    pop: 2-D array, one parameter set per row.
    fitness: 1-D array of scores aligned with the rows of `pop` (higher is better).
    num_parents: number of rows to select.

  Returns:
    (parents, parents_fitness): a float array of shape
    (num_parents, pop.shape[1]) holding copies of the selected rows, and the
    matching 1-D array of their scores.

  Raises:
    ValueError: if more parents are requested than there are scores.
  """
  scores = np.asarray(fitness, dtype=float)
  if num_parents > scores.shape[0]:
    raise ValueError("num_parents exceeds the number of fitness values")

  # A stable sort on the negated scores ranks the best first and breaks ties
  # by lowest index. NaN scores sort last instead of aborting the selection.
  ranked = np.argsort(-scores, kind='stable')[:num_parents]

  # Copies are required: the caller overwrites the population in place later
  parents = np.array(np.asarray(pop)[ranked, :], dtype=float)
  parents_fitness = scores[ranked].copy()

  return parents, parents_fitness

def crossover(parents, offspring_size):
  """Breed offspring by mixing the genes of neighbouring parents.

  Child k is built from parent k and parent k+1 (both wrapping around the
  parent list). Each gene is copied from the first of the two with probability
  parent_1_probability, otherwise from the second.

  Args:
    parents: 2-D array, one parent parameter set per row.
    offspring_size: (number of children, number of genes).

  Returns:
    Float array of shape `offspring_size`.
  """
  n_children = offspring_size[0]
  n_genes = offspring_size[1]
  children = np.empty(offspring_size)

  for child in range(n_children):
    # The two parents that mate to produce this child
    first = parents[child % parents.shape[0]]
    second = parents[(child + 1) % parents.shape[0]]

    # One uniform draw per gene, in gene order, decides which parent supplies it
    from_first = np.random.uniform(size=n_genes) < parent_1_probability
    children[child, :] = np.where(from_first, first[:n_genes], second[:n_genes])

  return children

def mutation(offspring_crossover,num_weights):
  """Randomly perturb the genes of every offspring, in place.

  Each of the first `num_weights` genes of each row is changed independently
  with probability mute_probability, by adding an offset drawn uniformly from
  [-mute_max_val, mute_max_val). The result is not clamped.

  Args:
    offspring_crossover: 2-D array, one offspring per row; modified in place.
    num_weights: number of leading genes per row that may be mutated.

  Returns:
    The same array object that was passed in.
  """
  for idx in range(offspring_crossover.shape[0]):
    for weight in range(num_weights):
      if np.random.uniform() < mute_probability:
        # Draw a plain scalar: storing a one-element array into a single
        # array cell relies on a conversion that NumPy has deprecated.
        random_value = np.random.uniform(-mute_max_val, mute_max_val)

        # Apply mutation
        offspring_crossover[idx, weight] = offspring_crossover[idx, weight] + random_value

  return offspring_crossover

In [None]:
#@title Run genetic algorithm
num_population = 30 #@param {type:"integer"}
num_generations = 200 #@param {type:"integer"}
num_parents_mating = 5 #@param {type:"integer"}

# Number of genes per individual.
# NOTE(review): kick_synth only reads args[0]..args[13], so the remaining genes
# do not influence the sound - confirm whether 20 is intentional.
num_params = 20

# Calculate size of array needed to hold entire population of parameters
pop_size = (num_population,num_params)

# Create an initial random population.
new_population = np.random.uniform(low=0.0, high=1.0, size=pop_size)

# Placeholder only: calc_pop_fitness ignores it for generation 0
parents_fitness = np.empty(num_parents_mating)

for generation in range(num_generations):
  # Measure the fitness of each member in the population
  fitness = calc_pop_fitness(new_population, parents_fitness, generation, num_parents_mating)

  print("Generation " + str(generation) + " best score: " + str(fitness.max()))

  # Allow user to listen to the result after every 10 generations
  if generation % 10 == 0:
    # `fitness` lines up row for row with new_population, so its argmax is the
    # best candidate of this generation. parents_fitness must not be used
    # here: it is uninitialised in generation 0 and otherwise describes only
    # the previous generation's parents.
    max_fitness_idx = int(np.argmax(fitness))
    kick_synth(new_population[max_fitness_idx].tolist())
    ipd.display(ipd.Audio('synth_kick.wav'))
    print("parameters used to achieve closest match:")
    print(new_population[max_fitness_idx])

  # Select the best parents in the population for mating
  parents, parents_fitness = select_mating_pool(new_population, fitness, num_parents_mating)

  # Generate the next generation using crossover
  offspring_crossover = crossover(parents, offspring_size=(pop_size[0]-parents.shape[0], num_params))

  # Adding some variations to the offspring using mutation
  offspring_mutation = mutation(offspring_crossover,num_params)

  # Creating the new population based on the parents and offspring:
  # parents occupy the first rows (best first), offspring fill the rest
  new_population[0:parents.shape[0], :] = parents
  new_population[parents.shape[0]:, :] = offspring_mutation

In [None]:
#@title Listen to results!

# Report the index of the last generation that was run
print(f"Generation number reached: {generation}")

# Play back the uploaded reference sound
print("Target sample:")
ipd.display(ipd.Audio(target))

# Re-render the best surviving parent; the parents sit in the first rows of
# new_population in the same order as parents_fitness
print("Closest match according to fitness function:")
best_rows = np.flatnonzero(parents_fitness == parents_fitness.max())
max_fitness_idx = best_rows[0]
best_params = new_population[max_fitness_idx]
kick_synth(best_params.tolist())
ipd.display(ipd.Audio('synth_kick.wav'))
print("parameters used to achieve closest match:")
print(best_params)

# Render an untuned parameter set as a baseline to compare against
print("Randomly generated sample for comparison:")
random_params = np.random.uniform(low=0.0, high=1.0, size=num_params)
kick_synth(random_params.tolist())
ipd.display(ipd.Audio('synth_kick.wav'))