## Imports

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import keras
from keras.optimizers import Adam
from keras.layers import Input, GRU, Flatten, MaxPool2D, MaxPool1D
from keras.layers import PReLU, Dropout, Lambda, Dense
from keras.models import Model
import tensorflow as tf
from scipy.signal import stft

from music_generator.basic.random import generate_dataset
from music_generator.basic.signalproc import SamplingInfo
from music_generator.musical.timing import Tempo
from music_generator.musical.scales import GenericScale
from music_generator.analysis.play import play_mono_as_stereo, play_array
from music_generator.basic.signalproc import mix_at
from music_generator.analysis import preprocessing

from music_generator.musical import scales
import numpy as np
from multiprocessing import Pool
from functools import partial

import matplotlib.pyplot as plt
from IPython.display import Audio
%matplotlib inline
import matplotlib

from scipy.io.wavfile import read
import pandas as pd

In [None]:
# Global matplotlib styling for every figure in this notebook.
# 'figure.figsize' is set exactly once: (16, 8) is the value that was
# effectively in use, because it was the last of two assignments.
matplotlib.rcParams.update({
    'figure.figsize': (16.0, 8.0),
    'lines.linewidth': 2,
    'axes.linewidth': 1.5,
    'font.size': 18,
    'xtick.major.size': 5,
    'xtick.major.width': 2,
    'ytick.major.size': 5,
    'ytick.major.width': 2,
})

## Load & show & play music

Before we start out, how do we read audio files?

In [None]:
# `read` is already imported in the imports cell; it is repeated here to show
# where the function comes from.
from scipy.io.wavfile import read
# The call is unpacked into the sample rate (`sr`) and the raw samples
# (`data_raw`); both names are reused by the cells below.
sr, data_raw = read('../data/full-mix.wav')

The sample rate is the number of samples per second of audio. CD-quality music contains 44100 samples per second. 

In [None]:
data_raw[50000:50100]

* Data is stored as signed integers. We would like to convert them to floating-point numbers between -1 and 1.

* What do these numbers mean?

In [None]:
data_raw.shape

Since we have loaded mono music, the data is 1D.

In [None]:
sr

The highest frequency that can be encoded is half the sample rate (the Nyquist frequency): 44100 / 2 = 22050 Hz, about 22 kHz. That is already above the highest frequency we can hear (~20 kHz, and this limit decreases with age).

In [None]:
# Rescale the raw integer samples by 2**15 first ...
data = pd.Series(data_raw) / 2**15
# ... then turn the sample counter into a time axis by dividing by the sample rate.
data.index = data.index / sr

In [None]:
data.plot();

A very nice tool when working with audio is the Audio element in `IPython.display`

```from IPython.display import Audio```

In [None]:
# Index labels 40 through 50: plot the excerpt and return a player for it.
excerpt = data.loc[40:50]
plt.plot(excerpt)
Audio(excerpt.values, rate=sr)

In [None]:
# Index labels 40 through 41: plot the excerpt and return a player for it.
excerpt = data.loc[40:41]
plt.plot(excerpt)
Audio(excerpt.values, rate=sr)

In [None]:
# Index labels 40.8 through 41: plot the excerpt and return a player for it.
excerpt = data.loc[40.8:41]
plt.plot(excerpt)
Audio(excerpt.values, rate=sr)

## Simple synthesizer: tone generation

### Sine generator

$$ y(t) = \sin(2 \pi f t)$$

In [None]:
# Time axis from 0 up to (not including) 1 in steps of 1/sr, used as the index.
time_index = pd.Index(np.arange(0, 1, 1/sr), name='time')
generated = pd.DataFrame(index=time_index)
# Sine with 440 cycles per index unit, evaluated on that axis.
generated['sine'] = np.sin(generated.index * 440 * 2 * np.pi)

In [None]:
generated.head()

In [None]:
def plot_and_play(df, col_name, rate=None, zoom=0.02, max_freq=5000):
    """Plot one column of `df` as waveform and spectrogram and return a player.

    Three panels are drawn: the full signal, the slice ``df.loc[0:zoom]`` of
    it, and the STFT magnitude with the frequency axis limited to
    ``[0, max_freq]``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the signal. The zoom panel slices it by index label,
        so the index is presumably time in seconds (TODO confirm for
        callers other than this notebook).
    col_name : str
        Column of `df` to plot and play.
    rate : number, optional
        Sampling rate handed to `stft` and `Audio`. When omitted, the
        notebook-level `sr` is used, which is what the function always did.
    zoom : float, optional
        Upper index label of the zoomed-in panel (default 0.02).
    max_freq : float, optional
        Upper limit of the spectrogram's frequency axis (default 5000).

    Returns
    -------
    IPython.display.Audio
        Player for ``df[col_name]`` at `rate`.
    """
    if rate is None:
        rate = sr  # fall back to the notebook-wide sample rate

    signal = df[col_name]

    # Waveform: full signal on the left, zoomed-in start on the right.
    _, (ax_full, ax_zoom) = plt.subplots(ncols=2)
    signal.plot(ax=ax_full, title='All data')
    df.loc[0:zoom, col_name].plot(ax=ax_zoom, title='Zoomed in')
    plt.show()

    # Spectrogram: magnitude of the short-time Fourier transform.
    window = 2048
    f_vec, t_vec, Zxx = stft(signal, rate, nperseg=window, noverlap=window // 4)

    _, ax_spec = plt.subplots()
    ax_spec.set_title('Spectral')
    ax_spec.pcolormesh(t_vec, f_vec, np.abs(Zxx))
    ax_spec.set_ylabel('Frequency [Hz]')
    ax_spec.set_xlabel('Time [sec]')
    ax_spec.set_ylim([0, max_freq])
    plt.show()

    return Audio(signal, rate=rate)

In [None]:
plot_and_play(generated, 'sine')

### Envelope

In [None]:
# Exponential decay exp(-t / dr) over the frame's index, applied to the sine.
dr = 0.2
generated['decay_envelope'] = np.exp(generated.index / -dr)
generated['decay_sine'] = generated['decay_envelope'] * generated['sine']

In [None]:
plot_and_play(generated, 'decay_sine')

### Additive synthesis

The sine that we have generated had a frequency. Let's call this the fundamental frequency.

If we add sine waves whose frequencies are integer multiples of the fundamental frequency, the combined waveform is still periodic, with a period that corresponds to the fundamental frequency. Our ears and brains pick up on this, and the tone is perceived as having the same pitch.

Typically we say that the tone changes colour (or timbre)

In [None]:
def additive_synthesis(t, amps, freqs, phases=None):
    """Sum sinusoidal partials ``amp * sin(t * freq * 2*pi + phase)``.

    Parameters
    ----------
    t : array-like
        Values at which the signal is evaluated.
    amps, freqs : sequence of float
        Amplitude and frequency of each partial; must have the same length.
    phases : sequence of float, optional
        Phase offset of each partial, in radians. Defaults to zero for
        every partial.

    Returns
    -------
    numpy.ndarray
        The summed signal, with the same shape as `t`. With no partials the
        result is all zeros.

    Raises
    ------
    ValueError
        If `amps`, `freqs` and `phases` do not all have the same length.
    """
    amps = list(amps)
    freqs = list(freqs)
    phases = [0.0] * len(freqs) if phases is None else list(phases)

    # zip() would silently drop the surplus entries of the longer lists.
    if not len(amps) == len(freqs) == len(phases):
        raise ValueError(
            'amps, freqs and phases must have the same length, got '
            '{}, {} and {}'.format(len(amps), len(freqs), len(phases)))

    t = np.asarray(t, dtype=float)
    signal = np.zeros_like(t)
    for amp, freq, phase in zip(amps, freqs, phases):
        signal = signal + amp * np.sin(t * freq * 2 * np.pi + phase)
    return signal

In [None]:
# Amplitude of each partial; partial k sits at k times the 440 Hz fundamental.
amps = [1, -0.3, 0.1, -0.1, 0.4, 0.01, -0.2]
freqs = [harmonic * 440 for harmonic in range(1, len(amps) + 1)]
phases = [0] * len(amps)

generated['additive'] = additive_synthesis(generated.index, amps, freqs, phases)
generated['additive_decay'] = generated['additive'] * generated['decay_envelope']

In [None]:
plot_and_play(generated, 'additive')

## Note that indeed it has the same pitch

In [None]:
Audio(generated['additive'], rate=sr)

In [None]:
Audio(generated['sine'], rate=sr)

## The phases are inaudible*!

`*` in mono sound; in stereo sound, phase differences add a spatial feeling to the sound

In [None]:
amps = [1, -0.3, 0.1, -0.1, 0.4, 0.01, -0.2]
freqs = [440, 2*440, 3*440, 4*440, 5*440, 6*440, 7*440]
# A dedicated, seeded generator keeps the "random" phases identical on every
# Restart & Run All without touching NumPy's global random state.
phases = np.random.RandomState(0).uniform(0, 2*np.pi, size=len(freqs))

generated['additive_random_phase'] = additive_synthesis(generated.index, amps, freqs, phases)

In [None]:
# Side-by-side view of index range 0 to 0.01 for both versions of the tone.
fig, (ax_zero, ax_random) = plt.subplots(ncols=2)
head = generated.loc[0:0.01]

head['additive'].plot(ax=ax_zero, title='All phases 0')
head['additive_random_phase'].plot(ax=ax_random, title='Random phases')

In [None]:
Audio(generated['additive'], rate=sr)

In [None]:
Audio(generated['additive_random_phase'], rate=sr)

## Decay applied on the additive sound

In [None]:
generated['additive_decay'] = generated['additive'] * generated['decay_envelope']

In [None]:
plot_and_play(generated, 'additive_decay')

### Drums

In [None]:
# Kick: sine whose phase goes like 1 / (t + 0.1), shaped by an exp(-t / 0.4) envelope.
# NOTE(review): the "+ 30" term only shifts the phase by 60*pi (a whole number
# of periods) -- confirm whether a base tone such as 30 * t was intended.
kick_t = generated.index.values
kick_phase = (1 / (kick_t + 0.1) + 30) * 2 * np.pi
generated['kick'] = np.sin(kick_phase) * np.exp(-kick_t / 0.4)
plot_and_play(generated, 'kick')

### Snare

In [None]:
# Uniform noise in [-1, 1). Drawn from a dedicated, seeded generator so the
# noise (and the snare built from it) is the same on every Restart & Run All.
generated['noise'] = np.random.RandomState(1).uniform(low=-1, high=1, size=generated.index.shape)
plot_and_play(generated, 'noise')

In [None]:
# Snare: the kick-style sweep with a doubled phase factor (4*pi instead of
# 2*pi), plus noise shaped by a fast exp(-t / 0.05) envelope.
snare_t = generated.index.values
generated['short_decay'] = np.exp(-snare_t / 0.05)

snare_phase = (1 / (snare_t + 0.1) + 30) * 4 * np.pi
generated['snare_base'] = np.sin(snare_phase) * np.exp(-snare_t / 0.4)

noise_burst = generated['noise'] * generated['short_decay']
generated['snare'] = generated['snare_base'] + noise_burst
plot_and_play(generated, 'snare')

### Subtractive synthesis

The main idea behind subtractive synthesis is to start with a basic wave shape that has a lot of harmonics and then apply frequency filters to it in order to remove part of the frequency content.

The filter characteristics can have their own envelopes which can give the sound its own dynamics. For instance, when the tone is struck it can be very bright and slowly becomes more dull as the tone lasts longer. This makes it possible to approximate plucked string tones such as harp, piano or guitar.

In [None]:
# Square wave: the sign of a sine, shaped by the decay envelope.
# The phase is t * 440 * 2*pi, i.e. the same 440 Hz fundamental as the sine
# and additive examples; without the factor pi the tone would sit at
# 440 * 4 / (2*pi), roughly 280 Hz.
generated['square'] = np.sign(np.sin(generated.index * 440 * 2 * np.pi)) * generated['decay_envelope']
plot_and_play(generated, 'square')

In [None]:
from music_generator.basic.signalproc import apply_filter

In [None]:
# Run the square wave through the project's `apply_filter` helper as a
# 5th-order low-pass; 3000 is presumably the cutoff in Hz (TODO confirm
# against music_generator.basic.signalproc).
square_samples = generated['square'].values
sampling_info = SamplingInfo(sr)
generated['filtered_square'] = apply_filter(
    square_samples, sampling_info, 3000, order=5, ftype='lowpass')

In [None]:
plot_and_play(generated, 'filtered_square')

## There is much more to it

* Effects: distortion, bit-crushing, delay, reverb, chorus, ring modulation, ...
* FM synthesis: a different way of generating tones
* Synthesis using sampling (wavetable synthesis)
* Physical modeling: model air flow in a saxophone, resonances in a grand piano, etc.
* LFO: low-frequency oscillators