# In [None]:
import numpy as np
import scipy.signal as signal
import librosa
import tensorflow as tf
from tensorflow.keras import layers, models
import soundfile as sf

# Signal generation: basic waveforms (sine, sawtooth, square, triangle),
# linear chirps (sweeping up and down), fixed tones and the chirps x1..x3.
Fs = 44100  # sample rate in Hz; also the number of samples in the 1 s time base
# endpoint=False keeps the sample spacing at exactly 1/Fs. Including the
# endpoint would spread Fs samples over Fs - 1 intervals and slightly detune
# every generated tone relative to the nominal sample rate.
t = np.linspace(0, 1, Fs, endpoint=False)
f = 210  # fundamental frequency in Hz for the basic waveforms

sine_wave = np.sin(2 * np.pi * f * t)
saw_wave = signal.sawtooth(2 * np.pi * f * t)
square_wave = signal.square(2 * np.pi * f * t)
# A sawtooth with width 0.5 rises and falls symmetrically, i.e. a triangle wave.
triangle_wave = signal.sawtooth(2 * np.pi * f * t, 0.5)

chirp_up = signal.chirp(t, f0=420, f1=840, t1=1, method='linear')
# Generated as a 210 -> 420 Hz up-sweep, then time-reversed so it sweeps down.
chirp_down = signal.chirp(t, f0=210, f1=420, t1=1, method='linear')[::-1]

# Fixed tones at 300, 200 and 500 Hz (the start frequencies of x1, x2, x3).
s1 = np.sin(2 * np.pi * 300 * t)
s2 = signal.sawtooth(2 * np.pi * 200 * t)
s3 = np.sin(2 * np.pi * 500 * t)

# Linear chirps starting at the tone frequencies above.
x1 = signal.chirp(t, f0=300, f1=475, t1=1, method='linear')
x2 = signal.chirp(t, f0=200, f1=345, t1=1, method='linear')
x3 = signal.chirp(t, f0=500, f1=613, t1=1, method='linear')

# Feature extraction: the STFT converts each time-domain signal into a
# time-frequency representation (spectrogram).
FFTwindow = 1000  # FFT / analysis window length in samples
# Frames must overlap when a Hann window is used: with hop == FFTwindow the
# tapered frame edges are never covered by a neighbouring frame, so the signal
# cannot be resynthesised cleanly by overlap-add. A hop of a quarter window
# (75% overlap) satisfies the constant-overlap-add condition for Hann.
hop = FFTwindow // 4
window_fn = 'hann'

def compute_stft(sig):
    """Return the magnitude spectrogram of ``sig``.

    Runs ``librosa.stft`` with the module-level ``FFTwindow``, ``hop`` and
    ``window_fn`` settings and discards the phase by taking the absolute
    value of the complex result.
    """
    spectrum = librosa.stft(
        sig,
        n_fft=FFTwindow,
        hop_length=hop,
        window=window_fn,
    )
    return np.abs(spectrum)

# Magnitude spectrograms of the three chirps, transposed so that rows index
# STFT frames (librosa returns frequency bins along the first axis).
FREQMOD, FREQMOD2, FREQMOD3 = (compute_stft(chirp).T for chirp in (x1, x2, x3))

# Normalize: stack the frames of all three spectrograms, then standardize
# each column (frequency bin) to zero mean and unit variance.
data = np.concatenate([FREQMOD, FREQMOD2, FREQMOD3], axis=0)
mean = np.mean(data, axis=0)
std = np.std(data, axis=0)
# A column that is (numerically) constant has std ~ 0; dividing by it would
# produce NaN/inf features. Treat such columns as having unit scale so they
# normalize to ~0. The guarded value is stored back in `std` so that any later
# de-normalization (x * std + mean) stays the exact inverse.
std = np.where(std < 1e-8, 1.0, std)
norm_data = (data - mean) / std

# Simple autoencoder: compresses each input frame into a 2-D latent code
# (sigmoid-activated, so each latent value lies in (0, 1)) and reconstructs
# the original features from that code.
visible_dim = norm_data.shape[1]
hidden_dim = 2

input_layer = layers.Input(shape=(visible_dim,))
encoded = layers.Dense(hidden_dim, activation='sigmoid')(input_layer)
# The output layer must be linear: the targets are standardized (zero mean,
# unit variance), so they contain negative values and values above 1 that a
# sigmoid output, bounded to (0, 1), can never reproduce.
decoded = layers.Dense(visible_dim, activation='linear')(encoded)

autoencoder = models.Model(input_layer, decoded)
autoencoder.compile(optimizer='adam', loss='mse')

# Fit the autoencoder to reproduce its own input: the normalized spectrogram
# frames serve as both the features and the targets.
autoencoder.fit(x=norm_data, y=norm_data, batch_size=32, epochs=10)

# Use the model to generate a spectrogram from sample input, then convert the
# spectrogram back into a waveform.
# Feed every frame of the first chirp rather than a single frame: inverting one
# centred STFT frame leaves no samples once the centring padding is trimmed.
first_signal_frames = FREQMOD.shape[0]
sample_input = norm_data[:first_signal_frames]
generated = autoencoder.predict(sample_input)

# Undo the standardization. Magnitudes cannot be negative, so clip any
# reconstruction error that falls below zero.
generated_denorm = np.maximum(generated * std + mean, 0.0)

# The model produces magnitudes only (the phase was dropped by np.abs), so the
# phase is estimated with Griffin-Lim instead of a plain inverse STFT.
# random_state fixes the random phase initialisation for reproducible output.
generated_audio = librosa.griffinlim(
    generated_denorm.T,
    hop_length=hop,
    window=window_fn,
    random_state=0,
)

# Keep the waveform within [-1, 1] so that writing it to an integer PCM file
# cannot clip.
peak = np.max(np.abs(generated_audio)) if generated_audio.size else 0.0
if peak > 1.0:
    generated_audio = generated_audio / peak

# Write the generated waveform to disk at the sample rate Fs
sf.write(file="generated_audio.wav", data=generated_audio, samplerate=Fs)