In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import tensorflow as tf
import random
import matplotlib.pyplot as plt

from sklearn import manifold
from cs_systems.lstm_vae import VariationalRecurrentAutoEncoder

In [3]:
def create_ctable(characters):
    """Build one-hot ``encode``/``decode`` helpers for a fixed symbol table.

    Args:
        characters: Ordered sequence of symbols. The position of a symbol in
            this sequence is its one-hot index.

    Returns:
        A tuple ``(encode, decode)`` of closures bound to ``characters``.
    """
    def encode(sequence):
        """One-hot encode each symbol of ``sequence``.

        Returns a list with one row per symbol. Every row has
        ``len(characters)`` entries: 1 where the table entry equals the
        symbol, 0 elsewhere. A symbol that is not in the table therefore
        produces an all-zero row.
        """
        return [
            [1 if sc == c else 0 for c in characters]
            for sc in sequence
        ]

    def decode(sequence, calc_argmax=True):
        """Map one-hot rows (or plain indices) back to symbols.

        Args:
            sequence: With ``calc_argmax=True``, array-like rows of scores,
                one row per symbol; the index of the largest score in each
                row is looked up. With ``calc_argmax=False``, an iterable of
                integer indices into ``characters``.
            calc_argmax: Whether to take the argmax over the last axis first.

        Returns:
            A list of the looked-up symbols.
        """
        if calc_argmax:
            # np.asarray lets nested lists (e.g. the output of ``encode``)
            # be decoded too; ndarrays pass through without a copy.
            sequence = np.asarray(sequence).argmax(axis=-1)

        return [characters[i] for i in sequence]

    return encode, decode


def vectorize(data, encode):
    """Encode every sequence in ``data`` and pack the results into one array.

    Args:
        data: Iterable of sequences.
        encode: Callable applied to each sequence; its return values are
            stacked along a new leading axis.

    Returns:
        A ``float32`` NumPy array holding the encoded sequences.
    """
    encoded_sequences = list(map(encode, data))
    return np.array(encoded_sequences, dtype=np.float32)


def generate_tonal_data(size, timesteps):
    """Generate ``size`` distinct random tunes, each confined to one scale.

    For every candidate tune a root is drawn at random, the seven diatonic
    offsets above that root are reduced modulo the 12-semitone octave, and
    ``timesteps`` notes are sampled uniformly from the resulting seven
    pitches. Notes are the strings ``'24'`` .. ``'35'``. Candidates that
    repeat an earlier tune are discarded and redrawn.

    Args:
        size: Number of distinct tunes to return.
        timesteps: Number of notes per tune.

    Returns:
        ``(samples, samples_reverse)``: two parallel lists of tunes, where
        ``samples_reverse[i]`` is ``samples[i]`` in reverse order.

    Raises:
        ValueError: If ``size`` exceeds ``12 ** timesteps``, the number of
            note strings of that length. This is only an upper bound on the
            number of tonal tunes, so a very large ``size`` that passes the
            check can still take long (or forever) to satisfy.
    """
    semitones_per_octave = 12
    diatonic = np.array([0, 2, 4, 5, 7, 9, 11])
    characters = np.array([str(i) for i in range(24, 46)])

    # Without this check an impossible request would spin in the loop below.
    if size > semitones_per_octave ** max(timesteps, 0):
        raise ValueError(
            f"cannot generate {size} unique tunes of length {timesteps}"
        )

    samples = []
    samples_reverse = []

    # Set membership keeps the duplicate check O(1) per candidate.
    seen = set()
    while len(samples) < size:
        root = random.randint(0, len(characters) - 1)
        # Wrap at the octave (12), not 11: modulo 11 maps offsets 0 and 11
        # to the same index and collapses the scale to six distinct notes.
        diatonic_indices = (diatonic + root) % semitones_per_octave
        diatonic_characters = characters[diatonic_indices]

        notes = [random.randint(0, len(diatonic_characters) - 1) for _ in range(timesteps)]
        tune = [diatonic_characters[i] for i in notes]
        tune_as_string = "".join(tune)

        if tune_as_string in seen:
            continue
        seen.add(tune_as_string)

        samples.append(tune)
        samples_reverse.append(tune[::-1])

    return samples, samples_reverse

def generate_atonal_data(size, timesteps):
    """Generate ``size`` distinct tunes of uniformly random notes.

    Each note is drawn independently and uniformly from the strings
    ``'24'`` .. ``'35'``. Candidates that repeat an earlier tune are
    discarded and redrawn.

    Args:
        size: Number of distinct tunes to return.
        timesteps: Number of notes per tune.

    Returns:
        ``(samples, samples_reverse)``: two parallel lists of tunes, where
        ``samples_reverse[i]`` is ``samples[i]`` in reverse order.

    Raises:
        ValueError: If ``size`` exceeds the number of distinct tunes of
            length ``timesteps`` (``12 ** timesteps``); the rejection loop
            could never finish in that case.
    """
    # One octave of notes. Rests are not generated here.
    # TODO: decide whether to add rests (the original note suggested a
    # 50/50 chance of a rest).
    characters = [str(i) for i in range(24, 36)]

    if size > len(characters) ** max(timesteps, 0):
        raise ValueError(
            f"cannot generate {size} unique tunes of length {timesteps}"
        )

    samples = []
    samples_reverse = []

    # Set membership keeps the duplicate check O(1) per candidate.
    seen = set()
    while len(samples) < size:
        notes = [random.randint(0, len(characters) - 1) for _ in range(timesteps)]
        tune = [characters[i] for i in notes]
        tune_as_string = "".join(tune)

        if tune_as_string in seen:
            continue
        seen.add(tune_as_string)

        samples.append(tune)
        samples_reverse.append(tune[::-1])

    return samples, samples_reverse

In [4]:
# Seed every RNG this notebook draws from so the generated data (and, as far
# as TensorFlow allows, weight initialisation and sampling) repeat on a
# fresh Restart & Run All.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Symbol table for the one-hot encoding: ' ' at index 0 followed by the
# notes '24'..'35'. The atonal generator below never emits ' '.
characters = [' '] + [str(i) for i in range(24, 36)]
encode, decode = create_ctable(characters)

timesteps = 10
hidden_dim = 128
latent_dim = 32
n_samples = 10000

atonal_x, atonal_x_reverse = generate_atonal_data(n_samples, timesteps)
# tonal_x, tonal_x_reverse = generate_tonal_data(n_samples, timesteps)

# The model is fed the reversed tunes and trained to emit the forward ones.
x_reverse = vectorize(atonal_x_reverse, encode)
x = vectorize(atonal_x, encode)

# NOTE(review): y is not used by any cell visible here; kept for the shape
# printout below.
y = np.zeros(n_samples)
print(x_reverse.shape, x.shape, y.shape)

(10000, 10, 13) (10000, 10, 13) (10000,)


In [8]:
# Build the recurrent VAE with an LSTM as its RNN layer.
# NOTE(review): the positional arguments are presumably (sequence length,
# symbols per step, hidden size, latent size) -- confirm against
# cs_systems.lstm_vae.
vae = VariationalRecurrentAutoEncoder(
    timesteps,
    len(characters),
    hidden_dim,
    latent_dim,
    RNN=tf.keras.layers.LSTM,
)

optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
vae.compile(optimizer, loss='categorical_crossentropy')

In [9]:
# Inputs are the reversed tunes, targets are the same tunes in forward order.
history = vae.fit(
    x_reverse,
    x,
    epochs=50,
    batch_size=128,
)

Train on 10000 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


# New Artefacts

Below, 10 new artefacts are introduced and learned by the agent. Training stops as soon as the agent successfully recreates every original input, or after 100 epochs at the latest.

In [10]:
# Draw 10 fresh tunes and one-hot encode both directions.
new_x, new_x_reverse = generate_atonal_data(10, timesteps)

new_x_reverse = vectorize(new_x_reverse, encode)
new_x = vectorize(new_x, encode)

# Fine-tune one epoch at a time (at most 100) and stop as soon as every
# decoded reconstruction matches its target tune.
for epoch in range(100):
    print(f"Training Epoch {epoch:03d}")
    history = vae.fit(new_x_reverse, new_x, epochs=1, batch_size=128)

    reconstructions = vae.predict(new_x_reverse)

    # A tune counts as misclassified if any decoded note differs.
    misclassifieds = sum(
        0 if np.array_equiv(decode(target), decode(predicted)) else 1
        for target, predicted in zip(new_x, reconstructions)
    )

    print('misclassifieds:', misclassifieds)
    if misclassifieds != 0:
        continue

    print("All X reconstructed correctly.")
    break

Training Epoch 000
Train on 10 samples
misclassifieds: 8
Training Epoch 001
Train on 10 samples
misclassifieds: 10
Training Epoch 002
Train on 10 samples
misclassifieds: 10
Training Epoch 003
Train on 10 samples
misclassifieds: 9
Training Epoch 004
Train on 10 samples
misclassifieds: 9
Training Epoch 005
Train on 10 samples
misclassifieds: 8
Training Epoch 006
Train on 10 samples
misclassifieds: 8
Training Epoch 007
Train on 10 samples
misclassifieds: 9
Training Epoch 008
Train on 10 samples
misclassifieds: 9
Training Epoch 009
Train on 10 samples
misclassifieds: 10
Training Epoch 010
Train on 10 samples
misclassifieds: 9
Training Epoch 011
Train on 10 samples
misclassifieds: 8
Training Epoch 012
Train on 10 samples
misclassifieds: 10
Training Epoch 013
Train on 10 samples
misclassifieds: 9
Training Epoch 014
Train on 10 samples
misclassifieds: 7
Training Epoch 015
Train on 10 samples
misclassifieds: 8
Training Epoch 016
Train on 10 samples
misclassifieds: 9
Training Epoch 017
Train on

In [88]:
# Encode the new tunes, average their latent vectors, and decode that
# average into a single "mean" artefact.
# NOTE(review): this averages the third value returned by vae.encode (named
# z here, presumably the sampled latent) rather than z_mean -- confirm that
# is intended.
z_mean, z_logvar, z = vae.encode(new_x_reverse)

# Use latent_dim instead of a hard-coded 32 so this cell keeps working when
# the latent size configured earlier in the notebook changes.
mean_latent = z.numpy().mean(axis=0).reshape(1, latent_dim)
new_artefact = np.array(vae.decode(mean_latent))

print('Artefacts')
print(np.array([decode(artefact) for artefact in new_x]))

print('\nMean Artefact')
print(np.array([decode(new_artefact[0])]))

Artefacts
[['28' '32' '30' '24' '29' '27' '33' '28' '35' '27']
 ['29' '24' '27' '35' '27' '25' '31' '27' '35' '33']
 ['24' '25' '35' '32' '32' '29' '35' '32' '35' '26']
 ['24' '33' '34' '27' '31' '31' '30' '29' '35' '31']
 ['28' '31' '30' '34' '28' '31' '28' '32' '26' '28']
 ['29' '27' '25' '26' '33' '27' '28' '32' '31' '33']
 ['30' '34' '24' '28' '24' '31' '26' '34' '31' '27']
 ['24' '24' '26' '30' '34' '31' '34' '33' '32' '30']
 ['25' '35' '24' '28' '28' '24' '28' '35' '31' '26']
 ['27' '30' '25' '29' '28' '29' '35' '32' '34' '35']]

Mean Artefact
[['24' '24' '27' '27' '31' '31' '32' '32' '26' '26']]
