# BACH BOT -- Implementing an RNN to Generate Sequences of Notes 

# Colab-Specific




In [None]:
#import Google Drive API
#pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

In [None]:
# Log into Google Drive with OAuth.
# NOTE(review): `auth` comes from google.colab, so this cell presumably only
# works inside a Colab runtime -- confirm before running elsewhere.
auth.authenticate_user()
# Give PyDrive's auth wrapper the application-default credentials.
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
# `drive` is the client used by the download cell to fetch the data file.
drive = GoogleDrive(gauth)

In [None]:
# Download the data onto the Colab machine.
# The Drive file with this hard-coded id is written to the working directory
# as 'chorales.lisp', which is the path the parsing cell opens.
downloaded=drive.CreateFile({'id':'1IlEPPcMzchX1-E4i3bmMwhIBrI6f-4qw'})
downloaded.GetContentFile('chorales.lisp')

# Get and process the data

In [None]:
#parse the lisp parenthetical into a python nested list
from pyparsing import OneOrMore, nestedExpr
# Read the whole file into a single string. The context manager guarantees the
# file handle is closed, even if reading raises.
with open("chorales.lisp", 'r') as chorales_file:
    inputdata = chorales_file.read()

In [None]:
# Process Data -- STEP 1: parse the Lisp s-expressions into nested Python lists.
lisp_grammar = OneOrMore(nestedExpr())
data = lisp_grammar.parseString(inputdata)

# Dump every parsed chorale: one entry per line, then a dashed rule.
CHORALE_SEPARATOR = '--------------------------------------------------------\n'
for chorale in data.asList():
    for entry in chorale:
        print(entry, '\n')
    print(CHORALE_SEPARATOR)
    

[1;30;43mStreaming output truncated to the last 5000 lines.[0m

[['st', '12'], ['pitch', '62'], ['dur', '2'], ['keysig', '0'], ['timesig', '16'], ['fermata', '0']] 

[['st', '14'], ['pitch', '64'], ['dur', '2'], ['keysig', '0'], ['timesig', '16'], ['fermata', '0']] 

[['st', '16'], ['pitch', '65'], ['dur', '4'], ['keysig', '0'], ['timesig', '16'], ['fermata', '0']] 

[['st', '20'], ['pitch', '67'], ['dur', '4'], ['keysig', '0'], ['timesig', '16'], ['fermata', '0']] 

[['st', '24'], ['pitch', '69'], ['dur', '4'], ['keysig', '0'], ['timesig', '16'], ['fermata', '0']] 

[['st', '28'], ['pitch', '67'], ['dur', '4'], ['keysig', '0'], ['timesig', '16'], ['fermata', '0']] 

[['st', '32'], ['pitch', '72'], ['dur', '4'], ['keysig', '0'], ['timesig', '16'], ['fermata', '0']] 

[['st', '36'], ['pitch', '71'], ['dur', '2'], ['keysig', '0'], ['timesig', '16'], ['fermata', '0']] 

[['st', '38'], ['pitch', '69'], ['dur', '2'], ['keysig', '0'], ['timesig', '16'], ['fermata', '0']] 

[['st', '40'], [

In [None]:
#Process Data -- STEP 2: Into sequences of Integers (not sequences of attribute-val sublists)
#This will be a little bit harder, but with some magic I believe it will be pretty doable. Watch the video.

In [None]:
#process the input into a list of numpy arrays
import numpy as np
# Reduce every note to a 2-element array [pitch, duration].
# Each parsed note is a list of [name, value] pairs in the order
# st, pitch, dur, ... so index 1 is the pitch pair and index 2 the duration pair.
# The first element of each chorale is skipped (presumably the chorale id
# rather than a note -- confirm against the data file).
notes = [
    np.array([int(event[1][1]), int(event[2][1])])
    for chorale in data.asList()
    for event in chorale[1:]
]

print(notes)

[array([67,  4]), array([67,  8]), array([74,  4]), array([71,  6]), array([69,  2]), array([67,  4]), array([67,  6]), array([69,  2]), array([71,  4]), array([69,  8]), array([71,  4]), array([74,  8]), array([72,  4]), array([71,  4]), array([69,  8]), array([67,  8]), array([71,  4]), array([71,  4]), array([72,  4]), array([74,  4]), array([74,  6]), array([72,  2]), array([71,  4]), array([69,  8]), array([67,  4]), array([71,  8]), array([72,  4]), array([74,  8]), array([72,  4]), array([71, 12]), array([67,  8]), array([71,  4]), array([74,  8]), array([72,  4]), array([71,  8]), array([69,  4]), array([67,  6]), array([69,  2]), array([71,  4]), array([69,  8]), array([71,  4]), array([74,  8]), array([72,  4]), array([71,  4]), array([69,  8]), array([67,  8]), array([69,  4]), array([69,  4]), array([69,  4]), array([69,  4]), array([71,  4]), array([67,  4]), array([66,  4]), array([64,  4]), array([71,  4]), array([73,  4]), array([71,  4]), array([69,  4]), array([68,  2

In [None]:
#indexOf function. returns the index of a note in the unique_notes array.
def indexOf(n):
    """Return the position of note `n` in the global `unique_notes` list.

    A stored note matches when each of its elements equals the element of `n`
    at the same position. The index of the first match is returned; None is
    returned when no stored note matches.
    """
    for position, known in enumerate(unique_notes):
        mismatch = any(n[k] != known[k] for k in range(len(known)))
        if not mismatch:
            return position
    return None


In [None]:
# Collect each distinct note once, in order of first appearance.
# indexOf searches the list while it is being built, so it returns None
# exactly for notes that have not been recorded yet.
unique_notes = []
for candidate in notes:
    if indexOf(candidate) is not None:
        continue
    unique_notes.append(candidate)

# Randomise which position (and therefore which integer id) each note gets.
import random
random.shuffle(unique_notes)

In [None]:
# Vocabulary size: how many distinct notes were collected.
print(len(unique_notes))

102


In [None]:
# Show the shuffled vocabulary; a note's position in this list is the integer
# id that indexOf returns for it.
print(unique_notes)

[array([66,  6]), array([70, 12]), array([77,  8]), array([69, 16]), array([66,  8]), array([73,  3]), array([72,  6]), array([74, 12]), array([74,  6]), array([63,  4]), array([64,  8]), array([65,  2]), array([72,  8]), array([67, 16]), array([70,  8]), array([71, 12]), array([66, 12]), array([69,  6]), array([68,  2]), array([77,  1]), array([67,  1]), array([69,  2]), array([72,  2]), array([71,  2]), array([63,  2]), array([73,  6]), array([67,  4]), array([65,  8]), array([63, 16]), array([77, 12]), array([64, 16]), array([65,  6]), array([76,  6]), array([69, 12]), array([73,  4]), array([65, 16]), array([71,  6]), array([77,  4]), array([67,  8]), array([68,  6]), array([76,  2]), array([70,  4]), array([62,  1]), array([60,  4]), array([71,  1]), array([68,  8]), array([73,  8]), array([62,  4]), array([70,  6]), array([65,  4]), array([61,  1]), array([77,  2]), array([64,  6]), array([79,  2]), array([75,  2]), array([61,  8]), array([66,  4]), array([71,  4]), array([66,  2

In [None]:
# Encode the note sequence as integers: each note becomes its index in unique_notes.
note_ids = [indexOf(note) for note in notes]
# Store the ids as a column: shape (len(notes), 1), one id per row.
intseq = np.array(note_ids).reshape(len(notes), 1)

In [None]:
# Sanity check: dump the parsed [pitch, duration] notes and how many there are.
print(notes)
print(len(notes))

[array([67,  4]), array([67,  8]), array([74,  4]), array([71,  6]), array([69,  2]), array([67,  4]), array([67,  6]), array([69,  2]), array([71,  4]), array([69,  8]), array([71,  4]), array([74,  8]), array([72,  4]), array([71,  4]), array([69,  8]), array([67,  8]), array([71,  4]), array([71,  4]), array([72,  4]), array([74,  4]), array([74,  6]), array([72,  2]), array([71,  4]), array([69,  8]), array([67,  4]), array([71,  8]), array([72,  4]), array([74,  8]), array([72,  4]), array([71, 12]), array([67,  8]), array([71,  4]), array([74,  8]), array([72,  4]), array([71,  8]), array([69,  4]), array([67,  6]), array([69,  2]), array([71,  4]), array([69,  8]), array([71,  4]), array([74,  8]), array([72,  4]), array([71,  4]), array([69,  8]), array([67,  8]), array([69,  4]), array([69,  4]), array([69,  4]), array([69,  4]), array([71,  4]), array([67,  4]), array([66,  4]), array([64,  4]), array([71,  4]), array([73,  4]), array([71,  4]), array([69,  4]), array([68,  2

In [None]:
import tensorflow as tf
from tensorflow import keras

In [None]:
#build the model as a 3 layer sequential neural network.
  # -- Layer 1: Embedding Layer to represent notes in 65 dimensions
  # -- Layer 2: LSTM Layer 
  # -- Layer 3: Prediction Layer -- outputs one score (logit) per known note
def build_bot():
    """Assemble the (uncompiled) note-prediction network.

    The vocabulary size is taken from the global `unique_notes` list and is
    used both as the embedding's input dimension and as the width of the
    final dense layer.
    """
    vocab_size = len(unique_notes)
    embedding = keras.layers.Embedding(vocab_size, 64+1)
    recurrent = keras.layers.LSTM(1024, return_sequences=True)
    note_scores = keras.layers.Dense(vocab_size)
    return keras.Sequential([embedding, recurrent, note_scores])

# Build the model instance that the compile, fit and generation cells use.
BachBot=build_bot()

In [None]:
#using the vectorized sequences, create input/output training data. 
def splitter(seq):
    """Split `seq` into an (input, target) pair offset by one position.

    The input is everything except the last element and the target is
    everything except the first, so target[i] is the element following
    input[i].
    """
    inputs = seq[:-1]
    targets = seq[1:]
    return inputs, targets

In [None]:
# Group the note ids into chunks of 64 (dropping the incomplete tail) and split
# each chunk into an (input, target) pair shifted by one position.
# NOTE(review): intseq has shape (len(notes), 1), so each chunk is presumably
# (64, 1) and splitter slices along the first axis -- i.e. the model would see
# 63 sequences of length 1 rather than one sequence of 63 notes. TODO confirm
# this is what is intended.
dataset = tf.data.Dataset.from_tensor_slices(intseq).batch(64,drop_remainder=True).map(splitter)
print(len(intseq))
def loss(labels, logits):
    """Sparse categorical cross-entropy computed on raw logits (from_logits=True)."""
    return tf.keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True)
# Train with the custom loss above and the Adam optimizer.
BachBot.compile(loss=loss,optimizer="Adam")

4693


In [None]:
# Train for 250 epochs and keep the returned history object.
# NOTE(review): nothing in this cell selects a device; running on the GPU
# presumably depends on the Colab runtime type -- confirm.
history=BachBot.fit(dataset,epochs=250)

Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 10/250
Epoch 11/250
Epoch 12/250
Epoch 13/250
Epoch 14/250
Epoch 15/250
Epoch 16/250
Epoch 17/250
Epoch 18/250
Epoch 19/250
Epoch 20/250
Epoch 21/250
Epoch 22/250
Epoch 23/250
Epoch 24/250
Epoch 25/250
Epoch 26/250
Epoch 27/250
Epoch 28/250
Epoch 29/250
Epoch 30/250
Epoch 31/250
Epoch 32/250
Epoch 33/250
Epoch 34/250
Epoch 35/250
Epoch 36/250
Epoch 37/250
Epoch 38/250
Epoch 39/250
Epoch 40/250
Epoch 41/250
Epoch 42/250
Epoch 43/250
Epoch 44/250
Epoch 45/250
Epoch 46/250
Epoch 47/250
Epoch 48/250
Epoch 49/250
Epoch 50/250
Epoch 51/250
Epoch 52/250
Epoch 53/250
Epoch 54/250
Epoch 55/250
Epoch 56/250
Epoch 57/250
Epoch 58/250
Epoch 59/250
Epoch 60/250
Epoch 61/250
Epoch 62/250
Epoch 63/250
Epoch 64/250
Epoch 65/250
Epoch 66/250
Epoch 67/250
Epoch 68/250
Epoch 69/250
Epoch 70/250
Epoch 71/250
Epoch 72/250
Epoch 73/250
Epoch 74/250
Epoch 75/250
Epoch 76/250
Epoch 77/250
Epoch 78

In [None]:
#TRAINED!!!! WAHOOOOOOOOO. damn, not bad! 82% accuracy, much higher than i expected. with some more epochs this could actually do pretty well!
#It's still wrong. After all this, you will read a paper and then do it right.

#TODO: find a way to programmatically play these......

In [None]:
#TODO: test it!!!
def generate_notes(model, start_notes, num_generate=64, temperature=1.0):
    """Sample a continuation of `start_notes` from a trained model.

    Args:
        model: Network that is called on a batch of note ids and returns one
            row of logits per input position (e.g. BachBot). It must also
            provide `reset_states()`.
        start_notes: Non-empty sequence of seed notes. Every seed note must be
            present in the global `unique_notes` vocabulary.
        num_generate: How many notes to sample. Defaults to 64.
        temperature: Positive divisor applied to the logits before sampling.
            Low values give more predictable notes, high values more
            surprising ones. Defaults to 1.0.

    Returns:
        A new list holding the seed notes followed by the sampled notes
        (the sampled notes are entries of `unique_notes`).

    Raises:
        ValueError: If `start_notes` is empty, contains a note that is not in
            `unique_notes`, or `temperature` is not positive.
    """
    if temperature <= 0:
        raise ValueError("temperature must be positive")

    # Convert the seed notes to vocabulary ids (vectorizing).
    seed_ids = [indexOf(n) for n in start_notes]
    if not seed_ids:
        raise ValueError("start_notes must contain at least one note")
    if any(note_id is None for note_id in seed_ids):
        # indexOf returns None for unknown notes; fail here with a clear
        # message instead of deep inside TensorFlow.
        raise ValueError("every start note must appear in unique_notes")

    # Add a leading batch dimension: batch size is 1 during generation.
    input_eval = tf.expand_dims(seed_ids, 0)

    # Notes sampled so far.
    notes_generated = []

    # Presumably a no-op unless the model contains stateful layers -- confirm.
    model.reset_states()
    for _ in range(num_generate):
        predictions = model(input_eval)

        # Remove the batch dimension, leaving one row of logits per position.
        predictions = tf.squeeze(predictions, 0)

        # Scale the logits, then sample one id per position from the
        # categorical distribution; only the last position's sample is used.
        predictions = predictions / temperature
        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()

        # Only the newly sampled id is fed back as the next input.
        # NOTE(review): earlier context is carried over only if the model
        # keeps state between calls; the LSTM in build_bot is not created
        # with stateful=True, so presumably each step conditions on the
        # previous note alone -- confirm.
        input_eval = tf.expand_dims([predicted_id], 0)

        notes_generated.append(unique_notes[predicted_id])

    return list(start_notes) + notes_generated

In [None]:
# Seed the generator with the first note of the corpus and let the bot continue.
start_notes = [notes[0]]
bot_notes = generate_notes(BachBot,start_notes)

In [None]:
# Inspect the result: the seed note(s) followed by the sampled notes.
print(len(bot_notes))
print(bot_notes)

65
[array([67,  4]), array([69,  4]), array([67,  4]), array([65,  4]), array([64,  4]), array([69,  4]), array([69,  4]), array([71,  8]), array([69,  4]), array([65,  4]), array([67,  4]), array([70,  4]), array([70,  4]), array([69,  2]), array([70,  1]), array([72,  1]), array([70,  3]), array([72,  1]), array([70,  2]), array([72,  4]), array([74,  4]), array([73,  4]), array([71,  8]), array([71,  4]), array([73,  2]), array([74,  4]), array([72,  4]), array([70,  4]), array([72,  4]), array([70,  4]), array([70,  4]), array([72,  4]), array([74,  4]), array([74,  4]), array([76,  4]), array([74,  4]), array([72,  4]), array([71,  4]), array([78,  4]), array([76,  4]), array([74,  4]), array([73,  4]), array([78,  4]), array([79,  2]), array([77,  2]), array([72,  2]), array([70,  2]), array([69,  2]), array([71,  2]), array([69,  2]), array([70,  4]), array([69,  4]), array([69,  4]), array([71,  4]), array([69,  8]), array([72,  4]), array([70,  4]), array([68,  4]), array([69,

In [None]:
#NO LONGER NEEDED but i'm keeping it because it's beautiful
#Note class: this should make things a bit easier to manage. this is basically just a struct 
class Note:
    """Plain record for one note, holding six attributes.

    The attributes (st, pitch, dur, keysig, timesig, fermata) are stored
    exactly as passed in; no validation or conversion is performed.

    Two notes compare equal when all six attributes are equal. Equal notes
    also hash alike, so notes can be de-duplicated with a set or used as dict
    keys. Do not mutate a note while it is stored in a set or dict.
    """

    def __init__(self,st,pitch,dur,keysig,timesig,fermata):
        self.st=st
        self.pitch=pitch
        self.dur=dur
        self.keysig=keysig
        self.timesig=timesig
        self.fermata=fermata

    def _key(self):
        """Return the attribute tuple that defines equality and hashing."""
        return (self.st, self.pitch, self.dur, self.keysig, self.timesig, self.fermata)

    def __repr__(self):
        return f"\n\t<Note> st:{self.st} ; pitch:{self.pitch} ; dur:{self.dur} ; keysig:{self.keysig} ; timesig:{self.timesig} ; fermata:{self.fermata}</Note>\n"

    def __eq__(self,comp):
        # Returning NotImplemented lets Python try the other operand's
        # comparison before falling back to "not equal".
        if not isinstance(comp,Note):
            return NotImplemented
        return self._key() == comp._key()

    def __hash__(self):
        # Defining __eq__ alone makes a class unhashable; keep the hash
        # consistent with equality.
        return hash(self._key())

# PLAYING THE NOTES


In [None]:
#see notes.py