<a href="https://colab.research.google.com/github/robgon-art/Frost-Songs/blob/main/Frost_Songs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Frost Songs: Using AI to Generate Melodies from Poems
### How Robert Frost's words can breathe life into computer-generated music.
## Initial setup.
Hover over the play button and click Run cell.<br><br>
It takes about 3 minutes to complete the setup.

In [None]:
#@title
# Colab-only magic: select the TensorFlow 1.x runtime before anything imports TensorFlow.
%tensorflow_version 1.x
# NOTE(review): the message below mentions the Salamander piano SoundFont, but the
# active commands copy the Music Transformer primers and the SGM SoundFont; the
# Salamander copy is commented out.
print('Copying Salamander piano SoundFont (via https://sites.google.com/site/soundfonts4u) from GCS...')
!gsutil -q -m cp -r gs://magentadata/models/music_transformer/primers/* /content/
# !gsutil -q -m cp gs://magentadata/soundfonts/Yamaha-C5-Salamander-JNv5.1.sf2 /content/
# This is the SoundFont that SF2_PATH points at further down.
!gsutil -q -m cp gs://download.magenta.tensorflow.org/soundfonts/SGM-v2.01-Sal-Guit-Bass-V1.3.sf2 /content/

print('Installing dependencies...')
# System packages: the FluidSynth shared library plus build/audio headers.
!apt-get update -qq && apt-get install -qq libfluidsynth1 build-essential libasound2-dev libjack-dev
# tensorflow-datasets is held below 4.0.0 - presumably for TF1 compatibility; confirm.
!pip install -q 'tensorflow-datasets < 4.0.0'
!pip install -qU google-cloud magenta pyfluidsynth

import ctypes.util
# Capture the real lookup BEFORE patching. Once ctypes.util.find_library is
# rebound to the proxy, calling it from inside the proxy would recurse forever.
# If this cell is re-run, unwrap the previously installed proxy instead of
# stacking another layer on top of it.
_orig_find_library = getattr(
    ctypes.util.find_library, '_wrapped', ctypes.util.find_library)

def proxy_find_library(lib):
  """Resolve a shared-library name, hard-wiring the answer for FluidSynth.

  Args:
    lib: library name as passed to ctypes.util.find_library.

  Returns:
    'libfluidsynth.so.1' when lib is 'fluidsynth'; otherwise whatever the
    original ctypes.util.find_library returns for lib.
  """
  if lib == 'fluidsynth':
    return 'libfluidsynth.so.1'
  return _orig_find_library(lib)

proxy_find_library._wrapped = _orig_find_library
ctypes.util.find_library = proxy_find_library

print('Importing libraries...')

import numpy as np
import os
# Set ahead of the TensorFlow import below so the variable is already in the
# environment when TensorFlow loads; '3' presumably silences its C++ logging - confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
import tensorflow.compat.v1 as tf
# Only errors are reported through TensorFlow's Python logger.
tf.logging.set_verbosity(tf.logging.ERROR)

from google.colab import files

from tensor2tensor import models
from tensor2tensor import problems
from tensor2tensor.data_generators import text_encoder
from tensor2tensor.utils import decoding
from tensor2tensor.utils import trainer_lib

from magenta.models.score2perf import score2perf
import note_seq

# The rest of the notebook uses TF1-style sessions and estimators.
tf.disable_v2_behavior()

# SoundFont copied into /content/ by the gsutil command earlier in this cell.
SF2_PATH = '/content/SGM-v2.01-Sal-Guit-Bass-V1.3.sf2'
# Passed as sample_rate to note_seq.play_sequence when rendering audio.
SAMPLE_RATE = 16000

def upload_midi():
  """Ask for a file upload in Colab and convert the first file to a NoteSequence.

  Prints a notice when more than one file was uploaded; only the first
  uploaded payload is converted.
  """
  uploaded_payloads = [payload for payload in files.upload().values()]
  if len(uploaded_payloads) > 1:
    print('Multiple files uploaded; using only one.')
  first_payload = uploaded_payloads[0]
  return note_seq.midi_to_note_sequence(first_payload)

def decode(ids, encoder):
  """Decode a sequence of IDs, dropping the first EOS marker and all that follows.

  Args:
    ids: iterable of token IDs.
    encoder: object exposing decode(list_of_ids).

  Returns:
    Whatever encoder.decode returns for the (possibly truncated) ID list.
  """
  id_list = list(ids)
  try:
    eos_position = id_list.index(text_encoder.EOS_ID)
  except ValueError:
    # No EOS present: decode the full list.
    eos_position = len(id_list)
  return encoder.decode(id_list[:eos_position])

# Model name handed to trainer_lib.create_estimator below.
model_name = 'transformer'
# Hyperparameter set name handed to trainer_lib.create_hparams below.
hparams_set = 'transformer_tpu'
# Checkpoint passed to estimator.predict when the sampler is started.
ckpt_path = 'gs://magentadata/models/music_transformer/checkpoints/melody_conditioned_model_16.ckpt'

class MelodyToPianoPerformanceProblem(score2perf.AbsoluteMelody2PerfProblem):
  """AbsoluteMelody2PerfProblem variant whose add_eos_symbol property is True."""

  # Read-only property, written with property() instead of the decorator form.
  add_eos_symbol = property(lambda self: True)

# Instantiate the problem and fetch its feature encoders; the 'inputs' encoder
# is later used to encode the melody and the 'targets' encoder to decode samples.
problem = MelodyToPianoPerformanceProblem()
melody_conditioned_encoders = problem.get_feature_encoders()

# Set up HParams.
hparams = trainer_lib.create_hparams(hparams_set=hparams_set)
trainer_lib.add_problem_hparams(hparams, problem)
# Overrides applied on top of the named hparams set.
hparams.num_hidden_layers = 16
hparams.sampling_method = 'random'

# Set up decoding HParams.
decode_hparams = decoding.decode_hparams()
decode_hparams.alpha = 0.0 # 0.0
decode_hparams.beam_size = 1 # 1

# Create Estimator.
run_config = trainer_lib.create_run_config(hparams)
estimator = trainer_lib.create_estimator(
    model_name, hparams, run_config,
    decode_hparams=decode_hparams)

# Module-level knobs read by input_generator on every iteration; the
# song-generation cell overwrites both before asking for a sample.
inputs = []
decode_length = 0

def input_generator():
  """Yield feature dicts forever, rebuilt from the current module-level values.

  Each dict holds:
    'inputs': int32 array wrapping the current `inputs` in two extra leading axes.
    'targets': empty int32 array of shape (1, 0).
    'decode_length': int32 scalar array taken from the current `decode_length`.
  """
  while True:
    # Read the globals at yield time so later reassignments are picked up.
    encoded_melody = np.array([[inputs]], dtype=np.int32)
    empty_targets = np.zeros([1, 0], dtype=np.int32)
    steps_to_decode = np.array(decode_length, dtype=np.int32)
    yield {
        'inputs': encoded_melody,
        'targets': empty_targets,
        'decode_length': steps_to_decode,
    }

# Start the Estimator, loading from the specified checkpoint.
# estimator.predict is fed by the generator-backed input_fn; results are pulled
# one at a time with next(), here and again in the song-generation cell.
input_fn = decoding.make_input_fn_from_generator(input_generator())
melody_conditioned_samples = estimator.predict(
    input_fn, checkpoint_path=ckpt_path)

# "Burn" one.
# The first result is requested and discarded while `inputs` is still empty and
# `decode_length` is 0 - presumably to force model loading during setup; confirm.
_ = next(melody_conditioned_samples)

# Download the release archive from Google Drive. The first curl call stores
# cookies in ./cookie; the second repeats the request with a confirm value that
# awk extracts from that cookie file, and writes the result to ${filename}.
!fileid="1FbKTMX4w7nKyMf4-ZQF5J5DC71S-GPzh"; filename="20200919Updatedyylab1Release1.zip"; \
curl -c ./cookie -s -L "https://drive.google.com/uc?export=download&id=${fileid}" > /dev/null; \
curl -Lb ./cookie "https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=${fileid}" -o ${filename}

# Unpack the archive and move its 0919/ contents into the directory that the
# encoder and GAN model paths below refer to.
!unzip 20200919Updatedyylab1Release1.zip
!mkdir Lyrics-Conditioned-Neural-Melody-Generation
!mv 20200919Updatedyylab1Release1/0919/* Lyrics-Conditioned-Neural-Melody-Generation
# MIDI helper packages installed after the download.
!pip install py-midi
!pip install pretty_midi

import numpy as np
from gensim.models import Word2Vec

# Paths into the directory populated from the unpacked archive above.
syll_model_path = 'Lyrics-Conditioned-Neural-Melody-Generation/enc_models/syllEncoding_20190419.bin'
word_model_path = 'Lyrics-Conditioned-Neural-Melody-Generation/enc_models/wordLevelEncoder_20190419.bin'
# Both files are loaded as gensim Word2Vec models; the song-generation cell
# looks syllables up in syllModel.wv and whole words in wordModel.wv.
syllModel = Word2Vec.load(syll_model_path)
wordModel = Word2Vec.load(word_model_path)

!pip install pyphen

import pyphen
# en_US hyphenation dictionary; dic.inserted(word) is used later to split words
# at the '-' marks it inserts.
dic = pyphen.Pyphen(lang='en_US')

import music21
def create_music_21_pattern(sample):
  """Turn generated rows into music21 notes laid end to end.

  Args:
    sample: sequence of indexable rows. Element 0 of each row is cast to int
      and passed to the music21 Note constructor as the pitch; element 1 is
      cast to float, rounded to 4 decimals and used as the quarter length.
      Any further elements are ignored.

  Returns:
    List of music21 Note objects, one per row. Each note's offset is the sum
    of the rounded lengths of the notes before it.
  """
  notes = []
  offset = 0
  for row in sample:
    # Fully qualified on purpose: the bare name `note` is only imported by a
    # later cell, so relying on it made this function depend on cell order.
    n = music21.note.Note(int(row[0]))
    length = round(float(row[1]), 4)
    n.quarterLength = length
    n.offset = offset
    notes.append(n)
    offset += length
  return notes

def transpose_notes(notes, new_key, old_key = None):
  """Transpose notes by the interval between two keys' tonics.

  Args:
    notes: notes used to build a music21 Stream.
    new_key: key whose tonic is the transposition target.
    old_key: key whose tonic is the transposition source. When None, the key
      is obtained by running the stream's 'key' analysis.

  Returns:
    The `.notes` of the transposed stream.
  """
  source_stream = music21.stream.Stream(notes)
  source_key = source_stream.analyze('key') if old_key is None else old_key
  shift = music21.interval.Interval(source_key.tonic, new_key.tonic)
  return source_stream.transpose(shift).notes

# Fetch the poem collection into the working directory; the parsing code below
# opens it by this filename.
!wget https://raw.githubusercontent.com/robgon-art/Frost-Songs/main/Robert_Frost_Poems.txt

# Parallel lists: titles[i] is the heading of poems[i].
titles = []
poems = []

with open("Robert_Frost_Poems.txt") as poem_file:
  raw_text = poem_file.read().strip()

# Entries are separated by a blank line; within an entry the first line is the
# title and the remaining lines are the poem body.
for entry in raw_text.split("\n\n"):
  heading, *body_lines = entry.split("\n")
  titles.append(heading.strip())
  poems.append("\n".join(body_lines).strip())

## Generate a Song
To create a song, choose a poem by Robert Frost, the beats per minute, and the musical key, then hit the play button.

In [None]:
# Colab form fields. The poem_title choices are matched against the titles
# parsed from Robert_Frost_Poems.txt, so they must stay spelled exactly as in that file.
poem_title = 'Plowmen' #@param ["Design", "Into My Own", "My November Guest", "A Late Walk", "Stars", "Storm Fear", "To the Thawing Wind", "A Prayer in Spring", "Flower-gathering", "A Dream Pang", "In Neglect", "The Vantage Point", "Mowing", "Revelation", "The Demiurge's Laugh", "Now Close the Windows", "The Pasture", "Good Hours", "The Road Not Taken", "A Patch of Old Snow", "The Telephone", "Meeting and Passing", "Hyla Brook", "The Oven Bird", "Putting in the Seed", "A Time to Talk", "The Cow in Apple Time", "Range-Finding", "The Hill Wife Loneiness (Her Word)", "House Fear", "The Smile (Her Word)", "The Oft-Repeated Dream", "The Line-Gang", "Fire and Ice", "In a Disused Graveyard", "Dust of Snow", "Nothing Gold Can Stay", "The Aim was Song", "Stopping by Woods on a Snowy Evening", "For Once, then Something", "Blue Butterfly Day", "The Kitchen Chimney", "Looking for a Sunset Bird in Winter", "A Boundless Moment", "Evening in a Sugar Orchard", "The Valley's Singing Day", "Misgiving", "Plowmen", "On a Tree Fallen Across the Road (To hear us talk)", "The Lockless Door"]
musical_key = "C" #@param ["A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#"]
# titles.index raises ValueError if the chosen title is not in the parsed list.
index = titles.index(poem_title)
poem = poems[index]
# Used below as 120.0/bpm to scale note durations before the MIDI file is written.
bpm = 100 #@param {type:"slider", min:60, max:140, step:1}
from music21 import note, stream
import re
# Suppress numpy's divide-by-zero warnings for the rest of the session.
np.seterr(divide='ignore')
# SavedModel directory loaded by generate_melody on every call.
model_path = "Lyrics-Conditioned-Neural-Melody-Generation/saved_gan_models/saved_model_best_overall_mmd"

def generate_melody(flattened_cond, length_song):
  """Run the saved melody model on one line's conditioning vector.

  The SavedModel at `model_path` is loaded into a fresh graph and session on
  every call. The flat conditioning vector is split into 20 equal parts and
  fed as metadata, together with uniform random values of shape (1, 20, 3).

  Args:
    flattened_cond: flat sequence of conditioning values; its length must be
      divisible by 20.
    length_song: number of generated steps to keep.

  Returns:
    List of music21 notes built from the first `length_song` generated steps.
  """
  with tf.Session(graph=tf.Graph()) as sess:
    tf.saved_model.loader.load(sess, [], model_path)
    lookup = tf.get_default_graph().get_tensor_by_name
    keep_prob = lookup("model/keep_prob:0")
    input_metadata = lookup("model/input_metadata:0")
    input_songdata = lookup("model/input_data:0")
    output_midi = lookup("output_midi:0")

    random_songdata = np.random.uniform(size=(1, 20, 3))
    # A batch of one: the 20 per-step conditioning chunks.
    condition = [np.split(np.asarray(flattened_cond), 20)]
    feed_dict = {
        keep_prob.name: 1.0,
        input_metadata.name: condition,
        input_songdata.name: random_songdata,
    }

    generated_features = sess.run(output_midi, feed_dict)
    # Keep the first batch row of every generated step.
    per_step_rows = [step[0, :] for step in generated_features]
    return create_music_21_pattern(per_step_rows[0:length_song])

# When True, the loop below quantizes notes, pads each line to a multiple of
# four quarter notes, and transposes/snaps pitches.
pre_process = True
lines = poem.split('\n')
# All notes of the song so far, across lines.
notes = []
# Running length of the song in quarter notes; used to offset each new line.
song_length = 0
the_key = music21.key.Key(musical_key)
# Pitch classes (pitch number modulo 12) of the C major scale.
key_of_c_major = {0, 2, 4, 5, 7, 9, 11}
# Per-note label data consumed by the piano-roll cell.
lyric_syllables = []
lyric_times = []
lyric_pitches = []

print("Preparing the poem,'" + poem_title +"', for melody generation.\n")

# Compose the song one poem line at a time: split the line into syllables,
# embed them, generate a phrase, tidy it up and append it to the song.
for line in lines:
  # Keep letters and spaces only, echo the cleaned line, then collapse runs of
  # spaces so that splitting yields one entry per word.
  line = re.sub(r'[^a-zA-Z ]+', '', line.strip())
  print(line)
  line = re.sub(' +', ' ', line)
  words = line.split(' ')

  # lyrics holds [syllable, word] pairs to embed; line_syllables holds the
  # text shown on the piano roll.
  lyrics = []
  line_syllables = []
  for word in words:
    # pyphen marks syllable breaks with '-'. str.split always returns at least
    # one item, so an unbroken word simply comes back as a single syllable.
    for syllable in dic.inserted(word).split('-'):
      if len(syllable) == 0:
        continue
      line_syllables.append(syllable)
      if syllable in syllModel.wv.vocab and word in wordModel.wv.vocab:
        lyrics.append([syllable, word])
      else:
        # Text missing from either vocabulary is embedded as "la".
        lyrics.append(["la", "la"])

  length_song = len(lyrics)
  if length_song == 0:
    # Nothing singable on this line (blank or punctuation only); without this
    # guard the padding below has no vector to repeat.
    continue

  # generate_melody splits the conditioning into 20 slots. Lines shorter than
  # that are padded by repeating the last syllable's embedding; longer lines
  # are cut off at 20.
  cond = []
  for i in range(20):
    slot = min(i, length_song - 1)
    syll2Vec = syllModel.wv[lyrics[slot][0]]
    word2Vec = wordModel.wv[lyrics[slot][1]]
    cond.append(np.concatenate((syll2Vec, word2Vec)))

  flattened_cond = [value for vector in cond for value in vector]

  new_notes = []
  pattern = generate_melody(flattened_cond, length_song)
  if len(pattern) == 0:
    # No notes came back; there is no last note to stretch, so skip the line.
    continue
  pattern_length = 0

  # Only syllables that actually received a note get a label, keeping
  # lyric_syllables aligned one-to-one with the generated notes.
  lyric_syllables.extend(line_syllables[:len(pattern)])

  # quantize the start times and note durations to the nearest quarter of a
  # quarter note, then shift the phrase to the current end of the song
  for n in pattern:
    if pre_process:
      n.offset = int(float(n.offset)*4+0.5) / 4
      n.quarterLength = int(float(n.quarterLength)*4+0.5) / 4
    n.offset += song_length
    new_notes.append(n)
    pattern_length += n.quarterLength

  diff = 0
  # stretch the last note out so the phrase fills a whole number of
  # four-quarter-note groups
  if pre_process:
    pattern_length_adjusted = float(pattern_length-0.125)
    new_length = 4 * (1 + pattern_length_adjusted//4)
    diff = new_length - float(pattern_length)
    new_notes[-1].quarterLength += diff

    # Transpose from the analyzed key to C first. The scale snap below tests
    # against C-major pitch classes and the final transposition assumes the
    # notes start in C, so targeting the user's key here only worked for "C".
    new_notes = transpose_notes(new_notes, music21.key.Key("C"))

  # any pitch outside the C major scale is lowered by a semitone
  for n in new_notes:
    if pre_process and int(n.pitch.ps) % 12 not in key_of_c_major:
      n.pitch.ps -= 1

  # transpose from C into the final key
  if pre_process:
    new_notes = transpose_notes(new_notes, the_key,
                                old_key=music21.key.Key("C"))

  notes.extend(new_notes)

  song_length += pattern_length + diff

  for n in new_notes:
    lyric_times.append(float(n.offset))
    lyric_pitches.append(n.pitch.ps)

# Write the melody to disk as MIDI, scaling durations by 120/bpm, then read it
# back as a NoteSequence.
tempo_scale = 120.0/bpm
midi_stream = music21.stream.Stream(notes).augmentOrDiminish(tempo_scale)
_ = midi_stream.write('midi', "test.mid")

melody_ns = note_seq.midi_file_to_sequence_proto("test.mid")

# Keep only the notes of the inferred melody instrument, ordered by start time.
melody_instrument = note_seq.infer_melody_for_sequence(melody_ns)
notes = [candidate for candidate in melody_ns.notes
         if candidate.instrument == melody_instrument]
del melody_ns.notes[:]
melody_ns.notes.extend(
    sorted(notes, key=lambda candidate: candidate.start_time))

# Make every note last until the next one starts; the final note keeps its end.
last_index = len(melody_ns.notes) - 1
for i in range(last_index):
  following = melody_ns.notes[i + 1]
  melody_ns.notes[i].end_time = following.start_time

# Encoded melody picked up by input_generator on the next sample request.
melody_encoder = melody_conditioned_encoders['inputs']
inputs = melody_encoder.encode_note_sequence(melody_ns)

# Play and plot the melody.
print("\nOriginal melody")
playback_options = dict(
    synth=note_seq.fluidsynth, sample_rate=SAMPLE_RATE, sf2_path=SF2_PATH)
note_seq.play_sequence(melody_ns, **playback_options)
note_seq.plot_sequence(melody_ns)

# Reload the melody from disk: the copy used above had its notes filtered and
# its end times rewritten, while the matching code below reads the file's own timing.
melody_ns = note_seq.midi_file_to_sequence_proto("test.mid")

print("Creating accompaniment.")

# Generate sample events.
# decode_length is read by input_generator; `inputs` was set from the encoded
# melody earlier in this cell, so the next prediction is conditioned on it.
decode_length = 4096
sample_ids = next(melody_conditioned_samples)['outputs']

# Decode to NoteSequence.
# The 'targets' encoder's decode result is used as a MIDI file path here.
midi_filename = decode(
    sample_ids,
    encoder=melody_conditioned_encoders['targets'])
accompaniment_ns = note_seq.midi_file_to_note_sequence(midi_filename)

def find_closest_note(n, notes):
  """Return the note in `notes` with n's pitch that lies nearest to n in time.

  Nearness is the squared difference of start times plus the squared
  difference of (start_time + end_time) sums. The earliest candidate wins ties.

  Args:
    n: reference note with pitch, start_time and end_time attributes.
    notes: iterable of candidate notes with the same attributes.

  Returns:
    The best-matching candidate, or None when no candidate shares n's pitch.
  """
  best_match = None
  best_score = float("inf")
  for candidate in notes:
    if not (n.pitch == candidate.pitch):
      continue
    onset_gap = n.start_time - candidate.start_time
    span_gap = (n.start_time + n.end_time) - (candidate.start_time + candidate.end_time)
    score = onset_gap * onset_gap + span_gap * span_gap
    if score < best_score:
      best_match, best_score = candidate, score
  return best_match

# Put every generated note on the accompaniment instrument and make it louder.
for n in accompaniment_ns.notes:
  n.instrument = 1
  n.program = 2 # piano
  # MIDI velocity is a 7-bit value, so cap the boost at 127; larger values are
  # not valid MIDI and may be rejected or dropped further down the pipeline.
  n.velocity = min(n.velocity + 20, 127)

original_pitches = []
original_times = []
# Label positions are rebuilt here from the accompaniment's timing, replacing
# the values collected while the melody was generated.
lyric_pitches = []
lyric_times = []
closest_notes = []

# For each melody note, find its counterpart in the accompaniment, move that
# note onto the lead instrument, and record where its lyric label belongs.
for n in melody_ns.notes:
  original_pitches.append(n.pitch)
  original_times.append(n.start_time)
  closest_note = find_closest_note(n, accompaniment_ns.notes)
  if closest_note is None:
    # find_closest_note returns None when no accompaniment note shares this
    # pitch. Fall back to the melody note's own position so there is still
    # one label per melody note, and leave the accompaniment untouched.
    lyric_times.append(n.start_time)
    lyric_pitches.append(n.pitch)
    continue
  closest_note.instrument = 0
  closest_note.program = 26
  closest_notes.append(closest_note)
  lyric_times.append(closest_note.start_time)
  lyric_pitches.append(closest_note.pitch)

# Cut a lead note short when it is still sounding as the next lead note starts,
# provided that next note starts strictly later than it does.
for earlier, later in zip(closest_notes, closest_notes[1:]):
  if (earlier.end_time > later.start_time and
      later.start_time > earlier.start_time):
    earlier.end_time = later.start_time

# hold the last 5 notes to create an ending
# Walk backwards from the end of the note list and lengthen the first note
# seen for each pitch by 4 seconds. Recording the pitch is what makes the
# "not in pitches" test meaningful, so repeated pitches are not stacked.
# NOTE(review): "last" means last in list order - confirm the list is time-ordered.
pitches = []
ending_count = min(5, len(accompaniment_ns.notes))  # tolerate very short songs
for i in range(1, ending_count + 1):
  n = accompaniment_ns.notes[-i]
  if n.pitch not in pitches:
    pitches.append(n.pitch)
    n.end_time += 4

# Play and plot.
# Renders the combined sequence (lead notes on instrument 0, the rest on
# instrument 1) through FluidSynth with the SoundFont at SF2_PATH.
print("\nFinal song")
note_seq.play_sequence(
    accompaniment_ns,
    synth=note_seq.fluidsynth, sample_rate=SAMPLE_RATE, sf2_path=SF2_PATH)
note_seq.plot_sequence(accompaniment_ns)

Preparing the poem,'Plowmen', for melody generation.

A plow they say to plow the snow
They cannot mean to plant it no
Unless in bitterness to mock
At having cultivated rock

Original melody


Creating accompaniment.

Final song


# Show a Piano Roll
Hit the play button below to show a piano roll of the song with lyrics.

In [None]:
#@title
import bokeh
from bokeh.models import ColumnDataSource, Label, LabelSet, Range1d
from bokeh.plotting import figure, output_file, show

# Build the piano roll without displaying it, so it can be resized and
# annotated before being shown.
fig = note_seq.plot_sequence(accompaniment_ns, show_figure = False)
fig.width, fig.height = 1600, 900
fig.toolbar.logo = None

# One label per lyric syllable, placed at the time and pitch recorded by the
# song-generation cell.
lyric_columns = dict(pitch=lyric_pitches, time=lyric_times, words=lyric_syllables)
source = ColumnDataSource(data=lyric_columns)
label_style = dict(level='overlay', x_offset=0, y_offset=10,
                   text_font_size='12px', text_font_style="bold")
labels = LabelSet(x='time', y='pitch', text='words', source=source, **label_style)
fig.add_layout(labels)
show(fig)