<a href="https://colab.research.google.com/github/robgon-art/ai-tunes/blob/main/AI_Tunes_GPT_3_Training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **AI-Tunes: Creating New Songs with Artificial Intelligence**
### **How I fine-tuned OpenAI's GPT-3 to generate music with a global structure**
By Robert A. Gonsalves

You can read my article about this project on Medium.

The source code is released under the CC BY-SA license.

![CC BY-SA](https://licensebuttons.net/l/by-sa/4.0/88x31.png)


## **Initialize the System**

In [None]:
!git clone https://github.com/robgon-art/music-geometry-eval
!git clone https://github.com/00sapo/OpenEWLD
!gsutil -q -m cp -r gs://magentadata/models/music_transformer/primers/* /content/
!gsutil -q -m cp gs://magentadata/soundfonts/Yamaha-C5-Salamander-JNv5.1.sf2 /content/
!apt-get update -qq && apt-get install -qq libfluidsynth1 build-essential libasound2-dev libjack-dev
!pip install magenta
!pip install pyfluidsynth
!pip install openai
import note_seq
SF2_PATH = '/content/Yamaha-C5-Salamander-JNv5.1.sf2'
SAMPLE_RATE = 16000

!wget https://wim.vree.org/svgParse/xml2abc.py-143.zip
!unzip xml2abc.py-143.zip
import sys
sys.path.append('/content/music-geometry-eval/music_geometry_eval')
import music_geometry_eval
import glob
import random
import music21
import music_geometry_eval
from collections.abc import Iterable
import numpy as np
random.seed(42)
song_files = glob.glob("OpenEWLD/dataset/*/*/*.mxl")
random.shuffle(song_files)
num_files = len(song_files)
print(song_files)
print("number of song files is", num_files)

transpose_dict = {"G major": 5, "A- major": 4, "A major": 3, "B- major": 2, "B major": 1, "C major": 0, "D- major": -1, "D major": -2, "E- major": -3, "E major": -4, "F major": -5, "F# major": -6}

## **Evaluate the Song Files**

In [None]:
# Tallies of every key / time signature seen in the dataset (including songs that are filtered out).
keys = {}
metres = {}

# One metric value per accepted song; consumed by the statistics cell.
cmm_arr = np.empty((0), np.float32)
lm_arr = np.empty((0), np.float32)
cent_arr = np.empty((0), np.float32)

for s in song_files:
  print("\n" + s)
  score = music21.converter.parse(s)

  # Scan the first part for its key and time signature; a later occurrence overwrites an earlier one.
  key = None
  metre = None
  part = score.parts[0]
  for p in part:
    if isinstance(p, Iterable):
      for n in p:
        if type(n) == music21.key.Key:
          key = n.name
        if type(n) == music21.meter.TimeSignature:
          metre = n.ratioString

  # Count each song exactly once per tally.
  metres[metre] = metres.get(metre, 0) + 1
  keys[key] = keys.get(key, 0) + 1

  # Only songs in 4/4 or 2/2 are evaluated.
  if metre not in ("4/4", "2/2"):
    continue

  # Only major-key songs are evaluated; a song with no detected key (None) is skipped
  # instead of raising a TypeError on the substring test.
  if key is None or "major" not in key:
    continue

  print(key, metre)

  if key in transpose_dict:
    interval = transpose_dict[key]
    print("transposing from key", key, "to C major using interval", interval)
    score = score.transpose(interval)
    # transpose() returns a new score, so the part must be fetched again;
    # otherwise the notes below would still come from the untransposed original.
    part = score.parts[0]

  # Each entry is [pitch number, duration in sixteenth notes rounded to the nearest integer].
  note_array = []

  for p in part:
    if isinstance(p, Iterable):
      for n in p:
        if type(n) == music21.note.Note:
          note_array.append([int(n.pitch.ps), int(n.quarterLength*4+0.5)])

  CMM = music_geometry_eval.calculate_time_supported_conjunct_melodic_motion(note_array)
  LM = music_geometry_eval.calculate_time_supported_limited_macroharmony(note_array, span_size=32)
  CENT = music_geometry_eval.calculate_time_supported_centricity(note_array, span_size=32)

  print("CMM :", round(CMM, 4))
  print("LM  :", round(LM, 4))
  print("CENT:", round(CENT, 4))

  cmm_arr = np.append(cmm_arr, CMM)
  lm_arr = np.append(lm_arr, LM)
  cent_arr = np.append(cent_arr, CENT)

print(metres)
print(keys)

## **Show the Statistics**

In [None]:
# Summarise each metric collected by the evaluation cell as a mean and a standard deviation.
CMM_mean, CMM_std = cmm_arr.mean(), cmm_arr.std()
LM_mean, LM_std = lm_arr.mean(), lm_arr.std()
CENT_mean, CENT_std = cent_arr.mean(), cent_arr.std()

# One "label mean ± std" row per metric, rounded to four decimals.
for label, mean_value, std_value in (
    ("Conjunct Melodic Motion (CMM) :", CMM_mean, CMM_std),
    ("Limited Macroharmony    (LM)  :", LM_mean, LM_std),
    ("Centricity              (CENT):", CENT_mean, CENT_std),
):
  print(label, round(mean_value, 4), "±", round(std_value, 4))

## **Prepare the Training Data**

In [None]:
import json
import subprocess
from collections.abc import Iterable

# Builds songs.jsonl: one {"prompt": ..., "completion": ...} record per accepted song.
# The prompt carries the X:/T:/C: header fields, the completion carries the ABC body,
# and " $ " stands in for line breaks in both.
num_prompts = 0
prompts = []
original_songs = []
# The context manager closes songs.jsonl even if a song raises part-way through the loop.
with open("songs.jsonl", "w") as prompt_file:
  for s in song_files:
    print(s)
    score = music21.converter.parse(s)

    # Scan the first part for its key and time signature; a later occurrence overwrites an earlier one.
    key = None
    metre = None
    part = score.parts[0]
    for p in part:
      if isinstance(p, Iterable):
        for n in p:
          if type(n) == music21.key.Key:
            key = n.name
          if type(n) == music21.meter.TimeSignature:
            metre = n.ratioString
    print(key, metre)

    # Keep only songs in 4/4 or 2/2 ...
    if metre not in ("4/4", "2/2"):
      continue

    # ... and in a major key. A song with no detected key (None) is skipped
    # instead of raising a TypeError on the substring test.
    if key is None or "major" not in key:
      continue

    if key in transpose_dict:
      interval = transpose_dict[key]
      print("transposing from key", key, "to C major using interval", interval)
      score = score.transpose(interval)

    score.write('xml', fp='song.xml')

    # Convert the MusicXML to ABC with the external xml2abc script; skip the song on any failure.
    # `except Exception` keeps the best-effort behaviour but no longer swallows KeyboardInterrupt,
    # so the loop can still be interrupted from the notebook.
    try:
      output_bytes = subprocess.check_output(["python", "/content/xml2abc_143/xml2abc.py", "song.xml", "-u", "-d", "4"], timeout=5)
      output = output_bytes.decode("utf-8").strip()
      with open("song.txt", "w") as abc_file:
        abc_file.write(output)
    except Exception as err:
      print("Unexpected error:", type(err))
      continue

    with open("song.txt") as song_file:
      lines = song_file.readlines()

    showed_title = False
    prompt_string = ""
    completion_string = ""
    is_header = True
    for line in lines:
      # "$" is reserved as the line separator in the training text, so drop it from the source.
      line = line.replace("$", "")
      line = line.replace("dc=", "")
      line = line.strip()

      # The first "V:" line marks the end of the header.
      if line.startswith("V:"):
        is_header = False

      if is_header:
        if line.startswith("X:") or line.startswith("C:"):
          prompt_string += line+"\n"

        # Only the first title line goes into the prompt.
        if line.startswith("T:") and not showed_title:
          prompt_string += line+"\n"
          showed_title = True
      elif not line.startswith("w:") and not line.startswith("V:"):
        # remove end of line comments
        index = line.rfind('%')
        if index > 0:
          line = line[:index].strip()

        # remove inline comments: odd-numbered segments are the quoted strings;
        # those starting with "^" are dropped, the others are kept with their quotes.
        segments = line.split('"')
        newline = ""
        for i, segment in enumerate(segments):
          if i%2 == 0:
            newline += segment
          elif not segment.startswith("^"):
            newline += '"' + segment + '"'
        # Collapse runs of whitespace to single spaces.
        line = ' '.join(newline.split())

        completion_string += line+"\n"

    prompt_string = prompt_string.replace(":",": ")
    prompt_string = prompt_string.replace('"', "`")
    prompt_string = prompt_string.replace("\n"," $ ")

    completion_string = completion_string.replace('"', "`")
    completion_string = completion_string.strip().replace("\n"," $ ")

    # Serialise with json.dumps so that backslashes and control characters are escaped and
    # every line is valid JSON. For ordinary text the output is the same as the previous
    # hand-built string; ensure_ascii=False keeps non-ASCII characters unescaped as before.
    record = {
        "prompt": prompt_string + "<song>",
        "completion": " " + completion_string + " $ <end>",
    }
    prompt = json.dumps(record, ensure_ascii=False) + "\n"

    # Skip exact duplicate records.
    if prompt not in prompts:
      original_songs.append(s)
      prompt_file.write(prompt)
      prompts.append(prompt)
      num_prompts += 1

print("num prompts is", num_prompts)

# **Check the Training File**

In [None]:
# Run the OpenAI CLI's fine_tunes.prepare_data tool on the songs.jsonl written by the previous cell.
!openai tools fine_tunes.prepare_data -f /content/songs.jsonl

# **Train GPT-3**

In [None]:
# Start a fine-tuning job through the OpenAI CLI with base model `curie` and 5 epochs.
# The API key below is a placeholder and must be replaced with your own key.
# NOTE(review): the `-t` value looks like a hard-coded ID of a previously uploaded training file;
# presumably it has to be replaced with the ID of your own uploaded songs.jsonl — confirm.
!export OPENAI_API_KEY="<Your OpenAI API Key>"; openai api fine_tunes.create -t file-2Bc4IBGYtOqFAv2CudaHtABu --model curie --n_epochs 5

# **Generate Five Songs**

In [None]:
import openai
import music21
import numpy as np
from collections.abc import Iterable

# Reference mean / standard deviation for each metric, hard-coded here.
# NOTE(review): presumably the values printed by the statistics cell for the training songs — confirm.
CMM_mean = 2.2715
CMM_std = 0.4831

LM_mean = 2.0305
LM_std = 0.5386

CENT_mean = 0.3042
CENT_std = 0.0891

band_name = "I Lost My Voice"
song_name = "The Rare Pearls"
# Same "field $ field $ <song>" layout as the prompts written to songs.jsonl.
prompt = "X: 1 $ T: " + song_name + " $ C: " + band_name + " $ <song>"
print(prompt)
print()

openai.api_key = "<Your OpenAI API Key>"

# ABC header prepended to every generated body; it does not change between iterations,
# so it is built once here instead of inside the loop.
formatted_prompt = "X: 1 $ T: " + song_name + " $ C: " + band_name + " $ L: 1/4 $ M: 4/4 $ K: C $ V: 1 treble"
formatted_prompt = formatted_prompt.replace(" $ ", "\n")
formatted_prompt = formatted_prompt.replace("<song>", "").strip()

songs_with_scores = []
score_arr = np.empty((0), np.float32)

for i in range(5):
  print("\n  Generating Song", i)
  response = openai.Completion.create(
      model="curie:ft-user-j0julqovorjakyuyt3kv3zci-2021-08-24-11-42-59",
      prompt=prompt,
      stop = " $ <end>",
      temperature=0.75,
      top_p=1.0,
      frequency_penalty=0.0,
      presence_penalty=0.0,
      max_tokens = 1000)

  print(response)
  print()

  # Undo the training-data encoding: backticks back to double quotes, " $ " back to newlines.
  formatted_song = response["choices"][0]["text"].strip()
  formatted_song = formatted_song.replace('`', '"')
  formatted_song = formatted_song.replace(" $ ", "\n")
  new_song = formatted_prompt+ "\n" + formatted_song
  print(new_song)
  with open("new_song.abc", "w") as new_song_file:
    new_song_file.write(new_song)

  song = music21.converter.parse("new_song.abc")

  # Walk the first part backwards so that each chord symbol can be stretched to last until
  # the start of the following chord symbol (or until the end of the song for the last one).
  part = song.parts[0]
  chord_end = song.highestTime
  for pi in reversed(range(len(part))):
    p = part[pi]
    for ni in reversed(range(len(p))):
      n = p[ni]
      if type(n) == music21.harmony.ChordSymbol:
        chord_start = p.offset + n.offset
        n.duration.quarterLength = chord_end - chord_start
        n.volume = music21.volume.Volume(velocity=48)
        chord_end = chord_start
      elif type(n) == music21.note.Note:
        n.volume = music21.volume.Volume(velocity=64)
  file_name = "song" + str(i).zfill(2) + ".mid"
  song.write('midi', fp=file_name)

  # Each entry is [pitch number, duration in sixteenth notes rounded to the nearest integer].
  note_array = []

  for p in part:
    if isinstance(p, Iterable):
      for n in p:
        if type(n) == music21.note.Note:
          note_array.append([int(n.pitch.ps), int(n.quarterLength*4+0.5)])

  CMM = music_geometry_eval.calculate_time_supported_conjunct_melodic_motion(note_array)
  LM = music_geometry_eval.calculate_time_supported_limited_macroharmony(note_array, span_size=32)
  CENT = music_geometry_eval.calculate_time_supported_centricity(note_array, span_size=32)

  print("  CMM :", round(CMM, 4))
  print("  LM  :", round(LM, 4))
  print("  CENT:", round(CENT, 4))

  # Normalise each metric by its own reference mean and standard deviation.
  # (The centricity term previously divided by CMM_std, which mis-scaled it.)
  norm_cmm = (CMM - CMM_mean) / CMM_std
  norm_lm = (LM - LM_mean) / LM_std
  norm_cent = (CENT - CENT_mean) / CENT_std

  # Sum of the squared normalised deviations; smaller means closer to the reference statistics.
  norm_score_squared = norm_cmm * norm_cmm + norm_lm * norm_lm + norm_cent * norm_cent
  print("  NDM:", round(norm_score_squared, 4))
  score_arr = np.append(score_arr, norm_score_squared)

  songs_with_scores.append([norm_score_squared, file_name])

# Ascending sort: the song closest to the reference statistics comes first.
songs_with_scores.sort()
for pair in songs_with_scores:
  print(round(pair[0], 4), pair[1])

# **Choose a Song and Play It**


In [None]:
# Index into songs_with_scores; the list is sorted ascending, so 0 is the lowest-scoring song.
song_number = 0

# Each entry is a [score, MIDI file name] pair.
chosen_score, chosen_file = songs_with_scores[song_number]
melody_ns = note_seq.midi_file_to_sequence_proto(chosen_file)
print(round(chosen_score, 4), chosen_file)

# Play the selected song with the configured sound font, then plot it.
note_seq.play_sequence(
  melody_ns,
  synth=note_seq.fluidsynth,
  sample_rate=SAMPLE_RATE,
  sf2_path=SF2_PATH,
)
note_seq.plot_sequence(melody_ns)