<a href="https://colab.research.google.com/github/jwang44/GuitarPro-Stuff/blob/main/anno_gen.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install pyguitarpro

Collecting pyguitarpro
  Downloading PyGuitarPro-0.8-py2.py3-none-any.whl (45 kB)
[?25l[K     |███████▎                        | 10 kB 22.9 MB/s eta 0:00:01[K     |██████████████▌                 | 20 kB 9.9 MB/s eta 0:00:01[K     |█████████████████████▉          | 30 kB 8.0 MB/s eta 0:00:01[K     |█████████████████████████████   | 40 kB 7.4 MB/s eta 0:00:01[K     |████████████████████████████████| 45 kB 1.4 MB/s 
Installing collected packages: pyguitarpro
Successfully installed pyguitarpro-0.8


In [2]:
import os
import glob
import guitarpro
import json

In [3]:
def get_metadata(song):
  """Collect the song-level fields that go into an annotation.

  Args:
    song: parsed song object exposing ``title`` and ``tempo`` attributes.

  Returns:
    dict with the keys ``"title"`` and ``"tempo"`` (in that order).
  """
  # Other song attributes (artist, album, tab, tempoName, key.name and the
  # track count) are intentionally not exported for now.
  return dict(title=song.title, tempo=song.tempo)
  
def get_guitar_tracks(song):
  """Select the six-string guitar tracks of a song.

  A track is kept when it is not a percussion track, its channel instrument is
  one of the MIDI guitar programs below, and it has exactly six strings:

    24 Acoustic Guitar (nylon)
    25 Acoustic Guitar (steel)
    26 Electric Guitar (jazz)
    27 Electric Guitar (clean)
    28 Electric Guitar (muted)
    29 Overdriven Guitar
    30 Distortion Guitar

  Args:
    song: parsed song object with a ``tracks`` sequence.

  Returns:
    list of the matching tracks, in their original order.
  """
  guitar_programs = (24, 25, 26, 27, 28, 29, 30)
  selected = []
  for candidate in song.tracks:
    # The percussion check is still required: some drum tracks carry a guitar
    # program number.
    if candidate.isPercussionTrack:
      continue
    if candidate.channel.instrument not in guitar_programs:
      continue
    if len(candidate.strings) != 6:
      continue
    selected.append(candidate)
  return selected

def get_track_info(track):
  """Summarise a track for the annotation.

  Args:
    track: track object exposing ``number``, ``name`` and
      ``channel.instrument``.

  Returns:
    dict with ``"track_no"``, ``"name"`` and ``"midi_channel_instrument"``
    (e.g. 30 for distortion guitar).
  """
  # String count, fret count, percussion / 12-string flags, measure count and
  # the channel effect are available on the track but not exported for now.
  info = {}
  info["track_no"] = track.number
  info["name"] = track.name
  info["midi_channel_instrument"] = track.channel.instrument
  return info

def get_string_info(string):
  """Return the number and tuning value of one guitar string.

  Args:
    string: string object exposing ``number`` and ``value``.

  Returns:
    dict with ``"string_number"`` and ``"string_value"``; for standard
    EADGBE tuning the values are 40, 45, 50, 55, 59, 64.
  """
  return dict(string_number=string.number, string_value=string.value)

def get_measure_info(measure):
  """Summarise one measure for the annotation.

  Args:
    measure: measure object exposing ``voices``, ``isEmpty``, ``number``,
      ``start``, ``end`` and ``length``.

  Returns:
    dict with ``"voice_count"`` (number of non-empty voices), ``"is_empty"``,
    ``"measure_number"``, ``"start"``, ``"end"`` and ``"length"``.
  """
  # Only voices that actually contain beats are counted.
  non_empty_voices = sum(1 for voice in measure.voices if not voice.isEmpty)
  # Key and time signature are available on the measure but not exported.
  summary = {"voice_count": non_empty_voices}
  summary["is_empty"] = measure.isEmpty
  summary["measure_number"] = measure.number
  summary["start"] = measure.start
  summary["end"] = measure.end
  summary["length"] = measure.length
  return summary

def get_voice_info(voice):
  """Return the beat count and emptiness flag of a voice.

  Args:
    voice: voice object exposing ``beats`` and ``isEmpty``.

  Returns:
    dict with ``"beat_count"`` and ``"is_empty"``; an empty voice is one that
    has 0 beats.
  """
  return dict(beat_count=len(voice.beats), is_empty=voice.isEmpty)

def get_beat_info(beat):
  """Summarise one beat.

  Args:
    beat: beat object exposing ``notes``, ``duration.time``, ``start``,
      ``effect.vibrato``, ``hasVibrato``, ``hasHarmonic`` and
      ``startInMeasure``.

  Returns:
    dict with ``"note_count"``, ``"duration_time"``, ``"start"``,
    ``"vibrato"`` (beat-level flag), ``"has_vibrato"`` / ``"has_harmonic"``
    (true if any note in the beat has that effect) and ``"offset"`` (start of
    the beat relative to the start of its measure).
  """
  summary = {}
  summary["note_count"] = len(beat.notes)
  summary["duration_time"] = beat.duration.time
  summary["start"] = beat.start
  summary["vibrato"] = beat.effect.vibrato
  summary["has_vibrato"] = beat.hasVibrato
  summary["has_harmonic"] = beat.hasHarmonic
  summary["offset"] = beat.startInMeasure
  return summary

def get_measure_notes(measure):
  """Return every note of a measure as one flat list.

  Notes are gathered from all non-empty voices, voice by voice and beat by
  beat. The previous version returned from inside the voice loop, so it
  stopped after the first non-empty voice and returned ``None`` (not a list)
  when no voice had any beats.

  Args:
    measure: measure object whose ``voices`` each expose ``isEmpty`` and
      ``beats``; every beat exposes ``notes``.

  Returns:
    list of note objects; an empty list when the measure has no notes.
  """
  notes = []
  for voice in measure.voices:
    if voice.isEmpty:
      continue
    for beat in voice.beats:
      notes.extend(beat.notes)
  # Return only after every voice has been visited.
  return notes

def get_note_info(note):
  """Describe one note: string, fret, pitch, type and playing effects.

  Args:
    note: note object exposing ``string``, ``value``, ``realValue``,
      ``type.name`` and ``effect``.

  Returns:
    dict with ``"string"``, ``"fret"``, ``"pitch"``, ``"type"`` and
    ``"effects"`` (the dict produced by ``get_effect_info``).
  """
  described = {}
  described["string"] = note.string
  described["fret"] = note.value  # fret number
  # realValue = fret value + string value, i.e. the MIDI note number.
  described["pitch"] = note.realValue
  # Name of the NoteType member (rest=0, normal=1, tie=2, dead=3).
  described["type"] = note.type.name
  described["effects"] = get_effect_info(note.effect)
  return described

def get_effect_info(effect):
  """Flatten a note effect into a plain dict.

  Always present: ``"ghost_note"``, ``"hammer"``, ``"mute"`` (palm mute),
  ``"vibrato"``, ``"harmonic"`` (bool), ``"bend_type"`` / ``"bend_value"``
  (``None`` without a bend) and ``"slide_types"`` (list of slide names, or
  ``None`` when there are no slides).

  Present only for grace notes: ``"grace_dur"``, ``"grace_fret"``,
  ``"grace_transition"``. Present only for trills: ``"trill_fret"``,
  ``"trill_dur"``.

  Args:
    effect: note effect object.

  Returns:
    dict as described above.
  """
  info = dict(
      ghost_note=effect.ghostNote,
      hammer=effect.hammer,
      mute=effect.palmMute,
      vibrato=effect.vibrato,
  )

  # Only the presence of a harmonic is recorded, not its type.
  info["harmonic"] = bool(effect.isHarmonic)

  if effect.isBend:
    info["bend_type"] = effect.bend.type.name
  else:
    info["bend_type"] = None
  if effect.isBend:
    info["bend_value"] = effect.bend.value
  else:
    info["bend_value"] = None

  if effect.slides:
    info["slide_types"] = [slide.name for slide in effect.slides]
  else:
    info["slide_types"] = None

  if effect.isGrace:
    grace = effect.grace
    info["grace_dur"] = grace.durationTime  # grace note effect duration
    info["grace_fret"] = grace.fret
    # GraceEffectTransition member name (none=0, slide=1, bend=2, hammer=3).
    info["grace_transition"] = grace.transition.name

  if effect.isTrill:  # TrillEffect, hammer-on / pull-off
    trill = effect.trill
    info["trill_fret"] = trill.fret
    info["trill_dur"] = trill.duration.time

  return info

def get_note_time(note, bpm):
  """Convert a note's beat position and duration into seconds.

  Args:
    note: note object whose ``beat`` exposes ``start`` and ``duration.time``.
    bpm: tempo in beats per minute, applied to the whole conversion.

  Returns:
    dict with ``"start"`` and ``"dur"`` in seconds, rounded to 4 decimals.
  """
  # NOTE(review): 960 is used both as the units-per-beat divisor and as the
  # offset of the first beat -- presumably the library's quarter-note
  # resolution; confirm against the PyGuitarPro documentation.
  resolution = 960
  beats_per_second = bpm / 60
  beat = note.beat
  # NOTE(review): one bpm is applied to every note, so tempo changes inside a
  # song are not reflected in the computed times.
  start_sec = round(((beat.start - resolution) / resolution) / beats_per_second, 4)
  dur_sec = round((beat.duration.time / resolution) / beats_per_second, 4)
  return {"start": start_sec, "dur": dur_sec}

## Get single guitar tracks from the multi-track gtp files and generate annotations for each single-track file

In [4]:
from guitarpro.models import GPException

def get_single_tracks(file, output_dir):
  """Write each six-string guitar track of a multi-track file as its own .gp5.

  Output files are named ``<stem>_<i>.gp5``, where ``i`` is the index of the
  track among the guitar tracks selected by ``get_guitar_tracks``.

  Args:
    file: path of the multi-track Guitar Pro file to split.
    output_dir: directory that receives the single-track files.

  Raises:
    Whatever ``guitarpro.parse`` raises for ``file``; a ``GPException`` raised
    while writing is reported on stdout and the partial output is removed.
  """
  song = guitarpro.parse(file)
  # The selection is taken once, before song.tracks is overwritten below.
  tracks = get_guitar_tracks(song)
  # NOTE(review): the stem is cut at the first "." of the file name, so names
  # containing dots are shortened. Left unchanged here so the output names
  # stay compatible with the stem logic of the downstream functions.
  stem = file.split("/")[-1].split(".")[0]
  for i, track in enumerate(tracks):
    # The parsed song is reused for every output so that its metadata is
    # preserved; only its track list is swapped. `song` is local to this
    # function, so the mutation does not leak to callers.
    song.tracks = [track]
    file_name = "{}_{}.gp5".format(stem, i)
    out_path = os.path.join(output_dir, file_name)
    try:
      guitarpro.write(song, out_path)
    except GPException:
      print(f"GPException, removing the corrupt file {file_name}")
      # The failure may happen before anything was written; only remove a
      # file that actually exists so the cleanup cannot raise itself.
      if os.path.exists(out_path):
        os.remove(out_path)

def get_anno(file, output_dir):
  """Write a JSON annotation for a pre-processed single-track file.

  The JSON holds ``"meta"`` (song metadata) and ``"track"`` (track info with a
  ``"measures"`` list). Every measure entry carries a ``"notes"`` list -- empty
  when the measure has no notes -- so the schema is the same for all measures.
  The output is named after the input file with a ``.json`` extension.

  Args:
    file: path of a Guitar Pro file that contains exactly one track.
    output_dir: directory that receives the JSON file.

  Raises:
    AssertionError: if the parsed song does not contain exactly one track.
  """
  try:
    single_track_song = guitarpro.parse(file)
  except GPException:
    print(f"GPEXCEPTION in parsing {file.split('/')[-1]}")
    return

  metadata = get_metadata(single_track_song)

  assert len(single_track_song.tracks) == 1
  track = single_track_song.tracks[0]
  track_info = get_track_info(track)

  measures = []
  for measure in track.measures:
    measure_info = get_measure_info(measure)
    notes = []
    # `or []` tolerates a helper that yields None for a note-less measure.
    for note in get_measure_notes(measure) or []:
      note_info = get_note_info(note)
      note_info.update(get_note_time(note, metadata["tempo"]))
      notes.append(note_info)
    # Assigned outside the note loop so note-less measures get "notes": [].
    measure_info["notes"] = notes
    measures.append(measure_info)

  track_info["measures"] = measures
  data = {"meta": metadata, "track": track_info}

  # splitext drops only the final extension, so dots inside the name survive.
  stem = os.path.splitext(os.path.basename(file))[0]
  file_name = "{}.json".format(stem)

  # A separate name for the handle keeps the `file` argument intact.
  with open(os.path.join(output_dir, file_name), "w") as out_file:
    json.dump(data, out_file, indent=2)

In [None]:
# Split every multi-track Guitar Pro file into single-guitar-track files.
MULTI_TRACK_DIR = "/content/drive/MyDrive/UG/raw_all_time_top_hits"
SINGLE_TRACK_DIR = "/content/drive/MyDrive/UG/proc_single_track_gtp"

i = 0  # stays 0 when the glob matches nothing
for i, file in enumerate(glob.glob(os.path.join(MULTI_TRACK_DIR, "*.gp*")), start=1):
  print(f"processing file {i}: {file.split('/')[-1]}")
  get_single_tracks(file=file, output_dir=SINGLE_TRACK_DIR)

processing file 1: Metallica - Nothing Else Matters (ver 5).gp5
processing file 2: Guns N' Roses - Sweet Child O Mine (ver 2).gp4
processing file 3: Led Zeppelin - Stairway To Heaven.gp5
processing file 4: Metallica - Fade To Black (ver 4).gp5
processing file 5: JerryC - Canon Rock.gp4
processing file 6: Metallica - One (ver 2).gp5
processing file 7: Metallica - Master Of Puppets (ver 4 by DUDERMAN).gp5
processing file 8: Metallica - Enter Sandman.gp5
processing file 9: ACDC - Back In Black (ver 4 by GuitarManiac09).gp5
processing file 10: Deep Purple - Smoke On The Water.gp4
processing file 11: Metallica - The Day That Never Comes.gp4
processing file 12: Black Sabbath - Paranoid.gp5
processing file 13: Metallica - Nothing Else Matters (ver 6).gp3
processing file 14: Pink Floyd - Comfortably Numb.gp3
processing file 15: Nirvana - Smells Like Teen Spirit (ver 2).gp5
processing file 16: Ozzy Osbourne - Crazy Train.gp5
processing file 17: ACDC - Highway To Hell (ver 3).gp5
processing file

9 is an unknown ChordExtension in track 1, measure 38, voice 1, beat 1
9 is an unknown ChordExtension in track 1, measure 39, voice 1, beat 7


processing file 93: Avenged Sevenfold - Beast And The Harlot.gp5
processing file 94: Extreme - More Than Words (ver 3).gp5
processing file 95: Green Day - American Idiot (ver 2).gp5
processing file 96: Bullet For My Valentine - Waking The Demon.gp5
processing file 97: Led Zeppelin - Black Dog.gp5
processing file 98: Metallica - Creeping Death.gp5
processing file 99: Queen - Bohemian Rhapsody.gp5
processing file 100: Scorpions - Still Loving You (ver 3).gp5


In [None]:
# Generate a JSON annotation for every single-track file in SINGLE_TRACK_DIR
# (the directory is defined in the track-splitting cell above).
# These whole-track annotations are likely not useful in our experiments;
# the cell is kept as a test of the annotation code.
# Known invalid inputs: all single tracks of "Don't Cry" and the first track
# of "Hey Joe" (Jimi Hendrix) -- guitar pro fails to parse them.
from guitarpro.models import GPException
JSON_DIR = "/content/drive/MyDrive/UG/proc_single_track_anno"

for file in glob.glob(os.path.join(SINGLE_TRACK_DIR, "*.gp*")):
  print(f"processing {file.split('/')[-1]}")
  get_anno(file=file, output_dir=JSON_DIR)

processing Metallica - Nothing Else Matters (ver 5)_0.gp5
processing Metallica - Nothing Else Matters (ver 5)_1.gp5
processing Metallica - Nothing Else Matters (ver 5)_2.gp5
processing Metallica - Nothing Else Matters (ver 5)_3.gp5
processing Metallica - Nothing Else Matters (ver 5)_4.gp5
processing Guns N' Roses - Sweet Child O Mine (ver 2)_0.gp5
processing Guns N' Roses - Sweet Child O Mine (ver 2)_1.gp5
processing Guns N' Roses - Sweet Child O Mine (ver 2)_2.gp5
processing Led Zeppelin - Stairway To Heaven_0.gp5
processing Led Zeppelin - Stairway To Heaven_1.gp5
processing Led Zeppelin - Stairway To Heaven_2.gp5
processing Led Zeppelin - Stairway To Heaven_3.gp5
processing Led Zeppelin - Stairway To Heaven_4.gp5
processing Metallica - Fade To Black (ver 4)_0.gp5
processing Metallica - Fade To Black (ver 4)_1.gp5
processing Metallica - Fade To Black (ver 4)_2.gp5
processing Metallica - Fade To Black (ver 4)_3.gp5
processing Metallica - Fade To Black (ver 4)_4.gp5
processing Metallica

## Break the single-track gtp into 4-bar phrases. Then generate annotations for the phrases

In [None]:
def get_phrases(single_track_file, output_dir):
  """Cut a single-track file into 4-bar phrases and write each as a .gp5.

  Phrases are consecutive groups of four measures (the last one may be
  shorter). A phrase whose measures contain no notes at all is skipped and
  reported on stdout. Output files are named ``<stem>_<i>.gp5``, where ``i``
  is the phrase index.

  Args:
    single_track_file: path of a Guitar Pro file with exactly one track.
    output_dir: directory that receives the phrase files.

  Raises:
    AssertionError: if the parsed song does not contain exactly one track.
  """
  try:
    song = guitarpro.parse(single_track_file)
  except GPException:
    print(f"GPEXCEPTION in parsing {single_track_file.split('/')[-1]}")
    return
  assert len(song.tracks) == 1
  track = song.tracks[0]
  measures = track.measures
  n = 4  # measures per phrase
  # Slicing copies, so the phrases are unaffected when track.measures is
  # overwritten inside the loop below.
  bar_phrases = [measures[i:i+n] for i in range(0, len(measures), n)]
  # splitext drops only the final extension, so dots inside the name survive.
  stem = os.path.splitext(os.path.basename(single_track_file))[0]
  for i, phrase in enumerate(bar_phrases):
    # Truthiness covers both an empty list and a None result from the helper,
    # so a note-less measure can no longer raise a TypeError here.
    if any(get_measure_notes(measure) for measure in phrase):
      # The parsed song and its only track are reused for every phrase so the
      # song metadata is preserved; only the measure list is swapped.
      track.measures = phrase
      file_name = "{}_{}.gp5".format(stem, i)
      guitarpro.write(song, os.path.join(output_dir, file_name))
    else:
      print(f"empty measure found in {single_track_file.split('/')[-1]} - {i}")

In [None]:
# Cut every single-track file into 4-bar phrase files under PHRASE_DIR.
# SINGLE_TRACK_DIR is set again here to the same path as in the
# track-splitting cell, so this cell does not depend on that one having run.
from guitarpro.models import GPException
SINGLE_TRACK_DIR = "/content/drive/MyDrive/UG/proc_single_track_gtp"
PHRASE_DIR = "/content/drive/MyDrive/UG/proc_phrases_gtp"
for file in glob.glob(os.path.join(SINGLE_TRACK_DIR, "*.gp*")):
  print(f"processing {file.split('/')[-1]}")
  get_phrases(file, PHRASE_DIR)

In [None]:
# Generate a JSON annotation for every phrase file in PHRASE_DIR.
# Note: JSON_DIR is rebound here to the phrase-annotation directory; the
# whole-track annotation cell uses the same name for a different directory.
JSON_DIR = "/content/drive/MyDrive/UG/proc_phrases_anno"
for file in glob.glob(os.path.join(PHRASE_DIR, "*.gp*")):
  print(f"processing {file.split('/')[-1]}")
  get_anno(file, JSON_DIR)

In [None]:
"Led Zeppelin - Stairway To Heaven_4_34" seems to be empty because of tied notes from the previous phrase

In [29]:
# Scratch check: load a test file and show the tempo entry of the mix-table
# change attached to beat index 2 of measure index 1 (track 0, voice 0).
file = "/content/drive/MyDrive/tempochange.gp5"
song = guitarpro.parse(file)
song.tracks[0].measures[1].voices[0].beats[2].effect.mixTableChange.tempo

MixTableItem(value=200, duration=0, allTracks=False)

In [33]:
# Start value of the first beat of measure index 1 (track 0, voice 0).
song.tracks[0].measures[1].voices[0].beats[0].start

4800

In [41]:
# Duration time of beat index 1 of measure index 1 (track 0, voice 0).
song.tracks[0].measures[1].voices[0].beats[1].duration.time

960

In [53]:
# List the beat objects of measure index 1 (track 0, voice 0).
song.tracks[0].measures[1].voices[0].beats

[<guitarpro.models.Beat at 0x7faab7e9dcd0>,
 <guitarpro.models.Beat at 0x7faab4308590>,
 <guitarpro.models.Beat at 0x7faab3d43d90>,
 <guitarpro.models.Beat at 0x7faab3d6c410>]

In [37]:
# Start value of beat index 3 of measure index 1 (track 0, voice 0).
song.tracks[0].measures[1].voices[0].beats[3].start

7680

In [58]:
# Scratch check: load a second test file and show the duration time of the
# last beat of the first measure (track 0, voice 0).
file = "/content/drive/MyDrive/repeat.gp5"
song = guitarpro.parse(file)
song.tracks[0].measures[0].voices[0].beats[-1].duration.time

1920