In [2]:
%load_ext autoreload

In [3]:
%autoreload 2
import os
import glob
import guitarpro
import librosa, soundfile
import numpy as np
import scipy
from utils import *
from operations import *

## Break multi-track gtp files into single-track gtp files

In [None]:
# Run get_single_tracks on every *.gp* file in MULTI_TRACK_DIR, writing the
# results to SINGLE_TRACK_DIR. Volume unification is switched on; force_clean
# and both disable_* options are switched off.
MULTI_TRACK_DIR = "/Volumes/MacOnly/UG_raw/all_time_top_by_hits"
SINGLE_TRACK_DIR = "/Volumes/MacOnly/UG_proc/all_time_top_by_hits/single_track_gtps"

single_track_options = dict(
    unify_volume=True,
    force_clean=False,
    disable_repeats=False,
    disable_mixTableChange=False,
)

i = 0  # stays 0 when nothing matches, otherwise ends as the number of files seen
for i, file in enumerate(glob.glob(os.path.join(MULTI_TRACK_DIR, "*.gp*")), start=1):
    base_name = file.rsplit("/", 1)[-1]
    print(f"processing file {i}: {base_name}")
    get_single_tracks(file=file, output_dir=SINGLE_TRACK_DIR, **single_track_options)



In [None]:
MULTI_TRACK_DIR = "/Volumes/MacOnly/UG_raw/all_time_top_by_hits"
# Per the author's note, the files written to clean_single_track_gtps have
# unified volume, a clean guitar tone, no repeats and no mixTableChanges.
# get_single_tracks is called with its default options here.
CLEAN_SINGLE_TRACK_DIR = "/Volumes/MacOnly/UG_proc/all_time_top_by_hits/clean_single_track_gtps"

i = 0  # stays 0 when nothing matches, otherwise ends as the number of files seen
for i, file in enumerate(glob.glob(os.path.join(MULTI_TRACK_DIR, "*.gp*")), start=1):
    base_name = file.rsplit("/", 1)[-1]
    print(f"processing file {i}: {base_name}")
    get_single_tracks(file=file, output_dir=CLEAN_SINGLE_TRACK_DIR)



## Segment single-track audio into poly and mono segments (GT)

In [4]:
# Smoke test: parse one clean single-track file and split it into poly / mono
# segments with poly_vs_mono.
file_name = "DragonForce - Through The Fire And Flames_Lead 2.gp5"
test_track_path = os.path.join(CLEAN_SINGLE_TRACK_DIR, file_name)
song = guitarpro.parse(test_track_path)
poly, mono = poly_vs_mono(song)


In [5]:
# Show the poly result of the test above: a list of [start, end] pairs
# (the segmentation cell further down uses these values as seconds).
poly


[[37.2, 38.400000000000006],
 [288.6, 289.2],
 [289.8, 290.4],
 [414.0, 418.79999999999995]]

In [6]:
# Show the mono result of the test above: a list of [start, end] pairs
# (the segmentation cell further down uses these values as seconds).
mono


[[0.0, 37.2],
 [38.4, 288.59999999999997],
 [289.2, 289.8],
 [290.4, 414.0],
 [418.8, 433.2]]

In [4]:
CLEAN_SINGLE_TRACK_DIR = (
    "/Volumes/MacOnly/UG_proc/all_time_top_by_hits/clean_single_track_gtps"
)
SINGLE_TRACK_AUDIO_DIR = (
    "/Volumes/MacOnly/UG_proc/all_time_top_by_hits/clean_single_track_audio"
)
POLY_SEGMENTS_DIR = "/Volumes/MacOnly/UG_proc/all_time_top_by_hits/poly_audio_segments"
MONO_SEGMENTS_DIR = "/Volumes/MacOnly/UG_proc/all_time_top_by_hits/mono_audio_segments"


def seconds_to_samples(seconds, sr):
    """Convert a time in seconds to the nearest sample index at sample rate ``sr``.

    Rounding is used instead of truncation so that float noise in a boundary
    (e.g. 38.400000000000006 vs 38.4) cannot shift the cut by one sample;
    the end of one segment and the start of the next land on the same index.
    """
    return int(round(seconds * sr))


def write_segments(y, sr, segments, track_title, output_dir):
    """Cut ``y`` at every [start_sec, end_sec] pair and write one WAV per segment.

    Each file is named "<track_title>_<index>_<int(start_sec)>s.wav" and is
    written to ``output_dir``, which is created first if it does not exist.
    """
    os.makedirs(output_dir, exist_ok=True)
    for j, bounds in enumerate(segments):
        start_sec, end_sec = bounds[0], bounds[1]
        segment = y[seconds_to_samples(start_sec, sr) : seconds_to_samples(end_sec, sr)]
        segment_file_name = "{}_{}_{}s.wav".format(track_title, j, int(start_sec))
        soundfile.write(os.path.join(output_dir, segment_file_name), segment, sr)


i = 0
for i, file in enumerate(
    glob.glob(os.path.join(CLEAN_SINGLE_TRACK_DIR, "*.gp5")), start=1
):
    # Indexing [0] avoids assigning to `_`, which IPython uses for the last output.
    track_title = os.path.splitext(os.path.basename(file))[0]
    print(f"processing file {i}: {track_title}")

    song = guitarpro.parse(file)
    poly, mono = poly_vs_mono(song)

    # The audio file is looked up by the same base name as the gp5 file.
    y, sr = librosa.load(
        os.path.join(SINGLE_TRACK_AUDIO_DIR, track_title + ".wav"), sr=None, mono=True
    )

    write_segments(y, sr, poly, track_title, POLY_SEGMENTS_DIR)
    write_segments(y, sr, mono, track_title, MONO_SEGMENTS_DIR)



processing file 1: ACDC - Back In Black (ver 4 by GuitarManiac09)_Angus Young
processing file 2: ACDC - Back In Black (ver 4 by GuitarManiac09)_Malcolm Young
processing file 3: ACDC - Back In Black_Guitar 1
processing file 4: ACDC - Back In Black_Guitar 2
processing file 5: ACDC - Back In Black_Guitar 3
processing file 6: ACDC - Highway To Hell (ver 3)_Guitar 2
processing file 7: ACDC - Highway To Hell (ver 3)_Guitar
processing file 8: Adele - Rolling In The Deep_Acoustic guitar (muted)
processing file 9: Adele - Rolling In The Deep_Electric guitar
processing file 10: Aerosmith - Dream On (ver 3)_Guitar 1 (dist)
processing file 11: Aerosmith - Dream On (ver 3)_Guitar 1
processing file 12: Aerosmith - Dream On (ver 3)_Guitar 2
processing file 13: Avenged Sevenfold - Afterlife_  Synyster Gates - Rhythm
processing file 14: Avenged Sevenfold - Afterlife_  Zacky Vengeance - Lead
processing file 15: Avenged Sevenfold - Afterlife_  Zacky Vengeance - Rhythm
processing file 16: Avenged Sevenfol

## Generate frames and labels from the poly and mono segments

In [None]:
# get a bunch of frames, compute mel-spectra, and put them into a npz file
# put corresponding labels in the last row
# shuffle them
# train/test split
# sklearn classifier


In [35]:
# Frame-level features for ONE mono segment: mel-spectrum per frame (taken
# from the mel-spectrogram), with the class label appended as the last row.
FRAME_SIZE = 2048
HOP_SIZE = 2048
MONO_LABEL = 0

file = "/Volumes/MacOnly/UG_proc/all_time_top_by_hits/mono_audio_segments/ACDC - Back In Black (ver 4 by GuitarManiac09)_Angus Young_1_195s.wav"
y, sr = librosa.load(file, sr=None, mono=True)
# Audio and sample rate are passed by keyword: newer librosa releases no longer
# accept them positionally, while the keyword form also works on older ones.
mel_spec = librosa.feature.melspectrogram(
    y=y, sr=sr, n_fft=FRAME_SIZE, hop_length=HOP_SIZE, center=False
)

# Drop every frame (column) whose mel bins are all exactly zero.
empty_frames = np.all(mel_spec == 0, axis=0)
mel_spec = mel_spec[:, ~empty_frames]
print(f"deleted {int(empty_frames.sum())} empty frames")
print(mel_spec.shape)

# One label per remaining frame, stored as an extra last row (float, as before).
label = np.full((1, mel_spec.shape[1]), MONO_LABEL, dtype=float)
features_with_label = np.append(mel_spec, label, axis=0)

# Sanity check: print the label stored at the bottom of every frame column.
for i in range(features_with_label.shape[1]):
    feature = features_with_label[:, i]
    print(feature[-1])


deleted 0 empty frames
(128, 6)
0.0
0.0
0.0
0.0
0.0
0.0


In [44]:
# Frame-level features for every mono segment: mel-spectrum per frame (taken
# from the mel-spectrogram), with the class label appended as the last row.
FRAME_SIZE = 2048
HOP_SIZE = 2048
N_MELS = 128  # librosa's default number of mel bands, made explicit
MONO_LABEL = 0

# POLY_SEGMENTS_DIR = "/Volumes/MacOnly/UG_proc/all_time_top_by_hits/poly_audio_segments"
# Full data set (currently disabled in favour of the small test folder below):
# MONO_SEGMENTS_DIR = "/Volumes/MacOnly/UG_proc/all_time_top_by_hits/mono_audio_segments"
MONO_SEGMENTS_DIR = "/Volumes/MacOnly/UG_proc/all_time_top_by_hits/test"


def drop_silent_frames_and_label(mel_spec, label_value):
    """Remove all-zero frames from ``mel_spec`` and append a label row.

    Parameters
    ----------
    mel_spec : 2-D array with one frame per column.
    label_value : number written into the appended last row for every frame.

    Returns
    -------
    (labelled, n_deleted) : ``labelled`` has one more row than ``mel_spec`` and
        only the columns that were not entirely zero; ``n_deleted`` is the
        number of columns that were dropped.
    """
    silent = np.all(mel_spec == 0, axis=0)
    kept = mel_spec[:, ~silent]
    label_row = np.full((1, kept.shape[1]), label_value, dtype=float)
    return np.append(kept, label_row, axis=0), int(silent.sum())


features_with_label = []

i = 0
for i, file in enumerate(glob.glob(os.path.join(MONO_SEGMENTS_DIR, "*.wav")), start=1):
    # Indexing [0] avoids assigning to `_`, which IPython uses for the last output.
    title = os.path.splitext(os.path.basename(file))[0]
    print(f"processing file {i}: {title}")

    y, sr = librosa.load(file, sr=None, mono=True)
    # Audio and sample rate are passed by keyword: newer librosa releases no
    # longer accept them positionally.
    mel_spec = librosa.feature.melspectrogram(
        y=y, sr=sr, n_fft=FRAME_SIZE, hop_length=HOP_SIZE, n_mels=N_MELS, center=False
    )

    feature_with_label, n_deleted = drop_silent_frames_and_label(mel_spec, MONO_LABEL)
    print(f"deleted {n_deleted} empty frames")
    features_with_label.append(feature_with_label)

if features_with_label:
    mono_data = np.concatenate(features_with_label, axis=1)
else:
    # np.concatenate raises an unhelpful error on an empty list; report the
    # real cause and fall back to an empty array with the expected row count.
    print(f"no .wav files found in {MONO_SEGMENTS_DIR}")
    mono_data = np.empty((N_MELS + 1, 0))
mono_data.shape

processing file 1: DragonForce - Through The Fire And Flames_Acoustic Guitar_43_0s
deleted 8903 empty frames
processing file 2: Metallica - Seek And Destroy (ver 2)_Gtr. III_192_0s
deleted 8604 empty frames
processing file 3: Metallica - The Day That Never Comes_Harmony_194_0s
deleted 10625 empty frames
processing file 4: Queen - Bohemian Rhapsody_Gtr 1_247_0s
deleted 9705 empty frames


(129, 2881)

In [43]:
# Persist the mono frames (mel features plus the label row) as a compressed
# .npz archive.
test_npz_path = "/Volumes/MacOnly/UG_proc/all_time_top_by_hits/test.npz"
np.savez_compressed(test_npz_path, mono_data)
# Open questions:
# - problem: normalization eliminates zero segments by subtracting the mean;
#   maybe normalize per frame, and only if the frame is not entirely zeros?
# - overlapping frames are probably not necessary?
# - larger frame size?
# - run the whole thing on colab; it may consume a lot of memory

In [47]:
from sklearn.svm import LinearSVC

# mono_data stores one frame per COLUMN with the label in the LAST ROW, while
# scikit-learn expects one sample per row. Split off the label row and
# transpose the rest. (features_with_label is a plain list of per-file arrays
# at this point, so it cannot be sliced in two dimensions.)
features = mono_data[:-1, :].T
# Copy so that the label tweak below does not write back into mono_data.
labels = mono_data[-1, :].copy()

# FIXME: placeholder until poly frames are added. Every label is 0 (mono) so
# far, so one label is flipped to give the classifier a second class to fit.
labels[3] = 1
model = LinearSVC()
model.fit(X=features, y=labels)

LinearSVC()

## Break the single-track gtp files into 4-bar phrases (necessary??)

In [None]:
# Run get_phrases on every *.gp* file in SINGLE_TRACK_DIR, passing PHRASE_DIR
# as its second argument.
SINGLE_TRACK_DIR = "/Volumes/MacOnly/UG_proc/all_time_top_by_hits/single_track_gtps"
PHRASE_DIR = "/Volumes/MacOnly/UG_proc/all_time_top_by_hits/phrase_gtps"

single_track_files = glob.glob(os.path.join(SINGLE_TRACK_DIR, "*.gp*"))
for file in single_track_files:
    base_name = file.rsplit("/", 1)[-1]
    print(f"processing {base_name}")
    get_phrases(file, PHRASE_DIR)



In [12]:
# Parse a single phrase file and run poly_vs_mono on it.
PHRASE_DIR = "/Volumes/MacOnly/UG_proc/all_time_top_by_hits/phrase_gtps"
file_name = "ACDC - Highway To Hell (ver 3)_Guitar 2_6.gp5"
phrase_path = os.path.join(PHRASE_DIR, file_name)
song = guitarpro.parse(phrase_path)
poly, mono = poly_vs_mono(song)


BELOW IS SUBJECT TO CHANGE

In [None]:
# Walk over every phrase file. The annotation call itself is still disabled,
# so for now this only prints the file names.
JSON_DIR = "/content/drive/MyDrive/UG/proc_phrases_anno"
phrase_files = glob.glob(os.path.join(PHRASE_DIR, "*.gp*"))
for file in phrase_files:
    base_name = file.rsplit("/", 1)[-1]
    print(f"processing {base_name}")
    # get_anno(file, JSON_DIR)



In [None]:
# "Led Zeppelin - Stairway To Heaven_4_34" seems to be empty because of tied notes from the previous phrase


In [29]:
# Look at the tempo entry of the mix-table change attached to the third beat
# (index 2) of the second measure (index 1) in the first track.
file = "/content/drive/MyDrive/tempochange.gp5"
song = guitarpro.parse(file)
tempo_change_beat = song.tracks[0].measures[1].voices[0].beats[2]
tempo_change_beat.effect.mixTableChange.tempo


MixTableItem(value=200, duration=0, allTracks=False)

In [33]:
# `start` of the first beat in measure index 1 (track 0, voice 0).
# NOTE(review): the unit is not visible here (presumably ticks); confirm.
song.tracks[0].measures[1].voices[0].beats[0].start


4800

In [41]:
# `duration.time` of the second beat (index 1) in measure index 1.
# NOTE(review): the unit is not visible here (presumably ticks); confirm.
song.tracks[0].measures[1].voices[0].beats[1].duration.time


960

In [53]:
# List the beat objects held by voice 0 of measure index 1 in track 0.
song.tracks[0].measures[1].voices[0].beats


[<guitarpro.models.Beat at 0x7faab7e9dcd0>,
 <guitarpro.models.Beat at 0x7faab4308590>,
 <guitarpro.models.Beat at 0x7faab3d43d90>,
 <guitarpro.models.Beat at 0x7faab3d6c410>]

In [37]:
# `start` of the fourth beat (index 3) in measure index 1, for comparison with
# the first beat's start.
# NOTE(review): the unit is not visible here (presumably ticks); confirm.
song.tracks[0].measures[1].voices[0].beats[3].start


7680

In [58]:
# Look at `duration.time` of the last beat in the first measure of the
# repeat test file.
file = "/content/drive/MyDrive/repeat.gp5"
song = guitarpro.parse(file)
last_beat = song.tracks[0].measures[0].voices[0].beats[-1]
last_beat.duration.time


1920