In [51]:
import warnings
warnings.filterwarnings('ignore')

import IPython.display as ipd
import numpy as np
import pandas as pd
import librosa
import librosa.display
import os
import matplotlib.pyplot as plt
import sys

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split 

sys.path.insert(1, "/Users/jrmylee/Documents/Development/projects/mir/projects/sonata/models")
from preprocess import Preprocess
import torch

In [52]:
datasets = {
    "isophonics-beetles" : {
        "mp3": "/Users/jrmylee/Documents/Development/projects/mir/datasets/isophonics/beetles_albums",
        "labels": "/Users/jrmylee/Documents/Development/projects/mir/datasets/isophonics/beetles_annotations"
    },
    "isophonics-king" : {
        "mp3": "/Users/jrmylee/Documents/Development/projects/mir/datasets/isophonics/carol_king_albums",
        "labels": "/Users/jrmylee/Documents/Development/projects/mir/datasets/isophonics/carol_king_annotations"
    }
}

sample_rate = 22050
hop_size= 2048
window_size= 10
song_hz= 22050
p = Preprocess(sample_rate, hop_size, window_size, song_hz, None)

king_albums = p.get_mp3(datasets['isophonics-king']['mp3'])
king_labels = p.get_labels(datasets['isophonics-king']['labels'])
beetles_albums = p.get_mp3(datasets['isophonics-beetles']['mp3'])
beetles_labels = p.get_labels(datasets['isophonics-beetles']['labels'])

data = [
    (king_albums, king_labels),
    (beetles_albums, beetles_labels)
]

In [53]:
import re

def filename_to_title(filename):
    name = re.sub(r'\([^)]*\)', '', filename)
    new_name = ""
    for character in name:
        if character == '.':
            break
        if character.isalnum() and not character.isnumeric():
            new_name += character
    return new_name

def path_to_album(path):
    return os.path.basename(os.path.normpath(path))

def generate_song_labels(label_album_path, labels_dict):
    song_label_dict = {}
    file_labels = labels_dict[label_album_path]
    for file in file_labels:
        if not file['filename'].endswith('.lab'):
            continue
        song_label_dict[file['title']] = []
        with open(os.path.join(label_album_path, file['filename'])) as fp:
            line = fp.readline()
            while line:
                tokens = line.split(' ')
                if len(tokens) == 1: tokens = line.split('\t')
                onset = int(float(tokens[0]))
                offset = int(float(tokens[1]))
                chord = tokens[2][:len(tokens[2]) - 1]
                song_label_dict[file['title']].append((onset, offset, chord))
                line = fp.readline()
    return song_label_dict

In [54]:
import chords
import random
from audiomentations import Compose, AddGaussianNoise, TimeStretch, PitchShift, Shift
c = chords.Chords()

def augment_pitch(data, sr, label):
    semitone = random.randint(1, 12)
    aug_chord = c.shift(semitone, data, sr,label)
    mfccs = librosa.feature.mfcc(y=aug_chord[0], sr=sr, n_mfcc=40)
    mfccs_processed = np.mean(mfccs.T,axis=0)
    
    return mfccs_processed, aug_chord[1]

def augment_stretched_noise(data, sr, label, noise=True, stretch=True):
    composition = []
    if noise:
        composition.append(AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=0.5))
    if stretch:
        composition.append(TimeStretch(min_rate=0.8, max_rate=1.25, p=0.5))
    augmenter = Compose(composition)
    
    aug_chord = augmenter(samples=data, sample_rate=sr)
    mfccs = librosa.feature.mfcc(y=aug_chord, sr=sr, n_mfcc=40)
    mfccs_processed = np.mean(mfccs.T,axis=0)
    
    return mfccs_processed

In [55]:
#                         intervals = album_label_dict[album_title][song_title]
#                         audio_slice, chords = get_chords_in_interval(data, intervals, (curr_sec, curr_sec + window_size))
#                         mfccs = librosa.feature.mfcc(y=audio_slice, sr=sample_rate, n_mfcc=40)
#                         mfccs_processed = np.mean(mfccs.T, axis=0)
#                         curr_sec += hop_interval
#                         features_list.append(mfccs_processed)
#                         chords_list.append(chords)
#                     for intervals in album_label_dict[album_title][song_title]:
#                         start, end, chord = intervals[0], intervals[1], intervals[2]
#                         if end > start:
#                             start_index = librosa.time_to_samples(start)
#                             end_index = librosa.time_to_samples(end)
#                             audio_slice = data[int(start_index):int(end_index)]
#                             if len(audio_slice) == 0:
#                                 continue
#                             mfccs = librosa.feature.mfcc(y=audio_slice, sr=sample_rate, n_mfcc=40)
#                             mfccs_processed = np.mean(mfccs.T,axis=0)
#                             features.append([mfccs_processed,  chord])

#                             if chord != "N":
#                                 pitched, pitched_label = augment_pitch(audio_slice, sample_rate, chord)
#                                 features.append([pitched, pitched_label])

#                             stretch_noised = augment_stretched_noise(audio_slice, sample_rate, chord)
#                             features.append([stretch_noised, chord])
                            
#                             noised = augment_stretched_noise(audio_slice, sample_rate, chord, True, False)
#                             features.append([noised, chord])
                            
#                             stretched = augment_stretched_noise(audio_slice, sample_rate, chord, False, True)
#                             features.append([stretched, chord])


In [56]:
from torch import save, load
save_dir = "/Users/jrmylee/Documents/Development/projects/mir/projects/sonata/checkpoints/"

song_hz = 22050
hop_size = 2048
window_size = 10

hop_interval = hop_size / song_hz #in seconds

get_num_samples = lambda x : x / hop_interval

def get_chords_in_interval(audio, chord_intervals, interval):
    start_index = librosa.time_to_samples(interval[0])
    end_index = librosa.time_to_samples(interval[1])
    audio_slice = audio[int(start_index):int(end_index)]
    ref_start, ref_end = interval[0], interval[1]
    
    chords = []
    curr_interval = chord_intervals[0]
    index = 0
    while curr_interval[0] < ref_end and index < len(chord_intervals):
        curr_interval = chord_intervals[index]
        if curr_interval[1] > ref_start:
            chords.append(curr_interval[2])
        index += 1
    return audio_slice, chords

def get_chord_at_time(chord_intervals, time):
    for interval in chord_intervals:
        start, end = interval[0], interval[1]
        if start <= time and end >= time:
            return interval[2]
    return "C"
def get_mfcc(audio, sample_rate):
    mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=144)
    mfccs_processed = np.mean(mfccs.T, axis=0)
    return mfccs_processed
def get_cqt(audio, sample_rate):
    return librosa.cqt(audio, sr=sample_rate, n_bins=108, bins_per_octave=24, hop_length=2048)
def get_start_end_indices(start_time, end_time):
    start_index = librosa.time_to_samples(start_time)
    end_index = librosa.time_to_samples(end_time)
    return start_index, end_index

def generate_features(albums_dict, album_label_dict):
    features_list = []
    chords_list = []
    counter = 0
    for album in albums_dict:
        album_title = path_to_album(album)
        for song in albums_dict[album]:
            counter += 1
            song_path = os.path.join(album, song["filename"])
            song_title = filename_to_title(song["filename"])
            song_save_path = save_dir + song_title + "_data.pth"
            print(str(counter) +"th song: " + song_title)
            if not os.path.exists(song_save_path):
                if album_title in album_label_dict:
                    if song_title in album_label_dict[album_title]:
                        print(song_title + " does not exist. Generating features.")
                        data, sr = librosa.load(song_path)
                        curr_start_time = 0
                        total_duration = librosa.get_duration(y=data, sr=sr)
                        num_samples = get_num_samples(total_duration)
                        intervals = album_label_dict[album_title][song_title]
                        song_features = []
                        song_chords = []
                        while curr_start_time + window_size < total_duration:
                            curr_sec = curr_start_time
                            curr_chords = [] # chords in the time frame
                            while curr_sec < curr_start_time + window_size:
                                chord = get_chord_at_time(intervals, curr_sec)
                                curr_sec += hop_interval
                                curr_chords.append(chord)
                            start_index, end_index = get_start_end_indices(curr_start_time, curr_start_time+window_size)
                            audio_slice = data[int(start_index):int(end_index)]
                            curr_features = get_cqt(audio_slice, sr)
                            song_features.append(curr_features)
                            song_chords.append(curr_chords)
                            curr_start_time += hop_interval
                        print("saving song: " + song_title)
                        save_obj = {
                            "song": song_title,
                            "album": album_title,
                            "features": song_features,
                            "chords": song_chords
                        }
                        save(save_obj, song_save_path)
                        features_list.extend(song_features)
                        chords_list.extend(song_chords)
            else:
                print(song_title + " exists.  Fetching cached.")
                cached = load(song_save_path)
                features_list.extend(cached['features'])
                chords_list.extend(cached['chords'])
                            
    return features_list, chords_list

def save_to_file(fname, arr):
    a = np.array(arr)
    np.savetxt(fname, a, fmt='%d')
def load_from_file(fname):
    try:
        b = np.loadtxt(fname, dtype=int)
        return b
    except:
        return None

In [57]:
features, chords = [], []
for d in data:
    album_label_dict = {}
    albums_dict = d[0]
    l_dict = d[1]
    for label_path in l_dict:
        song_label_dict = generate_song_labels(label_path, l_dict)
        album_title = path_to_album(label_path)
        album_label_dict[album_title] = song_label_dict
    f, c = generate_features(albums_dict, album_label_dict)
    features.extend(f)
    chords.extend(c)

1th song: SmackwaterJack
2th song: HomeAgain
HomeAgain exists.  Fetching cached.
3th song: ANaturalWoman
4th song: IFeelTheEarthMove
IFeelTheEarthMove exists.  Fetching cached.
5th song: Tapestry
6th song: Beautiful
Beautiful exists.  Fetching cached.
7th song: WayOverYonder
WayOverYonder exists.  Fetching cached.
8th song: YouveGotAFriend
YouveGotAFriend exists.  Fetching cached.
9th song: SmackwaterJack
10th song: ItsTooLate
ItsTooLate exists.  Fetching cached.
11th song: WillYouLoveMeTomorrow
12th song: WhereYouLead
13th song: SoFarAway
SoFarAway exists.  Fetching cached.
14th song: OutInTheCold
1th song: Flying
Flying exists.  Fetching cached.
2th song: IAmTheWalrus
IAmTheWalrus exists.  Fetching cached.
3th song: MagicalMysteryTour
MagicalMysteryTour exists.  Fetching cached.
4th song: TheFoolOnTheHill
TheFoolOnTheHill exists.  Fetching cached.
5th song: IAmTheWalrusRecordingSession
6th song: IAmtheWalrus
IAmtheWalrus exists.  Fetching cached.
7th song: IAmTheWalrus
IAmTheWalrus e

174th song: IWannaBeYourMan
IWannaBeYourMan exists.  Fetching cached.
175th song: HappyBirthdayDearSaturdayClub
176th song: ThisBoy
177th song: Money
Money exists.  Fetching cached.
178th song: Clarabella
179th song: DevilInHerHeart
DevilInHerHeart exists.  Fetching cached.
180th song: LendMeYourComb
181th song: SheLovesYou
182th song: TheHippyHippyShake
183th song: YouReallyGotAHoldOnMe
YouReallyGotAHoldOnMe exists.  Fetching cached.
184th song: AllMyLoving
AllMyLoving exists.  Fetching cached.
185th song: Money
Money exists.  Fetching cached.
186th song: TillThereWasYou
TillThereWasYou exists.  Fetching cached.
187th song: IWanttoHoldYourHand
188th song: YouReallyGotAHoldOnMe
YouReallyGotAHoldOnMe exists.  Fetching cached.
189th song: RollOverBeethoven
RollOverBeethoven exists.  Fetching cached.
190th song: HoldMeTight
HoldMeTight exists.  Fetching cached.
191th song: DontBotherMe
DontBotherMe exists.  Fetching cached.
192th song: Lucille
193th song: ThisBoy
194th song: ThisBoy
195th

In [58]:
dataset = list(zip(features, chords))

In [59]:
le = LabelEncoder()

In [60]:
import sys
sys.path.insert(1, '/Users/jrmylee/Documents/Development/projects/mir/repos/BTC-ISMIR19')
from btc_model import *
from torch import optim
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
config = {
    'feature_size' : 108,
    'timestep' : 108,
    'num_chords' : 133,
    'input_dropout' : 0.2,
    'layer_dropout' : 0.2,
    'attention_dropout' : 0.2,
    'relu_dropout' : 0.2,
    'num_layers' : 8,
    'num_heads' : 4,
    'hidden_size' : 128,
    'total_key_depth' : 128,
    'total_value_depth' : 128,
    'filter_size' : 128,
    'loss' : 'ce',
    'probs_out' : False
}
model = BTC_model(config=config).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.0001, weight_decay=0.0, betas=(0.9, 0.98), eps=1e-9)

In [61]:
from torch import is_tensor, tensor
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import LabelEncoder

class ChordDataset(Dataset):
    def __init__(self, dataset):
        le = LabelEncoder()
        d = [[None, None] for i in range(len(dataset))]
        for i in range(len(dataset)):
            d[i][0] = dataset[i][0]
            d[i][1] = le.fit_transform(dataset[i][1])
        self.dataset = d
    def __len__(self):
        return len(self.dataset)
    def __getitem__(self, idx):
        if is_tensor(idx):
            idx = idx.tolist()
        sample = { 'audio': self.dataset[idx][0], 'chord': self.dataset[idx][1]}
        return sample

def _collate_fn(batch):
    batch_size = len(batch)

    features = []
    chords = []

    for i in range(batch_size):
        sample = batch[i]
        feature = sample['audio']
        chord = sample['chord']
        features.append(feature)
        chords.append(chord)
    features = torch.tensor(features, dtype=torch.float32)
    chords = torch.tensor(chords, dtype=torch.int64)

    return features, chords

class ChordDataloader(DataLoader):
    def __init__(self, *args, **kwargs):
        super(ChordDataloader, self).__init__(*args, **kwargs)
        self.collate_fn = _collate_fn

In [62]:
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

train_set, test_set = ChordDataset(train_dataset), ChordDataset(test_dataset) 
train_dataloader = ChordDataloader(train_set, batch_size=128, shuffle=True, num_workers=0)
test_dataloader = ChordDataloader(test_set, batch_size=128, shuffle=True, num_workers=0)

for epoch in range(1):
    model.train()
    
    running_loss = 0.0
    print("epoch: " + str(epoch))
#     Training
    print(" Training...")
    remaining = train_size
    for i_batch, data in enumerate(train_dataloader):
        if i_batch % 10 == 0:
            print(" Number of samples remaining: " + str(remaining))
        features, chords = data
        features.requires_grad = True
        
        optimizer.zero_grad()
        features = features.to(device)
        chords = chords.to(device)
        # Train
        prediction, total_loss, weights, second = model(features, chords)
        
        running_loss += total_loss.item()
        
        if i_batch % 100 == 99:
            print("  batch: " + str(i_batch))
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0
        
        total_loss.backward()
        optimizer.step()
        
        remaining -= 128
# Validation
    print("Done training!  Validation:")

    with torch.no_grad():
        model.eval()
        correct = 0
        total = 0
        for i, data in enumerate(test_dataloader):
            val_features, val_chords = data
            val_features.requires_grad = True
            
            optimizer.zero_grad()
            val_features = features.to(device)
            val_chords = chords.to(device)
            # Train
            val_prediction, val_loss, weights, val_second = model(val_features, val_chords)
            total += val_prediction.size(0)
            correct += (val_prediction.view(val_chords.size(0), 108) == val_chords).sum().item()
        result = (100 * correct / total)
        print("Validation result: %" + str(result) )
    file_name = "model-epoch-" + str(epoch)
    model_obj = {"model": model.state_dict(), 'optimizer': optimizer.state_dict(), "epoch": epoch}
    torch.save(model_obj, file_name)

epoch: 0
 Training...
 Number of samples remaining: 161987
 Number of samples remaining: 160707
 Number of samples remaining: 159427
 Number of samples remaining: 158147
 Number of samples remaining: 156867
 Number of samples remaining: 155587
 Number of samples remaining: 154307
 Number of samples remaining: 153027
 Number of samples remaining: 151747
 Number of samples remaining: 150467
  batch: 99
[1,     2] loss: 2.320
 Number of samples remaining: 149187
 Number of samples remaining: 147907
 Number of samples remaining: 146627
 Number of samples remaining: 145347
 Number of samples remaining: 144067
 Number of samples remaining: 142787
 Number of samples remaining: 141507
 Number of samples remaining: 140227
 Number of samples remaining: 138947
 Number of samples remaining: 137667
  batch: 199
[1,     2] loss: 1.715
 Number of samples remaining: 136387
 Number of samples remaining: 135107
 Number of samples remaining: 133827
 Number of samples remaining: 132547
 Number of samples 

In [46]:
save_dir

'/Users/jrmylee/Documents/Development/projects/mir/projects/sonata/checkpoints/'

In [17]:
import torch
a = torch.tensor(chords)

In [28]:
a = list(zip(flatten_features, chords))

In [32]:
print(len(a[0][1]))

108
