# Packages and input data

In [1]:
import os
import numpy as np
import pandas as pd
import librosa
import soundfile
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import ccpreprocess, ccdownload, ccchord, ccplay

np.random.seed(0)
torch.manual_seed(0)

dir_path   = os.path.abspath('')
excel_train = pd.read_excel(os.path.join(dir_path, 'ccClub music data (popular music).xlsx'), dtype={'Capo':int, '3 or 4':int})
excel_test  = pd.read_excel(os.path.join(dir_path, 'ccClub test data.xlsx'), dtype={'3 or 4':int})

excel_train.head(5)

Unnamed: 0,Title,Link,Start_Second,Tempo,Capo,Tune,3 or 4,Chords
0,心願便利貼,https://www.youtube.com/watch?v=ySb4DUmqrMo,0.0,96.0,0,C,4,"C|C,F/C|C|G7|F|C/E|Dm7,G7|C| C,G/B|Am,C/G|F,..."
1,飛鳥和蟬,https://www.youtube.com/watch?v=V0XUd8f2pz8,3.2,74.0,3,G,4,G|D/F#|Em7|Cmaj7|G|D/F#|Em7|Cmaj7|G|Bm7|Em7|Bm...
2,晴天,https://www.youtube.com/watch?v=v10bFxUDZsA,1.1,68.5,0,G,4,"Em7,Cadd9|G,D/F#|Em7,Cadd9|G,D/F#|Em7,Cadd9|G,..."
3,不是因為天氣晴朗才愛你,https://www.youtube.com/watch?v=9_068Ekk_fs,3.5,68.0,1,G,4,"G|D/F#|Cmaj7|D11|G|D/F#|Cmaj7|D11|G,D/F#|Em7|C..."
4,披星戴月的想你,https://www.youtube.com/watch?v=LX-qN5V1eiE,2.9,100.0,4,C,4,C|C|Em|Em|Am|Am|F|F|C|C|Em|Em|Am|Am|F|F|C|C|Em...


# Download and trim the original music

In [2]:
trim_path = os.path.join(dir_path, 'music_trim')
if os.path.isdir(trim_path) == False:
    os.mkdir(trim_path)

def trim_the_original_music(excel_data, mode):
    for i in range(len(excel_data)):
        ccdownload.yt_wav(link=excel_data['Link'][i], title=excel_data['Title'][i])
        original_music = os.path.join(ccdownload.music_path, f"{excel_data['Title'][i]}.wav")
        trim_music  = os.path.join(trim_path, f"{excel_data['Title'][i]}.wav")
        print(f"Now trimming   {excel_data['Title'][i]} ... ", end='')
        if os.path.isfile(trim_music) == False:
            y, sr = librosa.load(original_music)
            length_per_slice = int(60 / excel_data['Tempo'][i] * sr / excel_data['3 or 4'][i]) # the length of the array per slice
            trim_start    = int(sr * excel_data['Start_Second'][i])
            if  mode == 'train':
                trim_len = length_per_slice * excel_data['3 or 4'][i] * 4 * len(excel_data['Chords'][i].strip('|').split('|'))
            elif mode == 'test':
                trim_len = length_per_slice * excel_data['3 or 4'][i] * 4 * ((len(y)-trim_start)//(length_per_slice * excel_data['3 or 4'][i] * 4))
            else:
                assert False, f"Check your mode -> '{mode}'' should be changed 'train' or 'test'"
            y = y[trim_start : trim_start + trim_len]
            soundfile.write(trim_music, y, sr)
            print(f"Trimming finished!")
        else:
            print(f"File already cut!")

trim_the_original_music(excel_train, mode='train')
trim_the_original_music(excel_test , mode='test')

Now processing 心願便利貼 ... File already exists!
Now trimming   心願便利貼 ... File already cut!
Now processing 飛鳥和蟬 ... File already exists!
Now trimming   飛鳥和蟬 ... File already cut!
Now processing 晴天 ... File already exists!
Now trimming   晴天 ... File already cut!
Now processing 不是因為天氣晴朗才愛你 ... File already exists!
Now trimming   不是因為天氣晴朗才愛你 ... File already cut!
Now processing 披星戴月的想你 ... File already exists!
Now trimming   披星戴月的想你 ... File already cut!
Now processing 想見你想見你想見你 ... File already exists!
Now trimming   想見你想見你想見你 ... File already cut!
Now processing 熱愛105度C的你 ... File already exists!
Now trimming   熱愛105度C的你 ... File already cut!
Now processing 夜空中最亮的星 ... File already exists!
Now trimming   夜空中最亮的星 ... File already cut!
Now processing 在這座城市遺失了你 ... File already exists!
Now trimming   在這座城市遺失了你 ... File already cut!
Now processing 稻香 ... File already exists!
Now trimming   稻香 ... File already cut!
Now processing 幾分之幾 ... File already exists!
Now trimming   幾分之幾 ... File alread

# Transform the input data

In [3]:
class MusicData():
    def __init__(self, music_data, mode, use_slice3or4=False, num_section=None):
        self.music_data = music_data

        if use_slice3or4:
            self.slice3or4 = music_data['3 or 4']
        else:
            self.slice3or4 = 1

        if mode == 'train':
            len_section = len(music_data['Chords'].strip('|').split('|'))
        elif mode == 'test':
            len_section = num_section
        music = ccpreprocess.Music(music_data['Title'], music_data['Tempo'], sections=len_section,
                                    beats_per_section=4, slices_per_beat=self.slice3or4,
                                    f_min=-36, f_max=23, A4=440)
        amplitude_matrix = music.extract_amplitude_matrix()
        amplitude_matrix = amplitude_matrix.reshape(-1, 5, 12)
        amplitude_matrix = np.sum(amplitude_matrix, axis=1)

        self.x = amplitude_matrix
        
        if mode == 'train':
            padding_chord_list = ccchord.chord_padding(music_data['Chords'])
            for c in range(len(padding_chord_list)):
                padding_chord_list[c] = ccchord.chord_simplify(padding_chord_list[c])
                padding_chord_list[c] = ccchord.chord_numeralize(padding_chord_list[c], music_data['Capo'], major_pitch_weight=1)
            numerized_chords = np.array(padding_chord_list)
            self.y = np.repeat(numerized_chords, self.slice3or4, axis=0)
        elif mode == 'test':
            self.y = None
        else:
            assert False, f"Check your mode -> '{mode}'' should be changed 'train' or 'test'"

In [4]:
train_music_data = [MusicData(excel_train.iloc[i], mode='train') for i in range(len(excel_train))]
print(f"minimum of data length = {min(train_music_data, key=lambda m:m.x.shape[0]).x.shape[0]}")

minimum of data length = 80


# Dataset and Neural Network

In [5]:
class MusicDataset(Dataset):
    def __init__(self, music_data, mode, pad_section):
        self.mode = mode

        x_data = []
        y_data = []
        
        for data in music_data:
            for i in range(data.x.shape[0]):
                if i-4*pad_section < 0:
                    x = np.r_[np.zeros((4*pad_section-i, data.x.shape[1])), data.x[:i+4*pad_section]]
                elif i+4*pad_section > data.x.shape[0]:
                    x = np.r_[data.x[i-4*pad_section:], np.zeros((i+4*pad_section-data.x.shape[0], data.x.shape[1]))]
                else:
                    x = data.x[i-4*pad_section:i+4*pad_section]
                x_data.append(torch.tensor(x, dtype=torch.float))
                if mode == 'train':
                    y = data.y[i]
                    y_data.append(torch.tensor(y, dtype=torch.float))

        self.x = x_data
        if self.mode == 'train':
            self.y = y_data

    def __getitem__(self, index):
        if self.mode == 'train':
            return self.x[index], self.y[index]
        elif self.mode == 'test':
            return self.x[index]

    def __len__(self):
        return len(self.x)

In [6]:
class LinearNN(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Flatten(),

            nn.BatchNorm1d(input_dim),
            nn.Linear(input_dim, 1024),
            nn.ReLU(),

            nn.BatchNorm1d(1024),
            nn.Linear(1024, 256),
            nn.ReLU(),

            nn.BatchNorm1d(256),
            nn.Linear(256, 12),

            nn.Softmax(dim=1),
        )

    def forward(self, x):
        x = self.net(x)
        return x

def cos_loss(y_pred, y_true):
    cos = nn.CosineSimilarity(dim=1, eps=1e-08)
    return -cos(y_pred, y_true)

# Training

In [7]:
def train(model, config, loss_func):
    epochs = config['epochs']
    optimizer = getattr(torch.optim, config['optimizer'])(model.parameters(), **config['optim_hypara'])

    model = model.to(device)
    best_valid_loss, early_stop_counter = 1e9, 0
    for epoch in range(epochs):
        train_loss = 0
        model.train()
        for x_train, y_true in train_dataloader:
            optimizer.zero_grad()
            x_train, y_true = x_train.to(device), y_true.to(device)
            y_pred = model(x_train)
            loss = loss_func(y_pred, y_true).mean()
            loss.backward()
            optimizer.step()
            train_loss += loss.detach().cpu().item() * len(x_train)
        train_loss /= len(train_dataset)

        valid_loss = 0
        model.eval()
        for x_valid, y_true in valid_dataloader:
            x_valid, y_true = x_valid.to(device), y_true.to(device)
            with torch.no_grad():
                y_pred = model(x_valid)
                loss = loss_func(y_pred, y_true).mean()
                valid_loss += loss.detach().cpu().item() * len(x_valid)
        valid_loss /= len(valid_dataset)

        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            torch.save(model.state_dict(), config['model_path'])
            early_stop_counter = 0
        else:
            early_stop_counter += 1
        if early_stop_counter >= config['early_stop'] and config['early_stop'] != -1:
            break

        print(f"Epoch {epoch:4d} : train_loss = {train_loss:.3f} | valid_loss = {valid_loss:.3f}")

In [8]:
percentage_of_train = 0.8
pad_section = 4
train_dataset = MusicDataset(train_music_data[:int(percentage_of_train*len(train_music_data))], mode='train', pad_section=pad_section)
valid_dataset = MusicDataset(train_music_data[int(percentage_of_train*len(train_music_data)):], mode='train', pad_section=pad_section)
assert train_dataset[0][0].shape[0] > 12, f"The dimension of x.shape[0] should be greater than 12 -> {train_dataset[0][0].shape[0]}"

train_dataloader    = DataLoader(train_dataset, batch_size=128, shuffle=True)
valid_dataloader    = DataLoader(valid_dataset, batch_size=128, shuffle=True)

print(f"The shape of x is {train_dataset[0][0].shape}  , y is {train_dataset[0][1].shape[0]} | Total data is {len(train_dataset)}")

The shape of x is torch.Size([32, 12])  , y is 12 | Total data is 5396


In [9]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
if os.path.isdir(os.path.join(dir_path, 'model')) == False:
    os.mkdir(os.path.join(dir_path, 'model'))

linear_model  = LinearNN(np.prod(train_dataset[0][0].shape))
linear_config = {
    'model_path' : os.path.join(dir_path, 'model', 'linear.pth'),
    'optimizer' : 'Adam',
    'optim_hypara' : {
        'lr' : 1e-4,
    },
    'epochs' : 500,
    'early_stop' : 10,
}

train(linear_model, linear_config, cos_loss)

Epoch    0 : train_loss = -0.610 | valid_loss = -0.635
Epoch    1 : train_loss = -0.740 | valid_loss = -0.666
Epoch    2 : train_loss = -0.788 | valid_loss = -0.679
Epoch    3 : train_loss = -0.819 | valid_loss = -0.688
Epoch    4 : train_loss = -0.842 | valid_loss = -0.692
Epoch    5 : train_loss = -0.858 | valid_loss = -0.694
Epoch    6 : train_loss = -0.872 | valid_loss = -0.695
Epoch    7 : train_loss = -0.884 | valid_loss = -0.695
Epoch    8 : train_loss = -0.894 | valid_loss = -0.696
Epoch    9 : train_loss = -0.903 | valid_loss = -0.697
Epoch   10 : train_loss = -0.910 | valid_loss = -0.699
Epoch   11 : train_loss = -0.917 | valid_loss = -0.696
Epoch   12 : train_loss = -0.924 | valid_loss = -0.699
Epoch   13 : train_loss = -0.928 | valid_loss = -0.701
Epoch   14 : train_loss = -0.932 | valid_loss = -0.702
Epoch   15 : train_loss = -0.935 | valid_loss = -0.699
Epoch   16 : train_loss = -0.941 | valid_loss = -0.700
Epoch   17 : train_loss = -0.943 | valid_loss = -0.697
Epoch   18

# Predicting Testing Date

In [10]:
def predicting_chord(model, test_MusicData):
    test_dataset    = MusicDataset([test_MusicData], mode='test', pad_section=pad_section)
    test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)
    
    chord_prediction = []
    model.to(device)
    model.eval()
    for x_test in test_dataloader:
        x_test= x_test.to(device)
        with torch.no_grad():
            y_pred = model(x_test).cpu().numpy()
            chord_prediction.append(y_pred)
    
    return np.concatenate(chord_prediction)

In [15]:
test_title, test_index = '', 0
test_section = 50

if test_title != '':
    test_MusicData = MusicData(excel_test[excel_test['Title'] == test_title].iloc[0], mode='test', num_section=test_section)
elif test_index != -1:
    test_MusicData = MusicData(excel_test.iloc[test_index], mode='test', num_section=test_section)
else:
    assert False, "Check your test data."

chord_nparray   = predicting_chord(linear_model, test_MusicData)
chord_candidate = [ccchord.chord_inverse(chord, num_candidate=1, capo=0) for chord in chord_nparray]

ccplay.display_chord(test_MusicData.music_data, delay=0.4, stop_time=60, padding_chord_list=chord_candidate)

Now playing 願溫柔的妳被世界溫柔以待 with Key=nan and Capo=nan
G#          G#          G#          G#          
|..|..|..|..|..|..|..|..|..|..|..|..|..|..|..|..
G#          Cm          D#          Cm          
|..|..|..|..|..|..|..|..|..|..|..|..|..|..|..|..
G#          G#          G#          Fm          
|..|..|..|..|..|..|..|..|..|..|..|..|..|..|..|..
C#          Fm          G#          G#          
|..|..|..|..|..|..|..|..|..|..|..|..|..|..|..|..
G#          G#          G#          Cm          
|..|..|..|..|..|..|..|..|..|..|..|..|..|..|..|..
Gm          D#          Gm          Gm          
|..|..|..|..|..|..|..|..|..|..|..|..|..|..|..|..
G#          G#          G#          D#          
|..|..|..|..|..|..|..|..|..|..|..|..|..|..|..|..
A#m         D#          Cm          D#          
|..|..|..|..|..|..|..|..|..|..|..|..|..|..|..|..
Fm          G#          D#          Cm          
|..|..|..|..|..|..|..|..|..|..|..|..|..|..|..|..
Cm          Cm          Cm          G#          
|..|..|..|..|..|..