# Semantic Music Tagging

In [1]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torchaudio
from torchaudio.transforms import MelSpectrogram
from pathlib import Path

## Dataset

**Source:** http://mirg.city.ac.uk/codeapps/the-magnatagatune-dataset

**TODO:** show the expected directory tree (`data/clip_info_final.csv`, `data/annotations_final.csv`, `data/audio/`).

In [46]:
# Clip metadata (tab-separated): clip id, track/artist/album info, segment bounds and mp3 path.
clips = pd.read_csv('./data/clip_info_final.csv', sep='\t')
clips.head()

Unnamed: 0,clip_id,track_number,title,artist,album,url,segmentStart,segmentEnd,original_url,mp3_path
0,2,1,BWV54 - I Aria,American Bach Soloists,J.S. Bach Solo Cantatas,http://www.magnatune.com/artists/albums/abs-so...,30,59,http://he3.magnatune.com/all/01--BWV54%20-%20I...,f/american_bach_soloists-j_s__bach_solo_cantat...
1,6,1,BWV54 - I Aria,American Bach Soloists,J.S. Bach Solo Cantatas,http://www.magnatune.com/artists/albums/abs-so...,146,175,http://he3.magnatune.com/all/01--BWV54%20-%20I...,f/american_bach_soloists-j_s__bach_solo_cantat...
2,10,1,BWV54 - I Aria,American Bach Soloists,J.S. Bach Solo Cantatas,http://www.magnatune.com/artists/albums/abs-so...,262,291,http://he3.magnatune.com/all/01--BWV54%20-%20I...,f/american_bach_soloists-j_s__bach_solo_cantat...
3,11,1,BWV54 - I Aria,American Bach Soloists,J.S. Bach Solo Cantatas,http://www.magnatune.com/artists/albums/abs-so...,291,320,http://he3.magnatune.com/all/01--BWV54%20-%20I...,f/american_bach_soloists-j_s__bach_solo_cantat...
4,12,1,BWV54 - I Aria,American Bach Soloists,J.S. Bach Solo Cantatas,http://www.magnatune.com/artists/albums/abs-so...,320,349,http://he3.magnatune.com/all/01--BWV54%20-%20I...,f/american_bach_soloists-j_s__bach_solo_cantat...


In [47]:
# Tag annotations (tab-separated): clip_id, one column per tag (0/1 in the preview), then mp3_path.
annotations = pd.read_csv('./data/annotations_final.csv', sep='\t')
annotations.head()

Unnamed: 0,clip_id,no voice,singer,duet,plucking,hard rock,world,bongos,harpsichord,female singing,...,rap,metal,hip hop,quick,water,baroque,women,fiddle,english,mp3_path
0,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,f/american_bach_soloists-j_s__bach_solo_cantat...
1,6,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,f/american_bach_soloists-j_s__bach_solo_cantat...
2,10,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,f/american_bach_soloists-j_s__bach_solo_cantat...
3,11,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,f/american_bach_soloists-j_s__bach_solo_cantat...
4,12,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,f/american_bach_soloists-j_s__bach_solo_cantat...


### Filter dataset
Keep only the 50 most frequent tags: remove clips that have none of these tags, and drop all other tag columns.

In [48]:
# Count how often each tag occurs; the first and last columns (clip_id, mp3_path) are not tags.
tag_counts = annotations.iloc[:, 1:-1].sum(axis=0)
top50_tags = tag_counts.sort_values(ascending=False)[:50].index

# Keep only the clips that carry at least one of the 50 most frequent tags.
has_top50_tag = annotations[top50_tags].sum(axis=1) > 0
annotations = annotations[has_top50_tag]

# Drop every tag column that did not make the top 50.
tags_to_remove = annotations.columns[1:-1].difference(top50_tags)
annotations = annotations.drop(tags_to_remove, axis=1)

# Restrict the clip metadata to the clips that survived the filter.
kept_clip_ids = annotations['clip_id']
clips = clips[clips['clip_id'].isin(kept_clip_ids)]
top50_tags

Index(['guitar', 'classical', 'slow', 'techno', 'strings', 'drums',
       'electronic', 'rock', 'fast', 'piano', 'ambient', 'beat', 'violin',
       'vocal', 'synth', 'female', 'indian', 'opera', 'male', 'singing',
       'vocals', 'no vocals', 'harpsichord', 'loud', 'quiet', 'flute', 'woman',
       'male vocal', 'no vocal', 'pop', 'soft', 'sitar', 'solo', 'man',
       'classic', 'choir', 'voice', 'new age', 'dance', 'male voice',
       'female vocal', 'beats', 'harp', 'cello', 'no voice', 'weird',
       'country', 'metal', 'female voice', 'choral'],
      dtype='object')

### Split dataset
Split the dataset randomly into train/validation/test sets (80/10/10 by track number), making sure that clips from the same track never end up in different splits.

In [68]:
# Seed for the shuffle below, so the train/val/test partition is identical on every
# "Restart & Run All". Change it to draw a different split.
SPLIT_SEED = 42

# split tracks: shuffle the distinct track numbers, then cut at 80% and 90%
# NOTE(review): in the preview above `track_number` looks like a position within an
# album rather than a unique track id, so unrelated tracks may share a group -- confirm.
# Clips of one track still always land in the same split.
track_nums = np.random.default_rng(SPLIT_SEED).permutation(clips['track_number'].unique())
n_track_nums = len(track_nums)
train_split, val_split, test_split = np.split(
    track_nums, [int(n_track_nums * 0.8), int(n_track_nums * 0.9)]
)


# assign all clips from tracks to their corresponding split
def clip_files_from_tracks(track_nums):
    """Return the audio file paths of all clips whose track number is in `track_nums`.

    Args:
        track_nums: collection of `track_number` values selecting the clips.

    Returns:
        NumPy array with the `mp3_path` of each selected clip, prefixed with
        'data/audio/'.
    """
    in_split = clips['track_number'].isin(track_nums)
    return 'data/audio/' + clips.loc[in_split, 'mp3_path'].to_numpy()


train_clips = clip_files_from_tracks(train_split)
val_clips = clip_files_from_tracks(val_split)
test_clips = clip_files_from_tracks(test_split)

### Load dataset

In [77]:
class MagnaTagATuneDataset(Dataset):
    """Dataset that turns audio files into mel spectrograms.

    Each item is the output of torchaudio's `MelSpectrogram` transform applied to
    the waveform loaded from one file. No tag labels are returned.

    Args:
        files: indexable collection of audio file paths.
        sample_rate: sample rate in Hz the mel transform is configured for. Audio
            stored at a different rate is resampled to this rate first.
    """

    def __init__(self, files, sample_rate=16000):
        self.files = files
        self.sample_rate = sample_rate
        self.mel_spectrogram = MelSpectrogram(
            sample_rate=sample_rate
        )

    def __getitem__(self, index):
        """Load the file at `index` and return its mel spectrogram."""
        waveform, source_rate = torchaudio.load(self.files[index])
        # The mel filterbank is built for `self.sample_rate`; feeding audio recorded
        # at another rate would silently produce a wrongly scaled spectrogram.
        if source_rate != self.sample_rate:
            waveform = torchaudio.functional.resample(waveform, source_rate, self.sample_rate)
        return self.mel_spectrogram(waveform)

    def __len__(self):
        """Number of audio files in the dataset."""
        return len(self.files)

In [78]:
# One DataLoader per split, keyed by mode. Only the training loader shuffles;
# validation and test keep the file order so evaluation runs are repeatable.
# The loop variable is named `files` so it does not shadow the `clips` DataFrame.
loader = {
    mode: DataLoader(
        MagnaTagATuneDataset(files),
        shuffle=(mode == 'train'),
        pin_memory=True,
        batch_size=8,
    )
    for mode, files in [('train', train_clips), ('val', val_clips), ('test', test_clips)]
}