# Setup

In [5]:
import os
import numpy as np
from pathlib import Path
import pandas as pd

import utils

In [6]:
gtzan_dir = Path(str(os.environ.get('GTZAN_DIR')))   #C:/VisualStudioRepositories/MUSIC_DATA/gt/Data
fma_dir = Path(str(os.environ.get('FMA_DIR')))       #C:/VisualStudioRepositories/MUSIC_DATA/fma

gtzan_image_dir = gtzan_dir / 'spectrograms'
gtzan_df_path = gtzan_dir / 'gtzan_df.csv'

fma_small_audio_dir = fma_dir / 'fma_small'
fma_small_image_dir = fma_dir / 'spectrograms/fma_small'

fma_medium_audio_dir = fma_dir / 'fma_medium'
fma_medium_image_dir = fma_dir / 'spectrograms/fma_medium'

fma_df_path = fma_dir / 'fma_df.csv'

# Summary

GTZAN:
- 10 CLASSES
- 998 - entire dataset
- 800 - train set (80 samples per each class)
- 98 - val set (9 samples for Jazz and Reggae, 10 samples per each other class)
- 100 - test set (10 samples per each class)

FMA-SMALL Summary:
- 8 TOP CLASSES
- 7994 - entire dataset
- 6394 - train set:
    - 800 samples per Pop, Folk, International, Instrumental
    - 799 samples per Rock, Experimental, Electronic
    - 797 samples for Hip-Hop
- 800 - val set (100 samples per class)
- 800 - test set (100 samples per class)

FMA-MEDIUM Summary:
- 16 TOP CLASSES
- 24979 - entire dataset
- 19903 - train set:
    - Rock                   5676
    - Electronic             5048
    - Experimental           1800
    - Hip-Hop                1752
    - Folk                   1214
    - Instrumental           1044
    - Pop                     945
    - International           814
    - Classical               495
    - Old-Time / Historic     408
    - Jazz                    306
    - Country                 142
    - Spoken                   94
    - Soul-RnB                 94
    - Blues                    58
    - Easy Listening           13
- 2504 - val set:
    - Rock                   711
    - Electronic             631
    - Experimental           225
    - Hip-Hop                220
    - Folk                   152
    - Instrumental           131
    - Pop                    122
    - International          102
    - Classical               62
    - Old-Time / Historic     51
    - Jazz                    39
    - Country                 18
    - Soul-RnB                18
    - Spoken                  12
    - Blues                    8
    - Easy Listening           2
- 2572 - test set:
    - Rock                   710
    - Electronic             632
    - Experimental           225
    - Hip-Hop                220
    - Instrumental           174
    - Folk                   152
    - Pop                    119
    - International          102
    - Classical               62
    - Old-Time / Historic     51
    - Soul-RnB                42
    - Jazz                    39
    - Country                 18
    - Spoken                  12
    - Blues                    8
    - Easy Listening           6

# Create and save GTZAN dataframe

In [16]:
#path - relative spectrogram image path (only parent and filename)
#set - train/validation/test
#genre - one of 10 classes
gtzan_df = pd.DataFrame(columns=['path', 'set', 'genre'])

In [17]:
# Load image filepaths
gtzan_image_filepaths = list(gtzan_image_dir.rglob('*.png'))
gtzan_image_filepaths = [Path(path.parent.name) / Path(path.name) for path in gtzan_image_filepaths]   

In [18]:
# Save filepaths and assign genres from paths
gtzan_df['path'] = gtzan_image_filepaths
gtzan_df['genre'] = gtzan_df['path'].apply(lambda x: x.parent.name)

80 songs of every genre land in train set.

Split probably could be improved by accounting for artist effect (i. e. forbiding artists in train set from appearing in test set) but GTZAN has so much artist repetition that it could be difficult to meaningfully achieve. It also has no official metadata available.

In [19]:
# Divide into sets
np.random.seed(42)
gtzan_sampled_df = gtzan_df.groupby('genre').sample(n=80, replace=False)
gtzan_df['set'] = np.where(gtzan_df['path'].isin(gtzan_sampled_df['path']), 'train', 'validation')

10 songs for every class moves from validation to test

In [20]:
gtzan_test_df = gtzan_df[gtzan_df['set'] == 'validation']
gtzan_test_sampled_df = gtzan_test_df.groupby('genre').sample(n=10, replace=False)
gtzan_df['set'] = np.where(gtzan_df['path'].isin(gtzan_test_sampled_df['path']), 'test', gtzan_df['set'])

In [21]:
gtzan_df.head(4)

Unnamed: 0,path,set,genre
0,Blues\blues.00000.png,train,Blues
1,Blues\blues.00001.png,test,Blues
2,Blues\blues.00002.png,test,Blues
3,Blues\blues.00003.png,train,Blues


In [22]:
#Save
gtzan_df.to_csv(gtzan_df_path, index=False)

# Create and save FMA dataframe

In [None]:
# contains labels for tracks
tracks_df = utils.load(fma_dir / 'fma_metadata/tracks.csv')

In [None]:
#track_id -  id from tracks.csv
#path     -  relative spectrogram image path (only parent and filename)
#set      -  train or test
#size     -  small/medium fma dataset (note: tracks in 'small' also belong to 'medium')
#genre    -  one of 8 classes
fma_df = pd.DataFrame(columns=['track_id','path', 'set', 'size', 'genre'])

In [None]:
# Load image filepaths
fma_medium_image_filepaths = list(fma_medium_image_dir.rglob('*.png'))
fma_medium_image_filepaths = [Path(path.parent.name) / Path(path.name) for path in fma_medium_image_filepaths]   

In [None]:
# Save filepaths and assign:
# genre from track, top_genre in tracks_df
# size from set, subset in tracks_df
# set from set, split in tracks_df
fma_df['path'] = fma_medium_image_filepaths
fma_df['track_id'] = fma_df['path'].apply(lambda x: int(x.stem))

In [None]:
# train/val/test split done by fma creators, takes care of "artist effect" etc.
fma_df['genre'] = fma_df['track_id'].apply(lambda track_id: tracks_df.loc[track_id]['track']['genre_top'])
fma_df['size'] = fma_df['track_id'].apply(lambda track_id: tracks_df.loc[track_id]['set']['subset'])
fma_df['set'] = fma_df['track_id'].apply(lambda track_id: tracks_df.loc[track_id]['set']['split'])

In [None]:
fma_df.head(13)

Unnamed: 0,track_id,path,set,size,genre
0,2,000\000002.png,training,small,Hip-Hop
1,3,000\000003.png,training,medium,Hip-Hop
2,5,000\000005.png,training,small,Hip-Hop
3,10,000\000010.png,training,small,Pop
4,134,000\000134.png,training,medium,Hip-Hop
5,136,000\000136.png,training,medium,Rock
6,139,000\000139.png,training,medium,Folk
7,140,000\000140.png,training,small,Folk
8,141,000\000141.png,training,small,Folk
9,148,000\000148.png,validation,small,Experimental


In [None]:
#Save
fma_df.to_csv(fma_df_path, index=False)