### Getting Librosa Features for FMA Small Dataset
Dataset Link: https://github.com/mdeff/fma 

### File Paths

In [1]:
import json
import os

import numpy as np
import pandas as pd

In [7]:
# Root of the local project checkout. Set the FOML_PROJECT_DIR environment
# variable before starting the kernel to point the notebook at another
# location; when it is unset the original directory is used, so existing
# runs behave exactly as before.
project_dir = os.environ.get('FOML_PROJECT_DIR', '/home/ayush/FOML_Project')
metadata_dir = os.path.join(project_dir, 'fma_metadata')

# Genre table (read below for its `genre_id` and `title` columns).
genres_file = os.path.join(metadata_dir, 'genres.csv')
# Track metadata with a two-row column header (read below for `track/genre_top`).
tracks_file = os.path.join(metadata_dir, 'tracks.csv')
# Feature table with a three-row column header.
librosa_file = os.path.join(metadata_dir, 'features.csv')
# CSV with `track_id` and `track_path` columns; its track ids select the subset to keep.
fma_small_track_id_audio_path_file = os.path.join(project_dir, 'track_id_to_audio_path_fma_small.csv')

### Create Mapping from Genre Title to Genre ID

In [8]:
def map_genre_title_to_genre_id(genres_file):
    """Build a ``{genre title: genre id}`` lookup from the genres CSV.

    The file is read with its first column as the index, which is then moved
    back into a regular column. The ``title`` and ``genre_id`` columns are
    paired row by row; if a title occurs more than once, the id from its last
    row wins.

    Side effect: the mapping is also written, with sorted keys, to
    ``genre_title_to_genre_id.json`` in the current working directory.

    Parameters
    ----------
    genres_file : str
        Path to the genres CSV. It must provide ``genre_id`` and ``title``
        columns.

    Returns
    -------
    dict
        Genre title -> genre id.
    """
    genres_frame = pd.read_csv(genres_file, delimiter=',', index_col=0, header=0).reset_index()

    # Pair each title with the id found on the same row.
    title_to_id = dict(zip(genres_frame['title'], genres_frame['genre_id']))

    with open('genre_title_to_genre_id.json', 'w') as json_out:
        json.dump(title_to_id, json_out, sort_keys=True, indent=4)

    return title_to_id

### Create Mapping from Track ID to Parent Genre Title

In [9]:
def map_track_id_to_parent_genre_title(tracks_file):
    """Build a ``{track id: top-level genre title}`` lookup from the tracks CSV.

    The file is read with a two-row column header and its first column as the
    index, which is then moved back into a regular ``track_id`` column. Each
    track id is paired with the ``track`` / ``genre_top`` value on the same
    row; values that are missing in the file stay as NaN in the mapping.

    Side effect: the mapping is also written, with sorted keys, to
    ``track_id_to_parent_genre_title.json`` in the current working directory.

    Parameters
    ----------
    tracks_file : str
        Path to the tracks CSV.

    Returns
    -------
    dict
        Track id -> top-level genre title (or NaN).
    """
    tracks_frame = pd.read_csv(tracks_file, delimiter=',', index_col=0, header=[0, 1]).reset_index()

    # Select the `track` group first, then its `genre_top` sub-column.
    track_group = tracks_frame['track']
    top_genre_titles = track_group['genre_top']

    track_to_title = dict(zip(tracks_frame['track_id'], top_genre_titles))

    with open('track_id_to_parent_genre_title.json', 'w') as json_out:
        json.dump(track_to_title, json_out, sort_keys=True, indent=4)

    return track_to_title

### Create the Two JSON Mappings

In [10]:
# Build both lookups; each call also writes its JSON file to the working directory.
dict_genre_title_to_genre_id = map_genre_title_to_genre_id(genres_file=genres_file)
dict_track_id_to_parent_genre_title = map_track_id_to_parent_genre_title(tracks_file=tracks_file)

### Get Librosa Features for FMA Small

In [11]:
def get_librosa_features_fma_small(librosa_file,fma_small_track_id_audio_path_file):
    """Return the feature rows whose track id appears in the given track list.

    Parameters
    ----------
    librosa_file : str
        Path to the feature CSV. It is read with a three-row column header and
        its first column as the index, which is then moved back into a regular
        ``track_id`` column.
    fma_small_track_id_audio_path_file : str
        Path to a CSV with ``track_id`` and ``track_path`` columns. Only the
        track ids are used; ``track_path`` is dropped right after loading.

    Returns
    -------
    pandas.DataFrame
        The rows of the feature table whose ``track_id`` is listed in the
        second file, keeping their original row labels.
    """
    all_features = pd.read_csv(librosa_file, delimiter=',', index_col=0, header=[0, 1, 2]).reset_index()

    subset_tracks = pd.read_csv(fma_small_track_id_audio_path_file, delimiter=',', header=0)
    subset_tracks = subset_tracks.drop(['track_path'], axis=1)

    # Boolean mask over the feature rows: True where the track is in the subset.
    in_subset = all_features['track_id'].isin(subset_tracks['track_id'])

    return all_features[in_subset]

In [12]:
# Keep only the feature rows whose track id is listed in the track/audio-path CSV.
librosa_fma_small_data = get_librosa_features_fma_small(
    librosa_file=librosa_file,
    fma_small_track_id_audio_path_file=fma_small_track_id_audio_path_file,
)

### Get Parent Genre for FMA Small

In [13]:
def get_parent_genre_title_id(librosa_fma_small_data,
                              track_id_to_parent_genre_title=None,
                              genre_title_to_genre_id=None):
    """Add ``parent_genre_title`` and ``parent_genre_id`` columns to the frame.

    The frame is modified in place and the same object is returned.

    Parameters
    ----------
    librosa_fma_small_data : pandas.DataFrame
        Feature table with a ``track_id`` column.
    track_id_to_parent_genre_title : dict, optional
        Track id -> parent genre title (the title may be NaN). When omitted,
        the notebook-level ``dict_track_id_to_parent_genre_title`` is used,
        which matches the original behaviour.
    genre_title_to_genre_id : dict, optional
        Genre title -> genre id. When omitted, the notebook-level
        ``dict_genre_title_to_genre_id`` is used.

    Returns
    -------
    pandas.DataFrame
        ``librosa_fma_small_data`` itself, with the two new columns.

    Raises
    ------
    KeyError
        If a track id, or a non-null genre title, is missing from its lookup.
    """
    # Fall back to the globals built earlier in the notebook so that existing
    # one-argument calls keep working; passing the lookups explicitly removes
    # the dependency on cell execution order.
    if track_id_to_parent_genre_title is None:
        track_id_to_parent_genre_title = dict_track_id_to_parent_genre_title
    if genre_title_to_genre_id is None:
        genre_title_to_genre_id = dict_genre_title_to_genre_id

    # Parent genre title for every track, in row order.
    parent_titles = [track_id_to_parent_genre_title[track_id]
                     for track_id in librosa_fma_small_data['track_id']]
    librosa_fma_small_data['parent_genre_title'] = parent_titles

    # Parent genre id for every title; tracks without a title get NaN.
    parent_ids = [np.nan if pd.isnull(title) else genre_title_to_genre_id[title]
                  for title in parent_titles]
    librosa_fma_small_data['parent_genre_id'] = parent_ids

    return librosa_fma_small_data

In [14]:
# Attach the parent genre title and id columns to the feature table.
librosa_fma_small_data = get_parent_genre_title_id(librosa_fma_small_data=librosa_fma_small_data)

### Create FMA Small Labels

In [15]:
def create_labels_fma_small_label(librosa_fma_small_data):
    """Write the label columns of the feature table to ``./fma_small_labels.csv``.

    The CSV has the columns ``track_id``, ``parent_genre_id`` and
    ``parent_genre_title`` (in that order) and no index column.

    Parameters
    ----------
    librosa_fma_small_data : pandas.DataFrame
        Feature table that already carries ``track_id``, ``parent_genre_title``
        and ``parent_genre_id`` columns.

    Returns
    -------
    None
    """
    # The column order of the output file is fixed here; the loop below only fills values.
    labels = pd.DataFrame(columns=['track_id', 'parent_genre_id', 'parent_genre_title'])

    for column in ('track_id', 'parent_genre_title', 'parent_genre_id'):
        labels[column] = librosa_fma_small_data[column]

    labels.to_csv('./fma_small_labels.csv', index=False, sep=',')

In [16]:
# Write the label table first, then the full feature table (which now includes the label columns).
create_labels_fma_small_label(librosa_fma_small_data=librosa_fma_small_data)
librosa_fma_small_data.to_csv('./librosa_fma_small_data.csv', sep=',', index=False)

In [17]:
# (rows, columns) of the filtered feature table; print() form runs on Python 2 and 3.
print(librosa_fma_small_data.shape)

(8000, 521)


In [18]:
# Preview the first rows of the feature table; print() form runs on Python 2 and 3.
print(librosa_fma_small_data.head())

feature    track_id chroma_cens                                          \
statistics             kurtosis                                           
number                       01        02        03        04        05   
0                 2    7.180653  5.230309  0.249321  1.347620  1.482478   
2                 5    0.527563 -0.077654 -0.279610  0.685883  1.937570   
3                10    3.702245 -0.291193  2.196742 -0.234449  1.367364   
15              140    0.533579 -0.623885 -1.086205 -1.081079 -0.765151   
16              141    0.172898 -0.284804 -1.169662 -1.062855 -0.706868   

feature                                                  ...         tonnetz  \
statistics                                               ...             std   
number            06        07        08        09       ...              06   
0           0.531371  1.481593  2.691455  0.866868       ...        0.012111   
2           0.880839 -0.923192 -0.927232  0.666617       ...        0.014759   

In [90]:
# Preview the track ids; print() form runs on Python 2 and 3.
print(librosa_fma_small_data['track_id'].head())

0       2
2       5
3      10
15    140
16    141
Name: track_id, dtype: int64


In [91]:
# Preview the parent genre ids added above; print() form runs on Python 2 and 3.
print(librosa_fma_small_data['parent_genre_id'].head())

0     21
2     21
3     10
15    17
16    17
Name: parent_genre_id, dtype: int64


In [92]:
# Preview the parent genre titles added above; print() form runs on Python 2 and 3.
print(librosa_fma_small_data['parent_genre_title'].head())

0     Hip-Hop
2     Hip-Hop
3         Pop
15       Folk
16       Folk
Name: parent_genre_title, dtype: object


In [20]:
# Preview the columns under the top-level `mfcc` group; print() form runs on Python 2 and 3.
print(librosa_fma_small_data['mfcc'].head())

statistics  kurtosis                                                    \
number            01        02        03        04        05        06   
0           3.856789  1.541901  0.000816  0.330728  0.118731 -0.342687   
2           2.624517  2.415293  0.440233 -0.782131 -0.771069 -0.724216   
3           5.076893  1.161854  2.095651  1.372743 -0.203574 -0.345354   
15          0.384906  1.203378  0.429393  0.639471 -0.080144 -0.440946   
16         -0.038922  4.771146 -0.378949 -0.309047 -0.661308 -0.531602   

statistics                                            ...           std  \
number            07        08        09        10    ...            11   
0          -0.259252  0.146735  0.410656 -0.162872    ...     10.059609   
2           0.090260  0.152119  0.261731 -0.608905    ...      8.863638   
3          -0.529139  0.561974  0.281350 -0.150672    ...      8.289734   
15         -0.432567 -0.126163  0.507695  0.408915    ...      9.883532   
16          0.029186  0.662524 

In [22]:
# Show the full multi-level column index; print() form runs on Python 2 and 3.
print(librosa_fma_small_data.columns)

MultiIndex(levels=[[u'chroma_cens', u'chroma_cqt', u'chroma_stft', u'mfcc', u'rmse', u'spectral_bandwidth', u'spectral_centroid', u'spectral_contrast', u'spectral_rolloff', u'tonnetz', u'zcr', u'track_id', u'parent_genre_title', u'parent_genre_id'], [u'kurtosis', u'max', u'mean', u'median', u'min', u'skew', u'std', u''], [u'01', u'02', u'03', u'04', u'05', u'06', u'07', u'08', u'09', u'10', u'11', u'12', u'13', u'14', u'15', u'16', u'17', u'18', u'19', u'20', u'']],
           labels=[[11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2