In [1]:
%matplotlib inline

import os

import IPython.display as ipd
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn as skl
import sklearn.utils, sklearn.preprocessing, sklearn.decomposition, sklearn.svm
import librosa
import librosa.display
import ast
from sklearn import svm

import utils

# Default size (width, height) applied to every matplotlib figure created in this notebook.
plt.rcParams['figure.figsize'] = (17, 5)

In [2]:
# Location of the mp3 files, taken from the environment
# (None when the AUDIO_DIR variable is not set).
AUDIO_DIR = os.getenv('AUDIO_DIR')

# Read the four metadata tables; utils.load is called once per path,
# in the order listed, and the results are unpacked in that same order.
tracks, genres, features, echonest = map(utils.load, (
    'data/fma_metadata/tracks.csv',
    'data/fma_metadata/genres.csv',
    'data/fma_metadata/features.csv',
    'data/fma_metadata/echonest.csv',
))

# Sanity checks: features and tracks must share an identical index, and
# every echonest row must belong to a track present in the tracks table.
np.testing.assert_array_equal(features.index, tracks.index)
assert echonest.index.isin(tracks.index).all()

# Cell output: the (rows, columns) shape of each table.
tuple(table.shape for table in (tracks, genres, features, echonest))

((106574, 53), (163, 4), (106574, 518), (13129, 249))

In [3]:
# Keep only the track rows whose 'subset' value is 'small'.
smalltracks = tracks[tracks['subset'] == 'small']
smalltracks.shape

(8000, 53)

In [4]:
# Select the feature rows whose index value also appears in smalltracks.
smallfeatures = features[features.index.isin(smalltracks.index)]
smallfeatures.shape

(8000, 518)

In [5]:
# Select the echonest rows whose index value also appears in smalltracks.
smallechonest = echonest[echonest.index.isin(smalltracks.index)]
smallechonest.shape

(1294, 249)

In [7]:
# Count how many times each genre title occurs in the genres table.
genres['title'].value_counts()

Symphony         1
Chamber Music    1
Europe           1
Drone            1
Sound Poetry     1
                ..
Nerdcore         1
Dubstep          1
Surf             1
Hardcore         1
Instrumental     1
Name: title, Length: 163, dtype: int64

In [9]:
# Display the column labels of the small-subset track metadata.
smalltracks.columns

MultiIndex([( 'album',          'comments'),
            ( 'album',      'date_created'),
            ( 'album',     'date_released'),
            ( 'album',          'engineer'),
            ( 'album',         'favorites'),
            ( 'album',                'id'),
            ( 'album',       'information'),
            ( 'album',           'listens'),
            ( 'album',          'producer'),
            ( 'album',              'tags'),
            ( 'album',             'title'),
            ( 'album',            'tracks'),
            ( 'album',              'type'),
            ('artist', 'active_year_begin'),
            ('artist',   'active_year_end'),
            ('artist', 'associated_labels'),
            ('artist',               'bio'),
            ('artist',          'comments'),
            ('artist',      'date_created'),
            ('artist',         'favorites'),
            ('artist',                'id'),
            ('artist',          'latitude'),
          

In [24]:
# Distinct top-level genre labels found in the small subset.
# Note: despite its name, genres_list is a set.
# unique() collapses duplicates before anything is converted to Python
# objects (the old set(list(...)) built one list entry per track first), and
# dropna() keeps a missing label (NaN) from ending up in the set.
genres_list = set(smalltracks[('track', 'genre_top')].dropna().unique())

In [25]:
# Display the collected genre labels.
genres_list

{'Electronic',
 'Experimental',
 'Folk',
 'Hip-Hop',
 'Instrumental',
 'International',
 'Pop',
 'Rock'}

In [26]:
# Keep only the genre rows whose title is one of the labels in genres_list.
smallgenres = genres[genres['title'].isin(genres_list)]

In [27]:
# Display the filtered genre rows.
smallgenres

Unnamed: 0_level_0,#tracks,parent,title,top_level
genre_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2,5271,0,International,2
10,13845,0,Pop,10
12,32923,0,Rock,12
15,34413,0,Electronic,15
17,12706,0,Folk,17
21,8389,0,Hip-Hop,21
38,38154,0,Experimental,38
1235,14938,0,Instrumental,1235


In [28]:
# Write the four small-subset tables as CSV files under data/fma_metadata/,
# each under its own "small"-prefixed file name.
# NOTE(review): any file already present at one of these paths is presumably overwritten — confirm this is intended.
smalltracks.to_csv('data/fma_metadata/smalltracks.csv')
smallfeatures.to_csv('data/fma_metadata/smallfeatures.csv')
smallechonest.to_csv('data/fma_metadata/smallechonest.csv')
smallgenres.to_csv('data/fma_metadata/smallgenres.csv')