In [11]:
import pandas as pd
import os
import ast

In [13]:
def load(filepath):
    """Read a metadata CSV, picking the parsing rules from its file name.

    The base name of ``filepath`` is checked, in this order, for the
    substrings ``'features'``, ``'echonest'``, ``'genres'`` and ``'tracks'``;
    the first match decides how the file is parsed.

    Parameters
    ----------
    filepath : str or os.PathLike
        Location of the CSV file.

    Returns
    -------
    pandas.DataFrame or None
        * features / echonest: first column as index, three header rows.
        * genres: first column as index, single header row.
        * tracks: first column as index, two header rows, plus the column
          conversions described below.
        * ``None`` when the file name matches none of the substrings
          (nothing is read in that case).

    Notes
    -----
    For a tracks file the following conversions are applied:

    * tag / genre list columns are parsed from their string form with
      ``ast.literal_eval``;
    * date columns are converted with ``pd.to_datetime``;
    * ``('set', 'subset')`` becomes an ordered categorical with the order
      ``small < medium < large``;
    * a handful of low-cardinality text columns become plain categoricals.

    ``pd.CategoricalDtype`` is used directly for the ordered categorical
    (needs pandas >= 0.24) rather than first trying the
    ``astype('category', categories=..., ordered=...)`` keywords that modern
    pandas rejects with a ``TypeError``.
    """
    filename = os.path.basename(filepath)

    # Both kinds of file share the same layout: three header rows.
    if 'features' in filename or 'echonest' in filename:
        return pd.read_csv(filepath, index_col=0, header=[0, 1, 2])

    if 'genres' in filename:
        return pd.read_csv(filepath, index_col=0)

    if 'tracks' in filename:
        tracks = pd.read_csv(filepath, index_col=0, header=[0, 1])

        # Columns stored as the string form of a Python list.
        list_columns = [('track', 'tags'), ('album', 'tags'),
                        ('artist', 'tags'), ('track', 'genres'),
                        ('track', 'genres_all')]
        for column in list_columns:
            # literal_eval only accepts Python literals, never arbitrary code.
            tracks[column] = tracks[column].map(ast.literal_eval)

        date_columns = [('track', 'date_created'), ('track', 'date_recorded'),
                        ('album', 'date_created'), ('album', 'date_released'),
                        ('artist', 'date_created'),
                        ('artist', 'active_year_begin'),
                        ('artist', 'active_year_end')]
        for column in date_columns:
            tracks[column] = pd.to_datetime(tracks[column])

        # Ordered so that comparisons such as `subset <= 'medium'` work.
        subset_dtype = pd.CategoricalDtype(
            categories=('small', 'medium', 'large'), ordered=True)
        tracks['set', 'subset'] = tracks['set', 'subset'].astype(subset_dtype)

        category_columns = [('track', 'genre_top'), ('track', 'license'),
                            ('album', 'type'), ('album', 'information'),
                            ('artist', 'bio')]
        for column in category_columns:
            tracks[column] = tracks[column].astype('category')

        return tracks

    # Unrecognised file name: same result as before, now stated explicitly.
    return None


In [15]:
# Read the per-track metadata table; the path is relative to this notebook's
# working directory.
tracks = load(filepath="../../../fma_metadata/tracks.csv")

In [18]:
# Build the title / artist / top-genre lookup in one step instead of creating
# an empty frame and filling it column by column across several cells.
# Tuple keys select straight from the two-level column index, so no
# intermediate sub-frame is materialised; the row index is taken from `tracks`.
track_name_artist = pd.DataFrame({
    "title": tracks[("track", "title")],
    "artist": tracks[("artist", "name")],
    "genre_top": tracks[("track", "genre_top")],
})

In [32]:
# Persist the lookup table next to the notebook; the row index is written as
# the first CSV column.
track_name_artist.to_csv(path_or_buf="track_title_artist.csv")

In [37]:
# Read the exported file back as a sanity check. No index_col is given, so the
# saved index comes back as an ordinary column under a fresh integer index.
pd.read_csv(filepath_or_buffer="track_title_artist.csv")

Unnamed: 0,track_id,title,artist,genre_top
0,2,Food,AWOL,Hip-Hop
1,3,Electric Ave,AWOL,Hip-Hop
2,5,This World,AWOL,Hip-Hop
3,10,Freeway,Kurt Vile,Pop
4,20,Spiritual Level,Nicky Cook,
...,...,...,...,...
106569,155316,The Auger,Spowder,Rock
106570,155317,Let's Skin Ruby,Spowder,Rock
106571,155318,My House Smells Like Kim Deal/Pulp,Spowder,Rock
106572,155319,The Man With Two Mouths,Spowder,Rock


In [785]:
# Display the current value of `artist_name`.
# NOTE(review): no visible cell assigns `artist_name`, and the execution count
# jumps from 37 to 785. Presumably it is a sample of artist names taken from
# `tracks` -- confirm, and add the defining cell so the notebook survives
# Restart & Run All.
artist_name

track_id
123965         BenJamin Banger
145744          Cheese N Pot-C
97691                   Tickle
36653                 Diploide
141159               godmanwho
94235                 ICE FLEX
25801             Buguinha Dub
60496                     MUTE
75647            Stinky Treats
34098     Propaganda Anonymous
Name: name, dtype: object

In [787]:
# Display the current value of `genre_name`.
# NOTE(review): no visible cell assigns `genre_name`. Presumably it holds the
# top-genre labels for the same rows shown for `artist_name` -- confirm, and
# add the defining cell so the notebook survives Restart & Run All.
genre_name

track_id
123965          Hip-Hop
145744          Hip-Hop
97691           Hip-Hop
36653           Hip-Hop
141159          Hip-Hop
94235           Hip-Hop
25801     International
60496           Hip-Hop
75647           Hip-Hop
34098           Hip-Hop
Name: genre_top, dtype: category
Categories (16, object): ['Blues', 'Classical', 'Country', 'Easy Listening', ..., 'Pop', 'Rock', 'Soul-RnB', 'Spoken']