In [65]:
import pandas as pd
import os
import ast

In [67]:
def load(filepath):
    """Read a metadata CSV, choosing parser settings from its file name.

    The base name of ``filepath`` is matched against a few keywords, checked
    in this order:

    * ``features`` or ``echonest`` -- three header rows, first column as index.
    * ``genres`` -- a single header row, first column as index.
    * ``tracks`` -- two header rows, first column as index, followed by
      type conversions: list-valued columns are parsed from their string
      form, date columns become datetimes, ``('set', 'subset')`` becomes an
      ordered categorical (small < medium < large) and a handful of text
      columns become plain categoricals.

    Parameters
    ----------
    filepath : str
        Path to the CSV file; only its base name is used for dispatching.

    Returns
    -------
    pandas.DataFrame or None
        The loaded table, or ``None`` when the file name matches none of the
        keywords above.
    """
    name = os.path.basename(filepath)

    # Both of these table kinds are read with the same three-row header.
    if 'features' in name or 'echonest' in name:
        return pd.read_csv(filepath, index_col=0, header=[0, 1, 2])

    if 'genres' in name:
        return pd.read_csv(filepath, index_col=0)

    # Anything that is not a tracks table is left unhandled.
    if 'tracks' not in name:
        return None

    tracks = pd.read_csv(filepath, index_col=0, header=[0, 1])

    # Columns that hold Python list literals stored as text.
    list_columns = [
        ('track', 'tags'),
        ('album', 'tags'),
        ('artist', 'tags'),
        ('track', 'genres'),
        ('track', 'genres_all'),
    ]
    for key in list_columns:
        tracks[key] = tracks[key].map(ast.literal_eval)

    # Columns that hold timestamps stored as text.
    date_columns = [
        ('track', 'date_created'),
        ('track', 'date_recorded'),
        ('album', 'date_created'),
        ('album', 'date_released'),
        ('artist', 'date_created'),
        ('artist', 'active_year_begin'),
        ('artist', 'active_year_end'),
    ]
    for key in date_columns:
        tracks[key] = pd.to_datetime(tracks[key])

    subset_order = ('small', 'medium', 'large')
    subset_key = ('set', 'subset')
    try:
        # Older pandas accepted the category options directly in astype().
        tracks[subset_key] = tracks[subset_key].astype(
            'category', categories=subset_order, ordered=True)
    except (ValueError, TypeError):
        # The categories and ordered arguments were removed in pandas 0.25,
        # so fall back to an explicit CategoricalDtype.
        subset_dtype = pd.CategoricalDtype(categories=subset_order, ordered=True)
        tracks[subset_key] = tracks[subset_key].astype(subset_dtype)

    # Text columns stored as unordered categoricals.
    category_columns = [
        ('track', 'genre_top'),
        ('track', 'license'),
        ('album', 'type'),
        ('album', 'information'),
        ('artist', 'bio'),
    ]
    for key in category_columns:
        tracks[key] = tracks[key].astype('category')

    return tracks


In [69]:
# Load the tracks table. The file name contains 'tracks', so load() reads a
# two-row column header and converts the list, date and categorical columns.
tracks = load("../../../fma_metadata/tracks.csv")

In [70]:
# Display the columns grouped under the top-level 'track' header.
tracks['track']

Unnamed: 0_level_0,bit_rate,comments,composer,date_created,date_recorded,duration,favorites,genre_top,genres,genres_all,information,interest,language_code,license,listens,lyricist,number,publisher,tags,title
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2,256000,0,,2008-11-26 01:48:12,2008-11-26,168,2,Hip-Hop,[21],[21],,4656,en,Attribution-NonCommercial-ShareAlike 3.0 Inter...,1293,,3,,[],Food
3,256000,0,,2008-11-26 01:48:14,2008-11-26,237,1,Hip-Hop,[21],[21],,1470,en,Attribution-NonCommercial-ShareAlike 3.0 Inter...,514,,4,,[],Electric Ave
5,256000,0,,2008-11-26 01:48:20,2008-11-26,206,6,Hip-Hop,[21],[21],,1933,en,Attribution-NonCommercial-ShareAlike 3.0 Inter...,1151,,6,,[],This World
10,192000,0,Kurt Vile,2008-11-25 17:49:06,2008-11-26,161,178,Pop,[10],[10],,54881,en,Attribution-NonCommercial-NoDerivatives (aka M...,50135,,1,,[],Freeway
20,256000,0,,2008-11-26 01:48:56,2008-01-01,311,0,,"[76, 103]","[17, 10, 76, 103]",,978,en,Attribution-NonCommercial-NoDerivatives (aka M...,361,,3,,[],Spiritual Level
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
155316,320000,0,,2017-03-30 15:23:34,NaT,162,1,Rock,[25],"[25, 12]",,122,,Creative Commons Attribution-NonCommercial-NoD...,102,,3,,[],The Auger
155317,320000,0,,2017-03-30 15:23:36,NaT,217,1,Rock,[25],"[25, 12]",,194,,Creative Commons Attribution-NonCommercial-NoD...,165,,4,,[],Let's Skin Ruby
155318,320000,0,,2017-03-30 15:23:37,NaT,404,2,Rock,[25],"[25, 12]",,214,,Creative Commons Attribution-NonCommercial-NoD...,168,,6,,[],My House Smells Like Kim Deal/Pulp
155319,320000,0,,2017-03-30 15:23:39,NaT,146,0,Rock,[25],"[25, 12]",,336,,Creative Commons Attribution-NonCommercial-NoD...,294,,5,,[],The Man With Two Mouths


In [77]:
# Per-track genre scores, indexed by the first CSV column. The merged output
# further down shows the columns Rock, Electronic, Pop, Hip-Hop and Folk.
# NOTE(review): presumably classifier probabilities, going by the file name -- confirm.
classified_top_genre = pd.read_csv("base_data_prob_overfitting_logistic.csv",index_col=[0])

In [26]:
# Second scores table, read from the parent directory with the first CSV column as index.
# NOTE(review): this cell's execution count (26) is older than the cells around
# it, so it may not have been re-run in the current session -- verify with Restart & Run All.
classified_remaining_genre = pd.read_csv("../reclassified_remaining_genres.csv",index_col=[0])

In [28]:
# Stack the two score tables row-wise.
# NOTE(review): all_genre_probs is not referenced by any later cell visible here;
# the merge below uses classified_top_genre only -- confirm that is intended.
all_genre_probs = pd.concat([classified_top_genre,classified_remaining_genre])

In [79]:
# Select the interest, favorites and listens columns from the 'track' group.
track_recommendation_data = tracks['track'].loc[:, ["interest", "favorites", "listens"]]

In [81]:
# Display the selected per-track columns.
track_recommendation_data

Unnamed: 0_level_0,interest,favorites,listens
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2,4656,2,1293
3,1470,1,514
5,1933,6,1151
10,54881,178,50135
20,978,0,361
...,...,...,...
155316,122,1,102
155317,194,1,165
155318,214,2,168
155319,336,0,294


In [83]:
# Join the genre scores with the per-track columns on their shared index,
# keeping only the track ids present in both tables.
recommendation_data = pd.merge(
    classified_top_genre,
    track_recommendation_data,
    how="inner",
    left_index=True,
    right_index=True,
)

In [85]:
# Disabled (commented-out) export of the merged table under a different file name:
# recommendation_data.to_csv("optimized_recommendation_data.csv")

In [87]:
# Display the merged table: genre score columns followed by interest, favorites and listens.
recommendation_data

Unnamed: 0_level_0,Rock,Electronic,Pop,Hip-Hop,Folk,interest,favorites,listens
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2,0.286368,0.016376,0.236220,0.124459,0.336576,4656,2,1293
3,0.317199,0.011163,0.440916,0.112801,0.117921,1470,1,514
5,0.550803,0.035311,0.181760,0.102528,0.129597,1933,6,1151
10,0.061826,0.014149,0.035781,0.052516,0.835728,54881,178,50135
134,0.264930,0.040877,0.170940,0.133790,0.389463,1126,3,943
...,...,...,...,...,...,...,...,...
155315,0.101480,0.056093,0.038208,0.052576,0.751643,153,1,128
155316,0.062029,0.079743,0.033572,0.039813,0.784843,122,1,102
155317,0.306270,0.099794,0.073537,0.114412,0.405988,194,1,165
155318,0.116667,0.054354,0.053267,0.055791,0.719921,214,2,168


In [93]:
# Write the merged table, index included, to a CSV in the current working directory.
recommendation_data.to_csv("basic_recommendation_data_logistic_new.csv")