In [575]:
import pandas as pd
import os
import ast

In [577]:
def load(filepath):
    """Read a metadata CSV, picking the parsing rules from its file name.

    The base name of ``filepath`` is checked, in order, for these substrings:

    * ``'features'`` or ``'echonest'`` -- read with a three-row column header.
    * ``'genres'`` -- read with a single header row.
    * ``'tracks'`` -- read with a two-row column header, then post-processed:
      list-valued columns are parsed from their string form, date columns are
      converted to datetimes, ``('set', 'subset')`` becomes an ordered
      categorical (small < medium < large) and several text columns become
      unordered categoricals.

    In every case the first CSV column becomes the index.

    Parameters
    ----------
    filepath : str or path-like
        Location of the CSV file.

    Returns
    -------
    pandas.DataFrame or None
        The parsed table, or ``None`` when the file name matches none of the
        known kinds.
    """
    filename = os.path.basename(filepath)

    # Both file kinds share the same layout, so they share one branch.
    if 'features' in filename or 'echonest' in filename:
        return pd.read_csv(filepath, index_col=0, header=[0, 1, 2])

    if 'genres' in filename:
        return pd.read_csv(filepath, index_col=0)

    if 'tracks' in filename:
        tracks = pd.read_csv(filepath, index_col=0, header=[0, 1])

        # These cells hold the text form of a Python list (e.g. "[21]");
        # literal_eval turns them back into real lists without using eval().
        list_columns = [('track', 'tags'), ('album', 'tags'), ('artist', 'tags'),
                        ('track', 'genres'), ('track', 'genres_all')]
        for column in list_columns:
            tracks[column] = tracks[column].map(ast.literal_eval)

        date_columns = [('track', 'date_created'), ('track', 'date_recorded'),
                        ('album', 'date_created'), ('album', 'date_released'),
                        ('artist', 'date_created'), ('artist', 'active_year_begin'),
                        ('artist', 'active_year_end')]
        for column in date_columns:
            tracks[column] = pd.to_datetime(tracks[column])

        # The astype('category', categories=..., ordered=...) signature was
        # removed in pandas 0.25, so the dtype object is used directly rather
        # than probing for the old signature and catching the failure (which
        # could also hide an unrelated ValueError/TypeError).
        SUBSETS = ('small', 'medium', 'large')
        subset_dtype = pd.CategoricalDtype(categories=SUBSETS, ordered=True)
        tracks['set', 'subset'] = tracks['set', 'subset'].astype(subset_dtype)

        category_columns = [('track', 'genre_top'), ('track', 'license'),
                            ('album', 'type'), ('album', 'information'),
                            ('artist', 'bio')]
        for column in category_columns:
            tracks[column] = tracks[column].astype('category')

        return tracks

    # Unrecognised file name: kept as a None result for backward compatibility.
    return None


In [579]:
# Relative path: resolved against the notebook's working directory.
TRACKS_CSV_PATH = "../../../fma_metadata/tracks.csv"
tracks = load(TRACKS_CSV_PATH)

In [580]:
# Display the 'track' column group of the loaded metadata.
tracks['track']

Unnamed: 0_level_0,bit_rate,comments,composer,date_created,date_recorded,duration,favorites,genre_top,genres,genres_all,information,interest,language_code,license,listens,lyricist,number,publisher,tags,title
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2,256000,0,,2008-11-26 01:48:12,2008-11-26,168,2,Hip-Hop,[21],[21],,4656,en,Attribution-NonCommercial-ShareAlike 3.0 Inter...,1293,,3,,[],Food
3,256000,0,,2008-11-26 01:48:14,2008-11-26,237,1,Hip-Hop,[21],[21],,1470,en,Attribution-NonCommercial-ShareAlike 3.0 Inter...,514,,4,,[],Electric Ave
5,256000,0,,2008-11-26 01:48:20,2008-11-26,206,6,Hip-Hop,[21],[21],,1933,en,Attribution-NonCommercial-ShareAlike 3.0 Inter...,1151,,6,,[],This World
10,192000,0,Kurt Vile,2008-11-25 17:49:06,2008-11-26,161,178,Pop,[10],[10],,54881,en,Attribution-NonCommercial-NoDerivatives (aka M...,50135,,1,,[],Freeway
20,256000,0,,2008-11-26 01:48:56,2008-01-01,311,0,,"[76, 103]","[17, 10, 76, 103]",,978,en,Attribution-NonCommercial-NoDerivatives (aka M...,361,,3,,[],Spiritual Level
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
155316,320000,0,,2017-03-30 15:23:34,NaT,162,1,Rock,[25],"[25, 12]",,122,,Creative Commons Attribution-NonCommercial-NoD...,102,,3,,[],The Auger
155317,320000,0,,2017-03-30 15:23:36,NaT,217,1,Rock,[25],"[25, 12]",,194,,Creative Commons Attribution-NonCommercial-NoD...,165,,4,,[],Let's Skin Ruby
155318,320000,0,,2017-03-30 15:23:37,NaT,404,2,Rock,[25],"[25, 12]",,214,,Creative Commons Attribution-NonCommercial-NoD...,168,,6,,[],My House Smells Like Kim Deal/Pulp
155319,320000,0,,2017-03-30 15:23:39,NaT,146,0,Rock,[25],"[25, 12]",,336,,Creative Commons Attribution-NonCommercial-NoD...,294,,5,,[],The Man With Two Mouths


In [582]:
# Genre probabilities per track; the first CSV column becomes the index.
top_genre_prob_path = "top_genre_prob_by_optimized_classification.csv"
classified_top_genre = pd.read_csv(
    top_genre_prob_path,
    index_col=[0],
)

In [585]:
# Display the genre-probability table that was just read.
classified_top_genre

Unnamed: 0_level_0,Rock,Electronic,Pop,Hip-Hop,Folk
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2,0.003639,0.010459,0.009752,0.974635,0.001515
3,0.045891,0.044842,0.106042,0.785288,0.017937
5,0.005435,0.020162,0.036643,0.930348,0.007411
10,0.162003,0.133598,0.663075,0.021528,0.019796
134,0.030814,0.061230,0.042618,0.854423,0.010914
...,...,...,...,...,...
155315,0.973578,0.005273,0.014719,0.001956,0.004475
155316,0.995178,0.000831,0.001596,0.000050,0.002345
155317,0.970530,0.010954,0.010181,0.000799,0.007536
155318,0.994592,0.002137,0.002263,0.000290,0.000719


In [587]:
# Second probability table, read the same way (first CSV column as index).
remaining_genre_path = "../reclassified_remaining_genres.csv"
classified_remaining_genre = pd.read_csv(
    remaining_genre_path,
    index_col=[0],
)

In [589]:
# Stack the two probability tables row-wise.
# NOTE(review): all_genre_probs is not referenced by any later visible cell;
# the merge below uses classified_top_genre only -- confirm that is intended.
genre_prob_frames = [classified_top_genre, classified_remaining_genre]
all_genre_probs = pd.concat(genre_prob_frames)

In [591]:
# Keep only the three popularity counters from the track-level metadata.
popularity_columns = ["interest", "favorites", "listens"]
track_popularity = tracks['track'][popularity_columns]
track_recommendation_data = pd.DataFrame(track_popularity)

In [593]:
# Display the interest / favorites / listens subset.
track_recommendation_data

Unnamed: 0_level_0,interest,favorites,listens
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2,4656,2,1293
3,1470,1,514
5,1933,6,1151
10,54881,178,50135
20,978,0,361
...,...,...,...
155316,122,1,102
155317,194,1,165
155318,214,2,168
155319,336,0,294


In [595]:
# Join genre probabilities with popularity counters on the shared index;
# with the default join type only index values present in both frames remain.
recommendation_data = pd.merge(
    classified_top_genre,
    track_recommendation_data,
    left_index=True,
    right_index=True,
)

In [485]:
# Persist the merged table (index included) next to the notebook.
recommendation_csv_path = "optimized_final_recommendation_data.csv"
recommendation_data.to_csv(recommendation_csv_path)

In [488]:
# Display the merged probabilities + popularity table.
recommendation_data

Unnamed: 0_level_0,Rock,Electronic,Pop,Hip-Hop,Folk,interest,favorites,listens
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2,0.003639,0.010459,0.009752,0.974635,0.001515,4656,2,1293
3,0.045891,0.044842,0.106042,0.785288,0.017937,1470,1,514
5,0.005435,0.020162,0.036643,0.930348,0.007411,1933,6,1151
10,0.162003,0.133598,0.663075,0.021528,0.019796,54881,178,50135
134,0.030814,0.061230,0.042618,0.854423,0.010914,1126,3,943
...,...,...,...,...,...,...,...,...
155315,0.973578,0.005273,0.014719,0.001956,0.004475,153,1,128
155316,0.995178,0.000831,0.001596,0.000050,0.002345,122,1,102
155317,0.970530,0.010954,0.010181,0.000799,0.007536,194,1,165
155318,0.994592,0.002137,0.002263,0.000290,0.000719,214,2,168


In [597]:
# NOTE(review): this name is spelled with a single "m" (recomendation_data), so it
# is a separate variable from recommendation_data used by the following cells. It
# also reads "final_recommendation_data.csv", not the
# "optimized_final_recommendation_data.csv" written above -- confirm both are intended.
# No index_col is passed, so the saved index comes back as an "Unnamed: 0" column.
recomendation_data = pd.read_csv("final_recommendation_data.csv")

In [599]:
# Display the table read from "final_recommendation_data.csv"
# (single-"m" variable, distinct from recommendation_data).
recomendation_data

Unnamed: 0,track_id,Rock,Electronic,Pop,Hip-Hop,Folk,interest,favorites,listens
0,2,0.003639,0.010459,0.009752,0.974635,0.001515,4656,2,1293
1,3,0.045891,0.044842,0.106042,0.785288,0.017937,1470,1,514
2,5,0.005435,0.020162,0.036643,0.930348,0.007411,1933,6,1151
3,10,0.162003,0.133598,0.663075,0.021528,0.019796,54881,178,50135
4,134,0.030814,0.061230,0.042618,0.854423,0.010914,1126,3,943
...,...,...,...,...,...,...,...,...,...
49589,155308,0.783553,0.067902,0.105080,0.019759,0.023706,90,1,82
49590,155309,0.351662,0.071414,0.294607,0.037861,0.244455,90,0,79
49591,155310,0.411248,0.029858,0.400344,0.057910,0.100640,94,0,84
49592,155311,0.616869,0.030322,0.085457,0.014142,0.253210,187,0,171


In [603]:
# Probability vector for the query song; the first CSV column becomes the index.
bank_acc_prob_path = "bank_acc_prob.csv"
bank_acc_prob = pd.read_csv(
    bank_acc_prob_path,
    index_col=[0],
)

In [605]:
# Display the query probabilities (one row, columns labelled 0-4).
bank_acc_prob

Unnamed: 0,0,1,2,3,4
0,0.007922,0.043217,0.003676,0.943913,0.001272


In [607]:
# Display the merged table again before the similarity column is added.
recommendation_data

Unnamed: 0_level_0,Rock,Electronic,Pop,Hip-Hop,Folk,interest,favorites,listens
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2,0.003639,0.010459,0.009752,0.974635,0.001515,4656,2,1293
3,0.045891,0.044842,0.106042,0.785288,0.017937,1470,1,514
5,0.005435,0.020162,0.036643,0.930348,0.007411,1933,6,1151
10,0.162003,0.133598,0.663075,0.021528,0.019796,54881,178,50135
134,0.030814,0.061230,0.042618,0.854423,0.010914,1126,3,943
...,...,...,...,...,...,...,...,...
155315,0.973578,0.005273,0.014719,0.001956,0.004475,153,1,128
155316,0.995178,0.000831,0.001596,0.000050,0.002345,122,1,102
155317,0.970530,0.010954,0.010181,0.000799,0.007536,194,1,165
155318,0.994592,0.002137,0.002263,0.000290,0.000719,214,2,168


In [609]:
from sklearn.metrics.pairwise import cosine_similarity

# Compare the query row against every track using the first five (genre
# probability) columns only.
# NOTE(review): bank_acc_prob columns are labelled 0-4, so the comparison is
# positional -- presumably they follow the same genre order; confirm.
genre_columns = classified_top_genre.columns[:5]
candidate_genre_probs = recommendation_data[genre_columns]
new_song_similarities = cosine_similarity(bank_acc_prob, candidate_genre_probs)

# Row 0 of the result holds the similarities for the first query row.
recommendation_data["cosine_similarity"] = new_song_similarities[0]

In [611]:
# Display the table with the new cosine_similarity column.
recommendation_data

Unnamed: 0_level_0,Rock,Electronic,Pop,Hip-Hop,Folk,interest,favorites,listens,cosine_similarity
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2,0.003639,0.010459,0.009752,0.974635,0.001515,4656,2,1293,0.999357
3,0.045891,0.044842,0.106042,0.785288,0.017937,1470,1,514,0.990057
5,0.005435,0.020162,0.036643,0.930348,0.007411,1933,6,1151,0.999056
10,0.162003,0.133598,0.663075,0.021528,0.019796,54881,178,50135,0.045363
134,0.030814,0.061230,0.042618,0.854423,0.010914,1126,3,943,0.998176
...,...,...,...,...,...,...,...,...,...
155315,0.973578,0.005273,0.014719,0.001956,0.004475,153,1,128,0.010702
155316,0.995178,0.000831,0.001596,0.000050,0.002345,122,1,102,0.008482
155317,0.970530,0.010954,0.010181,0.000799,0.007536,194,1,165,0.009772
155318,0.994592,0.002137,0.002263,0.000290,0.000719,214,2,168,0.008783


In [613]:
# Keep the most similar tracks; copy so later column additions do not touch
# recommendation_data.
TOP_N = 10
top_recommendations = recommendation_data.nlargest(
    n=TOP_N,
    columns="cosine_similarity",
).copy()

In [615]:
# Display the ten most similar tracks.
top_recommendations

Unnamed: 0_level_0,Rock,Electronic,Pop,Hip-Hop,Folk,interest,favorites,listens,cosine_similarity
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
103797,0.003923,0.040953,0.00486,0.948422,0.001841,440,1,255,0.999987
74951,0.005891,0.044285,0.008651,0.939741,0.001432,566,1,429,0.999983
149692,0.002657,0.042196,0.005802,0.948075,0.001271,3906,6,3375,0.999981
91311,0.003305,0.045523,0.00628,0.942835,0.002057,1235,2,1038,0.999981
61161,0.003688,0.045894,0.007368,0.942443,0.000607,3390,1,1939,0.999978
149542,0.004728,0.040163,0.008269,0.945529,0.001309,181,0,126,0.999977
123647,0.001766,0.045254,0.003556,0.948582,0.000842,1646,2,1370,0.999977
143979,0.002486,0.042233,0.007124,0.947312,0.000845,471,0,394,0.999976
46846,0.002799,0.046014,0.006531,0.944323,0.000332,2193,0,1374,0.999976
149539,0.005325,0.046472,0.008583,0.939262,0.000358,172,0,93,0.999975


In [617]:
# Average cosine similarity across the selected tracks.
top_recommendations["cosine_similarity"].mean()

0.9999790280562246

In [619]:
# Empty frame, aligned on the selected tracks, to receive the scaled columns.
normalized_column_names = [
    "normalized_listens",
    "normalized_favorites",
    "normalized_interest",
]
normalized_features = pd.DataFrame(
    index=top_recommendations.index,
    columns=normalized_column_names,
)

In [621]:
from sklearn.preprocessing import MinMaxScaler

# Rescale each popularity counter to [0, 1] over the selected tracks only.
# The scaler is re-fitted per column; fit_transform returns a 2-D array, so
# the single column is taken out with [:, 0].
scaler = MinMaxScaler()
for source_column in ("listens", "favorites", "interest"):
    scaled_values = scaler.fit_transform(top_recommendations[[source_column]])
    normalized_features["normalized_" + source_column] = scaled_values[:, 0]


In [623]:
# Display the min-max scaled popularity columns.
normalized_features

Unnamed: 0_level_0,normalized_listens,normalized_favorites,normalized_interest
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
103797,0.04936,0.166667,0.071773
74951,0.102377,0.166667,0.105517
149692,1.0,1.0,1.0
91311,0.287934,0.333333,0.284681
61161,0.562462,0.166667,0.86181
149542,0.010055,0.0,0.00241
123647,0.389092,0.333333,0.394751
143979,0.091712,0.0,0.080075
46846,0.390311,0.0,0.541243
149539,0.0,0.0,0.0


In [625]:
# Weighted blend: 0.5 similarity, 0.3 listens, 0.1 favorites, 0.1 interest.
# Terms are accumulated in this fixed order so the floating-point result is
# unchanged; the Series align on the shared track_id index.
blended_score = 0.5 * top_recommendations["cosine_similarity"]
blended_score = blended_score + 0.3 * normalized_features["normalized_listens"]
blended_score = blended_score + 0.1 * normalized_features["normalized_favorites"]
blended_score = blended_score + 0.1 * normalized_features["normalized_interest"]
top_recommendations["final_score"] = blended_score

In [627]:
# Display the selected tracks with their final_score.
top_recommendations

Unnamed: 0_level_0,Rock,Electronic,Pop,Hip-Hop,Folk,interest,favorites,listens,cosine_similarity,final_score
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
103797,0.003923,0.040953,0.00486,0.948422,0.001841,440,1,255,0.999987,0.538645
74951,0.005891,0.044285,0.008651,0.939741,0.001432,566,1,429,0.999983,0.557923
149692,0.002657,0.042196,0.005802,0.948075,0.001271,3906,6,3375,0.999981,0.999991
91311,0.003305,0.045523,0.00628,0.942835,0.002057,1235,2,1038,0.999981,0.648172
61161,0.003688,0.045894,0.007368,0.942443,0.000607,3390,1,1939,0.999978,0.771575
149542,0.004728,0.040163,0.008269,0.945529,0.001309,181,0,126,0.999977,0.503246
123647,0.001766,0.045254,0.003556,0.948582,0.000842,1646,2,1370,0.999977,0.689524
143979,0.002486,0.042233,0.007124,0.947312,0.000845,471,0,394,0.999976,0.535509
46846,0.002799,0.046014,0.006531,0.944323,0.000332,2193,0,1374,0.999976,0.671205
149539,0.005325,0.046472,0.008583,0.939262,0.000358,172,0,93,0.999975,0.499988


In [629]:
# Average blended score across the selected tracks.
top_recommendations["final_score"].mean()

0.6415778697245847

In [631]:
# Rank the selected tracks, best blended score first.
final_recommendations = top_recommendations.sort_values(
    by="final_score",
    ascending=False,
)

In [633]:
# Track ids in ranked order.
final_recommendations.index

Index([149692, 61161, 123647, 46846, 91311, 74951, 103797, 143979, 149542,
       149539],
      dtype='int64', name='track_id')

In [635]:
# Track-level metadata rows for the recommendations, in ranked order.
tracks['track'].loc[final_recommendations.index]

Unnamed: 0_level_0,bit_rate,comments,composer,date_created,date_recorded,duration,favorites,genre_top,genres,genres_all,information,interest,language_code,license,listens,lyricist,number,publisher,tags,title
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
149692,80000,0,,2017-01-20 16:05:41,NaT,266,6,Hip-Hop,[21],[21],,3906,,Attribution,3375,,12,,[],Drift
61161,320000,0,,2012-02-27 09:25:18,NaT,127,1,Hip-Hop,"[21, 811]","[811, 21]",,3390,,Attribution-ShareAlike 3.0 International,1939,,16,,[],DillA Remix
123647,320000,0,,2015-08-12 11:24:40,NaT,238,2,Hip-Hop,[21],[21],,1646,,Attribution-NonCommercial-ShareAlike,1370,,2,,[],Side B: Hall Of Dope / Chaos On The One (Part ...
46846,320000,0,,2011-04-16 01:11:15,NaT,212,0,Pop,[10],[10],,2193,,Attribution-NonCommercial,1374,,2,,[],Valentines Day Directions for the Lonely
91311,320000,0,,2013-09-25 14:23:06,NaT,223,2,Hip-Hop,[21],[21],,1235,,Attribution-NonCommercial-NoDerivatives (aka M...,1038,,5,,[],That's How It Goes (FunkDubb)
74951,320000,0,,2013-01-03 12:14:59,NaT,200,1,Hip-Hop,[21],[21],,566,,Attribution-NonCommercial-NoDerivatives (aka M...,429,,2,,[],Braggadocio (Dandimite Riddum Mixx)
103797,320000,0,,2014-04-15 10:46:46,NaT,350,1,Hip-Hop,[21],[21],,440,,Creative Commons Attribution-NonCommercial-NoD...,255,,5,,[],Master Plan (Chad's Original Mix)
143979,320000,0,,2016-09-27 15:44:06,NaT,198,0,Hip-Hop,"[100, 539]","[539, 100, 21]",,471,,Attribution-NonCommercial-ShareAlike,394,,10,,[],Barba
149542,321338,0,,2017-01-18 22:22:04,NaT,179,0,Hip-Hop,[539],"[539, 21]",,181,,Creative Commons Attribution-NonCommercial-NoD...,126,,6,,[],512/Spittin
149539,321353,0,,2017-01-18 22:20:18,NaT,167,0,Hip-Hop,[539],"[539, 21]",,172,,Creative Commons Attribution-NonCommercial-NoD...,93,,3,,[],Nobody


In [637]:
# Titles of the recommended tracks, selected by row labels and column in one step.
ranked_track_ids = final_recommendations.index
track_name = tracks['track'].loc[ranked_track_ids, "title"]

In [639]:
# Artist names for the recommended tracks, in ranked order.
artist_columns = tracks['artist']
artist_name = artist_columns.loc[final_recommendations.index, "name"]

In [641]:
# Top-level genre label recorded for each recommended track.
track_columns = tracks['track']
genre_name = track_columns.loc[final_recommendations.index, "genre_top"]

In [643]:
# Display the recommended track titles.
track_name

track_id
149692                                                Drift
61161                                           DillA Remix
123647    Side B: Hall Of Dope / Chaos On The One (Part ...
46846              Valentines Day Directions for the Lonely
91311                         That's How It Goes (FunkDubb)
74951                   Braggadocio (Dandimite Riddum Mixx)
103797                    Master Plan (Chad's Original Mix)
143979                                                Barba
149542                                          512/Spittin
149539                                               Nobody
Name: title, dtype: object

In [645]:
# Display the recommended tracks' artist names.
artist_name

track_id
149692            K. Sparks
61161     Alaclair Ensemble
123647                C-Doc
46846           Monk Turner
91311         Mported Flows
74951                 C-Doc
103797          DJ Def Chad
143979         Monster Jinx
149542              K.I.R.K
149539              K.I.R.K
Name: name, dtype: object

In [647]:
# Display the genre_top label of each recommended track.
genre_name

track_id
149692    Hip-Hop
61161     Hip-Hop
123647    Hip-Hop
46846         Pop
91311     Hip-Hop
74951     Hip-Hop
103797    Hip-Hop
143979    Hip-Hop
149542    Hip-Hop
149539    Hip-Hop
Name: genre_top, dtype: category
Categories (16, object): ['Blues', 'Classical', 'Country', 'Easy Listening', ..., 'Pop', 'Rock', 'Soul-RnB', 'Spoken']