In [349]:
import pandas as pd
import os
import ast

In [351]:
def load(filepath):
    """Read one of the metadata CSV files into a DataFrame.

    The parsing rules are chosen by looking for a keyword in the base name
    of ``filepath``, tested in this order: ``features``, ``echonest``,
    ``genres``, ``tracks``.

    Parameters
    ----------
    filepath : str
        Path to the CSV file; only its base name is inspected for keywords.

    Returns
    -------
    pandas.DataFrame or None
        * ``features`` / ``echonest``: first column as index, three header rows.
        * ``genres``: first column as index, single header row.
        * ``tracks``: first column as index, two header rows, followed by
          per-column conversions (literal parsing, datetimes, categoricals).
        * ``None`` when the base name contains none of the keywords.
    """
    basename = os.path.basename(filepath)

    # Both table kinds are read with the same three-row column header.
    if 'features' in basename or 'echonest' in basename:
        return pd.read_csv(filepath, index_col=0, header=[0, 1, 2])

    if 'genres' in basename:
        return pd.read_csv(filepath, index_col=0)

    if 'tracks' not in basename:
        return None

    tracks = pd.read_csv(filepath, index_col=0, header=[0, 1])

    # These columns hold Python literals serialized as text; parse them back.
    literal_keys = (
        ('track', 'tags'),
        ('album', 'tags'),
        ('artist', 'tags'),
        ('track', 'genres'),
        ('track', 'genres_all'),
    )
    for key in literal_keys:
        tracks[key] = tracks[key].map(ast.literal_eval)

    # Timestamp-like columns, grouped by their top-level column label.
    date_fields = {
        'track': ('date_created', 'date_recorded'),
        'album': ('date_created', 'date_released'),
        'artist': ('date_created', 'active_year_begin', 'active_year_end'),
    }
    for group, names in date_fields.items():
        for name in names:
            tracks[group, name] = pd.to_datetime(tracks[group, name])

    # The subset column becomes an ordered categorical: small < medium < large.
    subset_key = ('set', 'subset')
    subset_order = ('small', 'medium', 'large')
    try:
        tracks[subset_key] = tracks[subset_key].astype(
            'category', categories=subset_order, ordered=True)
    except (ValueError, TypeError):
        # the categories and ordered arguments were removed in pandas 0.25
        ordered_dtype = pd.CategoricalDtype(categories=subset_order, ordered=True)
        tracks[subset_key] = tracks[subset_key].astype(ordered_dtype)

    # Remaining text columns are converted to plain (unordered) categoricals.
    categorical_keys = (
        ('track', 'genre_top'),
        ('track', 'license'),
        ('album', 'type'),
        ('album', 'information'),
        ('artist', 'bio'),
    )
    for key in categorical_keys:
        tracks[key] = tracks[key].astype('category')

    return tracks


In [353]:
# Load the track metadata table via load(); the relative path depends on the
# notebook's working directory.
tracks = load("../../../fma_metadata/tracks.csv")

In [354]:
# Show the columns grouped under the top-level 'track' label.
tracks['track']

Unnamed: 0_level_0,bit_rate,comments,composer,date_created,date_recorded,duration,favorites,genre_top,genres,genres_all,information,interest,language_code,license,listens,lyricist,number,publisher,tags,title
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2,256000,0,,2008-11-26 01:48:12,2008-11-26,168,2,Hip-Hop,[21],[21],,4656,en,Attribution-NonCommercial-ShareAlike 3.0 Inter...,1293,,3,,[],Food
3,256000,0,,2008-11-26 01:48:14,2008-11-26,237,1,Hip-Hop,[21],[21],,1470,en,Attribution-NonCommercial-ShareAlike 3.0 Inter...,514,,4,,[],Electric Ave
5,256000,0,,2008-11-26 01:48:20,2008-11-26,206,6,Hip-Hop,[21],[21],,1933,en,Attribution-NonCommercial-ShareAlike 3.0 Inter...,1151,,6,,[],This World
10,192000,0,Kurt Vile,2008-11-25 17:49:06,2008-11-26,161,178,Pop,[10],[10],,54881,en,Attribution-NonCommercial-NoDerivatives (aka M...,50135,,1,,[],Freeway
20,256000,0,,2008-11-26 01:48:56,2008-01-01,311,0,,"[76, 103]","[17, 10, 76, 103]",,978,en,Attribution-NonCommercial-NoDerivatives (aka M...,361,,3,,[],Spiritual Level
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
155316,320000,0,,2017-03-30 15:23:34,NaT,162,1,Rock,[25],"[25, 12]",,122,,Creative Commons Attribution-NonCommercial-NoD...,102,,3,,[],The Auger
155317,320000,0,,2017-03-30 15:23:36,NaT,217,1,Rock,[25],"[25, 12]",,194,,Creative Commons Attribution-NonCommercial-NoD...,165,,4,,[],Let's Skin Ruby
155318,320000,0,,2017-03-30 15:23:37,NaT,404,2,Rock,[25],"[25, 12]",,214,,Creative Commons Attribution-NonCommercial-NoD...,168,,6,,[],My House Smells Like Kim Deal/Pulp
155319,320000,0,,2017-03-30 15:23:39,NaT,146,0,Rock,[25],"[25, 12]",,336,,Creative Commons Attribution-NonCommercial-NoD...,294,,5,,[],The Man With Two Mouths


In [467]:
# Read the per-track genre probability table, using the first CSV column as the index.
classified_top_genre = pd.read_csv("top_genre_prob_by_optimized_classification.csv",index_col=[0])

In [469]:
# Display the genre probability table loaded from CSV.
classified_top_genre

Unnamed: 0_level_0,Rock,Electronic,Pop,Hip-Hop,Folk
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2,0.003639,0.010459,0.009752,0.974635,0.001515
3,0.045891,0.044842,0.106042,0.785288,0.017937
5,0.005435,0.020162,0.036643,0.930348,0.007411
10,0.162003,0.133598,0.663075,0.021528,0.019796
134,0.030814,0.061230,0.042618,0.854423,0.010914
...,...,...,...,...,...
155315,0.973578,0.005273,0.014719,0.001956,0.004475
155316,0.995178,0.000831,0.001596,0.000050,0.002345
155317,0.970530,0.010954,0.010181,0.000799,0.007536
155318,0.994592,0.002137,0.002263,0.000290,0.000719


In [471]:
# Read the reclassified "remaining genres" table, using the first CSV column as the index.
classified_remaining_genre = pd.read_csv("../reclassified_remaining_genres.csv",index_col=[0])

In [473]:
# Stack both genre tables row-wise.
# NOTE(review): all_genre_probs is not referenced again in the cells visible
# here — confirm whether the later merge was meant to use it.
all_genre_probs = pd.concat([classified_top_genre,classified_remaining_genre])

In [475]:
# Keep only the three popularity columns of the 'track' group.
track_recommendation_data = pd.DataFrame(tracks['track'][["interest","favorites","listens"]])

In [477]:
# Display the interest / favorites / listens columns.
track_recommendation_data

Unnamed: 0_level_0,interest,favorites,listens
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2,4656,2,1293
3,1470,1,514
5,1933,6,1151
10,54881,178,50135
20,978,0,361
...,...,...,...
155316,122,1,102
155317,194,1,165
155318,214,2,168
155319,336,0,294


In [479]:
# Join genre probabilities with the popularity columns on the shared index
# (merge's default inner join, so only ids present in both frames survive).
# NOTE(review): only classified_top_genre is merged; the concatenated
# all_genre_probs built in an earlier cell is not used — confirm this is intended.
recommendation_data = classified_top_genre.merge(track_recommendation_data,left_index=True,right_index=True)

In [485]:
# Save the merged table to CSV in the current working directory.
recommendation_data.to_csv("optimized_final_recommendation_data.csv")

In [488]:
# Display the merged genre-probability + popularity table.
recommendation_data

Unnamed: 0_level_0,Rock,Electronic,Pop,Hip-Hop,Folk,interest,favorites,listens
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2,0.003639,0.010459,0.009752,0.974635,0.001515,4656,2,1293
3,0.045891,0.044842,0.106042,0.785288,0.017937,1470,1,514
5,0.005435,0.020162,0.036643,0.930348,0.007411,1933,6,1151
10,0.162003,0.133598,0.663075,0.021528,0.019796,54881,178,50135
134,0.030814,0.061230,0.042618,0.854423,0.010914,1126,3,943
...,...,...,...,...,...,...,...,...
155315,0.973578,0.005273,0.014719,0.001956,0.004475,153,1,128
155316,0.995178,0.000831,0.001596,0.000050,0.002345,122,1,102
155317,0.970530,0.010954,0.010181,0.000799,0.007536,194,1,165
155318,0.994592,0.002137,0.002263,0.000290,0.000719,214,2,168


In [490]:
# NOTE(review): the name is spelled `recomendation_data` (single "m"), so this
# is a separate variable from `recommendation_data` used in the other cells.
# It also reads "final_recommendation_data.csv" rather than the
# "optimized_final_recommendation_data.csv" file written earlier, and passes no
# index_col, so track_id stays a regular column. Confirm this is intended.
recomendation_data = pd.read_csv("final_recommendation_data.csv")

In [492]:
# Display the table read from final_recommendation_data.csv (the single-"m" variable).
recomendation_data

Unnamed: 0,track_id,Rock,Electronic,Pop,Hip-Hop,Folk,interest,favorites,listens
0,2,0.003639,0.010459,0.009752,0.974635,0.001515,4656,2,1293
1,3,0.045891,0.044842,0.106042,0.785288,0.017937,1470,1,514
2,5,0.005435,0.020162,0.036643,0.930348,0.007411,1933,6,1151
3,10,0.162003,0.133598,0.663075,0.021528,0.019796,54881,178,50135
4,134,0.030814,0.061230,0.042618,0.854423,0.010914,1126,3,943
...,...,...,...,...,...,...,...,...,...
49589,155308,0.783553,0.067902,0.105080,0.019759,0.023706,90,1,82
49590,155309,0.351662,0.071414,0.294607,0.037861,0.244455,90,0,79
49591,155310,0.411248,0.029858,0.400344,0.057910,0.100640,94,0,84
49592,155311,0.616869,0.030322,0.085457,0.014142,0.253210,187,0,171


In [494]:
# Read the probability table for the query item, using the first CSV column as the index.
# NOTE(review): its columns are labelled 0..4 rather than by genre name; presumably
# they follow the same genre order as classified_top_genre — verify.
bank_acc_prob = pd.read_csv("../bank_acc_prob.csv",index_col=[0])

In [496]:
# Display the query probability table.
bank_acc_prob

Unnamed: 0,0,1,2,3,4
0,0.018674,0.081087,0.020478,0.877553,0.002207


In [498]:
# Display the merged table again before the similarity column is added.
recommendation_data

Unnamed: 0_level_0,Rock,Electronic,Pop,Hip-Hop,Folk,interest,favorites,listens
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2,0.003639,0.010459,0.009752,0.974635,0.001515,4656,2,1293
3,0.045891,0.044842,0.106042,0.785288,0.017937,1470,1,514
5,0.005435,0.020162,0.036643,0.930348,0.007411,1933,6,1151
10,0.162003,0.133598,0.663075,0.021528,0.019796,54881,178,50135
134,0.030814,0.061230,0.042618,0.854423,0.010914,1126,3,943
...,...,...,...,...,...,...,...,...
155315,0.973578,0.005273,0.014719,0.001956,0.004475,153,1,128
155316,0.995178,0.000831,0.001596,0.000050,0.002345,122,1,102
155317,0.970530,0.010954,0.010181,0.000799,0.007536,194,1,165
155318,0.994592,0.002137,0.002263,0.000290,0.000719,214,2,168


In [500]:
from sklearn.metrics.pairwise import cosine_similarity

# Compare the query probabilities against every track's genre probabilities.
# The first five columns of classified_top_genre are taken as the genre columns.
genre_columns = classified_top_genre.columns[:5]
track_genre_probs = recommendation_data[genre_columns]
new_song_similarities = cosine_similarity(bank_acc_prob, track_genre_probs)

# Row 0 of the result corresponds to the first row of bank_acc_prob.
recommendation_data["cosine_similarity"] = new_song_similarities[0]

In [502]:
# Display the table with the new cosine_similarity column.
recommendation_data

Unnamed: 0_level_0,Rock,Electronic,Pop,Hip-Hop,Folk,interest,favorites,listens,cosine_similarity
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2,0.003639,0.010459,0.009752,0.974635,0.001515,4656,2,1293,0.996450
3,0.045891,0.044842,0.106042,0.785288,0.017937,1470,1,514,0.992405
5,0.005435,0.020162,0.036643,0.930348,0.007411,1933,6,1151,0.997259
10,0.162003,0.133598,0.663075,0.021528,0.019796,54881,178,50135,0.075549
134,0.030814,0.061230,0.042618,0.854423,0.010914,1126,3,943,0.999278
...,...,...,...,...,...,...,...,...,...
155315,0.973578,0.005273,0.014719,0.001956,0.004475,153,1,128,0.024036
155316,0.995178,0.000831,0.001596,0.000050,0.002345,122,1,102,0.021350
155317,0.970530,0.010954,0.010181,0.000799,0.007536,194,1,165,0.023296
155318,0.994592,0.002137,0.002263,0.000290,0.000719,214,2,168,0.021721


In [504]:
# Take the 10 rows with the highest cosine similarity; copy so that columns
# added later do not touch recommendation_data.
top_recommendations = recommendation_data.nlargest(10, "cosine_similarity").copy()

In [506]:
# Display the 10 most similar tracks.
top_recommendations

Unnamed: 0_level_0,Rock,Electronic,Pop,Hip-Hop,Folk,interest,favorites,listens,cosine_similarity
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
90135,0.014396,0.081954,0.017012,0.884212,0.002427,499,1,393,0.999979
63854,0.012927,0.077315,0.022762,0.883064,0.003932,744,1,375,0.999962
70208,0.026665,0.081254,0.018679,0.870781,0.00262,8473,4,6089,0.999955
130456,0.011975,0.084107,0.015618,0.886861,0.001439,2832,2,2185,0.99995
130156,0.009753,0.084204,0.020548,0.88382,0.001675,364,2,191,0.999944
11791,0.010922,0.076924,0.022336,0.887023,0.002794,5694,6,1729,0.999942
60020,0.00898,0.08683,0.021992,0.881091,0.001108,825,0,333,0.999918
136469,0.017945,0.071139,0.017675,0.889853,0.003387,5263,4,4192,0.999916
113408,0.008246,0.084976,0.017344,0.888599,0.000835,722,1,509,0.999915
61528,0.00787,0.084635,0.019144,0.883476,0.004876,1148,2,383,0.999913


In [508]:
# Average similarity across the selected tracks.
top_recommendations["cosine_similarity"].mean()

0.9999394748188013

In [510]:
# Empty frame sharing the top recommendations' index; its three columns are
# filled by the scaling cell that follows.
normalized_features = pd.DataFrame(columns=["normalized_listens","normalized_favorites","normalized_interest"],index=top_recommendations.index)

In [512]:
from sklearn.preprocessing import MinMaxScaler

# Min-max scale each popularity column on its own, over the selected rows only.
# The scaler is refit per column, in the order listens, favorites, interest.
scaler = MinMaxScaler()
for target_column, source_column in (
    ('normalized_listens', 'listens'),
    ('normalized_favorites', 'favorites'),
    ('normalized_interest', 'interest'),
):
    scaled = scaler.fit_transform(top_recommendations[[source_column]])
    normalized_features[target_column] = scaled[:, 0]


In [514]:
# Display the min-max scaled popularity columns.
normalized_features

Unnamed: 0_level_0,normalized_listens,normalized_favorites,normalized_interest
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
90135,0.034249,0.166667,0.016648
63854,0.031197,0.166667,0.046862
70208,1.0,0.666667,1.0
130456,0.338081,0.333333,0.304353
130156,0.0,0.333333,0.0
11791,0.260766,1.0,0.657294
60020,0.024076,0.0,0.05685
136469,0.678366,0.666667,0.604144
113408,0.053917,0.166667,0.044148
61528,0.032553,0.333333,0.096683


In [516]:
# Weighted blend: similarity 0.5, listens 0.3, favorites 0.1, interest 0.1.
similarity_term = 0.5 * top_recommendations["cosine_similarity"]
listens_term = 0.3 * normalized_features["normalized_listens"]
favorites_term = 0.1 * normalized_features["normalized_favorites"]
interest_term = 0.1 * normalized_features["normalized_interest"]

# Terms are summed left to right in the order listed above.
top_recommendations["final_score"] = (
    similarity_term + listens_term + favorites_term + interest_term
)

In [518]:
# Display the selected tracks with their final_score column.
top_recommendations

Unnamed: 0_level_0,Rock,Electronic,Pop,Hip-Hop,Folk,interest,favorites,listens,cosine_similarity,final_score
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
90135,0.014396,0.081954,0.017012,0.884212,0.002427,499,1,393,0.999979,0.528596
63854,0.012927,0.077315,0.022762,0.883064,0.003932,744,1,375,0.999962,0.530693
70208,0.026665,0.081254,0.018679,0.870781,0.00262,8473,4,6089,0.999955,0.966644
130456,0.011975,0.084107,0.015618,0.886861,0.001439,2832,2,2185,0.99995,0.665168
130156,0.009753,0.084204,0.020548,0.88382,0.001675,364,2,191,0.999944,0.533305
11791,0.010922,0.076924,0.022336,0.887023,0.002794,5694,6,1729,0.999942,0.743931
60020,0.00898,0.08683,0.021992,0.881091,0.001108,825,0,333,0.999918,0.512867
136469,0.017945,0.071139,0.017675,0.889853,0.003387,5263,4,4192,0.999916,0.830549
113408,0.008246,0.084976,0.017344,0.888599,0.000835,722,1,509,0.999915,0.537214
61528,0.00787,0.084635,0.019144,0.883476,0.004876,1148,2,383,0.999913,0.552724


In [520]:
# Average blended score across the selected tracks.
top_recommendations["final_score"].mean()

0.6401690286782732

In [522]:
# Rank the selected tracks by blended score, best first.
final_recommendations = top_recommendations.sort_values("final_score", ascending=False)

In [524]:
# Track ids in ranked order.
final_recommendations.index

Index([70208, 136469, 11791, 130456, 61528, 113408, 130156, 63854, 90135,
       60020],
      dtype='int64', name='track_id')

In [526]:
# Look up the 'track' metadata rows for the ranked ids, keeping the ranked order.
tracks['track'].loc[final_recommendations.index]

Unnamed: 0_level_0,bit_rate,comments,composer,date_created,date_recorded,duration,favorites,genre_top,genres,genres_all,information,interest,language_code,license,listens,lyricist,number,publisher,tags,title
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
70208,192000,0,,2012-09-19 15:35:43,NaT,191,4,Hip-Hop,"[21, 539]","[539, 21]",,8473,,Attribution-Noncommercial-No Derivative Works ...,6089,,2,,[],Bittersweet Insomnia
136469,320000,0,,2016-05-03 14:01:12,NaT,240,4,Hip-Hop,[21],[21],,5263,,Attribution-NonCommercial-ShareAlike,4192,,5,,[],Don't (Bonus Non LP Track)
11791,192000,0,,2009-04-30 15:03:51,NaT,114,6,Hip-Hop,[21],[21],,5694,en,Attribution-NonCommercial 3.0 International,1729,,22,,[],The Cuchifritos Groove Instrumental {Wilda...
130456,320000,0,,2016-01-08 13:04:00,NaT,189,2,Hip-Hop,[21],[21],,2832,,Attribution-NonCommercial-ShareAlike,2185,,15,,[],Til It Hurts (Munkee Mixx)
61528,320000,0,,2012-02-29 16:37:40,NaT,332,2,Hip-Hop,[21],[21],,1148,,Attribution-NonCommercial-ShareAlike 3.0 Inter...,383,,2,,[],Missing yoU
113408,320000,0,,2015-01-20 06:07:33,NaT,185,1,Hip-Hop,"[21, 539]","[539, 21]",,722,,Attribution-ShareAlike,509,,10,,[],Brick Wall
130156,320000,0,,2016-01-05 10:28:57,NaT,201,2,Hip-Hop,[21],[21],,364,,Attribution-NonCommercial-ShareAlike,191,,2,,[],Funky Motion (AUG 8th Mixx)
63854,320000,1,,2012-04-20 15:59:36,NaT,217,1,Hip-Hop,[21],[21],,744,,Attribution-NonCommercial-NoDerivatives (aka M...,375,,1,,[],The F***iteer
90135,320000,0,,2013-09-04 09:04:06,NaT,164,1,Hip-Hop,[21],[21],,499,,Attribution-NonCommercial-NoDerivatives (aka M...,393,,13,,[],A Mission (For My Sins)
60020,64000,0,,2012-02-07 16:20:29,NaT,272,0,Hip-Hop,[21],[21],,825,,Attribution-Noncommercial-Share Alike 3.0 Unit...,333,,0,,[],"Temptations- PeeWee, Anjelly Nice, CAPITAL, Faith"


In [528]:
# Titles of the ranked tracks, selected with a single row/column .loc lookup.
track_name = tracks['track'].loc[final_recommendations.index, "title"]

In [530]:
# Artist names of the ranked tracks, selected with a single row/column .loc lookup.
artist_name = tracks['artist'].loc[final_recommendations.index, "name"]

In [532]:
# Top genre of the ranked tracks, selected with a single row/column .loc lookup.
genre_name = tracks['track'].loc[final_recommendations.index, "genre_top"]

In [534]:
# Display the recommended track titles in ranked order.
track_name

track_id
70208                                  Bittersweet Insomnia
136469                           Don't (Bonus Non LP Track)
11791     The Cuchifritos Groove Instrumental     {Wilda...
130456                           Til It Hurts (Munkee Mixx)
61528                                           Missing yoU
113408                                           Brick Wall
130156                          Funky Motion (AUG 8th Mixx)
63854                                         The F***iteer
90135                               A Mission (For My Sins)
60020     Temptations- PeeWee, Anjelly Nice, CAPITAL, Faith
Name: title, dtype: object

In [536]:
# Display the recommended tracks' artist names in ranked order.
artist_name

track_id
70208                 Spray Lodge
136469                      C-Doc
11791                   6th Sense
130456                      Pot-C
61528            Vlooper . Modlee
113408            Simon Panrucker
130156             Cheese N Pot-C
63854              Just Plain Ant
90135     Twenty-One: Twenty-Four
60020                     ZuKrewe
Name: name, dtype: object

In [538]:
# Display the recommended tracks' top genre in ranked order.
genre_name

track_id
70208     Hip-Hop
136469    Hip-Hop
11791     Hip-Hop
130456    Hip-Hop
61528     Hip-Hop
113408    Hip-Hop
130156    Hip-Hop
63854     Hip-Hop
90135     Hip-Hop
60020     Hip-Hop
Name: genre_top, dtype: category
Categories (16, object): ['Blues', 'Classical', 'Country', 'Easy Listening', ..., 'Pop', 'Rock', 'Soul-RnB', 'Spoken']