In [73]:
import pandas as pd
import os
import ast

In [75]:
def load(filepath):
    """Load one of the FMA metadata CSV files into a DataFrame.

    The parsing layout is chosen from the base name of ``filepath``, tested
    in this order:

    * contains ``'features'`` or ``'echonest'``: three header rows, first
      column as index;
    * contains ``'genres'``: single header row, first column as index;
    * contains ``'tracks'``: two header rows, first column as index, followed
      by dtype clean-up (list-valued columns parsed from their string form,
      date columns converted to datetimes, ``('set', 'subset')`` converted to
      an ordered categorical, and a few text columns converted to
      categoricals).

    Parameters
    ----------
    filepath : str or path-like
        Path to the CSV file. Only its base name is used to pick the layout.

    Returns
    -------
    pandas.DataFrame or None
        The loaded frame, or ``None`` when the base name matches none of the
        known kinds (in which case the file is not read at all).
    """
    filename = os.path.basename(filepath)

    # Both files share the same three-level column header layout.
    if 'features' in filename or 'echonest' in filename:
        return pd.read_csv(filepath, index_col=0, header=[0, 1, 2])

    if 'genres' in filename:
        return pd.read_csv(filepath, index_col=0)

    if 'tracks' in filename:
        tracks = pd.read_csv(filepath, index_col=0, header=[0, 1])

        # These columns are stored as Python list literals (e.g. "[21, 10]");
        # literal_eval only evaluates literals, never arbitrary expressions.
        list_columns = [('track', 'tags'), ('album', 'tags'),
                        ('artist', 'tags'), ('track', 'genres'),
                        ('track', 'genres_all')]
        for column in list_columns:
            tracks[column] = tracks[column].map(ast.literal_eval)

        date_columns = [('track', 'date_created'), ('track', 'date_recorded'),
                        ('album', 'date_created'), ('album', 'date_released'),
                        ('artist', 'date_created'),
                        ('artist', 'active_year_begin'),
                        ('artist', 'active_year_end')]
        for column in date_columns:
            tracks[column] = pd.to_datetime(tracks[column])

        # Ordered so that comparisons such as `subset <= 'medium'` work.
        # CategoricalDtype is used directly: the older
        # astype('category', categories=..., ordered=...) keyword form was
        # removed from pandas and only ever reached this path via an exception.
        SUBSETS = ('small', 'medium', 'large')
        tracks['set', 'subset'] = tracks['set', 'subset'].astype(
            pd.CategoricalDtype(categories=SUBSETS, ordered=True))

        category_columns = [('track', 'genre_top'), ('track', 'license'),
                            ('album', 'type'), ('album', 'information'),
                            ('artist', 'bio')]
        for column in category_columns:
            tracks[column] = tracks[column].astype('category')

        return tracks

    # Unrecognised file name: nothing is loaded.
    return None


In [124]:
# Read the track metadata through `load`, which picks the parsing layout
# from the file's base name.
tracks = load(filepath="../../../fma_metadata/tracks.csv")

In [126]:
tracks['track']

Unnamed: 0_level_0,bit_rate,comments,composer,date_created,date_recorded,duration,favorites,genre_top,genres,genres_all,information,interest,language_code,license,listens,lyricist,number,publisher,tags,title
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2,256000,0,,2008-11-26 01:48:12,2008-11-26,168,2,Hip-Hop,[21],[21],,4656,en,Attribution-NonCommercial-ShareAlike 3.0 Inter...,1293,,3,,[],Food
3,256000,0,,2008-11-26 01:48:14,2008-11-26,237,1,Hip-Hop,[21],[21],,1470,en,Attribution-NonCommercial-ShareAlike 3.0 Inter...,514,,4,,[],Electric Ave
5,256000,0,,2008-11-26 01:48:20,2008-11-26,206,6,Hip-Hop,[21],[21],,1933,en,Attribution-NonCommercial-ShareAlike 3.0 Inter...,1151,,6,,[],This World
10,192000,0,Kurt Vile,2008-11-25 17:49:06,2008-11-26,161,178,Pop,[10],[10],,54881,en,Attribution-NonCommercial-NoDerivatives (aka M...,50135,,1,,[],Freeway
20,256000,0,,2008-11-26 01:48:56,2008-01-01,311,0,,"[76, 103]","[17, 10, 76, 103]",,978,en,Attribution-NonCommercial-NoDerivatives (aka M...,361,,3,,[],Spiritual Level
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
155316,320000,0,,2017-03-30 15:23:34,NaT,162,1,Rock,[25],"[25, 12]",,122,,Creative Commons Attribution-NonCommercial-NoD...,102,,3,,[],The Auger
155317,320000,0,,2017-03-30 15:23:36,NaT,217,1,Rock,[25],"[25, 12]",,194,,Creative Commons Attribution-NonCommercial-NoD...,165,,4,,[],Let's Skin Ruby
155318,320000,0,,2017-03-30 15:23:37,NaT,404,2,Rock,[25],"[25, 12]",,214,,Creative Commons Attribution-NonCommercial-NoD...,168,,6,,[],My House Smells Like Kim Deal/Pulp
155319,320000,0,,2017-03-30 15:23:39,NaT,146,0,Rock,[25],"[25, 12]",,336,,Creative Commons Attribution-NonCommercial-NoD...,294,,5,,[],The Man With Two Mouths


In [582]:
# Genre-probability table; the first CSV column becomes the index.
classified_top_genre = pd.read_csv(
    "top_genre_prob_by_optimized_classification.csv",
    index_col=[0],
)

In [585]:
classified_top_genre

Unnamed: 0_level_0,Rock,Electronic,Pop,Hip-Hop,Folk
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2,0.003639,0.010459,0.009752,0.974635,0.001515
3,0.045891,0.044842,0.106042,0.785288,0.017937
5,0.005435,0.020162,0.036643,0.930348,0.007411
10,0.162003,0.133598,0.663075,0.021528,0.019796
134,0.030814,0.061230,0.042618,0.854423,0.010914
...,...,...,...,...,...
155315,0.973578,0.005273,0.014719,0.001956,0.004475
155316,0.995178,0.000831,0.001596,0.000050,0.002345
155317,0.970530,0.010954,0.010181,0.000799,0.007536
155318,0.994592,0.002137,0.002263,0.000290,0.000719


In [587]:
# Second probability table, read with the same layout (first column as index).
classified_remaining_genre = pd.read_csv(
    "../reclassified_remaining_genres.csv",
    index_col=[0],
)

In [589]:
# Stack the two probability tables row-wise into a single frame.
all_genre_probs = pd.concat(
    objs=[classified_top_genre, classified_remaining_genre],
    axis=0,
)

In [591]:
# Keep only the interest / favorites / listens columns of the track metadata.
popularity_columns = ["interest", "favorites", "listens"]
track_recommendation_data = pd.DataFrame(tracks['track'][popularity_columns])

In [593]:
track_recommendation_data

Unnamed: 0_level_0,interest,favorites,listens
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2,4656,2,1293
3,1470,1,514
5,1933,6,1151
10,54881,178,50135
20,978,0,361
...,...,...,...
155316,122,1,102
155317,194,1,165
155318,214,2,168
155319,336,0,294


In [595]:
# Index-on-index inner join: only rows whose index appears in both frames survive.
recommendation_data = classified_top_genre.merge(
    track_recommendation_data,
    how="inner",
    left_index=True,
    right_index=True,
)

In [798]:
# recommendation_data.to_csv("base_recommendation_data_logistic.csv")

In [800]:
recommendation_data

Unnamed: 0_level_0,Rock,Electronic,Pop,Hip-Hop,Folk,interest,favorites,listens,cosine_similarity
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2,0.003639,0.010459,0.009752,0.974635,0.001515,4656,2,1293,0.999357
3,0.045891,0.044842,0.106042,0.785288,0.017937,1470,1,514,0.990057
5,0.005435,0.020162,0.036643,0.930348,0.007411,1933,6,1151,0.999056
10,0.162003,0.133598,0.663075,0.021528,0.019796,54881,178,50135,0.045363
134,0.030814,0.061230,0.042618,0.854423,0.010914,1126,3,943,0.998176
...,...,...,...,...,...,...,...,...,...
155315,0.973578,0.005273,0.014719,0.001956,0.004475,153,1,128,0.010702
155316,0.995178,0.000831,0.001596,0.000050,0.002345,122,1,102,0.008482
155317,0.970530,0.010954,0.010181,0.000799,0.007536,194,1,165,0.009772
155318,0.994592,0.002137,0.002263,0.000290,0.000719,214,2,168,0.008783


In [81]:
# Reload the saved recommendation table, indexed by its track_id column.
recomendation_data = pd.read_csv(
    "basic_recommendation_data_logistic_new.csv",
    index_col="track_id",
)

In [83]:
recomendation_data

Unnamed: 0_level_0,Rock,Electronic,Pop,Hip-Hop,Folk,interest,favorites,listens
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2,0.286368,0.016376,0.236220,0.124459,0.336576,4656,2,1293
3,0.317199,0.011163,0.440916,0.112801,0.117921,1470,1,514
5,0.550803,0.035311,0.181760,0.102528,0.129597,1933,6,1151
10,0.061826,0.014149,0.035781,0.052516,0.835728,54881,178,50135
134,0.264930,0.040877,0.170940,0.133790,0.389463,1126,3,943
...,...,...,...,...,...,...,...,...
155315,0.101480,0.056093,0.038208,0.052576,0.751643,153,1,128
155316,0.062029,0.079743,0.033572,0.039813,0.784843,122,1,102
155317,0.306270,0.099794,0.073537,0.114412,0.405988,194,1,165
155318,0.116667,0.054354,0.053267,0.055791,0.719921,214,2,168


In [85]:
# Probability vector of the query item; the first CSV column becomes the index.
bank_acc_prob = pd.read_csv(
    "bank_acc_bad_prob_logistic.csv",
    index_col=[0],
)

In [87]:
bank_acc_prob

Unnamed: 0,0,1,2,3,4
0,0.322686,0.085995,0.198854,0.122059,0.270405


In [89]:
recomendation_data

Unnamed: 0_level_0,Rock,Electronic,Pop,Hip-Hop,Folk,interest,favorites,listens
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2,0.286368,0.016376,0.236220,0.124459,0.336576,4656,2,1293
3,0.317199,0.011163,0.440916,0.112801,0.117921,1470,1,514
5,0.550803,0.035311,0.181760,0.102528,0.129597,1933,6,1151
10,0.061826,0.014149,0.035781,0.052516,0.835728,54881,178,50135
134,0.264930,0.040877,0.170940,0.133790,0.389463,1126,3,943
...,...,...,...,...,...,...,...,...
155315,0.101480,0.056093,0.038208,0.052576,0.751643,153,1,128
155316,0.062029,0.079743,0.033572,0.039813,0.784843,122,1,102
155317,0.306270,0.099794,0.073537,0.114412,0.405988,194,1,165
155318,0.116667,0.054354,0.053267,0.055791,0.719921,214,2,168


In [91]:
from sklearn.metrics.pairwise import cosine_similarity

# Compare the query row(s) in `bank_acc_prob` with the first five columns of
# every track. NOTE(review): this relies on those five columns being in the
# same order as the columns of `bank_acc_prob` - confirm.
genre_columns = recomendation_data.columns[:5]
track_genre_probs = recomendation_data[genre_columns]
new_song_similarities = cosine_similarity(bank_acc_prob, track_genre_probs)

# Only the first query row is kept as each track's similarity score.
recomendation_data["cosine_similarity"] = new_song_similarities[0]

In [93]:
recomendation_data

Unnamed: 0_level_0,Rock,Electronic,Pop,Hip-Hop,Folk,interest,favorites,listens,cosine_similarity
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2,0.286368,0.016376,0.236220,0.124459,0.336576,4656,2,1293,0.977861
3,0.317199,0.011163,0.440916,0.112801,0.117921,1470,1,514,0.853205
5,0.550803,0.035311,0.181760,0.102528,0.129597,1933,6,1151,0.895323
10,0.061826,0.014149,0.035781,0.052516,0.835728,54881,178,50135,0.634270
134,0.264930,0.040877,0.170940,0.133790,0.389463,1126,3,943,0.961703
...,...,...,...,...,...,...,...,...,...
155315,0.101480,0.056093,0.038208,0.052576,0.751643,153,1,128,0.682786
155316,0.062029,0.079743,0.033572,0.039813,0.784843,122,1,102,0.646365
155317,0.306270,0.099794,0.073537,0.114412,0.405988,194,1,165,0.938143
155318,0.116667,0.054354,0.053267,0.055791,0.719921,214,2,168,0.707479


In [95]:
# The ten rows with the highest cosine similarity, as an independent copy.
top_recommendations = recomendation_data.nlargest(
    n=10,
    columns="cosine_similarity",
).copy()

In [97]:
top_recommendations

Unnamed: 0_level_0,Rock,Electronic,Pop,Hip-Hop,Folk,interest,favorites,listens,cosine_similarity
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1715,0.319068,0.100115,0.179326,0.128773,0.272718,745,0,262,0.998679
36720,0.311592,0.102912,0.206564,0.103824,0.275107,952,2,727,0.998281
679,0.314719,0.084966,0.20281,0.104597,0.292907,1104,0,315,0.99821
678,0.319833,0.092378,0.192345,0.102934,0.29251,693,0,91,0.998094
32661,0.325471,0.076648,0.2014,0.102308,0.294173,314,2,199,0.998058
129696,0.326548,0.085713,0.222761,0.104147,0.260832,1349,2,1180,0.99795
54877,0.31541,0.069659,0.199862,0.115678,0.299391,1862,3,419,0.997672
99089,0.33722,0.062987,0.192335,0.114106,0.293351,864,0,723,0.997657
84609,0.323938,0.106623,0.172096,0.12487,0.272474,171,1,143,0.997592
17369,0.339687,0.08995,0.17533,0.109359,0.285673,143,0,95,0.997587


In [99]:
top_recommendations["cosine_similarity"].mean()

0.9979779318928858

In [103]:
# Empty frame aligned on the selected tracks; its columns are filled in by the scaling cell.
normalized_column_names = [
    "normalized_listens",
    "normalized_favorites",
    "normalized_interest",
]
normalized_features = pd.DataFrame(
    columns=normalized_column_names,
    index=top_recommendations.index,
)

In [105]:
from sklearn.preprocessing import MinMaxScaler

# Rescale each popularity column independently; the scaler is refit for every
# column, so each one is scaled using only the rows in `top_recommendations`.
scaler = MinMaxScaler()
column_pairs = (
    ('normalized_listens', 'listens'),
    ('normalized_favorites', 'favorites'),
    ('normalized_interest', 'interest'),
)
for normalized_name, raw_name in column_pairs:
    scaled_values = scaler.fit_transform(top_recommendations[[raw_name]])
    # fit_transform returns a 2-D array with one column; flatten it.
    normalized_features[normalized_name] = scaled_values[:, 0]


In [107]:
normalized_features

Unnamed: 0_level_0,normalized_listens,normalized_favorites,normalized_interest
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1715,0.157025,0.0,0.350204
36720,0.584022,0.666667,0.470622
679,0.205693,0.0,0.559046
678,0.0,0.0,0.319953
32661,0.099174,0.666667,0.099476
129696,1.0,0.666667,0.701571
54877,0.301194,1.0,1.0
99089,0.580349,0.0,0.41943
84609,0.04775,0.333333,0.016289
17369,0.003673,0.0,0.0


In [109]:
# Weighted blend: half from genre similarity, the rest from the scaled
# popularity columns (the weights sum to 1.0).
similarity_term = 0.5 * top_recommendations["cosine_similarity"]
listens_term = 0.3 * normalized_features["normalized_listens"]
favorites_term = 0.1 * normalized_features["normalized_favorites"]
interest_term = 0.1 * normalized_features["normalized_interest"]

top_recommendations["final_score"] = (
    similarity_term + listens_term + favorites_term + interest_term
)

In [111]:
top_recommendations

Unnamed: 0_level_0,Rock,Electronic,Pop,Hip-Hop,Folk,interest,favorites,listens,cosine_similarity,final_score
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1715,0.319068,0.100115,0.179326,0.128773,0.272718,745,0,262,0.998679,0.581467
36720,0.311592,0.102912,0.206564,0.103824,0.275107,952,2,727,0.998281,0.788076
679,0.314719,0.084966,0.20281,0.104597,0.292907,1104,0,315,0.99821,0.616718
678,0.319833,0.092378,0.192345,0.102934,0.29251,693,0,91,0.998094,0.531042
32661,0.325471,0.076648,0.2014,0.102308,0.294173,314,2,199,0.998058,0.605396
129696,0.326548,0.085713,0.222761,0.104147,0.260832,1349,2,1180,0.99795,0.935799
54877,0.31541,0.069659,0.199862,0.115678,0.299391,1862,3,419,0.997672,0.789194
99089,0.33722,0.062987,0.192335,0.114106,0.293351,864,0,723,0.997657,0.714876
84609,0.323938,0.106623,0.172096,0.12487,0.272474,171,1,143,0.997592,0.548083
17369,0.339687,0.08995,0.17533,0.109359,0.285673,143,0,95,0.997587,0.499895


In [114]:
top_recommendations["final_score"].mean()

0.6610546008773801

In [118]:
# Highest blended score first.
final_recommendations = top_recommendations.sort_values(
    by="final_score",
    ascending=False,
)

In [120]:
final_recommendations.index

Index([129696, 54877, 36720, 99089, 679, 32661, 1715, 84609, 678, 17369], dtype='int64', name='track_id')

In [128]:
tracks['track'].loc[final_recommendations.index]

Unnamed: 0_level_0,bit_rate,comments,composer,date_created,date_recorded,duration,favorites,genre_top,genres,genres_all,information,interest,language_code,license,listens,lyricist,number,publisher,tags,title
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
129696,256256,0,,2015-12-22 15:15:11,NaT,145,2,Folk,"[17, 103]","[17, 103]",,1349,,Creative Commons Attribution-NonCommercial-NoD...,1180,,3,,[],Laws
54877,160000,1,,2011-10-18 12:02:16,NaT,257,3,Electronic,"[181, 236]","[236, 181, 15]",,1862,,Attribution-Noncommercial-Share Alike 3.0 Unit...,419,,5,,[acid],Isered
36720,64000,0,,2010-10-05 03:50:07,NaT,170,2,Rock,"[31, 314]","[314, 12, 31]",,952,,Attribution-NonCommercial-NoDerivatives (aka M...,727,,14,,"[horror, halloween]",RAW
99089,320000,0,,2014-02-13 12:07:00,NaT,100,0,Hip-Hop,[21],[21],,864,,Creative Commons Attribution-NonCommercial-NoD...,723,,16,,[],Give It to the DJ
679,256000,0,,2008-11-26 02:12:08,2008-11-26,340,0,Hip-Hop,[21],[21],,1104,en,Attribution-NonCommercial-ShareAlike 3.0 Inter...,315,,6,,[],Change Thoughts
32661,192000,0,,2010-07-09 12:31:20,NaT,209,2,Hip-Hop,[21],[21],,314,,Attribution-NonCommercial-NoDerivatives (aka M...,199,,4,,[],Cocaine Dreamers feat. Shane Laden
1715,256000,0,,2008-11-26 03:04:01,NaT,111,0,Rock,[12],[12],,745,en,Attribution-Noncommercial-Share Alike 3.0 Unit...,262,,6,,[],"Hell Yeah, Fuck Em"
84609,192000,0,,2013-05-22 16:30:23,NaT,395,1,Pop,[362],"[362, 10]",,171,,Attribution-Noncommercial-No Derivative Works ...,143,,5,,"[earth-enemy, synth pop, electronic]",Hills and Troughs
678,256000,0,,2008-11-26 02:12:04,2007-01-01,341,0,Hip-Hop,[21],[21],,693,en,Attribution-NonCommercial-ShareAlike 3.0 Inter...,91,,6,,[],Change Thoughts
17369,320000,0,,2009-07-27 00:05:31,NaT,127,0,Rock,[12],[12],,143,en,Attribution-NonCommercial-NoDerivatives (aka M...,95,,2,,"[clinical archives, rock, experimental]",Feast on your flesh


In [130]:
# Titles of the recommended tracks, in ranking order.
track_name = tracks['track'].loc[final_recommendations.index, "title"]

In [132]:
# Artist names for the recommended tracks, in ranking order.
artist_name = tracks['artist'].loc[final_recommendations.index, "name"]

In [134]:
# Top-level genre label for the recommended tracks, in ranking order.
genre_name = tracks['track'].loc[final_recommendations.index, "genre_top"]

In [136]:
track_name

track_id
129696                                  Laws
54877                                 Isered
36720                                    RAW
99089                      Give It to the DJ
679                          Change Thoughts
32661     Cocaine Dreamers feat. Shane Laden
1715                      Hell Yeah, Fuck Em
84609                      Hills and Troughs
678                          Change Thoughts
17369                    Feast on your flesh
Name: title, dtype: object

In [138]:
artist_name

track_id
129696         Derek Clegg
54877          Global Goon
36720       Art Of Empathy
99089     The Impossebulls
679                Fanatic
32661        Fidel Cutstro
1715            Stress Ape
84609          EARTH-ENEMY
678                Fanatic
17369           Zack Kouns
Name: name, dtype: object

In [140]:
genre_name

track_id
129696          Folk
54877     Electronic
36720           Rock
99089        Hip-Hop
679          Hip-Hop
32661        Hip-Hop
1715            Rock
84609            Pop
678          Hip-Hop
17369           Rock
Name: genre_top, dtype: category
Categories (16, object): ['Blues', 'Classical', 'Country', 'Easy Listening', ..., 'Pop', 'Rock', 'Soul-RnB', 'Spoken']