In [3]:
import pandas as pd
import os
import ast

In [5]:
def load(filepath):

    filename = os.path.basename(filepath)

    if 'features' in filename:
        return pd.read_csv(filepath, index_col=0, header=[0, 1, 2])

    if 'echonest' in filename:
        return pd.read_csv(filepath, index_col=0, header=[0, 1, 2])

    if 'genres' in filename:
        return pd.read_csv(filepath, index_col=0)

    if 'tracks' in filename:
        tracks = pd.read_csv(filepath, index_col=0, header=[0, 1])

        COLUMNS = [('track', 'tags'), ('album', 'tags'), ('artist', 'tags'),
                   ('track', 'genres'), ('track', 'genres_all')]
        for column in COLUMNS:
            tracks[column] = tracks[column].map(ast.literal_eval)

        COLUMNS = [('track', 'date_created'), ('track', 'date_recorded'),
                   ('album', 'date_created'), ('album', 'date_released'),
                   ('artist', 'date_created'), ('artist', 'active_year_begin'),
                   ('artist', 'active_year_end')]
        for column in COLUMNS:
            tracks[column] = pd.to_datetime(tracks[column])

        SUBSETS = ('small', 'medium', 'large')
        try:
            tracks['set', 'subset'] = tracks['set', 'subset'].astype(
                    'category', categories=SUBSETS, ordered=True)
        except (ValueError, TypeError):
            # the categories and ordered arguments were removed in pandas 0.25
            tracks['set', 'subset'] = tracks['set', 'subset'].astype(
                     pd.CategoricalDtype(categories=SUBSETS, ordered=True))

        COLUMNS = [('track', 'genre_top'), ('track', 'license'),
                   ('album', 'type'), ('album', 'information'),
                   ('artist', 'bio')]
        for column in COLUMNS:
            tracks[column] = tracks[column].astype('category')

        return tracks


In [7]:
tracks = load("../../fma_metadata/tracks.csv")

In [8]:
tracks['artist','location'].head()

track_id
2             New Jersey
3             New Jersey
5             New Jersey
10                   NaN
20    Colchester England
Name: (artist, location), dtype: object

In [9]:
tracks.columns

MultiIndex([( 'album',          'comments'),
            ( 'album',      'date_created'),
            ( 'album',     'date_released'),
            ( 'album',          'engineer'),
            ( 'album',         'favorites'),
            ( 'album',                'id'),
            ( 'album',       'information'),
            ( 'album',           'listens'),
            ( 'album',          'producer'),
            ( 'album',              'tags'),
            ( 'album',             'title'),
            ( 'album',            'tracks'),
            ( 'album',              'type'),
            ('artist', 'active_year_begin'),
            ('artist',   'active_year_end'),
            ('artist', 'associated_labels'),
            ('artist',               'bio'),
            ('artist',          'comments'),
            ('artist',      'date_created'),
            ('artist',         'favorites'),
            ('artist',                'id'),
            ('artist',          'latitude'),
          

In [10]:
classified_genre = pd.read_csv("classified_genre_prob.csv",index_col=[0])

In [11]:
classified_genre

Unnamed: 0_level_0,Rock,Electronic,Pop,Hip-Hop,Folk
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2,0.005851,0.013714,0.023583,0.955960,0.000892
3,0.021971,0.014995,0.019521,0.936926,0.006587
5,0.006948,0.024298,0.015383,0.950364,0.003008
10,0.224020,0.200328,0.513168,0.040765,0.021719
20,0.205208,0.215821,0.210499,0.039462,0.329010
...,...,...,...,...,...
155315,0.930538,0.010578,0.040912,0.013743,0.004229
155316,0.978195,0.003671,0.011807,0.000231,0.006095
155317,0.973773,0.013799,0.007568,0.000434,0.004426
155318,0.989820,0.005115,0.003643,0.000593,0.000830


In [165]:
X_features_all = pd.read_csv("X_features_all.csv",header=[0,1,2],index_col=[0])

In [167]:
X_features_all

Unnamed: 0_level_0,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,...,mel_spec,mel_spec,mel_spec,mel_spec,mel_spec,mel_spec,mel_spec,mel_spec,mel_spec,mel_spec
Unnamed: 0_level_1,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,...,std,std,std,std,std,std,std,std,std,std
Unnamed: 0_level_2,01,02,03,04,05,06,07,08,09,10,...,90,91,92,93,94,95,96,97,98,99
track_id,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
2,7.180653,5.230309,0.249321,1.347620,1.482478,0.531371,1.481593,2.691455,0.866868,1.341231,...,0.005500,0.004066,0.003965,0.002347,0.001663,0.002632,0.001452,0.000114,0.000002,0.000002
3,1.888963,0.760539,0.345297,2.295201,1.654031,0.067592,1.366848,1.054094,0.108103,0.619185,...,0.014708,0.019320,0.078528,0.072792,0.069122,0.013616,0.008447,0.001303,0.000003,0.000002
5,0.527563,-0.077654,-0.279610,0.685883,1.937570,0.880839,-0.923192,-0.927232,0.666617,1.038546,...,0.000010,0.000009,0.000008,0.000006,0.000005,0.000007,0.000004,0.000002,0.000002,0.000001
10,3.702245,-0.291193,2.196742,-0.234449,1.367364,0.998411,1.770694,1.604566,0.521217,1.982386,...,0.018939,0.015547,0.010535,0.006567,0.006368,0.003965,0.003788,0.002343,0.001213,0.000316
20,-0.193837,-0.198527,0.201546,0.258556,0.775204,0.084794,-0.289294,-0.816410,0.043851,-0.804761,...,0.008917,0.008339,0.006849,0.006172,0.005579,0.004760,0.004246,0.004115,0.003781,0.003704
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
155315,-0.319850,0.653044,1.300360,-0.606061,-1.109302,-1.006683,0.059643,0.472580,-0.856436,-0.524676,...,0.000922,0.000430,0.000138,0.000019,0.000003,0.000004,0.000004,0.000003,0.000003,0.000002
155316,-0.490129,0.463834,2.321970,-0.084352,1.662914,2.115189,-0.237794,5.695442,0.830353,1.951819,...,0.001007,0.000663,0.000429,0.000345,0.000208,0.000123,0.000121,0.000038,0.000002,0.000001
155317,-0.461559,-0.229601,-0.496632,-0.422033,0.130612,-0.263825,-0.628103,-0.082687,-0.229483,-0.492753,...,0.002338,0.001594,0.001051,0.000818,0.000603,0.000557,0.000391,0.000214,0.000067,0.000098
155318,0.552473,-0.110498,-0.532014,0.263131,-0.224011,-0.530972,1.713526,1.418444,1.325197,0.120333,...,0.003115,0.002223,0.001846,0.001351,0.000899,0.000688,0.000458,0.000194,0.000042,0.000019


In [264]:
top2 = classified_genre.apply(lambda row: row.nlargest(2).index, axis=1)

# Now create a boolean DataFrame
top2_classified_genre = pd.DataFrame(0, index=classified_genre.index, columns=classified_genre.columns)

for i in classified_genre.index:
    top2_classified_genre.loc[i, top2.loc[i]] = 1

In [268]:
track_recommendation_data = pd.DataFrame(tracks[tracks['track','genre_top'].isin(classified_genre.columns)]['track'][["interest","favorites","listens"]])

In [270]:
track_recommendation_data

Unnamed: 0_level_0,interest,favorites,listens
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2,4656,2,1293
3,1470,1,514
5,1933,6,1151
10,54881,178,50135
134,1126,3,943
...,...,...,...
155315,153,1,128
155316,122,1,102
155317,194,1,165
155318,214,2,168


In [14]:
# track_recommendation_data_bool = pd.DataFrame(index=track_recommendation_data.index,columns = track_recommendation_data.columns)
# for col in track_recommendation_data.columns:
#     track_recommendation_data_bool[col] = track_recommendation_data[col].map(lambda x: 1 if x>=track_recommendation_data[col].mean() else 0)

In [15]:
# track_recommendation_data_bool


In [272]:
recommendation_data = top2_classified_genre.merge(track_recommendation_data,left_index=True,right_index=True)

In [274]:
recommendation_data

Unnamed: 0_level_0,Rock,Electronic,Pop,Hip-Hop,Folk,interest,favorites,listens
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2,0,0,1,1,0,4656,2,1293
3,1,0,0,1,0,1470,1,514
5,0,1,0,1,0,1933,6,1151
10,1,0,1,0,0,54881,178,50135
134,0,1,0,1,0,1126,3,943
...,...,...,...,...,...,...,...,...
155315,1,0,1,0,0,153,1,128
155316,1,0,1,0,0,122,1,102
155317,1,1,0,0,0,194,1,165
155318,1,1,0,0,0,214,2,168


In [277]:
recommendation_data.isna().sum()

Rock          0
Electronic    0
Pop           0
Hip-Hop       0
Folk          0
interest      0
favorites     0
listens       0
dtype: int64

In [279]:
recommendation_data

Unnamed: 0_level_0,Rock,Electronic,Pop,Hip-Hop,Folk,interest,favorites,listens
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2,0,0,1,1,0,4656,2,1293
3,1,0,0,1,0,1470,1,514
5,0,1,0,1,0,1933,6,1151
10,1,0,1,0,0,54881,178,50135
134,0,1,0,1,0,1126,3,943
...,...,...,...,...,...,...,...,...
155315,1,0,1,0,0,153,1,128
155316,1,0,1,0,0,122,1,102
155317,1,1,0,0,0,194,1,165
155318,1,1,0,0,0,214,2,168


In [309]:
bank_acc = pd.read_csv("bank_acc.csv",index_col=[0],header=[0,1,2])

In [2]:
from sklearn.metrics.pairwise import cosine_similarity

# genre_columns=["Rock","Electronic","Pop","Hip-Hop","Folk"]
new_song_similarities = cosine_similarity(bank_acc,L=\)

SyntaxError: unexpected character after line continuation character (1986641518.py, line 4)

In [4]:
pd.DataFrame(new_song_similarities)

NameError: name 'pd' is not defined

In [146]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
recommendation_data['normalized_listens'] = scaler.fit_transform(recommendation_data[['listens']])
recommendation_data['normalized_favorites'] = scaler.fit_transform(recommendation_data[['favorites']])
recommendation_data['normalized_interest'] = scaler.fit_transform(recommendation_data[['interest']])

In [149]:
final_scores = (0.6 * new_song_similarities[0] +
                0.1 * recommendation_data['normalized_listens'] +
                0.2 * recommendation_data['normalized_favorites'] +
                0.1 * recommendation_data['normalized_interest'])

In [315]:
top_indices = new_song_similarities[0].argsort()[-5:][::-1]
recommendations = X_features_all.iloc[top_indices]

In [317]:
recommendations.index

Index([33690, 73539, 88090, 88047, 84520], dtype='int64', name='track_id')

In [319]:
tracks['track'].loc[recommendations.index]

Unnamed: 0_level_0,bit_rate,comments,composer,date_created,date_recorded,duration,favorites,genre_top,genres,genres_all,information,interest,language_code,license,listens,lyricist,number,publisher,tags,title
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
33690,320000,0,,2010-08-05 14:43:26,NaT,5,0,,"[15, 27, 183]","[183, 27, 12, 15]",,2476,,Attribution-NonCommercial-ShareAlike 3.0 Inter...,440,,5,,[],fortloop
73539,73324,0,,2012-12-06 21:17:00,NaT,353,3,,"[27, 94, 107]","[107, 12, 17, 1235, 27, 94]",,25340,,Attribution-Noncommercial-Share Alike 3.0 Unit...,24004,,5,,[],autiste de tourisme
88090,242920,0,,2013-08-04 10:33:06,NaT,1,1,,"[17, 362, 440]","[362, 10, 12, 17, 440]",,1411,,Attribution-Noncommercial-Share Alike 3.0 Unit...,856,,17,,[],M.R.T.G.B.P.S.D.T.V.N.C.
88047,247912,0,,2013-08-04 09:53:16,NaT,1,0,,"[17, 362, 440]","[362, 10, 12, 17, 440]",,6164,,Attribution-Noncommercial-Share Alike 3.0 Unit...,5036,,18,,[],C'est bientôt noël ou quoi ?
84520,320000,0,,2013-05-21 13:48:27,NaT,131,3,Hip-Hop,[21],[21],,1223,,Attribution-NonCommercial-NoDerivatives (aka M...,961,,6,,[],Slow Head B4 Bed


In [321]:
tracks['artist'].loc[recommendations.index]

Unnamed: 0_level_0,active_year_begin,active_year_end,associated_labels,bio,comments,date_created,favorites,id,latitude,location,longitude,members,name,related_projects,tags,website,wikipedia_page
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
33690,NaT,NaT,,<p>Brooklyn based producer of lo-fi ship shape...,0,2010-08-05 14:43:46,12,8345,40.714353,New York,-74.005973,,Don Trust,,[don trust],http://dontrust.bandcamp.com/,
73539,NaT,NaT,,,0,2012-12-06 21:18:11,0,14960,,,,,Vespre,,[vespre],,
88090,NaT,NaT,,,0,2010-06-19 02:20:09,14,7824,,Montreal,,,RÉGIS VICTOR,,[],http://vegisrictor.vacau.com/,
88047,NaT,NaT,,,0,2010-06-19 02:20:09,14,7824,,Montreal,,,RÉGIS VICTOR,,[],http://vegisrictor.vacau.com/,
84520,NaT,NaT,,"<p>Originally from Luton, UK - now living in M...",6,2009-05-19 09:15:44,59,3593,42.358431,"Boston, MA",-71.059773,,Tha Silent Partner,,[tha silent partner],http://thasilentpartner.net/,
