In [7]:
import pandas as pd
import os
import ast

In [9]:
def load(filepath):
    """Read a metadata CSV, choosing the parsing rules from its file name.

    The base name of ``filepath`` is tested for these substrings, in order:

    * ``'features'`` or ``'echonest'`` -- read with a three-row column header.
    * ``'genres'`` -- read with a single header row.
    * ``'tracks'`` -- read with a two-row column header, then post-processed:
      list-valued columns are parsed with ``ast.literal_eval``, date columns
      are converted with ``pd.to_datetime``, ``('set', 'subset')`` becomes an
      ordered categorical (small < medium < large) and a handful of text
      columns become plain categoricals.

    In every case the first CSV column is used as the index.

    Parameters
    ----------
    filepath : str
        Path of the CSV file to read.

    Returns
    -------
    pandas.DataFrame or None
        The parsed table, or ``None`` when the file name matches none of the
        substrings above.
    """
    name = os.path.basename(filepath)

    # Both feature tables share the same three-level header layout.
    if 'features' in name or 'echonest' in name:
        return pd.read_csv(filepath, index_col=0, header=[0, 1, 2])

    if 'genres' in name:
        return pd.read_csv(filepath, index_col=0)

    if 'tracks' not in name:
        return None

    tracks = pd.read_csv(filepath, index_col=0, header=[0, 1])

    # Columns that hold Python list literals stored as text.
    list_columns = [('track', 'tags'), ('album', 'tags'), ('artist', 'tags'),
                    ('track', 'genres'), ('track', 'genres_all')]
    for key in list_columns:
        tracks[key] = tracks[key].map(ast.literal_eval)

    # Columns that hold timestamps stored as text.
    date_columns = [('track', 'date_created'), ('track', 'date_recorded'),
                    ('album', 'date_created'), ('album', 'date_released'),
                    ('artist', 'date_created'), ('artist', 'active_year_begin'),
                    ('artist', 'active_year_end')]
    for key in date_columns:
        tracks[key] = pd.to_datetime(tracks[key])

    subset_order = ('small', 'medium', 'large')
    subset_key = ('set', 'subset')
    try:
        tracks[subset_key] = tracks[subset_key].astype(
            'category', categories=subset_order, ordered=True)
    except (ValueError, TypeError):
        # the categories and ordered arguments were removed in pandas 0.25
        ordered_dtype = pd.CategoricalDtype(categories=subset_order, ordered=True)
        tracks[subset_key] = tracks[subset_key].astype(ordered_dtype)

    # Text columns stored as unordered categoricals.
    category_columns = [('track', 'genre_top'), ('track', 'license'),
                        ('album', 'type'), ('album', 'information'),
                        ('artist', 'bio')]
    for key in category_columns:
        tracks[key] = tracks[key].astype('category')

    return tracks


In [15]:
# Read the track metadata table; `load` picks its parsing rules from the file name.
tracks = load(filepath="../../fma_metadata/tracks.csv")

In [17]:
# Per-track genre scores; the first CSV column becomes the index.
classified_genre = pd.read_csv(
    "../../Important Data/optimized_data_prob_final.csv",
    index_col=[0],
)

In [19]:
# Preview the table read from optimized_data_prob_final.csv.
classified_genre

Unnamed: 0_level_0,Rock,Electronic,Pop,Hip-Hop,Folk
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2,0.003317,0.011074,0.015218,0.968625,0.001766
3,0.043605,0.034076,0.113798,0.770304,0.038217
5,0.007539,0.017850,0.028582,0.936075,0.009954
10,0.187353,0.137673,0.620792,0.023519,0.030663
134,0.020442,0.027186,0.029139,0.916430,0.006803
...,...,...,...,...,...
155315,0.977319,0.005022,0.012689,0.001987,0.002984
155316,0.994743,0.000778,0.002519,0.000065,0.001896
155317,0.971450,0.009835,0.008622,0.000944,0.009149
155318,0.996034,0.001604,0.001805,0.000137,0.000421


In [21]:
# Keep only tracks whose top genre is one of the classifier's genre columns,
# then retain the interest, favorites and listens columns of the 'track' group.
has_classified_genre = tracks['track', 'genre_top'].isin(classified_genre.columns)
engagement_columns = ["interest", "favorites", "listens"]
track_recommendation_data = pd.DataFrame(
    tracks[has_classified_genre]['track'][engagement_columns]
)

In [23]:
# Preview the interest/favorites/listens columns of the retained tracks.
track_recommendation_data

Unnamed: 0_level_0,interest,favorites,listens
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2,4656,2,1293
3,1470,1,514
5,1933,6,1151
10,54881,178,50135
134,1126,3,943
...,...,...,...
155315,153,1,128
155316,122,1,102
155317,194,1,165
155318,214,2,168


In [25]:
# Inner-join the genre scores with the per-track counters on their shared index.
recommendation_data = pd.merge(
    classified_genre,
    track_recommendation_data,
    how="inner",
    left_index=True,
    right_index=True,
)

In [27]:
# recommendation_data.to_csv("optimized_recommendation_data.csv")

In [30]:
# Preview the merged table (genre scores plus interest/favorites/listens).
recommendation_data

Unnamed: 0_level_0,Rock,Electronic,Pop,Hip-Hop,Folk,interest,favorites,listens
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2,0.003317,0.011074,0.015218,0.968625,0.001766,4656,2,1293
3,0.043605,0.034076,0.113798,0.770304,0.038217,1470,1,514
5,0.007539,0.017850,0.028582,0.936075,0.009954,1933,6,1151
10,0.187353,0.137673,0.620792,0.023519,0.030663,54881,178,50135
134,0.020442,0.027186,0.029139,0.916430,0.006803,1126,3,943
...,...,...,...,...,...,...,...,...
155315,0.977319,0.005022,0.012689,0.001987,0.002984,153,1,128
155316,0.994743,0.000778,0.002519,0.000065,0.001896,122,1,102
155317,0.971450,0.009835,0.008622,0.000944,0.009149,194,1,165
155318,0.996034,0.001604,0.001805,0.000137,0.000421,214,2,168


In [32]:
# Genre scores of the query item; the first CSV column becomes the index.
bank_acc_prob = pd.read_csv(
    "bank_acc_prob.csv",
    index_col=[0],
)

In [34]:
# Show the row(s) read from bank_acc_prob.csv.
bank_acc_prob

Unnamed: 0,0,1,2,3,4
0,0.037912,0.15467,0.036653,0.764886,0.005878


In [36]:
# interaction_score = recommendation_data["listens"] +  recommendation_data["favorites"] +  recommendation_data["interest"]

In [38]:
# interaction_score

In [40]:
# classified_genre.shape

In [42]:
from sklearn.metrics.pairwise import cosine_similarity

# Compare the query row(s) against the genre columns of `recommendation_data`
# rather than `classified_genre`. Later cells add these similarities to arrays
# derived from `recommendation_data` and use their positions with
# `recommendation_data.iloc[...]`, so both must describe the same rows in the
# same order. The inner merge that builds `recommendation_data` can drop tracks
# that are present in `classified_genre`; when it drops none, this yields
# exactly the same values as before.
# NOTE(review): columns are matched by position, so this assumes the columns of
# `bank_acc_prob` are in the same genre order as `classified_genre` -- confirm.
genre_columns = classified_genre.columns
new_song_similarities = cosine_similarity(
    bank_acc_prob,
    recommendation_data[genre_columns],
)

In [70]:
# NOTE(review): `normalized_features` is first assigned in a later cell, so on a
# fresh kernel run top-to-bottom this line raises NameError. Move this check
# below the cell that fills `normalized_features`, or delete it.
normalized_features["normalized_listens"].shape

(1,)

In [82]:
# Empty frame with one "normalized_*" column per source column; filled in later.
normalized_features = pd.DataFrame(
    columns=[f"normalized_{source}" for source in ("listens", "favorites", "interest")]
)

In [86]:
from sklearn.preprocessing import MinMaxScaler

# Min-max scale each source column on its own (the scaler is refit per column)
# and store the result under the matching "normalized_*" name.
scaler = MinMaxScaler()
for source_column in ("listens", "favorites", "interest"):
    scaled = scaler.fit_transform(recommendation_data[[source_column]])
    normalized_features[f"normalized_{source_column}"] = scaled[:, 0]


In [92]:
# Blend: 0.5 x cosine similarity plus weighted min-max-scaled columns.
# Terms are accumulated in the order listed here.
popularity_weights = {
    "normalized_listens": 0.3,
    "normalized_favorites": 0.1,
    "normalized_interest": 0.1,
}
final_scores = 0.5 * new_song_similarities[0]
for feature_name, weight in popularity_weights.items():
    final_scores = final_scores + weight * normalized_features[feature_name]

In [116]:
# Order row positions by cosine similarity, highest first, and keep the top five.
# NOTE(review): the blended `final_scores` is not used for this ranking -- confirm
# that ranking on similarity alone is intended.
ranked_positions = new_song_similarities[0].argsort()[::-1]
top_indices = ranked_positions[:5]
recommendations = recommendation_data.iloc[top_indices]

In [98]:
# np.argsort(new_song_similarities)[::-1][:10]

In [100]:
# interaction_score

In [102]:
# np.argsort(new_song_similarities)[::-1][0][:10].shape

In [104]:
# import numpy as np

# def mean_score_top_n(similarity_scores, interactions, N=10):
#     top_indices = np.argsort(new_song_similarities)[::-1][0][:10]
#     return pd.DataFrame(interactions).iloc[top_indices].mean()

# mean_score_model1 = mean_score_top_n(final_scores, interaction_score)
# # mean_score_model2 = mean_score_top_n(sim2, weighted_interactions)


In [106]:
# mean_score_model1

In [108]:
# import seaborn as sns
# sns.histplot(new_song_similarities.flatten())

In [120]:
# Show the five rows selected by the similarity ranking.
recommendations

Unnamed: 0_level_0,Rock,Electronic,Pop,Hip-Hop,Folk,interest,favorites,listens
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
107024,0.02622,0.156436,0.050469,0.763469,0.003407,1656,4,1018
11780,0.033051,0.146381,0.056441,0.75763,0.006497,5087,4,1963
53997,0.016767,0.158677,0.036482,0.786197,0.001877,245,1,121
146681,0.048417,0.16641,0.04202,0.735476,0.007677,1601,2,1390
60518,0.025413,0.13954,0.040134,0.788245,0.006668,180,1,70


In [122]:
# Look up the title of each recommended track by its track_id.
track_name = tracks['track'].loc[recommendations.index, "title"]

In [124]:
# Look up the artist name of each recommended track by its track_id.
artist_name = tracks['artist'].loc[recommendations.index, "name"]

In [126]:
# Titles of the recommended tracks.
track_name

track_id
107024                                 LES GNOMES
11780     What Kinda Instrumental     {Both Nice}
53997                                  Phantastic
146681                                  moonrawks
60518                                 Eeekin Days
Name: title, dtype: object

In [128]:
# Artist names of the recommended tracks.
artist_name

track_id
107024     Alaclair Ensemble
11780              6th Sense
53997         Just Plain Ant
146681    Tha Silent Partner
60518                   MUTE
Name: name, dtype: object