In [1]:
import pandas as pd

In [39]:
# Load the base recommendation table; the first CSV column becomes the index.
df = pd.read_csv("base_recommendation_data.csv",index_col=[0])

In [41]:
# Show the loaded frame (the rendered output elides the middle rows).
df

Unnamed: 0_level_0,Rock,Electronic,Pop,Hip-Hop,Folk,interest,favorites,listens
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2,0.003852,0.005550,0.005266,0.984290,0.001042,4656,2,1293
3,0.005080,0.005721,0.008029,0.976267,0.004902,1470,1,514
5,0.001798,0.011358,0.007218,0.978299,0.001327,1933,6,1151
10,0.077985,0.412050,0.500906,0.002317,0.006742,54881,178,50135
134,0.016695,0.003444,0.001296,0.978429,0.000135,1126,3,943
...,...,...,...,...,...,...,...,...
155315,0.996664,0.002313,0.000518,0.000307,0.000198,153,1,128
155316,0.999386,0.000134,0.000190,0.000005,0.000285,122,1,102
155317,0.987135,0.010368,0.000806,0.000102,0.001588,194,1,165
155318,0.999207,0.000713,0.000058,0.000012,0.000011,214,2,168


In [116]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from mlxtend.frequent_patterns import apriori, association_rules



# Genre probability columns
genre_cols = ['Rock', 'Electronic', 'Pop', 'Hip-Hop', 'Folk']

# Interaction score: row-wise total of the three engagement counters
df['interaction_score'] = df.loc[:, ['interest', 'favorites', 'listens']].sum(axis='columns')

# Independent copy of the genre probabilities, so later steps never touch df
genre_probs = df.loc[:, genre_cols].copy()
def top2_one_hot(row, k=3):
    """Flag the ``k`` largest values of ``row`` with 1 and everything else with 0.

    NOTE: despite the name, this function has always kept the top *three*
    entries, so ``k`` defaults to 3 to leave existing results unchanged.
    Pass ``k=2`` for a literal top-2 encoding.

    Parameters
    ----------
    row : pd.Series
        Numeric values for one track, indexed by genre name.
    k : int, default 3
        Number of highest-valued entries to mark.

    Returns
    -------
    pd.Series
        Integer 0/1 series with the same index as ``row``.
    """
    # nlargest skips NaN and, on ties, keeps the entries that appear first.
    top_k = row.nlargest(k).index
    binary = pd.Series(0, index=row.index)
    binary.loc[top_k] = 1
    return binary

# One row per track, one column per genre; a cell is set when that genre is
# among the track's highest-probability genres (see top2_one_hot).
# Cast to bool: mlxtend's apriori expects a boolean one-hot frame and emits a
# DeprecationWarning (and runs slower) when given integer 0/1 input.
df_genres_encoded = genre_probs.apply(top2_one_hot, axis=1).astype(bool)

# Run Apriori: frequent genre combinations, then rules with confidence >= 0.2
frequent_items = apriori(df_genres_encoded, min_support=0.001, use_colnames=True)
rules = association_rules(frequent_items, metric='confidence', min_threshold=0.2)

# Filter tracks matching any Apriori antecedent
def matches_rules(row, rules_df=None):
    """Return True when ``row`` satisfies the antecedent of at least one rule.

    A rule's antecedent is satisfied when every genre it names is flagged
    (compares equal to 1) in ``row``.

    Parameters
    ----------
    row : pd.Series
        One-hot genre flags for a single track, indexed by genre name.
    rules_df : pd.DataFrame, optional
        Table with an ``antecedents`` column of genre collections. Defaults to
        the notebook-level ``rules`` frame, so ``df.apply(matches_rules, axis=1)``
        keeps working unchanged.

    Returns
    -------
    bool
        False when there are no rules at all.
    """
    table = rules if rules_df is None else rules_df
    # Only the antecedents column is needed; iterating it directly avoids
    # iterrows() building a full Series per rule for every track.
    return any(
        all(row[genre] == 1 for genre in antecedent)
        for antecedent in table['antecedents']
    )

# Keep only the tracks whose encoded genres satisfy at least one rule antecedent
filtered_track_ids = df_genres_encoded[
    df_genres_encoded.apply(matches_rules, axis=1)
].index
filtered_df = df.loc[filtered_track_ids]

# Genre probabilities of the new song (sample Hip-Hop heavy track);
# the first CSV column becomes the index
new_song = pd.read_csv("central_cee_hiphop_prob.csv", index_col=[0])

# Inputs for the cosine-similarity step: genre vectors of the surviving tracks
# and their interaction scores, both in filtered_df row order
dataset_vecs = filtered_df[genre_cols].to_numpy()
interaction_scores = filtered_df['interaction_score'].to_numpy()





In [130]:
# List the mined association rules, highest support first.
rules.sort_values(by='support', ascending=False)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(Electronic),(Rock),0.795323,0.858968,0.655749,0.824507,0.95988,1.0,-0.027408,0.803629,-0.169579,0.656706,-0.244355,0.793961
1,(Rock),(Electronic),0.858968,0.795323,0.655749,0.763414,0.95988,1.0,-0.027408,0.86513,-0.228612,0.656706,-0.155896,0.793961
2,(Rock),(Pop),0.858968,0.773425,0.63413,0.738247,0.954516,1.0,-0.030217,0.865604,-0.252548,0.635234,-0.155263,0.779073
3,(Pop),(Rock),0.773425,0.858968,0.63413,0.819899,0.954516,1.0,-0.030217,0.783069,-0.173767,0.635234,-0.277026,0.779073
8,(Electronic),(Pop),0.795323,0.773425,0.573121,0.720615,0.931719,1.0,-0.042001,0.810976,-0.263652,0.575639,-0.233082,0.730816
9,(Pop),(Electronic),0.773425,0.795323,0.573121,0.741017,0.931719,1.0,-0.042001,0.790312,-0.244398,0.575639,-0.265324,0.730816
16,"(Electronic, Rock)",(Pop),0.655749,0.773425,0.435284,0.663797,0.858257,1.0,-0.071888,0.673923,-0.324208,0.43796,-0.483849,0.613299
21,(Pop),"(Electronic, Rock)",0.773425,0.655749,0.435284,0.562801,0.858257,1.0,-0.071888,0.787401,-0.421601,0.43796,-0.270001,0.613299
19,(Electronic),"(Rock, Pop)",0.795323,0.63413,0.435284,0.547305,0.86308,1.0,-0.069054,0.808204,-0.436645,0.437837,-0.237312,0.616866
18,"(Rock, Pop)",(Electronic),0.63413,0.795323,0.435284,0.686427,0.86308,1.0,-0.069054,0.652726,-0.302456,0.437837,-0.532036,0.616866


In [118]:
# cosine_similarity compares feature columns by position, not by name. When the
# new song's frame carries every genre column, reorder it to genre_cols so it
# lines up with dataset_vecs (built from filtered_df[genre_cols]); otherwise
# fall back to the frame exactly as loaded.
# NOTE(review): confirm the column names/order of the new-song CSV.
if set(genre_cols).issubset(new_song.columns):
    query_vecs = new_song[genre_cols]
else:
    query_vecs = new_song

# Row 0 of the result: similarity of the first new-song row to every filtered track
similarities = cosine_similarity(query_vecs, dataset_vecs)[0]

# Top-N Recommendations: positions of the N most similar tracks, best first
N = 5
top_n_indices = np.argsort(similarities)[-N:][::-1]
top_n_tracks = filtered_df.iloc[top_n_indices]
top_n_scores = interaction_scores[top_n_indices]

# Evaluation: average interaction score of the recommended tracks
top_n_mean_interaction_score = np.mean(top_n_scores)

# Print
print("Top-N Mean Interaction Score:", top_n_mean_interaction_score)
print("\nRecommended Tracks:\n", top_n_tracks)

Top-N Mean Interaction Score: 8413.0

Recommended Tracks:
               Rock  Electronic       Pop   Hip-Hop      Folk  interest  \
track_id                                                                 
9854      0.364746    0.277586  0.156572  0.130872  0.070225       703   
128709    0.282202    0.280077  0.158949  0.246961  0.031811      9312   
64995     0.263233    0.200057  0.158836  0.282118  0.095756     14001   
83229     0.307430    0.163581  0.194597  0.228911  0.105481       517   
129234    0.373601    0.279812  0.162009  0.167174  0.017404      1852   

          favorites  listens  interaction_score  
track_id                                         
9854              2      292                997  
128709            7     4152              13471  
64995             8     9110              23119  
83229             0      438                955  
129234            0     1671               3523  
