In [2]:
import pandas as pd

In [4]:
# Load the per-track table; the first CSV column is used as the index.
df = pd.read_csv("optimized_recommendation_data.csv",index_col=[0])

In [6]:
# Show the loaded frame; the rendered output elides the middle rows.
df

Unnamed: 0_level_0,Rock,Electronic,Pop,Hip-Hop,Folk,interest,favorites,listens
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2,0.003317,0.011074,0.015218,0.968625,0.001766,4656,2,1293
3,0.043605,0.034076,0.113798,0.770304,0.038217,1470,1,514
5,0.007539,0.017850,0.028582,0.936075,0.009954,1933,6,1151
10,0.187353,0.137673,0.620792,0.023519,0.030663,54881,178,50135
134,0.020442,0.027186,0.029139,0.916430,0.006803,1126,3,943
...,...,...,...,...,...,...,...,...
155315,0.977319,0.005022,0.012689,0.001987,0.002984,153,1,128
155316,0.994743,0.000778,0.002519,0.000065,0.001896,122,1,102
155317,0.971450,0.009835,0.008622,0.000944,0.009149,194,1,165
155318,0.996034,0.001604,0.001805,0.000137,0.000421,214,2,168


In [25]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from mlxtend.frequent_patterns import apriori, association_rules



# Genre probability columns used for both rule mining and similarity.
genre_cols = ['Rock', 'Electronic', 'Pop', 'Hip-Hop', 'Folk']

# Interaction score: row-wise total of the three engagement counters.
# Adds a column to `df` in place; re-running recomputes the same values.
engagement_cols = ['interest', 'favorites', 'listens']
df['interaction_score'] = df.loc[:, engagement_cols].sum(axis=1)

# Independent copy of the genre probabilities, decoupled from `df`.
genre_probs = df.loc[:, genre_cols].copy()
def top3_one_hot(row, k=3):
    """Flag the ``k`` highest-valued entries of ``row`` with 1 and the rest with 0.

    Parameters
    ----------
    row : pandas.Series
        Numeric values keyed by label (here: one track's genre probabilities).
    k : int, default 3
        How many of the largest entries to flag.  The default keeps the
        original top-3 behaviour, so ``DataFrame.apply(top3_one_hot, axis=1)``
        is unaffected.

    Returns
    -------
    pandas.Series
        Integer 0/1 series sharing ``row``'s index.  Which labels win on ties
        is decided by ``Series.nlargest`` with its default ``keep='first'``.
    """
    # Labels of the k largest values (previously held in a local misnamed `top2`).
    top_labels = row.nlargest(k).index
    binary = pd.Series(0, index=row.index)
    binary.loc[top_labels] = 1
    return binary

# One-hot encode every track via `top3_one_hot` (row-wise apply).
df_genres_encoded = genre_probs.apply(top3_one_hot, axis=1)

# Run Apriori
# mlxtend expects a boolean (or 0/1) basket matrix and warns on non-bool
# dtypes, so cast for this call only; `df_genres_encoded` itself stays 0/1
# integers for the `== 1` checks in `matches_rules`.
frequent_items = apriori(df_genres_encoded.astype(bool), min_support=0.001, use_colnames=True)
# Keep every rule whose confidence is at least 0.01.
rules = association_rules(frequent_items, metric='confidence', min_threshold=0.01)

# Filter tracks matching any Apriori antecedent
def matches_rules(row, antecedents=None):
    """Return True when ``row`` satisfies the antecedent of at least one rule.

    Parameters
    ----------
    row : pandas.Series
        One-hot genre flags for one track, indexed by genre name.
    antecedents : iterable of collections of str, optional
        Antecedent itemsets to test against.  When omitted, the
        ``antecedents`` column of the notebook-level ``rules`` frame is used,
        which is what ``DataFrame.apply(matches_rules, axis=1)`` relies on.

    Returns
    -------
    bool
        True if, for some antecedent, every genre in it is flagged 1 in
        ``row``; False otherwise (including when there are no antecedents).
    """
    if antecedents is None:
        # Read the column directly: iterrows() would build a full Series per
        # rule on every call, and only the antecedents are needed here.
        antecedents = rules['antecedents']
    return any(
        all(row[genre] == 1 for genre in itemset)
        for itemset in antecedents
    )

# Boolean mask over tracks: True where the track's flagged genres satisfy at
# least one rule antecedent.
# NOTE(review): the rules displayed in this notebook include single-genre
# antecedents for Rock, Pop, Electronic and Hip-Hop, and every encoded row
# flags three of the five genres, so this mask looks like it is True for every
# track (i.e. the filter removes nothing) — confirm that is intended.
rule_match_mask = df_genres_encoded.apply(matches_rules, axis=1)
filtered_track_ids = df_genres_encoded[rule_match_mask].index
filtered_df = df.loc[filtered_track_ids]

# Genre probabilities of the song to recommend for, read from CSV with the
# first column as index (the file name suggests a Hip-Hop-heavy sample).
new_song = pd.read_csv("central_cee_hiphop_prob.csv",index_col=[0])

# Inputs for the similarity step: genre vectors of the retained tracks and
# their interaction scores, both in `filtered_df` row order.
interaction_scores = filtered_df['interaction_score'].values
dataset_vecs = filtered_df[genre_cols].values





In [41]:
# Show the mined rules ordered by support, highest first.
rules.sort_values(by='support', ascending=False)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
2,(Rock),(Pop),0.695307,0.939235,0.636775,0.915819,0.975069,1.0,-0.016281,0.721837,-0.077419,0.638201,-0.385354,0.796896
3,(Pop),(Rock),0.939235,0.695307,0.636775,0.677972,0.975069,1.0,-0.016281,0.946170,-0.296159,0.638201,-0.056892,0.796896
9,(Electronic),(Pop),0.654642,0.939235,0.596265,0.910827,0.969754,1.0,-0.018597,0.681426,-0.082830,0.597693,-0.467511,0.772834
8,(Pop),(Electronic),0.939235,0.654642,0.596265,0.634841,0.969754,1.0,-0.018597,0.945776,-0.339185,0.597693,-0.057333,0.772834
0,(Rock),(Electronic),0.695307,0.654642,0.358479,0.515569,0.787559,1.0,-0.096698,0.712915,-0.469581,0.361563,-0.402691,0.531582
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
70,(Hip-Hop),"(Pop, Folk)",0.349856,0.338131,0.008530,0.024382,0.072107,1.0,-0.109767,0.678410,-0.951907,0.012554,-0.474035,0.024804
51,"(Folk, Hip-Hop)",(Rock),0.013152,0.695307,0.002388,0.181604,0.261185,1.0,-0.006756,0.372305,-0.741362,0.003383,-1.685969,0.092519
50,"(Rock, Hip-Hop)",(Folk),0.089457,0.360960,0.002388,0.026699,0.073967,1.0,-0.029902,0.656569,-0.932202,0.005331,-0.523068,0.016658
64,"(Folk, Hip-Hop)",(Electronic),0.013152,0.654642,0.002233,0.169811,0.259396,1.0,-0.006376,0.416000,-0.743139,0.003356,-1.403849,0.086611


In [27]:
# Cosine similarity of the new song (first row of `new_song`) to every retained track.
# NOTE(review): assumes the columns of `new_song` are in the same order as
# `genre_cols`; nothing here enforces it — confirm against the CSV.
similarities = cosine_similarity(new_song, dataset_vecs)[0]

# Top-N Recommendations: positions of the N most similar tracks, best first.
N = 5
top_n_indices = np.argsort(similarities)[::-1][:N]
top_n_scores = interaction_scores[top_n_indices]
top_n_tracks = filtered_df.iloc[top_n_indices]

# Evaluation: average interaction score of the recommended tracks.
top_n_mean_interaction_score = top_n_scores.mean()

# Print
print("Top-N Mean Interaction Score:", top_n_mean_interaction_score)
print("\nRecommended Tracks:\n", top_n_tracks)

Top-N Mean Interaction Score: 4483.4

Recommended Tracks:
               Rock  Electronic       Pop   Hip-Hop      Folk  interest  \
track_id                                                                 
804       0.282178    0.268589  0.164715  0.174008  0.110510       608   
148250    0.354311    0.219883  0.135733  0.183532  0.106543       154   
129234    0.346686    0.238961  0.149725  0.188304  0.076324      1852   
47917     0.318186    0.265131  0.150428  0.215362  0.050893      3337   
55717     0.271854    0.252538  0.193714  0.207852  0.074042      7887   

          favorites  listens  interaction_score  
track_id                                         
804               3      218                829  
148250            0      117                271  
129234            0     1671               3523  
47917             8     1624               4969  
55717            21     4917              12825  
