In [None]:
import pandas as pd
import numpy as np
from scipy.sparse import csc_matrix
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
from tqdm import tqdm
from sklearn.metrics.pairwise import cosine_similarity
import json 
from scipy.stats import pearsonr
from numpy import dot
from numpy.linalg import norm

In [None]:
# Interaction table; only its 'song' and 'user' columns are read in this cell.
# (Alternative input that was tried: 'max_normalized_triplets.csv'.)
df = pd.read_csv('filtered_triplets.csv')

# Distinct ids in sorted order, so every id gets a stable position.
songs = sorted(set(df['song']))
users = sorted(set(df['user']))

In [None]:
# Load the two nested interaction look-ups from JSON.
#   item_item[song][user] -> stored interaction value
#   user_user[user][song] -> stored interaction value
# `with` guarantees each handle is closed even when json.load raises,
# which the previous manual open()/close() pairs did not.
with open('item_item.json', 'r') as a_file:
    item_item = json.load(a_file)
with open('user_user.json', 'r') as a_file:
    user_user = json.load(a_file)

In [None]:
# Map every id to its position in the sorted `users` / `songs` lists.
# These positions are the coordinates used by the dense vectors and the
# similarity matrix built further down.
user_id_index = {user_id: position for position, user_id in enumerate(users)}
song_id_index = {song_id: position for position, song_id in enumerate(songs)}

In [None]:
# Load the song search look-up. The context manager closes the handle even
# if json.load raises (the manual open()/close() pair leaked it in that case).
with open('song_search.json', 'r') as a_file:
    song_name = json.load(a_file)

In [None]:
# Precomputed feature vectors, keyed by song id and by user id respectively.
# NOTE(review): the file names suggest 128-dimensional, normalized vectors — confirm.
with open("/content/drive/MyDrive/Metadata/full_normalized_song_features_128.json") as song_features_fp:
    track_feature_dic = json.load(song_features_fp)

with open("/content/drive/MyDrive/Metadata/weighted_normalized_sum_user_features_128.json") as user_features_fp:
    user_feature_dic = json.load(user_features_fp)

Item-item Collaborative Filtering Implementation

In [None]:
def create_vec_song(song):
    """Return the dense float32 interaction vector of `song` over all users.

    The vector has one slot per entry of `users`; slot `user_id_index[u]`
    holds `item_item[song][u]`, and users without an entry stay at 0.
    """
    dense = np.zeros(len(users), dtype='float32')
    for user_id, value in item_item[song].items():
        dense[user_id_index[user_id]] = value
    return dense

def create_vec_user(user):
    """Return the dense float32 interaction vector of `user` over all songs.

    The vector has one slot per entry of `songs`; slot `song_id_index[s]`
    holds `user_user[user][s]`, and songs without an entry stay at 0.
    """
    dense = np.zeros(len(songs), dtype='float32')
    for song_id, value in user_user[user].items():
        dense[song_id_index[song_id]] = value
    return dense


def pearson_correlation(song1_vec, song2_vec):
    """Return the Pearson correlation coefficient of the two vectors.

    Only the coefficient from `scipy.stats.pearsonr` is returned; the
    accompanying p-value is discarded.
    """
    coefficient, _p_value = pearsonr(song1_vec, song2_vec)
    return coefficient

def cosine_sim(song1_vec, song2_vec):
    """Return the cosine similarity of two equally sized vectors.

    Parameters
    ----------
    song1_vec, song2_vec : array-like of numbers
        Anything accepted by `numpy.dot` / `numpy.linalg.norm`.

    Returns
    -------
    float
        dot(a, b) / (|a| * |b|), or 0.0 when either vector has zero norm.
        Without that guard the division is 0/0, which yields NaN (plus a
        RuntimeWarning) and would turn every sum it is added to into NaN.
    """
    denominator = norm(song1_vec) * norm(song2_vec)
    if denominator == 0:
        return 0.0
    return dot(song1_vec, song2_vec) / denominator
    
def song_vector_cosine(song1, song2, part="f"):
    """Cosine similarity between the stored feature vectors of two songs.

    `part` selects which components are compared:
      * "m"            -> the first 64 components,
      * "a"            -> the components from index 64 onwards,
      * anything else  -> the full vector (default "f").
    NOTE(review): what the "m" and "a" halves represent is not visible here — confirm.
    """
    first = track_feature_dic[song1]
    second = track_feature_dic[song2]
    if part == "m":
        first, second = first[:64], second[:64]
    elif part == "a":
        first, second = first[64:], second[64:]
    return cosine_sim(first, second)

def user_vector_cosine(user1, user2):
    """Cosine similarity between the stored feature vectors of two users."""
    first_features = user_feature_dic[user1]
    second_features = user_feature_dic[user2]
    return cosine_sim(first_features, second_features)

def compute_score(user, song, met=1, mode="II"):
    """Predict a score for (user, song) as a similarity-weighted average.

    Parameters
    ----------
    user, song : keys of `user_user` / `item_item`.
    met : similarity used between the target and each neighbour
        0 -> Pearson correlation of the dense interaction vectors,
        1 -> cosine similarity of the dense interaction vectors (default),
        anything else -> cosine similarity of the precomputed feature vectors.
    mode : "II" for item-item (neighbours are the other songs in
        `user_user[user]`, compared with `song`); any other value selects
        user-user (neighbours are the other users in `item_item[song]`,
        compared with `user`).

    Returns
    -------
    sum(sim * value) / sum(sim) over the neighbours, or 0 when the
    similarity sum is exactly 0 (including when there are no neighbours).

    NOTE(review): the normaliser is the signed sum of similarities; with
    Pearson (met=0) negative weights can cancel — confirm this is intended.
    """
    if mode == "II":
        target_key, neighbour_values = song, user_user[user]
        build_vec, feature_sim = create_vec_song, song_vector_cosine
    else:
        target_key, neighbour_values = user, item_item[song]
        build_vec, feature_sim = create_vec_user, user_vector_cosine

    if met == 0:
        vector_sim = pearson_correlation
    elif met == 1:
        vector_sim = cosine_sim
    else:
        vector_sim = None

    # The dense target vector is only needed by the interaction-based metrics.
    # Building it for the feature-based metric was a wasted allocation on
    # every call, which adds up when all songs are scored for a user.
    target_vec = build_vec(target_key) if vector_sim is not None else None

    score_num = 0
    score_denom = 0
    for neighbour, value in neighbour_values.items():
        if neighbour == target_key:
            continue  # never compare the target with itself
        if vector_sim is not None:
            sim = vector_sim(target_vec, build_vec(neighbour))
        else:
            sim = feature_sim(target_key, neighbour)
        score_num += sim * value
        score_denom += sim

    if score_denom == 0:
        return 0
    return score_num / score_denom

In [None]:
# Pairwise cosine similarity between the song feature vectors.
# Rows/columns follow the order of `songs`, because every lookup into this
# matrix goes through song_id_index (which is built from `songs`). Ordering
# by sorted(track_feature_dic) only matched when both key sets were
# identical; otherwise the indices silently pointed at the wrong songs.
song_song_similarity = cosine_similarity(np.array([track_feature_dic[song_id] for song_id in songs]))

Sort the recommendations based on CF score and mean similarity

In [None]:
def compute_metric(user, k=100):
    """Mean feature similarity between a user's top-`k` scored songs and their history.

    Every song in `songs` is scored with `compute_score(user, song)`; the `k`
    highest-scoring songs are kept, and for each of them the average of
    `song_song_similarity` against all songs in `user_user[user]` is computed.

    Parameters
    ----------
    user : key of `user_user`.
    k : number of top-scored songs to evaluate (default 100, the value that
        used to be hard-coded). Expected to be non-negative.

    Returns
    -------
    dict mapping song id -> mean similarity to the user's history. The mean
    is 0.0 when the history is empty (this used to raise ZeroDivisionError).
    """
    scored = pd.DataFrame(
        [{"song": song_id, "play_count": compute_score(user, song_id)} for song_id in songs],
        columns=["song", "play_count"],
    )
    top_songs = scored.sort_values(by='play_count', ascending=False)["song"].head(k)

    # The history positions do not depend on the candidate: resolve them once.
    history_idx = [song_id_index[song_id] for song_id in user_user[user]]
    history_size = len(history_idx)

    mean_sim = {}
    for candidate in top_songs:
        # Index the matrix row directly instead of copying it into a list.
        row = song_song_similarity[song_id_index[candidate]]
        if history_size:
            mean_sim[candidate] = sum(row[idx] for idx in history_idx) / history_size
        else:
            mean_sim[candidate] = 0.0
    return mean_sim

In [None]:
# Recommendations saved by an earlier run; the generation loop skips every
# user that already has an entry here.
with open("/content/drive/My Drive/Recommendations/content_top100.json") as f:
    content_rec = json.load(f)
def gen_top10(user, k=100):
    """Rank every song for `user` with the feature-based score and keep the best `k`.

    Despite the name, the default keeps 100 entries (the value that used to
    be hard-coded); pass `k=10` for an actual top 10.

    Returns a list of (song, score) tuples in descending score order, where
    the score is `compute_score(user, song, met=2)`. Songs with equal scores
    keep their relative order from `songs`.
    """
    scored = [(song_id, compute_score(user, song_id, met=2)) for song_id in songs]
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored[:k]
# Compute recommendations for every user that is not in the checkpoint yet.
# The whole dictionary is rewritten after every 100 newly processed users;
# anything computed after the last multiple of 100 is NOT written by this loop.
global_count = 0
for user in tqdm(users, ncols=100):
    if user not in content_rec:
        content_rec[user] = gen_top10(user)
        global_count += 1
        if global_count % 100 == 0:
            with open("/content/drive/My Drive/Recommendations/content_top100.json", "w") as f:
                json.dump(content_rec, f)

In [None]:
# Write the complete recommendation dictionary back to the checkpoint file.
with open("/content/drive/My Drive/Recommendations/content_top100.json", "w") as checkpoint_fp:
    json.dump(content_rec, checkpoint_fp)

In [None]:
# Reload the saved top-100 lists and keep, per user, only the first element
# of each of the first 10 entries.
with open("/content/drive/My Drive/Recommendations/content_top100.json") as f:
    content_rec = json.load(f)

content_top10 = {
    user_id: [entry[0] for entry in ranked[:10]]
    for user_id, ranked in content_rec.items()
}

with open("/content/drive/My Drive/Recommendations/content_top10.json", "w") as f:
    json.dump(content_top10, f)

In [None]:
# Persist content_rec once more to the top-100 file (same target as the earlier save cell).
with open("/content/drive/My Drive/Recommendations/content_top100.json", "w") as checkpoint_fp:
    json.dump(content_rec, checkpoint_fp)

In [None]:
# Number of top-level entries in content_rec (the generation loop adds one per user).
len(content_rec)

In [None]:
# NOTE(review): `cf_rec` is not assigned anywhere in the visible notebook, so on a
# fresh kernel (Restart & Run All) this cell raises NameError. Because the file is
# opened with "w" before the name is evaluated, cf_top10.json is truncated first.
# Confirm where cf_rec is supposed to come from before running this cell.
with open("/content/drive/My Drive/Recommendations/cf_top10.json","w") as f:
    json.dump(cf_rec,f)

In [None]:
def top_10_songs(top_100):
    """Return the keys of `top_100` with the 10 largest values, best first.

    `top_100` is a mapping of key -> sortable value. Fewer than 10 keys are
    returned when the mapping is smaller; keys with equal values keep their
    insertion order.
    """
    ranked = sorted(top_100.items(), key=lambda item: item[1], reverse=True)
    return [key for key, _value in ranked[:10]]

In [None]:
# Pairwise cosine similarity between the song feature vectors.
# Rows/columns follow the order of `songs`, because make_recommendations
# indexes this matrix through song_id_index (built from `songs`). Ordering by
# sorted(track_feature_dic) only matched when both key sets were identical;
# otherwise the indices silently pointed at the wrong songs.
song_song_similarity = cosine_similarity(np.array([track_feature_dic[song_id] for song_id in songs]))
def make_recommendations(user, k_final, k=100):
    """Re-rank a user's candidate songs by mean similarity to their history.

    The first `k` entries of `final_recommendation_dic[user]` are taken as
    candidates (the first element of each entry is the song id). Each
    candidate is scored with the average of `song_song_similarity` against
    every song in `user_user[user]`, and the `k_final` best are returned.

    Parameters
    ----------
    user : key of `final_recommendation_dic` and `user_user`.
    k_final : number of song ids to return.
    k : number of candidates to consider (default 100, the value that used
        to be hard-coded).

    Returns
    -------
    list of song ids, highest mean similarity first; ties keep candidate
    order. With an empty history every mean is 0.0, so the first `k_final`
    candidates come back unchanged (this used to raise ZeroDivisionError).
    """
    candidates = [entry[0] for entry in final_recommendation_dic[user][:k]]

    # The history positions are the same for every candidate: resolve them once.
    history_idx = [song_id_index[song_id] for song_id in user_user[user]]
    history_size = len(history_idx)

    mean_sim = {}
    for candidate in candidates:
        row = song_song_similarity[song_id_index[candidate]]
        if history_size:
            mean_sim[candidate] = sum(row[idx] for idx in history_idx) / history_size
        else:
            mean_sim[candidate] = 0.0

    ranked = sorted(mean_sim.items(), key=lambda item: item[1], reverse=True)
    return [song_id for song_id, _score in ranked[:k_final]]

Generating top 10 recommendations for all users for all models considered

In [None]:
# Re-rank the saved content-based top-100 lists by mean similarity to each
# user's history and keep the best k per user.
# final_recommendation_dic has to be a module-level name: make_recommendations reads it.
with open("/content/drive/MyDrive/Recommendations/content_top100.json") as fp:
    final_recommendation_dic = json.load(fp)

k = 10
cf_top10_dic = {
    user_id: make_recommendations(user_id, k)
    for user_id in tqdm(final_recommendation_dic, ncols=100)
}

with open("/content/drive/MyDrive/Recommendations/content_top10_sim.json", "w") as fp:
    json.dump(cf_top10_dic, fp)

In [None]:
# NGCF embedding recommendations, re-ranked by mean similarity to each user's
# history (top k per user), exported as CSV.
# final_recommendation_dic has to be a module-level name: make_recommendations reads it.
with open("/content/drive/MyDrive/neural_graph_collaborative_filtering-master/NGCF/ngcf_embedding_recs.json") as fp:
    final_recommendation_dic = json.load(fp)

k = 10
ngcf_dic = {
    position: {'user': user_id, 'ngcf_embedding_predictions': make_recommendations(user_id, k)}
    for position, user_id in enumerate(tqdm(final_recommendation_dic, ncols=100))
}
ngcf_recs = pd.DataFrame.from_dict(ngcf_dic, orient='index')
ngcf_recs.to_csv("ngcf_embedding_top10.csv", index=False)

In [None]:
# NGCF recommendations: first k entries per user in their stored order
# (no similarity re-ranking), exported as CSV.
with open("/content/drive/MyDrive/neural_graph_collaborative_filtering-master/NGCF/ngcf_recs.json") as fp:
    final_recommendation_dic = json.load(fp)

k = 10
ngcf_dic = {
    position: {'user': user_id, 'ngcf_predictions': [entry[0] for entry in ranked[:k]]}
    for position, (user_id, ranked) in enumerate(tqdm(final_recommendation_dic.items(), ncols=100))
}
ngcf_recs = pd.DataFrame.from_dict(ngcf_dic, orient='index')
ngcf_recs.to_csv("ngcf_top10_unsorted.csv", index=False)

In [None]:
# NGCF embedding recommendations: first k entries per user in their stored
# order (no similarity re-ranking), exported as CSV.
with open("/content/drive/MyDrive/neural_graph_collaborative_filtering-master/NGCF/ngcf_embedding_recs.json") as fp:
    final_recommendation_dic = json.load(fp)

k = 10
ngcf_dic = {
    position: {'user': user_id, 'ngcf_embedding_predictions': [entry[0] for entry in ranked[:k]]}
    for position, (user_id, ranked) in enumerate(tqdm(final_recommendation_dic.items(), ncols=100))
}
ngcf_recs = pd.DataFrame.from_dict(ngcf_dic, orient='index')
ngcf_recs.to_csv("ngcf_embedding_top10_unsorted.csv", index=False)

In [None]:
# NGCF recommendations: every stored entry per user (first element of each
# entry only), exported as CSV.
with open("/content/drive/MyDrive/neural_graph_collaborative_filtering-master/NGCF/ngcf_recs.json") as fp:
    final_recommendation_dic = json.load(fp)

k = 10  # kept for parity with the sibling cells; not used by this export
ngcf_dic = {
    position: {'user': user_id, 'ngcf_predictions': [entry[0] for entry in ranked]}
    for position, (user_id, ranked) in enumerate(tqdm(final_recommendation_dic.items(), ncols=100))
}
ngcf_recs = pd.DataFrame.from_dict(ngcf_dic, orient='index')
ngcf_recs.to_csv("ngcf_top100.csv", index=False)

In [None]:
# NGCF embedding recommendations: every stored entry per user (first element
# of each entry only), exported as CSV.
with open("/content/drive/MyDrive/neural_graph_collaborative_filtering-master/NGCF/ngcf_embedding_recs.json") as fp:
    final_recommendation_dic = json.load(fp)

k = 10  # kept for parity with the sibling cells; not used by this export
ngcf_dic = {
    position: {'user': user_id, 'ngcf_embedding_predictions': [entry[0] for entry in ranked]}
    for position, (user_id, ranked) in enumerate(tqdm(final_recommendation_dic.items(), ncols=100))
}
ngcf_recs = pd.DataFrame.from_dict(ngcf_dic, orient='index')
ngcf_recs.to_csv("ngcf_embedding_top100.csv", index=False)