# Implicit Recommendation System
This script computes song recommendations from implicit feedback: the songs users have spotlighted and the songs they have saved.

In [15]:
# Load packages
import pandas as pd
import numpy as np
from math import sqrt
import breakinghits_script as bh
import pickle
from scipy.sparse import csr_matrix

In [16]:
# Load the two implicit-feedback tables through the project helper module.
# get_sparse() below reads column 0 of each frame as the user id and
# column 1 as the song id.
spotlights = bh.get_spotlights() # dataframe of songs that users spotlighted
saves = bh.get_saves()           # dataframe of songs that users saved

In [17]:
def get_sparse(s):
    """Build the user-by-song implicit feedback matrix for one interaction table.

    Parameters
    ----------
    s : pandas.DataFrame
        Interaction records. Column 0 is read as the user id and column 1 as
        the song id; both must be convertible with ``float()``. Rows holding
        an empty string or a missing value in any column are ignored. The
        frame itself is left unmodified.

    Returns
    -------
    pandas.DataFrame
        Dense frame with one row per ``id`` in the ``users`` table and one
        column per ``id`` in the ``user_musics`` table. A cell is 1 when the
        (user, song) pair occurs in ``s`` and 0 otherwise. Pairs whose user
        or song id is absent from those tables are skipped.
    """
    # Clean a copy: the caller's frame must not lose rows or gain columns
    # as a side effect of building the matrix.
    interactions = s.replace('', np.nan).dropna(axis=0)

    user_list = bh.pull_dataframe('select * from users')['id']
    song_list = bh.pull_dataframe('select * from user_musics')['id']

    ratemat = pd.DataFrame(
        np.zeros((len(user_list), len(song_list))),
        index=list(user_list),
        columns=list(song_list),
    )

    # Build the membership lookups once instead of once per interaction row.
    known_users = set(user_list)
    known_songs = set(song_list)

    for raw_user, raw_song in zip(interactions.iloc[:, 0], interactions.iloc[:, 1]):
        user_id = float(raw_user)
        if user_id not in known_users:
            continue
        song_id = float(raw_song)
        if song_id in known_songs:
            ratemat.loc[user_id, song_id] = 1

    return ratemat

In [18]:
# Build one user-by-song 0/1 matrix per feedback type (spotlights, saves).
sparse_spots = get_sparse(spotlights)
sparse_saves = get_sparse(saves)

In [19]:
# (number of users, number of songs) in the spotlight matrix
sparse_spots.shape

(407, 56)

In [20]:
# (number of users, number of songs) in the saves matrix
sparse_saves.shape

(407, 56)

In [21]:
def jaccard_similarities(sparse_data_frame):
    """Compute pairwise Jaccard similarity between the columns of a frame.

    Each column is treated as the set of rows in which it holds a non-zero
    value. For two columns the similarity is the product-sum of the columns
    divided by (nnz of first + nnz of second - product-sum), which is the
    Jaccard index when the frame holds only 0/1 values.

    Parameters
    ----------
    sparse_data_frame : pandas.DataFrame
        Numeric frame; rows are observations, columns are the items compared.

    Returns
    -------
    pandas.DataFrame
        Square frame indexed and labelled by the input's columns. Pairs that
        never co-occur, and columns with no non-zero entry, get 0.
    """
    labels = sparse_data_frame.columns
    interactions = csr_matrix(sparse_data_frame)

    # Number of non-zero entries in every column (the size of each item set).
    item_sizes = interactions.getnnz(axis=0)

    # Column-by-column co-occurrence counts, kept sparse.
    co_counts = interactions.T.dot(interactions)

    # For every stored co-occurrence entry, look up the set sizes of the two
    # items involved: one by expanding per-column counts, one via the stored
    # index array.
    size_first = np.repeat(item_sizes, co_counts.getnnz(axis=0))
    size_second = item_sizes[co_counts.indices]

    jaccard = co_counts.copy()
    union_sizes = size_first + size_second - co_counts.data
    jaccard.data = jaccard.data / union_sizes

    return pd.DataFrame(data=jaccard.toarray(), index=labels, columns=labels)

In [22]:
def jaccard_implicit_recommender(sparse_matrix, user):
    """Score the items a user has not interacted with, using item Jaccard similarity.

    Parameters
    ----------
    sparse_matrix : pandas.DataFrame
        Interaction matrix: one row per user (row labels are user ids), one
        column per item, non-zero where the user interacted with the item.
    user : hashable
        Row label of the user to score.

    Returns
    -------
    pandas.Series
        One score per item the user has no positive entry for, indexed by
        item label. A score is the similarity-weighted sum of the user's
        interactions divided by the item's total similarity; it is NaN for
        items whose similarity row sums to zero.

    Raises
    ------
    IndexError
        If ``user`` is not a row label of ``sparse_matrix``.

    Notes
    -----
    Prints the labels of the items the user already interacted with.
    """
    # The similarity matrix is the expensive step: compute it exactly once.
    item_similarity = jaccard_similarities(sparse_matrix)

    # Resolve the row label for this user directly from the index.
    matching_rows = sparse_matrix.index[sparse_matrix.index == user].tolist()
    if not matching_rows:
        raise IndexError('user {!r} is not in the matrix index'.format(user))
    user_index = matching_rows[0]

    # The user's interactions across all items.
    user_rating_vector = sparse_matrix.loc[user_index, :]

    # Items the user already interacted with.
    known_user_likes = user_rating_vector[user_rating_vector > 0].index.values

    # Similarity-weighted score, normalised by each item's total similarity.
    score = item_similarity.dot(user_rating_vector).div(item_similarity.sum(axis=1))

    # Never recommend what the user already has.
    score = score.drop(known_user_likes)

    print(known_user_likes)
    return score

In [23]:
def _implicit_scores_for_user(matrix, user):
    """Return (item scores, labels of items already liked) for one user and one matrix."""
    item_similarity = jaccard_similarities(matrix)

    matching_rows = matrix.index[matrix.index == user].tolist()
    if not matching_rows:
        raise IndexError('user {!r} is not in the matrix index'.format(user))

    ratings = matrix.loc[matching_rows[0], :]
    scores = item_similarity.dot(ratings).div(item_similarity.sum(axis=1))
    liked = ratings[ratings > 0].index.values
    return scores, liked


def ensemble_implicit_recommendations(a, b, user, drop=True):
    """Average the item-similarity scores of two implicit-feedback matrices for one user.

    Parameters
    ----------
    a, b : pandas.DataFrame
        Interaction matrices: one row per user (row labels are user ids),
        one column per item, non-zero where the user interacted with the
        item.
    user : hashable
        Row label of the user to score; must be present in both matrices.
    drop : bool, default True
        When true, items the user already has a positive entry for in either
        matrix are removed from the result.

    Returns
    -------
    pandas.Series
        Mean of the two per-matrix scores, indexed by item label, with
        missing values replaced by 0.

    Raises
    ------
    IndexError
        If ``user`` is not a row label of ``a`` or of ``b``.

    Notes
    -----
    Prints the labels of the items the user already interacted with.
    """
    # Each similarity matrix is computed once inside the helper.
    a_score, a_known_user_likes = _implicit_scores_for_user(a, user)
    b_score, b_known_user_likes = _implicit_scores_for_user(b, user)

    # Average score ensemble of the two models.
    # NOTE(review): a NaN score in one model (an item whose similarity row
    # sums to zero) makes the average NaN, which is then turned into 0 below
    # even when the other model produced a real score - confirm this is
    # the intended behaviour.
    ensemble = (a_score + b_score) / 2

    known_user_likes = np.unique(np.append(a_known_user_likes, b_known_user_likes))

    # Optionally hide what the user has already spotlighted or saved.
    if drop:
        ensemble = ensemble.drop(known_user_likes)

    ensemble = ensemble.fillna(0)

    print(known_user_likes)
    return ensemble

In [24]:
# Average the spotlight-based and save-based scores for user 27.
# With drop=True the result has one score for every song the user has not
# already spotlighted or saved; the printed array lists those known songs.
ensemble_implicit_recommendations(sparse_spots, sparse_saves,27,drop=True)

[ 7  9 10 11 12 18 19 29 53]


1     0.000000
2     0.000000
6     0.000000
8     0.000000
13    0.000000
14    0.205898
15    0.000000
16    0.000000
17    0.000000
20    0.000000
21    0.000000
22    0.000000
23    0.000000
24    0.000000
25    0.000000
26    0.000000
27    0.000000
28    0.000000
30    0.000000
31    0.000000
32    0.000000
33    0.000000
34    0.000000
35    0.000000
36    0.000000
37    0.000000
38    0.000000
39    0.000000
40    0.000000
41    0.000000
42    0.000000
43    0.000000
45    0.000000
47    0.000000
48    0.000000
49    0.000000
50    0.000000
51    0.000000
52    0.000000
54    0.191393
60    0.027548
61    0.000000
62    0.000000
63    0.000000
65    0.000000
66    0.000000
67    0.000000
dtype: float64

In [25]:
# The ten highest ensemble scores for user 27 (song id -> score)
ensemble_implicit_recommendations(sparse_spots, sparse_saves, 27,drop=True).nlargest(10)

[ 7  9 10 11 12 18 19 29 53]


14    0.205898
54    0.191393
60    0.027548
1     0.000000
2     0.000000
6     0.000000
8     0.000000
13    0.000000
15    0.000000
16    0.000000
dtype: float64

In [26]:
# Same top ten for user 27, keeping only the song ids as an array
ensemble_implicit_recommendations(sparse_spots, sparse_saves, 27,drop=True).nlargest(10).index.values

[ 7  9 10 11 12 18 19 29 53]


array([14, 54, 60,  1,  2,  6,  8, 13, 15, 16])