In [None]:
import pandas as pd
import numpy as np
import scipy as sp
import scipy.sparse as sps
import scipy.io as io
import time
from scipy.sparse.linalg import svds
from sklearn.preprocessing import normalize

### Read needed files

In [None]:
# Load the tab-separated interactions file. Every row is one observed
# (playlist, track) pair, so each gets a constant interaction value of 1.0.
# Rows are ordered by playlist and then by track.
train_final = (
    pd.read_csv('../input/train_final.csv', sep='\t')
    .assign(interaction=1.0)
    .sort_values(by=['playlist_id', 'track_id'], ascending=True)
)
numberInteractions = len(train_final)
# Preview the first 20 interactions.
print(train_final.head(20))

In [None]:
# Load the playlists for which a recommendation has to be produced
# (tab-separated file), then preview the first rows and the frame dimensions.
target_playlists = pd.read_csv(
    '../input/target_playlists.csv',
    sep='\t',
)
print(target_playlists.head(5))
print(target_playlists.shape)

In [None]:
# Load the ids of the tracks that are allowed to be recommended.
# Kept as a plain list because later cells pass it to `Series.isin`.
target_tracks = list(pd.read_csv('../input/target_tracks.csv', sep = '\t')['track_id'])
# Show only the size and a short preview instead of dumping the whole list
# into the cell output.
print(len(target_tracks))
target_tracks[:10]

We separate each column into its own list:

In [None]:
# Pull the three columns out of the interactions frame as plain lists.
playlist_id, track_id, interaction = (
    list(train_final[column])
    for column in ('playlist_id', 'track_id', 'interaction')
)
# Peek at the first ten entries of each list.
for values in (playlist_id, track_id, interaction):
    print(values[:10])

Create index mapping for playlist and track IDs

In [None]:
# Map every distinct playlist id to a dense 0-based position in the sorted
# list of ids. The position becomes the 'index_playlist' column.
playlist_id_unique = (
    pd.Series(sorted(set(playlist_id)), name='playlist_id')
    .rename_axis('index_playlist')
    .reset_index()
)
# Same mapping for tracks: sorted distinct track ids -> 'index_track'.
track_id_unique = (
    pd.Series(sorted(set(track_id)), name='track_id')
    .rename_axis('index_track')
    .reset_index()
)
print(track_id_unique.head())
print(playlist_id_unique.head())

In [None]:
# Number of distinct playlists and tracks, i.e. rows of each mapping table.
numPlaylists, numTracks = playlist_id_unique.shape[0], track_id_unique.shape[0]

Translate interaction matrix to new indexes:

In [None]:
# Attach the dense playlist index to every interaction ...
train_intermediate = pd.merge(train_final, playlist_id_unique, how='inner', on='playlist_id')
# ... then the dense track index, and restore the playlist/track ordering.
train_index = (
    pd.merge(train_intermediate, track_id_unique, how='inner', on='track_id')
    .sort_values(by=['playlist_id', 'track_id'], ascending=True)
)
train_index

In [None]:
# Extract the index-translated columns as plain lists; these are the
# row indices, column indices and values used to build the interaction matrix.
index_playlist, index_track, interaction = (
    list(train_index[column])
    for column in ('index_playlist', 'index_track', 'interaction')
)
# Peek at the first ten entries of each list.
for values in (index_playlist, index_track, interaction):
    print(values[:10])

In [None]:
# Build the playlist x track interaction matrix from (value, (row, col))
# triplets. `tocsr()` returns a new matrix rather than converting in place,
# so its result must be assigned; the original call discarded it and left
# URM_all in COO format.
# The shape is passed explicitly so the dimensions always match the index
# mappings instead of being inferred from the largest index present.
URM_all = sps.coo_matrix(
    (interaction, (index_playlist, index_track)),
    shape=(numPlaylists, numTracks),
).tocsr()
URM_all

In [None]:
# Calculate a truncated singular value decomposition of the interaction
# matrix, keeping k = 252 singular values/vectors, and report the wall time.
start_time = time.time()
U, s, Vt = svds(URM_all, k=252)
elapsed = time.time() - start_time
print("--- %s seconds ---" % elapsed)

In [None]:
# Turn the array of singular values returned by svds into a diagonal matrix
# so it can be used in ordinary matrix products with U and Vt.
s_diag = np.diag(s)

In [None]:
# First half of the reconstruction: scale the left singular vectors by the
# singular values (U * Sigma). The product with Vt follows in the next cell.
Us = U.dot(s_diag)
Us.shape

In [None]:
# Second half of the reconstruction: (U * Sigma) * Vt gives the dense
# low-rank estimate of the interaction matrix, used as prediction scores.
start_time = time.time()
URM_estm = Us.dot(Vt)
elapsed = time.time() - start_time
print("--- %s seconds ---" % elapsed)

In [None]:
# Inspect the estimate: its dimensions, the top-left 10x10 corner,
# and the highest predicted score in the first row.
print(URM_estm.shape)
print(URM_estm[:10, :10])
print(URM_estm[0].max())

In [None]:
# Keep only the interactions that belong to target playlists (inner join on
# playlist_id), then report the frame size and how many playlists remain.
train = pd.merge(train_index, target_playlists, how='inner', on='playlist_id')
print(train.shape)
print(train.playlist_id.nunique())

In [None]:
# Collapse the interactions to one row per playlist.
# train_agg1 holds the list of original track ids of each playlist ...
train_agg1 = train.groupby('playlist_id')['track_id'].apply(list).reset_index()
# ... and train_agg2 the list of the matching matrix column indices.
train_agg2 = train.groupby('playlist_id')['index_track'].apply(list).reset_index()
# Combine both lists and attach the playlist's matrix row index.
train_agg = (
    train_agg1
    .merge(train_agg2, how='inner', on='playlist_id')
    .merge(playlist_id_unique, how='inner', on='playlist_id')
)
# One separate, initially empty list per playlist to collect recommendations.
train_agg['recommend'] = [[] for _ in range(len(train_agg))]
train_agg

### Find tracks to recommend in URM_estm

In [None]:
# Recommend, for every playlist in train_agg, up to 5 target tracks with the
# highest predicted score that the playlist does not already contain.
start_time = time.time()

# Loop-invariant lookups, computed once instead of once per playlist:
# the matrix column index and the original track_id of every track that is
# both known to the model and part of the target track set.
is_target = track_id_unique['track_id'].isin(target_tracks)
target_index = track_id_unique.loc[is_target, 'index_track'].to_numpy()
target_track_id = track_id_unique.loc[is_target, 'track_id'].to_numpy()

recommendations = []
for playlist_row, seen_tracks in zip(train_agg['index_playlist'], train_agg['index_track']):
    # filter tracks which are already in the playlist, so they can't be recommended
    is_new = ~np.isin(target_index, seen_tracks)
    # predicted scores of the remaining candidates, read from this playlist's row
    scores = URM_estm[playlist_row, target_index[is_new]]
    # positions of the (at most) 5 best candidates, best first; the stable
    # sort resolves equal scores deterministically by track index
    best = np.argsort(-scores, kind='stable')[:5]
    # ids are taken straight from the integer column, so they never pass
    # through a float as they did with iterrows' dtype upcasting
    recommendations.append([int(t) for t in target_track_id[is_new][best]])

# Assign the whole column at once. Re-running this cell replaces the
# recommendations instead of appending another 5 to the existing lists.
train_agg['recommend'] = pd.Series(recommendations, index=train_agg.index, dtype=object)
print("--- %s minutes ---" % ((time.time() - start_time)/60))

In [None]:
# Inspect the aggregated frame after the loop above has filled 'recommend'.
train_agg

In [None]:
# Row count = number of playlists that will appear in the submission.
train_agg.shape

In [None]:
# Convert each list of recommended track ids to one space-separated string.
# Values that are already strings are left untouched, so re-running this cell
# does not split an already-joined string into single characters.
train_agg['recommend'] = train_agg['recommend'].apply(
    lambda x: x if isinstance(x, str) else " ".join(map(str, x))
)

In [None]:
# Build the submission frame: keep the playlist id and the recommendation
# string, exposing the latter under the column name 'track_ids'.
final = (
    train_agg[['playlist_id', 'recommend']]
    .rename(columns={'recommend': 'track_ids'})
)

In [None]:
# Preview the first rows of the submission frame before writing it to disk.
print(final.head())

In [None]:
# Write the submission as CSV, leaving out the DataFrame index column.
submission_path = '../submission/003_svd_basic_252fac.csv'
final.to_csv(submission_path, index=False)