In [63]:
import numpy as np
import random
import logging
import json

from collections import Counter

In [14]:
class PopularRecommender:
    """Recommend the globally most frequent songs, regardless of the seed playlist.

    Parameters
    ----------
    freqs : mapping
        Song index -> occurrence count. Keys are converted with ``int()`` before
        lookup, so string keys (e.g. from a JSON file) are accepted.
    song2ids : mapping
        Song identifier -> integer index; it is inverted internally to translate
        the indices in ``freqs`` back to song identifiers.

    Attributes
    ----------
    rec : list
        ``[songs, counts]``: two parallel tuples ordered by descending count.
    """

    def __init__(self, freqs, song2ids):
        ids2song = {idx: song for song, idx in song2ids.items()}
        counts = Counter(freqs)
        ranked = [
            (ids2song[int(idx)], count)
            for idx, count in counts.most_common(len(counts))
        ]
        # zip(*[]) yields nothing, which used to leave ``self.rec`` empty and made
        # ``recommend`` fail with IndexError. Always keep two (possibly empty) tuples.
        self.rec = list(zip(*ranked)) if ranked else [(), ()]

    def recommend(self, songs, k=500):
        """Return the ``k`` most frequent songs and their counts.

        ``songs`` is accepted for interface compatibility with the other
        recommenders but is not used.

        Returns
        -------
        (tuple, tuple)
            Song identifiers and the matching counts, both of length <= ``k``.
        """
        return self.rec[0][:k], self.rec[1][:k]
    
    
from scipy.sparse import csr_matrix
    
class SparseKNNRecommender:
    """Neighbourhood recommender backed by a sparse playlist x song matrix.

    Parameters
    ----------
    R : scipy sparse matrix
        Rows are playlists and columns are songs (the seed vector is built with
        ``R.shape[1]`` rows and rows of ``R`` are selected as neighbour playlists).
    song2idx : dict
        Song identifier -> column index of ``R``.
    idx2song : dict
        Column index of ``R`` -> song identifier.
    fallback : object
        Recommender exposing ``recommend(songs, k)``; used when none of the seed
        songs is known.
    max_playlists : int
        Maximum number of best-matching playlists aggregated per query.
    """

    def __init__(self, R, song2idx, idx2song, fallback, max_playlists=500):
        self.R = R
        self.song2idx = song2idx
        self.idx2song = idx2song
        self.known_songs = set(self.song2idx.keys())
        self.fallback = fallback
        self.max_playlists = max_playlists

    def _convert_playlist(self, indices):
        """Turn a list of song column indices into an (n_songs, 1) sparse column."""
        ones = np.ones_like(indices, dtype=np.int64)
        # Row = song index, column = 0; repeated indices are summed by csr_matrix.
        return csr_matrix((ones, (indices, ones * 0)), shape=(self.R.shape[1], 1))

    def recommend(self, songs, k=500):
        """Recommend up to ``k`` songs for the seed playlist ``songs``.

        Unknown seed songs are ignored. If no seed song is known, the query is
        delegated to ``fallback`` with the same ``k``.

        Returns
        -------
        (list, list)
            Song identifiers (seed songs excluded) and a placeholder score of 1
            for each of them.

        Raises
        ------
        AssertionError
            If ``songs`` is not a list.
        """
        assert isinstance(songs, list), "Must provide list!"
        songs = [song for song in songs if song in self.known_songs]
        if not songs:
            # Forward k so the fallback honours the requested length
            # (it previously always used its own default).
            return self.fallback.recommend(songs, k=k)
        indices = [self.song2idx[song] for song in songs]
        seed = set(indices)  # O(1) membership when filtering the candidates
        scores = self.R @ self._convert_playlist(indices)
        score_idx = scores.nonzero()[0]
        # Keep only the max_playlists playlists sharing the most seed songs.
        if len(score_idx) > self.max_playlists:
            score_value = scores[score_idx, 0].todense()
            score_value = np.array(score_value).flatten()
            order = np.argsort(score_value)[::-1][:self.max_playlists]
            score_idx = score_idx[order]
        # Count how often each song occurs in the selected playlists.
        z = self.R[score_idx].sum(axis=0)
        nz = z.nonzero()[1]
        values = np.array(z[0, nz]).flatten()
        # Take k + len(seed songs) candidates so that k remain after dropping seeds.
        order = np.argsort(values)[-k - len(indices):][::-1]
        rec = [idx for idx in nz[order] if idx not in seed][:k]
        return [self.idx2song[idx] for idx in rec], [1] * len(rec)
    
    
from functools import reduce
from collections import Counter
    
class DictionaryRecommender:
    """Neighbourhood recommender backed by plain dictionaries.

    Parameters
    ----------
    song2idx : dict
        Song identifier -> song index.
    idx2song : dict
        Song index -> song identifier.
    songs : dict
        Song index -> list of playlist ids containing that song.
    playlists : dict
        Playlist id -> list of song indices in that playlist.
    fallback : object
        Recommender exposing ``recommend(songs, k)``; used when none of the seed
        songs is known.
    max_playlists : int
        Maximum number of best-matching playlists aggregated per query.
    """

    def __init__(self, song2idx, idx2song, songs, playlists, fallback, max_playlists=500):
        self.song2idx = song2idx
        self.known_songs = set(song2idx.keys())
        self.idx2song = idx2song
        self.songs = songs
        self.playlists = playlists
        self.fallback = fallback
        self.max_playlists = max_playlists

    def recommend(self, songs, k=500):
        """Recommend up to ``k`` songs for the seed playlist ``songs``.

        Unknown seed songs are ignored. If no seed song is known, the query is
        delegated to ``fallback`` with the same ``k``. Seed songs are never
        returned as recommendations.

        Returns
        -------
        (list, tuple)
            Song identifiers and their co-occurrence counts, ordered by
            descending count. Both are empty when nothing can be recommended.

        Raises
        ------
        AssertionError
            If ``songs`` is not a list.
        """
        assert isinstance(songs, list), "Must provide list!"
        songs = [song for song in songs if song in self.known_songs]
        if not songs:
            # Forward k so the fallback honours the requested length.
            return self.fallback.recommend(songs, k=k)
        indices = [self.song2idx[song] for song in songs]

        # Count, per playlist, how many seed songs it contains. Counter.update
        # avoids the quadratic list concatenation of reduce(list.__add__, ...)
        # and copes with an empty input.
        playlist_hits = Counter()
        for idx in indices:
            playlist_hits.update(self.songs[idx])
        common_pls = playlist_hits.most_common(self.max_playlists)

        # Count song occurrences across the best-matching playlists.
        song_hits = Counter()
        for pl, _ in common_pls:
            song_hits.update(self.playlists[pl])
        # The seed songs are already in the playlist; do not recommend them back.
        for idx in indices:
            song_hits.pop(idx, None)

        ranked = song_hits.most_common(k)
        if not ranked:
            # zip(*[]) cannot be unpacked into two values.
            return [], ()
        rsongs, rscores = zip(*ranked)
        return [self.idx2song[idx] for idx in rsongs], rscores

In [9]:
import os
import json

def load_dictionary_based(path):
    """Load the dictionary-based model stored as JSON files under ``path``.

    Reads ``idx2song.json``, ``songs.json`` and ``playlists.json`` (in that
    order). JSON object keys, and the list items of the latter two files, are
    converted to ``int``.

    Returns
    -------
    tuple
        ``(song2idx, idx2song, songs, playlists)`` where ``song2idx`` is the
        inverse of ``idx2song``.
    """
    def _read_json(filename):
        with open(os.path.join(path, filename)) as handle:
            return json.load(handle)

    def _as_int_lists(raw):
        return {int(key): [int(item) for item in items] for key, items in raw.items()}

    idx2song = {int(idx): song for idx, song in _read_json("idx2song.json").items()}
    song2idx = {song: idx for idx, song in idx2song.items()}
    songs = _as_int_lists(_read_json("songs.json"))
    playlists = _as_int_lists(_read_json("playlists.json"))
    return song2idx, idx2song, songs, playlists

# Dictionary-based model: (song2idx, idx2song, songs, playlists).
model = load_dictionary_based("../models/sparse_dictionary/")
# Read the song frequencies inside a context manager so the file handle is
# closed deterministically instead of being left to the garbage collector.
with open("../models/sparse_full/frequencies.json") as f:
    freqs = json.load(f)

In [None]:
import os
from scipy.sparse import load_npz

def load_sparse(path):
    """Load the sparse model stored under ``path``.

    Reads ``idx2song.json`` (keys converted to ``int``) and the sparse matrix
    saved as ``r.npz``.

    Returns
    -------
    tuple
        ``(R, song2idx, idx2song)`` where ``song2idx`` is the inverse of
        ``idx2song``.
    """
    index_file = os.path.join(path, "idx2song.json")
    with open(index_file) as handle:
        raw_index = json.load(handle)
    idx2song = {int(idx): song for idx, song in raw_index.items()}
    song2idx = {song: idx for idx, song in idx2song.items()}

    matrix_file = os.path.join(path, "r.npz")
    R = load_npz(matrix_file)
    return R, song2idx, idx2song

# NOTE: this rebinds `model` to the sparse tuple (R, song2idx, idx2song), which
# is what SparseKNNRecommender expects. Run this cell only when using that
# recommender; the dictionary-based tuple has a different layout.
model = load_sparse("../models/sparse_full")
# Read the song frequencies inside a context manager so the file handle is
# closed deterministically instead of being left to the garbage collector.
with open("../models/sparse_full/frequencies.json") as f:
    freqs = json.load(f)

NameError: name 'json' is not defined

In [11]:
# Popularity baseline; it is also handed to the main recommender as its fallback.
# model[0] is the first element of the loaded model tuple.
r_pop = PopularRecommender(freqs=freqs, song2ids=model[0])
# Alternative backend (requires `model` to come from load_sparse):
#   SparseKNNRecommender(*model, r_pop, 500)
r = DictionaryRecommender(*model, fallback=r_pop, max_playlists=500)

In [12]:
import json
# Challenge set; the cells below read its "playlists" list.
with open("../data/test/challenge_set.json") as challenge_file:
    test = json.load(challenge_file)

In [16]:
from tqdm import tqdm

# Build one CSV row per challenge playlist: the pid followed by 500 recommendations.
result = []
for playlist in tqdm(test["playlists"]):
    pid = playlist["pid"]
    # An empty "tracks" list simply yields an empty seed list.
    tracks = [track["track_uri"] for track in playlist["tracks"]]
    recs, _ = r.recommend(tracks, k=500)
    # Every row must carry exactly 500 recommendations.
    assert len(recs) == 500, f"{pid}: {len(recs)}"
    result.append(",".join(str(field) for field in [pid, *recs]))

 22%|████████▍                             | 2204/10000 [00:55<03:17, 39.40it/s]


KeyboardInterrupt: 

In [None]:
# Prepend the team-info header row. The guard keeps the cell idempotent:
# re-running it must not stack a second header on top of the first.
_header = "team info, team name,98kissricsi@gmail.com"
if not result or result[0] != _header:
    result = [_header] + result

In [13]:
# Distribution of seed-playlist lengths in the challenge set.
Counter(len(playlist["tracks"]) for playlist in test["playlists"])

Counter({0: 1000, 5: 2000, 10: 2000, 25: 2000, 100: 2000, 1: 1000})