In [1]:
import warnings
# Suppress every warning raised after this point (no category filter is given).
# NOTE(review): this also hides deprecation notices from the libraries imported below.
warnings.filterwarnings(action='ignore')

# NOTE(review): the two wildcard imports below make it hard to tell where a name comes from;
# the visible cells only use ALS, `sparse` and the standard library.
from implicit.evaluation import  *
from implicit.als import AlternatingLeastSquares as ALS
from implicit.bpr import BayesianPersonalizedRanking as BPR
import numpy as np
import os
# NOTE(review): presumably a workaround for the "duplicate OpenMP runtime" abort. It is set
# after implicit/numpy have already been imported, so confirm it still takes effect here.
os.environ['KMP_DUPLICATE_LIB_OK']='True'
from scipy.sparse import *
import pandas as pd

In [2]:
from scipy import sparse

## Load preprocessed data

In [3]:
# Load the sparse matrix written by the preprocessing step.
# Later cells take its first `te_len` rows as `te_r` and split its columns at `n_items`
# into a song part and a tag part.
r = sparse.load_npz("./data/preprocessed/csr.npz")

In [5]:
import os
import pickle
from pathlib import Path

# Resolve the preprocessed-data directory relative to the current working directory.
script_dir = os.getcwd()
data_dir = "data/preprocessed"
abs_data_path = Path(script_dir) / data_dir

# This cell only reads data. A missing directory means preprocessing has not been run,
# so fail with a clear message instead of creating an empty directory as a side effect.
if not abs_data_path.is_dir():
    raise FileNotFoundError(f"Preprocessed data directory not found: {abs_data_path}")

# NOTE(security): pickle.load can execute arbitrary code; only load files produced by
# this project's own preprocessing step.

# Metadata dictionary saved by preprocessing (its keys are unpacked in a later cell).
with open(abs_data_path / "extra.pickle", "rb") as file:
    extra = pickle.load(file)

# Lookup used later to translate recommended song indices.
with open(abs_data_path / "song.pickle", "rb") as f:
    idx_to_song = pickle.load(f)

# Lookup used later to translate recommended tag indices.
with open(abs_data_path / "tag.pickle", "rb") as f:
    idx_to_tag = pickle.load(f)

In [7]:
# Show which fields the preprocessing step stored in `extra`.
extra.keys()

dict_keys(['v_len', 'te_ids', 'n_items', 'SONG_TOP_X'])

In [8]:
# Pull the four metadata fields out of `extra` in one step.
# Note that the row count bound to `te_len` is stored under the key "v_len".
te_len, te_ids, n_items, SONG_TOP_X = (
    extra[field] for field in ("v_len", "te_ids", "n_items", "SONG_TOP_X")
)

## Run Model

In [9]:
# First `te_len` rows of `r`: the rows recommendations are generated for below
# (presumably the test/validation entries whose ids are in `te_ids` — confirm against preprocessing).
te_r= r[:te_len]

In [10]:
# Remaining rows of `r` (from `te_len` onward).
# NOTE(review): not referenced again in the cells shown here; the model is fitted on all of `r`.
tr_r = r[te_len:]

In [11]:
# Fit one ALS factorisation over the full matrix (song and tag columns together).
# NOTE(review): 128 factors and 0.08 regularisation are hard-coded; consider naming them in a config cell.
als_model = ALS(factors=128, regularization=0.08)
# Every stored value is scaled by 15.0 before fitting (presumably a confidence weight — TODO confirm).
# NOTE(review): the transpose is passed; the orientation `fit` expects differs between
# implicit releases, so confirm this against the installed version.
als_model.fit(r.T * 15.0)



HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))




In [12]:
# Sanity check: display the learned factor matrix after fitting.
als_model.user_factors

array([[ 0.05293079,  0.06377667,  0.08461244, ..., -0.01540443,
         0.11774618,  0.09719596],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.00183251,  0.00861166, -0.00713021, ...,  0.00346161,
         0.00190079, -0.00284596],
       ...,
       [ 0.03752115, -0.00759496,  0.00530226, ...,  0.04920263,
        -0.05063215, -0.00895627],
       [-0.09927305, -0.05888007, -0.01682278, ...,  0.04795081,
         0.02027315, -0.00889514],
       [-0.04479675,  0.01561548, -0.0318751 , ..., -0.0409964 ,
        -0.0081107 ,  0.12591356]], dtype=float32)

In [13]:
# Two CPU-only ALS instances that are not fitted in this notebook; they reuse the
# factors learned by `als_model` so songs and tags can be ranked separately.
item_model = ALS(use_gpu=False)
item_model.user_factors = als_model.user_factors

tag_model = ALS(use_gpu=False)
tag_model.user_factors = als_model.user_factors

In [14]:
# Split the trained item-side factors at row `n_items`:
# the first `n_items` rows go to the song model, the rest to the tag model.
trained_item_factors = als_model.item_factors
item_model.item_factors = trained_item_factors[:n_items]
tag_model.item_factors = trained_item_factors[n_items:]

In [15]:
# Split the columns of `r` at `n_items`, matching the factor split:
# song columns for `item_model`, remaining (tag) columns for `tag_model`.
item_rec_csr, tag_rec_csr = r[:, :n_items], r[:, n_items:]

In [16]:
# For each of the first `te_r.shape[0]` rows, ask the song model for 100 candidates and
# translate them through `idx_to_song`; candidates without a mapping are dropped, so a
# row may end up with fewer than 100 songs.
item_ret = []
for row in range(te_r.shape[0]):
    ranked = item_model.recommend(row, item_rec_csr, N=100)
    # The first element of each returned entry is used as the song index.
    item_ret.append([idx_to_song[pair[0]] for pair in ranked if pair[0] in idx_to_song])

In [17]:
# For each of the first `te_r.shape[0]` rows, ask the tag model for 10 candidates and
# translate them through `idx_to_tag`; candidates without a mapping are dropped.
# NOTE(review): the lookup key is the model-local index plus a hard-coded 50000, while
# tag_model's factors/columns start at `n_items`. Presumably 50000 must equal `n_items`;
# confirm, and prefer `n_items` over the literal if so.
tag_ret = []
for row in range(te_r.shape[0]):
    ranked = tag_model.recommend(row, tag_rec_csr, N=10)
    lookup_keys = (pair[0] + 50000 for pair in ranked)
    tag_ret.append([idx_to_tag[key] for key in lookup_keys if key in idx_to_tag])

In [48]:
# Assemble one result record per id, capping songs at 100 and tags at 10.
# zip() stops at the shortest input, so the three sequences are expected to align one-to-one.
returnval = [
    {"id": entry_id, "songs": songs[:100], "tags": tags[:10]}
    for entry_id, songs, tags in zip(te_ids, item_ret, tag_ret)
]

In [49]:
import json

# Serialise the records as UTF-8 JSON (non-ASCII characters are written as-is) to ret.json.
with open('ret.json', 'w', encoding='utf-8') as out_file:
    serialized = json.dumps(returnval, ensure_ascii=False)
    out_file.write(serialized)