In [None]:
# https://github.com/LaceyChen17/DREAM

In [None]:
# Make Recommendation Using DREAM.ipynb

# Use the trained DREAM model to generate predictive features for each <user, product> pair (<u,p>).

In [1]:
import os
import constants
from config import Config
from eval import eval_batch
from data import Dataset, BasketConstructor
from utils import repackage_hidden, batchify

import torch
import pickle
import numpy as np
import pandas as pd

from matplotlib import pyplot as plt
from sklearn.decomposition import PCA

ModuleNotFoundError: No module named 'constants'

In [None]:
# os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" 
# os.environ["CUDA_VISIBLE_DEVICES"]="0,1,2,3"

In [4]:
# Build the basket constructor from the raw-data and feature-data directories.
bc = BasketConstructor(constants.RAW_DATA_DIR, constants.FEAT_DATA_DIR)

# Everything in this cell is derived from the 'prior' split.
split = 'prior'

# NOTE(review): reconstruct=False presumably reuses previously built
# artifacts instead of rebuilding them -- confirm in data.py.
ub_basket = bc.get_baskets(split, reconstruct=False)
ub_rbks = bc.get_baskets(split, reconstruct=False, reordered=True)
ub_ihis = bc.get_item_history(split, reconstruct=False)

# Dataset over the baskets, the reordered baskets and the item history
# (a baskets-only variant would be Dataset(ub_basket)).
ub = Dataset(ub_basket, ub_rbks, ub_ihis)

# User/product pairs for the same split; consumed by eval_batch later on.
up = bc.get_users_products(split)

In [None]:
# Load the trained DREAM model and compute the <u,p> scores.
# A <u,p> score can be used to predict whether user u will buy product p.

model_path = constants.DREAM_MODEL_DIR + 'reorder-next-dream-05-164.6961.model'
# NOTE(review): torch.load unpickles the file, so only load model files
# produced by a trusted training run.
with open(model_path, 'rb') as model_file:
    dr_model = torch.load(model_file)

# NOTE(review): CUDA is switched on unconditionally here; presumably this
# cell requires a GPU -- confirm how eval_batch uses config.cuda.
dr_model.config.cuda = True

# Author's timing note: this evaluation took about 1 hour 4 minutes.
# 512 is passed positionally (presumably the batch size -- confirm in eval.py).
eval_outputs = eval_batch(dr_model, ub, up, 512, is_reordered=True)
id_u, item_u, score_u, dynamic_u = eval_outputs

In [None]:
# Number of products returned for each user.
products_per_user = [items.shape[0] for items in item_u]

# Repeat every user id once per product so it lines up with the flat lists.
user_ids_flat = np.repeat(id_u, products_per_user)

# Flatten the per-user product and score sequences into single lists.
product_ids_flat = []
for items in item_u:
    product_ids_flat.extend(items)

scores_flat = []
for user_scores in score_u:
    scores_flat.extend(user_scores)

# One row per (user, product, score). Exact duplicate rows are removed first,
# so the mean below is taken over the distinct scores of each pair.
res = (
    pd.DataFrame({
        'user_id': user_ids_flat,
        'product_id': product_ids_flat,
        'dream_score': scores_flat,
    })
    .drop_duplicates()
    .groupby(['user_id', 'product_id'])['dream_score']
    .mean()
    .reset_index()
)

res.columns = ['user_id', 'product_id', 'reorder_dream_score_next']

# Persist the score feature table.
score_path = constants.FEAT_DATA_DIR + 'reorder_dream_score_next.pkl'
with open(score_path, 'wb') as out_file:
    pickle.dump(res, out_file, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# Dynamic user representations
# The dynamic user vectors returned by eval_batch are also useful features
# for recommendation.

# One feature column per component; 128 columns are assumed here.
dynamic_cols = ['reorder_dynamic_u_{}'.format(dim) for dim in range(128)]

du = pd.DataFrame(dynamic_u, columns=dynamic_cols)
du['user_id'] = id_u

# Average the vectors of users that appear more than once.
du = du.groupby(['user_id']).mean().reset_index()

# Persist the per-user representation table.
dynamic_u_path = constants.FEAT_DATA_DIR + 'reorder_dream_dynamic_u.pkl'
with open(dynamic_u_path, 'wb') as out_file:
    pickle.dump(du, out_file, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# PCA compression of dynamic user representations
#
# Reloads the per-user dynamic representations from disk so this cell can run
# without `du` in memory, compresses the 128 columns to 8 principal
# components, and dumps the result as 'reorder_dream_dynamic_u_pc.pkl'.

dynamic_cols = ['reorder_dynamic_u_{}'.format(i) for i in range(128)]

# The file name must match the one this notebook dumps in the dynamic-user
# step ('reorder_dream_dynamic_u.pkl'); the previous name
# 'dream_dynamic_u.pkl' is never written here and would not carry the
# 'reorder_dynamic_u_*' columns selected below.
# NOTE: pickle.load can execute arbitrary code; only load files produced by
# this pipeline.
with open(constants.FEAT_DATA_DIR + 'reorder_dream_dynamic_u.pkl', 'rb') as f:
    du = pickle.load(f)

pca = PCA(n_components=8)
pca.fit(du[dynamic_cols])

# Only 8 components are fitted, so the [:10] slice shows all of them.
plt.plot(pca.explained_variance_ratio_[:10])
plt.show()

compressed_du = pca.transform(du[dynamic_cols])
compressed_du = pd.DataFrame(compressed_du, columns=['reorder_dynamic_u_pc_{}'.format(i) for i in range(8)])
# Rows of the transformed matrix are positional with respect to `du`, so
# attach user_id by position rather than relying on index alignment.
compressed_du['user_id'] = du['user_id'].values

with open(constants.FEAT_DATA_DIR + 'reorder_dream_dynamic_u_pc.pkl', 'wb') as f:
    pickle.dump(compressed_du, f, pickle.HIGHEST_PROTOCOL)

# Kept as the cell's last expression so the ratios are actually displayed.
pca.explained_variance_ratio_[:10]

In [None]:
# Item embeddings
# Export the model's item-embedding weights as features, both in full and
# compressed to 8 principal components.

# One column per embedding dimension; 128 dimensions are assumed here.
prod_dim_cols = ['reorder_prod_dim_{}'.format(dim) for dim in range(128)]

embedding_weights = dr_model.encode.weight.data.cpu().numpy()
item_embedding = pd.DataFrame(embedding_weights, columns=prod_dim_cols)
# product_id is simply the row position in the embedding matrix.
item_embedding['product_id'] = np.arange(len(item_embedding))

full_embed_path = constants.FEAT_DATA_DIR + 'reorder_dream_item_embed.pkl'
with open(full_embed_path, 'wb') as out_file:
    pickle.dump(item_embedding, out_file, protocol=pickle.HIGHEST_PROTOCOL)

pca = PCA(n_components=8)
pca.fit(item_embedding[prod_dim_cols])

compressed_item_embedding = pca.transform(item_embedding[prod_dim_cols])
# NOTE(review): the compressed columns reuse the names of the first eight raw
# columns ('reorder_prod_dim_0'..'reorder_prod_dim_7'), unlike the '_pc_'
# naming used for the compressed user vectors -- confirm downstream consumers
# before renaming.
compressed_ie = pd.DataFrame(compressed_item_embedding, columns=prod_dim_cols[:8])
compressed_ie['product_id'] = item_embedding['product_id']

pc_embed_path = constants.FEAT_DATA_DIR + 'reorder_dream_item_embed_pc.pkl'
with open(pc_embed_path, 'wb') as out_file:
    pickle.dump(compressed_ie, out_file, protocol=pickle.HIGHEST_PROTOCOL)

# Only 8 components are fitted, so both slices below cover all of them.
plt.plot(pca.explained_variance_ratio_[:20])
plt.show()
pca.explained_variance_ratio_[:10]

In [None]:
# Pack all DREAM-related features
# Combine the score table with the compressed user and item features into a
# single frame keyed by (user_id, product_id).

fp = [
    'reorder_dream_score_next.pkl',
    'reorder_dream_dynamic_u_pc.pkl',
    'reorder_dream_item_embed_pc.pkl',
]

# Load each feature table in the order listed above.
dreams = []
for feature_file in fp:
    with open(constants.FEAT_DATA_DIR + feature_file, 'rb') as in_file:
        dreams.append(pickle.load(in_file))

score_df, user_pc_df, item_pc_df = dreams

# Left joins keep every scored (user, product) row: user features are attached
# on user_id, then item features on product_id.
dream_final = score_df.merge(user_pc_df, on=['user_id'], how='left')
dream_final = dream_final.merge(item_pc_df, on=['product_id'], how='left')

# Collapse any repeated (user, product) rows by averaging their features.
dream_final = dream_final.groupby(['user_id', 'product_id']).mean().reset_index()

final_path = constants.FEAT_DATA_DIR + 'reorder_dream_final.pkl'
with open(final_path, 'wb') as out_file:
    pickle.dump(dream_final, out_file, protocol=pickle.HIGHEST_PROTOCOL)