In [3]:
# http://pytorch.org/
from os.path import exists
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())
cuda_output = !ldconfig -p|grep cudart.so|sed -e 's/.*\.\([0-9]*\)\.\([0-9]*\)$/cu\1\2/'
accelerator = cuda_output[0] if exists('/dev/nvidia0') else 'cpu'

!pip install -q http://download.pytorch.org/whl/{accelerator}/torch-0.4.1-{platform}-linux_x86_64.whl torchvision
from google.colab import drive
drive.mount('/content/gdrive')


Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/gdrive


In [0]:


import os

import constants
from config import Config
from eval import eval_batch
from data import Dataset, BasketConstructor
from utils import repackage_hidden, batchify

import torch
import pickle
import numpy as np
import pandas as pd

from matplotlib import pyplot as plt
from sklearn.decomposition import PCA

In [0]:
# Set both CUDA-related environment variables in one call: device ordering
# by PCI bus id, and devices 0-3 made visible.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "0,1,2,3",
})

# Input preparation

In [8]:
# Build every model input from the 'prior' split through one BasketConstructor.
bc = BasketConstructor(constants.RAW_DATA_DIR, constants.FEAT_DATA_DIR)

# All three structures are requested with reconstruct=True; the second call
# additionally passes reordered=True.
ub_basket = bc.get_baskets('prior', reconstruct=True)
ub_rbks = bc.get_baskets('prior', reordered=True, reconstruct=True)
ub_ihis = bc.get_item_history('prior', reconstruct=True)

# The Dataset is built from all three structures
# (an earlier variant used only the baskets: Dataset(ub_basket)).
ub = Dataset(ub_basket, ub_rbks, ub_ihis)
up = bc.get_users_products('prior')

FileNotFoundError: ignored

# Load model and calculate `<u,p>` score
- The `<u,p>` score can be used to predict whether user u will buy product p.

In [0]:
# Load the saved DREAM model from the model directory.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from
# a trusted source.
model_path = constants.DREAM_MODEL_DIR + 'reorder-next-dream-05-164.6961.model'
with open(model_path, 'rb') as f:
    dr_model = torch.load(f)

In [0]:
# Switch on the cuda flag stored in the loaded model's config
# (presumably read by eval_batch to run on the GPU — confirm in eval.py).
dr_model.config.cuda = True

In [0]:
# Author's runtime note for this cell: 1 hour 4mins
id_u, item_u, score_u, dynamic_u = eval_batch(
    dr_model,
    ub,
    up,
    512,  # presumably the batch size — confirm against eval_batch's signature
    is_reordered=True,
)

In [0]:
# Flatten the per-user outputs into parallel sequences, one entry per
# (user, product) pair.
len_u = [items.shape[0] for items in item_u]  # number of products for each user
flatten_id = np.repeat(id_u, len_u)  # each user id repeated once per product
flatten_item = [item for items in item_u for item in items]
flatten_score = [score for scores in score_u for score in scores]

# Drop exact duplicate rows, then average the remaining scores for each
# (user_id, product_id) pair.
res = (
    pd.DataFrame({
        'user_id': flatten_id,
        'product_id': flatten_item,
        'dream_score': flatten_score,
    })
    .drop_duplicates()
    .groupby(['user_id', 'product_id'])['dream_score']
    .mean()
    .reset_index()
)

In [0]:
# Relabel the three columns positionally; the score column is stored under
# the feature name 'reorder_dream_score_next'.
res.columns = ['user_id', 'product_id', 'reorder_dream_score_next']

In [0]:
# Persist the score table as a pickle in the feature directory.
score_path = constants.FEAT_DATA_DIR + 'reorder_dream_score_next.pkl'
with open(score_path, 'wb') as f:
    pickle.dump(res, f, protocol=pickle.HIGHEST_PROTOCOL)

# <font color=blue> Dynamic User </font> 
- Dynamic user representations are also useful features for recommendation.

In [0]:
# Table of dynamic user representations: 128 named columns plus user_id,
# averaged over rows that share the same user_id.
dynamic_cols = ['reorder_dynamic_u_{}'.format(dim) for dim in range(128)]
du = (
    pd.DataFrame(dynamic_u, columns=dynamic_cols)
    .assign(user_id=id_u)
    .groupby(['user_id'])
    .mean()
    .reset_index()
)

# Persist the averaged table as a pickle in the feature directory.
du_path = constants.FEAT_DATA_DIR + 'reorder_dream_dynamic_u.pkl'
with open(du_path, 'wb') as f:
    pickle.dump(du, f, protocol=pickle.HIGHEST_PROTOCOL)

##  <font color=red> PCA Compression of Dynamic User Representations</font>

In [0]:
# Reload the per-user dynamic representations written by the "Dynamic User"
# section. That section dumps to 'reorder_dream_dynamic_u.pkl'; the un-prefixed
# name 'dream_dynamic_u.pkl' is never written by this notebook, and the PCA
# cells that follow select 'reorder_dynamic_u_*' columns, so the file name here
# must carry the 'reorder_' prefix.
with open(constants.FEAT_DATA_DIR + 'reorder_dream_dynamic_u.pkl', 'rb') as f:
    du = pickle.load(f)

In [0]:
# Fit an 8-component PCA on the 128 dynamic-user columns.
dynamic_cols = ['reorder_dynamic_u_{}'.format(dim) for dim in range(128)]
pca = PCA(n_components=8)
pca.fit(du[dynamic_cols])

In [0]:
# Plot the explained-variance ratio of the fitted components (at most the
# first 10), then show the same values as the cell output.
du_var_ratio = pca.explained_variance_ratio_[:10]
plt.plot(du_var_ratio)
plt.show()
du_var_ratio

In [0]:
# Project the 128 dynamic-user columns onto the fitted components and label
# the 8 resulting columns.
dynamic_cols = ['reorder_dynamic_u_{}'.format(dim) for dim in range(128)]
pc_cols = ['reorder_dynamic_u_pc_{}'.format(k) for k in range(8)]
compressed_du = pd.DataFrame(pca.transform(du[dynamic_cols]), columns=pc_cols)

# Re-attach the user ids; assigning a Series aligns on the row index.
compressed_du['user_id'] = du['user_id']

In [0]:
# Persist the PCA-compressed dynamic-user table in the feature directory.
du_pc_path = constants.FEAT_DATA_DIR + 'reorder_dream_dynamic_u_pc.pkl'
with open(du_pc_path, 'wb') as f:
    pickle.dump(compressed_du, f, protocol=pickle.HIGHEST_PROTOCOL)

# <font color=black> Item embedding </font>

In [0]:
# Copy the model's `encode` weight matrix to a NumPy array on the CPU and
# wrap it in a DataFrame with 128 named columns.
embed_cols = ['reorder_prod_dim_{}'.format(dim) for dim in range(128)]
item_embedding = pd.DataFrame(
    dr_model.encode.weight.data.cpu().numpy(),
    columns=embed_cols,
)

# The row position in the weight matrix is used as the product_id.
item_embedding['product_id'] = np.arange(len(item_embedding))

In [0]:
# Persist the full item-embedding table in the feature directory.
embed_path = constants.FEAT_DATA_DIR + 'reorder_dream_item_embed.pkl'
with open(embed_path, 'wb') as f:
    pickle.dump(item_embedding, f, protocol=pickle.HIGHEST_PROTOCOL)

In [0]:
# Fit a fresh 8-component PCA on the 128 item-embedding columns
# (this rebinds `pca`, replacing the model fitted on the user table).
embed_cols = ['reorder_prod_dim_{}'.format(dim) for dim in range(128)]
pca = PCA(n_components=8)
pca.fit(item_embedding[embed_cols])

In [0]:
# Project the 128 item-embedding columns onto the fitted components.
embed_cols = ['reorder_prod_dim_{}'.format(dim) for dim in range(128)]
# NOTE(review): the 8 component columns reuse the 'reorder_prod_dim_*' names of
# the raw embedding dimensions — confirm downstream code expects that.
pc_cols = ['reorder_prod_dim_{}'.format(k) for k in range(8)]

compressed_item_embedding = pca.transform(item_embedding[embed_cols])
compressed_ie = pd.DataFrame(compressed_item_embedding, columns=pc_cols)

# Re-attach the product ids; assigning a Series aligns on the row index.
compressed_ie['product_id'] = item_embedding['product_id']


In [0]:
# Persist the PCA-compressed item-embedding table in the feature directory.
embed_pc_path = constants.FEAT_DATA_DIR + 'reorder_dream_item_embed_pc.pkl'
with open(embed_pc_path, 'wb') as f:
    pickle.dump(compressed_ie, f, protocol=pickle.HIGHEST_PROTOCOL)

In [0]:
# Plot the explained-variance ratio of the fitted components (slice capped
# at 20), then show up to the first 10 values as the cell output.
item_var_ratio = pca.explained_variance_ratio_
plt.plot(item_var_ratio[:20])
plt.show()
item_var_ratio[:10]

# <font color=lime> Pack all DREAM related features </font>

In [0]:
# Feature pickles to combine, in the order the merge cells index them:
# [0] per-(user, product) scores, [1] user components, [2] item components.
fp = [
    'reorder_dream_score_next.pkl',
    'reorder_dream_dynamic_u_pc.pkl',
    'reorder_dream_item_embed_pc.pkl',
]

In [0]:
# Unpickle every feature file listed in `fp`, keeping the list order.
dreams = []
for fname in fp:
    with open(constants.FEAT_DATA_DIR + fname, 'rb') as f:
        loaded = pickle.load(f)
    dreams.append(loaded)

In [0]:
# Left-join the second loaded table onto the first by user_id, so every row
# of the first table is kept.
dream_final = dreams[0].merge(dreams[1], how='left', on=['user_id'])

In [0]:
# Left-join the third loaded table by product_id, then average all feature
# columns so there is one row per (user_id, product_id).
dream_final = (
    dream_final
    .merge(dreams[2], how='left', on=['product_id'])
    .groupby(['user_id', 'product_id'])
    .mean()
    .reset_index()
)

In [0]:
# Persist the combined DREAM feature table in the feature directory.
final_path = constants.FEAT_DATA_DIR + 'reorder_dream_final.pkl'
with open(final_path, 'wb') as f:
    pickle.dump(dream_final, f, protocol=pickle.HIGHEST_PROTOCOL)