In [1]:
import warnings
warnings.filterwarnings("ignore")

import numpy as np

from utils.io import load_numpy
from utils.modelnames import models

# Dataset Parameters

In [2]:
# Directory holding the dataset files; the names below are handed to
# load_numpy together with this path.
DATA_DIR = "data/beer/"

# Training split: main matrix and its keyphrase matrix.
TRAIN_SET = "Rtrain.npz"
TRAIN_KEYPHRASE_SET = "Rtrain_keyphrase.npz"

# Validation split.
VALID_SET = "Rvalid.npz"
VALID_KEYPHRASE_SET = "Rvalid_keyphrase.npz"

# Test split.
TEST_SET = "Rtest.npz"
TEST_KEYPHRASE_SET = "Rtest_keyphrase.npz"

# Algorithm Parameters

In [3]:
# Key into the `models` registry, plus the hyper-parameters that are passed
# to the selected model's constructor.
MODEL = "E-CDE-VAE"
RANK = 200
CORRUPTION = 0.5
LAMB = 1.0

# Optimisation settings, also passed to the model constructor.
OPTIMIZER = "SGD"  # alternative kept for reference: "RMSProp"
LEARNING_RATE = 0.0001
EPOCH = 100

# Switches read by the loading and keyphrase-preprocessing cells.
ENABLE_VALIDATION = True
ENABLE_KEYPHRASE_BINARIZATION = True

# Settings that are not referenced by the cells visible in this notebook
# excerpt -- presumably consumed elsewhere; confirm before removing.
ENABLE_EVALUATION = True
TRAIN_BATCH_SIZE = 128
PREDICT_BATCH_SIZE = 128
TOP_K = 10

# Load Dataset

In [4]:
# Training split. The main matrix is kept exactly as load_numpy returns it;
# the keyphrase matrix is converted with .toarray().
R_train = load_numpy(path=DATA_DIR, name=TRAIN_SET)
R_train_keyphrase = load_numpy(path=DATA_DIR, name=TRAIN_KEYPHRASE_SET).toarray()

# Held-out split. The "valid" variables hold the validation files when
# ENABLE_VALIDATION is truthy and the test files otherwise.
R_valid = load_numpy(
    path=DATA_DIR,
    name=VALID_SET if ENABLE_VALIDATION else TEST_SET,
)
R_valid_keyphrase = load_numpy(
    path=DATA_DIR,
    name=VALID_KEYPHRASE_SET if ENABLE_VALIDATION else TEST_KEYPHRASE_SET,
).toarray()

# Preprocess Keyphrase Frequency

In [5]:
# Convert the keyphrase matrices into model inputs, in one of two ways
# selected by ENABLE_KEYPHRASE_BINARIZATION.
if ENABLE_KEYPHRASE_BINARIZATION:
    # Presence/absence encoding: every non-zero entry becomes 1 (in place).
    R_train_keyphrase[R_train_keyphrase != 0] = 1
    R_valid_keyphrase[R_valid_keyphrase != 0] = 1
else:
    # Row normalisation: divide each row by its own sum. Rows that sum to
    # zero yield 0/0 = NaN, which is expected here, so the corresponding
    # numpy warnings are silenced locally instead of relying on the global
    # warnings filter.
    with np.errstate(divide="ignore", invalid="ignore"):
        R_train_keyphrase = R_train_keyphrase / R_train_keyphrase.sum(axis=1, keepdims=True)
        R_valid_keyphrase = R_valid_keyphrase / R_valid_keyphrase.sum(axis=1, keepdims=True)

    # Map the NaNs produced by all-zero rows back to 0.
    R_train_keyphrase[np.isnan(R_train_keyphrase)] = 0
    R_valid_keyphrase[np.isnan(R_valid_keyphrase)] = 0

In [6]:
# Look up the entry registered under MODEL and call it with the training
# matrices and the hyper-parameters from the configuration cell. Judging by
# the recorded progress bar, training runs inside this call -- confirm
# against the model implementation.
model = models[MODEL](
    matrix_train=R_train,
    epoch=EPOCH,
    lamb=LAMB,
    learning_rate=LEARNING_RATE,
    rank=RANK,
    corruption=CORRUPTION,
    optimizer=OPTIMIZER,
    matrix_train_keyphrase=R_train_keyphrase,
)

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use keras.layers.dense instead.
Instructions for updating:
Use tf.cast instead.


100%|██████████| 100/100 [00:48<00:00,  2.07it/s]


In [28]:
# Column of the keyphrase prediction that gets zeroed for the critique.
critiqued_kp_id = 20
# Row of R_train used as the example user.
test_user_id = 0

In [35]:
# Run inference on the selected user's row of R_train (densified first) and
# print the shapes of the two arrays that come back.
obs_predict, kp_predict = model.inference(
    R_train[test_user_id].todense()
)
print(obs_predict.shape, kp_predict.shape)

(1, 3668) (1, 75)


In [30]:
# Display the first array returned by model.inference.
obs_predict

array([[0.00163807, 0.00273792, 0.00899216, ..., 0.0084057 , 0.00447585,
        0.00379748]], dtype=float32)

In [31]:
# Display the second array returned by model.inference, as it is before the
# critiqued entry is zeroed in a later cell.
kp_predict

array([[0.05894641, 0.06316086, 0.0604354 , 0.03281329, 0.04420486,
        0.05163432, 0.0609065 , 0.03441197, 0.05963526, 0.04969142,
        0.04576517, 0.05530015, 0.06146105, 0.041384  , 0.06315634,
        0.03950001, 0.0630089 , 0.04351932, 0.05509656, 0.04724025,
        0.04450922, 0.06005137, 0.04930573, 0.0380764 , 0.04920959,
        0.03000186, 0.04432834, 0.03085965, 0.03395664, 0.03491366,
        0.050824  , 0.04798291, 0.0279957 , 0.01502237, 0.04792813,
        0.02774025, 0.05326298, 0.01941372, 0.04163877, 0.06022866,
        0.0498877 , 0.04545897, 0.05772493, 0.06232624, 0.06258562,
        0.06126428, 0.01720895, 0.05172535, 0.02532583, 0.00958497,
        0.05993828, 0.05294066, 0.04010186, 0.02218165, 0.02836389,
        0.06135029, 0.04495427, 0.02052977, 0.06057613, 0.0280016 ,
        0.04003042, 0.03000054, 0.0465077 , 0.03346527, 0.04361808,
        0.04611693, 0.03201109, 0.04333023, 0.04769679, 0.0300342 ,
        0.02711027, 0.04327786, 0.03341189, 0.02

In [32]:
# Critique step: zero the chosen keyphrase entry for the single user row.
# This modifies kp_predict in place.
kp_predict[0, critiqued_kp_id] = 0

In [33]:
# Ask the model for new predictions for the same user row, passing the
# keyphrase prediction in its current (edited) state.
predict_after_critique = model.critiquing(
    R_train[test_user_id].todense(),
    kp_predict,
)

In [34]:
# Display the array returned by model.critiquing.
predict_after_critique

array([[0.00171375, 0.0027337 , 0.00907784, ..., 0.00849446, 0.00447073,
        0.00373947]], dtype=float32)