In [1]:
import numpy as np

from utils.io import load_numpy
from utils.modelnames import models

# Dataset Parameters

In [2]:
# Directory holding every .npz file named below; it is passed as `path=` to
# load_numpy in the loading cell.
DATA_DIR = "data/beer/"

# Training split: main matrix and its keyphrase companion.
TRAIN_SET = "Rtrain.npz"
TRAIN_KEYPHRASE_SET = "Rtrain_keyphrase.npz"

# Validation split (loaded when ENABLE_VALIDATION is true).
VALID_SET = "Rvalid.npz"
VALID_KEYPHRASE_SET = "Rvalid_keyphrase.npz"

# Test split (loaded in place of the validation split when ENABLE_VALIDATION
# is false).
TEST_SET = "Rtest.npz"
TEST_KEYPHRASE_SET = "Rtest_keyphrase.npz"

# Algorithm Parameters

In [3]:
# Key into the `models` registry selecting which model to build.
MODEL = "CDE-VAE"

# Values forwarded to the model constructor as rank=, lamb=, corruption=,
# epoch=, learning_rate= and optimizer= respectively.
RANK = 200
LAMB = 1.0
CORRUPTION = 0.5
EPOCH = 100
LEARNING_RATE = 0.0001
OPTIMIZER = "RMSProp"

# Switches read by the loading and preprocessing cells.
ENABLE_VALIDATION = True              # True: load the validation split; False: the test split
ENABLE_KEYPHRASE_BINARIZATION = True  # True: 0/1 keyphrase indicators; False: row-normalized

# Not referenced by any cell visible in this notebook.
ENABLE_EVALUATION = True
TRAIN_BATCH_SIZE = 128
PREDICT_BATCH_SIZE = 128
TOP_K = 10

# Load Dataset

In [4]:
# Training matrices. The keyphrase matrix is converted with .toarray() right
# away; R_train is kept as returned by load_numpy (presumably a SciPy sparse
# matrix, since later cells call .todense() on its rows -- confirm in utils.io).
R_train = load_numpy(path=DATA_DIR, name=TRAIN_SET)
R_train_keyphrase = load_numpy(path=DATA_DIR, name=TRAIN_KEYPHRASE_SET).toarray()

# Held-out matrices: the validation files when ENABLE_VALIDATION is set,
# otherwise the test files. Either way they are bound to the R_valid* names.
R_valid = load_numpy(
    path=DATA_DIR,
    name=VALID_SET if ENABLE_VALIDATION else TEST_SET,
)
R_valid_keyphrase = load_numpy(
    path=DATA_DIR,
    name=VALID_KEYPHRASE_SET if ENABLE_VALIDATION else TEST_KEYPHRASE_SET,
).toarray()

# Preprocess Keyphrase Frequency

In [5]:
# Convert the raw keyphrase matrices into model inputs: either 0/1 indicators
# or per-row relative frequencies, depending on ENABLE_KEYPHRASE_BINARIZATION.
if ENABLE_KEYPHRASE_BINARIZATION:
    # Every non-zero entry becomes 1; the arrays are modified in place.
    R_train_keyphrase[R_train_keyphrase != 0] = 1
    R_valid_keyphrase[R_valid_keyphrase != 0] = 1
else:
    # Divide each row by its own sum. A row that is entirely zero gives 0/0,
    # which NumPy reports as an "invalid value" RuntimeWarning and stores as
    # NaN. That case is expected and repaired just below, so only that warning
    # is silenced here.
    with np.errstate(invalid="ignore"):
        R_train_keyphrase = R_train_keyphrase / R_train_keyphrase.sum(axis=1, keepdims=True)
        R_valid_keyphrase = R_valid_keyphrase / R_valid_keyphrase.sum(axis=1, keepdims=True)

    # Replace the NaNs produced by all-zero rows with 0.
    R_train_keyphrase[np.isnan(R_train_keyphrase)] = 0
    R_valid_keyphrase[np.isnan(R_valid_keyphrase)] = 0

In [6]:
# Look up the entry registered under MODEL in `models` and call it with the
# training matrices and the hyperparameters from the configuration cell.
# NOTE(review): the recorded output is a 100-step progress bar, so this call
# appears to run the full training loop -- confirm in utils.modelnames.
model = models[MODEL](
    matrix_train=R_train,
    matrix_train_keyphrase=R_train_keyphrase,
    rank=RANK,
    lamb=LAMB,
    corruption=CORRUPTION,
    epoch=EPOCH,
    learning_rate=LEARNING_RATE,
    optimizer=OPTIMIZER,
)

100%|██████████| 100/100 [01:46<00:00,  1.08s/it]


In [7]:
# Index into the first row of the prediction whose entry is overwritten to
# simulate a critique.
critiqued_item_id = 20

# Index of the R_train row used for the inference / critiquing walkthrough.
test_user_id = 0

In [8]:
# Dense form of the selected user's row of the training matrix.
user_history = R_train[test_user_id].todense()

# Prediction for that user before any critique is applied.
predict = model.inference(user_history)

In [9]:
# Display the predicted value for the item about to be critiqued, as produced
# by model.inference before any critique is applied.
predict[0][critiqued_item_id]

0.0050382284

In [10]:
# Overwrite the critiqued item's entry with -1. The assignment mutates
# `predict` in place, and the modified `predict` is what the following cell
# hands to model.critiquing. Re-running the earlier display cell after this
# one will therefore show -1 rather than the original prediction.
# NOTE(review): -1 presumably marks the item as rejected -- confirm against
# the model's critiquing implementation.
predict[0][critiqued_item_id] = -1

In [11]:
# Dense form of the selected user's row of the training matrix.
user_history = R_train[test_user_id].todense()

# Re-score using that row together with `predict`, which by this point
# carries the -1 written at the critiqued item's position.
predict_after_critique = model.critiquing(user_history, predict)

In [12]:
# Display the critiqued item's value in the output of model.critiquing, for
# comparison with the pre-critique value displayed earlier.
predict_after_critique[0][critiqued_item_id]

0.0050011026