In [None]:
!pip install --quiet cornac

[K     |████████████████████████████████| 12.4 MB 30 kB/s 
[?25h  Building wheel for powerlaw (setup.py) ... [?25l[?25hdone


In [None]:
import cornac
print(cornac.__version__)

1.13.5


In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
# Show where the runtime starts (the recorded run printed /content) before switching folders.
print(os.getcwd())
# Project folder on the mounted Drive. The trailing slash matters: a later cell builds
# file names by plain string concatenation (path + 'coding/...').
path = "/content/drive/MyDrive/RecSys Project 2/"
# Relative paths used below (e.g. 'goodbooks-10k-dataset/...') resolve against this folder.
os.chdir(path)

/content


In [None]:
import os
import sys
import re
from collections import defaultdict

import numpy as np
import pandas as pd; pd.set_option("max_colwidth", 0);
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

from cornac.utils import cache
from cornac.datasets import filmtrust, amazon_clothing
from cornac.eval_methods import RatioSplit
from cornac.models import PMF, SoRec, WMF, CTR, BPR, VBPR
from cornac.data import GraphModality, TextModality, ImageModality
from cornac.data.text import BaseTokenizer
from PIL import Image
import requests


%tensorflow_version 1.x
import tensorflow as tf

# Record interpreter and library versions alongside the results.
print(f"System version: {sys.version}")
print(f"Cornac version: {cornac.__version__}")
print(f"Tensorflow version: {tf.__version__}")

# Shared settings: SEED is passed to the data splits / CTR models, VERBOSE to their `verbose` flags.
SEED = 42
VERBOSE = True

TensorFlow 1.x selected.
System version: 3.7.11 (default, Jul  3 2021, 18:01:19) 
[GCC 7.5.0]
Cornac version: 1.13.5
Tensorflow version: 1.15.2


In [None]:
# Read the six goodbooks-10k tables in one pass; the short names are kept because
# later cells refer to them (r: ratings, tr: to-read, b: books, t: tags,
# bt: book-tag links, bg: book-genre links).
r, tr, b, t, bt, bg = map(pd.read_csv, (
    'goodbooks-10k-dataset/ratings.csv',
    'goodbooks-10k-dataset/to_read.csv',
    'goodbooks-10k-dataset/books.csv',
    'goodbooks-10k-dataset/tags.csv',
    'goodbooks-10k-dataset/book_tags.csv',
    'goodbooks-10k-dataset/book_genre.csv',
))

# Readable aliases for the two tables used most often.
books, ratings = b, r

In [None]:
# Keep just the (user, book, rating) columns, in that order.
data = r.loc[:, ['user_id', 'book_id', 'rating']]
# From here on `ratings` is a plain NumPy array of triplets, not the raw DataFrame.
ratings = data.values

In [None]:
# Basic size statistics of the rating matrix.
n_users = len(r.user_id.unique())
n_books = len(r.book_id.unique())
n_tagged_books = len(bt.goodreads_book_id.unique())
# Share of the user x book grid that actually carries a rating.
density = len(r) / n_users / n_books

print(f'Unique User: {n_users}')
print(f'Unique Book: {n_books}')
print(f'Unique Book with Tags: {n_tagged_books}')
print(f'Matrix Density: {density:2.2%}')

Unique User: 53424
Unique Book: 10000
Unique Book with Tags: 10000
Matrix Density: 0.18%


In [None]:
n = 20  # list cut-off shared by the ranking metrics (reported as ...@20)

# Metrics that take no cut-off.
auc = cornac.metrics.AUC()
rmse = cornac.metrics.RMSE()

# Ranking metrics evaluated at the top-n recommendations.
rec_20, pre_20, ncrr_20, ndcg_20 = (
    metric_cls(n)
    for metric_cls in (
        cornac.metrics.Recall,
        cornac.metrics.Precision,
        cornac.metrics.NCRR,
        cornac.metrics.NDCG,
    )
)


# CTR

In [None]:
# Left-join the genre table onto the books table so books without any genre are kept
# (their goodreads_book_id / genre come out as NaN), then keep only the columns used later.
merged_genres = books.merge(
    bg,
    how='left',
    left_on='best_book_id',
    right_on='goodreads_book_id',
)
book_genre = merged_genres[['id', 'goodreads_book_id', 'genre']]

In [None]:
book_genre['genre'].unique()

array(['fantasy', 'young adult', 'fiction', 'romance', 'contemporary',
       'thriller', 'science', 'suspense', 'classics', 'paranormal',
       'mystery', 'horror', 'chick lit', 'history', 'crime', 'art',
       'religion', 'philosophy', 'nonfiction', 'memoir', 'biography',
       'travel', 'psychology', 'poetry', nan, 'self help', 'spirituality',
       'christian', 'business', 'sports', 'music', 'graphic novels',
       'manga', 'comics', 'cookbooks', 'gay and lesbian',
       'humor and comedy'], dtype=object)

In [None]:
def _join_genres(group):
  """Return all genres of one book as a single space-separated string."""
  return ' '.join(group.genre)


# One string of genres per goodreads_book_id; rows whose id is NaN are dropped by groupby.
tag_join = book_genre.groupby('goodreads_book_id').apply(_join_genres)

In [None]:
tag_join

goodreads_book_id
1.0           fantasy young adult fiction classics paranormal mystery romance contemporary science                            
2.0           fantasy young adult fiction paranormal mystery romance science                                                  
3.0           fantasy young adult fiction classics paranormal mystery contemporary science                                    
5.0           fantasy young adult fiction classics paranormal mystery contemporary science travel                             
6.0           fantasy young adult fiction classics paranormal mystery contemporary science                                    
                                                  ...                                                                         
31538647.0    fantasy young adult fiction paranormal                                                                          
31845516.0    fiction contemporary philosophy nonfiction memoir biography self help religion 

In [None]:
book_genre.groupby('goodreads_book_id').count()

Unnamed: 0_level_0,id,genre
goodreads_book_id,Unnamed: 1_level_1,Unnamed: 2_level_1
1.0,9,9
2.0,7,7
3.0,8,8
5.0,9,9
6.0,8,8
...,...,...
31538647.0,4,4
31845516.0,11,11
32075671.0,5,5
32848471.0,4,4


In [None]:
def intro_generate(row):
  """Build the text document that describes one book for the text modality.

  Args:
    row: a row of the books table (or any mapping) providing 'title',
      'original_title', 'authors' and 'best_book_id'.

  Returns:
    A single string made of space-separated sentences: title, original title
    (omitted when missing), authors, and the book's genres looked up in the
    notebook-level `tag_join` (omitted when the book has none).
  """
  title = row['title']
  original_title = row['original_title']
  author = row['authors']
  # A book without genres simply has no entry in tag_join; .get() covers exactly
  # that case instead of a bare `except:` that would also hide real errors.
  tags = tag_join.get(row['best_book_id'], '')

  sentences = ['The title of the book is %s.' % title]
  # Missing original titles are NaN; formatting them would put the literal
  # word "nan" into every such document.
  if not pd.isna(original_title):
    sentences.append('The original_title of the book is %s.' % original_title)
  sentences.append('The authors of the book is %s.' % author)
  if tags:
    sentences.append(tags)

  # Join with spaces: the tokenizer splits on " ", so sentences glued together
  # as "...Games.The authors..." or "...Collins.fantasy" turn into fused tokens.
  return ' '.join(sentences)

In [None]:
# One generated description per book, in the row order of `books`.
test = books.apply(intro_generate, axis=1)

In [None]:
test

0       The title of the book is The Hunger Games (The Hunger Games, #1). The original_title of the book is The Hunger Games.The authors of the book is Suzanne Collins.fantasy young adult fiction romance contemporary thriller science suspense                                                                                                                            
1       The title of the book is Harry Potter and the Sorcerer's Stone (Harry Potter, #1). The original_title of the book is Harry Potter and the Philosopher's Stone.The authors of the book is J.K. Rowling, Mary GrandPré.fantasy young adult fiction classics paranormal mystery contemporary science                                                                     
2       The title of the book is Twilight (Twilight, #1). The original_title of the book is Twilight.The authors of the book is Stephenie Meyer.fantasy young adult fiction paranormal contemporary science horror chick lit romance                                      

In [None]:
ratings

array([[  314,     1,     5],
       [  439,     1,     3],
       [  588,     1,     5],
       ...,
       [49383, 10000,     5],
       [50124, 10000,     5],
       [51328, 10000,     1]])

In [None]:
# Generated book descriptions and the matching ids from the books table.
docs, item_ids = test, books.id

# Text side-information for the items: whitespace tokenisation, English stop words
# removed, and a capped vocabulary that ignores terms present in over half the documents.
item_text_modality = TextModality(
    ids=item_ids,
    corpus=docs,
    tokenizer=BaseTokenizer(stop_words="english", sep=" "),
    max_doc_freq=0.5,
    max_vocab=8000,
)

# 80/10/10 train/validation/test split; ratings >= 4.0 count as positive feedback
# for the ranking metrics.
ratio_split = RatioSplit(
    data=ratings,
    test_size=0.1,
    val_size=0.1,
    rating_threshold=4.0,
    exclude_unknowns=True,
    seed=SEED,
    verbose=VERBOSE,
    item_text=item_text_modality,
)


rating_threshold = 4.0
exclude_unknowns = True




---
Training data:
Number of users = 53000
Number of items = 10000
Number of ratings = 783923
Max rating = 5.0
Min rating = 1.0
Global mean = 3.9




---
Test data:
Number of users = 31297
Number of items = 9999
Number of ratings = 97682
Number of unknown users = 0
Number of unknown items = 0




---
Validation data:
Number of users = 31241
Number of items = 9999
Number of ratings = 97717
---
Total users = 53000
Total items = 10000


In [None]:
K = 100  # latent dimension handed to CTR as `k`

ctr = CTR(
    k=K,
    max_iter=20,
    a=1,
    b=0.05,
    lambda_u=0.02,
    lambda_v=0.02,
    seed=SEED,
    verbose=VERBOSE,
)

# Train on the text-aware split and report rating and ranking metrics.
ctr_experiment = cornac.Experiment(
    eval_method=ratio_split,
    models=[ctr],
    metrics=[auc, rmse, ncrr_20, ndcg_20, pre_20, rec_20],
)
ctr_experiment.run()


[CTR] Training started!


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Learning completed!

[CTR] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Rating', max=97927.0, style=ProgressStyle(description_wid…




HBox(children=(FloatProgress(value=0.0, description='Ranking', max=31522.0, style=ProgressStyle(description_wi…



TEST:
...
    |   RMSE |    AUC | NCRR@20 | NDCG@20 | Precision@20 | Recall@20 | Train (s) | Test (s)
--- + ------ + ------ + ------- + ------- + ------------ + --------- + --------- + --------
CTR | 2.3448 | 0.9306 |  0.0658 |  0.1087 |       0.0240 |    0.2403 |  519.5691 |  71.1058



In [None]:
K = 300  # larger latent dimension; this run replaces the `ctr` model trained above

ctr = CTR(
    k=K,
    max_iter=20,
    a=1,
    b=0.05,
    lambda_u=0.02,
    lambda_v=0.02,
    seed=SEED,
    verbose=VERBOSE,
)

# Same evaluation protocol and metrics as the K = 100 run, for a like-for-like comparison.
ctr_experiment = cornac.Experiment(
    eval_method=ratio_split,
    models=[ctr],
    metrics=[auc, rmse, ncrr_20, ndcg_20, pre_20, rec_20],
)
ctr_experiment.run()


[CTR] Training started!


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Learning completed!

[CTR] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Rating', max=97927.0, style=ProgressStyle(description_wid…




HBox(children=(FloatProgress(value=0.0, description='Ranking', max=31522.0, style=ProgressStyle(description_wi…



TEST:
...
    |   RMSE |    AUC | NCRR@20 | NDCG@20 | Precision@20 | Recall@20 | Train (s) | Test (s)
--- + ------ + ------ + ------- + ------- + ------------ + --------- + --------- + --------
CTR | 2.2641 | 0.9039 |  0.0553 |  0.1025 |       0.0231 |    0.2501 | 4551.0983 | 102.3134



In [None]:
# Persist the most recently trained CTR model (K = 300) under ./model; the returned
# value is the path of the written .pkl file, as echoed in the cell output.
ctr.save('model')

CTR model is saved to model/CTR/2021-07-19_13-35-12-003297.pkl


'model/CTR/2021-07-19_13-35-12-003297.pkl'

In [None]:
# Show the ten highest-weighted vocabulary words of every CTR topic.
vocab = ctr.train_set.item_text.vocab
# beta is transposed so each row is one topic; only the trailing max_vocab columns are kept.
topic_word_dist = ctr.model.beta.T[:, -ctr.train_set.item_text.max_vocab:]
top_word_inds = np.argsort(topic_word_dist, axis=1) + 4  # ignore 4 special tokens

topic_words = {}
topic_df = defaultdict(list)
print("WORD TOPICS:")
# The loop variable is deliberately NOT called `t`: that name holds the tags
# DataFrame loaded earlier and a loop over `t` would silently replace it with an int.
for topic_idx in range(len(topic_word_dist)):
  # argsort is ascending, so the last ten indices reversed are the top ten words.
  top_words = vocab.to_text(top_word_inds[topic_idx][-10:][::-1], sep=", ")
  topic_words[topic_idx + 1] = top_words
  topic_df["Topic"].append(topic_idx + 1)
  topic_df["Top words"].append(top_words)
topic_df = pd.DataFrame(topic_df)
topic_df

WORD TOPICS:


Unnamed: 0,Topic,Top words
0,1,"war, stars, talk, mitch, preacher, risingthe, different, survivors, test, carson"
1,2,"death, hard, breath, bag, worksthe, smithyoung, inferno, merchant, salmon, nineteen"
2,3,"new, soul, jan, ones, diethe, tea, mercedes, rivers, reborn, crows"
3,4,"self, nanthe, graphic, travel, music, hamilton, tall, attack, memoir, yearsthe"
4,5,"science, devil, womens, want, n, hollows, summerthe, london, kings, pilgrims"
...,...,...
295,296,"white, assassin, veronica, moonthe, picoultyoung, takeshi, childrenthe, crash, rk, burnthe"
296,297,"best, body, wizard, gardnerfiction, english, dante, citythe, birth, theory, maxwell"
297,298,"tracy, philip, ancient, witches, craisfiction, pooh, decade, perkins, ceecee, erik"
298,299,"peter, margaret, secret, water, marriage, meadfantasy, witch, thoughts, pendergast, herman"


# VBPR

In [None]:
# The book feature matrix is stored in ten chunks, coding1.npy ... coding10.npy
# (1000 rows each in the recorded run). Load them all, then stack once: growing
# the array with np.append inside the loop recopies everything on every iteration.
# NOTE(security): allow_pickle=True can execute arbitrary code while loading;
# keep it only as long as these files come from a trusted source.
coding_chunks = [
    np.load(path + 'coding/coding' + str(i) + '.npy', allow_pickle=True)
    for i in range(1, 11)
]
temp = np.concatenate(coding_chunks, axis=0)
print(temp.shape)



(2000, 1000)
(3000, 1000)
(4000, 1000)
(5000, 1000)
(6000, 1000)
(7000, 1000)
(8000, 1000)
(9000, 1000)
(10000, 1000)


In [None]:
# Distinct genre labels in order of first appearance.
label_list = bg.genre.unique()
# Genre -> column id and the inverse mapping; ids start at 1, so id 0 is never assigned.
label_dictionary = dict(zip(label_list, range(1, len(label_list) + 1)))
output_dictionary = dict(enumerate(label_list, start=1))

In [None]:
# Multi-hot genre matrix: one row per book (books 'id' 1..10000 -> row id-1),
# a singleton middle axis, and 37 genre columns (column 0 stays unused because
# label ids start at 1).
y = np.zeros((10000,1,37))
def ass_cat(x):
  """Mark the genre of one (book, genre) row in the global matrix `y`.

  Args:
    x: a row providing 'id' (1-based book id) and 'genre'.

  Rows whose genre has no entry in `label_dictionary` (e.g. NaN for books
  without any genre) are skipped. Anything else that goes wrong, such as an
  out-of-range index, is raised instead of being silently swallowed.
  """
  column = label_dictionary.get(x['genre'])
  if column is None:
    return
  y[int(x['id']-1),0,column]=1


In [None]:
# Fill `y` from the (book, genre) pairs. `book_genre` already holds this exact
# left merge (restricted to the columns ass_cat reads), so it is reused rather
# than recomputed; the trailing ';' hides the meaningless Series of None values.
book_genre.apply(ass_cat, axis=1);

0        None
1        None
2        None
3        None
4        None
         ... 
63483    None
63484    None
63485    None
63486    None
63487    None
Length: 63488, dtype: object

In [None]:
y.shape

(10000, 1, 37)

In [None]:
y

array([[[0., 1., 1., ..., 0., 0., 0.]],

       [[0., 1., 1., ..., 0., 0., 0.]],

       [[0., 1., 1., ..., 0., 0., 0.]],

       ...,

       [[0., 0., 0., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.]]])

In [None]:
# Width of the genre target vector: 36 genre labels plus the unused column 0
# (label ids in label_dictionary start at 1). Matches the last axis of `y`.
total_genre = 36+1

## Tag as output

In [None]:
from keras.models import Model, Sequential
from keras.layers import Dense, Flatten, Reshape, Input, InputLayer

# Multi-label genre classifier on top of the 1000-d book features: two ReLU
# hidden layers and one sigmoid unit per genre column.
model = Sequential([
    InputLayer(input_shape=(1, 1000)),
    Dense(units=512, activation="relu"),
    Dense(units=512, activation="relu"),
    Dense(units=total_genre, activation="sigmoid"),
])
model.summary()

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 1, 512)            512512    
_________________________________________________________________
dense_2 (Dense)              (None, 1, 512)            262656    
_________________________________________________________________
dense_3 (Dense)              (None, 1, 37)             18981     
Total params: 794,149
Trainable params: 794,149
Non-trainable params: 0
_________________________________________________________________


In [None]:
temp = temp.reshape((10000,1,1000))

In [None]:
batchsize = 64

In [None]:
optim = 'adam'

# Independent per-genre sigmoid outputs -> binary cross-entropy, tracked with per-label accuracy.
model.compile(
    optimizer=optim,
    loss='binary_crossentropy',
    metrics=["binary_accuracy"],
)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [None]:
np.array(y).reshape((-1,1,37)).shape

(63247, 1, 37)

In [None]:
# Fit the genre classifier: book features in, multi-hot genre vectors out.
model.fit(x=temp, y=y, batch_size=batchsize, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.callbacks.History at 0x7f3a204169d0>

In [None]:
# Reuse the trained classifier as a feature extractor by cutting it at a hidden layer.
# The layer is picked by position instead of by the auto-generated name 'dense_4':
# the model summarised above only contains dense_1..dense_3, so the name lookup
# fails on a fresh kernel (Keras numbers layers per session).
# NOTE(review): the last hidden layer is assumed to be the intended embedding — confirm.
# `inputs=` is the current keyword; the legacy `input=` spelling triggers a warning.
encoder_model = Model(inputs=model.input, outputs=model.layers[-2].output)
coding = encoder_model.predict(temp)
np.save('codings',coding)

  """Entry point for launching an IPython kernel.


In [None]:
# codings.npy is the numeric array written by np.save above, so unpickling is
# not needed; keeping allow_pickle off avoids executing code from a tampered file.
coding = np.load('codings.npy', allow_pickle=False)

In [None]:
# VBPR takes its item features through an ImageModality; here the features are
# the 37-d multi-hot genre vectors in `y`, not image features.
item_ids = books.id
genre_features = y.reshape(10000, 37)
item_image_modality = ImageModality(
    ids=item_ids,
    features=genre_features,
    normalized=True,
)

# Same split settings as the CTR section; note this rebinds `ratio_split`.
ratio_split = RatioSplit(
    data=ratings,
    test_size=0.1,
    val_size=0.1,
    rating_threshold=4.0,
    exclude_unknowns=True,
    seed=SEED,
    verbose=VERBOSE,
    item_image=item_image_modality,
)

rating_threshold = 4.0
exclude_unknowns = True




---
Training data:
Number of users = 53000
Number of items = 10000
Number of ratings = 783923
Max rating = 5.0
Min rating = 1.0
Global mean = 3.9




---
Test data:
Number of users = 31297
Number of items = 9999
Number of ratings = 97682
Number of unknown users = 0
Number of unknown items = 0




---
Validation data:
Number of users = 31241
Number of items = 9999
Number of ratings = 97717
---
Total users = 53000
Total items = 10000


In [None]:
# K now matches the latent dimension this run really uses (the previous K = 150
# was never read while k was hard-coded to 300).
K = 300
# seed=SEED makes the run repeatable; without it two identical executions give
# different metrics, as the two recorded result tables show.
vbpr = VBPR(k=K, k2=37, n_epochs=5, batch_size=1024, learning_rate=0.003, lambda_w=1, lambda_b=0.05, lambda_e=0.5, use_gpu=True, seed=SEED)

cornac.Experiment(eval_method=ratio_split, models=[vbpr], metrics=[auc,rmse,ncrr_20,ndcg_20,pre_20,rec_20]).run()


[VBPR] Training started!


HBox(children=(FloatProgress(value=0.0, description='Epoch 1/5', max=766.0, style=ProgressStyle(description_wi…




HBox(children=(FloatProgress(value=0.0, description='Epoch 2/5', max=766.0, style=ProgressStyle(description_wi…




HBox(children=(FloatProgress(value=0.0, description='Epoch 3/5', max=766.0, style=ProgressStyle(description_wi…




HBox(children=(FloatProgress(value=0.0, description='Epoch 4/5', max=766.0, style=ProgressStyle(description_wi…




HBox(children=(FloatProgress(value=0.0, description='Epoch 5/5', max=766.0, style=ProgressStyle(description_wi…


Optimization finished!

[VBPR] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Rating', max=97682.0, style=ProgressStyle(description_wid…




HBox(children=(FloatProgress(value=0.0, description='Ranking', max=31297.0, style=ProgressStyle(description_wi…




HBox(children=(FloatProgress(value=0.0, description='Rating', max=97717.0, style=ProgressStyle(description_wid…




HBox(children=(FloatProgress(value=0.0, description='Ranking', max=31241.0, style=ProgressStyle(description_wi…



VALIDATION:
...
     |   RMSE |    AUC | NCRR@20 | NDCG@20 | Precision@20 | Recall@20 | Time (s)
---- + ------ + ------ + ------- + ------- + ------------ + --------- + --------
VBPR | 1.5250 | 0.9138 |  0.0268 |  0.0525 |       0.0129 |    0.1333 |  74.9313

TEST:
...
     |   RMSE |    AUC | NCRR@20 | NDCG@20 | Precision@20 | Recall@20 | Train (s) | Test (s)
---- + ------ + ------ + ------- + ------- + ------------ + --------- + --------- + --------
VBPR | 1.5234 | 0.9143 |  0.0272 |  0.0534 |       0.0133 |    0.1352 |  124.3099 |  75.9482



In [None]:
# Repeat of the previous experiment with the same `vbpr` object and split: the
# model is trained again (see the training log below), so this shows run-to-run variation.
cornac.Experiment(eval_method=ratio_split, models=[vbpr], metrics=[auc,rmse,ncrr_20,ndcg_20,pre_20,rec_20]).run()


[VBPR] Training started!


HBox(children=(FloatProgress(value=0.0, description='Epoch 1/5', max=766.0, style=ProgressStyle(description_wi…




HBox(children=(FloatProgress(value=0.0, description='Epoch 2/5', max=766.0, style=ProgressStyle(description_wi…




HBox(children=(FloatProgress(value=0.0, description='Epoch 3/5', max=766.0, style=ProgressStyle(description_wi…




HBox(children=(FloatProgress(value=0.0, description='Epoch 4/5', max=766.0, style=ProgressStyle(description_wi…




HBox(children=(FloatProgress(value=0.0, description='Epoch 5/5', max=766.0, style=ProgressStyle(description_wi…


Optimization finished!

[VBPR] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Rating', max=97682.0, style=ProgressStyle(description_wid…




HBox(children=(FloatProgress(value=0.0, description='Ranking', max=31297.0, style=ProgressStyle(description_wi…




HBox(children=(FloatProgress(value=0.0, description='Rating', max=97717.0, style=ProgressStyle(description_wid…




HBox(children=(FloatProgress(value=0.0, description='Ranking', max=31241.0, style=ProgressStyle(description_wi…



VALIDATION:
...
     |   RMSE |    AUC | NCRR@20 | NDCG@20 | Precision@20 | Recall@20 | Time (s)
---- + ------ + ------ + ------- + ------- + ------------ + --------- + --------
VBPR | 1.5043 | 0.9212 |  0.0256 |  0.0513 |       0.0127 |    0.1328 |  73.7122

TEST:
...
     |   RMSE |    AUC | NCRR@20 | NDCG@20 | Precision@20 | Recall@20 | Train (s) | Test (s)
---- + ------ + ------ + ------- + ------- + ------------ + --------- + --------- + --------
VBPR | 1.5051 | 0.9212 |  0.0266 |  0.0528 |       0.0131 |    0.1353 |  123.8354 |  75.8416



In [None]:
# Smaller model trained for longer: K is used for both the latent (k) and the
# feature-based (k2) dimensions, with no regularisation on the embedding matrix.
K = 100
# seed=SEED keeps this run reproducible and consistent with the seeded CTR runs.
vbpr = VBPR(k=K, k2=K, n_epochs=25, batch_size=1024, learning_rate=0.003, lambda_w=1, lambda_b=0.05, lambda_e=0.0, use_gpu=True, seed=SEED)

cornac.Experiment(eval_method=ratio_split, models=[vbpr], metrics=[auc,rmse,ncrr_20,ndcg_20,pre_20,rec_20]).run()


[VBPR] Training started!


HBox(children=(FloatProgress(value=0.0, description='Epoch 1/25', max=766.0, style=ProgressStyle(description_w…




HBox(children=(FloatProgress(value=0.0, description='Epoch 2/25', max=766.0, style=ProgressStyle(description_w…




HBox(children=(FloatProgress(value=0.0, description='Epoch 3/25', max=766.0, style=ProgressStyle(description_w…




HBox(children=(FloatProgress(value=0.0, description='Epoch 4/25', max=766.0, style=ProgressStyle(description_w…




HBox(children=(FloatProgress(value=0.0, description='Epoch 5/25', max=766.0, style=ProgressStyle(description_w…




HBox(children=(FloatProgress(value=0.0, description='Epoch 6/25', max=766.0, style=ProgressStyle(description_w…




HBox(children=(FloatProgress(value=0.0, description='Epoch 7/25', max=766.0, style=ProgressStyle(description_w…




HBox(children=(FloatProgress(value=0.0, description='Epoch 8/25', max=766.0, style=ProgressStyle(description_w…




HBox(children=(FloatProgress(value=0.0, description='Epoch 9/25', max=766.0, style=ProgressStyle(description_w…




HBox(children=(FloatProgress(value=0.0, description='Epoch 10/25', max=766.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Epoch 11/25', max=766.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Epoch 12/25', max=766.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Epoch 13/25', max=766.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Epoch 14/25', max=766.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Epoch 15/25', max=766.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Epoch 16/25', max=766.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Epoch 17/25', max=766.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Epoch 18/25', max=766.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Epoch 19/25', max=766.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Epoch 20/25', max=766.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Epoch 21/25', max=766.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Epoch 22/25', max=766.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Epoch 23/25', max=766.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Epoch 24/25', max=766.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Epoch 25/25', max=766.0, style=ProgressStyle(description_…


Optimization finished!

[VBPR] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Rating', max=97682.0, style=ProgressStyle(description_wid…




HBox(children=(FloatProgress(value=0.0, description='Ranking', max=31297.0, style=ProgressStyle(description_wi…




HBox(children=(FloatProgress(value=0.0, description='Rating', max=97717.0, style=ProgressStyle(description_wid…




HBox(children=(FloatProgress(value=0.0, description='Ranking', max=31241.0, style=ProgressStyle(description_wi…



VALIDATION:
...
     |   RMSE |    AUC | NCRR@20 | NDCG@20 | Precision@20 | Recall@20 | Time (s)
---- + ------ + ------ + ------- + ------- + ------------ + --------- + --------
VBPR | 1.4264 | 0.9276 |  0.0313 |  0.0573 |       0.0145 |    0.1389 |  68.8151

TEST:
...
     |   RMSE |    AUC | NCRR@20 | NDCG@20 | Precision@20 | Recall@20 | Train (s) | Test (s)
---- + ------ + ------ + ------- + ------- + ------------ + --------- + --------- + --------
VBPR | 1.4322 | 0.9268 |  0.0307 |  0.0571 |       0.0146 |    0.1395 |  524.9002 |  72.8996



In [None]:
# Save next to the CTR model under ./model. The previous '\model' contained a
# stray backslash, which on Linux creates a directory literally named "\model".
vbpr.save('model')

VBPR model is saved to \model/VBPR/2021-07-16_16-24-00-218423.pkl


'\\model/VBPR/2021-07-16_16-24-00-218423.pkl'

## Image as output

In [None]:
from keras.layers import Dense, Flatten, Reshape, Input, InputLayer
from keras.models import Sequential, Model

def build_autoencoder(img_shape, code_size):
    """Create a single-layer linear autoencoder as two separate Sequential models.

    Args:
        img_shape: shape of one input sample, e.g. (32, 32, 3).
        code_size: length of the bottleneck code vector.

    Returns:
        (encoder, decoder): the encoder flattens a sample and projects it to
        `code_size` values; the decoder projects a code back to the flattened
        size and reshapes it to `img_shape`.
    """
    # Number of values in one flattened sample (32*32*3 = 3072 for the example shape).
    flat_size = np.prod(img_shape)

    encoder = Sequential([
        InputLayer(img_shape),
        Flatten(),
        Dense(code_size),
    ])

    decoder = Sequential([
        InputLayer((code_size,)),
        Dense(flat_size),
        Reshape(img_shape),
    ])

    return encoder, decoder

Using TensorFlow backend.


In [None]:
n_train = 10000
# NOTE(review): `tmp` is not assigned in any visible cell of this notebook (the
# feature matrix loaded earlier is called `temp`) — confirm where this array
# comes from, otherwise the cell fails after Restart & Run All.
# Each of the first n_train entries of `tmp` is flattened to a column vector and stacked.
train_X = np.asarray([tmp[i].reshape((-1,1)) for i in range(n_train)])

In [None]:
X =train_X.reshape((10000,32,32,3))

In [None]:
# Same as (32,32,3), we neglect the number of instances from shape
IMG_SHAPE = X.shape[1:]
# Build from IMG_SHAPE rather than a second hard-coded (32,32,3), so the
# sub-models always agree with the Input layer below.
encoder, decoder = build_autoencoder(IMG_SHAPE, 64)

# Chain encoder and decoder into one trainable model: sample -> code -> reconstruction.
inp = Input(IMG_SHAPE)
code = encoder(inp)
reconstruction = decoder(code)

autoencoder = Model(inp,reconstruction)
autoencoder.compile(optimizer='adamax', loss='mse')

# summary() prints by itself and returns None; wrapping it in print() added a stray "None".
autoencoder.summary()

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (None, 32, 32, 3)         0         
_________________________________________________________________
sequential_1 (Sequential)    (None, 64)                196672    
_________________________________________________________________
sequential_2 (Sequential)    (None, 32, 32, 3)         199680    
Total params: 396,352
Trainable params: 396,352
Non-trainable params: 0
_________________________________________________________________
None


In [None]:
# result
# Uses the @20 metric objects defined at the top of the notebook: the *_10 names
# referenced here before are never defined, and the recorded table reports @20 values.
cornac.Experiment(eval_method=ratio_split, models=[vbpr], metrics=[auc,rmse,ncrr_20,ndcg_20,pre_20,rec_20]).run()


[VBPR] Training started!


HBox(children=(FloatProgress(value=0.0, description='Epoch 1/10', max=6125.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Epoch 2/10', max=6125.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Epoch 3/10', max=6125.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Epoch 4/10', max=6125.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Epoch 5/10', max=6125.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Epoch 6/10', max=6125.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Epoch 7/10', max=6125.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Epoch 8/10', max=6125.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Epoch 9/10', max=6125.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Epoch 10/10', max=6125.0, style=ProgressStyle(description…


Optimization finished!

[VBPR] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Rating', max=195359.0, style=ProgressStyle(description_wi…




HBox(children=(FloatProgress(value=0.0, description='Ranking', max=40071.0, style=ProgressStyle(description_wi…



TEST:
...
     |   RMSE |    AUC | NCRR@20 | NDCG@20 | Precision@20 | Recall@20 | Train (s) | Test (s)
---- + ------ + ------ + ------- + ------- + ------------ + --------- + --------- + --------
VBPR | 1.9156 | 0.9221 |  0.0389 |  0.0615 |       0.0183 |    0.1268 |  525.4130 |  89.7837

