-
Notifications
You must be signed in to change notification settings - Fork 126
/
rnn_one_hot.py
106 lines (80 loc) · 4.69 KB
/
rnn_one_hot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
from __future__ import print_function
import numpy as np
import theano
import theano.tensor as T
import lasagne
import cPickle
import random
from time import time
import rnn_base as rnn
from sparse_lstm import *
class RNNOneHot(rnn.RNNBase):
"""RNNOneHot are recurrent neural networks that do not depend on the factorization: they are based on one-hot encoding.
The parameters specific to the RNNOneHot are:
diversity_bias: a float in [0, inf) that tunes how the cost function of the network is biased towards less seen movies.
In practice, the classification error given by the categorical cross-entropy is divided by exp(diversity_bias * popularity (on a scale from 1 to 10)).
This will reduce the error associated to movies with a lot of views, putting therefore more importance on the ability of the network to correctly predict the rare movies.
A diversity_bias of 0 produces the normal behavior, with no bias.
"""
def __init__(self, diversity_bias=0.0, regularization=0.0, **kwargs):
super(RNNOneHot, self).__init__(**kwargs)
self.diversity_bias = np.cast[theano.config.floatX](diversity_bias)
self.regularization = regularization
self.name = "RNN with categorical cross entropy"
def _get_model_filename(self, epochs):
'''Return the name of the file to save the current model
'''
filename = "rnn_cce_db"+str(self.diversity_bias)+"_r"+str(self.regularization)+"_"+self._common_filename(epochs)
return filename
def _prepare_networks(self, n_items):
''' Prepares the building blocks of the RNN, but does not compile them:
'''
self.n_items = n_items
# The input is composed of to parts : the on-hot encoding of the movie, and the features of the movie
self.l_in = lasagne.layers.InputLayer(shape=(self.batch_size, self.max_length, self._input_size()))
# The input is completed by a mask to inform the LSTM of the length of the sequence
self.l_mask = lasagne.layers.InputLayer(shape=(self.batch_size, self.max_length))
# recurrent layer
if not self.use_movies_features:
l_recurrent = self.recurrent_layer(self.l_in, self.l_mask, true_input_size=self.n_items + self._n_optional_features(), only_return_final=True)
else:
l_recurrent = self.recurrent_layer(self.l_in, self.l_mask, true_input_size=None, only_return_final=True)
# l_last_slice gets the last output of the recurrent layer
l_last_slice = l_recurrent
# l_last_slice = lasagne.layers.SliceLayer(l_recurrent, -1, 1)
# Theano tensor for the targets
target = T.ivector('target_output')
target_popularity = T.fvector('target_popularity')
self.exclude = T.fmatrix('excluded_items')
self.theano_inputs = [self.l_in.input_var, self.l_mask.input_var, target, target_popularity, self.exclude]
# The sliced output is then passed through linear layer to obtain the right output size
self.l_out = lasagne.layers.DenseLayer(l_last_slice, num_units=self.n_items, nonlinearity=lasagne.nonlinearities.softmax)
# lasagne.layers.get_output produces a variable for the output of the net
network_output = lasagne.layers.get_output(self.l_out)
# loss function
self.cost = (T.nnet.categorical_crossentropy(network_output, target) / target_popularity).mean()
if self.regularization > 0.:
self.cost += self.regularization * lasagne.regularization.l2(self.l_out.b)
# self.cost += self.regularization * lasagne.regularization.regularize_layer_params(self.l_out, lasagne.regularization.l2)
elif self.regularization < 0.:
self.cost -= self.regularization * lasagne.regularization.l1(self.l_out.b)
# self.cost -= self.regularization * lasagne.regularization.regularize_layer_params(self.l_out, lasagne.regularization.l1)
def _prepare_input(self, sequences):
''' Sequences is a list of [user_id, input_sequence, targets]
'''
batch_size = len(sequences)
# Shape return variables
X = np.zeros((batch_size, self.max_length, self._input_size()), dtype=self._input_type) # input of the RNN
mask = np.zeros((batch_size, self.max_length)) # mask of the input (to deal with sequences of different length)
Y = np.zeros((batch_size,), dtype='int32') # output target
pop = np.zeros((batch_size,)) # output target
exclude = np.zeros((batch_size, self.n_items), dtype=theano.config.floatX)
for i, sequence in enumerate(sequences):
user_id, in_seq, target = sequence
seq_features = np.array(map(lambda x: self._get_features(x, user_id), in_seq))
X[i, :len(in_seq), :] = seq_features # Copy sequences into X
mask[i, :len(in_seq)] = 1
Y[i] = target[0][0] # id of the first and only target
pop[i] = self.dataset.item_popularity[target[0][0]] ** self.diversity_bias
exclude[i, [j[0] for j in in_seq]] = 1
return (X, mask.astype(theano.config.floatX), Y, pop.astype(theano.config.floatX), exclude)