# function.py (forked from zbystd/test)
# Original listing: 190 lines (161 loc), 10.6 KB.
import tensorflow as tf
from load import load_data
import pickle
# Run load.py: load the data set and its preprocessing results.
(
    title_count,
    title_set,
    genres2int,
    features,
    targets_values,
    ratings,
    users,
    movies,
    data,
    movies_orig,
    users_orig,
) = load_data()

# Model and training hyper-parameters.
sentences_size = title_count  # length of a movie title
embed_dim = 32  # dimension of the embedding matrices
num_epochs = 10  # number of training epochs
batch_size = 256  # size of one batch
dropout_keep = 0.5  # dropout ratio
learning_rate = 0.0001  # learning rate
save_dir = './model/save'  # path where the trained model is saved
# User information: display labels for the coded user attributes.
user_gender = {
    'M': '男性',
    'F': '女性',
}
user_age = {
    1: "Under 18",
    18: "18-24",
    25: "25-34",
    35: "35-44",
    45: "45-49",
    50: "50-55",
    56: "56+",
}
# Occupation codes are the consecutive integers 0..20, so the table is
# built by enumerating the labels in code order.
user_occupation = dict(enumerate([
    "other",
    "academic/educator",
    "artist",
    "clerical/admin",
    "college/grad student",
    "customer service",
    "doctor/health care",
    "executive/managerial",
    "farmer",
    "homemaker",
    "K-12 student",
    "lawyer",
    "programmer",
    "retired",
    "sales/marketing",
    "scientist",
    "self-employed",
    "technician/engineer",
    "tradesman/craftsman",
    "unemployed",
    "writer",
]))
# Build the input placeholders.
def get_inputs():
    """Create the graph's input placeholders.

    Every id-like input is an int32 placeholder of shape ``[None, 1]``.
    ``movie_categories`` is ``[None, 18]`` and ``movie_titles`` is
    ``[None, sentences_size]``. The placeholder names are looked up again
    by ``get_tensors``, so they must not change.

    Returns:
        tuple: ``(uid, user_gender, user_age, user_job, movie_id,
        movie_categories, movie_titles, targets, LearningRate,
        dropout_keep_prob)``.
    """
    uid = tf.placeholder(tf.int32, [None, 1], name="uid")
    user_gender = tf.placeholder(tf.int32, [None, 1], name="user_gender")
    user_age = tf.placeholder(tf.int32, [None, 1], name="user_age")
    user_job = tf.placeholder(tf.int32, [None, 1], name="user_job")
    movie_id = tf.placeholder(tf.int32, [None, 1], name="movie_id")
    # NOTE(review): 18 is presumably the padded genre-list length produced
    # by load_data -- confirm against load.py.
    movie_categories = tf.placeholder(tf.int32, [None, 18], name="movie_categories")
    # The title width follows sentences_size (instead of a literal 15) so it
    # always agrees with the max-pool window that get_movie_cnn_layer derives
    # from sentences_size.
    movie_titles = tf.placeholder(tf.int32, [None, sentences_size], name="movie_titles")
    targets = tf.placeholder(tf.int32, [None, 1], name="targets")
    LearningRate = tf.placeholder(tf.float32, name="LearningRate")
    dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
    return (
        uid,
        user_gender,
        user_age,
        user_job,
        movie_id,
        movie_categories,
        movie_titles,
        targets,
        LearningRate,
        dropout_keep_prob,
    )
# Define the embedding matrices for the user fields.
def get_user_embedding(uid, user_gender, user_age, user_job):
    """Look up an embedding vector for each of the four user inputs.

    The number of rows of each table is the maximum value found in the
    matching column of the module-level ``features`` array (columns 0, 2,
    3 and 4) plus one. The user-id table is ``embed_dim`` wide; the gender,
    age and job tables are ``embed_dim // 2`` wide.

    Returns:
        tuple: the uid, gender, age and job embedding lookups, in that order.
    """
    def _table_rows(column):
        # Largest code in the column plus one, so that code is a valid row.
        return max(features.take(column, 1)) + 1

    with tf.name_scope("user_embedding"):
        uid_rows = _table_rows(0)  # number of user ids
        gender_rows = _table_rows(2)  # number of gender classes
        age_rows = _table_rows(3)  # number of age classes
        job_rows = _table_rows(4)  # number of occupation classes

        uid_table = tf.Variable(
            tf.random_uniform([uid_rows, embed_dim], -1, 1),
            name="uid_embed_matrix")
        uid_vectors = tf.nn.embedding_lookup(uid_table, uid, name="uid_embed_layer")

        gender_table = tf.Variable(
            tf.random_uniform([gender_rows, embed_dim // 2], -1, 1),
            name="gender_embed_matrix")
        gender_vectors = tf.nn.embedding_lookup(gender_table, user_gender, name="gender_embed_layer")

        age_table = tf.Variable(
            tf.random_uniform([age_rows, embed_dim // 2], -1, 1),
            name="age_embed_matrix")
        age_vectors = tf.nn.embedding_lookup(age_table, user_age, name="age_embed_layer")

        job_table = tf.Variable(
            tf.random_uniform([job_rows, embed_dim // 2], -1, 1),
            name="job_embed_matrix")
        job_vectors = tf.nn.embedding_lookup(job_table, user_job, name="job_embed_layer")
    return uid_vectors, gender_vectors, age_vectors, job_vectors
# Fully connect the user embeddings together to produce the user feature.
def get_user_feature_layer(uid_embed_layer, gender_embed_layer, age_embed_layer, job_embed_layer):
    """Combine the four user embeddings into one 200-unit user feature.

    Each embedding goes through its own ``embed_dim``-unit dense + ReLU
    layer; the results are concatenated on axis 2 and passed through a
    200-unit tanh fully connected layer.

    Returns:
        tuple: ``(user_combine_layer, user_combine_layer_flat)`` -- the tanh
        layer output and the same tensor reshaped to ``[-1, 200]``.
    """
    with tf.name_scope("user_fc"):
        # First fully connected layer: one dense branch per user field.
        # The list is evaluated left to right, keeping op creation order.
        branches = [
            tf.layers.dense(uid_embed_layer, embed_dim, name="uid_fc_layer", activation=tf.nn.relu),
            tf.layers.dense(gender_embed_layer, embed_dim, name="gender_fc_layer", activation=tf.nn.relu),
            tf.layers.dense(age_embed_layer, embed_dim, name="age_fc_layer", activation=tf.nn.relu),
            tf.layers.dense(job_embed_layer, embed_dim, name="job_fc_layer", activation=tf.nn.relu),
        ]
        # Second fully connected layer.
        merged = tf.concat(branches, 2)  # (?, 1, 128)
        hidden = tf.contrib.layers.fully_connected(merged, 200, tf.tanh)  # (?, 1, 200)
        # Deliberately unnamed: get_tensors fetches this op by its
        # auto-generated name "user_fc/Reshape:0".
        hidden_flat = tf.reshape(hidden, [-1, 200])
    return hidden, hidden_flat
# Define the embedding matrix for the movie id.
def get_movie_id_embed_layer(movie_id):
    """Look up the embedding vector for each movie id.

    The table has ``max(column 1 of features) + 1`` rows and ``embed_dim``
    columns, initialised uniformly in ``[-1, 1)``.

    Returns:
        The embedding lookup tensor for ``movie_id``.
    """
    with tf.name_scope("movie_embedding"):
        id_rows = max(features.take(1, 1)) + 1  # number of movie ids
        id_table = tf.Variable(
            tf.random_uniform([id_rows, embed_dim], -1, 1),
            name = "movie_id_embed_matrix")
        id_vectors = tf.nn.embedding_lookup(id_table, movie_id, name = "movie_id_embed_layer")
    return id_vectors
# Sum the embedding vectors of a movie's several genres.
def get_movie_categories_layers(movie_categories):
    """Embed every genre id of a movie and add the vectors together.

    The table has ``max(genres2int.values()) + 1`` rows and ``embed_dim``
    columns. The per-genre vectors are summed over axis 1 with
    ``keepdims=True``, so the genre axis is kept with length one.

    Returns:
        The summed genre embedding tensor.
    """
    with tf.name_scope("movie_categories_layers"):
        genre_rows = max(genres2int.values()) + 1  # number of movie genres
        genre_table = tf.Variable(
            tf.random_uniform([genre_rows, embed_dim], -1, 1),
            name = "movie_categories_embed_matrix")
        per_genre = tf.nn.embedding_lookup(genre_table, movie_categories, name = "movie_categories_embed_layer")
        genre_sum = tf.reduce_sum(per_genre, axis=1, keepdims=True)
    return genre_sum
# Build the text-convolution feature of the movie title.
def get_movie_cnn_layer(dropout_keep_prob, movie_titles):
    """Embed the title word ids, then apply convolution, max-pooling and dropout.

    One convolution branch is built per window size (2, 3, 4 and 5 words),
    each with ``filter_num`` filters. Every branch is max-pooled with a
    window of ``sentences_size - window_size + 1`` rows, and the pooled
    outputs are concatenated on axis 3.

    Args:
        dropout_keep_prob: value passed to ``tf.nn.dropout`` as its second
            argument.
        movie_titles: integer tensor of title word ids.
            NOTE(review): the pooling window assumes the title axis has
            length ``sentences_size`` -- confirm against the placeholder.

    Returns:
        tuple: ``(pool_layer_flat, dropout_layer)`` -- the pooled features
        reshaped to ``[-1, 1, len(window_sizes) * filter_num]`` and the same
        tensor after dropout.
    """
    # Fetch the embedding vector of every word of the title.
    with tf.name_scope("movie_embedding"):
        movie_title_max = len(title_set)  # number of words used in movie titles
        movie_title_embed_matrix = tf.Variable(
            tf.random_uniform([movie_title_max, embed_dim], -1, 1),
            name="movie_title_embed_matrix")
        movie_title_embed_layer = tf.nn.embedding_lookup(
            movie_title_embed_matrix, movie_titles, name="movie_title_embed_layer")
        # Add a trailing channel axis, as conv2d expects 4-D input.
        movie_title_embed_layer_expand = tf.expand_dims(movie_title_embed_layer, -1)

    # Convolve and max-pool the text embedding with kernels of several sizes.
    pool_layer_lst = []
    filter_num = 8  # number of text convolution filters
    # An ordered tuple rather than a set: the order of the branches fixes the
    # layout of the concatenated features, so it must not depend on set
    # iteration order.
    window_sizes = (2, 3, 4, 5)  # sliding windows of 2, 3, 4 and 5 words
    for window_size in window_sizes:
        with tf.name_scope("movie_txt_conv_maxpool_{}".format(window_size)):
            filter_weights = tf.Variable(
                tf.truncated_normal([window_size, embed_dim, 1, filter_num], stddev=0.1),
                name="filter_weights")
            filter_bias = tf.Variable(tf.constant(0.1, shape=[filter_num]), name="filter_bias")
            conv_layer = tf.nn.conv2d(
                movie_title_embed_layer_expand, filter_weights, [1, 1, 1, 1],
                padding="VALID", name="conv_layer")
            relu_layer = tf.nn.relu(tf.nn.bias_add(conv_layer, filter_bias), name="relu_layer")
            maxpool_layer = tf.nn.max_pool(
                relu_layer, [1, sentences_size - window_size + 1, 1, 1], [1, 1, 1, 1],
                padding="VALID", name="maxpool_layer")
            pool_layer_lst.append(maxpool_layer)

    # Dropout layer.
    with tf.name_scope("pool_dropout"):
        pool_layer = tf.concat(pool_layer_lst, 3, name="pool_layer")
        max_num = len(window_sizes) * filter_num
        pool_layer_flat = tf.reshape(pool_layer, [-1, 1, max_num], name="pool_layer_flat")
        dropout_layer = tf.nn.dropout(pool_layer_flat, dropout_keep_prob, name="dropout_layer")
    return pool_layer_flat, dropout_layer
# Build the movie feature.
def get_movie_feature_layer(movie_id_embed_layer, movie_categories_embed_layer, dropout_layer):
    """Combine the movie id, genre and title features into one 200-unit feature.

    The id and genre embeddings each pass through an ``embed_dim``-unit
    dense + ReLU layer. They are concatenated with ``dropout_layer`` on
    axis 2 and passed through a 200-unit tanh fully connected layer.

    Returns:
        tuple: ``(movie_combine_layer, movie_combine_layer_flat)`` -- the tanh
        layer output and the same tensor reshaped to ``[-1, 200]``.
    """
    with tf.name_scope("movie_fc"):
        # First fully connected layer.
        id_branch = tf.layers.dense(
            movie_id_embed_layer, embed_dim,
            name="movie_id_fc_layer", activation=tf.nn.relu)
        genre_branch = tf.layers.dense(
            movie_categories_embed_layer, embed_dim,
            name="movie_categories_fc_layer", activation=tf.nn.relu)
        # Second fully connected layer.
        merged = tf.concat([id_branch, genre_branch, dropout_layer], 2)  # (?, 1, 96)
        hidden = tf.contrib.layers.fully_connected(merged, 200, tf.tanh)  # (?, 1, 200)
        # Deliberately unnamed: get_tensors fetches this op by its
        # auto-generated name "movie_fc/Reshape:0".
        hidden_flat = tf.reshape(hidden, [-1, 200])
    return hidden, hidden_flat
# Split the data into batches.
def get_batches(Xs, ys, batch_size):
    """Yield consecutive ``(Xs, ys)`` slices of at most ``batch_size`` items.

    Both sequences are sliced with the same bounds, which are computed from
    ``len(Xs)``; the last batch may be shorter than ``batch_size``.
    """
    total = len(Xs)
    for lower in range(0, total, batch_size):
        window = slice(lower, min(lower + batch_size, total))
        yield Xs[window], ys[window]
# Fetch the tensors from a loaded graph.
def get_tensors(loaded_graph):
    """Fetch the input placeholders and output tensors from ``loaded_graph``.

    Each tensor is obtained with ``loaded_graph.get_tensor_by_name``.

    Returns:
        tuple: ``(uid, user_gender, user_age, user_job, movie_id,
        movie_categories, movie_titles, targets, lr, dropout_keep_prob,
        inference, movie_combine_layer_flat, user_combine_layer_flat)``.
    """
    # Names are listed in the order they are fetched; note that
    # dropout_keep_prob is fetched before LearningRate but returned after it.
    fetch_order = (
        "uid:0",
        "user_gender:0",
        "user_age:0",
        "user_job:0",
        "movie_id:0",
        "movie_categories:0",
        "movie_titles:0",
        "targets:0",
        "dropout_keep_prob:0",
        "LearningRate:0",
        "inference/ExpandDims:0",
        "movie_fc/Reshape:0",
        "user_fc/Reshape:0",
    )
    (
        uid,
        gender,
        age,
        job,
        movie_id,
        categories,
        titles,
        targets,
        keep_prob,
        lr,
        inference,
        movie_flat,
        user_flat,
    ) = [loaded_graph.get_tensor_by_name(tensor_name) for tensor_name in fetch_order]
    return (
        uid, gender, age, job, movie_id, categories, titles, targets,
        lr, keep_prob, inference, movie_flat, user_flat,
    )
# Save the model parameters.
def save_params(params):
    """Pickle ``params`` to ``./model/params.p``.

    The target directory is created when it does not exist yet, and the
    file is closed as soon as the dump finishes (the handle used to be left
    open until garbage collection).

    Args:
        params: any picklable object.
    """
    import os  # local import: the file header does not import os

    target = './model/params.p'
    os.makedirs(os.path.dirname(target), exist_ok=True)
    with open(target, 'wb') as handle:
        pickle.dump(params, handle)
# Load the model parameters.
def load_params():
    """Return the object pickled in ``./model/params.p``.

    The file is opened in a ``with`` block so the handle is closed
    deterministically instead of being leaked.

    Raises:
        FileNotFoundError: if ``./model/params.p`` does not exist.
    """
    # NOTE(review): unpickling executes code from the file; only load
    # parameter files this project wrote itself.
    with open('./model/params.p', mode='rb') as handle:
        return pickle.load(handle)
# Build the display information of a user.
def get_user_info(user):
    """Translate a coded user record into display values.

    Args:
        user: indexable record whose item 0 is returned unchanged and whose
            items 1, 2 and 3 are keys of the module-level ``user_gender``,
            ``user_age`` and ``user_occupation`` tables.

    Returns:
        list: ``[user[0], gender label, age label, occupation label]``.

    Raises:
        KeyError: if a code is missing from its label table.
    """
    return [
        user[0],
        user_gender[user[1]],
        user_age[user[2]],
        user_occupation[user[3]],
    ]