import os
import sys
import pickle
import random
import multiprocessing

import numpy as np
import jieba
import matplotlib.pyplot as plt
import tensorflow as tf  # tf.argmax is used below; may already be provided by a wildcard import
from tqdm import tqdm
from gensim.models import Word2Vec  # instantiated below; may already be provided by a wildcard import
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping

from load_data import *
# from data_preprocess.make_dataset import *  # build the dataset
from data_preprocess.generate_npy import *
from train_model import *
from evaluate import fx_calc_map_label
from utils import *
from compare import *

np.set_printoptions(threshold=np.inf)
np.set_printoptions(suppress=True)
np.random.seed(1337)  # For Reproducibility
sys.setrecursionlimit(1000000)
cpu_count = multiprocessing.cpu_count()
parent_path = os.path.dirname(__file__)  # directory containing this file
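
# Overall flow of this script: load the per-class text files, tokenize them with jieba and train Word2Vec,
# build the word-index / embedding matrices, then, depending on the flags below, run the comparison
# experiments, train the multimodal model with the old or new method, and finally report the multimodal
# retrieval mAP on the test set.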
if __name__ == "__main__":
CD = False # 商品数据集
Fasion_200k = False # 因缺少硬件资源,所以未做实验,若切换此数据集需要将所有的38更改为5,You need to do it yourself
vocab_dim = 100 # 词向量的维度
n_iterations = 5 # ideally more..
n_exposures = 3 # 所有频数超过3的词语
window_size = 5
input_length = 25 # 输入序列的长度
max_len = 25 # 经过测试,每个句子的最大长度不超过21
num_classes = 38 # 类别总数
old_method = False # 选择旧方法进行训练
new_method = False # 选择新方法训练
draw = False
compared = True # 对比实验用
out_dim = 512
demonstration = True # 演示用
# weight_path = 'mnt/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5'
text_load_path = './data/text'
power_data_path = "./data/power_img_data_npy"
data_path = "./data/original_img_data_npy"
# 读入顺序,对应数据集每个类的顺序,同时对应文本的读取顺序
list_name = ['休闲裤', '半身裙', '女牛仔外套', '女牛仔裤', '女衬衫', '女西装', '文胸套装', '无帽卫衣', '棉衣棉服', '毛呢大衣',
'皮草', '睡袍', '背心吊带', '渔夫帽', '鸭舌帽', '卫衣', '棉衣', '牛仔外套', '牛仔裤', '短袖T恤', '衬衫', '西装',
'风衣', '马甲', '单肩包', '双肩包', '手提包', '腰包', '钱包', '吊坠', '戒指', '手镯', '中长靴', '商务鞋', '板鞋', '运动鞋', '雪地靴', '高跟鞋']
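    # The 38 entries above are Chinese category names, roughly: casual pants, skirt, women's denim jacket,
    # women's jeans, women's shirt, women's suit, bra set, hoodless sweatshirt, padded jacket, wool coat,
    # fur, nightgown, camisole, bucket hat, baseball cap, hoodie, cotton coat, denim jacket, jeans,
    # short-sleeve T-shirt, shirt, suit, trench coat, vest, shoulder bag, backpack, handbag, waist bag,
    # wallet, pendant, ring, bracelet, mid-calf boots, business shoes, skate shoes, sneakers, snow boots,
    # high heels. They presumably must match the dataset's directory names, so they are kept verbatim.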
    # generate_npy(use_power_dataset=True)  # generate the npy-format image data and labels
    text_pre, t_label, tt_label = get_loader(text_load_path, list_name)  # raw text, dataset labels, augmented-dataset labels
    # tokenize the text with jieba
    text_after = [jieba.lcut(document.replace('\n', '')) for document in text_pre]
    model = Word2Vec(size=vocab_dim,  # create an empty Word2Vec model with 100-dimensional word vectors
                     min_count=n_exposures,
                     window=window_size,
                     workers=cpu_count,
                     iter=n_iterations)
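    # Note: the size=/iter= keyword arguments above follow the gensim < 4.0 API; on gensim >= 4.0 the
    # equivalent call would presumably use vector_size=vocab_dim and epochs=n_iterations instead.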
    # text_data: original text as word-index sequences; power_text_data: augmented text as word-index sequences
    w2indx, w2vec, text_data, power_text_data = text_w2model(model, text_after, max_len)
    print('You will succeed...')
    n_symbols, embedding_weights = get_data(w2indx, w2vec, vocab_dim)
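    # Judging from how they are used below, n_symbols is presumably the vocabulary size (plus one for the
    # padding index) and embedding_weights an (n_symbols, vocab_dim) matrix of word vectors used to
    # initialise the Embedding layer inside MultiModel.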
    # Run the comparison experiments?
    if compared:
        text_train, text_test, image_train, image_test, train_onehot_label, test_onehot_label = \
            load_data_set(data_path, power_data_path, text_data, power_text_data,
                          t_label, tt_label)
        if demonstration:
            Image_Only(test_onehot_label)
            Text_Only(test_onehot_label)
            cross_loss(test_onehot_label)
            Ours(test_onehot_label)
        else:
            word_vec_test_mAP(text_test, vocab_dim, n_symbols, embedding_weights, input_length, test_onehot_label)
            txt_result(text_train, text_test, train_onehot_label, test_onehot_label,
                       vocab_dim, n_symbols, embedding_weights, input_length)
            img_result(text_data, t_label)
    if old_method:
        # split into training and test sets
        text_train, text_test, image_train, image_test, train_onehot_label, test_onehot_label = \
            load_data_set(data_path, power_data_path, text_data, power_text_data,
                          t_label, tt_label, use_power_data=False)
        # build the multimodal model
        multi_model = MultiModel(vocab_dim, n_symbols, embedding_weights, input_length, out_dim)
        # run_eagerly controls whether the model runs eagerly, which helps with custom loss functions and
        # with inspecting how tensors flow. Eager execution runs the model step by step like ordinary
        # Python code: usually slower, but easier to debug because you can step into individual layer calls.
        # By default Keras compiles the model into a static graph for best performance, so leave this unset
        # unless the model cannot run inside a tf.function.
        multi_model.compile(loss=Myloss(), optimizer='adam', run_eagerly=True)
        early_stopping = EarlyStopping(monitor='val_loss', patience=0, verbose=2, mode='min')
        history = multi_model.fit([image_train, text_train], train_onehot_label, batch_size=64, epochs=3,
                                  validation_split=0.3,
                                  verbose=1, callbacks=[early_stopping])
    if new_method:
        # train : test = 8 : 2
        # train : validation = 9 : 1
        if CD:
            imgTrain, imgVal, txtTrain, txtVal, imgTest, txtTest, \
                Train_label_one_hot, Val_label_one_hot, Tst_label_one_hot = \
                generate_method(LoadData(), text_data, t_label)
            train_size = len(imgTrain)
            val_size = len(imgVal)
            multi_model = MultiModel(vocab_dim, n_symbols, embedding_weights, input_length, out_dim)
            multi_model.compile(loss=Myloss(), optimizer='adam', metrics=[class_metric], run_eagerly=True)
            Tensorboard = TensorBoard(log_dir="./model", histogram_freq=1, write_grads=True)
        if Fasion_200k:
            from fasion_utils import *
            vocab_dim = 500
            input_length = 15
            num_classes = 5
            img_path, text_data, label, n_symbols, embedding_weights = initialization()
            imgTrain, imgVal, txtTrain, txtVal, imgTest, txtTest, \
                Train_label_one_hot, Val_label_one_hot, Tst_label_one_hot = \
                generate_method(img_path, text_data, label, 0.3, 0.3, True)
            multi_model = MultiModel(vocab_dim, n_symbols, embedding_weights, input_length, out_dim)
            multi_model.compile(loss=Myloss(), optimizer='adam', metrics=[class_metric], run_eagerly=True)
            Tensorboard = TensorBoard(log_dir="./model", histogram_freq=1, write_grads=True)
            history = multi_model.fit(BatchGen(batch_size=3, image_path=imgTrain,
                                               text=txtTrain, label=Train_label_one_hot),
                                      steps_per_epoch=2, epochs=2,
                                      validation_data=BatchGen(batch_size=1, image_path=imgVal,
                                                               text=txtVal, label=Val_label_one_hot),
                                      validation_steps=2, verbose=1)
        if CD:
            early_stopping = EarlyStopping(monitor='val_loss', patience=0, verbose=2, mode='min')
            history = multi_model.fit(batchGen(batch_size=3, image_path=imgTrain,
                                               text=txtTrain, label=Train_label_one_hot),
                                      steps_per_epoch=2, epochs=2,
                                      validation_data=batchGen(batch_size=1, image_path=imgVal,
                                                               text=txtVal, label=Val_label_one_hot),
                                      validation_steps=2,
                                      verbose=1, callbacks=[early_stopping])
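            # Note: steps_per_epoch=2, epochs=2 and the tiny batch sizes above look like smoke-test
            # settings; for a real training run these would presumably be raised to cover the full
            # training set.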
    # Extract features for the test set
    if old_method:
        multi_feature = multi_model.predict([image_test, text_test], batch_size=32)
        label = tf.argmax(test_onehot_label, axis=1)
        result = fx_calc_map_label(multi_feature[:, num_classes:], label)
        print('...multimodal image retrieval MAP = {}'.format(result))
    if new_method:
        Batch_size = 3
        n = len(imgTest)
        multi_feature = np.empty((n, out_dim), dtype=np.float32)
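        # The model output is assumed to be a concatenation of num_classes classification scores followed
        # by the out_dim-dimensional retrieval embedding, which is why only the columns after num_classes
        # are kept below. Predicting in small chunks keeps memory usage low.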
        num = (n + Batch_size - 1) // Batch_size  # number of chunks, rounded up, to avoid running out of memory
        for i in range(num):
            start = i * Batch_size
            end = min((i + 1) * Batch_size, n)
            print(f"Predicting samples {start} to {end}...")
            feature = multi_model.predict(Img_Txt(imgTest, txtTest, start, end))
            multi_feature[start:end] = feature[:, num_classes:]
        # compute mAP
        label = tf.argmax(Tst_label_one_hot, axis=1)
        result = fx_calc_map_label(multi_feature, label)
        print('...multimodal image retrieval MAP = {}'.format(result))
    if draw:
        # plot the training and validation loss
        plt.plot(history.history['loss'])
        plt.plot(history.history['val_loss'])
        plt.title('model loss')
        plt.ylabel('loss')
        plt.xlabel('epoch')
        plt.legend(['train', 'validation'], loc='upper left')
        plt.show()