-
Notifications
You must be signed in to change notification settings - Fork 0
/
data2array.py
121 lines (98 loc) · 4.4 KB
/
data2array.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
from keras.preprocessing.image import load_img, img_to_array
import pandas as pd
import pickle
import os
base_path = 'D:/lyb/'
# base_path = '/Users/mahaoyang/Downloads/'
def data2array_b(path):
    """Read the DatasetA files under ``path`` and assemble one dataset dict.

    Parameters
    ----------
    path : str
        Base directory containing ``DatasetA_train_20180813/`` and
        ``DatasetA_test_20180813/``.

    Returns
    -------
    dict with keys: 'label_list', 'label_map', 'train_list',
    'attributes_per_class', 'attribute_list', 'class_wordembeddings',
    'test_list', 'reverse_label_list', 'test_list_name', 'test_list_array'.

    Side effects: prints progress counts; caches intermediate results in
    ``train_list.pickle`` / ``test_list.pickle`` in the current directory.
    """
    train_base = path + 'DatasetA_train_20180813/'
    test_base = path + 'DatasetA_test_20180813/DatasetA_test/'

    # label id -> human-readable name; label_map keeps file order so a
    # label's position can serve as its integer class target.
    label_list = dict()
    label_map = []
    with open(train_base + 'label_list.txt', 'r') as f:
        for line in f:
            parts = line.strip('\n').split('\t')
            label_list[parts[0]] = parts[1]
            label_map.append(parts[0])
    print('label_list', len(label_list))

    # class name -> word-embedding vector (kept as the file's string tokens).
    class_wordembeddings = dict()
    with open(train_base + 'class_wordembeddings.txt', 'r') as f:
        for line in f:
            parts = line.strip('\n').split(' ')
            class_wordembeddings[parts[0]] = parts[1:]
    print('class_wordembeddings', len(class_wordembeddings))

    # label id -> per-class attribute vector.
    attributes_per_class = dict()
    with open(train_base + 'attributes_per_class.txt', 'r') as f:
        for line in f:
            parts = line.strip('\n').split('\t')
            attributes_per_class[parts[0]] = parts[1:]
    print('attributes_per_class', len(attributes_per_class))

    # attribute index -> attribute description.
    attribute_list = dict()
    with open(train_base + 'attribute_list.txt', 'r') as f:
        for line in f:
            parts = line.strip('\n').split('\t')
            attribute_list[int(parts[0])] = parts[1:]
    print('attribute_list', len(attribute_list))

    if os.path.exists('train_list.pickle'):
        with open('train_list.pickle', 'rb') as f:
            train_list = pickle.load(f)
    else:
        # Precompute label -> position once instead of calling
        # label_map.index() per training line (was O(n^2)).
        label_index = {lab: i for i, lab in enumerate(label_map)}
        train_list = dict()
        with open(train_base + 'train.txt', 'r') as f:
            for line in f:
                parts = line.strip('\n').split('\t')
                train_list[parts[0]] = {'label': parts[1],
                                        'label_array': label_index[parts[1]]}
        # Single pass: load the image and attach label metadata together.
        for img, info in train_list.items():
            pic = load_img(train_base + 'train/' + img, target_size=(64, 64))
            # img_to_array already yields an (H, W, C) array; the original
            # reshape-to-own-shape was a no-op and is dropped.
            info['img_array'] = img_to_array(pic)
            label = info['label']
            info['label_real_name'] = label_list[label]
            info['label_real_name_class_wordembeddings'] = class_wordembeddings[label_list[label]]
            info['label_attribute'] = attributes_per_class[label]
        with open('train_list.pickle', 'wb') as f:
            pickle.dump(train_list, f)
    print('train_list', len(train_list))

    if os.path.exists('test_list.pickle'):
        with open('test_list.pickle', 'rb') as f:
            test_list = pickle.load(f)
    else:
        test_list = dict()
        with open(test_base + 'image.txt', 'r') as f:
            for line in f:
                test_list[line.strip('\n').split('\t')[0]] = dict()
        for img, info in test_list.items():
            pic = load_img(test_base + 'test/' + img, target_size=(64, 64))
            info['img_array'] = img_to_array(pic)
        with open('test_list.pickle', 'wb') as f:
            pickle.dump(test_list, f)
    print('test_list', len(test_list))

    # Parallel name/array lists in test_list's iteration order.
    test_list_name = list(test_list)
    test_list_array = [test_list[name]['img_array'] for name in test_list]

    data = {'label_list': label_list, 'label_map': label_map, 'train_list': train_list,
            'attributes_per_class': attributes_per_class, 'attribute_list': attribute_list,
            'class_wordembeddings': class_wordembeddings, 'test_list': test_list, }
    data['reverse_label_list'] = {v: k for k, v in data['label_list'].items()}
    data['test_list_name'] = test_list_name
    data['test_list_array'] = test_list_array
    return data
def data2array(path):
    """Return the full dataset dict, caching it on disk as ``data.pickle``.

    On the first call the dict is built via ``data2array_b(path)`` and
    pickled; later calls load the pickle instead of re-reading the dataset.
    """
    if not os.path.exists('data.pickle'):
        data = data2array_b(path=path)
        with open('data.pickle', 'wb') as f:
            pickle.dump(data, f)
        return data
    with open('data.pickle', 'rb') as f:
        return pickle.load(f)
if __name__ == '__main__':
    # Script entry point: build (or load the cached) dataset dict for the
    # configured base path; the return value is discarded — only the pickle
    # side effects matter here.
    data2array(base_path)