-
Notifications
You must be signed in to change notification settings - Fork 3
/
Create_sample.py
148 lines (118 loc) · 7.39 KB
/
Create_sample.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
import os
from PIL import Image
import numpy as np
import utils
import traceback
# Input annotation file, input image directory and output root directory.
anno_src = r"D:\ACelebA\Gen2.txt"
img_dir = r"D:\ACelebA\Gen_image2"
save_path = r"D:\ACelebA\Gen_image3"

# Generate positive / part / negative training crops for every requested
# network input size.  For each annotated face box the script samples random
# square crops, resizes them to face_size x face_size, classifies them by IoU
# against the labelled box and writes the image plus one label line.
for face_size in [48]:
    print("gen %i image" % face_size)

    # Output image directories: <save_path>/<face_size>/{positive,negative,part}
    positive_image_dir = os.path.join(save_path, str(face_size), "positive")
    negative_image_dir = os.path.join(save_path, str(face_size), "negative")
    part_image_dir = os.path.join(save_path, str(face_size), "part")
    for image_dir in [positive_image_dir, negative_image_dir, part_image_dir]:
        os.makedirs(image_dir, exist_ok=True)

    # Output label files: <save_path>/<face_size>/{positive,negative,part}.txt
    positive_anno_filename = os.path.join(save_path, str(face_size), "positive.txt")
    negative_anno_filename = os.path.join(save_path, str(face_size), "negative.txt")
    part_anno_filename = os.path.join(save_path, str(face_size), "part.txt")

    # Running counters; they double as the output image file names.
    positive_count = 0
    negative_count = 0
    part_count = 0

    # A single `with` closes all four files (including the source annotation
    # file) even when one of the opens or the loop itself fails.
    with open(positive_anno_filename, "w") as positive_anno_file, \
            open(negative_anno_filename, "w") as negative_anno_file, \
            open(part_anno_filename, "w") as part_anno_file, \
            open(anno_src) as anno_file:
        for i, line in enumerate(anno_file):
            if i < 2:  # the first two lines of the annotation file are skipped
                continue
            # Best-effort per line: a bad record or unreadable image is
            # reported and the run continues with the next line.
            try:
                # e.g. "000001.jpg    95  71 226 313" -> ['000001.jpg', '95', '71', '226', '313']
                strs = line.split()
                if not strs:  # blank line
                    continue
                image_filename = strs[0].strip()
                image_file = os.path.join(img_dir, image_filename)
                with Image.open(image_file) as img:
                    img_w, img_h = img.size

                    # Labelled box: top-left corner plus width and height.
                    x1 = int(strs[1].strip())
                    y1 = int(strs[2].strip())
                    w = int(strs[3].strip())
                    h = int(strs[4].strip())
                    x2 = x1 + w
                    y2 = y1 + h

                    # Skip boxes that are too small or have invalid coordinates.
                    if max(w, h) < 40 or x1 < 0 or y1 < 0 or w < 0 or h < 0:
                        continue

                    boxes = [[x1, y1, x2, y2]]  # e.g. [[95, 71, 321, 384]]
                    _boxes = np.array(boxes)

                    # Centre of the labelled face box.
                    cx = x1 + w / 2
                    cy = y1 + h / 2

                    # Largest centre jitter (20% of each side).  When a side is
                    # shorter than 5 px this is 0 and randint(0, 0) would
                    # raise, so fall back to no jitter on that axis.
                    max_dx = int(w * 0.2)
                    max_dy = int(h * 0.2)

                    # Crop side range.  The lower bound is kept >= 1 so the
                    # offset normalisation below never divides by zero.
                    min_side = max(int(min(w, h) * 0.5), 1)
                    max_side = int(np.ceil(0.8 * max(w, h)))

                    # Crops around the face: positives, parts and a few negatives.
                    for _ in range(200):
                        w_ = np.random.randint(-max_dx, max_dx) if max_dx > 0 else 0
                        h_ = np.random.randint(-max_dy, max_dy) if max_dy > 0 else 0
                        cx_ = cx + w_
                        cy_ = cy + h_

                        # Square crop centred on the jittered centre.
                        side_len = np.random.randint(min_side, max_side)
                        # Builtin max() clamps the origin to the image;
                        # np.max(value, 0) would read 0 as the axis argument
                        # and never clamp.
                        x1_ = max(cx_ - side_len / 2, 0)
                        y1_ = max(cy_ - side_len / 2, 0)
                        x2_ = x1_ + side_len
                        y2_ = y1_ + side_len
                        crop_box = np.array([x1_, y1_, x2_, y2_])

                        # Regression targets: labelled corner minus crop
                        # corner, normalised by the crop side length.
                        offset_x1 = (x1 - x1_) / side_len
                        offset_y1 = (y1 - y1_) / side_len
                        offset_x2 = (x2 - x2_) / side_len
                        offset_y2 = (y2 - y2_) / side_len

                        face_crop = img.crop(crop_box)
                        face_resize = face_crop.resize((face_size, face_size))

                        # utils.iou returns one value per box; there is a
                        # single labelled box, so take element 0.
                        iou = utils.iou(crop_box, _boxes)[0]
                        if iou > 0.65:  # positive sample
                            positive_anno_file.write(
                                "positive/{0}.jpg {1} {2} {3} {4} {5}\n".format(
                                    positive_count, 1, offset_x1, offset_y1,
                                    offset_x2, offset_y2))
                            # Flush so the label file stays in step with the
                            # images on disk if the run is interrupted.
                            positive_anno_file.flush()
                            face_resize.save(os.path.join(positive_image_dir, "{0}.jpg".format(positive_count)))
                            positive_count += 1
                        elif 0.1 < iou < 0.2:  # part sample
                            part_anno_file.write(
                                "part/{0}.jpg {1} {2} {3} {4} {5}\n".format(
                                    part_count, 2, offset_x1, offset_y1, offset_x2,
                                    offset_y2))
                            part_anno_file.flush()
                            face_resize.save(os.path.join(part_image_dir, "{0}.jpg".format(part_count)))
                            part_count += 1
                        elif iou < 0.05:  # negative sample
                            negative_anno_file.write(
                                "negative/{0}.jpg {1} 0 0 0 0\n".format(negative_count, 0))
                            negative_anno_file.flush()
                            face_resize.save(os.path.join(negative_image_dir, "{0}.jpg".format(negative_count)))
                            negative_count += 1

                    # Extra negatives: random squares anywhere in the image
                    # that barely overlap the labelled box.
                    max_neg_side = int(min(img_w, img_h) / 1.5)
                    if max_neg_side <= face_size:
                        # Image too small for any crop of at least face_size.
                        continue
                    for _ in range(200):
                        side_len = np.random.randint(face_size, max_neg_side)
                        x_ = np.random.randint(0, img_w - side_len)
                        y_ = np.random.randint(0, img_h - side_len)
                        crop_box = np.array([x_, y_, x_ + side_len, y_ + side_len])
                        if np.max(utils.iou(crop_box, _boxes)) < 0.01:
                            face_crop = img.crop(crop_box)
                            # Image.ANTIALIAS was removed in Pillow 10;
                            # LANCZOS is the same filter.
                            face_resize = face_crop.resize((face_size, face_size), Image.LANCZOS)
                            negative_anno_file.write("negative/{0}.jpg {1} 0 0 0 0\n".format(negative_count, 0))
                            negative_anno_file.flush()
                            face_resize.save(os.path.join(negative_image_dir, "{0}.jpg".format(negative_count)))
                            negative_count += 1
            except Exception:
                traceback.print_exc()