In [1]:
from imutils import paths
import face_recognition
import argparse
import pickle
import cv2
import os


# Initial settings dictionary: dataset directory, output path for the
# serialised face encodings, and the face-detector model name.
args = dict(
    dataset="./images",
    encodings="./result",
    detection_method="cnn",
)

In [2]:


# Encode the faces found in every image under the dataset directory and
# serialise the encodings (with the matching person names) to disk.
#
# Settings (stand-ins for the command-line flags of the original script):
#   dataset          - root directory of the image dataset; the name of the
#                      directory that directly contains an image is used as
#                      the person's name.
#   encodings        - path of the file the face encodings are written to.
#   detection_method - face detector used to locate faces before encoding;
#                      only "hog" or "cnn" are accepted.
args = {}
args['dataset'] = "./images"
args['encodings'] = "./result"
args['detection_method'] = "hog"


# Collect the path of every image in the dataset.
imagePaths = list(paths.list_images(args["dataset"]))

# Parallel lists: knownEncodings[k] is the encoding of a face belonging to
# knownNames[k].
knownEncodings = []
knownNames = []


for imagePath in imagePaths:
    # The person's name is the name of the directory holding the image.
    name = os.path.basename(os.path.dirname(imagePath))

    print(imagePath)

    # cv2.imread returns None (instead of raising) when a file cannot be
    # decoded; skip such files rather than crashing in cvtColor.
    image = cv2.imread(imagePath)
    if image is None:
        print("[WARN] could not read {}, skipping".format(imagePath))
        continue

    # OpenCV loads images as BGR, whereas dlib expects RGB.
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Locate every face in the image (one image may contain several faces).
    boxes = face_recognition.face_locations(rgb, model=args["detection_method"])

    # Compute one encoding per located face.
    encodings = face_recognition.face_encodings(rgb, boxes)

    # Every face found in this image is attributed to the same person.
    for encoding in encodings:
        knownEncodings.append(encoding)
        knownNames.append(name)

# Persist the result; the context manager guarantees the file is flushed and
# closed even if serialisation fails.
data = {"encodings": knownEncodings, "names": knownNames}
with open(args["encodings"], "wb") as f:
    pickle.dump(data, f)

./images/tonyx/810757517.jpg
./images/tonyx/1124417198.jpg
./images/tonyx/862179643.jpg
./images/tonyx/1009093441.jpg
./images/jack_ma/timg2.jpg
./images/jack_ma/timg1.jpg
./images/jack_ma/timg.jpg
./images/jack_ma/jackma.jpg


In [None]:
# Find the faces in a single image and recognise them against the stored
# encodings.
#
# Settings read from `args`:
#   encodings        - path of the pickle file holding the known face encodings.
#   image            - path of the image to run recognition on.
#   detection_method - "hog" or "cnn"; choose hog for speed, cnn for accuracy.
args["image"] = "test.jpg"

# Load the previously stored face encodings.
# NOTE(security): unpickling can execute arbitrary code; only load encoding
# files that were produced locally by the encoding cell.
with open(args["encodings"], "rb") as f:
    data = pickle.loads(f.read())

# Load the image to analyse and convert it from BGR (OpenCV) to RGB (dlib).
# cv2.imread returns None for a missing or unreadable file, so fail with a
# clear message instead of an opaque cvtColor error.
image = cv2.imread(args["image"])
if image is None:
    raise FileNotFoundError("could not read image: {}".format(args["image"]))
rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Locate the faces and compute an encoding for each of them.
boxes = face_recognition.face_locations(rgb, model=args["detection_method"])
encodings = face_recognition.face_encodings(rgb, boxes)

# Name recognised for each detected face, in the same order as `boxes`.
names = []


# compare_faces checks one face encoding against every known encoding and
# returns a list of True/False values, one per known encoding. Internally it
# compares the Euclidean distance between the encodings with a tolerance: a
# smaller tolerance makes the recognition stricter.
for encoding in encodings:
    matches = face_recognition.compare_faces(data["encodings"], encoding)

    # Default label when no known encoding is close enough.
    name = "Unknown"
    if True in matches:
        # Count how many known encodings of each person matched this face.
        counts = {}
        for i, matched in enumerate(matches):
            if matched:
                candidate = data["names"][i]
                counts[candidate] = counts.get(candidate, 0) + 1
        # The person with the most matching encodings wins.
        name = max(counts, key=counts.get)

    names.append(name)


# Draw each bounding box with its name on the output image.
for ((top, right, bottom, left), name) in zip(boxes, names):
    cv2.rectangle(image, (left, top), (right, bottom), (0, 255, 0), 2)

    # Put the label above the box, or just below the box's top edge when the
    # box is too close to the top of the image for the text to fit.
    y = top - 15 if top - 15 > 15 else top + 15
    cv2.putText(image, name, (left, y), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 255, 0), 2)


# Show the result until a key is pressed, then close the window so it does not
# linger after the cell finishes.
cv2.imshow("Image", image)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [None]:
from imutils.video import VideoStream
import imutils  # needed for imutils.resize below
import time

# Recognise faces in a live camera stream.
#
# Settings read from `args`:
#   encodings        - path of the pickle file holding the known face encodings.
#   output           - path of the output video (None disables writing).
#   display          - 1 shows every processed frame on screen, 0 does not.
#   detection_method - face detector to use, "hog" or "cnn".
args["display"] = 1
args["output"] = "output.mp4"
# The key must be "detection_method" (underscore): that is the key read by
# face_locations below. Writing "detection-method" silently had no effect.
args["detection_method"] = "cnn"

# Load the stored encodings.
# NOTE(security): unpickling can execute arbitrary code; only load encoding
# files that were produced locally by the encoding cell.
print("[INFO] loading encodings...")
with open(args["encodings"], "rb") as f:
    data = pickle.loads(f.read())


# Start the camera stream. With several cameras attached (e.g. built-in and
# USB), change src=0 to src=1 etc. Sleep two seconds to let the camera warm up.
print("[INFO] starting video stream...")

vs = VideoStream(src=0).start()
writer = None
time.sleep(2.0)


try:
    # Grab frames and process them until `q` is pressed.
    while True:
        frame = vs.read()
        if frame is None:
            # No frame available (e.g. camera could not be opened).
            print("[WARN] no frame received from the video stream")
            break

        # Convert BGR -> RGB, then shrink the RGB copy to a width of 750px to
        # speed up processing. The resize must be applied to `rgb`; resizing
        # `frame` here would throw the colour conversion away.
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        rgb = imutils.resize(rgb, width=750)
        # Scale factor from the resized image back to the original frame.
        r = frame.shape[1] / float(rgb.shape[1])

        # Locate the faces in the frame and compute their encodings.
        boxes = face_recognition.face_locations(rgb, model=args["detection_method"])
        encodings = face_recognition.face_encodings(rgb, boxes)
        names = []

        for encoding in encodings:
            # Compare this face with every known encoding.
            matches = face_recognition.compare_faces(data["encodings"], encoding)
            name = "Unknown"
            if True in matches:
                # Count the matches per person; the person with the most
                # votes wins (on a tie, the first one inserted is chosen).
                counts = {}
                for i, matched in enumerate(matches):
                    if matched:
                        candidate = data["names"][i]
                        counts[candidate] = counts.get(candidate, 0) + 1
                name = max(counts, key=counts.get)
            names.append(name)

        for ((top, right, bottom, left), name) in zip(boxes, names):
            # Rescale the face coordinates to the original frame size.
            top = int(top * r)
            right = int(right * r)
            bottom = int(bottom * r)
            left = int(left * r)
            # Draw the box and the predicted name on the frame.
            cv2.rectangle(frame, (left, top), (right, bottom), (0, 255, 0), 2)
            y = top - 15 if top - 15 > 15 else top + 15
            cv2.putText(frame, name, (left, y), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 255, 0), 2)

        # Lazily create the video writer on the first frame. This must happen
        # outside the per-face loop, otherwise nothing is written until the
        # first face is detected.
        # NOTE(review): an MJPG fourcc is combined with an .mp4 path; confirm
        # the local OpenCV build produces a playable file.
        if writer is None and args["output"] is not None:
            fourcc = cv2.VideoWriter_fourcc(*"MJPG")
            writer = cv2.VideoWriter(args["output"], fourcc, 20, (frame.shape[1], frame.shape[0]), True)

        # Write the annotated frame to disk.
        if writer is not None:
            writer.write(frame)

        # Show the frame on screen when requested.
        if args["display"] > 0:
            cv2.imshow("Frame", frame)
            key = cv2.waitKey(1) & 0xFF
            # Pressing `q` ends the loop.
            if key == ord("q"):
                break
finally:
    # Clean up even when the loop is interrupted or fails, so the camera and
    # the output file are always released.
    cv2.destroyAllWindows()
    vs.stop()
    if writer is not None:
        writer.release()

In [None]:
import time
import imutils

# Recognise faces in a video file.
#
# Settings read from `args`:
#   encodings        - path of the pickle file holding the known face encodings.
#   test             - path of the input video file.
#   output           - path of the output video (None disables writing).
#   display          - 1 shows every processed frame on screen, 0 does not.
#   detection_method - face detector to use, "hog" or "cnn".
args["display"] = 1
args["output"] = "output.mp4"
args["test"] = "./test.mp4"
# The key must be "detection_method" (underscore): that is the key read by
# face_locations below. Writing "detection-method" silently had no effect.
args["detection_method"] = "hog"

# Load the stored encodings.
# NOTE(security): unpickling can execute arbitrary code; only load encoding
# files that were produced locally by the encoding cell.
print("[INFO] loading encodings...")
with open(args["encodings"], "rb") as f:
    data = pickle.loads(f.read())

vs = cv2.VideoCapture(args["test"])

writer = None


try:
    # Read frames and process them until the video ends or `q` is pressed.
    while True:
        # VideoCapture.read() returns a (success flag, frame) pair; the frame
        # is None once no more frames can be read.
        grabbed, frame = vs.read()
        if frame is None:
            break

        # Convert BGR -> RGB, then shrink the RGB copy to a width of 750px to
        # speed up processing. The resize must be applied to `rgb`; resizing
        # `frame` here would throw the colour conversion away.
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        rgb = imutils.resize(rgb, width=750)
        # Scale factor from the resized image back to the original frame.
        r = frame.shape[1] / float(rgb.shape[1])

        # Locate the faces in the frame and compute their encodings.
        boxes = face_recognition.face_locations(rgb, model=args["detection_method"])
        encodings = face_recognition.face_encodings(rgb, boxes)
        names = []

        for encoding in encodings:
            # Compare this face with every known encoding.
            matches = face_recognition.compare_faces(data["encodings"], encoding)
            name = "Unknown"
            if True in matches:
                # Count the matches per person; the person with the most
                # votes wins (on a tie, the first one inserted is chosen).
                counts = {}
                for i, matched in enumerate(matches):
                    if matched:
                        candidate = data["names"][i]
                        counts[candidate] = counts.get(candidate, 0) + 1
                name = max(counts, key=counts.get)
            names.append(name)

        for ((top, right, bottom, left), name) in zip(boxes, names):
            # Rescale the face coordinates to the original frame size.
            top = int(top * r)
            right = int(right * r)
            bottom = int(bottom * r)
            left = int(left * r)
            # Draw the box and the predicted name on the frame.
            cv2.rectangle(frame, (left, top), (right, bottom), (0, 255, 0), 2)
            y = top - 15 if top - 15 > 15 else top + 15
            cv2.putText(frame, name, (left, y), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 255, 0), 2)

        # Lazily create the video writer on the first frame. This must happen
        # outside the per-face loop, otherwise frames without a detected face
        # at the start of the video are never written.
        # NOTE(review): an MJPG fourcc is combined with an .mp4 path; confirm
        # the local OpenCV build produces a playable file.
        if writer is None and args["output"] is not None:
            fourcc = cv2.VideoWriter_fourcc(*"MJPG")
            writer = cv2.VideoWriter(args["output"], fourcc, 20, (frame.shape[1], frame.shape[0]), True)

        # Write the annotated frame to disk.
        if writer is not None:
            writer.write(frame)

        # Show the frame on screen when requested.
        if args["display"] > 0:
            cv2.imshow("Frame", frame)
            key = cv2.waitKey(1) & 0xFF
            # Pressing `q` ends the loop.
            if key == ord("q"):
                break
finally:
    # Clean up even when the loop is interrupted or fails, so the input
    # video and the output file are always released.
    cv2.destroyAllWindows()
    vs.release()
    if writer is not None:
        writer.release()

[INFO] loading encodings...
