lipsCoordExtraction.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 28 12:08:14 2020
@author: jose
"""
from __future__ import division

import glob
import json

import cv2
import dlib

from auxiliars.lipsExtraction import lips_segm_HOG
from auxiliars.faceDetection import detectFaceOpenCVDnn, detectFaceViolaJ

# Number of coordinates to track on the lips
NCOORDINATES = 12
if __name__ == "__main__":
    datasetMode = ["Normal", "Silent", "Whispered"]
    # dlib 68-point facial landmark predictor
    predictor = dlib.shape_predictor(r"modelsFaceRecognition\shape_predictor_68_face_landmarks.dat")
    modelFile = r"modelsFaceRecognition\opencv_face_detector_uint8.pb"
    configFile = r"modelsFaceRecognition\opencv_face_detector.pbtxt"
    net = cv2.dnn.readNetFromTensorflow(modelFile, configFile)
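
    # Two face detectors are available: the TensorFlow DNN loaded above
    # (detectFaceOpenCVDnn) and a Viola-Jones detector (detectFaceViolaJ).
    # As written, only the Viola-Jones branch inside the frame loop is active.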

    for mode in datasetMode:
        text_filename = "LipsCoordinates_" + mode + "_12coor_Phrases_ViolaJ.txt"
        # Change "Phrases" to "Digits" to work with the Digits dataset
        videos_path = r"AVSegmentedDataset\Phrases" + "\\" + mode + r"\*.mp4"
        videos = glob.glob(videos_path)
        speakerNameList = []
        speakerNameDict = {}
        counter = 0
        for video in videos:
            counter += 1
            # Speaker name is the video filename without its extension,
            # e.g. AVSegmentedDataset\Phrases\Normal\<speaker>.mp4
            name_v = video.split('\\')
            name_v = name_v[3].split('.')
            cam = cv2.VideoCapture(video)
            if name_v[0] not in speakerNameDict:
                speakerNameList.append(name_v[0])
                speakerNameDict[name_v[0]] = {}
            currentframe = 0
            print(str(counter) + ' of ' + str(len(videos)) + '\n')
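
            # Walk through the video frame by frame: detect the face, extract
            # NCOORDINATES lip landmarks, and store them under a per-frame key.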
            while True:
                # Read the next frame
                ret, frame = cam.read()
                if ret:
                    ##############################################################
                    ############ UNCOMMENT TO USE THE NEURAL NETWORK #############
                    ##############################################################
                    # # Face detection using the previously defined network
                    # outOpencvDnn, bboxes = detectFaceOpenCVDnn(net, frame)
                    # # Crop the face out of the original image
                    # for (x, y, w, h) in bboxes:
                    #     f_image = frame[y:h, x:w]
                    ##############################################################
                    ############ VIOLA-JONES FACE DETECTION ######################
                    ##############################################################
                    try:
                        f_image = detectFaceViolaJ(frame)
                    except Exception:
                        # Skip frames where no face is detected
                        continue
                    ##############################################################
                    ###### COORDINATE EXTRACTION ON THE FACE REGION ##############
                    ##############################################################
                    roi, shape = lips_segm_HOG(f_image, predictor, NCOORDINATES)
                    speakerNameDict[name_v[0]][name_v[0] + '_' + str(currentframe)] = shape.tolist()
                    currentframe += 1
                else:
                    break
            # Release the capture once this video is done
            cam.release()

        # One JSON file per mode, keyed speaker -> frame -> landmark list
        points_json = json.dumps(speakerNameDict)
        with open(text_filename, "w+") as f:
            f.write(points_json)
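
# A minimal sketch (an assumption, not part of the original pipeline) of how a
# generated file can be read back downstream:
#
#   import json
#   with open("LipsCoordinates_Normal_12coor_Phrases_ViolaJ.txt") as fh:
#       coords = json.load(fh)
#   # coords maps speaker -> {"<speaker>_<frame>": landmarks}, where landmarks
#   # is shape.tolist(), presumably NCOORDINATES (x, y) pairs per frame.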