Face recognition in real time (using ar glass and unity and dlibdotnet) #301

seoyeon22 · 2023-06-20T12:51:31Z

Summary of issue

Hi,
I'm working on a project that does face recognition in real-time using ar glass(nreal light).
'shape_predictor_68_face_landmarks.dat' and 'dlib_face_recognition_resnet_model_v1.dat' are loaded successfully, but there seems to be a problem in the part where the operator of detector is called.
I tried two ways to use the camera texture in dlibdotnet.
First, I used nreal light's rgbcamera to get texture2d and converted it to Matrix type, but FrontalFaceDetector.Operator() doesn't seem to be applied.
Second, I used data of type byte[] and the result was same.

So my point is
1. I want to know if real-time face recognition is possible.
2. I wonder if data in byte[] format or Unity's texture2d format is available on dlibdotnet.

Any advice about this issue would be appreciated!

Environment

The ar glass model is nreal light and I am working in unity environment.
Also, android phone model is samsung galaxy s22.

Code:

using DlibDotNet;
using NRKernal;
using UnityEngine;
using System;
using UnityEngine.UI;
...

public class SpeakerRecognition : MonoBehaviour {

    public RawImage captureImage;
    public Button activeButton;

    private bool isActivated = false;
    private FrontalFaceDetector detector;
    private ShapePredictor sp;
    private DlibDotNet.Dnn.LossMetric net;
    private List<Matrix<float>> knownFaces = new List<Matrix<float>>();
    private List<string> knownNames = new List<string>();
    private List<float> faceDiffs = new List<float>();
    private NRRGBCamTexture camTexture;

    void Start()
    {
        detector = Dlib.GetFrontalFaceDetector();
        sp = ShapePredictor.Deserialize(Application.dataPath + "/Plugins/shape_predictor_68_face_landmarks.dat");
        net = DlibDotNet.Dnn.LossMetric.Deserialize(Application.dataPath + "/Plugins/dlib_face_recognition_resnet_model_v1.dat");

        LoadKnownFaces();

        camTexture = new NRRGBCamTexture();
        captureImage.texture = camTexture.GetTexture();
        
        activeButton.onClick.AddListener(ToggleActivation);
    }

    private IEnumerator RecognizeSpeaker() {
        // while camera is activated
        if(isActivated){
            // convert texture2d to matrix
            int width = camTexture.Width;
            int height = camTexture.Height;

            Matrix<RgbPixel> img = new Matrix<RgbPixel>(height, width);
            Color32[] pixels = camTexture.GetTexture().GetPixels32();

            for (int y = 0; y < height; y++)
            {
                for (int x = 0; x < width; x++)
                {
                    Color32 pixel = pixels[y * width + x];
                    RgbPixel rgbPixel = new RgbPixel(pixel.r, pixel.g, pixel.b);
                    img[y, x] = rgbPixel;
                }
            }

            var faces = new List<Matrix<RgbPixel>>();
            var shapes = new List<FullObjectDetection>();

            foreach(var face in detector.Operator(img))
            {
                var shape = sp.Detect(img, face);
                var faceChipDetail = Dlib.GetFaceChipDetails(shape, 150, 0.25);
                var faceChip = Dlib.ExtractImageChip<RgbPixel>(img, faceChipDetail);
                
                shapes.Add(shape);
                faces.Add(faceChip);
            }

            var faceDescriptors = net.Operator(faces);
            for(int i = 0; i < faceDescriptors.Count; i++)
            {
                //
                // DrawFaceLandmarks(shapes[i]);
                //
                debugLog.text = "descripter " + i;
                faceDiffs.Clear();

                for(int j = 0; j < knownFaces.Count; j++)
                {
                    var diff = Dlib.Length(knownFaces[j] - faceDescriptors[i]);
                    faceDiffs.Add(diff);
                }
                var index = faceDiffs.IndexOf(minDiff);
                
                var matchRate = 1 / (1 + minDiff);
                if(matchRate > 0.5)
                {
                    ...
                }

            }
        }
        yield return null;
    }

    public void LoadKnownFaces() {
        debugLog.text = "load known faces";

        string relativePath = "NRSDK/Demos/OverTheGlass/Scripts/faces";
        string absolutePath = Application.dataPath + "/" + relativePath;
        foreach (var file in System.IO.Directory.GetFiles(absolutePath, "*.jpg"))
        {
            var img = Dlib.LoadImageAsMatrix<RgbPixel>(file);
            var faces = detector.Operator(img);

            if (faces.Any())
            {
                // Detect face
                var shape = sp.Detect(img, faces[0]);
                var faceChipDetail = Dlib.GetFaceChipDetails(shape, 150, 0.25);
                var faceChip = Dlib.ExtractImageChip<RgbPixel>(img, faceChipDetail);

                // Get face descriptor
                var faceDescriptor = net.Operator(faceChip);

                // Add known face and name informations
                knownFaces.Add(faceDescriptor.First());
                knownNames.Add(System.IO.Path.GetFileNameWithoutExtension(file));
            }
            else
            {
                debugLog.text = "No face found in " + file;
            }
        }
        lipDiffs = new List<double>(knownFaces.Count);
        return;
    }

    public void ToggleActivation()
    {
        isActivated = !isActivated;

        if (isActivated)
        {
            if(camTexture == null)
            {
                camTexture = new NRRGBCamTexture();
                captureImage.texture = camTexture.GetTexture();
            }
            camTexture.Play();
            captureImage.texture = camTexture.GetTexture();
        
            StartCoroutine(RecognizeSpeaker());
        }
        else
        {
            camTexture?.Stop();
            camTexture = null;
            StopCoroutine(RecognizeSpeaker());
        }
    }
    void OnDestroy()
    {
        camTexture?.Stop();
        camTexture = null;
    }
}

using System.Collections.Generic;
using DlibDotNet;
using NRKernal;
using UnityEngine;
using System;
using System.Linq;
using UnityEngine.UI;
using System.Collections;
using OpenCvSharp;
using System.Runtime.InteropServices;

public class SpeakerRecognition : MonoBehaviour {

    public Text debugLog;
    public RawImage captureImage;
    public Text speakerName;
    public Button activeButton;
    public GameObject spherePrefab;

    private bool isActivated = false;
    private FrontalFaceDetector detector;
    private ShapePredictor sp;
    private DlibDotNet.Dnn.LossMetric net;
    private List<Matrix<float>> knownFaces = new List<Matrix<float>>();
    private List<string> knownNames = new List<string>();
    private List<LipMovement> movements = new List<LipMovement>();
    private (double, double) latestSpeakerPosition;
    private List<float> faceDiffs = new List<float>();
    private List<double> lipDiffs = new List<double>(); 
    private NRRGBCamTexture camTexture;
    private Texture2D mTex2d;

    void Start()
    {
        detector = Dlib.GetFrontalFaceDetector();
        sp = ShapePredictor.Deserialize(Application.dataPath + "/Plugins/shape_predictor_68_face_landmarks.dat");
        net = DlibDotNet.Dnn.LossMetric.Deserialize(Application.dataPath + "/Plugins/dlib_face_recognition_resnet_model_v1.dat");

        LoadKnownFaces();

        camTexture = new NRRGBCamTexture();
        captureImage.texture = camTexture.GetTexture();
        mTex2d = camTexture.GetTexture();
        
        activeButton.onClick.AddListener(ToggleActivation);
    }

    private IEnumerator RecognizeSpeaker() {
        // 카메라 작동 중일 때 계속 실행
        while(isActivated){
						if(!isActivated || camTexture == null)
            {
                break;
            }
            var temp = new Mat();

            // Get raw data from Texture2D
            var array = mTex2d.GetRawTextureData();
            Marshal.Copy(array, 0, temp.Data, array.Length);

            var cimg = Dlib.LoadImageData<BgrPixel>(temp.Data, (uint)temp.Height, (uint)temp.Width, (uint)(temp.Width * temp.ElemSize()));

            var faces = new List<Matrix<RgbPixel>>();
            var shapes = new List<FullObjectDetection>();

            foreach(var face in detector.Operator(cimg))
            {
                var shape = sp.Detect(img, face);
                var faceChipDetail = Dlib.GetFaceChipDetails(shape, 150, 0.25);
                var faceChip = Dlib.ExtractImageChip<RgbPixel>(img, faceChipDetail);

                shapes.Add(shape);
                faces.Add(faceChip);
						}
            
            var faceDescriptors = net.Operator(faces);
            for(int i = 0; i < faceDescriptors.Count; i++)
            {
                faceDiffs.Clear();

                for(int j = 0; j < knownFaces.Count; j++)
                {
                    var diff = Dlib.Length(knownFaces[j] - faceDescriptors[i]);
                    faceDiffs.Add(diff);
                }

                var minDiff = faceDiffs.Min();
                var index = faceDiffs.IndexOf(minDiff);
                
                var matchRate = 1 / (1 + minDiff);
                if(matchRate > 0.5)
                {
                    var eyeDistance = (shapes[i].GetPart(37) - shapes[i].GetPart(44)).Length;
                    var lipHeight = (shapes[i].GetPart(62) - shapes[i].GetPart(66)).Length;
                    var lipWidth = (shapes[i].GetPart(48) - shapes[i].GetPart(54)).Length;

                    (double, double) averageLength = movements[index].CheckMovement(lipWidth / eyeDistance * 100, lipHeight / eyeDistance * 100);
                    if(averageLength.Item1 > 2 || averageLength.Item2 > 2)
                    {
                        lipDiffs[index] = averageLength.Item1 + averageLength.Item2;
                    }
                }

            }
            speakerName.text = knownNames[lipDiffs.IndexOf(lipDiffs.Min())];
        }
        yield return null;
    }

    public void LoadKnownFaces() {
        debugLog.text = "load known faces";

        string relativePath = "NRSDK/Demos/OverTheGlass/Scripts/faces";
        string absolutePath = Application.dataPath + "/" + relativePath;
        foreach (var file in System.IO.Directory.GetFiles(absolutePath, "*.jpg"))
        {
            var img = Dlib.LoadImageAsMatrix<RgbPixel>(file);
            var faces = detector.Operator(img);

            if (faces.Any())
            {
                // Detect face
                var shape = sp.Detect(img, faces[0]);
                var faceChipDetail = Dlib.GetFaceChipDetails(shape, 150, 0.25);
                var faceChip = Dlib.ExtractImageChip<RgbPixel>(img, faceChipDetail);

                // Get face descriptor
                var faceDescriptor = net.Operator(faceChip);

                // Add known face and name informations
                knownFaces.Add(faceDescriptor.First());
                knownNames.Add(System.IO.Path.GetFileNameWithoutExtension(file));
            }
            else
            {
                Debug.Log("No face found in " + file);
            }
        }
        lipDiffs = new List<double>(knownFaces.Count);
        return;
    }

    internal class LipMovement
    {
        private string name;
        private System.Collections.Generic.Queue<double> widthDiffs;
        private System.Collections.Generic.Queue<double> heightDiffs;
        private double prevHeight;
        private double prevWidth;

        public LipMovement(string name)
        {
            this.name = name;
            widthDiffs = new System.Collections.Generic.Queue<double>(3);
            heightDiffs = new System.Collections.Generic.Queue<double>(3);
            prevHeight = 0;
            prevWidth = 0;
        }

        public (double, double) CheckMovement(double width, double height)
        {
            heightDiffs.Enqueue(Math.Abs(prevHeight - height));
            widthDiffs.Enqueue(Math.Abs(prevWidth - width));

            List<double> widthNumbers = new List<double>(widthDiffs);
            List<double> heightNumbers = new List<double>(heightDiffs);

            double widthAverage = widthNumbers.Sum() / widthNumbers.Count;
            double heightAverage = heightNumbers.Sum() / heightNumbers.Count;

            prevHeight = height;
            prevWidth = width;

            return (Math.Round(widthAverage, 3), Math.Round(heightAverage, 3));
        }
    }

    public void ToggleActivation()
    {
        isActivated = !isActivated;

        if (isActivated)
        {
            if(camTexture == null)
            {
                camTexture = new NRRGBCamTexture();
                captureImage.texture = camTexture.GetTexture();
            }
            camTexture.Play();
            captureImage.texture = camTexture.GetTexture();
        
            StartCoroutine(RecognizeSpeaker());
        }
        else
        {
            camTexture?.Stop();
            camTexture = null;
            StopCoroutine(RecognizeSpeaker());
        }
    }
    void OnDestroy()
    {
        camTexture?.Stop();
        camTexture = null;
    }
}

Output:

I checked that the while statement, LoadKnownFaces() camera are running fine, but foreach(var face in detector.Opertaor(img)) doesn't seem to be running.

The text was updated successfully, but these errors were encountered:

seoyeon22 changed the title ~~Problems with detector operation (using ar glass and unity and dlibdotnet)~~ Face recognition in real time (using ar glass and unity and dlibdotnet) Jun 22, 2023

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Face recognition in real time (using ar glass and unity and dlibdotnet) #301

Face recognition in real time (using ar glass and unity and dlibdotnet) #301

seoyeon22 commented Jun 20, 2023 •

edited

Loading

Face recognition in real time (using ar glass and unity and dlibdotnet) #301

Face recognition in real time (using ar glass and unity and dlibdotnet) #301

Comments

seoyeon22 commented Jun 20, 2023 • edited Loading

Summary of issue

Environment

Code:

Output:

seoyeon22 commented Jun 20, 2023 •

edited

Loading