1. Install python then setup MTCNN and FaceNet. Deep-learning frameworks like tensorflow and pytorch are required to run MTCNN and FaceNet.

In [None]:

%pip install opencv-python
# https://github.com/timesler/facenet-pytorch
# In this repository, it also includs MTCNN for face detection prior to inference.
# Making this the fastest MTCNN implementation available.
%pip install facenet-pytorch
%pip install MTCNN
%pip install tensorflow


In [None]:
%pip install opencv-contrib-python

2. Initialize MTCNN and FaceNet Models

In [None]:
from facenet_pytorch import MTCNN, InceptionResnetV1

In [None]:


# If required, create a face detection pipeline using MTCNN:
mtcnn = MTCNN(image_size=576, margin=0)

# Create an inception resnet (in eval mode):
resnet = InceptionResnetV1(pretrained='vggface2').eval()

3. Process an image

In [None]:
from PIL import Image


img = Image.open(r"C:\Users\kamka\OneDrive\Desktop\CMP6200 Project\Facial Recognition using MTCNN and FaceNet\sample_data\Karhou2.jpg")

# Convert the image to RGB
img_rgb = img.convert("RGB")

# Get cropped and prewhitened image tensor
img_cropped = mtcnn(img_rgb, save_path=r"C:\Users\kamka\OneDrive\Desktop\CMP6200 Project\Facial Recognition using MTCNN and FaceNet\cropped_imgdata\Karhou2.jpg")

# Calculate embedding (unsqueeze to add batch dimension)
img_embedding = resnet(img_cropped.unsqueeze(0))

# Or, if using for VGGFace2 classification
resnet.classify = True
img_probs = resnet(img_cropped.unsqueeze(0))

4. Try with camera and make a bounding box around a face frame

In [None]:
%pip install opencv-python opencv-python-headless mtcnn
%pip install mtcnn
%pip install tensorflow
%pip install matplotlib
%pip install scikit-learn

Open Camera to video capture

In [1]:
import cv2

stream = cv2.VideoCapture(0)

if not stream.isOpened():
    print("No stream available")
    exit()
    
while (True):
    ret, frame = stream.read()
    if not ret:
        print("No stream available")
        break
        
    cv2.imshow("Webcam", frame)
    if cv2.waitKey(1) == ord('q'):
        break
        
stream.release()
cv2.destroyAllWindows()

In [None]:
import cv2

face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades +
                    "haarcascade_frontalface_default.xml")
smile_cascade = cv2.CascadeClassifier(cv2.data.haarcascades +
                    "haarcascade_smile.xml")
eye_cascade = cv2.CascadeClassifier(cv2.data.haarcascades +
                    "haarcascade_eye.xml")

def detect_features(frame):
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, 1.3, 5)
            # You can also specify minSize and maxSize
    for (x, y, w, h) in faces:
        frame = cv2.rectangle(frame, (x, y), (x+w, y+h),
                            color=(0, 255, 0), thickness=5)
        face = frame[y : y+h, x : x+w]
        gray_face = gray[y : y+h, x : x+w]
        smiles = smile_cascade.detectMultiScale(gray_face, 
                            2.5, minNeighbors=9)
        for (xp, yp, wp, hp) in smiles:
            face = cv2.rectangle(face, (xp, yp), (xp+wp, yp+hp),
                    color=(0, 0, 255), thickness=5)
        
        eyes = eye_cascade.detectMultiScale(gray_face, 
                    2.5, minNeighbors=7)
        for (xp, yp, wp, hp) in eyes:
            face = cv2.rectangle(face, (xp, yp), (xp+wp, yp+hp),
                    color=(255, 0, 0), thickness=5)
    
    return frame

stream = cv2.VideoCapture(0)

if not stream.isOpened():
    print("No stream :(")
    exit()

fps = stream.get(cv2.CAP_PROP_FPS)
width = int(stream.get(3))
height = int(stream.get(4))

# list of FourCC video codes: https://softron.zendesk.com/hc/en-us/articles/207695697-List-of-FourCC-codes-for-video-codecs
output = cv2.VideoWriter("assets/6_facial_detection.mp4",
            cv2.VideoWriter_fourcc('m', 'p', '4', 'v'),
            fps=fps, frameSize=(width, height))

while(True):
    ret, frame = stream.read()
    if not ret:
        print("No more stream :(")
        break
    
    frame = detect_features(frame)
    output.write(frame)
    cv2.imshow("Webcam!", frame)
    if cv2.waitKey(1) == ord('q'):
        break

stream.release()
cv2.destroyAllWindows() #!

In [None]:
import cv2
from mtcnn import MTCNN

# Initialize MTCNN for face detection
detector = MTCNN()

def detect_features(frame):
    # Convert frame to RGB (MTCNN expects RGB images)
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    
    # Detect faces using MTCNN
    faces = detector.detect_faces(rgb_frame)
    
    # Draw rectangles around detected faces
    for face in faces:
        x, y, w, h = face['box']
        frame = cv2.rectangle(frame, (x, y), (x+w, y+h), color=(0, 255, 0), thickness=5)
        
        # Draw rectangles around detected eyes (optional)
        for key, value in face['keypoints'].items():
            frame = cv2.rectangle(frame, (value[0]-5, value[1]-5), (value[0]+5, value[1]+5), color=(255, 0, 0), thickness=2)
    
    return frame

stream = cv2.VideoCapture(0)

if not stream.isOpened():
    print("No stream :(")
    exit()

fps = stream.get(cv2.CAP_PROP_FPS)
width = int(stream.get(3))
height = int(stream.get(4))

# Define the codec and create VideoWriter object
output = cv2.VideoWriter("assets/6_facial_detection.mp4",
            cv2.VideoWriter_fourcc('m', 'p', '4', 'v'),
            fps=fps, frameSize=(width, height))

while(True):
    ret, frame = stream.read()
    if not ret:
        print("No more stream :(")
        break
    
    frame = detect_features(frame)
    output.write(frame)
    cv2.imshow("Webcam!", frame)
    if cv2.waitKey(1) == ord('q'):
        break

stream.release()
cv2.destroyAllWindows()


In [None]:
import cv2
from mtcnn import MTCNN
from facenet_pytorch import MTCNN as FaceNetMTCNN, InceptionResnetV1
from PIL import Image as PILImage  # Alias PIL's Image
from torchvision import transforms
from IPython.display import display, Image as IPyImage  # Alias IPython's Image
import io



# Initialize MTCNN from facenet_pytorch
mtcnn = FaceNetMTCNN()
facenet_model = InceptionResnetV1(pretrained='vggface2').eval()

# Load the sample picture
sample_image_path = r"C:\Users\kamka\OneDrive\Desktop\CMP6200 Project\Facial Recognition using MTCNN and FaceNet\cropped_imgdata\Karhou2.jpg"
sample_image = Image.open(sample_image_path)

# Convert the PIL Image to a tensor and normalize it
transform = transforms.Compose([
    transforms.Resize((160, 160)),  # Resize to the input size that FaceNet expects
    transforms.ToTensor(),  # This also scales pixel values to [0, 1]
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])  # Adjust these values based on the expected normalization parameters for your model
])

sample_image_tensor = transform(sample_image).unsqueeze(0)  # Add batch dimension

# Extract face embeddings from the sample picture
sample_image_embedding = facenet_model(sample_image_tensor).detach().numpy()

# Define a threshold for face similarity
threshold = 0.6

def detect_and_match_faces(frame):
    # Detect faces using MTCNN
    boxes, _ = mtcnn.detect(frame)
    
    if boxes is not None:
        for box in boxes:
            # Extract face from the frame
            x1, y1, x2, y2 = map(int, box)
            face = frame[y1:y2, x1:x2]
            
            # Convert the face to a PIL Image and apply transforms
            face_image_pil = Image.fromarray(face)
            face_image_tensor = transform(face_image_pil).unsqueeze(0)  # Apply the same transformations
            
            # Extract face embedding from the detected face
            face_embedding = facenet_model(face_image_tensor).detach().numpy()

            # Calculate similarity between the embeddings
            similarity = cosine_similarity(sample_image_embedding, face_embedding)[0][0]
            
            # Display indications based on similarity
            if similarity >= threshold:
                cv2.putText(frame, "Match", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
            else:
                cv2.putText(frame, "No match", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 0, 255), 2)
    
    return frame

def show_image_in_notebook(image):
    is_success, buffer = cv2.imencode(".jpg", image)
    io_buf = io.BytesIO(buffer)
    display(Image(io_buf.read()))

stream = cv2.VideoCapture(0)

if not stream.isOpened():
    print("No stream :(")
    exit()

fps = stream.get(cv2.CAP_PROP_FPS)
width = int(stream.get(3))
height = int(stream.get(4))

# Define the codec and create VideoWriter object
output = cv2.VideoWriter("assets/6_facial_detection.mp4",
            cv2.VideoWriter_fourcc('m', 'p', '4', 'v'),
            fps=fps, frameSize=(width, height))

while True:
    ret, frame = stream.read()
    if not ret:
        print("No more stream :(")
        break
    
    frame = detect_and_match_faces(frame)
    output.write(frame)
    #cv2.imshow("Webcam", frame)
    show_image_in_notebook(frame)
    if cv2.waitKey(1) == ord('q'):
        break

stream.release()
cv2.destroyAllWindows()


In [1]:
import cv2
from facenet_pytorch import InceptionResnetV1, MTCNN
from PIL import Image as PILImage
from torchvision import transforms
from IPython.display import display, Image as IPyImage
import io
from sklearn.metrics.pairwise import cosine_similarity

# Initialize MTCNN for face detection 
mtcnn = MTCNN()
# Initialize FaceNet model
facenet_model = InceptionResnetV1(pretrained='vggface2').eval()

# Load the sample image and preprocess it
sample_image_path = r"C:\Users\kamka\OneDrive\Desktop\CMP6200 Project\Facial Recognition using MTCNN and FaceNet\cropped_imgdata\Karhou2.jpg"
sample_image = PILImage.open(sample_image_path)
transform = transforms.Compose([
    transforms.Resize((160, 160)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])
sample_image_tensor = transform(sample_image).unsqueeze(0)
sample_image_embedding = facenet_model(sample_image_tensor).detach().numpy()

# Define a threshold for face similarity
threshold = 0.6

# Define function for detect face and match face similarity
def detect_and_match_faces(frame):
    try:
        # Detect faces using MTCNN
        boxes, _ = mtcnn.detect(frame)

        if boxes is not None:
            for box in boxes:
                x1, y1, x2, y2 = map(int, box)
                face = frame[y1:y2, x1:x2]

                # Convert face to PIL Image and preprocess
                face_image_pil = PILImage.fromarray(face)
                face_image_tensor = transform(face_image_pil).unsqueeze(0)

                # Extract face embedding using FaceNet Model
                face_embedding = facenet_model(face_image_tensor).detach().numpy()

                # Calculate similarity
                similarity = cosine_similarity(sample_image_embedding, face_embedding)[0][0]

                # Display match indication based on similarity
                if similarity >= threshold:
                    cv2.putText(frame, "Match", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
                else:
                    cv2.putText(frame, "No match", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 0, 255), 2)
    except Exception as e:
        print(f"Error processing frame: {e}")

    return frame

def show_image_in_notebook(image):
    is_success, buffer = cv2.imencode(".jpg", image)
    io_buf = io.BytesIO(buffer)
    display(IPyImage(data=io_buf.read(), format='jpg')) 

# Open video capture
stream = cv2.VideoCapture(0)

if not stream.isOpened():
    print("No stream :(")
    exit()

fps = stream.get(cv2.CAP_PROP_FPS)
width = int(stream.get(3))
height = int(stream.get(4))

# Define VideoWriter object
output = cv2.VideoWriter("assets/6_facial_detection.mp4", cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), fps=fps, frameSize=(width, height))

while True:
    ret, frame = stream.read()
    if not ret:
        print("No more stream :(")
        break

    frame = detect_and_match_faces(frame)
    output.write(frame)
    cv2.imshow("Webcam", frame)
    if cv2.waitKey(1) == ord('q'):
        break

stream.release()
cv2.destroyAllWindows()


  from .autonotebook import tqdm as notebook_tqdm


Error processing frame: tile cannot extend outside image
Error processing frame: tile cannot extend outside image
Error processing frame: tile cannot extend outside image
Error processing frame: tile cannot extend outside image
