In [1]:
# python detect_mask_video.py

# import the necessary packages
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.models import load_model
from imutils.video import VideoStream
import numpy as np
import argparse
import imutils
import time
import cv2
import os

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
# to capture the image 
def detect_and_predict_mask(frame,faceNet,maskNet):
    # to get the dimension of the frame and to construct the blob
    (h,w)= frame.shape[:2]
    blob = cv2.dnn.blobFromImage(frame,1.0,(300,300),
                                 (104.0,177.0,123.0))
    
    #pass the blob through model and detect the face using faceNet
    faceNet.setInput(blob)
    detections = faceNet.forward()
    
    #initialize the list of face and the corresponding locatins and the list of predicitons from our face mask model
    
    faces =[]
    locs =[]
    preds =[]
    
    # loop over detections
    for i in range(0,detections.shape[2]):
        # extract the probability
        confidence = detections[0,0,i,2]
        
        #filter out weak detections by ensuring the confidence/probability value
        # (i.e) the confidence value is greater than the min conf value
        
        if confidence>args['confidence']:
            #compute the x and y coordinates of bounding box 
            
            box = detections[0,0,i,3:7]*np.array([w,h,w,h])
            (startX,startY,endX,endY)=box.astype("int")
            
            #ensure the box falls within the face dim
            (startX,startY) =(max(0,startX),max(0,startY))
            (endX,endY) = (min(w-1,endX),min(h-1,endY))
            face = frame[startY:endY,startX:endX]

            # Debugging statements
            print(f"Detection {i}: confidence={confidence}")
            print(f"Bounding box: startX={startX}, startY={startY}, endX={endX}, endY={endY}")
            print(f"Face shape: {face.shape}")

            if face.size == 0:
                print("Empty face image detected, skipping...")
                continue
            
            #extract the face ROI, convert it to BGR to RGB channel ordering, resize it to 224x224, and preprocess it
            # face = frame[startY:endY,startX:endX]
            face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
            face = cv2.resize(face,(224,224))
            face = img_to_array(face)
            face = preprocess_input(face)
            
            # add the face and bounding box to the respective list
            faces.append(face)
            locs.append((startX,startY,endX,endY))
            
    # to give a condition to predict atleast one face was detected
    if len(faces)>0:
# for faster inference we'll make batch predictions on *all*
# faces at the same time rather than one-by-one predictions
# in the above `for` loop
        faces = np.array(faces,dtype ='float32')
        preds = maskNet.predict(faces,batch_size = 32)
        
    return (locs,preds)
        


In [4]:
# construct the argument parser and parse the arguments
# Construct the argument parser and parse the arguments
# ap = argparse.ArgumentParser()
# ap.add_argument("-f", "--face", type=str, default="face_detector", help="path to face detector model directory")
# ap.add_argument("-m", "--model", type=str, default="mask_detector.h5", help="path to trained face mask detector model")
# ap.add_argument("-c", "--confidence", type=float, default=0.5, help="minimum probability to filter weak detections")
# args = vars(ap.parse_args())

args = {
    "face": "face_detector",
    "model": "mask_detector.h5",
    "confidence": 0.5
}


# Construct the absolute paths to the model files
project_root = os.path.abspath(os.path.dirname(r"S:\AI_RELATED\AI_PROJECTS\Face_Mask_Detection\face_detector"))
face_detector_directory = os.path.join(project_root, 'face_detector')
prototxtPath = os.path.join(face_detector_directory, "deploy.prototxt")
weightsPath = os.path.join(face_detector_directory, "res10_300x300_ssd_iter_140000.caffemodel")

print(f"Prototxt path: {prototxtPath}")
print(f"Weights path: {weightsPath}")

if not os.path.exists(prototxtPath):
    print(f"Error: {prototxtPath} not found.")
if not os.path.exists(weightsPath):
    print(f"Error: {weightsPath} not found.")

faceNet = cv2.dnn.readNet(prototxtPath, weightsPath)
# Load the face mask detector model
from tensorflow.keras.models import load_model
maskNet = load_model(args["model"])




Prototxt path: S:\AI_RELATED\AI_PROJECTS\Face_Mask_Detection\face_detector\deploy.prototxt
Weights path: S:\AI_RELATED\AI_PROJECTS\Face_Mask_Detection\face_detector\res10_300x300_ssd_iter_140000.caffemodel
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [6]:
# Load the face mask detector model from disk
print("[INFO] loading face mask detector model...")
maskNet = load_model(args["model"])



[INFO] loading face mask detector model...


In [7]:
# pip install pyttsx3==2.90

In [8]:
# initialize the video stream and allow the camera sensor to warm up
print("[INFO] starting video stream...")
vs = VideoStream(src=0).start()
time.sleep(2.0)



[INFO] starting video stream...


In [9]:
import pyttsx3

# Initialize the TTS engine
engine = pyttsx3.init()
engine.setProperty('rate', 150)  # Speed of speech
engine.setProperty('volume', 1.0)  # Volume (0.0 to 1.0)


# With Voice

In [10]:
import pyttsx3

# Initialize TTS engine
engine = pyttsx3.init()
engine.setProperty('rate', 150)
engine.setProperty('volume', 1.0)

# Track last spoken label
previous_label = None
cooldown_counter = 0  # Helps avoid repeating voice messages

# Start looping through video frames
while True:
    # Read and resize frame
    frame = vs.read()
    frame = imutils.resize(frame, width=400)

    # Detect faces and predict mask usage
    (locs, preds) = detect_and_predict_mask(frame, faceNet, maskNet)

    for (box, pred) in zip(locs, preds):
        (startX, startY, endX, endY) = box
        (mask, withoutMask) = pred

        label = "Mask" if mask > withoutMask else "No Mask"
        color = (0, 255, 0) if label == "Mask" else (0, 0, 255)
        msg = "Mask: You are allowed" if label == "Mask" else "No Mask: You are not allowed"

        # Draw rectangle and label
        cv2.putText(frame, msg, (startX, startY - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.45, color, 2)
        cv2.rectangle(frame, (startX, startY), (endX, endY), color, 2)

        # Text-to-speech logic
        if label != previous_label or cooldown_counter > 30:
            engine.say(msg)
            engine.runAndWait()
            previous_label = label
            cooldown_counter = 0
        else:
            cooldown_counter += 1

    # Show the frame
    cv2.imshow("Frame", frame)
    key = cv2.waitKey(1) & 0xFF

    # Press 'q' to quit
    if key == ord("q"):
        break

# Cleanup
cv2.destroyAllWindows()
vs.stop()  # use .stop() instead of .release() for VideoStream


Detection 0: confidence=0.9702900648117065
Bounding box: startX=175, startY=126, endX=265, endY=244
Face shape: (118, 90, 3)
Detection 0: confidence=0.9596977829933167
Bounding box: startX=171, startY=125, endX=261, endY=244
Face shape: (119, 90, 3)
Detection 0: confidence=0.9718539714813232
Bounding box: startX=169, startY=126, endX=262, endY=244
Face shape: (118, 93, 3)
Detection 0: confidence=0.9600735306739807
Bounding box: startX=171, startY=126, endX=261, endY=245
Face shape: (119, 90, 3)
Detection 0: confidence=0.9642112255096436
Bounding box: startX=171, startY=126, endX=262, endY=245
Face shape: (119, 91, 3)
Detection 0: confidence=0.95664381980896
Bounding box: startX=170, startY=125, endX=262, endY=244
Face shape: (119, 92, 3)
Detection 0: confidence=0.959155797958374
Bounding box: startX=170, startY=125, endX=262, endY=244
Face shape: (119, 92, 3)
Detection 0: confidence=0.9667173027992249
Bounding box: startX=170, startY=126, endX=262, endY=244
Face shape: (118, 92, 3)
Det

Detection 0: confidence=0.9708477258682251
Bounding box: startX=174, startY=121, endX=271, endY=250
Face shape: (129, 97, 3)
Detection 0: confidence=0.9362245202064514
Bounding box: startX=174, startY=120, endX=270, endY=247
Face shape: (127, 96, 3)
Detection 0: confidence=0.9169260263442993
Bounding box: startX=174, startY=120, endX=270, endY=246
Face shape: (126, 96, 3)
Detection 0: confidence=0.9036173820495605
Bounding box: startX=175, startY=120, endX=270, endY=247
Face shape: (127, 95, 3)
Detection 0: confidence=0.897260308265686
Bounding box: startX=174, startY=120, endX=270, endY=247
Face shape: (127, 96, 3)
Detection 0: confidence=0.9237420558929443
Bounding box: startX=174, startY=119, endX=270, endY=248
Face shape: (129, 96, 3)
Detection 0: confidence=0.9553943872451782
Bounding box: startX=174, startY=120, endX=270, endY=247
Face shape: (127, 96, 3)
Detection 0: confidence=0.9263185262680054
Bounding box: startX=175, startY=120, endX=269, endY=248
Face shape: (128, 94, 3)
D

Detection 0: confidence=0.9664467573165894
Bounding box: startX=159, startY=115, endX=249, endY=241
Face shape: (126, 90, 3)
Detection 0: confidence=0.9642346501350403
Bounding box: startX=160, startY=115, endX=250, endY=241
Face shape: (126, 90, 3)
Detection 0: confidence=0.9667760133743286
Bounding box: startX=159, startY=114, endX=250, endY=241
Face shape: (127, 91, 3)
Detection 0: confidence=0.9779900312423706
Bounding box: startX=158, startY=115, endX=249, endY=240
Face shape: (125, 91, 3)
Detection 0: confidence=0.9801265597343445
Bounding box: startX=157, startY=113, endX=249, endY=240
Face shape: (127, 92, 3)
Detection 0: confidence=0.9857017993927002
Bounding box: startX=157, startY=114, endX=247, endY=240
Face shape: (126, 90, 3)
Detection 0: confidence=0.9900221228599548
Bounding box: startX=154, startY=116, endX=246, endY=242
Face shape: (126, 92, 3)
Detection 0: confidence=0.990465521812439
Bounding box: startX=154, startY=117, endX=247, endY=240
Face shape: (123, 93, 3)
D

Detection 0: confidence=0.9541753530502319
Bounding box: startX=160, startY=116, endX=248, endY=240
Face shape: (124, 88, 3)
Detection 0: confidence=0.9644218683242798
Bounding box: startX=159, startY=120, endX=249, endY=239
Face shape: (119, 90, 3)
Detection 0: confidence=0.977055549621582
Bounding box: startX=159, startY=121, endX=249, endY=238
Face shape: (117, 90, 3)
Detection 0: confidence=0.9702892899513245
Bounding box: startX=158, startY=120, endX=250, endY=239
Face shape: (119, 92, 3)
Detection 0: confidence=0.9733712673187256
Bounding box: startX=159, startY=120, endX=250, endY=239
Face shape: (119, 91, 3)


# NO Voice

In [12]:
# loop over the frames from the video stream
while True:
    # grab the frame from the threaded video stream and resize it
    # to have a maximum width of 400 pixels
    frame = vs.read()
    frame = imutils.resize(frame, width=400)

    # detect faces in the frame and determine if they are wearing a
    # face mask or not
    (locs, preds) = detect_and_predict_mask(frame, faceNet, maskNet)

    # loop over the detected face locations and their corresponding
    # locations
    for (box, pred) in zip(locs, preds):
        # unpack the bounding box and predictions
        (startX, startY, endX, endY) = box
        (mask, withoutMask) = pred

        # determine the class label and color we'll use to draw
        # the bounding box and text
        label = "Mask" if mask > withoutMask else "No Mask"
        color = (0, 255, 0) if label == "Mask" else (0, 0, 255)

        # include the probability in the label
        #label = "{}: {:.2f}%".format(label, max(mask, withoutMask) * 100)
        
        # display the label and bounding box rectangle on the output
        # frame
        if(label=="Mask"):    
            cv2.putText(frame,"Mask: You are allowed", (startX, startY - 10),
            cv2.FONT_HERSHEY_SIMPLEX, 0.45, color, 2)
            cv2.rectangle(frame, (startX, startY), (endX, endY), color, 2)
        elif(label=="No Mask"):
            lab="No Mask: You are not allowed"
            cv2.putText(frame, lab, (startX, startY - 10),
            cv2.FONT_HERSHEY_SIMPLEX, 0.45, color, 2)
            cv2.rectangle(frame, (startX, startY), (endX, endY), color, 2)

    # show the output frame
    cv2.imshow("Frame", frame)
    key = cv2.waitKey(1) & 0xFF

    # if the `q` key was pressed, break from the loop
    if key == ord("q"):
        break

# do a bit of cleanup
cv2.destroyAllWindows()

vs.release()

Detection 0: confidence=0.999160885810852
Bounding box: startX=138, startY=134, endX=199, endY=209
Face shape: (75, 61, 3)
Detection 0: confidence=0.9991210103034973
Bounding box: startX=138, startY=134, endX=198, endY=210
Face shape: (76, 60, 3)
Detection 0: confidence=0.9988546371459961
Bounding box: startX=139, startY=134, endX=198, endY=210
Face shape: (76, 59, 3)
Detection 0: confidence=0.9982299208641052
Bounding box: startX=139, startY=134, endX=199, endY=210
Face shape: (76, 60, 3)
Detection 0: confidence=0.9979614019393921
Bounding box: startX=139, startY=134, endX=199, endY=210
Face shape: (76, 60, 3)
Detection 0: confidence=0.99820876121521
Bounding box: startX=139, startY=134, endX=199, endY=210
Face shape: (76, 60, 3)
Detection 0: confidence=0.9986454844474792
Bounding box: startX=139, startY=134, endX=199, endY=210
Face shape: (76, 60, 3)
Detection 0: confidence=0.9987025260925293
Bounding box: startX=139, startY=134, endX=199, endY=210
Face shape: (76, 60, 3)
Detection 0

Detection 0: confidence=0.9999723434448242
Bounding box: startX=189, startY=116, endX=291, endY=254
Face shape: (138, 102, 3)
Detection 0: confidence=0.9999805688858032
Bounding box: startX=188, startY=115, endX=290, endY=254
Face shape: (139, 102, 3)
Detection 0: confidence=0.9999808073043823
Bounding box: startX=188, startY=113, endX=289, endY=249
Face shape: (136, 101, 3)
Detection 0: confidence=0.9999850988388062
Bounding box: startX=187, startY=113, endX=287, endY=249
Face shape: (136, 100, 3)
Detection 0: confidence=0.9999879598617554
Bounding box: startX=187, startY=112, endX=287, endY=249
Face shape: (137, 100, 3)
Detection 0: confidence=0.9999867677688599
Bounding box: startX=187, startY=112, endX=287, endY=248
Face shape: (136, 100, 3)
Detection 0: confidence=0.9999864101409912
Bounding box: startX=186, startY=112, endX=286, endY=249
Face shape: (137, 100, 3)
Detection 0: confidence=0.9999837875366211
Bounding box: startX=186, startY=111, endX=286, endY=247
Face shape: (136, 

Detection 0: confidence=0.9331687092781067
Bounding box: startX=162, startY=122, endX=220, endY=199
Face shape: (77, 58, 3)
Detection 0: confidence=0.9784107208251953
Bounding box: startX=155, startY=124, endX=216, endY=198
Face shape: (74, 61, 3)
Detection 0: confidence=0.9882110953330994
Bounding box: startX=150, startY=124, endX=215, endY=199
Face shape: (75, 65, 3)
Detection 0: confidence=0.9867488145828247
Bounding box: startX=147, startY=125, endX=208, endY=201
Face shape: (76, 61, 3)
Detection 0: confidence=0.9987748265266418
Bounding box: startX=146, startY=125, endX=207, endY=201
Face shape: (76, 61, 3)
Detection 0: confidence=0.9999171495437622
Bounding box: startX=149, startY=125, endX=209, endY=199
Face shape: (74, 60, 3)
Detection 0: confidence=0.9997894167900085
Bounding box: startX=157, startY=127, endX=211, endY=199
Face shape: (72, 54, 3)
Detection 0: confidence=0.9978899359703064
Bounding box: startX=161, startY=127, endX=215, endY=198
Face shape: (71, 54, 3)
Detectio

AttributeError: 'WebcamVideoStream' object has no attribute 'release'