In [9]:
import cv2
import numpy as np
from keras.applications import VGG16
from keras.models import Sequential
from keras.layers import Dense, Flatten, AveragePooling2D, Dropout
from keras.optimizers import Adam
from keras.models import Model
from keras.utils import img_to_array, load_img

In [7]:
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

In [10]:
baseModel = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
# construct the head of the model that will be placed on top of the
# the base model
headModel = baseModel.output
headModel = AveragePooling2D(pool_size=(7, 7))(headModel)
headModel = Flatten(name="flatten")(headModel)
headModel = Dense(512, activation="relu")(headModel)
headModel = Dropout(0.5)(headModel)
headModel = Dense(2, activation="softmax")(headModel)
# place the head FC model on top of the base model (this will become
# the actual model we will train)
model = Model(inputs=baseModel.input, outputs=headModel)
# loop over all layers in the base model and freeze them so they will
# *not* be updated during the training process
for layer in baseModel.layers:
	layer.trainable = False


In [11]:
num_classes = 2  # Two classes: 'positive' and 'negative'

# Compile the model
model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=0.0001), metrics=['accuracy'])


In [6]:
def extract_frames_and_predict(video_path, model, frame_interval=3, k=128):
    cap = cv2.VideoCapture(video_path)
    predictions_buffer = []
    frame_count = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        if frame_count % frame_interval == 0:
            # grayscale conversion
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            faces = face_cascade.detectMultiScale(frame, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))
            # check for number of faces detected
            if len(faces) == 0:
                continue
            elif len(faces) == 1:
                (x, y, w, h) = faces[0]
            else:
                # keep the largest face
                (x, y, w, h) = max(faces, key=lambda rectangle: (rectangle[2] * rectangle[3]))
            face = frame[y:y+h, x:x+w]
            # resize face
            face = cv2.resize(face, (224, 224))
            face = img_to_array(face)
            face = np.expand_dims(face, axis=0)

            # make prediction
            prediction = model.predict(face)

            predictions_buffer.append(prediction)
            if len(predictions_buffer) > k:
                predictions_buffer.pop(0)

        frame_count += 1

    cap.release()
    cv2.destroyAllWindows()

    # Calculate the rolling average
    rolling_average = np.mean(predictions_buffer, axis=0)
    
    # Choose the label with the highest probability
    predicted_label = np.argmax(rolling_average)

    return predicted_label


# Example usage:
video_path = 'path_to_video.mp4'
predicted_label = extract_frames_and_predict(video_path, model)
print(f"Predicted Label: {'positive' if predicted_label == 0 else 'negative'}")


Predicted Label: positive


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
