# Import Libraries

In [12]:
#Import All the libraries used in this project

from tensorflow.keras.models import load_model
import cv2
import numpy as np
import pandas as pd
from collections import deque
from moviepy.editor import *
import time

# Declare Constants

In [13]:
# Height and width (in pixels) that every frame is resized to before it is
# passed to the model.
IMAGE_HEIGHT = 64
IMAGE_WIDTH = 64

# Number of frames that make up one input sequence for the model.
SEQUENCE_LENGTH = 30

# Directory containing the dataset.
DATASET_DIR = "Dataset"

# Names of the classes used for training. The prediction code indexes this
# list with the argmax of the model's output probabilities.
CLASSES_LIST = ["fighting", "walking", "running"]

# Load Model

In [14]:
# Load the previously built model from 'HAD_LRCN_Model.h5' (path relative to
# the notebook's working directory). The prediction functions and cells below
# read this module-level `model` directly.
model = load_model('HAD_LRCN_Model.h5')

# Prediction of Single Action

In [15]:
def predict_single_action(video_file_path, SEQUENCE_LENGTH):
    """Classify the single action performed in a video and print the result.

    Up to ``SEQUENCE_LENGTH`` frames are sampled at evenly spaced positions
    across the video, resized, scaled by 1/255 and passed as one batch to the
    module-level ``model``. The class with the highest probability is looked
    up in ``CLASSES_LIST`` and printed together with its probability.

    Args:
        video_file_path: Path of the video file to classify.
        SEQUENCE_LENGTH: Number of frames to sample from the video.

    Returns:
        None. The prediction is only printed.

    Raises:
        ValueError: If the video cannot be opened or no frame could be read
            from it (previously this surfaced as an unrelated shape error
            from ``model.predict``).
    """
    video_reader = cv2.VideoCapture(video_file_path)

    # Release the capture even if reading or resizing raises.
    try:
        if not video_reader.isOpened():
            raise ValueError(f'Could not open video file: {video_file_path}')

        # Sample one frame every `skip_frames_window` frames so the samples
        # span the whole video; never step by less than one frame.
        video_frames_count = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
        skip_frames_window = max(int(video_frames_count/SEQUENCE_LENGTH),1)

        frames_list = []
        for frame_counter in range(SEQUENCE_LENGTH):

            # Seek to the position of the next sampled frame.
            video_reader.set(cv2.CAP_PROP_POS_FRAMES, frame_counter * skip_frames_window)
            success, frame = video_reader.read()

            if not success:
                break

            # NOTE(review): cv2.resize takes dsize as (width, height); the
            # (IMAGE_HEIGHT, IMAGE_WIDTH) order is kept as written and is only
            # equivalent while both constants are equal - confirm against the
            # training preprocessing before changing either constant.
            resized_frame = cv2.resize(frame, (IMAGE_HEIGHT, IMAGE_WIDTH))

            # Scale pixel values by 1/255 before appending.
            frames_list.append(resized_frame / 255)
    finally:
        video_reader.release()

    if not frames_list:
        raise ValueError(f'No frames could be read from video file: {video_file_path}')

    # Add a leading batch axis and take the probabilities of the only sample.
    predicted_labels_probabilities = model.predict(np.expand_dims(frames_list, axis = 0))[0]
    predicted_label = np.argmax(predicted_labels_probabilities)
    predicted_class_name = CLASSES_LIST[predicted_label]
    print(f'Action Predicted: {predicted_class_name}\nConfidence: {predicted_labels_probabilities[predicted_label]}')

In [16]:
# Classify the fight clip; the function prints the predicted class and its probability.
predict_single_action("Predict/fight.avi",SEQUENCE_LENGTH)

Action Predicted: fighting
Confidence: 0.9998096823692322


In [17]:
# Classify the running clip.
# NOTE(review): in the recorded output this clip is labelled "walking"
# (probability ~0.79), i.e. the prediction does not match the file name.
predict_single_action("Predict/running.avi",SEQUENCE_LENGTH)

Action Predicted: walking
Confidence: 0.7878350019454956


In [18]:
# Classify the walking clip; the function prints the predicted class and its probability.
predict_single_action("Predict/walking.avi",SEQUENCE_LENGTH)

Action Predicted: walking
Confidence: 0.993097722530365


# Predicting All Actions in a Video

In [19]:
# Build the YOLOv3 detection network from its config and weights files.
net = cv2.dnn.readNet('yolo-coco-data/yolov3.cfg','yolo-coco-data/yolov3.weights')

# Read the COCO class labels, one stripped label per line of the names file.
classes = []
with open("yolo-coco-data/coco.names", "r") as f:
    classes = [label_line.strip() for label_line in f]

# Positions of the "person" label in the class list; the detection code keeps
# only detections whose class id is in this list.
person_indices = [
    position
    for position, label in enumerate(classes)
    if label == "person"
]




In [20]:
def _detect_person_boxes(frame, net, person_indices, confidence_threshold, nms_threshold=0.4):
    """Run the detector on ``frame`` and return the kept person boxes as [x, y, w, h] lists."""
    frame_height, frame_width = frame.shape[:2]

    blob = cv2.dnn.blobFromImage(frame, 1/255.0, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)
    outputs = net.forward(net.getUnconnectedOutLayersNames())

    # Detections carry centre/size values that are scaled by the frame size here.
    scale = np.array([frame_width, frame_height, frame_width, frame_height])

    boxes = []
    confidences = []
    for output in outputs:
        for detection in output:
            # Entries from index 5 onwards are the per-class scores.
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]

            # Keep only sufficiently confident "person" detections.
            if confidence > confidence_threshold and class_id in person_indices:
                (centerX, centerY, width, height) = (detection[0:4] * scale).astype("int")

                # Convert the box centre to its top-left corner.
                x = int(centerX - (width / 2))
                y = int(centerY - (height / 2))

                boxes.append([x, y, int(width), int(height)])
                confidences.append(float(confidence))

    # Non-maximum suppression removes overlapping boxes.
    indices = cv2.dnn.NMSBoxes(boxes, confidences, confidence_threshold, nms_threshold)
    if len(indices) == 0:
        return []

    # np.array(...) copes with the result being a tuple, an (N,) or an (N, 1) array.
    return [boxes[i] for i in np.array(indices).flatten()]


def predict_on_video(video_file_path, output_file_path, SEQUENCE_LENGTH,net,classes,person_indices):
    """Detect people in a video, classify their action and write an annotated copy.

    For every frame, person boxes are detected with ``net``. Each box is
    cropped from the unannotated frame, resized, scaled by 1/255 and appended
    to a rolling queue of ``SEQUENCE_LENGTH`` crops. Once the queue is full,
    the module-level ``model`` classifies it and the class name from
    ``CLASSES_LIST`` is drawn above the box. Every frame that is read is
    written to the output video, whether or not a person was detected in it.

    NOTE(review): all detected people share one queue, so with several people
    in view a sequence mixes crops of different people.

    Args:
        video_file_path: Path of the input video.
        output_file_path: Path the annotated video is written to.
        SEQUENCE_LENGTH: Number of crops per sequence passed to the model.
        net: Detection network, as returned by ``cv2.dnn.readNet``.
        classes: Class labels of the detector. Not used by this function;
            kept so existing callers keep working.
        person_indices: Class ids that count as "person".

    Returns:
        None. The result is the video file written to ``output_file_path``;
        the index of each predicted class is also printed.
    """
    # Minimum class score for a detection; also passed to NMS as score threshold.
    confidence_threshold = 0.5

    video_reader = cv2.VideoCapture(video_file_path)
    original_video_width = int(video_reader.get(cv2.CAP_PROP_FRAME_WIDTH))
    original_video_height = int(video_reader.get(cv2.CAP_PROP_FRAME_HEIGHT))
    video_writer = cv2.VideoWriter(output_file_path, cv2.VideoWriter_fourcc('M', 'P', '4', 'V'),
                                   video_reader.get(cv2.CAP_PROP_FPS), (original_video_width, original_video_height))

    # Rolling window holding the most recent pre-processed crops.
    frames_queue = deque(maxlen = SEQUENCE_LENGTH)
    # Last predicted class name; stays empty until the queue has filled once.
    predicted_class_name = ''

    # Release reader and writer even if detection or prediction raises.
    try:
        while video_reader.isOpened():

            ok, frame = video_reader.read()
            if not ok:
                break

            frame_height, frame_width = frame.shape[:2]

            # Draw on a copy so that crops fed to the model never contain
            # rectangles or labels drawn for this or another box.
            annotated_frame = frame.copy()

            for (x, y, box_w, box_h) in _detect_person_boxes(frame, net, person_indices, confidence_threshold):
                cv2.rectangle(annotated_frame, (x, y), (x + box_w, y + box_h), (0, 255, 0), 2)

                # Clamp the box to the frame: a negative start index would be
                # interpreted as "from the end" and yield an empty crop.
                x0, y0 = max(x, 0), max(y, 0)
                x1, y1 = min(x + box_w, frame_width), min(y + box_h, frame_height)
                roi = frame[y0:y1, x0:x1]

                # Skip boxes with no visible area instead of swallowing the
                # resize error with a bare except.
                if roi.size == 0:
                    continue

                # NOTE(review): cv2.resize takes dsize as (width, height); the
                # (IMAGE_HEIGHT, IMAGE_WIDTH) order is kept as written and is
                # only equivalent while both constants are equal.
                resized_roi = cv2.resize(roi, (IMAGE_HEIGHT, IMAGE_WIDTH))
                frames_queue.append(resized_roi / 255)

                # Predict only once a full sequence is available.
                if len(frames_queue) == SEQUENCE_LENGTH:
                    predicted_labels_probabilities = model.predict(np.expand_dims(frames_queue, axis = 0))[0]
                    predicted_label = np.argmax(predicted_labels_probabilities)
                    predicted_class_name = CLASSES_LIST[predicted_label]
                    print(predicted_label)

                # Label the box with the most recent prediction.
                cv2.putText(annotated_frame, predicted_class_name, (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (45, 155, 0), 2)

            # Write every frame so the output keeps the input's length.
            video_writer.write(annotated_frame)
    finally:
        video_reader.release()
        video_writer.release()

In [21]:
%%time
# Annotate the fight clip and write the result to "fight.avi" in the working directory.
# NOTE: per the recorded warning, OpenCV does not support the 'MP4V' tag for an
# AVI output and falls back to 'FMP4'.
predict_on_video("Predict/fight.avi","fight.avi",SEQUENCE_LENGTH,net,classes,person_indices)

OpenCV: FFMPEG: tag 0x5634504d/'MP4V' is not supported with codec id 12 and format 'avi / AVI (Audio Video Interleaved)'
OpenCV: FFMPEG: fallback to use tag 0x34504d46/'FMP4'


0
0
0
0
0
0
0
0
0
0
0
0
CPU times: user 1min 17s, sys: 6.02 s, total: 1min 23s
Wall time: 14.6 s


In [22]:
# Preview the annotated video inline, without audio. The recorded output shows
# moviepy building a temporary __temp__.mp4 for the display.
VideoFileClip("fight.avi", audio=False).ipython_display()

Moviepy - Building video __temp__.mp4.
Moviepy - Writing video __temp__.mp4



                                                                                

Moviepy - Done !
Moviepy - video ready __temp__.mp4


In [37]:
#Check Real-Time Prediction
#
# Runs person detection and action classification on camera device 0 and
# shows the annotated frames in a window until 'q' is pressed or a frame can
# no longer be read. Uses `net`, `person_indices`, `model`, `CLASSES_LIST`,
# `SEQUENCE_LENGTH`, `IMAGE_HEIGHT` and `IMAGE_WIDTH` from the earlier cells.
#
# NOTE(review): all detected people share one queue, so with several people in
# view a sequence mixes crops of different people.

# Minimum class score for a detection; also passed to NMS as score threshold.
confidence_threshold = 0.5
# Overlap threshold passed to non-maximum suppression.
nms_threshold = 0.4
# One name for both namedWindow and imshow, so only a single window is created.
window_name = "real time"

# Open the camera.
video_reader = cv2.VideoCapture(0)

# Rolling window holding the most recent pre-processed crops.
frames_queue = deque(maxlen = SEQUENCE_LENGTH)

# Last predicted class name; stays empty until the queue has filled once.
predicted_class_name = ''

# Create the resizable window once instead of on every frame.
cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)

# Release the camera and close the window even on an error or kernel interrupt.
try:
    while video_reader.isOpened():

        ok, frame = video_reader.read()
        if not ok:
            break

        frame_height, frame_width = frame.shape[:2]

        # Pre-process the frame and run the detector.
        blob = cv2.dnn.blobFromImage(frame, 1/255.0, (416, 416), swapRB=True, crop=False)
        net.setInput(blob)
        outputs = net.forward(net.getUnconnectedOutLayersNames())

        # Detections carry centre/size values that are scaled by the frame size here.
        scale = np.array([frame_width, frame_height, frame_width, frame_height])

        boxes = []
        confidences = []
        for output in outputs:
            for detection in output:
                # Entries from index 5 onwards are the per-class scores.
                scores = detection[5:]
                class_id = np.argmax(scores)
                confidence = scores[class_id]

                # Keep only sufficiently confident "person" detections.
                if confidence > confidence_threshold and class_id in person_indices:
                    (centerX, centerY, width, height) = (detection[0:4] * scale).astype("int")

                    # Convert the box centre to its top-left corner.
                    x = int(centerX - (width / 2))
                    y = int(centerY - (height / 2))

                    boxes.append([x, y, int(width), int(height)])
                    confidences.append(float(confidence))

        # Non-maximum suppression removes overlapping boxes.
        indices = cv2.dnn.NMSBoxes(boxes, confidences, confidence_threshold, nms_threshold)

        # Draw on a copy so that crops fed to the model never contain
        # rectangles or labels drawn for this or another box.
        annotated_frame = frame.copy()

        if len(indices) > 0:
            # np.array(...) copes with the result being a tuple, an (N,) or an (N, 1) array.
            for i in np.array(indices).flatten():
                x, y, box_w, box_h = boxes[i]
                cv2.rectangle(annotated_frame, (x, y), (x + box_w, y + box_h), (255, 250, 71), 2)

                # Clamp the box to the frame: a negative start index would be
                # interpreted as "from the end" and yield an empty crop.
                x0, y0 = max(x, 0), max(y, 0)
                x1, y1 = min(x + box_w, frame_width), min(y + box_h, frame_height)
                roi = frame[y0:y1, x0:x1]

                # Skip boxes with no visible area instead of swallowing the
                # resize error with a bare except.
                if roi.size == 0:
                    continue

                # NOTE(review): cv2.resize takes dsize as (width, height); the
                # (IMAGE_HEIGHT, IMAGE_WIDTH) order is kept as written and is
                # only equivalent while both constants are equal.
                resized_roi = cv2.resize(roi, (IMAGE_HEIGHT, IMAGE_WIDTH))
                frames_queue.append(resized_roi / 255)

                # Predict only once a full sequence is available.
                if len(frames_queue) == SEQUENCE_LENGTH:
                    predicted_labels_probabilities = model.predict(np.expand_dims(frames_queue, axis = 0))[0]
                    predicted_label = np.argmax(predicted_labels_probabilities)
                    predicted_class_name = CLASSES_LIST[predicted_label]
                    print(predicted_label)

                # Label the box with the most recent prediction.
                cv2.putText(annotated_frame, predicted_class_name, (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (132,45,0), 2)

        # Show the annotated frame; stop when 'q' is pressed.
        cv2.imshow(window_name, annotated_frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    video_reader.release()
    cv2.destroyAllWindows()

2
2
2
2
2
1
2
2
2
2
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
