Mount Google Drive

In [None]:
# Mount Google Drive at /content/drive so files stored there can be read
# from this notebook's filesystem.
from google.colab import drive
drive.mount('/content/drive')

**Model for Action Recognition**

This notebook demonstrates action recognition using a pre-trained Long-term Recurrent Convolutional Network (LRCN) or Conv-LSTM model. The model processes a sequence of video frames and predicts the action being performed.

**Workflow:**



*   Load the trained LRCN or Conv-LSTM model.
*   Read the input video and extract frames.
*   Pre-process frames by resizing and normalizing.
*   Store frames in a sequence and pass them to the model.
*   Predict the action for each sequence and overlay the result on the video frames.
*   Save the output video with predicted labels displayed.
*   The final output is a processed video in which each frame is labeled with the predicted action, giving a frame-by-frame visual representation of the recognized activity.


In [None]:
import cv2
import numpy as np
from keras.models import load_model
from collections import deque
from moviepy.editor import VideoFileClip

# Load the trained LRCN model used by predict_on_video below.
LRCN_model = load_model("/content/LRCN_model___Date_Time_2024_10_13__04_01_18___Loss_0.38333097100257874___Accuracy_0.913165271282196.keras")
# Class names, indexed by the model's output position (np.argmax of the
# predicted probabilities is used as an index into this list).
# NOTE(review): 'snatching' appears twice, so output indices 0 and 2 are
# displayed with the same label. The next cell uses
# ["Fighting", "Normal", "Snatching"] -- confirm the order used at training
# time and correct this list accordingly.
CLASSES_LIST = ['snatching', 'fighting', 'snatching']  # Modify this based on your classes
# Number of consecutive frames fed to the model as one sequence.
SEQUENCE_LENGTH = 20  # Adjust according to your model input

# Size each frame is resized to before being passed to the model.
IMAGE_HEIGHT, IMAGE_WIDTH = 64, 64  # Modify according to your model's input

# Function to predict on already downloaded video
def predict_on_video(video_file_path, output_file_path, SEQUENCE_LENGTH):
    '''
    Perform action recognition on a video using the LRCN model.

    Every frame is resized to (IMAGE_WIDTH, IMAGE_HEIGHT), divided by 255 and
    pushed into a sliding window holding the most recent SEQUENCE_LENGTH
    frames. Once the window is full, the model is run on it for every new
    frame, and the latest predicted class name is drawn on the original frame
    before it is written to the output video. Frames read before the window
    fills are written with an empty label.

    Args:
    video_file_path:  The path of the video on which action recognition is performed.
    output_file_path: The path where the output video with the predicted action will be stored.
    SEQUENCE_LENGTH:  The number of frames to pass to the model as one sequence.

    Raises:
    IOError: If the input video cannot be opened.
    '''

    # Initialize the VideoCapture object and fail early with a clear message
    # instead of silently producing an empty output file.
    video_reader = cv2.VideoCapture(video_file_path)
    if not video_reader.isOpened():
        video_reader.release()
        raise IOError(f"Could not open input video: {video_file_path}")

    video_writer = None
    try:
        # Get the width and height of the video.
        original_video_width = int(video_reader.get(cv2.CAP_PROP_FRAME_WIDTH))
        original_video_height = int(video_reader.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Initialize the VideoWriter object with the same FPS and frame size as
        # the input. The lowercase 'mp4v' tag matches the other cells of this
        # notebook.
        video_writer = cv2.VideoWriter(output_file_path, cv2.VideoWriter_fourcc(*'mp4v'),
                                       video_reader.get(cv2.CAP_PROP_FPS),
                                       (original_video_width, original_video_height))

        # Sliding window of pre-processed frames; the oldest frame is dropped
        # automatically once SEQUENCE_LENGTH frames are stored.
        frames_queue = deque(maxlen=SEQUENCE_LENGTH)

        # Label drawn on each frame; stays empty until the first prediction.
        predicted_class_name = ''

        while video_reader.isOpened():
            ok, frame = video_reader.read()

            # Stop at the end of the video (or on a read failure).
            if not ok:
                break

            # cv2.resize takes the target size as (width, height).
            resized_frame = cv2.resize(frame, (IMAGE_WIDTH, IMAGE_HEIGHT))
            normalized_frame = resized_frame / 255.0
            frames_queue.append(normalized_frame)

            # Predict only when a full sequence is available.
            if len(frames_queue) == SEQUENCE_LENGTH:
                # Build the batch explicitly: (1, SEQUENCE_LENGTH, ...frame dims).
                input_sequence = np.expand_dims(np.array(frames_queue), axis=0)
                predicted_labels_probabilities = LRCN_model.predict(input_sequence)[0]

                # Map the most probable output index to its class name.
                predicted_label = int(np.argmax(predicted_labels_probabilities))
                predicted_class_name = CLASSES_LIST[predicted_label]

            # Write predicted class name on top of the original frame.
            cv2.putText(frame, predicted_class_name, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

            # Write the frame to disk.
            video_writer.write(frame)
    finally:
        # Always release the capture and writer, even if prediction fails,
        # so the output file is finalized and handles are not leaked.
        video_reader.release()
        if video_writer is not None:
            video_writer.release()

# Path to the input video already present on disk.
input_video_file_path = '/content/Mobile_snatching_1.mp4'  # Change this to the path of your downloaded video
# KNOWN ISSUE: this cell does not perform well -- the overlay only ever shows
# the first class of CLASSES_LIST.
# NOTE(review): possibly related to the duplicated entry in CLASSES_LIST
# defined above in this cell -- verify the class list against the training labels.
# Path the annotated output video is written to.
output_video_file_path = '/content/-Output-SeqLen.mp4'

# Perform Action Recognition on the Test Video
predict_on_video(input_video_file_path, output_video_file_path, SEQUENCE_LENGTH)

# Display the output video inline (without audio).
VideoFileClip(output_video_file_path, audio=False, target_resolution=(300, None)).ipython_display()


In [None]:
import os
import cv2
import numpy as np
from collections import deque
from keras.models import load_model
from moviepy.editor import VideoFileClip

# Paths to the saved LRCN and ConvLSTM model files.
LRCN_MODEL_PATH = '/content/LRCN_model___Date_Time_2024_10_13__04_01_18___Loss_0.38333097100257874___Accuracy_0.913165271282196.keras'
CONVLSTM_MODEL_PATH = '/content/convlstm_model___Date_Time_2024_10_12__20_09_35___Loss_0.6140506267547607___Accuracy_0.8123249411582947.keras'

# Load both models. Only convlstm_model is called by
# analyze_and_predict_on_video in this cell; LRCN_model is loaded but unused here.
LRCN_model = load_model(LRCN_MODEL_PATH)
convlstm_model = load_model(CONVLSTM_MODEL_PATH)

# Number of consecutive frames per prediction and the size frames are resized to.
SEQUENCE_LENGTH = 20
IMAGE_HEIGHT, IMAGE_WIDTH = 64, 64  # Adjust these based on your model's input size
# Class names, indexed by the model's output position.
# NOTE(review): must match the label order used when the model was trained -- confirm.
CLASSES_LIST = ["Fighting", "Normal", "Snatching"]  # Update based on your classes

# Function to perform action recognition on a video and draw bounding boxes
def analyze_and_predict_on_video(video_file_path, output_file_path, SEQUENCE_LENGTH, frame_skip=5):
    '''
    Run the ConvLSTM model over a video and write an annotated copy.

    Frames are read once. Each frame is resized to (IMAGE_WIDTH, IMAGE_HEIGHT),
    divided by 255 and kept in a sliding window of the last SEQUENCE_LENGTH
    frames. As soon as the window is full, the model is run for every new
    frame; the predicted class name is printed with its raw probabilities and
    drawn (inside a fixed rectangle) on the frame that completed the window.
    Frames read before the window fills are written unannotated.

    Args:
    video_file_path:  The path of the video on which action recognition is performed.
    output_file_path: The path where the annotated output video will be stored.
    SEQUENCE_LENGTH:  The number of frames to pass to the model as one sequence.
    frame_skip:       Currently unused; kept so existing calls keep working.

    Raises:
    IOError: If the input video cannot be opened.
    '''
    video_reader = cv2.VideoCapture(video_file_path)
    if not video_reader.isOpened():
        video_reader.release()
        raise IOError(f"Could not open input video: {video_file_path}")

    video_writer = None
    try:
        original_video_width = int(video_reader.get(cv2.CAP_PROP_FRAME_WIDTH))
        original_video_height = int(video_reader.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # A single writer for the whole run, with the input's FPS and frame size.
        video_writer = cv2.VideoWriter(output_file_path, cv2.VideoWriter_fourcc(*'mp4v'),
                                       video_reader.get(cv2.CAP_PROP_FPS),
                                       (original_video_width, original_video_height))

        # Only the last SEQUENCE_LENGTH pre-processed frames are kept, so memory
        # use does not grow with the length of the video and each frame is
        # resized exactly once.
        frames_queue = deque(maxlen=SEQUENCE_LENGTH)

        while video_reader.isOpened():
            ok, frame = video_reader.read()
            if not ok:
                break

            # cv2.resize takes the target size as (width, height).
            resized_frame = cv2.resize(frame, (IMAGE_WIDTH, IMAGE_HEIGHT))
            frames_queue.append(resized_frame / 255.0)

            if len(frames_queue) == SEQUENCE_LENGTH:
                # Add the batch dimension expected by predict().
                input_sequence = np.expand_dims(np.array(frames_queue), axis=0)
                predicted_labels_probabilities = convlstm_model.predict(input_sequence)[0]

                # Print raw predicted probabilities for each class
                print(f"Predicted probabilities: {predicted_labels_probabilities}")

                # Map the most probable output index to its class name.
                predicted_label = int(np.argmax(predicted_labels_probabilities))
                predicted_class_name = CLASSES_LIST[predicted_label]

                # Label the frame that completed this window, so the overlay
                # is aligned with the frames the prediction was made from.
                cv2.rectangle(frame, (10, 10), (200, 60), (0, 255, 0), 2)
                cv2.putText(frame, predicted_class_name, (15, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

            video_writer.write(frame)
    finally:
        # Release handles even if prediction fails so the output is finalized.
        video_reader.release()
        if video_writer is not None:
            video_writer.release()

# Path of the input video to process.
uploaded_video_file_path = '/content/Mobile_snatching_1.mp4'  # Replace this with the actual path of the uploaded video

# Output path: the input path without its extension, suffixed with the
# sequence length, e.g. '<input>-Output-SeqLen20.mp4'.
output_video_file_path = f'{os.path.splitext(uploaded_video_file_path)[0]}-Output-SeqLen{SEQUENCE_LENGTH}.mp4'

# Perform action recognition and write the annotated video (label inside a fixed box).
analyze_and_predict_on_video(uploaded_video_file_path, output_video_file_path, SEQUENCE_LENGTH)

# Display the output video inline (without audio).
VideoFileClip(output_video_file_path, audio=False, target_resolution=(300, None)).ipython_display()


Per-Frame Prediction on Videos (sliding window of frames with a confidence threshold)

In [None]:
import os
import cv2
import numpy as np
from keras.models import load_model
from moviepy.editor import VideoFileClip

# Paths to the saved LRCN and (improved) ConvLSTM model files.
LRCN_MODEL_PATH = '/content/LRCN_model___Date_Time_2024_10_13__04_01_18___Loss_0.38333097100257874___Accuracy_0.913165271282196.keras'
CONVLSTM_MODEL_PATH = '/content/improve_convlstm_model___Date_Time_2024_10_12__20_42_24___Loss_0.41087716817855835___Accuracy_0.8095238208770752.keras'

# Load both models. Only convlstm_model is called by
# analyze_and_predict_on_video in this cell; LRCN_model is loaded but unused here.
LRCN_model = load_model(LRCN_MODEL_PATH)
convlstm_model = load_model(CONVLSTM_MODEL_PATH)

# Number of consecutive frames per prediction and the size frames are resized to.
SEQUENCE_LENGTH = 20
IMAGE_HEIGHT, IMAGE_WIDTH = 64, 64  # Adjust these based on your model's input size
# Class names, indexed by the model's output position.
# NOTE(review): only two classes are listed here, while the previous cell lists
# three -- confirm this matches the number and order of outputs of the model
# loaded from CONVLSTM_MODEL_PATH.
CLASSES_LIST = ["Fighting", "Snatching"]  # Update based on your classes

# Function to perform action recognition on a video and label each frame with the prediction
def analyze_and_predict_on_video(video_file_path, output_file_path, SEQUENCE_LENGTH, confidence_threshold=0.5):
    '''
    Run the ConvLSTM model over a video and write a copy labeled per frame.

    Each frame is resized to (IMAGE_WIDTH, IMAGE_HEIGHT), divided by 255 and
    kept in a sliding window of the last SEQUENCE_LENGTH frames. Once the
    window is full, the model is run for every new frame and the raw
    probabilities are printed. If the highest probability reaches
    confidence_threshold, the matching class name is drawn in green;
    otherwise "Uncertain" is drawn in red. Frames read before the window
    fills are written unannotated.

    Args:
    video_file_path:      The path of the video on which action recognition is performed.
    output_file_path:     The path where the annotated output video will be stored.
    SEQUENCE_LENGTH:      The number of frames to pass to the model as one sequence.
    confidence_threshold: Minimum top-class probability required to show a
                          class name instead of "Uncertain" (default 0.5).

    Raises:
    IOError: If the input video cannot be opened.
    '''
    # Local import so this cell does not rely on imports made by earlier cells.
    from collections import deque

    video_reader = cv2.VideoCapture(video_file_path)
    if not video_reader.isOpened():
        video_reader.release()
        raise IOError(f"Could not open input video: {video_file_path}")

    video_writer = None
    try:
        original_video_width = int(video_reader.get(cv2.CAP_PROP_FRAME_WIDTH))
        original_video_height = int(video_reader.get(cv2.CAP_PROP_FRAME_HEIGHT))
        video_writer = cv2.VideoWriter(output_file_path, cv2.VideoWriter_fourcc(*'mp4v'),
                                       video_reader.get(cv2.CAP_PROP_FPS),
                                       (original_video_width, original_video_height))

        # Bounded window: only the frames needed for the next prediction are
        # kept, so memory use does not grow with the length of the video.
        frames_queue = deque(maxlen=SEQUENCE_LENGTH)

        while video_reader.isOpened():
            ok, frame = video_reader.read()
            if not ok:
                break

            # cv2.resize takes the target size as (width, height).
            resized_frame = cv2.resize(frame, (IMAGE_WIDTH, IMAGE_HEIGHT))
            frames_queue.append(resized_frame / 255.0)

            # If we have enough frames, make a prediction
            if len(frames_queue) == SEQUENCE_LENGTH:
                # Add the batch dimension expected by predict().
                input_sequence = np.expand_dims(np.array(frames_queue), axis=0)
                predicted_labels_probabilities = convlstm_model.predict(input_sequence)[0]

                # Print raw predicted probabilities for each class
                print(f"Predicted probabilities for frame: {predicted_labels_probabilities}")

                # Show the class name only when the model is confident enough.
                if np.max(predicted_labels_probabilities) >= confidence_threshold:
                    predicted_label = int(np.argmax(predicted_labels_probabilities))
                    predicted_class_name = CLASSES_LIST[predicted_label]
                    cv2.putText(frame, predicted_class_name, (15, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
                else:
                    cv2.putText(frame, "Uncertain", (15, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

            # Write the (possibly annotated) frame to the output video
            video_writer.write(frame)
    finally:
        # Release handles even if prediction fails so the output is finalized.
        video_reader.release()
        if video_writer is not None:
            video_writer.release()


# Path of the input video to process.
uploaded_video_file_path = '/content/fighting.mp4'  # Replace this with the actual path of the uploaded video

# Output path: the input path without its extension, suffixed with the
# sequence length, e.g. '<input>-Output-SeqLen20.mp4'.
output_video_file_path = f'{os.path.splitext(uploaded_video_file_path)[0]}-Output-SeqLen{SEQUENCE_LENGTH}.mp4'

# Perform action recognition and write the video with a text label on each predicted frame.
analyze_and_predict_on_video(uploaded_video_file_path, output_video_file_path, SEQUENCE_LENGTH)

# Display the output video inline (without audio).
VideoFileClip(output_video_file_path, audio=False, target_resolution=(300, None)).ipython_display()