# Using CNN-LSTM to classify violent clips

In [None]:
import os
import cv2
import numpy as np
import tensorflow as tf

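# Each video must be converted to a stack of frames before it is fed to the model.
# During training, three frames were extracted per second, each frame was resized to 64x64 pixels,
# and the LSTM layer took 10 consecutive extracted frames as input.
# NOTE(review): preprocess_video below reads the first `frame_count` frames of the file without
# subsampling to 3 fps -- confirm this matches the sampling used at training time.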
def preprocess_video(video_path, frame_count=10, frame_size=(64, 64)):
    """Read the leading frames of a video into a fixed-length frame stack.

    Frames are read sequentially from the start of the file (no temporal
    subsampling) and resized with ``cv2.resize`` until ``frame_count`` frames
    have been collected or the stream ends. Clips that are too short are
    padded at the end with all-zero frames; a clip from which no frame can be
    read yields an all-zero stack.

    Args:
        video_path: Path of the video file handed to ``cv2.VideoCapture``.
        frame_count: Number of frames in the returned stack.
        frame_size: Target size passed to ``cv2.resize`` as ``dsize``, i.e.
            ``(width, height)``.

    Returns:
        A ``np.ndarray`` holding ``frame_count`` frames. For the usual
        3-channel decoded frames the shape is
        ``(frame_count, height, width, 3)``. Pixel values are not normalised.
    """
    # cv2.resize interprets dsize as (width, height), while array shapes are
    # (height, width); keep the two straight so the zero fallback has the same
    # shape as real frames even for non-square sizes.
    width, height = frame_size

    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        while len(frames) < frame_count:
            ret, frame = cap.read()
            if not ret:
                # End of stream or unreadable file.
                break
            frames.append(cv2.resize(frame, frame_size))
    finally:
        # Release the capture handle even if resizing raises.
        cap.release()

    if not frames:
        # uint8 matches the 8-bit frames OpenCV normally decodes, so stacks
        # from readable and unreadable clips share one dtype.
        return np.zeros((frame_count, height, width, 3), dtype=np.uint8)
    if len(frames) < frame_count:
        padding = np.zeros_like(frames[0])
        frames.extend([padding] * (frame_count - len(frames)))
    return np.array(frames)

def load_test_data(folder_path, frame_count=10, frame_size=(64, 64)):
    """Load every ``.mp4`` / ``.avi`` video in a folder as a frame stack.

    Only the direct entries of ``folder_path`` are considered (no recursion).
    Extensions are matched case-insensitively and the files are processed in
    sorted name order so the result is reproducible across runs and platforms.

    Args:
        folder_path: Directory containing the videos.
        frame_count: Frames per video, forwarded to ``preprocess_video``.
        frame_size: ``(width, height)`` target size, forwarded to
            ``preprocess_video``.

    Returns:
        A tuple ``(data, video_files)``. ``data`` stacks one
        ``preprocess_video`` result per video along the first axis and
        ``video_files`` lists the matching file names in the same order.
        When no video is found, ``data`` is an empty batch of shape
        ``(0, frame_count, height, width, 3)`` and ``video_files`` is ``[]``.
    """
    video_files = sorted(
        name for name in os.listdir(folder_path)
        if name.lower().endswith(('.mp4', '.avi'))
    )

    if not video_files:
        # np.array([]) would have shape (0,), which loses the frame
        # dimensions; return an empty batch with the expected rank instead.
        width, height = frame_size
        empty_batch = np.zeros((0, frame_count, height, width, 3), dtype=np.uint8)
        return empty_batch, video_files

    test_data = [
        preprocess_video(os.path.join(folder_path, name), frame_count, frame_size)
        for name in video_files
    ]
    return np.array(test_data), video_files

# Folder containing your test videos
test_video_folder = "D:/Yolo/test/result"
test_data, test_video_files = load_test_data(test_video_folder)
# Scale pixel values from [0, 255] to [0, 1].
# NOTE(review): presumably mirrors the normalisation used at training time -- confirm.
test_data = test_data / 255.0

# This model can achieve over 90% accuracy on the training set and 77% on the validation set.
model_path = "D:/CNN-LSTM/models/violence_detection_model_conv_64_lstm_64.h5"
# compile=False skips restoring the saved optimizer/loss configuration.
model = tf.keras.models.load_model(model_path, compile=False)

# Recompile the model with a compatible optimizer and loss function
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Index i of the model output is reported as class_names[i].
class_names = ["high-level violence", "low-level violence", "non-violence"]

if len(test_video_files) == 0:
    # Without any video there is nothing to feed the model; predicting on an
    # empty array would only fail with an opaque shape error.
    print(f"No .mp4/.avi videos found in {test_video_folder}; nothing to classify.")
    predictions = np.empty((0, len(class_names)))
else:
    predictions = model.predict(test_data)
# Pick the highest-scoring class for each video.
predicted_classes = np.argmax(predictions, axis=1)

for video_file, predicted_class in zip(test_video_files, predicted_classes):
    print(f"Video: {video_file}, Predicted Class: {class_names[predicted_class]}")