# In [None]:
"""
Violence Detection Model Training Pipeline

This notebook implements a sophisticated deep learning pipeline for violence detection in video sequences.
It includes:

- Data generation and preprocessing
- Advanced model architecture with multi-stream processing
- Training and evaluation workflow
- Visualization of results

Key Components:

1. Data Management
   - VideoFrameGenerator: Generates synthetic video frames with metadata
   - ViolenceDetectionDataset: Manages dataset creation and splitting
   - FrameMetadata: Stores per-frame metadata

2. Model Architecture  
   - Based on InceptionV3 backbone
   - Bidirectional LSTM and GRU streams
   - Attention mechanism
   - Multi-task learning with main and auxiliary outputs

3. Training Pipeline
   - Learning rate reduction on plateau (ReduceLROnPlateau callback)
   - Early stopping
   - Performance visualization
   - Comprehensive logging

Usage:
    Run all cells to:
    1. Generate synthetic training data
    2. Build and compile model
    3. Train model with visualization
    4. Evaluate performance

Author: Marc Reyes
"""

# Import required libraries
import logging
from dataclasses import dataclass
from typing import Dict, List, Tuple

import cv2
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras import applications, layers, models
from tensorflow.keras.layers import GRU, LSTM, Attention, Bidirectional, TimeDistributed

# Configure logging: INFO-level output on the root logger, plus a module-level
# logger that the classes and main() below use for progress messages.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


@dataclass
class FrameMetadata:
    """
    Descriptive record attached to a single video frame.

    The class is a plain container: it performs no validation, so the value
    ranges listed below are the ranges producers are expected to respect, not
    ranges enforced here.

    Attributes:
        motion_intensity (float): Motion relative to neighbouring frames, nominally 0-1
        scene_complexity (float): Content complexity score, nominally 0-1
        lighting_condition (str): One of 'dark', 'bright' or 'mixed'
        timestamp (float): Position of the frame within its sequence, nominally 0-1
        blur_factor (float): Strength of motion blur, nominally 0-1
        dominant_color (Tuple[float, float, float]): Three-component dominant colour
    """

    # Field order is part of the positional constructor signature; keep it stable.
    motion_intensity: float
    scene_complexity: float
    lighting_condition: str
    timestamp: float
    blur_factor: float
    dominant_color: Tuple[float, float, float]


class VideoFrameGenerator:
    """
    Generate synthetic (mock) video frames for the two pipeline classes.

    "Violent" frames combine a time-varying wave in the first colour channel
    with randomly placed filled polygons and a horizontal motion blur.
    "Non-violent" frames are smooth sinusoidal gradients in the second and
    third channels. All frames are float arrays with values in [0, 1].

    Args:
        frame_size (int): Side length in pixels of the square frames produced
    """

    def __init__(self, frame_size: int = 299):
        self.frame_size = frame_size
        # Dispatch table: pattern name -> bound generator method.
        self._supported_patterns = {
            "violent": self._generate_violent_pattern,
            "non_violent": self._generate_non_violent_pattern,
        }

    def _apply_motion_blur(self, frame: np.ndarray, kernel_size: int) -> np.ndarray:
        """
        Apply a horizontal motion blur to a frame.

        The kernel is all zeros except for its middle row, which holds
        ``1 / kernel_size`` in every column, so each output pixel is the mean
        of ``kernel_size`` horizontally adjacent input pixels.

        Args:
            frame (np.ndarray): Input frame
            kernel_size (int): Side length of the square blur kernel

        Returns:
            np.ndarray: Blurred frame with the same depth as the input
        """
        kernel = np.zeros((kernel_size, kernel_size))
        kernel[(kernel_size - 1) // 2, :] = 1.0 / kernel_size
        # ddepth=-1 keeps the output depth equal to the input depth.
        return cv2.filter2D(frame, -1, kernel)

    def _generate_violent_pattern(self, frame_idx: int, num_frames: int) -> np.ndarray:
        """
        Generate one frame of the "violent" pattern.

        The first channel holds a 2-D wave whose spatial frequency and
        amplitude both follow ``sin(2*pi*t)`` with ``t = frame_idx / num_frames``.
        On top of it, between 2 and 8 random polygons (3-6 vertices each) are
        filled with the colour ``(1, r, 0)`` where ``r`` is uniform in [0, 0.5).

        Args:
            frame_idx (int): Current frame index
            num_frames (int): Total number of frames (must be non-zero)

        Returns:
            np.ndarray: Frame of shape (frame_size, frame_size, 3)
        """
        frame = np.zeros((self.frame_size, self.frame_size, 3))

        t = frame_idx / num_frames
        phase = np.sin(2 * np.pi * t)
        frequency = 5 + 3 * phase

        axis = np.linspace(0, frequency, self.frame_size)
        x, y = np.meshgrid(axis, axis)

        wave = np.sin(x) * np.cos(y) * phase
        frame[:, :, 0] = np.clip(wave * 0.5 + 0.5, 0, 1)

        num_shapes = int(frequency)
        for _ in range(num_shapes):
            num_vertices = np.random.randint(3, 7)
            # cv2.fillPoly only accepts 32-bit integer vertex arrays, whereas
            # np.random.randint returns the platform default integer (64-bit
            # on most systems), which OpenCV rejects with an assertion error.
            points = np.random.randint(
                0, self.frame_size, (num_vertices, 2)
            ).astype(np.int32)
            cv2.fillPoly(frame, [points], (1, np.random.random() * 0.5, 0))

        return frame

    def _generate_non_violent_pattern(
        self, frame_idx: int, num_frames: int
    ) -> np.ndarray:
        """
        Generate one frame of the "non-violent" pattern.

        The second channel is a static sinusoidal gradient; the third channel
        is a gradient whose spatial frequency grows with
        ``t = frame_idx / num_frames``. The first channel stays at zero.

        Args:
            frame_idx (int): Current frame index
            num_frames (int): Total number of frames (must be non-zero)

        Returns:
            np.ndarray: Frame of shape (frame_size, frame_size, 3)
        """
        frame = np.zeros((self.frame_size, self.frame_size, 3))

        t = frame_idx / num_frames
        axis = np.linspace(0, 1, self.frame_size)
        x, y = np.meshgrid(axis, axis)

        frame[:, :, 1] = np.sin(3 * np.pi * x) * np.cos(3 * np.pi * y) * 0.5 + 0.5
        frame[:, :, 2] = (
            np.cos(2 * np.pi * x * t) * np.sin(2 * np.pi * y * t) * 0.3 + 0.7
        )

        return frame

    def generate_sequence(
        self, num_frames: int, is_violent: bool = False
    ) -> Tuple[np.ndarray, List[FrameMetadata]]:
        """
        Generate a sequence of frames together with per-frame metadata.

        Violent sequences are additionally blurred with a kernel size drawn
        at random from {3, 5, 7} for every frame. Metadata values are random
        draws from class-dependent ranges; they are not measured from the
        generated pixels.

        Args:
            num_frames (int): Number of frames to generate
            is_violent (bool): Whether to generate violent or non-violent sequence

        Returns:
            Tuple[np.ndarray, List[FrameMetadata]]: Array of frames stacked
            along the first axis, and one metadata record per frame
        """
        pattern_type = "violent" if is_violent else "non_violent"
        generate_frame = self._supported_patterns[pattern_type]
        frames = []
        metadata = []

        for i in range(num_frames):
            frame = generate_frame(i, num_frames)

            if is_violent:
                # Plain int so the kernel shape is built from Python integers.
                kernel_size = int(np.random.choice([3, 5, 7]))
                frame = self._apply_motion_blur(frame, kernel_size)

            frames.append(frame)

            metadata.append(
                FrameMetadata(
                    motion_intensity=np.random.uniform(0.7, 1.0)
                    if is_violent
                    else np.random.uniform(0.1, 0.5),
                    scene_complexity=np.random.uniform(0.6, 1.0)
                    if is_violent
                    else np.random.uniform(0.2, 0.7),
                    lighting_condition=np.random.choice(["dark", "bright", "mixed"]),
                    timestamp=i / num_frames,
                    blur_factor=np.random.uniform(0.5, 1.0)
                    if is_violent
                    else np.random.uniform(0.1, 0.4),
                    dominant_color=tuple(np.random.random(3)),
                )
            )

        return np.array(frames), metadata


class ViolenceDetectionDataset:
    """
    Build and hold the synthetic violence detection dataset.

    The dataset is stored frame by frame: ``data["X"]`` holds every frame of
    every sequence back to back, ``data["y"]`` holds one label per frame
    (1 = violent, 0 = non-violent) and ``data["metadata"]`` one
    ``FrameMetadata`` per frame. Frames belonging to the same sequence are
    contiguous and in temporal order.

    Args:
        num_samples (int): Number of sequences to generate; ``num_samples // 2``
            sequences are generated per class, so an odd value yields one
            sequence fewer than requested
        sequence_length (int): Number of frames in each sequence
    """

    def __init__(self, num_samples: int = 200, sequence_length: int = 45):
        self.num_samples = num_samples
        self.sequence_length = sequence_length
        self.frame_generator = VideoFrameGenerator()
        # Generated eagerly: constructing the dataset builds every frame.
        self.data = self._generate_dataset()

    def _generate_dataset(self) -> Dict:
        """
        Generate the complete dataset with balanced classes.

        All violent sequences are generated first, followed by all
        non-violent sequences.

        Returns:
            Dict: ``{"X": frames, "y": per-frame labels, "metadata": list}``
        """
        logger.info("Generating sophisticated mock dataset...")

        X, y, metadata = [], [], []
        sequences_per_class = self.num_samples // 2

        for is_violent in [True, False]:
            label = 1 if is_violent else 0
            for _ in range(sequences_per_class):
                frames, frame_metadata = self.frame_generator.generate_sequence(
                    self.sequence_length, is_violent
                )
                X.extend(frames)
                y.extend([label] * len(frames))
                metadata.extend(frame_metadata)

        return {"X": np.array(X), "y": np.array(y), "metadata": metadata}

    def get_train_test_split(
        self, test_size: float = 0.2
    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
        """
        Split the dataset into training and testing sets without breaking sequences.

        The split is drawn over whole sequences rather than individual frames.
        The returned arrays keep the flat, frame-level layout of ``data``
        (frames of one sequence contiguous and in order), so a caller may
        reshape them to ``(-1, sequence_length, ...)`` and obtain intact,
        single-label sequences. Shuffling individual frames would instead mix
        frames of different sequences and classes into one clip.

        ``test_size`` is applied to the number of sequences, so the number of
        frames in each part is always a multiple of ``sequence_length``.

        Args:
            test_size (float): Proportion of sequences for testing

        Returns:
            Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
            ``X_train, X_test, y_train, y_test`` at frame granularity
        """
        frames = self.data["X"]
        labels = self.data["y"]
        seq_len = self.sequence_length

        num_sequences = len(frames) // seq_len if seq_len > 0 else 0
        if num_sequences == 0:
            # Nothing to group into sequences; defer to scikit-learn so the
            # caller gets its usual error for an empty dataset.
            return train_test_split(
                frames, labels, test_size=test_size, random_state=42
            )

        frame_shape = frames.shape[1:]
        # Reshaping is a view; only the split itself copies the data.
        X_seq = frames.reshape((num_sequences, seq_len) + frame_shape)
        y_seq = labels.reshape(num_sequences, seq_len)

        X_train, X_test, y_train, y_test = train_test_split(
            X_seq, y_seq, test_size=test_size, random_state=42
        )

        return (
            X_train.reshape((-1,) + frame_shape),
            X_test.reshape((-1,) + frame_shape),
            y_train.reshape(-1),
            y_test.reshape(-1),
        )


class ViolenceDetectionModel:
    """
    Two-stream recurrent model on top of an InceptionV3 frame encoder.

    Every frame of a sequence is encoded by InceptionV3; the resulting
    feature sequence feeds a stacked bidirectional LSTM stream and a
    bidirectional GRU stream with self-attention. Both streams are
    concatenated, passed through dense layers, averaged over time and mapped
    to two heads:

    - ``main_output``: one sigmoid unit per sequence, shape ``(batch, 1)``
    - ``aux_output``: 3-way softmax per sequence, shape ``(batch, 3)``

    Args:
        input_shape (Tuple[int, ...]): Shape of the full input array,
            ``(num_sequences, sequence_length, height, width, channels)``.
            The leading dimension is ignored when building the model.

    Raises:
        ValueError: If ``input_shape`` does not have exactly five dimensions
    """

    def __init__(self, input_shape: Tuple[int, ...]):
        if len(input_shape) != 5:
            raise ValueError(
                "input_shape must be (num_sequences, sequence_length, height, "
                f"width, channels); got {tuple(input_shape)!r}"
            )
        self.input_shape = input_shape
        self.model = self._build_model()

    def _build_model(self) -> models.Model:
        """
        Build the network architecture.

        Returns:
            models.Model: Uncompiled Keras model; call ``compile_model``
            before training
        """
        # input_shape[2:] is (height, width, channels) of a single frame.
        base_model = applications.InceptionV3(
            weights="imagenet", include_top=False, input_shape=self.input_shape[2:]
        )

        # Freeze the first 249 layers of the backbone; the rest stay trainable.
        # NOTE(review): 249 appears to follow the Keras InceptionV3
        # fine-tuning example (top two inception blocks trainable) - confirm.
        for layer in base_model.layers[:249]:
            layer.trainable = False

        # input_shape[1:] is (sequence_length, height, width, channels).
        input_layer = layers.Input(shape=self.input_shape[1:])

        # Per-frame feature vectors.
        x = TimeDistributed(base_model)(input_layer)
        x = TimeDistributed(layers.GlobalAveragePooling2D())(x)

        lstm_stream = Bidirectional(LSTM(256, return_sequences=True))(x)
        lstm_stream = Bidirectional(LSTM(128, return_sequences=True))(lstm_stream)

        gru_stream = Bidirectional(GRU(256, return_sequences=True))(x)
        # Self-attention: the GRU features serve as both query and value.
        attention_layer = Attention()([gru_stream, gru_stream])

        merged = layers.Concatenate()([lstm_stream, attention_layer])

        x = layers.Dense(512, activation="relu")(merged)
        x = layers.Dropout(0.5)(x)
        x = layers.Dense(256, activation="relu")(x)
        x = layers.Dropout(0.3)(x)

        # Collapse the time axis so both heads predict once per sequence.
        x = layers.GlobalAveragePooling1D()(x)

        main_output = layers.Dense(1, activation="sigmoid", name="main_output")(x)
        aux_output = layers.Dense(3, activation="softmax", name="aux_output")(x)

        return models.Model(inputs=input_layer, outputs=[main_output, aux_output])

    def compile_model(self, learning_rate: float = 0.0001):
        """
        Compile the model with Adam and per-output losses and metrics.

        The total loss is ``1.0 * main_output loss + 0.3 * aux_output loss``.

        Args:
            learning_rate (float): Initial learning rate
        """
        self.model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
            loss={
                "main_output": "binary_crossentropy",
                "aux_output": "sparse_categorical_crossentropy",
            },
            loss_weights={"main_output": 1.0, "aux_output": 0.3},
            # Metrics are keyed by output name: a flat one-element list is
            # rejected for multi-output models by Keras 3, while the dict
            # form is accepted by both Keras 2 and Keras 3.
            metrics={"main_output": ["accuracy"], "aux_output": ["accuracy"]},
        )

    def train(
        self, train_data: Dict, test_data: Dict, epochs: int = 20, batch_size: int = 16
    ) -> "tf.keras.callbacks.History":
        """
        Train the model with learning-rate reduction and early stopping.

        Both callbacks monitor ``val_main_output_loss``: the learning rate is
        halved after 3 epochs without improvement (down to 1e-6) and training
        stops after 5 such epochs, restoring the best weights.

        Args:
            train_data (Dict): Training data with keys ``"X"`` (input
                sequences), ``"y"`` (targets for ``main_output``) and
                ``"aux_y"`` (integer class targets for ``aux_output``).
                Both heads predict once per sequence, so targets should
                carry one value per sequence.
            test_data (Dict): Validation data with the same keys
            epochs (int): Maximum number of training epochs
            batch_size (int): Batch size for training

        Returns:
            tf.keras.callbacks.History: Object returned by ``Model.fit``;
            per-epoch values are available in its ``history`` attribute
        """
        callbacks = [
            tf.keras.callbacks.ReduceLROnPlateau(
                monitor="val_main_output_loss", factor=0.5, patience=3, min_lr=1e-6
            ),
            tf.keras.callbacks.EarlyStopping(
                monitor="val_main_output_loss", patience=5, restore_best_weights=True
            ),
        ]

        train_targets = {
            "main_output": train_data["y"],
            "aux_output": train_data["aux_y"],
        }
        validation_targets = {
            "main_output": test_data["y"],
            "aux_output": test_data["aux_y"],
        }

        return self.model.fit(
            train_data["X"],
            train_targets,
            epochs=epochs,
            batch_size=batch_size,
            validation_data=(test_data["X"], validation_targets),
            callbacks=callbacks,
        )


def main():
    """
    Main execution function.

    Orchestrates the complete training pipeline:
    1. Dataset creation and grouping of frames into sequences
    2. Sequence-level train/test split with one label per sequence
    3. Model initialization and training
    4. Results visualization
    """
    # Create dataset
    dataset = ViolenceDetectionDataset(num_samples=200)

    # Group the flat frame arrays back into sequences. The dataset stores the
    # frames of each sequence contiguously and in order, and every frame of a
    # sequence carries the same label, so the first frame's label is the
    # sequence label. Sizes are taken from the dataset instead of being
    # hard-coded.
    seq_length = dataset.sequence_length
    frames = dataset.data["X"]
    X = frames.reshape((-1, seq_length) + frames.shape[1:])
    y = dataset.data["y"].reshape(-1, seq_length)[:, 0]

    # Split whole sequences so that every example is a contiguous,
    # single-label clip. The model predicts once per sequence, so the targets
    # must hold one value per sequence, not one per frame.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Create auxiliary targets: random placeholder classes in {0, 1, 2}, one
    # per sequence, matching the 3-way auxiliary head.
    y_aux_train = np.random.randint(0, 3, size=y_train.shape[0])
    y_aux_test = np.random.randint(0, 3, size=y_test.shape[0])

    # Prepare data dictionaries
    train_data = {"X": X_train, "y": y_train, "aux_y": y_aux_train}
    test_data = {"X": X_test, "y": y_test, "aux_y": y_aux_test}

    # Create and train model
    model = ViolenceDetectionModel(input_shape=X_train.shape)
    model.compile_model()

    logger.info("Training advanced model...")
    history = model.train(train_data, test_data)

    # Visualize results: one panel per (title, y-axis label, history key).
    panels = [
        ("Main Task Accuracy", "Accuracy", "main_output_accuracy"),
        ("Main Task Loss", "Loss", "main_output_loss"),
        ("Auxiliary Task Accuracy", "Accuracy", "aux_output_accuracy"),
    ]

    plt.figure(figsize=(15, 5))

    for position, (title, y_label, key) in enumerate(panels, start=1):
        plt.subplot(1, len(panels), position)
        plt.plot(history.history[key], label="Training")
        plt.plot(history.history[f"val_{key}"], label="Validation")
        plt.title(title)
        plt.xlabel("Epoch")
        plt.ylabel(y_label)
        plt.legend()

    plt.tight_layout()
    plt.show()


# Run the full pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()