In [1]:
import cv2
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

In [8]:
def load_video_to_frames(video_path: str) -> list:
    """
    Load a video and convert it into a list of frames.

    Parameters:
    - video_path (str): Path to the video file.

    Returns:
    - frames (list): Frames in the order ``cap.read()`` produced them. The list
      is empty when the capture never reports itself as opened or when the very
      first read fails; no exception is raised for those cases.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # read() reports failure / end of stream through its first return value.
                break
            frames.append(frame)
    finally:
        # Release the capture even if reading raises part-way through the video.
        cap.release()

    return frames


def preprocess_frames(frames: list[np.ndarray], input_size: tuple[int, int], to_rgb: bool = True, normalize: bool = False,) -> list[np.ndarray]:
    """
    Resize a list of frames and optionally recolor / rescale them.

    Args:
        frames (list of numpy arrays): Frames to preprocess.
        input_size (tuple): Target size passed to ``cv2.resize`` as (width, height).
        to_rgb (bool): Convert each frame from BGR to RGB (needed for frames
            extracted with OpenCV).
        normalize (bool): Divide pixel values by 255.0 (False for EfficientNet
            models, True otherwise).

    Returns:
        list of numpy arrays: A new list holding, for every input frame, the result of
            1. resizing to ``input_size``,
            2. (optional) BGR -> RGB conversion,
            3. (optional) division by 255.0.
    """
    # Stage 1: resize everything into a fresh list (the input list is left alone).
    processed = []
    for raw in frames:
        processed.append(cv2.resize(raw, input_size))

    # Stage 2: swap the channel order if requested.
    if to_rgb:
        for idx, image in enumerate(processed):
            processed[idx] = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Stage 3: scale pixel values by 1/255 if requested.
    if normalize:
        for idx, image in enumerate(processed):
            processed[idx] = image / 255.0

    return processed


def predict_frames(model: "tf.keras.Model", frames: list) -> np.ndarray:
    """
    Run the model on every frame and return its raw outputs as a flat array.

    Parameters:
    - model (tf.keras.Model): Trained model; only its ``predict`` method is used.
    - frames (list): Frames already preprocessed by preprocess_frames(). They are
      stacked with ``np.array`` into a single batch, so they must share one shape.

    Returns:
    - predictions (np.ndarray): The output of ``model.predict`` flattened to 1-D.
      These are the raw model outputs, not thresholded 0/1 labels (presumably one
      probability per frame for a single-output model; confirm against the model).
      Apply ``np.round`` to the result to obtain hard labels. An empty array is
      returned when ``frames`` is empty.
    """
    if len(frames) == 0:
        # np.array([]) has shape (0,) and carries no image dimensions, so there is
        # nothing meaningful to hand to the model.
        return np.array([], dtype=np.float32)

    pred_probs = model.predict(np.array(frames))
    return np.asarray(pred_probs).flatten()

def main(video_path, model_path, input_size=(224, 224)):
    """
    Run the full pipeline on one video: load frames, preprocess them, load the
    model, and score every frame.

    Parameters:
    - video_path (str): Path to the video file.
    - model_path (str): Path to the saved model, loaded with
      ``tf.keras.models.load_model``.
    - input_size (tuple): Target frame size, forwarded unchanged to
      preprocess_frames(), which documents it as (width, height).

    Returns:
    - predictions: Whatever predict_frames() returns for the preprocessed frames.
    """
    # Frames are converted to RGB and left un-normalized before scoring.
    model_inputs = preprocess_frames(
        load_video_to_frames(video_path),
        input_size,
        to_rgb=True,
        normalize=False,
    )
    classifier = tf.keras.models.load_model(model_path)
    return predict_frames(classifier, model_inputs)

In [3]:
# Inputs for this run: one video segment, the trained model, and the frame size
# the frames are resized to before scoring.
SEGMENT_PATH = "output_segments_long_att4/segment_3.mp4"
MODEL_PATH = "models/b0_2_epochs_half_data_h5.h5"
INPUT_SIZE = (224, 224)

frames = load_video_to_frames(SEGMENT_PATH)
model = tf.keras.models.load_model(MODEL_PATH)
# Defaults apply here: BGR -> RGB conversion on, normalization off.
preprocessed_frames = preprocess_frames(frames, INPUT_SIZE)


In [9]:
# Score every preprocessed frame with the model loaded above.
# One-shot alternative that redoes loading and preprocessing:
#   predictions = main('output_segments_long_att4/segment_3.mp4', 'models/b0_2_epochs_half_data_h5.h5')
predictions = predict_frames(model=model, frames=preprocessed_frames)




In [None]:
# Sanity check: number of frames that went through preprocessing.
print(len(preprocessed_frames))

In [None]:
# Maps a rounded prediction (0 or 1) to a human-readable label.
contact_dict = {0: "no contact", 1: "contact"}
# Round the whole prediction array once, not once per plotted frame.
rounded_predictions = np.round(predictions)
# One figure per frame, titled with the frame index, label and raw prediction.
for i, frame in enumerate(preprocessed_frames):
    plt.imshow(frame)
    plt.title(f"Frame {i}, {contact_dict[rounded_predictions[i]]}, p={predictions[i]:.2f}")
    plt.show()
    

In [None]:
import matplotlib.pyplot as plt
import numpy as np

def display_frames_in_grid(preprocessed_frames, predictions, contact_dict, cols=4):
    """
    Display frames in rows of ``cols`` images (one figure per row), with each
    title colored by the rounded prediction.

    Args:
        preprocessed_frames (list of numpy arrays): Frames to display; each is passed to ``imshow``.
        predictions (sequence of floats): One prediction value per frame.
        contact_dict (dict): Maps a rounded prediction value (0 or 1) to its label.
        cols (int, optional): Number of frames per row / figure. Default is 4.

    Returns:
        None

    Raises:
        ValueError: If ``cols`` is smaller than 1.
    """
    if cols < 1:
        raise ValueError("cols must be at least 1")

    num_frames = len(preprocessed_frames)
    # Round every prediction once instead of re-rounding the whole array per frame.
    rounded = np.round(predictions)

    for row_start in range(0, num_frames, cols):
        # squeeze=False keeps `axs` two-dimensional even when cols == 1.
        fig, axs = plt.subplots(1, cols, figsize=(4 * cols, 4), squeeze=False)

        for offset, ax in enumerate(axs[0]):
            frame_idx = row_start + offset

            if frame_idx < num_frames:
                # Red title when the prediction rounds to 0, green when it rounds to 1.
                if rounded[frame_idx] < 0.5:
                    text_color = 'red'
                else:
                    text_color = 'green'

                ax.imshow(preprocessed_frames[frame_idx])
                ax.set_title(f"Frame {frame_idx}, {contact_dict[rounded[frame_idx]]}, p={predictions[frame_idx]:.2f}", color=text_color)

            ax.axis('off')  # Hide axes on filled and on unused trailing subplots alike

        plt.tight_layout()
        plt.show()
        # Free the figure once it has been shown so long videos do not pile up figures.
        plt.close(fig)

# Render every frame with its predicted contact label, four frames per row.
display_frames_in_grid(
    preprocessed_frames=preprocessed_frames,
    predictions=predictions,
    contact_dict=contact_dict,
)

In [15]:
# import cv2
# 
# 
# def create_labeled_video1(frames, predictions, contact_dict, output_path, frame_rate=29.97):
#     """
#     Create a video with colored labels above frames.
# 
#     Args:
#         frames (list of numpy arrays): List of frames to include in the video.
#         predictions (list of floats): List of prediction values for the frames.
#         contact_dict (dict): A dictionary mapping prediction values to labels.
#         output_path (str): Path to save the output video file.
#         frame_rate (float, optional): Frame rate of the output video. Default is 29.97 fps.
# 
#     Returns:
#         None
#     """
#     height, width, _ = frames[0].shape
#     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
#     out = cv2.VideoWriter(output_path, fourcc, frame_rate, (width, height))
#     # frames = [cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) for frame in frames]
# 
#     for frame, prediction in zip(frames, predictions):
#         # Determine text color based on the prediction value
#         if np.round(prediction) < 0.5:
#             text_color = (0, 0, 255)  # Red color for values > 0.5 (BGR format)
#         else:
#             text_color = (0, 255, 0)  # Green color for values <= 0.5 (BGR format)
# 
#         # Add label text above the frame
#         label = f"{contact_dict[np.round(prediction)]}, p={prediction:.2f}"
#         cv2.putText(frame, label, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, text_color, 2)
# 
#         out.write(frame)
# 
#     out.release()
#     cv2.destroyAllWindows()
#     
#     
# def create_labeled_video(frames, predictions, contact_dict, output_path, frame_rate=29.97):
#     """
#     Create a video with colored labels above frames.
# 
#     Args:
#         frames (list of numpy arrays): List of frames to include in the video.
#         predictions (list of floats): List of prediction values for the frames.
#         contact_dict (dict): A dictionary mapping prediction values to labels.
#         output_path (str): Path to save the output video file.
#         frame_rate (float, optional): Frame rate of the output video. Default is 29.97 fps.
# 
#     Returns:
#         None
#     """
#     height, width, _ = frames[0].shape
#     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
#     out = cv2.VideoWriter(output_path, fourcc, frame_rate, (width, height))
#     frames = [cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) for frame in frames]
# 
#     for frame, prediction in zip(frames, predictions):
#         # Create a blank frame to clear previous labels
#         blank_frame = np.zeros_like(frame)
# 
#         # Determine text color based on the prediction value
#         if np.round(prediction) > 0.5:
#             text_color = (0, 0, 255)  # Red color for values > 0.5 (BGR format)
#         else:
#             text_color = (0, 255, 0)  # Green color for values <= 0.5 (BGR format)
# 
#         # Add label text above the frame
#         label = f"Prediction: {contact_dict[np.round(prediction)]}, p={prediction:.2f}"
#         cv2.putText(blank_frame, label, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, text_color, 2)
# 
#         # Overlay the labeled frame on top of the blank frame
#         result_frame = cv2.addWeighted(frame, 1, blank_frame, 1, 0)
# 
#         out.write(result_frame)
# 
#     out.release()
#     cv2.destroyAllWindows()
    
    
def create_labeled_video_og(frames, predictions, output_path, frame_rate=29.97):
    """
    Write the frames to a video file with the prediction percentage drawn
    directly onto each frame.

    Args:
        frames (list of numpy arrays): RGB frames to include in the video; they are
            converted to BGR before writing. All frames are assumed to share the
            shape of ``frames[0]`` (height, width, channels).
        predictions (list of floats): One prediction value per frame. Frames and
            predictions are paired with ``zip``, so extras on either side are ignored.
        output_path (str): Path to save the output video file.
        frame_rate (float, optional): Frame rate of the output video. Default is 29.97 fps.

    Returns:
        None
    """
    height, width, _ = frames[0].shape
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, frame_rate, (width, height))

    try:
        for frame, prediction in zip(frames, predictions):
            # Convert RGB -> BGR one frame at a time instead of building a second full list.
            bgr_frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

            # Red when the prediction rounds to 0, green when it rounds to 1 (BGR tuples).
            if np.round(prediction) < 0.5:
                text_color = (0, 0, 255)
            else:
                text_color = (0, 255, 0)

            # Draw the prediction as a percentage with up to two decimals near the top-left corner.
            label = str(round(prediction * 100, 2)) + "%"
            cv2.putText(bgr_frame, label, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, text_color, 2)

            out.write(bgr_frame)
    finally:
        # Always finalize the output file, even if a frame fails part-way through.
        # No HighGUI window is opened here, so cv2.destroyAllWindows() is not called.
        out.release()

def create_labeled_video(frames, predictions, output_path, frame_rate=29.97):
    """
    Write the frames to a video file with the prediction percentage overlaid
    on each frame.

    The label is drawn on an all-zero image of the same shape and then added to
    the frame with ``cv2.addWeighted`` (weights 1 and 1).

    Args:
        frames (list of numpy arrays): RGB frames to include in the video; they are
            converted to BGR before writing. All frames are assumed to share the
            shape of ``frames[0]`` (height, width, channels).
        predictions (list of floats): One prediction value per frame. Frames and
            predictions are paired with ``zip``, so extras on either side are ignored.
        output_path (str): Path to save the output video file.
        frame_rate (float, optional): Frame rate of the output video. Default is 29.97 fps.

    Returns:
        None
    """
    height, width, _ = frames[0].shape
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, frame_rate, (width, height))

    try:
        for frame, prediction in zip(frames, predictions):
            # Convert RGB -> BGR one frame at a time instead of building a second full list.
            bgr_frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

            # Separate all-zero canvas that receives only the label text.
            text_layer = np.zeros_like(bgr_frame)

            # Red when the prediction rounds to 0, green when it rounds to 1 (BGR tuples).
            if np.round(prediction) < 0.5:
                text_color = (0, 0, 255)
            else:
                text_color = (0, 255, 0)

            # Whole-number percentage; int() truncates rather than rounds.
            label = str(int(prediction * 100)) + "%"
            cv2.putText(text_layer, label, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, text_color, 2)

            # Add the text layer onto the frame with equal weights.
            result_frame = cv2.addWeighted(bgr_frame, 1, text_layer, 1, 0)

            out.write(result_frame)
    finally:
        # Always finalize the output file, even if a frame fails part-way through.
        # No HighGUI window is opened here, so cv2.destroyAllWindows() is not called.
        out.release()




# Example usage: write the scored frames out as a labeled video clip.
create_labeled_video(
    preprocessed_frames,
    predictions,
    output_path="tmp_label_vid2.mp4",
)
