In [54]:
import os
import numpy as np
import cv2
from tkinter import Tk, filedialog, messagebox

In [55]:
def select_video_files() -> list:
    """Ask the user to pick one or more video files through a file dialog.

    Returns:
        list: Paths of the selected video files.

    Raises:
        ValueError: If the dialog is cancelled or no file is selected.
    """
    # A hidden root window is needed so that only the dialog itself is shown
    root = Tk()
    root.withdraw()

    try:
        # Open file dialog to select video files
        video_files = filedialog.askopenfilenames(
            parent=root,
            title="Select Video Files",
            filetypes=[("Video Files", "*.mp4 *.avi *.mkv *.mov")]
        )
    finally:
        # Destroy the hidden root so repeated calls do not pile up Tk instances
        root.destroy()

    if not video_files:
        raise ValueError("No video files selected.")

    print(f"Selected {len(video_files)} videos.")

    # askopenfilenames returns a tuple; convert it to honour the declared return type
    return list(video_files)

In [56]:
def merge_frames(video_files: list) -> np.ndarray:
    """
    Blend sample frames into a single preview image.

    With several videos, the first frame of each video is blended.
    With a single video, its first, middle and last frames are blended instead.
    Every frame that could be read gets the same weight, so an unreadable
    frame does not darken the result.

    Args:
        video_files (list): List of video files.

    Returns:
        np.ndarray: Merged uint8 image, or None when `video_files` is empty
        or no frame could be read.
    """
    if not video_files:
        return None

    frames = []

    if len(video_files) > 1:
        for video_file in video_files:
            cap = cv2.VideoCapture(video_file)
            try:
                frame = _read_frame(cap)
            finally:
                cap.release()

            if frame is None:
                print(f"Could not read first frame of {video_file}")
                continue

            frames.append(frame)

    else:
        video_file = video_files[0]
        cap = cv2.VideoCapture(video_file)
        try:
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

            # First, middle and last frame. Indices are clamped to the valid
            # range and de-duplicated so very short videos (or an unknown
            # frame count) are not sampled out of range or twice.
            last_idx = max(total_frames - 1, 0)
            middle_idx = min(max(total_frames // 2, 0), last_idx)
            selected_frame_indices = list(dict.fromkeys([0, middle_idx, last_idx]))

            for frame_idx in selected_frame_indices:
                frame = _read_frame(cap, frame_idx)

                if frame is None:
                    print(f"Could not read frame {frame_idx} from {video_file}")
                    continue

                frames.append(frame)
        finally:
            cap.release()

    return _blend_frames(frames)


def _read_frame(cap, frame_idx=None):
    """Read one frame from an open capture, seeking to `frame_idx` first when given.

    Returns the frame, or None when it could not be read.
    """
    if frame_idx is not None:
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
    success, frame = cap.read()
    return frame if success else None


def _blend_frames(frames):
    """Average equally weighted frames into one uint8 image (None for an empty list)."""
    if not frames:
        return None

    height, width = frames[0].shape[:2]
    # Accumulate in float so per-frame truncation does not add up
    accumulator = np.zeros(frames[0].shape, dtype=np.float32)

    for frame in frames:
        if frame.shape[:2] != (height, width):
            # Bring frames of a different resolution to the size of the first one
            frame = cv2.resize(frame, (width, height))
        accumulator += frame

    return np.rint(accumulator / len(frames)).astype(np.uint8)

In [57]:
def calculate_mean_points(point_pairs, horizontal=False):
    """Average the selected point pairs across all videos.

    Args:
        point_pairs: One ``(point_1, point_2)`` pair per video, each point
            being an ``(x, y)`` coordinate.
        horizontal: When truthy, both averaged points are given the same
            y-coordinate (the floor of the mean of their two y-values).

    Returns:
        tuple: ``(mean_point_1, mean_point_2)`` as integer numpy arrays.
    """
    # Element-wise average over the videos, truncated to whole pixels
    averaged = np.mean(point_pairs, axis=0).astype(int)
    first_point, second_point = averaged

    if horizontal:
        # Put both points on the same row
        shared_y = (first_point[1] + second_point[1]) // 2
        first_point[1] = second_point[1] = shared_y

    print(f"Mean points: {first_point, second_point}")
    return first_point, second_point

In [58]:
def save_video(video_path, output_folder, rotate_matrix, translate_matrix, width, height, fps):
    """Write a transformed copy of a video into `output_folder`.

    Each frame is mapped by `rotate_matrix` followed by `translate_matrix`.
    The two 2x3 affine matrices are composed once and applied in a single
    warp, so frames are interpolated only once and pixels that the
    intermediate step would have pushed outside the canvas are not lost
    before the translation brings them back.

    Args:
        video_path: Path of the input video.
        output_folder: Existing folder that receives the output file
            (same file name as the input).
        rotate_matrix: 2x3 affine matrix applied first.
        translate_matrix: 2x3 affine matrix applied second.
        width: Output frame width in pixels.
        height: Output frame height in pixels.
        fps: Frame rate of the output video.
    """
    video_name = os.path.basename(video_path)
    output_path = os.path.join(output_folder, video_name)

    # Promote both matrices to 3x3 so they can be multiplied, then keep the top 2 rows
    bottom_row = [0.0, 0.0, 1.0]
    rotate_3x3 = np.vstack([np.asarray(rotate_matrix, dtype=np.float64), bottom_row])
    translate_3x3 = np.vstack([np.asarray(translate_matrix, dtype=np.float64), bottom_row])
    combined_matrix = (translate_3x3 @ rotate_3x3)[:2]

    cap = cv2.VideoCapture(video_path)
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    try:
        if not cap.isOpened():
            print(f"Could not open {video_path}")
            return
        if not out.isOpened():
            print(f"Could not create {output_path}")
            return

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Apply rotation, scaling and translation in one pass
            frame = cv2.warpAffine(frame, combined_matrix, (width, height))
            out.write(frame)
    finally:
        # Always free the reader and finalize the output file
        cap.release()
        out.release()

    print(f'Aligned {video_name}')

def apply_transformations(video_files, point_pairs, mean_point_1, mean_point_2):
    """Align every video so its two reference points land on the mean points.

    For each video a similarity transform (scale, rotation, translation) is
    computed that maps the video's own point pair onto
    ``(mean_point_1, mean_point_2)``; the transformed video is written by
    `save_video` into an ``aligned`` folder next to the first input video.

    Args:
        video_files: Paths of the videos to align.
        point_pairs: One ``(point1, point2)`` pair per video, in the same
            order as `video_files`; each point is an ``(x, y)`` coordinate.
        mean_point_1: Target position for the first point of every pair.
        mean_point_2: Target position for the second point of every pair.
    """
    if len(video_files) == 0:
        print("No videos to align.")
        return

    output_folder = os.path.join(os.path.dirname(video_files[0]), 'aligned')
    os.makedirs(output_folder, exist_ok=True)

    mean_point_1 = np.asarray(mean_point_1, dtype=float)
    mean_point_2 = np.asarray(mean_point_2, dtype=float)

    # Compute global mean properties
    mean_vector = mean_point_2 - mean_point_1
    mean_length = np.linalg.norm(mean_vector)
    mean_angle = np.arctan2(mean_vector[1], mean_vector[0])

    for video_path, (point1, point2) in zip(video_files, point_pairs):
        cap = cv2.VideoCapture(video_path)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        cap.release()

        # Compute transformation parameters
        point1 = np.asarray(point1, dtype=float)
        point2 = np.asarray(point2, dtype=float)
        vector = point2 - point1
        length = np.linalg.norm(vector)

        if length == 0:  # Avoid division by zero
            print(f"Skipping {video_path} due to zero-length vector.")
            continue

        angle = np.arctan2(vector[1], vector[0])
        scale = mean_length / length

        # cv2.getRotationMatrix2D(center, theta, s) builds
        # s * [[cos, sin], [-sin, cos]], which turns a vector measured with
        # arctan2 in pixel coordinates by -theta. To move `angle` onto
        # `mean_angle` we therefore need theta = angle - mean_angle.
        rotation_angle = np.degrees(angle - mean_angle)

        # Rotation matrix
        center = (width // 2, height // 2)
        rotate_matrix = cv2.getRotationMatrix2D(center, rotation_angle, scale)

        # Compute translation matrix: shift the rotated first point onto its target
        new_point1 = rotate_matrix[:, :2] @ point1 + rotate_matrix[:, 2]
        dx, dy = mean_point_1 - new_point1
        translate_matrix = np.float32([[1, 0, dx], [0, 1, dy]])

        save_video(video_path, output_folder, rotate_matrix, translate_matrix, width, height, fps)
        print(f"Scale {scale:.2f}, Rotation {rotation_angle:.2f}°, Translation {dx:.2f}, {dy:.2f}")

    print(f"Aligned videos saved in '{output_folder}'.")


In [59]:
def zoom_in_display(frame, x, y, zoom_scale = 5, zoom_window_size = 25):
    """Build a magnified view around a pointer position and choose where to overlay it.

    Args:
        frame: Image (rows x columns [x channels]) to take the magnified region from.
        x: Pointer column. Values outside the frame are clamped to its border.
        y: Pointer row. Values outside the frame are clamped to its border.
        zoom_scale: Magnification factor passed to `cv2.resize`.
        zoom_window_size: Half-size, in pixels, of the square region to magnify.

    Returns:
        tuple: ``(zoomed_area_resized, placement)`` where `placement` is
        ``(overlay_x1, overlay_x2, overlay_y1, overlay_y2)``, the slice of
        `frame` the magnified view is meant to cover: the top-right corner,
        or the top-left corner when the pointer is itself near the top-right.
    """
    frame_height, frame_width = frame.shape[:2]

    # Mouse events can report positions outside the image (e.g. while
    # dragging); clamp them so the slice below is never empty or wrapped.
    x = min(max(int(x), 0), frame_width - 1)
    y = min(max(int(y), 0), frame_height - 1)

    # Create zoomed-in display
    x1 = max(0, x - zoom_window_size)
    x2 = min(frame_width, x + zoom_window_size)
    y1 = max(0, y - zoom_window_size)
    y2 = min(frame_height, y + zoom_window_size)

    zoomed_area = frame[y1:y2, x1:x2]

    # Resize zoomed-in area
    zoomed_area_resized = cv2.resize(zoomed_area, None, fx=zoom_scale, fy=zoom_scale, interpolation=cv2.INTER_LINEAR)

    # Put the crosshair on the magnified pointer pixel. Near the frame border
    # the region is cropped, so the pointer is not at the centre of the view.
    cross_x = int((x - x1 + 0.5) * zoom_scale)
    cross_y = int((y - y1 + 0.5) * zoom_scale)
    color = (0, 255, 0)  # Green crosshair
    thickness = 2
    line_length = 20  # Length of crosshair lines

    # Draw vertical line
    cv2.line(zoomed_area_resized, (cross_x, cross_y - line_length), (cross_x, cross_y + line_length), color, thickness)
    # Draw horizontal line
    cv2.line(zoomed_area_resized, (cross_x - line_length, cross_y), (cross_x + line_length, cross_y), color, thickness)

    zoom_height, zoom_width = zoomed_area_resized.shape[:2]
    margin = 10

    if x2 > (frame_width - zoom_width - margin) and y1 < (margin + zoom_height):
        # Pointer is under the top-right corner: overlay in the top-left corner instead
        overlay_x1 = margin
        overlay_x2 = margin + zoom_width
    else:
        # Overlay zoomed-in area in the top-right corner of the frame
        overlay_x1 = frame_width - zoom_width - margin
        overlay_x2 = frame_width - margin

    overlay_y1 = margin
    overlay_y2 = margin + zoom_height

    placement = (overlay_x1, overlay_x2, overlay_y1, overlay_y2)

    return zoomed_area_resized, placement
        

In [60]:
def select_point_pairs(video_files: list) -> list:
    """Interactively collect two reference points on every video.

    For each video a preview image (built by `merge_frames`) is shown in an
    OpenCV window. A left-click proposes a point, Enter confirms it, and a
    magnified view around the pointer (from `zoom_in_display`) is overlaid
    on the preview.

    Args:
        video_files (list): Paths of the videos to annotate.

    Returns:
        list: One ``[point_1, point_2]`` entry per video, each point an
        ``(x, y)`` tuple, in the same order as `video_files`.
        Returns None when the user quits with 'q'.

    Raises:
        ValueError: If no preview frame could be read from a video.
    """
    window_name = 'Select Points'
    enter_keys = (13, 10)  # carriage return / line feed, depending on the platform

    print("Instructions:")
    print("1. Left-click to select points.")
    print("2. Enter to confirm the current point.")
    print("3. Select two points on each video to align them.")
    print("Press 'q' to quit without aligning.")

    # Initialize Tkinter and hide the root window (used only for the exit dialog)
    root = Tk()
    root.withdraw()

    point_pairs = []  # To store pairs of points for each video

    # Per-video state shared with the callbacks below
    frame = None            # Preview image of the current video
    confirmed_points = []   # Points confirmed so far on the current video
    current_point = None    # Clicked but not yet confirmed point
    last_cursor = None      # Last pointer position, used to place the zoom view

    def redraw():
        """Render points and zoom view on a fresh copy of the preview and show it."""
        canvas = frame.copy()

        # Draw the current point (green)
        if current_point is not None:
            cv2.circle(canvas, current_point, 3, (0, 255, 0), -1)
        # Draw the confirmed points (red in OpenCV's BGR order)
        for point in confirmed_points:
            cv2.circle(canvas, point, 3, (0, 0, 255), -1)

        # Display the zoomed-in area
        if last_cursor is not None:
            zoomed_area_resized, placement = zoom_in_display(canvas, *last_cursor)
            overlay_x1, overlay_x2, overlay_y1, overlay_y2 = placement
            region = canvas[overlay_y1:overlay_y2, overlay_x1:overlay_x2]
            # Skip the overlay when it does not fit (e.g. very small frames)
            if region.shape == zoomed_area_resized.shape:
                region[:] = zoomed_area_resized

        cv2.imshow(window_name, canvas)

    def select_points(event, x, y, flags, param):
        """Mouse callback: track the pointer and record left-clicks."""
        nonlocal current_point, last_cursor

        # Mouse events may report positions outside the image; keep them inside
        x = min(max(x, 0), frame.shape[1] - 1)
        y = min(max(y, 0), frame.shape[0] - 1)
        last_cursor = (x, y)

        if event == cv2.EVENT_LBUTTONDOWN:
            # Update the current point with the clicked position
            current_point = (x, y)

        redraw()

    def confirm_point():
        """Confirm the current point and add it to the list."""
        nonlocal current_point
        if current_point is None:
            return
        confirmed_points.append(current_point)
        current_point = None
        redraw()
        print(f"Point confirmed: {confirmed_points[-1]}")  # Feedback to the user

    try:
        # Collect two points for each video
        for video_path in video_files:
            frame = merge_frames([video_path]) # we make video_path a list because merge_frames expects a list
            if frame is None:
                raise ValueError(f"Could not read any frame from {video_path}")

            confirmed_points = []
            current_point = None
            last_cursor = None

            # Show the preview and route mouse events to the callback
            cv2.imshow(window_name, frame)
            cv2.setMouseCallback(window_name, select_points)

            # Wait for user to confirm two points
            while len(confirmed_points) < 2:
                key = cv2.waitKey(1) & 0xFF
                if key in enter_keys:  # Enter key to confirm the current point
                    confirm_point()
                elif key == ord('q'):  # Press 'q' to quit
                    response = messagebox.askquestion("Exit", "Do you want to exit aligner?", parent=root)
                    if response == 'yes':
                        print("Exiting point selection.")
                        return None

            # Save the confirmed points
            point_pairs.append(confirmed_points)
    finally:
        # Close the OpenCV window and the hidden Tk root on every exit path
        cv2.destroyAllWindows()
        root.destroy()

    return point_pairs

In [61]:
def align_videos():
    """Run the interactive alignment workflow from file selection to saved videos.

    Steps: choose the videos, mark two reference points on each one, ask
    whether the target points should share the same horizontal line, then
    write the aligned videos. Nothing is written when point selection is
    cancelled.
    """
    # Open file dialog to select video files
    video_files = select_video_files()

    point_pairs = select_point_pairs(video_files)
    if not point_pairs:
        # select_point_pairs returns None when the user quits
        print("Alignment cancelled: no points were selected.")
        return

    # Use a dedicated hidden root so the dialog does not depend on a root left behind elsewhere
    root = Tk()
    root.withdraw()
    try:
        response = messagebox.askquestion("Alignment", "Do you want the points to stand on the same horizontal line?", parent=root)
    finally:
        root.destroy()

    # askquestion returns the strings 'yes' or 'no'; both are truthy, so compare explicitly
    mean_point_1, mean_point_2 = calculate_mean_points(point_pairs, horizontal=(response == 'yes'))

    apply_transformations(video_files, point_pairs, mean_point_1, mean_point_2)

In [62]:
# Run the interactive alignment workflow (opens a file dialog and an OpenCV window)
align_videos()

Selected 2 videos.
Instructions:
1. Left-click to select points.
2. Enter to confirm the current point.
3. Select two points on each video to align them.
Press 'q' to quit without aligning.
Point confirmed: (138, 111)
Point confirmed: (840, 111)
Point confirmed: (138, 110)
Point confirmed: (840, 110)
Mean points: (array([138, 110]), array([840, 110]))
Aligned social_R01_Hab.avi
Scale 1.00, Rotation 0.00°, Translation 0.00, -1.00
Aligned social_R01_TR1.avi
Scale 1.00, Rotation 0.00°, Translation 0.00, 0.00
Aligned videos saved in 'C:/Users/dhers/Desktop/prueba\aligned'.
