# Supplementary Movies (Videos)

In [None]:
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as tck
import seaborn as sns

import cv2
from scipy.interpolate import interp1d

import sys
sys.path.append("../") # Set parent directory to sys.path
sys.dont_write_bytecode = True
%load_ext autoreload
%autoreload 2
import src.utils as utils

# Colour scheme used by the figures below: four hex colours that are also
# registered as the seaborn default palette.
palette = palette2 = sns.color_palette(["#D81B60", "#1E88E5", "#FFC107", "#004D40"])
display(palette)  # show the swatches in the cell output
sns.set_theme(
    context='poster',
    style='ticks',
    palette=palette,
    font_scale=1.0,
)

## Config

In [None]:
# Parameters of the movie: clip window, sampling rates, session and paths.

# Sampling rates of the two loggers (Hz) and frame rate of the video
FPS = 30
ACC_SAMPLING_RATE = 25
GPS_SAMPLING_RATE = 1

# Length of the clip in seconds before / after the speaker turns on.
# Short window for test runs:
# BEFORE_SEC, AFTER_SEC = 5, 5
# Window for the final ("honban") run:
BEFORE_SEC, AFTER_SEC = 20, 20

DURATION = BEFORE_SEC + AFTER_SEC  # clip length in seconds
TOTAL_FRAMES = FPS * DURATION      # number of video frames in the clip

# Session to render
# session_id = "LBP01_S00"
session_id = "LBP03_S00"
test_id = session_id[:5]  # first five characters of the session ID

# Input CSV files, grouped in one folder per test ID
gps_data_path = f"../data/extracted-gps-data/{test_id}/{session_id}.csv"
acc_data_path = f"../data/extracted-imu-data/{test_id}/{session_id}.csv"

# Root folder of the extracted video frames and of the generated images.
# Change the path appropriately
# base_dir = "../path_to_base_dir"
base_dir = "C:/Users/ryoma/Desktop/logbot-v5-playback-supplementary-movies"

frames_directory, sensor_save_dir = (
    f"{base_dir}/video-frames/{session_id}",
    f"{base_dir}/sensor-data/{session_id}",
)

print(frames_directory, sensor_save_dir, sep="\n")

## Sensor data

In [None]:
# acceleration data
# Load the IMU log and keep the rows from BEFORE_SEC seconds before to
# AFTER_SEC seconds after the row at which the speaker turns on.
df_acc = pd.read_csv(acc_data_path)
speaker_turn_on_idx = utils.get_speaker_turn_on_idx(df_acc)
print(f"speaker_turn_on_idx: {speaker_turn_on_idx}")

_acc_start = speaker_turn_on_idx - BEFORE_SEC * ACC_SAMPLING_RATE
_acc_stop = speaker_turn_on_idx + AFTER_SEC * ACC_SAMPLING_RATE
# A negative start would be interpreted as "count from the end" and a stop past
# the end would silently shorten the window, so reject both explicitly.
if _acc_start < 0 or _acc_stop > len(df_acc):
    raise ValueError(
        f"Acceleration window [{_acc_start}, {_acc_stop}) does not fit into "
        f"the {len(df_acc)} rows of {acc_data_path}"
    )
df_acc = df_acc.iloc[_acc_start:_acc_stop]  # positional row slice
display(df_acc.head(3))

# x values shared by both panels of the sensor figure.
# NOTE(review): '_program_index' appears to be the sample index relative to
# the speaker onset (the plot ticks are placed in sample units) - confirm.
time_acc = df_acc['_program_index'].values
print(len(time_acc))
acc_x = df_acc['acc_x']
acc_y = df_acc['acc_y']
acc_z = df_acc['acc_z']

# ground speed data (GPS)
# The GPS log has one row per second, so the onset row is the 25 Hz onset
# index divided by ACC_SAMPLING_RATE (truncated to an integer).
gps_offset = 1  # number of rows by which the GPS window is shifted later
_speaker_turn_on_idx_1hz = int(speaker_turn_on_idx / ACC_SAMPLING_RATE)
_gps_window = slice(
    _speaker_turn_on_idx_1hz - BEFORE_SEC + gps_offset,
    _speaker_turn_on_idx_1hz + AFTER_SEC + gps_offset,
)

df_speed = pd.read_csv(gps_data_path)[_gps_window]
print(len(df_speed))
speed_data = df_speed['speed_distance_km_h']

# Time axis in seconds for the 1 Hz rows
# (alternative: time_speed = df_speed['_program_index'])
time_speed = np.linspace(-BEFORE_SEC, AFTER_SEC, num=DURATION)


# up-sampling of GPS data (1 Hz to 25 Hz)
# Row j of the 1 Hz window is placed j seconds after the window start and
# sample k of the 25 Hz series k / ACC_SAMPLING_RATE seconds after it, so that
# every ACC_SAMPLING_RATE-th up-sampled value coincides with a GPS row.
# Both time bases are therefore built with an explicit step: a
# np.linspace(0, DURATION, n) grid includes the end point, which spaces the
# 1 Hz rows DURATION / (DURATION - 1) seconds apart and stretches the speed
# trace relative to the acceleration trace (about half a second at the onset
# for a 20 s + 20 s window).
# NOTE(review): gps_offset was presumably tuned by eye; re-check it against
# the corrected time base.
_speed_values = np.asarray(speed_data, dtype=float)
if len(_speed_values) != DURATION:
    raise ValueError(
        f"Expected {DURATION} GPS rows in the window, got {len(_speed_values)}"
    )

time_speed_for_upsampling = np.arange(DURATION, dtype=float)
time_speed_25Hz = np.arange(DURATION * ACC_SAMPLING_RATE) / ACC_SAMPLING_RATE

# The 25 Hz samples of the final second lie after the last GPS row; hold the
# last measured value there instead of raising a bounds error.
interp_func = interp1d(
    time_speed_for_upsampling,
    _speed_values,
    kind='linear',
    bounds_error=False,
    fill_value=(_speed_values[0], _speed_values[-1]),
)
speed_data_25Hz = interp_func(time_speed_25Hz)

# 1 Hz view of the up-sampled series: every ACC_SAMPLING_RATE-th sample.
# Only needed for the optional marker plot of the speed panel.
_sample_positions = range(0, len(time_acc), ACC_SAMPLING_RATE)
_time_speed = np.array([time_acc[pos] for pos in _sample_positions])
_speed_data = [speed_data_25Hz[pos] for pos in _sample_positions]

# Print the lengths of the series that are plotted against each other
for _series in (time_acc, speed_data_25Hz, acc_x):
    print(len(_series))

In [None]:
# Inspect the first x value of the clipped acceleration window.
time_acc[0]

In [None]:
# Policy (the original note was written in English and in Japanese; both said
# the same):
# To make the 30 FPS video and the sensor data look synchronized, only the
# slider drawn over the acceleration / GPS image moves in step with the video
# frames.
# The acceleration and GPS curves themselves are a fixed background image on
# which the slider is drawn afterwards.
# The sampling rates of the acceleration and GPS data therefore do not need to
# be adjusted to the video frame rate for visualization.

# Spacing of the major x ticks in seconds (denser for short clips)
INTERVAL_SEC = 2 if BEFORE_SEC < 10 else 5

# Tick positions are given in units of 25 Hz samples and labelled in seconds;
# they cover -30 s to +30 s (end excluded).
xticks = np.arange(-30*ACC_SAMPLING_RATE, 30*ACC_SAMPLING_RATE, INTERVAL_SEC*ACC_SAMPLING_RATE)
xticklabels = np.arange(-30, 30, INTERVAL_SEC)

# Visible range: the clip window plus a 10 % margin on each side.
# Both limits use the same expression; truncating only the left one with int()
# removed the left margin whenever BEFORE_SEC * 1.1 is not an integer
# (e.g. the 5 s test setting).
xlim = (
    -(BEFORE_SEC + BEFORE_SEC*0.1)*ACC_SAMPLING_RATE,
    (AFTER_SEC + AFTER_SEC*0.1)*ACC_SAMPLING_RATE,
)

# 1. plot the acceleration and GPS data (fixed)
# 2. add a slider bar (corresponding to the 30 FPS video frames)
# 3. save the image with the slider (30 FPS)
#
# The curves, ticks and shading are the same for every frame, so the figure is
# built once; only the slider lines are added and removed for each frame.
import os

# savefig does not create missing folders
os.makedirs(sensor_save_dir, exist_ok=True)

# Width of the grey span starting at x = 0, in seconds
SHADED_SEC = 4.2

GRIDSPEC_KW = {'wspace': 0.1, 'hspace': 0.5}
fig, ax = plt.subplots(2, 1, figsize=(12.8, 9.6), gridspec_kw=GRIDSPEC_KW)
try:
    # Acceleration data
    ax[0].plot(time_acc, acc_x, color=palette[0], linewidth=1.5, label='x')
    ax[0].plot(time_acc, acc_y, color=palette[2], linewidth=1.5, label='y')
    ax[0].plot(time_acc, acc_z, color=palette[1], linewidth=1.5, label='z')
    ax[0].set_title('Acceleration Data', pad=10)
    ax[0].set_ylabel('Acc (g)', labelpad=10)
    ax[0].set_yticks(np.arange(-8, 9, 4))
    ax[0].set_ylim(-8.8, 8.8)
    ax[0].grid(True)
    ax[0].axvspan(0, SHADED_SEC*ACC_SAMPLING_RATE, color="#808080", alpha=0.25)
    ax[0].legend(ncol=3, loc='lower left')

    # GPS data (speed)
    # For simplicity, the 1 Hz GPS data were up-sampled to 25 Hz using linear
    # interpolation. Otherwise, there will be misalignment of the x-axis ticks
    # for the acceleration and GPS data, resulting in an untidy display.
    ax[1].plot(time_acc, speed_data_25Hz, linewidth=5, color="#FFC107")
    # ax[1].plot(_time_speed, _speed_data, marker="o", color="#FFC107")
    ax[1].set_title('Speed', pad=10)
    ax[1].set_ylabel('Speed (km/h)', labelpad=10)
    ax[1].set_yticks(np.arange(0, 100, 30))
    ax[1].set_ylim(-5, 95)
    ax[1].grid(True)
    ax[1].axvspan(0, SHADED_SEC*ACC_SAMPLING_RATE, color="#808080", alpha=0.25)

    # x axis settings shared by both panels
    for panel in ax:
        panel.set_xticks(xticks)
        panel.set_xticklabels(xticklabels)
        panel.set_xlim(xlim)
        panel.xaxis.set_minor_locator(tck.AutoMinorLocator(2))
        panel.yaxis.set_minor_locator(tck.AutoMinorLocator(2))

    # For a quick check, loop over range(3) or range(FPS*2+1) instead.
    for frame_idx in range(TOTAL_FRAMES):
        # Slider (maps the 30 FPS video frame to the 25 Hz sample shown at
        # the same time).
        slider_idx = (frame_idx * ACC_SAMPLING_RATE) // FPS
        # print(f"frame_idx: {frame_idx} | slider_idx: {slider_idx}")
        slider_lines = []
        # Both panels share time_acc as x values, so one bounds check on
        # time_acc covers both sliders.
        if slider_idx < len(time_acc):
            slider_pos = time_acc[slider_idx]
            slider_lines = [
                panel.axvline(x=slider_pos, color='#333333', linestyle='-')
                for panel in ax
            ]

        fig.savefig(f'{sensor_save_dir}/sensor_{frame_idx:05d}.png')

        # Remove the sliders again so that the next frame starts from the
        # unchanged background.
        for line in slider_lines:
            line.remove()
finally:
    plt.close(fig)

## Video data

In [None]:
# Load the video frames
video_frames = []
video_width, video_height = 640, 480

# Fetch the image file path list and sort
image_path_list = sorted(glob.glob(f"{frames_directory}/*.jpg"))
print(len(image_path_list))
if not image_path_list:
    raise FileNotFoundError(f"No .jpg frames found in {frames_directory}")
print(image_path_list[0])

# Locate the frame at which the speaker turns on
frame_data_path = f"../data/umineko-2024-v8i-yolov8/{session_id}.csv"
df_frame = pd.read_csv(frame_data_path)
display(df_frame.head(3))
speaker_turn_on_frame_idx = utils.get_speaker_turn_on_idx(df_frame)
print(f"speaker_turn_on_frame_idx: {speaker_turn_on_frame_idx}")

# Per-session shift of the onset frame in seconds; sessions that are not
# listed are not shifted.
FRAME_OFFSET_SEC = {"LBP01_S00": 1, "LBP03_S00": 1}
speaker_turn_on_frame_idx = speaker_turn_on_frame_idx + FRAME_OFFSET_SEC.get(session_id, 0) * FPS
print(f"-> speaker_turn_on_frame_idx: {speaker_turn_on_frame_idx}")

# Keep the frames from BEFORE_SEC seconds before to AFTER_SEC seconds after
# the onset. A negative start would count from the end of the list and a stop
# past the end would silently shorten the clip, so reject both.
_first_frame = speaker_turn_on_frame_idx - BEFORE_SEC * FPS
_last_frame = speaker_turn_on_frame_idx + AFTER_SEC * FPS
if _first_frame < 0 or _last_frame > len(image_path_list):
    raise ValueError(
        f"Frame window [{_first_frame}, {_last_frame}) does not fit into "
        f"the {len(image_path_list)} frames of {frames_directory}"
    )
image_path_list = image_path_list[_first_frame:_last_frame]

# Load image data one by one, darken a banner near the top of each frame and
# write the session / period / speaker state into it.
FONT = cv2.FONT_HERSHEY_DUPLEX
FONT_SIZE = 0.85
FONT_WEIGHT = 1

# Frame range during which the overlay reports the speaker as "On":
# from the onset until 4.2 s later.
_onset_frame = BEFORE_SEC * FPS
_speaker_off_frame = (BEFORE_SEC + 4.2) * FPS

if len(image_path_list) < TOTAL_FRAMES:
    raise ValueError(
        f"Need {TOTAL_FRAMES} video frames, but only {len(image_path_list)} are available"
    )

for frame_idx, image_path in enumerate(image_path_list[:TOTAL_FRAMES]):
    frame = cv2.imread(image_path)
    # cv2.imread returns None instead of raising when a file cannot be read
    if frame is None:
        raise OSError(f"Could not read video frame: {image_path}")
    # The frame is later stacked next to a 640 x 480 sensor image
    if frame.shape[:2] != (video_height, video_width):
        raise ValueError(
            f"Unexpected frame size {frame.shape[1]}x{frame.shape[0]} in {image_path}; "
            f"expected {video_width}x{video_height}"
        )

    # Semi-transparent dark banner: 30 % image + 70 % black (+ 1.0 offset)
    sub_img = frame[15:65, 0:video_width]
    black_rect = np.zeros(sub_img.shape, dtype=np.uint8)
    rect = cv2.addWeighted(sub_img, 0.3, black_rect, 0.7, 1.0)
    frame[15:65, 0:video_width] = rect

    pre_post = "Pre " if frame_idx < _onset_frame else "Post"
    speaker_on = "On" if _onset_frame <= frame_idx < _speaker_off_frame else "Off"

    # Session name, period and speaker state
    cv2.putText(
        frame,
        f'{session_id.replace("_", " ")} | Period: {pre_post} | Speaker: {speaker_on}',
        (20, 50), FONT, FONT_SIZE, (255, 255, 255), FONT_WEIGHT, cv2.LINE_AA
    )
    video_frames.append(frame)

# Load image data with the slider and
# concatenate it horizontally with the video frames to create the video.
_video_path = f'{base_dir}/{session_id}_B{BEFORE_SEC:02d}s_A{AFTER_SEC:02d}s.mp4'
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(
    _video_path,
    fourcc,
    FPS,
    (1280, 720) # two 640 x 480 images side by side plus black bars
)
# A writer that failed to open would otherwise discard every frame silently
if not out.isOpened():
    raise OSError(f"Could not open video writer for {_video_path}")

# Black bars (upper and lower) that pad the 480 px high images to 720 px;
# they are the same for every frame.
black_image_upper = np.zeros((120, 1280, 3), dtype=np.uint8)
black_image_lower = np.zeros((120, 1280, 3), dtype=np.uint8)

try:
    for frame_idx in range(TOTAL_FRAMES):
        # Load the image with the slider
        _slider_path = f'{sensor_save_dir}/sensor_{frame_idx:05d}.png'
        slider_img = cv2.imread(_slider_path)
        # cv2.imread returns None instead of raising when a file cannot be read
        if slider_img is None:
            raise OSError(f"Could not read sensor image: {_slider_path}")
        slider_img = cv2.resize(slider_img, (640, 480))
        # Fetch the corresponding video frame
        video_frame = video_frames[frame_idx]
        # Concatenate horizontally
        combined_frame = np.hstack((video_frame, slider_img))
        # Add the black bars above and below
        combined_frame_with_black = np.vstack((black_image_upper, combined_frame, black_image_lower))

        # Write the frame to video data
        out.write(combined_frame_with_black)
finally:
    # Release the writer even if a frame could not be processed
    out.release()

print("Video creation completed.")