# Extraction of video recording session data
From the raw logdata.csv, extract only the data logged while video was being recorded.

In [None]:
import os
import glob
import numpy as np
import pandas as pd

import sys
sys.path.append("../") # Set parent directory to sys.path
sys.dont_write_bytecode = True
%load_ext autoreload
%autoreload 2
import src.utils as utils

import warnings
warnings.simplefilter('ignore', FutureWarning)

In [None]:
# Deployments to process.
# LBP02 and LBP04 are intentionally left out: no video data.
test_id_list = [
    "LBP00", "LBP01",
    "LBP03",
    "LBP05", "LBP06", "LBP07", "LBP08", "LBP09",
]

# Columns kept from the logged data, grouped by name prefix / topic.
# The flattened order is the column order used downstream.
use_columns = [
    # time stamps
    *('unixtime', 'datetime_utc', 'datetime_jst'),
    # rtc_* fields
    *('rtc_year', 'rtc_month', 'rtc_day', 'rtc_hour', 'rtc_min', 'rtc_sec', 'rtc_msec'),
    # gps_* time fields, position and fix columns
    *('gps_hour', 'gps_min', 'gps_sec'),
    *('latitude', 'longitude', 'altitude', 'fix_type', 'SIV'),
    # acc_*, gyro_* and mag_* axes
    *('acc_x', 'acc_y', 'acc_z'),
    *('gyro_x', 'gyro_y', 'gyro_z'),
    *('mag_x', 'mag_y', 'mag_z'),
    # remaining sensor readings and battery level
    *('illuminance', 'atmopress', 'water_pressure', 'water_temp', 'depth', 'alt_wps', 'battery_level'),
    # behavior label
    'behavior_class',
    # camera_* state
    *('camera_command', 'camera_recording', 'camera_count'),
    # audio / speaker state
    *('play_audio', 'speaker_on', 'audio_file'),
    # logger write-timing diagnostics
    *('prev_sd_write_time_ms', 'delay_occurred_counter'),
]


## 0 | Extract all playback session data
Extract all playback session data and save it as a new CSV file per deployment.

In [None]:
# Use this if you want to extract all video recording session data.
# Collects every CSV directly under base_dir (sorted by path) and echoes the
# pattern plus a numbered listing so the inputs can be checked by eye.
base_dir = "../data/corrected-raw-data"  # check the directory path
target_path = base_dir + "/*.csv"
print(target_path)

input_path_list = glob.glob(target_path)
input_path_list.sort()

for i, input_path in enumerate(input_path_list):
    print(f"{i:0=3}: {input_path}")

In [None]:
# Columns passed to utils.extract_camera_recording_session as its third
# argument (presumably the columns it forward-fills -- confirm in src/utils).
ffill_columns = [
    'behavior_class',
    'camera_command', 'camera_recording', 'camera_count',
    'play_audio', 'speaker_on', 'audio_file',
    'prev_sd_write_time_ms', 'delay_occurred_counter'
]

# Set to True to actually write the per-deployment CSV files.
# Defaults to False, so running this cell is a dry run that overwrites nothing.
save_csv = False
# No trailing slash here: save_path adds the separator itself, which avoids
# a "//" in the resulting path.
save_dir = "../data/extracted-all-sessions-data"

# Tip: iterate over input_path_list[:1] to try a single file first.
for i, input_path in enumerate(input_path_list):
    print(f"input path {i:0=4}: {input_path}")
    # Deployment id = file name without its extension.
    test_id = os.path.splitext(os.path.basename(input_path))[0]
    if test_id not in test_id_list:
        print(f"skipped {test_id}")
    else:
        df = pd.read_csv(input_path)
        df_extracted = utils.extract_camera_recording_session(
            df,
            use_columns,
            ffill_columns,
        )
        save_path = f"{save_dir}/{test_id}.csv"
        if save_csv:
            os.makedirs(save_dir, exist_ok=True)
            df_extracted.to_csv(save_path, index=False)
            print(f"Saved!: {save_path}")
        else:
            # Report honestly that nothing was written.
            print(f"Not saved (save_csv is False): {save_path}")
    print("------------------------------------")

## 1 | Extract GPS Data (1 Hz)
Extract GPS data during video recording and save the data as one csv file per session.  
Calculate ground speeds and turning angles and add them as new columns.

In [None]:
# Build one GPS (1 Hz) CSV per recording session for every deployment listed
# in test_id_list, adding speed / turning-angle columns and index columns
# relative to the speaker turn-on row.
target_path = "../data/extracted-all-sessions-data/*.csv"
path_list = sorted(glob.glob(target_path))

# Set to True to actually write the per-session CSV files.
# Defaults to False, so running this cell is a dry run that overwrites nothing.
save_csv = False

# process each bird
# Tip: iterate over path_list[:1] to try a single file first.
for path in path_list:
    print("===============================================================")
    test_id = os.path.splitext(os.path.basename(path))[0]
    print(f"test_id: {test_id}")

    if test_id not in test_id_list:
        print(f"skipped {test_id}")
        continue

    df = pd.read_csv(path)
    # display(df.head(3))

    # Session ids present in this file. NaN is dropped before the int cast,
    # and negative ids (e.g. -1) are not treated as sessions.
    session_ids = {int(c) for c in df['camera_count'].dropna().unique()}
    camera_count_list = sorted(c for c in session_ids if c >= 0)
    print(f"camera_count_list: {camera_count_list}")

    df = df[df['rtc_msec'] == 0]  # filter only GPS data (1Hz)

    # save each recording session as csv file
    for camera_count in camera_count_list:
        file_name_base = f"{test_id}_S{camera_count:0=2}"
        print(f"-------------------------- {file_name_base} --------------------------")

        # .copy() because the session frame is modified below; without it the
        # frame is a slice of df and pandas may warn or behave ambiguously.
        df_tmp = df[df['camera_count'] == camera_count].copy()

        # The session ids were listed before the 1 Hz filter, so a session
        # can end up with no rows here; skip it instead of passing an empty
        # frame to the utils helpers.
        if df_tmp.empty:
            print(f"skipped {file_name_base}: no rows with rtc_msec == 0")
            continue

        # reset index (original index -> 'old_index')
        df_tmp.insert(0, 'old_index', df_tmp.index)
        df_tmp = df_tmp.reset_index(drop=True)

        # Exception handler
        # if test_id == "LBP03" and camera_count == 0:
        #     display(df_tmp)
        #     df_tmp['datetime_jst'] = pd.to_datetime(df_tmp['datetime_jst'])
        #     df_tmp.set_index('datetime_jst', drop=True, inplace=True)
        #     df_tmp = df_tmp.asfreq('1s') # 1 s update period
        #     display(df_tmp)
        #     df_tmp['latitude'] = df_tmp['latitude'].interpolate(method='linear')
        #     df_tmp['longitude'] = df_tmp['longitude'].interpolate(method='linear')
        #     df_tmp.reset_index(inplace=True)

        # Calc distances & turning angle
        df_tmp = utils.calc_speed(df_tmp)
        df_tmp = utils.calc_turning_angle(df_tmp)
        idx = utils.get_speaker_turn_on_idx(df_tmp)
        # print(idx)

        n_rows = len(df_tmp)
        program_index = np.arange(1, n_rows + 1)  # 1-based row counter

        # Leading columns, listed in their final left-to-right order.
        # '_program_index' is the 0-based row number relative to idx.
        leading_columns = [
            ('file_name', file_name_base),
            ('test_id', test_id),
            ('session_id', camera_count),
            ('index', np.arange(n_rows)),
            ('_program_index', program_index - idx - 1),
            ('program_index', program_index),
        ]
        for position, (name, values) in enumerate(leading_columns):
            df_tmp.insert(position, name, values)
        # display(df_tmp.head(3))
        # display(df_tmp.tail(3))

        save_path = f"../data/extracted-gps-data/{test_id}/{file_name_base}.csv"
        if save_csv:
            os.makedirs(os.path.dirname(save_path), exist_ok=True)
            df_tmp.to_csv(save_path, index=False)
            print(f"Saved!: {save_path}")
        else:
            # Report honestly that nothing was written.
            print(f"Not saved (save_csv is False): {save_path}")
print("===============================================================")

## 2 | Extract IMU data (25 Hz)
Extract IMU data during video recording and save the data as one csv file per session.  
Calculate static components (moving average values), dynamic components, ODBA, and VeDBA. 

In [None]:
# Build one IMU CSV per recording session for every deployment listed in
# test_id_list, adding centered moving averages, low-pass filtered gyro data,
# an angle-change metric, ODBA/VeDBA and index columns relative to the
# speaker turn-on row.
target_path = "../data/extracted-all-sessions-data/*.csv"
path_list = sorted(glob.glob(target_path))

# Set to True to actually write the per-session CSV files.
# Defaults to False, so running this cell is a dry run that overwrites nothing.
save_csv = False

# Moving-average window (loop invariant, so defined once up front).
sampling_rate = 25
rolling_window_sec = 3
# Shepard et al., 2008 Aquatic Biology
# """
# In the present study, 3 s appeared to be a minimum and robust length
# over which to take the running mean to derive static acceleration
# in animals that have a dominant stroke period < 3 s.
# """
window_size = int(sampling_rate * rolling_window_sec)

# Signals that get a '<name>_ma' moving-average column, in output order.
ma_source_columns = ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z']
ma_columns = [f"{col}_ma" for col in ma_source_columns]

# process each bird
# Tip: iterate over path_list[:1] to try a single file first.
for path in path_list:
    print("===============================================================")
    test_id = os.path.splitext(os.path.basename(path))[0]
    print(f"test_id: {test_id}")

    if test_id not in test_id_list:
        print(f"skipped {test_id}")
        continue

    df = pd.read_csv(path)
    # display(df.head(3))

    # Session ids present in this file. NaN is dropped before the int cast,
    # and negative ids (e.g. -1) are not treated as sessions.
    session_ids = {int(c) for c in df['camera_count'].dropna().unique()}
    camera_count_list = sorted(c for c in session_ids if c >= 0)
    print(f"camera_count_list: {camera_count_list}")

    df = df[use_columns]
    # display(df.head(5))

    # save each recording session as csv file
    for camera_count in camera_count_list:
        file_name_base = f"{test_id}_S{camera_count:0=2}"
        print(f"-------------------------- {file_name_base} --------------------------")

        # .copy() because the session frame is modified below; without it the
        # frame is a slice of df and pandas may warn or behave ambiguously.
        df_tmp = df[df['camera_count'] == camera_count].copy()

        # moving average (centered); any pre-existing '<name>_ma' columns are
        # removed first so the new ones are always appended at the end.
        df_tmp = df_tmp.drop(columns=ma_columns, errors='ignore')
        for source_col, ma_col in zip(ma_source_columns, ma_columns):
            df_tmp[ma_col] = (
                df_tmp[source_col].rolling(window=window_size, center=True).mean()
            )

        # Apply low pass filter to gyro data and add the data
        df_tmp = utils.calc_lowpass_filtered_gyro_data(df_tmp, filter_type='butter')

        # Angle-change metric computed by the helper from the filtered gyro
        # data and the moving-average acceleration columns.
        df_tmp = utils.calc_angle_change_using_filtered_gyro_and_ma_acc_data(df_tmp)

        # ODBA and VeDBA
        df_tmp = utils.calc_odba_and_vedba(df_tmp, filter_type='ma')

        idx = utils.get_speaker_turn_on_idx(df_tmp)
        # print(idx)

        n_rows = len(df_tmp)
        indices = np.arange(n_rows)               # 0-based row counter
        program_index = np.arange(1, n_rows + 1)  # 1-based row counter

        # Leading columns, listed in their final left-to-right order.
        # '_index' and '_program_index' are row numbers relative to idx.
        leading_columns = [
            ('file_name', file_name_base),
            ('test_id', test_id),
            ('_program_index', program_index - idx - 1),
            ('program_index', program_index),
            ('session_id', camera_count),
            ('index', indices),
            ('_index', indices - idx),
        ]
        for position, (name, values) in enumerate(leading_columns):
            df_tmp.insert(position, name, values)

        save_path = f"../data/extracted-imu-data/{test_id}/{file_name_base}.csv"
        if save_csv:
            os.makedirs(os.path.dirname(save_path), exist_ok=True)
            df_tmp.to_csv(save_path, index=False)
            print(f"Saved!: {save_path}")
        else:
            # Report honestly that nothing was written.
            print(f"Not saved (save_csv is False): {save_path}")
print("===============================================================")