# Pre- and Post-mean data

In [None]:
import warnings
warnings.simplefilter('ignore', FutureWarning)

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as tck
import seaborn as sns

import sys
sys.path.append("../") # Set parent directory to sys.path
sys.dont_write_bytecode = True
%load_ext autoreload
%autoreload 2
import src.ci_utils as ci_utils

# Okabe-Ito palette, registered below as the seaborn default.
okabe_ito_hex = [
    '#E69F00', '#56B4E9', '#009E73', '#F0E442',
    '#0072B2', '#D55E00', '#CC79A7', '#000000',
]
palette0 = sns.color_palette(okabe_ito_hex)
palette = palette0
display(palette)  # show the colour swatches in the cell output

# Apply the palette, then the base theme used by the cells below.
sns.set_palette(palette)
sns.set_theme(
    context='poster',
    style='ticks',
    palette=palette,
    font_scale=1.0,
)

In [None]:
# test data
from sklearn.datasets import load_iris
import pandas as pd
# Toy frame: the first ten iris samples, one column per feature.
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names).iloc[:10]
display(df)

# Check the row windows used for pre and post data:
# "pre" is the idx_shift rows immediately before idx,
# "post" is the idx_shift rows starting at idx.
idx, idx_shift = 5, 3
df_pre = df.iloc[idx - idx_shift:idx]
df_post = df.iloc[idx:idx + idx_shift]
display(df_pre)
display(df_post)

## Data for R analysis

In [None]:
# Half-open [start, end) time-of-day bins with their labels. The boundaries
# are parsed once here rather than on every call, because categorize_time is
# applied row by row to the metadata table.
_TIME_CATEGORY_BINS = (
    (pd.to_datetime('03:00').time(), pd.to_datetime('06:00').time(), '03-06'),
    (pd.to_datetime('06:00').time(), pd.to_datetime('09:00').time(), '06-09'),
    (pd.to_datetime('09:00').time(), pd.to_datetime('12:00').time(), '09-12'),
    (pd.to_datetime('12:00').time(), pd.to_datetime('15:00').time(), '12-15'),
    (pd.to_datetime('15:00').time(), pd.to_datetime('19:00').time(), '15-19'),
)


def categorize_time(time):
    """Return the time-of-day bin label for ``time``.

    Parameters
    ----------
    time : datetime.time
        Time of day to classify.

    Returns
    -------
    str
        One of '03-06', '06-09', '09-12', '12-15', '15-19' when ``time``
        falls in the matching half-open interval (start included, end
        excluded), otherwise 'Other' (before 03:00 or from 19:00 onwards).
    """
    for start, end, label in _TIME_CATEGORY_BINS:
        if start <= time < end:
            return label
    return 'Other'
    
def process_meta_data(path):
    """Read the session metadata CSV and append derived categorical columns.

    Parameters
    ----------
    path : str
        Location of the CSV file, passed straight to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The loaded table with these changes:

        * ``location``: three-way grouping of ``location_category``
          (onshore / offshore / land).
        * ``location2``: two-way grouping (land_onshore / offshore).
        * ``pb_time``: converted to ``datetime.time``; values that do not
          parse as ``%H:%M:%S`` are replaced by 00:00.
        * ``AM_PM``: 'PM' when ``pb_time`` is 12:00 or later, else 'AM'.
        * ``time_category``: label returned by ``categorize_time``.
    """
    meta = pd.read_csv(path)
    # df.insert(len(df.columns), 'reaction', df['react_in_video'].apply(lambda x: 1 if x >= 1.0 else 0))

    # Three-way grouping of the raw location categories.
    three_way = {
        'bay': 'onshore',
        'bay_river': 'onshore',
        'onshore': 'onshore',
        'offshore': 'offshore',
        'city': 'land',
        'paddy_field': 'land',
        'river': 'land',
    }
    # Two-way grouping: everything except offshore is pooled together.
    two_way = {
        raw: ('offshore' if grouped == 'offshore' else 'land_onshore')
        for raw, grouped in three_way.items()
    }
    # Categories absent from the maps come out of .map() as NaN.
    for column, mapping in (('location', three_way), ('location2', two_way)):
        meta.insert(len(meta.columns), column, meta['location_category'].map(mapping))

    # Reference times used by the per-row lambdas below.
    midnight = pd.to_datetime('00:00').time()
    noon = pd.to_datetime('12:00').time()

    parsed = pd.to_datetime(meta['pb_time'], format='%H:%M:%S', errors='coerce').dt.time
    # Unparseable times fall back to midnight, so they end up as 'AM' / 'Other'.
    meta['pb_time'] = parsed.apply(lambda t: t if pd.notna(t) else midnight)
    meta['AM_PM'] = meta['pb_time'].apply(lambda t: 'PM' if t >= noon else 'AM')
    meta['time_category'] = meta['pb_time'].apply(categorize_time)

    return meta

In [None]:
path = "../data/metadata/session_data.csv"
df_meta = process_meta_data(path)

# Keep only the session covariates and expose `file_name` under the merge key
# `session_id`. The rename is chained (not in place) so it operates on a new
# frame; an in-place rename on a column subset of df_meta can raise pandas'
# SettingWithCopyWarning.
_df_meta = df_meta[["file_name", "AM_PM", "location", "location2", "body_mass"]].rename(
    columns={"file_name": "session_id"}
)
display(_df_meta)

SEC = 5
# SEC = 10

value_type = 'mean'

# File-name stem used in the R data directory for each data type.
FNAME_BASE_BY_DATA_TYPE = {
    'smoothed_VeDBA_2s': "s_vedba",
    'abs_diff_distance_m': "ad_speed",
    'abs_diff_pixel_count_p': "ad_mpr",
}

# data_type_list = ['smoothed_VeDBA_2s', 'abs_diff_distance_m', 'abs_diff_pixel_count_p']
# data_type_list = ['smoothed_VeDBA_2s']
# data_type_list = ['abs_diff_distance_m']
data_type_list = ['abs_diff_pixel_count_p']
for data_type in data_type_list:
    # Reject unknown data types up front; otherwise fname_base would be
    # undefined or silently reused from the previous iteration.
    if data_type not in FNAME_BASE_BY_DATA_TYPE:
        raise ValueError(f"Unknown data_type: {data_type!r}")
    fname_base = FNAME_BASE_BY_DATA_TYPE[data_type]

    # diff data (pre and post mean values);
    # df already includes pb_count_1 and pb_count_2
    df, df2 = ci_utils.prep_df_pre_post_analysis(data_type, SEC, value_type)

    # merge with metadata; the inner join drops sessions missing on either side
    df_r = pd.merge(df, _df_meta, on='session_id', how='inner')
    display(df_r)
    for column in ("location", "location2", "body_mass"):
        print(np.unique(df_r[column]))
    print(len(df_r))

    # save data to R directory (writing is currently disabled)
    output_path = f"../r-analysis/data/mean/{fname_base}_{SEC:02}s.csv"
    # df_r.to_csv(output_path)

In [None]:
# Histogram of `diff_value` for the data frame left over from the loop above,
# excluding rows whose audio file name is "Cancelled".
keep_mask = df['audio_file_name'] != "Cancelled"
_df = df[keep_mask]
Y = _df['diff_value'].values

# Report the number of values going into the histogram.
print(Y.shape)
print(len(Y))

fig, ax = plt.subplots(figsize=(5, 4))
ax.hist(Y, bins=46, color="skyblue")
ax.grid(which='both')
plt.show()

## Fig. Sxx | Diff values per session

In [None]:
# duration_sec_list = [5, 10, 15, 20]
duration_sec_list = [5, 10]

data_type_list = ["smoothed_VeDBA_2s",  "abs_diff_distance_m", "abs_diff_pixel_count_p"]
data_name_list = ["S-VeDBA", "AD-Speed", "AD-MPR"]

# Per-data-type plot settings: (x tick positions, x limits, marker colour).
# An unknown data type raises KeyError instead of reusing stale settings.
PLOT_CONFIG = {
    "smoothed_VeDBA_2s": (np.arange(-5, 5, 0.5), (-1.2, 1.2), "#1E88E5"),
    "abs_diff_distance_m": (np.arange(-5, 5, 1.0), (-2.5, 2.5), "#FFC107"),
    "abs_diff_pixel_count_p": (np.arange(-1, 1, 0.02), (-0.035, 0.035), "#D81B60"),
}
GRIDSPEC_KW = {'wspace': 0.2, 'hspace': 0.2, 'width_ratios': [1, 1, 1]}
save_dir = "../output/figure-for-paper"

# One figure per window length, one panel per data type.
for duration_sec in duration_sec_list:
    sns.set_theme(context='paper', style='ticks', palette=palette, font_scale=1.1)
    fig, axes = plt.subplots(1, 3, figsize=(7, 11), gridspec_kw=GRIDSPEC_KW)
    ax_list = axes.flatten().tolist()

    for i, (ax, data_type, data_name) in enumerate(zip(ax_list, data_type_list, data_name_list)):
        xticks, xlim, COLOR = PLOT_CONFIG[data_type]

        df, df2 = ci_utils.prep_df_pre_post_analysis(data_type, duration_sec)
        df = df[df["audio_file_name"] != "Cancelled"] # filter
        df = df.reset_index()

        # data for plot | reversed so the first session is drawn at the top
        _avg_effects = df['diff_value'].values[::-1]
        _session_names = df['session_id'].values[::-1]
        _session_names_with_spaces = [name.replace('_', ' ') for name in _session_names]
        y = np.arange(len(_session_names))

        # Triangles for "Predator" playbacks, circles for everything else.
        # The mask is reversed to stay aligned with the reversed values.
        is_predator = df["audio_file_name"].eq("Predator").to_numpy()[::-1]
        for mask, marker in ((is_predator, "^"), (~is_predator, "o")):
            ax.scatter(x=_avg_effects[mask], y=y[mask], color=COLOR, marker=marker, s=30, zorder=12)

        # center line (x = 0)
        ax.axvline(x=0, color="black", linestyle="-", linewidth=1.0, zorder=3)

        # customize plot
        ax.set_xticks(xticks)
        ax.set_xlim(xlim)

        ax.set_yticks(y)
        # Only the left-most panel carries the session labels.
        if i == 0:
            ax.set_yticklabels(_session_names_with_spaces)
        else:
            ax.set_yticklabels([])
        ax.set_ylim(0-1, len(y))

        ax.set_title(data_name, fontweight='bold', pad=7)

        ax.xaxis.set_minor_locator(tck.AutoMinorLocator(2))
        ax.grid(which='major', axis="both", alpha=0.5)
        ax.grid(which='minor', axis="x", alpha=0.5)

    # Save before showing so the files are written from the intact figure.
    # The PDF name now carries the same "_" before the duration as the PNG.
    fig.savefig(f"{save_dir}/png/fig_sxx_diff_pre_post_{duration_sec:02d}s.png", dpi=150, bbox_inches="tight", pad_inches=0.2, transparent=False)
    fig.savefig(f"{save_dir}/pdf/fig_sxx_diff_pre_post_{duration_sec:02d}s.pdf", dpi=600, bbox_inches="tight", pad_inches=0.2, transparent=False)
    plt.show()
    plt.close(fig)