# Metadata

In [None]:
import warnings
warnings.simplefilter('ignore', FutureWarning)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Candidate colour palettes; only the one bound to `palette` is activated below.
palette0 = sns.color_palette([  # Okabe-Ito
    '#E69F00', '#56B4E9', '#009E73', '#F0E442',
    '#0072B2', '#D55E00', '#CC79A7', '#000000',
])
palette2 = sns.color_palette(["#D81B60", "#1E88E5", "#FFC107", "#004D40"])
# Same four colours as palette2, with the first two swapped.
palette3 = sns.color_palette(["#1E88E5", "#D81B60", "#FFC107", "#004D40"])
palette = palette3

# Show the chosen swatches, then apply them together with the plotting theme.
display(palette)
sns.set_palette(palette)
sns.set_theme(
    context='poster',
    style='ticks',
    palette=palette,
    font_scale=1.0,
)

In [None]:
def categorize_time(time):
    """Map a time of day to one of the coarse time-window labels.

    Parameters
    ----------
    time : datetime.time (or any object exposing an ``hour`` attribute)
        The time of day to classify. ``None`` / ``NaT`` are accepted.

    Returns
    -------
    str
        ``'03-06'``, ``'06-09'``, ``'09-12'``, ``'12-15'`` or ``'15-19'`` when
        the time falls inside the corresponding half-open window
        ``[start, end)``; ``'Other'`` for any other time and for missing values.
    """
    # Missing values cannot be compared with a time; treat them as out of range.
    if pd.isna(time):
        return 'Other'

    # Every window starts and ends on a whole hour, so comparing the hour alone
    # is equivalent to comparing against HH:00 and avoids re-parsing the bound
    # strings on every call.
    hour = time.hour
    for start, end in ((3, 6), (6, 9), (9, 12), (12, 15), (15, 19)):
        if start <= hour < end:
            return f'{start:02d}-{end:02d}'
    return 'Other'

In [None]:
# Read the session table.
path = "../data/metadata/session_data.csv"
df = pd.read_csv(path)

# Collapse the fine-grained `location_category` values into three groups;
# categories missing from the map become NaN.
category_map = dict(
    bay='onshore',
    bay_river='onshore',
    onshore='onshore',
    offshore='offshore',
    city='land',
    paddy_field='land',
    river='land',
)
location = df['location_category'].map(category_map)
df.insert(df.shape[1], 'location', location)

# Parse `pb_time` as HH:MM:SS. Values that fail to parse (or are missing) fall
# back to 00:00, which means they end up labelled 'AM' and 'Other' below.
midnight = pd.to_datetime('00:00').time()
noon = pd.to_datetime('12:00').time()
parsed_time = pd.to_datetime(df['pb_time'], format='%H:%M:%S', errors='coerce').dt.time
df['pb_time'] = parsed_time.apply(lambda t: midnight if pd.isna(t) else t)

# Derive the two time groupings used by the plots.
df['AM_PM'] = df['pb_time'].apply(lambda t: 'AM' if t < noon else 'PM')
df['time_category'] = df['pb_time'].apply(categorize_time)
display(df)

In [None]:
# Summary statistics grouped by `audio_file_name`.
audio_summary = df.groupby(['audio_file_name']).describe()
audio_summary

# Visualization

## Audio files

In [None]:
# Keep only rows with a real audio file name.
# NaN never compares equal to anything (itself included), so `!= np.nan` is
# always True and removes nothing; missing names must be dropped with `notna()`.
# Both conditions are combined into one mask so neither filter is discarded.
has_audio = df['audio_file_name'].notna() & (df['audio_file_name'] != "Cancelled")
_df = df[has_audio]

# Single panel: number of rows per audio file.
fig, ax = plt.subplots(1, 1, figsize=(5, 4))
sns.countplot(ax=ax, data=_df, x='audio_file_name')
ax.set_ylim(0, 34)
ax.set_xlabel("")
ax.set_ylabel("Count", labelpad=10)
plt.show()
plt.close(fig)

## Individuals

In [None]:
# One Predator/Noise count panel per individual (`test_id`).
# Missing ids are dropped before sorting so a NaN cannot break the ordering.
test_id_list = sorted(df['test_id'].dropna().unique())

# Size the grid from the data (four panels per row) so that no individual is
# silently dropped; with eight individuals this is the original 2x4 layout.
N_COLS = 4
n_rows = max(1, -(-len(test_id_list) // N_COLS))  # ceiling division
GRIDSPEC_KW = {'wspace': 0.5, 'hspace': 0.5}
fig, axes = plt.subplots(
    n_rows, N_COLS,
    figsize=(20, 4 * n_rows),
    squeeze=False,  # always a 2-D array, even for a single row
    gridspec_kw=GRIDSPEC_KW,
)
ax_list = list(axes.flatten())
for ax, test_id in zip(ax_list, test_id_list):
    # Compare against the id itself (not its string form) so that numeric ids
    # still match their rows.
    _df = df[df['test_id'] == test_id]
    sns.countplot(
        ax=ax, data=_df, x='audio_file_name',
        order=['Predator', 'Noise'],
        alpha=0.9,
    )
    ax.set_yticks(np.arange(0, 12, 2))
    ax.set_ylim(0, 11)
    ax.set_xlabel("")
    ax.set_ylabel("Count", labelpad=10)
    ax.set_title(f'{test_id}', pad=10)

# Hide grid cells that have no individual assigned to them.
for unused_ax in ax_list[len(test_id_list):]:
    unused_ax.set_visible(False)
plt.show()
plt.close(fig)

## Location

In [None]:
# One Predator/Noise count panel per coarse location group.
location_list = ['land', 'onshore', 'offshore']
GRIDSPEC_KW = {'wspace': 0.4, 'hspace': 0.5}
fig, axes = plt.subplots(1, len(location_list), figsize=(18, 4), gridspec_kw=GRIDSPEC_KW)
ax_list = list(axes.flatten())
# The loop variable is deliberately not called `location`: that name already
# holds the mapped location Series built when the data was loaded.
for ax, location_name in zip(ax_list, location_list):
    _df = df[df['location'] == location_name]
    sns.countplot(
        ax=ax, data=_df, x='audio_file_name',
        order=['Predator', 'Noise'],
    )
    # Ticks are set first; the explicit limits then fix the visible range.
    ax.set_yticks(np.arange(0, 20, 5))
    ax.set_ylim(0, 18)
    ax.set_xlabel("")
    ax.set_ylabel("Count", labelpad=10)
    ax.set_title(location_name, pad=10)
plt.show()
plt.close(fig)

## Time category

In [None]:
# Predator/Noise counts, one panel for AM rows and one for PM rows.
am_pm_list = ['AM', 'PM']
GRIDSPEC_KW = dict(wspace=0.4, hspace=0.5)
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 4), gridspec_kw=GRIDSPEC_KW)
ax_list = list(ax.flatten())
for panel, half_day in zip(ax_list, am_pm_list):
    in_half_day = df['AM_PM'] == half_day
    _df = df[in_half_day]
    sns.countplot(
        data=_df,
        x='audio_file_name',
        order=['Predator', 'Noise'],
        ax=panel,
    )
    # Shared y-axis layout so the two panels are directly comparable.
    panel.set_yticks(np.arange(0, 20, 5))
    panel.set_ylim(0, 18)
    panel.set_xlabel("")
    panel.set_ylabel("Count", labelpad=10)
    panel.set_title(half_day, pad=10)
plt.show()
plt.close()

In [None]:
# Predator/Noise counts, one panel per time window ('Other' is not plotted).
time_cat_list = ['03-06', '06-09', '09-12', '12-15', '15-19']
GRIDSPEC_KW = dict(wspace=0.4, hspace=0.5)
fig, ax = plt.subplots(nrows=1, ncols=5, figsize=(28, 4), gridspec_kw=GRIDSPEC_KW)
ax_list = list(ax.flatten())
for panel, window in zip(ax_list, time_cat_list):
    _df = df.loc[df['time_category'] == window]
    sns.countplot(
        data=_df,
        x='audio_file_name',
        order=['Predator', 'Noise'],
        ax=panel,
    )
    # Same y-axis layout on every panel so the windows can be compared.
    panel.set_yticks(np.arange(0, 20, 5))
    panel.set_ylim(0, 18)
    panel.set_xlabel("")
    panel.set_ylabel("Count", labelpad=10)
    panel.set_title(window, pad=10)
plt.show()
plt.close()