### Load dataframe

In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from tqdm.auto import tqdm
from joblib import Parallel, delayed
import pandas as pd

In [2]:
import avgn

In [3]:
from avgn.utils.paths import DATA_DIR, most_recent_subdirectory, ensure_dir, FIGURE_DIR
from avgn.visualization.spectrogram import draw_spec_set
from avgn.utils.general import save_fig

In [4]:
from scipy.stats import kruskal
from sklearn.metrics import silhouette_score, silhouette_samples

In [5]:
# Name of the dataset sub-folder under DATA_DIR from which the call dataframe is read.
DATASET_ID = "git_repos_call"

In [6]:
# Timestamp-style identifier.
# NOTE(review): no cell visible in this part of the notebook reads DT_ID - confirm it is still needed.
DT_ID = '2022-03-12_17-46-00'

## LH CALLS

In [7]:
# Load the LH-call dataframe; it already carries the 'specs' and 'umap' columns used below.
# NOTE(review): unpickling can execute arbitrary code - only read pickles from a trusted source.
call_df = pd.read_pickle(
    DATA_DIR / DATASET_ID / "LHcall" / 'LHcall_UMAP.pickle'
)
call_df.head(3)

Unnamed: 0_level_0,start_time,end_time,labels,indv,indvi,filename,group,location,sex,wav_loc,...,comb_labels,specs,umap,call_lab_simp,combi_lab_simp,call_unique_num,call_pos_combi,combi_label,combi_unique_num,simp
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.932017,1.36713,SHSHLH,MGGY,0,BWY MGGY Call Combo 1 290719 PM,BWYa,CRAWLEY,F,C:/Users/slwal/anaconda3/envs/PY36/avgn_paper-...,...,SHSHLH,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...","[0.04218862, 2.9634933]",SH-LH,DS-SH-DS SH-LH,1,1,DSSHDS SHSHLH,0,Contains LH Segment
3,1.442686,1.984961,USC,MGGY,0,BWY MGGY Call Combo 1 300719 AM,BWYa,CRAWLEY,F,C:/Users/slwal/anaconda3/envs/PY36/avgn_paper-...,...,USC,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...","[8.066194, 3.232741]",SH-LH,DS SH-LH,3,1,DS USC,1,Contains LH Segment
5,1.471596,1.897371,LH,MGGY,0,BWY MGGY Discrete 1 300719 AM,BWYa,CRAWLEY,F,C:/Users/slwal/anaconda3/envs/PY36/avgn_paper-...,...,LH,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...","[-0.3234938, 3.4373276]",LH,DS-SH-DS LH,5,1,DSSHDS LH,2,Contains LH Segment


In [8]:
# Number of rows (calls) in the dataframe.
call_df.shape[0]

203

In [9]:
# List the distinct values held in the 'simp' column.
call_df["simp"].unique()

array(['Contains LH Segment'], dtype=object)

### Get specs

In [10]:
def norm(x):
    """Min-max scale an array to the range [0, 1].

    Parameters
    ----------
    x : array-like
        Values to rescale.

    Returns
    -------
    numpy.ndarray
        ``(x - min(x)) / (max(x) - min(x))``. When every element of ``x`` is
        identical the range is zero; an all-zero float array of the same
        shape is returned instead of dividing by zero, which would fill the
        result with NaNs.
    """
    x = np.asarray(x)
    lowest = np.min(x)
    span = np.max(x) - lowest
    if span == 0:
        # Constant input: there is no spread to rescale.
        return np.zeros(x.shape, dtype=float)
    return (x - lowest) / span

In [11]:
# Min-max normalise every spectrogram stored in the dataframe (tqdm reports progress).
specs = [norm(spec) for spec in tqdm(list(call_df.specs.values))]

  0%|          | 0/203 [00:00<?, ?it/s]

In [12]:
from avgn.visualization.projections import scatter_spec

In [None]:
# Pass the UMAP coordinates and the normalised spectrograms to scatter_spec,
# using call_lab_simp as the scatter labels, then call save_fig on the target path.
nex = -1  # not referenced again within this cell
colors = ["thistle", "lightpink", "palevioletred","mediumvioletred", "purple", "midnightblue"]
scatter_spec(
    np.array(call_df['umap'].tolist()),
    specs,
    column_size=10,
    figsize=(10, 10),
    x_range=[-1.75, 8.75],
    y_range=[-1.75, 8.75],
    range_pad=0.15,
    pal_color="hls",
    color_points=False,
    enlarge_points=0,
    draw_lines=True,
    n_subset=1000,
    border_line_width=3,
    scatter_kwargs=dict(
        labels=call_df.call_lab_simp.values,
        alpha=1,
        s=15,
        show_legend=True,
        color_palette=colors,
    ),
    matshow_kwargs=dict(cmap=plt.cm.Greys),
    line_kwargs=dict(lw=0.5, ls="dashed", alpha=0.25),
);

# NOTE(review): save_fig is called with save_jpg=False although the target name
# ends in .jpeg - confirm that a file is actually written with these flags.
save_loc = FIGURE_DIR / 'manuscript' / 'Figures' / 'LHcallsUMAP.jpeg'
ensure_dir(save_loc.as_posix())
save_fig(save_loc, dpi=600, save_jpg=False)

In [None]:
# Silhouette score of the UMAP coordinates when grouped by call_lab_simp.
LHscore = silhouette_score(
    np.array(call_df['umap'].tolist()),
    labels=call_df['call_lab_simp'].values,
)
LHscore

In [None]:
# Per-call silhouette values of the UMAP coordinates for the observed
# call_lab_simp labels, and for a shuffled copy of the same labels that serves
# as a chance baseline.
# The shuffle is drawn from a generator with a fixed seed so that the baseline,
# and any statistic computed from it, is the same on every run.
shuffle_rng = np.random.RandomState(0)
umap_points = np.array(call_df['umap'].tolist())
samples = silhouette_samples(umap_points, labels=call_df.call_lab_simp.values)
chance_samples = silhouette_samples(
    umap_points,
    labels=shuffle_rng.permutation(call_df.call_lab_simp.values),
)

In [None]:
# Kruskal-Wallis test comparing the observed silhouette values with the shuffled-label ones.
# `samples` and `chance_samples` are whatever the most recently executed
# silhouette_samples cell assigned, so run that cell immediately before this one.
KWlabels = kruskal(samples, chance_samples)
KWlabels

There appears to be further clustering of similar call types, so we create a new variable (`SimpLabel`) that groups the calls accordingly.

In [36]:
# List the distinct call types held in the 'call_lab_simp' column.
call_df["call_lab_simp"].unique()

array(['SH-LH', 'LH', 'LH-DS', 'SH-DS-SH-LH', 'SH-DS-LH', 'SH-LH-DS',
       'DS-SH-DS-SH-LH'], dtype=object)

In [37]:
# Initialise the SimpLabel column from call_lab_simp; a later cell overwrites its values with broader group names.
call_df["SimpLabel"] = call_df["call_lab_simp"]

In [38]:
## One boolean mask per call type, obtained by comparing the SimpLabel column
## against each of the seven call-type strings.
cond1, cond2, cond3, cond4, cond5, cond6, cond7 = (
    call_df['SimpLabel'] == call_type
    for call_type in (
        'SH-LH',
        'LH',
        'LH-DS',
        'SH-DS-SH-LH',
        'SH-DS-LH',
        'SH-LH-DS',
        'DS-SH-DS-SH-LH',
    )
)

In [39]:
### Simplify: collapse the seven call types into three broader groups.
# SimpLabel is rebuilt from the untouched call_lab_simp column through a single
# mapping, so this cell gives the same result however many times it is run and
# does not depend on boolean masks computed before SimpLabel was modified.
# Call types absent from the mapping keep their original label.
SIMP_LABEL_GROUPS = {
    'SH-LH': 'LH & SH-LH calls',
    'LH': 'LH & SH-LH calls',
    'LH-DS': 'LH-DS & SH-LH-DS calls',
    'SH-LH-DS': 'LH-DS & SH-LH-DS calls',
    'SH-DS-SH-LH': 'SH-DS-LH & (DS)-SH-DS-SH-LH calls',
    'SH-DS-LH': 'SH-DS-LH & (DS)-SH-DS-SH-LH calls',
    'DS-SH-DS-SH-LH': 'SH-DS-LH & (DS)-SH-DS-SH-LH calls',
}
call_df['SimpLabel'] = call_df['call_lab_simp'].map(
    lambda call_type: SIMP_LABEL_GROUPS.get(call_type, call_type)
)

In [None]:
# Pass the UMAP coordinates and the normalised spectrograms to scatter_spec,
# using the grouped SimpLabel values as the scatter labels, then call save_fig
# on the target path.
nex = -1  # not referenced again within this cell
colors = ["thistle", "lightpink", "palevioletred","mediumvioletred", "purple", "midnightblue"]
scatter_spec(
    np.array(call_df['umap'].tolist()),
    specs,
    column_size=10,
    figsize=(10, 10),
    x_range=[-1.75, 8.75],
    y_range=[-1.75, 8.75],
    range_pad=0.15,
    pal_color="hls",
    color_points=False,
    enlarge_points=0,
    draw_lines=True,
    n_subset=1000,
    border_line_width=3,
    scatter_kwargs=dict(
        labels=call_df.SimpLabel.values,
        alpha=1,
        s=15,
        show_legend=True,
        color_palette=colors,
    ),
    matshow_kwargs=dict(cmap=plt.cm.Greys),
    line_kwargs=dict(lw=0.5, ls="dashed", alpha=0.25),
);

# NOTE(review): save_fig is called with save_jpg=False although the target name
# ends in .jpeg - confirm that a file is actually written with these flags.
save_loc = FIGURE_DIR / 'manuscript' / 'Figures' / 'LHcallsUMAPPairs.jpeg'
ensure_dir(save_loc.as_posix())
save_fig(save_loc, dpi=600, save_jpg=False)

In [None]:
# Silhouette score of the UMAP coordinates when grouped by SimpLabel.
LHSimpscore = silhouette_score(
    np.array(call_df['umap'].tolist()),
    labels=call_df['SimpLabel'].values,
)
LHSimpscore

In [64]:
# Per-call silhouette values of the UMAP coordinates for the observed SimpLabel
# groups, and for a shuffled copy of the same labels that serves as a chance
# baseline. This overwrites any earlier `samples` / `chance_samples`.
# The shuffle is drawn from a generator with a fixed seed so that the baseline,
# and any statistic computed from it, is the same on every run.
shuffle_rng = np.random.RandomState(0)
umap_points = np.array(call_df['umap'].tolist())
samples = silhouette_samples(umap_points, labels=call_df.SimpLabel.values)
chance_samples = silhouette_samples(
    umap_points,
    labels=shuffle_rng.permutation(call_df.SimpLabel.values),
)

In [None]:
# Kruskal-Wallis test comparing the observed silhouette values with the shuffled-label ones.
# `samples` and `chance_samples` are whatever the most recently executed
# silhouette_samples cell assigned, so run that cell immediately before this one.
KWsimplab = kruskal(samples, chance_samples)
KWsimplab

In [83]:
# Per-call silhouette values of the UMAP coordinates for the observed location
# labels, and for a shuffled copy of the same labels that serves as a chance
# baseline. This overwrites any earlier `samples` / `chance_samples`.
# The shuffle is drawn from a generator with a fixed seed so that the baseline,
# and any statistic computed from it, is the same on every run.
shuffle_rng = np.random.RandomState(0)
umap_points = np.array(call_df['umap'].tolist())
samples = silhouette_samples(umap_points, labels=call_df.location.values)
chance_samples = silhouette_samples(
    umap_points,
    labels=shuffle_rng.permutation(call_df.location.values),
)

In [None]:
# Kruskal-Wallis test comparing the observed silhouette values with the shuffled-label ones.
# `samples` and `chance_samples` are whatever the most recently executed
# silhouette_samples cell assigned, so run that cell immediately before this one.
KWloc = kruskal(samples, chance_samples)
KWloc

In [86]:
# Silhouette score of the UMAP coordinates under each candidate grouping column.
sexscore, locscore, indvscore, callscore, groupscore = (
    silhouette_score(
        np.array(call_df['umap'].tolist()),
        labels=call_df[grouping].values,
    )
    for grouping in ("sex", "location", "indv", "call_lab_simp", "group")
)

In [None]:
# Display the five silhouette scores side by side (sex, location, individual, call type, group).
sexscore, locscore, indvscore, callscore, groupscore

# Figures for MS

In [None]:
fig = plt.figure()
fig.set_figheight(15)
fig.set_figwidth(15)
# 2 x 4 grid. Top row: two panels of two columns each. Bottom row: one
# two-column panel in the middle, with a one-column axes on either side that
# nothing is drawn on.
ax1 = plt.subplot2grid(shape=(2, 4), loc=(0, 0), colspan=2)  # top row, left
ax2 = plt.subplot2grid(shape=(2, 4), loc=(0, 2), colspan=2)  # top row, right
ax3 = plt.subplot2grid(shape=(2, 4), loc=(1, 0), colspan=1)  # bottom row, left (left empty)
ax4 = plt.subplot2grid(shape=(2, 4), loc=(1, 1), colspan=2)  # bottom row, centre
ax5 = plt.subplot2grid(shape=(2, 4), loc=(1, 3), colspan=1)  # bottom row, right (left empty)
for grid_ax in (ax1, ax2, ax3, ax4, ax5):
    grid_ax.axis('off')

# One (target axes, scatter labels, colour palette) entry per panel. Every
# panel shares the same scatter_spec settings, so the call is written once.
panels = [
    (
        ax1,
        call_df.call_lab_simp.values,
        ["thistle", "lightpink", "palevioletred","mediumvioletred", "purple", "midnightblue"],
    ),
    (
        ax2,
        call_df.SimpLabel.values,
        ["lightblue", "teal", "darkslategray","mediumvioletred", "purple", "midnightblue"],
    ),
    (
        ax4,
        call_df.location.values,
        ["darkgreen", "darkseagreen"],
    ),
]
for panel_ax, panel_labels, colors in panels:
    scatter_spec(
        np.array(list(call_df['umap'].values)),
        specs,
        column_size=10,
        x_range = [-1.75,8.75],
        y_range = [-1.75,8.75],
        pal_color="hls",
        color_points=False,
        enlarge_points=0,
        ax=panel_ax,
        range_pad = 0.15,
        scatter_kwargs = {
            'labels': panel_labels,
            'alpha':1,
            's': 15,
            'show_legend': True,
            "color_palette": colors,
        },
        matshow_kwargs = {
            'cmap': plt.cm.Greys
        },
        line_kwargs = {
            'lw':0.5,
            'ls':"dashed",
            'alpha':0.25,
        },
        draw_lines=True,
        n_subset= 1000,
        border_line_width = 3,
    )

# Panel letters are added only after every panel has been drawn.
for panel_ax, panel_title in ((ax1, "A)"), (ax2, "B)"), (ax4, "C)")):
    panel_ax.set_title(panel_title, loc='left', fontsize = 25)

# NOTE(review): save_fig is called with save_jpg=False although the target name
# ends in .jpeg - confirm that a file is actually written with these flags.
save_loc = (FIGURE_DIR / 'manuscript' / 'Figures' / 'Figure S3AB_LocInc.jpeg')
ensure_dir(save_loc.as_posix())
plt.tight_layout()
save_fig(save_loc, dpi=600, save_jpg=False)

In [None]:
fig = plt.figure()
fig.set_figheight(10)
fig.set_figwidth(20)
# Single row of 20 columns split into two side-by-side panels of 10 columns each.
ax1 = plt.subplot2grid(shape=(1, 20), loc=(0, 0), colspan=10)  # left panel
ax2 = plt.subplot2grid(shape=(1, 20), loc=(0, 10), colspan=10)  # right panel
ax1.axis('off')
ax2.axis('off')

# One (target axes, scatter labels, colour palette) entry per panel. Both
# panels share the same scatter_spec settings, so the call is written once.
panels = [
    (
        ax1,
        call_df.call_lab_simp.values,
        ["thistle", "lightpink", "palevioletred","mediumvioletred", "purple", "midnightblue"],
    ),
    (
        ax2,
        call_df.SimpLabel.values,
        ["lightblue", "teal", "darkslategray","mediumvioletred", "purple", "midnightblue"],
    ),
]
for panel_ax, panel_labels, colors in panels:
    scatter_spec(
        np.array(list(call_df['umap'].values)),
        specs,
        column_size=10,
        x_range = [-1.75,8.75],
        y_range = [-1.75,8.75],
        pal_color="hls",
        color_points=False,
        enlarge_points=0,
        ax=panel_ax,
        range_pad = 0.15,
        scatter_kwargs = {
            'labels': panel_labels,
            'alpha':1,
            's': 15,
            'show_legend': True,
            "color_palette": colors,
        },
        matshow_kwargs = {
            'cmap': plt.cm.Greys
        },
        line_kwargs = {
            'lw':0.5,
            'ls':"dashed",
            'alpha':0.25,
        },
        draw_lines=True,
        n_subset= 1000,
        border_line_width = 3,
    )

# Panel letters are added only after both panels have been drawn.
for panel_ax, panel_title in ((ax1, "A)"), (ax2, "B)")):
    panel_ax.set_title(panel_title, loc='left', fontsize = 25)

# NOTE(review): save_fig is called with save_jpg=False although the target name
# ends in .jpeg - confirm that a file is actually written with these flags.
save_loc = (FIGURE_DIR / 'manuscript' / 'Figures' / 'Figure S3AB_240822.jpeg')
ensure_dir(save_loc.as_posix())
save_fig(save_loc, dpi=600, save_jpg=False)
plt.show()

In [None]:
fig = plt.figure()
fig.set_figheight(10)
fig.set_figwidth(20)
# Single row of 20 columns split into two side-by-side panels of 10 columns each.
ax1 = plt.subplot2grid(shape=(1, 20), loc=(0, 0), colspan=10)  # left panel
ax2 = plt.subplot2grid(shape=(1, 20), loc=(0, 10), colspan=10)  # right panel
ax1.axis('off')
ax2.axis('off')

# One (target axes, scatter labels, show legend?) entry per panel. Both panels
# use the "magma" palette and otherwise identical scatter_spec settings; the
# legend is switched off for the individual-identity panel only.
panels = [
    (ax1, call_df.indv.values, False),
    (ax2, call_df.location.values, True),
]
for panel_ax, panel_labels, panel_legend in panels:
    scatter_spec(
        np.array(list(call_df['umap'].values)),
        specs,
        column_size=10,
        x_range = [-1.75,8.75],
        y_range = [-1.75,8.75],
        pal_color="hls",
        color_points=False,
        enlarge_points=0,
        ax=panel_ax,
        range_pad = 0.15,
        scatter_kwargs = {
            'labels': panel_labels,
            'alpha':1,
            's': 15,
            'show_legend': panel_legend,
            "color_palette": "magma",
        },
        matshow_kwargs = {
            'cmap': plt.cm.Greys
        },
        line_kwargs = {
            'lw':0.5,
            'ls':"dashed",
            'alpha':0.25,
        },
        draw_lines=True,
        n_subset= 1000,
        border_line_width = 3,
    )

# Panel letters are added only after both panels have been drawn.
for panel_ax, panel_title in ((ax1, "A)"), (ax2, "B)")):
    panel_ax.set_title(panel_title, loc='left', fontsize = 25)

# NOTE(review): save_fig is called with save_jpg=False although the target name
# ends in .jpeg - confirm that a file is actually written with these flags.
save_loc = (FIGURE_DIR / 'manuscript' / 'Figures' / 'Figure S3AB_IndvLoc.jpeg')
ensure_dir(save_loc.as_posix())
save_fig(save_loc, dpi=600, save_jpg=False)
plt.show()