### Load dataframe

In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from tqdm.auto import tqdm
from joblib import Parallel, delayed
import pandas as pd

In [2]:
import avgn

In [3]:
from avgn.utils.paths import DATA_DIR, most_recent_subdirectory, ensure_dir, FIGURE_DIR
from avgn.visualization.spectrogram import draw_spec_set
from avgn.utils.general import save_fig

In [4]:
from scipy.stats import kruskal
from sklearn.metrics import silhouette_score, silhouette_samples

In [5]:
# Name of the sub-directory under DATA_DIR from which the pickled dataframes below are read.
DATASET_ID = "git_repos_call"

In [6]:
# Timestamp-style identifier; it is not referenced anywhere else in the visible part of this notebook.
# NOTE(review): presumably identifies the run this analysis was built from — confirm, or remove if unused.
DT_ID = '2022-03-12_17-46-00'

## No LH Calls

In [7]:
# Read the pickled dataframe stored in the "NoLHcalls" folder and preview its first three rows.
# NOTE(review): unpickling can execute arbitrary code — only load files from a trusted source.
no_lh_pickle = DATA_DIR / DATASET_ID / "NoLHcalls" / 'NoLHCall_UMAP.pickle'
call_df = pd.read_pickle(no_lh_pickle)
call_df.head(3)

Unnamed: 0_level_0,start_time,end_time,labels,indv,indvi,filename,group,location,sex,wav_loc,...,comb_labels,specs,umap,call_lab_simp,combi_lab_simp,call_unique_num,call_pos_combi,combi_label,combi_unique_num,simp
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0.753604,0.92116,DSSHDS,MGGY,0,BWY MGGY Call Combo 1 290719 PM,BWYa,CRAWLEY,F,C:/Users/slwal/anaconda3/envs/PY36/avgn_paper-...,...,DSSHDS,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...","[9.309541, 7.8061166]",DS-SH-DS,DS-SH-DS SH-LH,0,0,DSSHDS SHSHLH,0,Other Calls
2,1.218085,1.308841,DS,MGGY,0,BWY MGGY Call Combo 1 300719 AM,BWYa,CRAWLEY,F,C:/Users/slwal/anaconda3/envs/PY36/avgn_paper-...,...,DS,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...","[7.7548466, 3.2709882]",DS,DS SH-LH,2,0,DS USC,1,Other Calls
4,1.244022,1.420183,DSSHDS,MGGY,0,BWY MGGY Discrete 1 300719 AM,BWYa,CRAWLEY,F,C:/Users/slwal/anaconda3/envs/PY36/avgn_paper-...,...,DSSHDS,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...","[7.8939924, 7.3213944]",DS-SH-DS,DS-SH-DS LH,4,0,DSSHDS LH,2,Other Calls


In [8]:
# Number of rows in call_df.
call_df.shape[0]

358

In [9]:
def norm(x):
    """Min-max scale ``x`` so that its values span the interval [0, 1].

    Parameters
    ----------
    x : array-like
        Numeric values to rescale.

    Returns
    -------
    Array of the same shape as ``x``. When every element of ``x`` is equal
    (max == min) an all-zero float array is returned, instead of evaluating
    0 / 0 and filling the result with NaN.
    """
    lowest = np.min(x)
    span = np.max(x) - lowest
    if span == 0:
        # Constant input: there is no range to scale by, so map everything to 0.
        return np.zeros(np.shape(x), dtype=float)
    return (x - lowest) / span

In [10]:
# Apply norm() to every entry of call_df.specs, with a tqdm progress bar.
specs = [norm(spec) for spec in tqdm(list(call_df.specs.values))]

  0%|          | 0/358 [00:00<?, ?it/s]

In [11]:
from avgn.visualization.projections import scatter_spec

In [None]:
# `nex` is assigned here but is not used by anything else in this cell.
nex = -1
# Colour names handed to the scatter as its `color_palette`.
colors = [
    "navy", "darkslateblue", "slateblue", "steelblue", "lightblue", "lightseagreen",
    "teal", "seagreen", "darkgreen", "forestgreen", "darkseagreen",
]

# Keyword dictionaries forwarded to scatter_spec
# (as scatter_kwargs / matshow_kwargs / line_kwargs respectively).
point_style = dict(
    labels=call_df.call_lab_simp.values,
    alpha=1,
    s=10,
    show_legend=True,
    color_palette=colors,
)
tile_style = dict(cmap=plt.cm.Greys)
connector_style = dict(lw=0.5, ls="dashed", alpha=0.25)

# UMAP coordinates of call_df plotted with the normalised spectrograms in `specs`.
scatter_spec(
    np.array(list(call_df['umap'].values)),
    specs,
    scatter_kwargs=point_style,
    matshow_kwargs=tile_style,
    line_kwargs=connector_style,
    x_range=[2, 12],
    y_range=[0, 10],
    figsize=(10, 10),
    column_size=10,
    range_pad=0.15,
    pal_color="hls",
    color_points=False,
    enlarge_points=0,
    draw_lines=False,
    n_subset=1000,
    border_line_width=3,
);

# NOTE(review): save_jpg is switched off and no other save_* flag is passed —
# confirm that save_fig still writes a file for this path.
save_loc = FIGURE_DIR / 'manuscript' / 'Figures' / 'S5A.jpeg'
ensure_dir(save_loc.as_posix())
save_fig(save_loc, save_jpg=False, dpi=600)

### Silhouette & KW-H test

In [None]:
# Silhouette score of the UMAP coordinates when the calls are grouped by call_lab_simp.
call_umap_xy = np.array(list(call_df['umap'].values))
callscore = silhouette_score(call_umap_xy, labels=call_df.call_lab_simp.values)
callscore

In [None]:
# Per-row silhouette values for the real call_lab_simp labels (`samples`) and for
# the same labels shuffled across rows (`chance_samples`, the chance baseline).
# The shuffle uses a fixed seed: the unseeded np.random.permutation produced a
# different baseline — and so a different downstream test statistic — on every run.
CHANCE_SEED = 0  # any fixed integer works; it only has to stay constant between runs
call_umap_xy = np.array(list(call_df['umap'].values))
call_labels = call_df.call_lab_simp.values
shuffled_call_labels = np.random.RandomState(CHANCE_SEED).permutation(call_labels)
samples = silhouette_samples(call_umap_xy, labels=call_labels)
chance_samples = silhouette_samples(call_umap_xy, labels=shuffled_call_labels)

In [None]:
# Kruskal-Wallis H-test comparing the silhouette values obtained with the real
# labels (`samples`) against those obtained with shuffled labels (`chance_samples`).
KWlabels = kruskal(samples, chance_samples)
KWlabels

In [None]:
# Silhouette score of the same UMAP coordinates under four different groupings
# of the rows: sex, individual, simplified call label and group.
call_umap_xy = np.array(list(call_df['umap'].values))
sexscore, indvscore, calllabelscore, groupscore = (
    silhouette_score(call_umap_xy, labels=call_df[column].values)
    for column in ("sex", "indv", "call_lab_simp", "group")
)

In [None]:
# Display the four silhouette scores computed in the previous cell.
sexscore, indvscore, calllabelscore, groupscore

In [None]:
# Per-row silhouette values for the real sex labels (`samples`) and for the same
# labels shuffled across rows (`chance_samples`, the chance baseline).
# The shuffle uses a fixed seed: the unseeded np.random.permutation produced a
# different baseline — and so a different downstream test statistic — on every run.
# Note that this overwrites the `samples` / `chance_samples` of the call-label test.
CHANCE_SEED = 0  # any fixed integer works; it only has to stay constant between runs
call_umap_xy = np.array(list(call_df['umap'].values))
sex_labels = call_df.sex.values
shuffled_sex_labels = np.random.RandomState(CHANCE_SEED).permutation(sex_labels)
samples = silhouette_samples(call_umap_xy, labels=sex_labels)
chance_samples = silhouette_samples(call_umap_xy, labels=shuffled_sex_labels)

In [None]:
# Kruskal-Wallis H-test on the sex-label silhouette values versus their
# shuffled-label counterpart (both computed in the previous cell).
KWsex = kruskal(samples, chance_samples)
KWsex

## No LH containing or NL Lone Calls

In [45]:
# Read the pickled dataframe stored in the "Other_calls" folder and preview its first three rows.
# NOTE(review): unpickling can execute arbitrary code — only load files from a trusted source.
other_calls_pickle = DATA_DIR / DATASET_ID / "Other_calls" / 'OtherCall_UMAP.pickle'
other_df = pd.read_pickle(other_calls_pickle)
other_df.head(3)

Unnamed: 0_level_0,start_time,end_time,labels,indv,indvi,filename,group,location,sex,wav_loc,...,specs,simplified,umap,call_lab_simp,combi_lab_simp,call_unique_num,call_pos_combi,combi_label,combi_unique_num,simp
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0.753604,0.92116,DSSHDS,MGGY,0,BWY MGGY Call Combo 1 290719 PM,BWYa,CRAWLEY,F,C:/Users/slwal/anaconda3/envs/PY36/avgn_paper-...,...,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",1,"[8.485272, 6.9444375]",DS-SH-DS,DS-SH-DS SH-LH,0,0,DSSHDS SHSHLH,0,Other Calls
2,1.218085,1.308841,DS,MGGY,0,BWY MGGY Call Combo 1 300719 AM,BWYa,CRAWLEY,F,C:/Users/slwal/anaconda3/envs/PY36/avgn_paper-...,...,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",1,"[3.339999, 4.751708]",DS,DS SH-LH,2,0,DS USC,1,Other Calls
4,1.244022,1.420183,DSSHDS,MGGY,0,BWY MGGY Discrete 1 300719 AM,BWYa,CRAWLEY,F,C:/Users/slwal/anaconda3/envs/PY36/avgn_paper-...,...,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",1,"[8.053281, 5.8522944]",DS-SH-DS,DS-SH-DS LH,4,0,DSSHDS LH,2,Other Calls


In [46]:
# Number of rows in other_df.
other_df.shape[0]

332

In [47]:
# Distinct values present in the `simp` column of other_df.
other_df.simp.unique()

array(['Other Calls'], dtype=object)

In [48]:
def norm(x):
    """Min-max scale ``x`` so that its values span the interval [0, 1].

    This re-declares the helper of the same name defined earlier in the notebook.

    Parameters
    ----------
    x : array-like
        Numeric values to rescale.

    Returns
    -------
    Array of the same shape as ``x``. When every element of ``x`` is equal
    (max == min) an all-zero float array is returned, instead of evaluating
    0 / 0 and filling the result with NaN.
    """
    lowest = np.min(x)
    span = np.max(x) - lowest
    if span == 0:
        # Constant input: there is no range to scale by, so map everything to 0.
        return np.zeros(np.shape(x), dtype=float)
    return (x - lowest) / span

In [49]:
# Apply norm() to every entry of other_df.specs, with a tqdm progress bar.
Ospecs = [norm(spec) for spec in tqdm(list(other_df.specs.values))]

  0%|          | 0/332 [00:00<?, ?it/s]

In [None]:
# `nex` is assigned here but is not used by anything else in this cell.
nex = -1
# NOTE(review): `color` is defined but the call below passes "inferno" as its
# color_palette — confirm which palette is intended.
color = [
    "black", "indigo", "darkred", "indianred", "orangered", "palevioletred",
    "sandybrown", "orange", "gold", "khaki", "darkgoldenrod",
]

# Keyword dictionaries forwarded to scatter_spec
# (as scatter_kwargs / matshow_kwargs / line_kwargs respectively).
point_style = dict(
    labels=other_df.call_lab_simp.values,
    alpha=1,
    s=10,
    show_legend=True,
    color_palette="inferno",
)
tile_style = dict(cmap=plt.cm.Greys)
connector_style = dict(lw=0.5, ls="dashed", alpha=0.25)

# UMAP coordinates of other_df plotted with the normalised spectrograms in `Ospecs`.
scatter_spec(
    np.array(list(other_df['umap'].values)),
    Ospecs,
    scatter_kwargs=point_style,
    matshow_kwargs=tile_style,
    line_kwargs=connector_style,
    x_range=[2, 10],
    y_range=[2, 10],
    figsize=(10, 10),
    column_size=10,
    range_pad=0.15,
    pal_color="hls",
    color_points=False,
    enlarge_points=0,
    draw_lines=False,
    n_subset=1000,
    border_line_width=3,
);

# NOTE(review): the other panels in this notebook are saved as 'S5A' and
# 'FigureS5C' — confirm that 'FigSB' is the intended file name.
# NOTE(review): save_jpg is switched off and no other save_* flag is passed —
# confirm that save_fig still writes a file for this path.
save_loc = FIGURE_DIR / 'manuscript' / 'Figures' / 'FigSB.jpeg'
ensure_dir(save_loc.as_posix())
save_fig(save_loc, save_jpg=False, dpi=600)

In [None]:
# Silhouette score of other_df's UMAP coordinates when the rows are grouped by call_lab_simp.
other_umap_xy = np.array(list(other_df['umap'].values))
labelscore = silhouette_score(other_umap_xy, labels=other_df.call_lab_simp.values)
labelscore

In [None]:
# Per-row silhouette values for other_df's real call_lab_simp labels (`samples`)
# and for the same labels shuffled across rows (`chance_samples`, the chance baseline).
# The shuffle uses a fixed seed: the unseeded np.random.permutation produced a
# different baseline — and so a different downstream test statistic — on every run.
CHANCE_SEED = 0  # any fixed integer works; it only has to stay constant between runs
other_umap_xy = np.array(list(other_df['umap'].values))
other_call_labels = other_df.call_lab_simp.values
shuffled_other_call_labels = np.random.RandomState(CHANCE_SEED).permutation(other_call_labels)
samples = silhouette_samples(other_umap_xy, labels=other_call_labels)
chance_samples = silhouette_samples(other_umap_xy, labels=shuffled_other_call_labels)

In [None]:
# Kruskal-Wallis H-test on other_df's call-label silhouette values versus their
# shuffled-label counterpart. This rebinds the `KWlabels` name used for call_df above.
KWlabels = kruskal(samples, chance_samples)
KWlabels

### Further grouping
The calls appear to group further according to similar call labels, so we create a new variable, `CombCallLabel`, to describe these broader groups.

In [54]:
# Start CombCallLabel from the simplified call label; the following cells
# overwrite selected values to merge labels into broader groups.
other_df['CombCallLabel'] = other_df['call_lab_simp']

In [55]:
# Boolean row masks, one per exact call label, consumed by the next cell when
# the labels are merged into broader groups. (The numbering skips cond8.)
comb_label_column = other_df['CombCallLabel']
cond1 = comb_label_column.eq('DS-SH-DS')
cond2 = comb_label_column.eq('DS')
cond3 = comb_label_column.eq('NL-SH-DS')
cond4 = comb_label_column.eq('NL-DS')
cond5 = comb_label_column.eq('SH-DS-SH-DS')
cond6 = comb_label_column.eq('SH-DS')
cond7 = comb_label_column.eq('NL-DS-SH-DS')
cond9 = comb_label_column.eq('SH-NL-DS')
cond10 = comb_label_column.eq('SH-DS-SH')
cond11 = comb_label_column.eq('SH')

In [56]:
# Overwrite CombCallLabel for the rows selected by each mask, in the same order
# as the masks were defined. 'DS' is written back unchanged.
for row_mask, merged_label in (
    (cond1, 'SH Lone/Any Combined DS & SH'),
    (cond2, 'DS'),
    (cond3, 'Any Combined NL + DS/SH'),
    (cond4, 'Any Combined NL + DS/SH'),
    (cond5, 'SH Lone/Any Combined DS & SH'),
    (cond6, 'SH Lone/Any Combined DS & SH'),
    (cond7, 'Any Combined NL + DS/SH'),
    (cond9, 'Any Combined NL + DS/SH'),
    (cond10, 'SH Lone/Any Combined DS & SH'),
    (cond11, 'SH Lone/Any Combined DS & SH'),
):
    other_df.loc[row_mask, 'CombCallLabel'] = merged_label

In [None]:
# `nex` is assigned here but is not used by anything else in this cell.
nex = -1
# NOTE(review): `colors` is defined but the call below passes "inferno" as its
# color_palette — confirm which palette is intended.
colors = ["midnightblue", "royalblue", "lightskyblue", "teal"]

# Keyword dictionaries forwarded to scatter_spec
# (as scatter_kwargs / matshow_kwargs / line_kwargs respectively).
point_style = dict(
    labels=other_df.CombCallLabel.values,
    alpha=1,
    s=15,
    show_legend=True,
    color_palette="inferno",
)
tile_style = dict(cmap=plt.cm.Greys)
connector_style = dict(lw=0.5, ls="dashed", alpha=0.25)

# Same other_df UMAP coordinates as before, now labelled by the merged CombCallLabel.
scatter_spec(
    np.array(list(other_df['umap'].values)),
    Ospecs,
    scatter_kwargs=point_style,
    matshow_kwargs=tile_style,
    line_kwargs=connector_style,
    x_range=[1, 10.5],
    y_range=[2, 11.5],
    figsize=(10, 10),
    column_size=10,
    range_pad=0.15,
    pal_color="hls",
    color_points=False,
    enlarge_points=0,
    draw_lines=False,
    n_subset=1000,
    border_line_width=3,
);

# NOTE(review): save_jpg is switched off and no other save_* flag is passed —
# confirm that save_fig still writes a file for this path.
save_loc = FIGURE_DIR / 'manuscript' / 'Figures' / 'FigureS5C.jpeg'
ensure_dir(save_loc.as_posix())
save_fig(save_loc, save_jpg=False, dpi=600)

In [None]:
# Silhouette score of other_df's UMAP coordinates when grouped by the merged CombCallLabel.
other_umap_xy = np.array(list(other_df['umap'].values))
score = silhouette_score(other_umap_xy, labels=other_df.CombCallLabel.values)
score

In [None]:
# Inputs for the Kruskal-Wallis test in the next cell: per-row silhouette values
# for the real CombCallLabel labels (`samples`) and for the same labels shuffled
# across rows (`chance_samples`, the chance baseline).
# The shuffle uses a fixed seed: the unseeded np.random.permutation produced a
# different baseline — and so a different test statistic — on every run.
CHANCE_SEED = 0  # any fixed integer works; it only has to stay constant between runs
other_umap_xy = np.array(list(other_df['umap'].values))
comb_labels = other_df.CombCallLabel.values
shuffled_comb_labels = np.random.RandomState(CHANCE_SEED).permutation(comb_labels)
samples = silhouette_samples(other_umap_xy, labels=comb_labels)
chance_samples = silhouette_samples(other_umap_xy, labels=shuffled_comb_labels)

In [None]:
# Kruskal-Wallis H-test on the CombCallLabel silhouette values versus their
# shuffled-label counterpart (both computed in the previous cell).
KWCombCallLabel = kruskal(samples, chance_samples)
KWCombCallLabel

In [None]:
# Silhouette score of other_df's UMAP coordinates under four different groupings
# of the rows: sex, individual, simplified call label and group.
# These names were also used for the call_df scores earlier and are rebound here.
other_umap_xy = np.array(list(other_df['umap'].values))
sexscore, indvscore, callscore, groupscore = (
    silhouette_score(other_umap_xy, labels=other_df[column].values)
    for column in ("sex", "indv", "call_lab_simp", "group")
)

In [None]:
# Display the four other_df silhouette scores computed in the previous cell.
sexscore, indvscore, callscore, groupscore

## Figure for MS

In [None]:
def _draw_panel(points, tiles, panel_ax, x_range, y_range, labels, palette, **extra):
    """Call scatter_spec for one panel using the settings shared by all three panels.

    Only the data, the target axes, the axis ranges, the point labels and the
    palette name differ between panels; anything given in ``extra`` is forwarded
    to scatter_spec unchanged.
    """
    scatter_spec(
        np.array(list(points)),
        tiles,
        column_size=10,
        x_range=x_range,
        y_range=y_range,
        pal_color="hls",
        color_points=False,
        enlarge_points=0,
        ax=panel_ax,
        range_pad=0.15,
        scatter_kwargs={
            'labels': labels,
            'alpha': 1,
            's': 15,
            'show_legend': False,
            "color_palette": palette,
        },
        matshow_kwargs={'cmap': plt.cm.Greys},
        line_kwargs={'lw': 0.5, 'ls': "dashed", 'alpha': 0.25},
        draw_lines=True,
        n_subset=1000,
        border_line_width=3,
        **extra,
    )


# 15 x 15 inch figure on a 2 x 4 grid: first row = ax1 | ax2 (2 columns) | ax3,
# second row = ax4 (2 columns) | ax5 (2 columns). Only ax2, ax4 and ax5 are
# passed to scatter_spec; ax1 and ax3 are created and just switched off.
fig = plt.figure()
fig.set_figheight(15)
fig.set_figwidth(15)
ax1, ax2, ax3, ax4, ax5 = (
    plt.subplot2grid(shape=(2, 4), loc=grid_loc, colspan=grid_span)
    for grid_loc, grid_span in (
        ((0, 0), 1),
        ((0, 1), 2),
        ((0, 3), 1),
        ((1, 0), 2),
        ((1, 2), 2),
    )
)
for panel in (ax1, ax2, ax3, ax4, ax5):
    panel.axis('off')

# NOTE(review): the `colors` / `color` lists below are defined but every panel
# passes a named palette ("viridis" / "inferno") instead — confirm which is intended.
colors = [
    "navy", "darkslateblue", "steelblue", "slateblue", "deepskyblue", "cadetblue",
    "lightblue", "darkseagreen", "forestgreen", "darkgreen", "darkslategray",
]

# Panel A: call_df, labelled by simplified call label.
_draw_panel(
    call_df['umap'].values,
    specs,
    ax2,
    [2.25, 12],
    [0.25, 10],
    call_df.call_lab_simp.values,
    "viridis",
);

color = [
    "darkred", "crimson", "palevioletred", "lightsalmon", "orangered",
    "orange", "sandybrown", "khaki", "gold", "darkgoldenrod",
]

# Panel B: other_df, labelled by simplified call label.
_draw_panel(
    other_df['umap'].values,
    Ospecs,
    ax4,
    [0, 9.5],
    [0.5, 10],
    other_df.call_lab_simp.values,
    "inferno",
);

# `nex` is assigned here but is not used by anything else in this cell.
nex = -1
colors = ["midnightblue", "royalblue", "lightskyblue", "teal"]

# Panel C: other_df, labelled by the merged CombCallLabel.
# NOTE(review): only this panel also passes figsize together with ax — confirm that is intended.
_draw_panel(
    other_df['umap'].values,
    Ospecs,
    ax5,
    [0, 9.5],
    [0.5, 10],
    other_df.CombCallLabel.values,
    "inferno",
    figsize=(10, 10),
);

# Panel letters, left-aligned above each used axes.
for panel, panel_tag in ((ax2, "A)"), (ax4, "B)"), (ax5, "C)")):
    panel.set_title(panel_tag, loc='left', fontsize=25)

# NOTE(review): save_jpg is switched off and no other save_* flag is passed —
# confirm that save_fig still writes a file for this path.
save_loc = FIGURE_DIR / 'manuscript' / 'Figures' / 'FigS5ABC.tiff'
ensure_dir(save_loc.as_posix())
plt.tight_layout()
save_fig(save_loc, save_jpg=False, dpi=600)