# Within-Segment Class Variation
This notebook examines variation within each segment class in the combinatorial repertoire of a single individual (VVVM).

In [1]:
from tqdm.auto import tqdm

In [2]:
import avgn

In [3]:
import pandas as pd
import numpy as np
from avgn.utils.paths import DATA_DIR, ensure_dir, FIGURE_DIR

In [4]:
# Name of the dataset folder; used below as a path component under DATA_DIR
DATASET_ID = 'git_repos'

In [5]:
# Timestamp-named sub-folder of the dataset from which the pickle is loaded
DT_ID = "2022-03-04_18-41-29"

In [6]:
# NOTE(review): unpickling can execute arbitrary code - only load pickle files from a trusted source.
umap_pickle_path = DATA_DIR / DATASET_ID / DT_ID / 'VVVM_UMAP.pickle'
seg_df = pd.read_pickle(umap_pickle_path)
# Preview the first three segments
seg_df.head(3)

Unnamed: 0,start_time,end_time,labels,ID,start_times,end_times,call_label,call_start,seg_pos_call,call_unique_num,...,group,location,sex,wav_loc,key,rate,specs,comb_labels,call_lab_simp,umap
8,83.354563,83.394007,SH,1103,83.354563,83.394007,SHDS,83.354563,0,443,...,RVD,GUILDFORD,F,C:/Users/slwal/anaconda3/envs/PY36/avgn_paper-...,RVD VVVM Aerial Predator Call series 1 051120 AM,44100,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",SH,SH-DS,"[3.8620093, 0.2893895]"
3,0.87258,0.968577,SH,1122,0.87258,0.968577,SHDSSHSHLH,0.87258,0,452,...,RVD,GUILDFORD,F,C:/Users/slwal/anaconda3/envs/PY36/avgn_paper-...,RVD VVVM Call combo 1 170220 AM,44100,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",SH,SH-DS-SH-SH-LH,"[4.3479133, 0.30481717]"
1,0.692265,0.736698,SH,1128,0.692265,0.736698,SHDS,0.692265,0,454,...,RVD,GUILDFORD,F,C:/Users/slwal/anaconda3/envs/PY36/avgn_paper-...,RVD VVVM Call Combo Song Discrete Series 1 & X...,44100,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",SH,SH-DS,"[3.833734, 0.7222579]"


In [7]:
# Number of segments (rows) loaded
seg_df.shape[0]

244

In [8]:
# Order segments by their ID (rebinding the name rather than mutating in place)
seg_df = seg_df.sort_values(by='ID')

In [9]:
from avgn.visualization.projections_magpie import scatter_spec
from avgn.utils.general import save_fig

### Create labels that differentiate LH and NL segments, to examine variation within each segment class

In [12]:
# Independent copy of the rows whose combined label is "LH"
LH_df = seg_df[seg_df["comb_labels"].eq("LH")].copy()

In [13]:
# Start the 'simp' column from the simplified call label; it is relabelled further down
LH_df = LH_df.assign(simp=LH_df["call_lab_simp"])

In [14]:
# Distinct call contexts in which LH segments occur
LH_df["simp"].unique()

array(['LH-DS', 'SH-DS-SH-SH-LH', 'SH-DS-SH-LH', 'SH-DS-LH'], dtype=object)

In [15]:
## Boolean masks, one per call type that contains an LH segment.
## They are computed before any relabelling so the later assignments cannot affect each other.
cond1 = LH_df['simp'].eq('LH-DS')
cond2 = LH_df['simp'].eq('SH-DS-SH-SH-LH')
cond3 = LH_df['simp'].eq('SH-DS-SH-LH')
cond4 = LH_df['simp'].eq('SH-DS-LH')

In [16]:
## Relabel LH segments by call type
# cond1 ('LH-DS') gets its own label
LH_df.loc[cond1, 'simp'] = 'LH Precede DS'
# cond2-cond4 (the remaining three call types) all share one label
LH_df.loc[cond2 | cond3 | cond4, 'simp'] = 'LH Last Segment'

In [24]:
# Independent copy of the rows whose combined label is "NL"
NL_df = seg_df[seg_df["comb_labels"].eq("NL")].copy()

In [25]:
# Start the 'simp' column from the simplified call label; it is relabelled further down
NL_df = NL_df.assign(simp=NL_df["call_lab_simp"])

In [26]:
# Distinct call contexts in which NL segments occur
NL_df["simp"].unique()

array(['NL-DS', 'NL', 'NL-SH-DS'], dtype=object)

In [27]:
## Boolean masks, one per call type that contains an NL segment.
## NOTE: these reuse (and overwrite) the cond1-cond3 names from the LH relabelling above.
cond1 = NL_df['simp'].eq('NL-DS')
cond2 = NL_df['simp'].eq('NL')
cond3 = NL_df['simp'].eq('NL-SH-DS')

In [28]:
## Relabel NL segments by call type
# cond1 ('NL-DS') and cond3 ('NL-SH-DS') share one label
NL_df.loc[cond1 | cond3, 'simp'] = 'NL Combined'
# cond2 ('NL') gets its own label
NL_df.loc[cond2, 'simp'] = 'NL Lone'

In [35]:
# Confirm that only the two new NL labels remain
NL_df["simp"].unique()

array(['NL Combined', 'NL Lone'], dtype=object)

The SH and DS segments also need a `simp` column so that all four dataframes can be combined.

In [29]:
# Independent copy of the rows whose combined label is "SH"
SH_df = seg_df[seg_df["comb_labels"].eq("SH")].copy()

In [30]:
# SH segments are not subdivided: 'simp' simply repeats the combined label
SH_df = SH_df.assign(simp=SH_df["comb_labels"])

In [31]:
# Check the labels present in the SH frame
SH_df["simp"].unique()

array(['SH'], dtype=object)

In [32]:
# Independent copy of the rows whose combined label is "DS"
DS_df = seg_df[seg_df["comb_labels"].eq("DS")].copy()

In [33]:
# DS segments are not subdivided: 'simp' simply repeats the combined label
DS_df = DS_df.assign(simp=DS_df["comb_labels"])

In [34]:
# Check the labels present in the DS frame
DS_df["simp"].unique()

array(['DS'], dtype=object)

In [36]:
# Stack the four per-label frames (each now carrying a 'simp' column) into one dataframe.
# Rows come out grouped in the order NL, DS, SH, LH.
frames = [NL_df, DS_df, SH_df, LH_df]
seg_df2 = pd.concat(frames, axis=0)
# Row count of the combined frame
seg_df2.shape[0]

244

### Check Dataframe Before Plotting

In [37]:
# All labels that will be passed to the plot
seg_df2["simp"].unique()

array(['NL Combined', 'NL Lone', 'DS', 'SH', 'LH Precede DS',
       'LH Last Segment'], dtype=object)

In [43]:
# Preview the first three rows of the combined frame
seg_df2.head(3)

Unnamed: 0,start_time,end_time,labels,ID,start_times,end_times,call_label,call_start,seg_pos_call,call_unique_num,...,location,sex,wav_loc,key,rate,specs,comb_labels,call_lab_simp,umap,simp
0,5.088056,5.156045,NL,1095,5.088056,5.156045,NLDS,5.088056,0,439,...,GUILDFORD,F,C:/Users/slwal/anaconda3/envs/PY36/avgn_paper-...,RVD VVVM Aerial Predator Call series 1 051120 AM,44100,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",NL,NL-DS,"[-0.18258724, 1.9057153]",NL Combined
1,5.168242,5.257137,DS,1096,5.168242,5.257137,NLDS,5.088056,1,439,...,GUILDFORD,F,C:/Users/slwal/anaconda3/envs/PY36/avgn_paper-...,RVD VVVM Aerial Predator Call series 1 051120 AM,44100,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",DS,NL-DS,"[-3.3270218, -1.4800661]",DS
2,5.272444,5.380619,LH,1097,5.272444,5.380619,LHDS,5.272444,0,440,...,GUILDFORD,F,C:/Users/slwal/anaconda3/envs/PY36/avgn_paper-...,RVD VVVM Aerial Predator Call series 1 051120 AM,44100,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",LH,LH-DS,"[3.1635423, 2.847925]",LH Precede DS


In [40]:
def norm(x):
    """Min-max scale ``x`` into the range [0, 1].

    Parameters
    ----------
    x : array-like
        Numeric values to rescale (in this notebook, one spectrogram).

    Returns
    -------
    numpy.ndarray
        ``(x - min(x)) / (max(x) - min(x))``. When every value of ``x`` is
        identical (``max == min``) an all-zero array of the same shape is
        returned instead of dividing by zero and producing NaNs.
    """
    x = np.asarray(x)
    lowest = np.min(x)
    span = np.max(x) - lowest
    if span == 0:
        # Constant input: 0/0 would give NaN for every element
        return np.zeros(x.shape, dtype=float)
    return (x - lowest) / span

In [41]:
# Rescale every spectrogram with norm(), showing a progress bar
specs = [norm(spec) for spec in tqdm(seg_df2["specs"].tolist())]

  0%|          | 0/244 [00:00<?, ?it/s]

In [42]:
import matplotlib.pyplot as plt

In [None]:
nex = -1
# One colour per 'simp' label
color = [
    'xkcd:very dark blue',
    'teal',
    'xkcd:pale teal',
    'peachpuff',
    'indianred',
    'thistle',
]

# UMAP coordinates stacked into a single array (one row per segment)
umap_points = np.array(seg_df2['umap'].tolist())

# Options for the scatter points: labelled by the 'simp' category, with a legend
scatter_opts = {
    'labels': seg_df2['simp'].values,
    'alpha': 1,
    's': 3,
    'show_legend': True,
    "color_palette": color,
}
# Spectrograms are passed a greyscale colour map
matshow_opts = {'cmap': plt.cm.Greys}
# Line style options (thin, dashed, mostly transparent)
line_opts = {
    'lw': 0.5,
    'ls': "dashed",
    'alpha': 0.25,
}

# Plot the UMAP projection together with the normalised spectrograms
scatter_spec(
    umap_points,
    specs,
    column_size=10,
    x_range=[-6.25, 15.5],
    y_range=[-8.5, 12.25],
    pal_color="hls",
    color_points=False,
    enlarge_points=0,
    figsize=(10, 10),
    range_pad=0.15,
    scatter_kwargs=scatter_opts,
    matshow_kwargs=matshow_opts,
    line_kwargs=line_opts,
    draw_lines=True,
    n_subset=1000,
    border_line_width=3,
);
plt.show()