# Combine Label Classes & Tidy Dataframe

In this notebook, we combine two segment classes into the larger segment class in which they fit:
- US segment class change to SH segment class 
- HL segment class change to LH segment class

We will also create simplified labels for calls and combis to exclude repeated segments (e.g. SH-LH instead of SH-SH-LH) and calls (e.g. SH-DS LH instead of SH-DS SH-DS LH). 

In [1]:
from tqdm.auto import tqdm
import pandas as pd

In [2]:
import avgn

In [3]:
from avgn.utils.paths import DATA_DIR

In [4]:
DATASET_ID = "git_repos"

In [5]:
DT_ID = '2022-03-04_18-41-29'

In [6]:
seg_df = pd.read_pickle(DATA_DIR / DATASET_ID / DT_ID /  'segment_umap.pickle')
seg_df[:3]

Unnamed: 0,start_time,end_time,labels,ID,start_times,end_times,call_label,call_start,seg_pos_call,call_unique_num,...,indvi,filename,group,location,sex,wav_loc,key,rate,specs,umap
0,0.753604,0.776773,DS,0,0.753604,0.776773,DSSHDS,0.753604,0,0,...,0,BWY MGGY Call Combo 1 290719 PM,BWYa,CRAWLEY,F,C:/Users/slwal/anaconda3/envs/PY36/avgn_paper-...,BWY MGGY Call Combo 1 290719 PM,44100,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...","[14.15081, 3.406464]"
1,0.786865,0.835165,SH,1,0.786865,0.835165,DSSHDS,0.753604,1,0,...,0,BWY MGGY Call Combo 1 290719 PM,BWYa,CRAWLEY,F,C:/Users/slwal/anaconda3/envs/PY36/avgn_paper-...,BWY MGGY Call Combo 1 290719 PM,44100,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...","[5.388335, 10.057652]"
2,0.855941,0.92116,DS,2,0.855941,0.92116,DSSHDS,0.753604,2,0,...,0,BWY MGGY Call Combo 1 290719 PM,BWYa,CRAWLEY,F,C:/Users/slwal/anaconda3/envs/PY36/avgn_paper-...,BWY MGGY Call Combo 1 290719 PM,44100,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...","[12.421062, 0.6991728]"


In [7]:
list(seg_df.labels.unique())

['DS', 'SH', 'LH', 'US', 'HL', 'NL']

In [8]:
len(seg_df.combi_unique_num.unique())

222

## Combine some segment classes
Some classes with lower counts (<50) appear to  belong to other larger segment classes. As such, combine:
- US segments with SH segments
- HL segments with LH segments

In [9]:
## create a new column of segment labels data so that I can modify 
seg_df['comb_labels'] = seg_df['labels']

In [10]:
## Create conditions for US and HL segments (these will be modified in the next step)
cond1 = seg_df['comb_labels'] == 'US'
cond2 = seg_df['comb_labels'] == 'HL'

In [11]:
## Modified based on results of segment UMAP analysis
seg_df.loc[cond1, 'comb_labels'] = 'SH'
seg_df.loc[cond2, 'comb_labels'] = 'LH'

## Combine & Tidy Call Labels

In [12]:
len(seg_df.call_label.unique())

39

In [13]:
## create a new column of copied call label data so that I can modify
seg_df['call_lab_simp'] = seg_df['call_label']

In [14]:
## Create conditions for all labels containing repeated segments (these will be simplified in the next step) 
## and/or HL/US segments (these will be modified in the next step)
cond1 = seg_df['call_lab_simp'] == 'DSSHDS'
cond2 = seg_df['call_lab_simp'] == 'SHSHLH'
cond3 = seg_df['call_lab_simp'] == 'USHL'
cond4 = seg_df['call_lab_simp'] == 'SHLH'
cond5 = seg_df['call_lab_simp'] == 'NLSHDS'
cond6 = seg_df['call_lab_simp'] == 'NLDS'
cond7 = seg_df['call_lab_simp'] == 'SHDS'
cond8 = seg_df['call_lab_simp'] == 'SHSHDS'
cond9 = seg_df['call_lab_simp'] == 'DSSHSHDS'
cond10 = seg_df['call_lab_simp'] == 'SHDSSHDS'
cond11 = seg_df['call_lab_simp'] == 'SHSHDSSHDS'
cond12 = seg_df['call_lab_simp'] == 'SHSHSHLH'
cond13 = seg_df['call_lab_simp'] == 'LHDS'
cond14 = seg_df['call_lab_simp'] == 'DSDS'
cond15 = seg_df['call_lab_simp'] == 'SHSHDSSHLH'
cond16 = seg_df['call_lab_simp'] == 'SHDSLH'
cond17 = seg_df['call_lab_simp'] == 'SHSHDSLH'
cond18 = seg_df['call_lab_simp'] == 'USLHDS'
cond19 = seg_df['call_lab_simp'] == 'SHSHSHSHDS'
cond20 = seg_df['call_lab_simp'] == 'SHSHSHDSSHSHLH'
cond21 = seg_df['call_lab_simp'] == 'USDSSHDS'
cond22 = seg_df['call_lab_simp'] == 'USSHDS'
cond23 = seg_df['call_lab_simp'] == 'SHDSSHSHLH'
cond24 = seg_df['call_lab_simp'] == 'SHDSSHLH'
cond25 = seg_df['call_lab_simp'] == 'SHSHNLDS'
cond26 = seg_df['call_lab_simp'] == 'SHNLDS'
cond27 = seg_df['call_lab_simp'] == 'SHSHSHDS'
cond28 = seg_df['call_lab_simp'] == 'DSSHDSSHSHSHLH'
cond29 = seg_df['call_lab_simp'] == 'SHDSSH'
cond30 = seg_df['call_lab_simp'] == 'SHDSSHSHSHLH'
cond31 = seg_df['call_lab_simp'] == 'NLDSSHDS'
cond32 = seg_df['call_lab_simp'] == 'SHSHSHSHLH'
cond33 = seg_df['call_lab_simp'] == 'SHHL'
cond34 = seg_df['call_lab_simp'] == 'SHDSSHSHSHSHLH'
cond35 = seg_df['call_lab_simp'] == 'HL'

In [15]:
### Simplify/Modify
seg_df.loc[cond1, 'call_lab_simp'] = 'DS-SH-DS'
seg_df.loc[cond2, 'call_lab_simp'] = 'SH-LH'
seg_df.loc[cond3, 'call_lab_simp'] = 'SH-LH'
seg_df.loc[cond4, 'call_lab_simp'] = 'SH-LH'
seg_df.loc[cond5, 'call_lab_simp'] = 'NL-SH-DS'
seg_df.loc[cond6, 'call_lab_simp'] = 'NL-DS'
seg_df.loc[cond7, 'call_lab_simp'] = 'SH-DS'
seg_df.loc[cond8, 'call_lab_simp'] = 'SH-DS'
seg_df.loc[cond9, 'call_lab_simp'] = 'DS-SH-DS'
seg_df.loc[cond10, 'call_lab_simp'] = 'SH-DS-SH-DS'
seg_df.loc[cond11, 'call_lab_simp'] = 'SH-DS-SH-DS'
seg_df.loc[cond12, 'call_lab_simp'] = 'SH-LH'
seg_df.loc[cond13, 'call_lab_simp'] = 'LH-DS'
seg_df.loc[cond14, 'call_lab_simp'] = 'DS'
seg_df.loc[cond15, 'call_lab_simp'] = 'SH-DS-SH-LH'
seg_df.loc[cond16, 'call_lab_simp'] = 'SH-DS-LH'
seg_df.loc[cond17, 'call_lab_simp'] = 'SH-DS-LH'
seg_df.loc[cond18, 'call_lab_simp'] = 'SH-LH-DS'
seg_df.loc[cond19, 'call_lab_simp'] = 'SH-DS'
seg_df.loc[cond20, 'call_lab_simp'] = 'SH-DS-SH-LH'
seg_df.loc[cond21, 'call_lab_simp'] = 'SH-DS-SH-DS'
seg_df.loc[cond22, 'call_lab_simp'] = 'SH-DS'
seg_df.loc[cond23, 'call_lab_simp'] = 'SH-DS-SH-LH'
seg_df.loc[cond24, 'call_lab_simp'] = 'SH-DS-SH-LH'
seg_df.loc[cond25, 'call_lab_simp'] = 'SH-NL-DS'
seg_df.loc[cond26, 'call_lab_simp'] = 'SH-NL-DS'
seg_df.loc[cond27, 'call_lab_simp'] = 'SH-DS'
seg_df.loc[cond28, 'call_lab_simp'] = 'DS-SH-DS-SH-LH'
seg_df.loc[cond29, 'call_lab_simp'] = 'SH-DS-SH'
seg_df.loc[cond30, 'call_lab_simp'] = 'SH-DS-SH-LH'
seg_df.loc[cond31, 'call_lab_simp'] = 'NL-DS-SH-DS'
seg_df.loc[cond32, 'call_lab_simp'] = 'SH-LH'
seg_df.loc[cond33, 'call_lab_simp'] = 'SH-LH'
seg_df.loc[cond34, 'call_lab_simp'] = 'SH-DS-SH-LH'
seg_df.loc[cond35, 'call_lab_simp'] = 'LH'

In [16]:
list(seg_df.call_lab_simp.unique())

['DS-SH-DS',
 'SH-LH',
 'DS',
 'LH',
 'NL-SH-DS',
 'NL-DS',
 'SH-DS-SH-DS',
 'SH-DS',
 'NL-DS-SH-DS',
 'LH-DS',
 'SH-DS-SH-LH',
 'SH-DS-LH',
 'SH-LH-DS',
 'NL',
 'SH-NL-DS',
 'DS-SH-DS-SH-LH',
 'SH-DS-SH',
 'SH']

## Simplify Combi Labels

In [17]:
## What are the unique combi labels?
list(seg_df.combi_label.unique())

['DSSHDS SHSHLH',
 'DS USHL',
 'DSSHDS LH',
 'DSSHDS SHLH',
 'DSSHDS NLSHDS',
 'NLDS DSSHDS SHLH',
 'SHDSSHDS SHSHLH',
 'DSSHDS NLSHDS SHDS LH',
 'NLDSSHDS NLSHDS SHDS LH',
 'NLDS SHDS NLSHDS SHSHDS SHLH',
 'NLDS SHDS',
 'DSSHDS NLSHDS SHSHDS SHSHLH',
 'DSSHSHDS SHSHLH',
 'NLDS SHSHDS SHSHLH',
 'SHDSSHDS NLSHDS SHSHDS SHSHLH',
 'SHDS SHDS SHSHLH',
 'SHSHDSSHDS NLSHDS SHDS SHSHLH',
 'DSSHDS NLSHDS SHDS SHSHLH',
 'SHDSSHDS NLSHDS SHDS SHSHLH',
 'SHDS NLDS SHDS SHSHLH',
 'DSSHDS SHSHSHLH',
 'NLDS LHDS',
 'SHSHDS SHHL',
 'SHSHDS SHHL NLSHDS SHDSSHSHSHSHLH',
 'SHDS SHHL',
 'NLDS DS',
 'DS LHDS',
 'NLDS DS LHDS',
 'NLDS DSDS SHSHDSSHLH',
 'NLDS DSDS SHDSLH',
 'NLSHDS DSDS SHSHDSLH',
 'NLDS DSDS SHSHDSLH',
 'DS USLHDS',
 'SHDS SHDSLH',
 'NLDS DS LH',
 'NLDS SHDS SHSHLH',
 'SHDS SHSHLH',
 'NLDS SHDS SHDS SHSHLH',
 'NLDS SHLH',
 'DS NLDS',
 'NLDS HL',
 'NLDS SHSHLH',
 'NLDS SHSHSHLH',
 'NLSHDS SHSHSHLH',
 'NLSHDS SHSHLH',
 'SHDS SHSHSHSHDS SHSHLH',
 'NLDS SHSHSHDSSHSHLH',
 'NLSHDS DSSHDS SHSHLH

In [18]:
## create a new column of copied combi label data so that I can modify
seg_df['combi_lab_simp'] = seg_df['combi_label']

In [19]:
## Create conditions for all labels containing repeated segments/calls (these will be modified in the next step)
cond1 = seg_df['combi_lab_simp'] == 'DSSHDS SHSHLH'
cond2 = seg_df['combi_lab_simp'] == 'DS USHL'
cond3 = seg_df['combi_lab_simp'] == 'DSSHDS LH'
cond4 = seg_df['combi_lab_simp'] == 'DSSHDS SHLH'
cond5 = seg_df['combi_lab_simp'] == 'DSSHDS NLSHDS'
cond6 = seg_df['combi_lab_simp'] == 'NLDS SHDS'
cond7 = seg_df['combi_lab_simp'] == 'DSSHDS NLSHDS SHSHDS SHSHLH'
cond8 = seg_df['combi_lab_simp'] == 'DSSHSHDS SHSHLH'
cond9 = seg_df['combi_lab_simp'] == 'NLDS SHSHDS SHSHLH'
cond10 = seg_df['combi_lab_simp'] == 'SHDSSHDS NLSHDS SHSHDS SHSHLH'
cond11 = seg_df['combi_lab_simp'] == 'SHDS SHDS SHSHLH'
cond12 = seg_df['combi_lab_simp'] == 'SHSHDSSHDS NLSHDS SHDS SHSHLH'
cond13 = seg_df['combi_lab_simp'] == 'DSSHDS NLSHDS SHDS SHSHLH'
cond14 = seg_df['combi_lab_simp'] == 'SHDSSHDS NLSHDS SHDS SHSHLH'
cond15 = seg_df['combi_lab_simp'] == 'SHDS NLDS SHDS SHSHLH'
cond16 = seg_df['combi_lab_simp'] == 'DSSHDS SHSHSHLH'
cond17 = seg_df['combi_lab_simp'] == 'NLDS LHDS'
cond18 = seg_df['combi_lab_simp'] == 'NLDS DS LHDS'
cond19 = seg_df['combi_lab_simp'] == 'NLDS DS'
cond20 = seg_df['combi_lab_simp'] == 'DS LHDS'
cond21 = seg_df['combi_lab_simp'] == 'NLDS DSDS SHSHDSSHLH'
cond22 = seg_df['combi_lab_simp'] == 'NLDS DSDS SHDSLH'
cond23 = seg_df['combi_lab_simp'] == 'NLSHDS DSDS SHSHDSLH'
cond24 = seg_df['combi_lab_simp'] == 'NLDS DSDS SHSHDSLH'
cond25 = seg_df['combi_lab_simp'] == 'DS USLHDS'
cond26 = seg_df['combi_lab_simp'] == 'SHDS SHDSLH'
cond27 = seg_df['combi_lab_simp'] == 'NLDS DS LH'
cond28 = seg_df['combi_lab_simp'] == 'NLDS SHDS SHSHLH'
cond29 = seg_df['combi_lab_simp'] == 'SHDS SHSHLH'
cond30 = seg_df['combi_lab_simp'] == 'NLDS SHDS SHDS SHSHLH'

In [20]:
cond31 = seg_df['combi_lab_simp'] == 'NLDS SHLH'
cond32 = seg_df['combi_lab_simp'] == 'DS NLDS'
cond33 = seg_df['combi_lab_simp'] == 'NLDS HL'
cond34 = seg_df['combi_lab_simp'] == 'NLDS SHSHLH'
cond35 = seg_df['combi_lab_simp'] == 'NLDS SHSHSHLH'
cond36 = seg_df['combi_lab_simp'] == 'NLSHDS SHSHSHLH'
cond37 = seg_df['combi_lab_simp'] == 'NLSHDS SHSHLH'
cond38 = seg_df['combi_lab_simp'] == 'SHDS SHSHSHSHDS SHSHLH'
cond39 = seg_df['combi_lab_simp'] == 'NLDS SHSHSHDSSHSHLH'
cond40 = seg_df['combi_lab_simp'] == 'NLSHDS DSSHDS SHSHLH'
cond41 = seg_df['combi_lab_simp'] == 'NLSHDS SHDSSHDS SHLH'
cond42 = seg_df['combi_lab_simp'] == 'NLDS NLDS NLDS SHSHDS'
cond43 = seg_df['combi_lab_simp'] == 'NLDS NLDS NLSHDS SHDS SHSHLH'
cond44 = seg_df['combi_lab_simp'] == 'NLDS NLSHDS SHDS SHSHLH'
cond45 = seg_df['combi_lab_simp'] == 'NLDS USDSSHDS SHSHLH'
cond46 = seg_df['combi_lab_simp'] == 'NLDS SHDSSHDS'
cond47 = seg_df['combi_lab_simp'] == 'NLDS NLDS DSSHDS'
cond48 = seg_df['combi_lab_simp'] == 'SHDS LHDS'
cond49 = seg_df['combi_lab_simp'] == 'SHDS USLHDS'
cond50 = seg_df['combi_lab_simp'] == 'USSHDS USLHDS'
cond51 = seg_df['combi_lab_simp'] == 'SHDS LHDS LHDS'
cond52 = seg_df['combi_lab_simp'] == 'SHDS SHDSSHSHLH'
cond53 = seg_df['combi_lab_simp'] == 'SHDS SHDS SHLH'
cond54 = seg_df['combi_lab_simp'] == 'SHDS SHDSSHLH'
cond55 = seg_df['combi_lab_simp'] == 'SHDS SHDS LH'
cond56 = seg_df['combi_lab_simp'] == 'NL NLSHDS SHDSLH'
cond57 = seg_df['combi_lab_simp'] == 'NLDS SHSHNLDS DSSHDS SHSHSHLH'
cond58 = seg_df['combi_lab_simp'] == 'SHNLDS DSSHDS SHLH'
cond59 = seg_df['combi_lab_simp'] == 'NLDS SHDS SHSHSHLH'
cond60 = seg_df['combi_lab_simp'] == 'SHSHNLDS DSSHDS SHSHSHLH'

In [21]:
cond61 = seg_df['combi_lab_simp'] == 'NLSHDS SHDS SHSHSHLH'
cond62 = seg_df['combi_lab_simp'] == 'NLDS SHDS LH'
cond63 = seg_df['combi_lab_simp'] == 'SHSHSHDS DSSHDSSHLH'
cond64 = seg_df['combi_lab_simp'] == 'SHSHSHSHDS DSSHDSSHSHSHLH'
cond65 = seg_df['combi_lab_simp'] == 'SHDSSH LHDS'
cond66 = seg_df['combi_lab_simp'] == 'NLDS SHDSLH'
cond67 = seg_df['combi_lab_simp'] == 'NL NLDS'
cond68 = seg_df['combi_lab_simp'] == 'NLDS LH'
cond69 = seg_df['combi_lab_simp'] == 'NLDS SHDSSHSHSHLH'
cond70 = seg_df['combi_lab_simp'] == 'NLDS DSSHDS SHLH' 
cond71 = seg_df['combi_lab_simp'] == 'SHDSSHDS SHSHLH'
cond72 = seg_df['combi_lab_simp'] == 'DSSHDS NLSHDS SHDS LH'
cond73 = seg_df['combi_lab_simp'] == 'NLDSSHDS NLSHDS SHDS LH'
cond74 = seg_df['combi_lab_simp'] == 'NLDS SHDS NLSHDS SHSHDS SHLH'
cond75 = seg_df['combi_lab_simp'] == 'NLDS NL'
cond76 = seg_df['combi_lab_simp'] == 'NLDS SHDSSHSHLH'
cond77 = seg_df['combi_lab_simp'] == 'NLDS SHDSSHLH'
cond78 = seg_df['combi_lab_simp'] == 'NLSHDS SHDS SHSHSHSHLH'
cond79 = seg_df['combi_lab_simp'] == 'NLSHDS SHDS SHSHLH'
cond80 = seg_df['combi_lab_simp'] == 'SHDS SHSHSHLH'
cond81 = seg_df['combi_lab_simp'] == 'NL NLDS SHDS LH'
cond82 = seg_df['combi_lab_simp'] == 'NLDS SHDS SH'
cond83 = seg_df['combi_lab_simp'] == 'NLDS SHDS SHLH'
cond84 = seg_df['combi_lab_simp'] == 'SHDS LHDS NLSHDS DS'
cond85 = seg_df['combi_lab_simp'] == 'NL SHDS'
cond86 = seg_df['combi_lab_simp'] == 'NLDS DS DS'
cond87 = seg_df['combi_lab_simp'] == 'NL SHDS LHDS'
cond88 = seg_df['combi_lab_simp'] == 'NLSHDS SHDSSHSHLH'
cond89 = seg_df['combi_lab_simp'] == 'NL SHDS SHDSSHLH'
cond90 = seg_df['combi_lab_simp'] == 'NLSHDS SHDSSHLH'
cond91 = seg_df['combi_lab_simp'] == 'NL NLDS NLDSSHDS LH'
cond92 = seg_df['combi_lab_simp'] == 'NLDS NLDS NLDS LHDS SHDS'
cond93 = seg_df['combi_lab_simp'] == 'NLDS NLDS NLDSSHDS SHSHSHLH'
cond94 = seg_df['combi_lab_simp'] == 'NLDS LHDS SHSHSHSHDS'
cond95 = seg_df['combi_lab_simp'] == 'NLDS NLDS SHDSSHDS SHLH'
cond96 = seg_df['combi_lab_simp'] == 'SHSHDS SHHL'
cond97 = seg_df['combi_lab_simp'] == 'SHSHDS SHHL NLSHDS SHDSSHSHSHSHLH'
cond98 = seg_df['combi_lab_simp'] == 'SHDS SHHL'

In [22]:
### Modify
seg_df.loc[cond1, 'combi_lab_simp'] = 'DS-SH-DS SH-LH'
seg_df.loc[cond2, 'combi_lab_simp'] = 'DS SH-LH'
seg_df.loc[cond3, 'combi_lab_simp'] = 'DS-SH-DS LH'
seg_df.loc[cond4, 'combi_lab_simp'] = 'DS-SH-DS SH-LH'
seg_df.loc[cond5, 'combi_lab_simp'] = 'DS-SH-DS NL-SH-DS'
seg_df.loc[cond6, 'combi_lab_simp'] = 'NL-DS SH-DS'
seg_df.loc[cond7, 'combi_lab_simp'] = 'DS-SH-DS NL-SH-DS SH-DS SH-LH'
seg_df.loc[cond8, 'combi_lab_simp'] = 'DS-SH-DS SH-LH'
seg_df.loc[cond9, 'combi_lab_simp'] = 'NL-DS SH-DS SH-LH'
seg_df.loc[cond10, 'combi_lab_simp'] = 'SH-DS-SH-DS NL-SH-DS SH-DS SH-LH'
seg_df.loc[cond11, 'combi_lab_simp'] = 'SH-DS SH-LH'
seg_df.loc[cond12, 'combi_lab_simp'] = 'SH-DS-SH-DS NL-SH-DS SH-DS SH-LH'
seg_df.loc[cond13, 'combi_lab_simp'] = 'DS-SH-DS NL-SH-DS SH-DS SH-LH'
seg_df.loc[cond14, 'combi_lab_simp'] = 'SH-DS-SH-DS NL-SH-DS SH-DS SH-LH'
seg_df.loc[cond15, 'combi_lab_simp'] = 'SH-DS NL-DS SH-DS SH-LH'
seg_df.loc[cond16, 'combi_lab_simp'] = 'DS-SH-DS SH-LH'
seg_df.loc[cond17, 'combi_lab_simp'] = 'NL-DS LH-DS'
seg_df.loc[cond18, 'combi_lab_simp'] = 'NL-DS DS LH-DS'
seg_df.loc[cond19, 'combi_lab_simp'] = 'NL-DS DS'
seg_df.loc[cond20, 'combi_lab_simp'] = 'DS LH-DS'
seg_df.loc[cond21, 'combi_lab_simp'] = 'NL-DS DS SH-DS-SH-LH'
seg_df.loc[cond22, 'combi_lab_simp'] = 'NL-DS DS SH-DS-LH'
seg_df.loc[cond23, 'combi_lab_simp'] = 'NL-SH-DS DS SH-DS-LH'
seg_df.loc[cond24, 'combi_lab_simp'] = 'NL-DS DS SH-DS-LH'
seg_df.loc[cond25, 'combi_lab_simp'] = 'DS SH-LH-DS'
seg_df.loc[cond26, 'combi_lab_simp'] = 'SH-DS SH-DS-LH'
seg_df.loc[cond27, 'combi_lab_simp'] = 'NL-DS DS LH'
seg_df.loc[cond28, 'combi_lab_simp'] = 'NL-DS SH-DS SH-LH'
seg_df.loc[cond29, 'combi_lab_simp'] = 'SH-DS SH-LH'
seg_df.loc[cond30, 'combi_lab_simp'] = 'NL-DS SH-DS SH-LH'

In [23]:
seg_df.loc[cond31, 'combi_lab_simp'] = 'NL-DS SH-LH'
seg_df.loc[cond32, 'combi_lab_simp'] = 'DS NL-DS'
seg_df.loc[cond33, 'combi_lab_simp'] = 'NL-DS LH'
seg_df.loc[cond34, 'combi_lab_simp'] = 'NL-DS SH-LH'
seg_df.loc[cond35, 'combi_lab_simp'] = 'NL-DS SH-LH'
seg_df.loc[cond36, 'combi_lab_simp'] = 'NL-SH-DS SH-LH'
seg_df.loc[cond37, 'combi_lab_simp'] = 'NL-SH-DS SH-LH'
seg_df.loc[cond38, 'combi_lab_simp'] = 'SH-DS SH-DS SH-LH'
seg_df.loc[cond39, 'combi_lab_simp'] = 'NL-DS SH-DS-SH-LH'
seg_df.loc[cond40, 'combi_lab_simp'] = 'NL-SH-DS DS-SH-DS SH-LH'
seg_df.loc[cond41, 'combi_lab_simp'] = 'NL-SH-DS SH-DS-SH-DS SH-LH'
seg_df.loc[cond42, 'combi_lab_simp'] = 'NL-DS SH-DS'
seg_df.loc[cond43, 'combi_lab_simp'] = 'NL-DS NL-SH-DS SH-DS SH-LH'
seg_df.loc[cond44, 'combi_lab_simp'] = 'NL-DS NL-SH-DS SH-DS SH-LH'
seg_df.loc[cond45, 'combi_lab_simp'] = 'NL-DS SH-DS-SH-DS SH-LH'
seg_df.loc[cond46, 'combi_lab_simp'] = 'NL-DS SH-DS-SH-DS'
seg_df.loc[cond47, 'combi_lab_simp'] = 'NL-DS DS-SH-DS'
seg_df.loc[cond48, 'combi_lab_simp'] = 'SH-DS LH-DS'
seg_df.loc[cond49, 'combi_lab_simp'] = 'SH-DS SH-LH-DS'
seg_df.loc[cond50, 'combi_lab_simp'] = 'SH-DS SH-LH-DS'
seg_df.loc[cond51, 'combi_lab_simp'] = 'SH-DS LH-DS'
seg_df.loc[cond52, 'combi_lab_simp'] = 'SH-DS SH-DS-SH-LH'
seg_df.loc[cond53, 'combi_lab_simp'] =  'SH-DS SH-LH'
seg_df.loc[cond54, 'combi_lab_simp'] = 'SH-DS SH-DS-SH-LH'
seg_df.loc[cond55, 'combi_lab_simp'] = 'SH-DS LH'
seg_df.loc[cond56, 'combi_lab_simp'] = 'NL NL-SH-DS SH-DS-LH'
seg_df.loc[cond57, 'combi_lab_simp'] = 'NL-DS SH-NL-DS DS-SH-DS SH-LH'
seg_df.loc[cond58, 'combi_lab_simp'] = 'SH-NL-DS DS-SH-DS SH-LH'
seg_df.loc[cond59, 'combi_lab_simp'] = 'NL-DS SH-DS SH-LH'
seg_df.loc[cond60, 'combi_lab_simp'] = 'SH-NL-DS DS-SH-DS SH-LH'

In [24]:
seg_df.loc[cond61, 'combi_lab_simp'] = 'NL-SH-DS SH-DS SH-LH'
seg_df.loc[cond62, 'combi_lab_simp'] = 'NL-DS SH-DS LH'
seg_df.loc[cond63, 'combi_lab_simp'] = 'SH-DS DS-SH-DS-SH-LH'
seg_df.loc[cond64, 'combi_lab_simp'] = 'SH-DS DS-SH-DS-SH-LH'
seg_df.loc[cond65, 'combi_lab_simp'] = 'SH-DS-SH LH-DS'
seg_df.loc[cond66, 'combi_lab_simp'] = 'NL-DS SH-DS-LH'
seg_df.loc[cond67, 'combi_lab_simp'] = 'NL NL-DS'
seg_df.loc[cond68, 'combi_lab_simp'] = 'NL-DS LH'
seg_df.loc[cond69, 'combi_lab_simp'] = 'NL-DS SH-DS-SH-LH'
seg_df.loc[cond70, 'combi_lab_simp'] = 'NL-DS DS-SH-DS SH-LH'
seg_df.loc[cond71, 'combi_lab_simp'] = 'SH-DS-SH-DS SH-LH'
seg_df.loc[cond72, 'combi_lab_simp'] = 'DS-SH-DS NL-SH-DS SH-DS LH'
seg_df.loc[cond73, 'combi_lab_simp'] = 'NL-DS-SH-DS NL-SH-DS SH-DS LH'
seg_df.loc[cond74, 'combi_lab_simp'] = 'NL-DS SH-DS NL-SH-DS SH-DS SH-LH'
seg_df.loc[cond75, 'combi_lab_simp'] = 'NL-DS NL'
seg_df.loc[cond76, 'combi_lab_simp'] = 'NL-DS SH-DS-SH-LH'
seg_df.loc[cond77, 'combi_lab_simp'] = 'NL-DS SH-DS-SH-LH'
seg_df.loc[cond78, 'combi_lab_simp'] = 'NL-SH-DS SH-DS SH-LH'
seg_df.loc[cond79, 'combi_lab_simp'] = 'NL-SH-DS SH-DS SH-LH'
seg_df.loc[cond80, 'combi_lab_simp'] = 'SH-DS SH-LH'
seg_df.loc[cond81, 'combi_lab_simp'] = 'NL NL-DS SH-DS LH'
seg_df.loc[cond82, 'combi_lab_simp'] = 'NL-DS SH-DS SH'
seg_df.loc[cond83, 'combi_lab_simp'] = 'NL-DS SH-DS SH-LH'
seg_df.loc[cond84, 'combi_lab_simp'] = 'SH-DS LH-DS NL-SH-DS DS'
seg_df.loc[cond85, 'combi_lab_simp'] = 'NL SH-DS'
seg_df.loc[cond86, 'combi_lab_simp'] = 'NL-DS DS'
seg_df.loc[cond87, 'combi_lab_simp'] = 'NL SH-DS LH-DS'
seg_df.loc[cond88, 'combi_lab_simp'] = 'NL-SH-DS SH-DS-SH-LH'
seg_df.loc[cond89, 'combi_lab_simp'] = 'NL SH-DS SH-DS-SH-LH'
seg_df.loc[cond90, 'combi_lab_simp'] = 'NL-SH-DS SH-DS-SH-LH'
seg_df.loc[cond91, 'combi_lab_simp'] = 'NL NL-DS NL-DS-SH-DS LH'
seg_df.loc[cond92, 'combi_lab_simp'] = 'NL-DS LH-DS SH-DS'
seg_df.loc[cond93, 'combi_lab_simp'] = 'NL-DS NL-DS-SH-DS SH-LH'
seg_df.loc[cond94, 'combi_lab_simp'] = 'NL-DS LH-DS SH-DS'
seg_df.loc[cond95, 'combi_lab_simp'] = 'NL-DS SH-DS-SH-DS SH-LH'
seg_df.loc[cond96, 'combi_lab_simp'] = 'SH-DS SH-LH'
seg_df.loc[cond97, 'combi_lab_simp'] = 'SH-DS SH-LH NL-SH-DS SH-DS-SH-LH'
seg_df.loc[cond98, 'combi_lab_simp'] = 'SH-DS SH-LH'

In [25]:
## What are the unique simplified combi labels? - check
list(seg_df.combi_lab_simp.unique())

['DS-SH-DS SH-LH',
 'DS SH-LH',
 'DS-SH-DS LH',
 'DS-SH-DS NL-SH-DS',
 'NL-DS DS-SH-DS SH-LH',
 'SH-DS-SH-DS SH-LH',
 'DS-SH-DS NL-SH-DS SH-DS LH',
 'NL-DS-SH-DS NL-SH-DS SH-DS LH',
 'NL-DS SH-DS NL-SH-DS SH-DS SH-LH',
 'NL-DS SH-DS',
 'DS-SH-DS NL-SH-DS SH-DS SH-LH',
 'NL-DS SH-DS SH-LH',
 'SH-DS-SH-DS NL-SH-DS SH-DS SH-LH',
 'SH-DS SH-LH',
 'SH-DS NL-DS SH-DS SH-LH',
 'NL-DS LH-DS',
 'SH-DS SH-LH NL-SH-DS SH-DS-SH-LH',
 'NL-DS DS',
 'DS LH-DS',
 'NL-DS DS LH-DS',
 'NL-DS DS SH-DS-SH-LH',
 'NL-DS DS SH-DS-LH',
 'NL-SH-DS DS SH-DS-LH',
 'DS SH-LH-DS',
 'SH-DS SH-DS-LH',
 'NL-DS DS LH',
 'NL-DS SH-LH',
 'DS NL-DS',
 'NL-DS LH',
 'NL-SH-DS SH-LH',
 'SH-DS SH-DS SH-LH',
 'NL-DS SH-DS-SH-LH',
 'NL-SH-DS DS-SH-DS SH-LH',
 'NL-SH-DS SH-DS-SH-DS SH-LH',
 'NL-DS NL-SH-DS SH-DS SH-LH',
 'NL-DS SH-DS-SH-DS SH-LH',
 'NL-DS SH-DS-SH-DS',
 'NL-DS DS-SH-DS',
 'SH-DS LH-DS',
 'SH-DS SH-LH-DS',
 'NL-DS NL',
 'SH-DS SH-DS-SH-LH',
 'SH-DS LH',
 'NL NL-SH-DS SH-DS-LH',
 'NL-DS SH-NL-DS DS-SH-DS SH-LH',
 

In [27]:
len(seg_df.combi_lab_simp.unique())

62

## Check Data

In [28]:
seg_df.iloc[:3, 0:15]

Unnamed: 0,start_time,end_time,labels,ID,start_times,end_times,call_label,call_start,seg_pos_call,call_unique_num,call_pos_combi,combi_label,combi_start,seg_pos_combi,combi_unique_num
0,0.753604,0.776773,DS,0,0.753604,0.776773,DSSHDS,0.753604,0,0,0,DSSHDS SHSHLH,0.753604,0,0
1,0.786865,0.835165,SH,1,0.786865,0.835165,DSSHDS,0.753604,1,0,0,DSSHDS SHSHLH,0.753604,1,0
2,0.855941,0.92116,DS,2,0.855941,0.92116,DSSHDS,0.753604,2,0,0,DSSHDS SHSHLH,0.753604,2,0


In [29]:
seg_df.iloc[:3, 16:]

Unnamed: 0,indvi,filename,group,location,sex,wav_loc,key,rate,specs,umap,comb_labels,call_lab_simp,combi_lab_simp
0,0,BWY MGGY Call Combo 1 290719 PM,BWYa,CRAWLEY,F,C:/Users/slwal/anaconda3/envs/PY36/avgn_paper-...,BWY MGGY Call Combo 1 290719 PM,44100,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...","[14.15081, 3.406464]",DS,DS-SH-DS,DS-SH-DS SH-LH
1,0,BWY MGGY Call Combo 1 290719 PM,BWYa,CRAWLEY,F,C:/Users/slwal/anaconda3/envs/PY36/avgn_paper-...,BWY MGGY Call Combo 1 290719 PM,44100,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...","[5.388335, 10.057652]",SH,DS-SH-DS,DS-SH-DS SH-LH
2,0,BWY MGGY Call Combo 1 290719 PM,BWYa,CRAWLEY,F,C:/Users/slwal/anaconda3/envs/PY36/avgn_paper-...,BWY MGGY Call Combo 1 290719 PM,44100,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...","[12.421062, 0.6991728]",DS,DS-SH-DS,DS-SH-DS SH-LH


In [31]:
from avgn.utils.paths import DATA_DIR, most_recent_subdirectory, ensure_dir

In [32]:
#save df
save_loc = DATA_DIR / DATASET_ID / DT_ID /  'segment_df_umap_combinedtidied.pickle'
ensure_dir(save_loc.as_posix())
seg_df.to_pickle(save_loc)