### Load dataframe

In [2]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from tqdm.auto import tqdm
from joblib import Parallel, delayed
import pandas as pd

In [3]:
import avgn

In [4]:
from avgn.utils.paths import DATA_DIR, most_recent_subdirectory, ensure_dir
from avgn.visualization.spectrogram import draw_spec_set
from avgn.visualization.projections import scatter_projections, scatter_spec
from avgn.utils.general import save_fig

In [5]:
from scipy.stats import kruskal

In [6]:
from sklearn.metrics import silhouette_score, silhouette_samples

In [7]:
# Dataset folder name; joined onto DATA_DIR / FIGURE_DIR below to locate pickles and figure output.
DATASET_ID = "git_repos"

In [8]:
# NOTE(review): DT_ID is not referenced by any cell visible in this notebook; it looks like a
# run timestamp (YYYY-MM-DD_HH-MM-SS) — confirm whether it is still needed.
DT_ID = '2022-03-04_18-41-29'

## LH

In [15]:
# Load the pickled LH table and preview its first 30 rows.
# NOTE(review): unpickling can execute arbitrary code — only load pickles produced by this project.
LH_pickle_path = DATA_DIR / DATASET_ID / 'LH' / 'LH.pickle'
LH_df = pd.read_pickle(LH_pickle_path)
LH_df.head(30)

Unnamed: 0,start_time,end_time,ID,labels,indv,indvi,filename,group,location,sex,...,simp,combi_lab_simp,call_lab_simp,call_label,seg_pos_call,call_unique_num,combi_label,seg_pos_combi,call_pos_combi,combi_unique_num
5,0.980101,1.36713,5,LH,MGGY,0,BWY MGGY Call Combo 1 290719 PM,BWYa,CRAWLEY,F,...,LH_LoneOrLast,DS-SH-DS SH-LH,SH-LH,SHSHLH,2,1,DSSHDS SHSHLH,5,1,0
8,1.48208,1.984961,8,HL,MGGY,0,BWY MGGY Call Combo 1 300719 AM,BWYa,CRAWLEY,F,...,LH_LoneOrLast,DS SH-LH,SH-LH,USC,1,3,DS USC,2,1,1
12,1.471596,1.897371,12,LH,MGGY,0,BWY MGGY Discrete 1 300719 AM,BWYa,CRAWLEY,F,...,LH_LoneOrLast,DS-SH-DS LH,LH,LH,0,5,DSSHDS LH,3,1,2
17,0.496964,0.927264,17,LH,MGGY,0,BWY MGGY Discrete 4 210519 PM,BWYa,CRAWLEY,F,...,LH_LoneOrLast,DS-SH-DS SH-LH,SH-LH,SHLH,1,7,DSSHDS SHLH,4,1,3
22,0.36733,0.503686,22,LH,MGGY,0,BWY MGGY Discrete 6 210519 PM,BWYa,CRAWLEY,F,...,LH_LoneOrLast,DS-SH-DS SH-LH,SH-LH,SHLH,1,9,DSSHDS SHLH,4,1,4
35,0.928606,1.067231,35,LH,MGGY,0,MGGY Alarm Alert series 1 100717,BWYa,CRAWLEY,F,...,LH_LoneOrLast,NL-DS DS-SH-DS SH-LH,SH-LH,SHLH,1,14,NLDS DSSHDS SHLH,6,2,6
40,0.529041,0.889874,40,LH,MGGY,0,MGGY Alert 1 070817,BWYa,CRAWLEY,F,...,LH_LoneOrLast,SH-DS SH-LH,SH-LH,SHLH,1,16,SHSHDS SHLH,4,1,7
47,0.34681,0.698186,47,LH,MGGY,0,MGGY Alert 1 250617,BWYa,CRAWLEY,F,...,LH_LoneOrLast,SH-DS-SH-DS SH-LH,SH-LH,SHSHLH,2,18,SHDSSHDS SHSHLH,6,1,8
56,0.798952,1.19859,56,LH,MGGY,0,MGGY Alert 2 070817,BWYa,CRAWLEY,F,...,LH_LoneOrLast,DS-SH-DS NL-SH-DS SH-DS LH,LH,LH,0,22,DSSHDS NLSHDS SHDS LH,8,3,9
66,1.144113,1.515209,66,LH,MGGY,0,MGGY Alert 3 070817,BWYa,CRAWLEY,F,...,LH_LoneOrLast,NL-DS-SH-DS NL-SH-DS SH-DS LH,LH,LH,0,26,NLDSSHDS NLSHDS SHDS LH,9,3,10


In [10]:
# Number of rows in the LH table
LH_df.shape[0]

203

### Get specs

In [11]:
def norm(x):
    """Min-max scale ``x`` so its values span the range [0, 1].

    Parameters
    ----------
    x : array-like
        Numeric values (e.g. a spectrogram). Must be non-empty, because
        ``np.min``/``np.max`` raise ``ValueError`` on empty input.

    Returns
    -------
    numpy.ndarray
        ``(x - min(x)) / (max(x) - min(x))``. When every value of ``x`` is
        identical the range is zero; an all-zero array is returned in that
        case instead of the NaNs a 0/0 division would produce.
    """
    lowest = np.min(x)
    offset = x - lowest
    span = np.max(x) - lowest
    # A constant input has span == 0; dividing by 1 keeps the (all-zero) offsets
    # and still yields a float result, matching the dtype of the normal path.
    scale = span if span != 0 else 1
    return offset / scale

In [13]:
# Min-max normalise every LH spectrogram (tqdm shows progress over the rows).
specsLH = [norm(spec) for spec in tqdm(list(LH_df.specs.values))]

  0%|          | 0/203 [00:00<?, ?it/s]

### Project UMAP

In [16]:
## Boolean mask: LH rows whose 'simp' label is the raw 'LH_LoneOrLast' value
cond1 = LH_df['simp'].eq('LH_LoneOrLast')

In [17]:
### Replace the raw 'simp' label with a human-readable one for figure legends
LH_df['simp'] = LH_df['simp'].replace({'LH_LoneOrLast': 'LH Last/Lone Segment'})

In [None]:
# Plot the LH UMAP coordinates with avgn's scatter_spec, labelling points by 'simp'.
# x_range / y_range are hard-coded for this embedding.
nex = -1
colors = ["maroon", "lightcoral"]
scatter_spec(
    np.array(list(LH_df['umap'].values)),
    specsLH,
    column_size=10,
    x_range=[6, 14.5],
    y_range=[-0.5, 8],
    pal_color="hls",
    color_points=False,
    enlarge_points=0,
    figsize=(10, 10),
    range_pad=0.15,
    scatter_kwargs=dict(
        labels=LH_df['simp'].values,
        alpha=1,
        s=15,
        show_legend=True,
        color_palette=colors,
    ),
    matshow_kwargs=dict(cmap=plt.cm.Greys),
    line_kwargs=dict(lw=0.5, ls="dashed", alpha=0.25),
    draw_lines=True,
    n_subset=1000,
    border_line_width=3,
);

In [None]:
# Plot the LH UMAP coordinates with avgn's scatter_spec, labelling points by 'location'.
# x_range / y_range are hard-coded for this embedding.
colors = ["rebeccapurple", "thistle"]
scatter_spec(
    np.array(list(LH_df['umap'].values)),
    specsLH,
    column_size=12,
    x_range=[6, 14.5],
    y_range=[-0.5, 8],
    pal_color="hls",
    color_points=False,
    enlarge_points=0,
    figsize=(10, 10),
    range_pad=0.15,
    scatter_kwargs=dict(
        labels=LH_df['location'].values,
        alpha=1,
        s=15,
        show_legend=True,
        color_palette=colors,
    ),
    matshow_kwargs=dict(cmap=plt.cm.Greys),
    line_kwargs=dict(lw=0.5, ls="dashed", alpha=0.25),
    draw_lines=True,
    n_subset=1000,
    border_line_width=3,
);

## DS

In [31]:
# Load the pickled DS segment table and preview its first 3 rows.
# NOTE(review): unpickling can execute arbitrary code — only load pickles produced by this project.
DS_pickle_path = DATA_DIR / DATASET_ID / 'DS' / 'DS_segment.pickle'
DS_df = pd.read_pickle(DS_pickle_path)
DS_df.head(3)

Unnamed: 0,start_time,end_time,labels,ID,start_times,end_times,call_label,call_start,seg_pos_call,call_unique_num,...,location,sex,wav_loc,key,rate,specs,umap,comb_labels,call_lab_simp,combi_lab_simp
0,0.753604,0.776773,DS,0,0.753604,0.776773,DSSHDS,0.753604,0,0,...,CRAWLEY,F,C:/Users/slwal/anaconda3/envs/PY36/avgn_paper-...,BWY MGGY Call Combo 1 290719 PM,44100,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...","[8.20534, 5.90875]",DS,DS-SH-DS,DS-SH-DS SH-LH
2,0.855941,0.92116,DS,2,0.855941,0.92116,DSSHDS,0.753604,2,0,...,CRAWLEY,F,C:/Users/slwal/anaconda3/envs/PY36/avgn_paper-...,BWY MGGY Call Combo 1 290719 PM,44100,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...","[5.232805, 0.797881]",DS,DS-SH-DS,DS-SH-DS SH-LH
0,1.218085,1.308841,DS,6,1.218085,1.308841,DS,1.218085,0,2,...,CRAWLEY,F,C:/Users/slwal/anaconda3/envs/PY36/avgn_paper-...,BWY MGGY Call Combo 1 300719 AM,44100,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...","[7.6830435, 5.4741535]",DS,DS,DS SH-LH


In [33]:
# Number of rows in the DS table
DS_df.shape[0]

468

In [35]:
# Min-max normalise every DS spectrogram (tqdm shows progress over the rows).
specsDS = [norm(spec) for spec in tqdm(list(DS_df.specs.values))]

  0%|          | 0/468 [00:00<?, ?it/s]

In [None]:
# Plot the DS UMAP coordinates with avgn's scatter_spec, labelling points by 'location'.
# x_range / y_range are hard-coded for this embedding.
color = [
    'midnightblue', 'cornflowerblue', 'darkgreen', 'lightgreen',
    'darkorange', 'olive', 'teal', 'skyblue',
]
nex = -1
scatter_spec(
    np.array(list(DS_df['umap'].values)),
    specsDS,
    column_size=10,
    x_range=[-0.25, 9.75],
    y_range=[-0.75, 9.25],
    pal_color="hls",
    color_points=False,
    enlarge_points=0,
    figsize=(10, 10),
    range_pad=0.15,
    scatter_kwargs=dict(
        labels=DS_df['location'].values,
        alpha=0.7,
        s=10,
        show_legend=True,
        color_palette=color,
    ),
    matshow_kwargs=dict(cmap=plt.cm.Greys),
    line_kwargs=dict(lw=0.5, ls="dashed", alpha=0.25),
    draw_lines=True,
    n_subset=1000,
    border_line_width=3,
);

In [46]:
## Copy the 'location' column into a new 'figure' column so the legend labels can be
## edited while the original 'location' values stay unchanged
DS_df['figure'] = DS_df['location']

In [47]:
## Boolean masks selecting DS rows by their raw (upper-case) location label in 'figure'
cond1 = DS_df['figure'].eq('GUILDFORD')
cond2 = DS_df['figure'].eq('CRAWLEY')

In [48]:
### Title-case the location labels shown in figure legends.
### Mapping by value keeps this cell independent of the reused cond1/cond2 names.
DS_df['figure'] = DS_df['figure'].replace({
    'GUILDFORD': 'Guildford',
    'CRAWLEY': 'Crawley',
})

In [49]:
## Boolean masks selecting DS rows by their single-letter 'sex' code
cond1 = DS_df['sex'].eq('M')
cond2 = DS_df['sex'].eq('F')

In [50]:
### Expand the single-letter sex codes into full words for figure legends.
### Note this overwrites the 'sex' column itself (no separate display column is made).
DS_df['sex'] = DS_df['sex'].replace({
    'M': 'Male',
    'F': 'Female',
})

In [None]:
# Plot the DS UMAP coordinates with avgn's scatter_spec, labelling points by 'sex'.
# x_range / y_range are hard-coded for this embedding.
color = [
    'midnightblue', 'cornflowerblue', 'darkgreen', 'lightgreen',
    'darkorange', 'olive', 'teal', 'skyblue',
]
nex = -1
scatter_spec(
    np.array(list(DS_df['umap'].values)),
    specsDS,
    column_size=10,
    x_range=[-0.25, 9.75],
    y_range=[-0.75, 9.25],
    pal_color="hls",
    color_points=False,
    enlarge_points=0,
    figsize=(10, 10),
    range_pad=0.15,
    scatter_kwargs=dict(
        labels=DS_df['sex'].values,
        alpha=0.7,
        s=10,
        show_legend=True,
        color_palette=color,
    ),
    matshow_kwargs=dict(cmap=plt.cm.Greys),
    line_kwargs=dict(lw=0.5, ls="dashed", alpha=0.25),
    draw_lines=True,
    n_subset=1000,
    border_line_width=3,
);

## NL

In [52]:
# Load the pickled NL table and preview its first 3 rows.
# NOTE(review): unpickling can execute arbitrary code — only load pickles produced by this project.
NL_pickle_path = DATA_DIR / DATASET_ID / 'NL' / 'NL.pickle'
NL_df = pd.read_pickle(NL_pickle_path)
NL_df.head(3)

Unnamed: 0,start_time,end_time,ID,labels,indv,indvi,filename,group,location,sex,...,combi_lab_simp,call_lab_simp,call_label,seg_pos_call,call_unique_num,combi_label,seg_pos_combi,call_pos_combi,combi_unique_num,simp
26,0.338289,0.439778,26,NL,MGGY,0,BWY MGGY Excitement 2 170519 PM,BWYa,CRAWLEY,F,...,DS-SH-DS NL-SH-DS,NL-SH-DS,NLSHDS,0,11,DSSHDS NLSHDS,3,1,5,NL_comb
29,0.020744,0.165861,29,NL,MGGY,0,MGGY Alarm Alert series 1 100717,BWYa,CRAWLEY,F,...,NL-DS DS-SH-DS SH-LH,NL-DS,NLDS,0,12,NLDS DSSHDS SHLH,0,0,6,NL_comb
51,0.349618,0.443789,51,NL,MGGY,0,MGGY Alert 2 070817,BWYa,CRAWLEY,F,...,DS-SH-DS NL-SH-DS SH-DS LH,NL-SH-DS,NLSHDS,0,20,DSSHDS NLSHDS SHDS LH,3,1,9,NL_comb


In [53]:
# List the column names available in the NL table
NL_df.columns

Index(['start_time', 'end_time', 'ID', 'labels', 'indv', 'indvi', 'filename',
       'group', 'location', 'sex', 'wav_loc', 'key', 'rate', 'comb_labels',
       'umap', 'specs', 'combi_lab_simp', 'call_lab_simp', 'call_label',
       'seg_pos_call', 'call_unique_num', 'combi_label', 'seg_pos_combi',
       'call_pos_combi', 'combi_unique_num', 'simp'],
      dtype='object')

In [54]:
# Number of rows in the NL table
NL_df.shape[0]

173

In [57]:
# Min-max normalise every NL spectrogram (tqdm shows progress over the rows).
specsNL = [norm(spec) for spec in tqdm(list(NL_df.specs.values))]

  0%|          | 0/173 [00:00<?, ?it/s]

In [None]:
# Plot the NL UMAP coordinates with avgn's scatter_spec, labelling points by 'simp'.
# x_range / y_range are hard-coded for this embedding.
nex = -1
scatter_spec(
    np.array(list(NL_df['umap'].values)),
    specsNL,
    column_size=10,
    x_range=[4, 16.5],
    y_range=[3, 15.5],
    pal_color="hls",
    color_points=False,
    enlarge_points=0,
    figsize=(10, 10),
    range_pad=0.15,
    scatter_kwargs=dict(
        labels=NL_df['simp'].values,
        alpha=0.8,
        s=10,
        show_legend=True,
        color_palette='tab20',
    ),
    matshow_kwargs=dict(cmap=plt.cm.Greys),
    line_kwargs=dict(lw=0.5, ls="dashed", alpha=0.25),
    draw_lines=True,
    n_subset=1000,
    border_line_width=3,
);

In [None]:
# Plot the NL UMAP coordinates with avgn's scatter_spec, labelling points by 'location'.
# x_range / y_range are hard-coded for this embedding.
nex = -1
scatter_spec(
    np.array(list(NL_df['umap'].values)),
    specsNL,
    column_size=10,
    x_range=[4, 16.5],
    y_range=[3, 15.5],
    pal_color="hls",
    color_points=False,
    enlarge_points=0,
    figsize=(10, 10),
    range_pad=0.15,
    scatter_kwargs=dict(
        labels=NL_df['location'].values,
        alpha=0.8,
        s=10,
        show_legend=True,
        color_palette='tab20',
    ),
    matshow_kwargs=dict(cmap=plt.cm.Greys),
    line_kwargs=dict(lw=0.5, ls="dashed", alpha=0.25),
    draw_lines=True,
    n_subset=1000,
    border_line_width=3,
);

In [60]:
## Copy the 'simp' column into a new 'figure' column so the legend labels can be
## edited while the original 'simp' values stay unchanged
NL_df['figure'] = NL_df['simp']

In [61]:
# Inspect the distinct raw labels present in 'figure' before relabelling them
NL_df['figure'].unique()

array(['NL_comb', 'NL_lone'], dtype=object)

In [62]:
## Boolean masks for the two raw NL labels.
## The values must match what the 'figure' column actually holds ('NL_lone' / 'NL_comb');
## the previous 'NL' / 'NLcomb' strings matched no rows, so the relabelling never happened.
cond1 = NL_df['figure'] == 'NL_lone'
cond2 = NL_df['figure'] == 'NL_comb'

In [63]:
### Replace the raw NL labels with human-readable legend text.
### Mapping the raw values directly ('NL_lone' / 'NL_comb' are the only values present in
### the column) makes this cell independent of the reused cond1/cond2 masks, which
### compared against strings that do not occur in the data.
NL_df['figure'] = NL_df['figure'].replace({
    'NL_lone': 'NL Lone Segment',
    'NL_comb': 'NL Follows/Precedes Segment',
})

In [None]:
# Plot the NL UMAP coordinates with avgn's scatter_spec, labelling points by 'figure'.
# x_range / y_range are hard-coded for this embedding.
nex = -1
scatter_spec(
    np.array(list(NL_df['umap'].values)),
    specsNL,
    column_size=10,
    x_range=[4, 16.5],
    y_range=[3, 15.5],
    pal_color="hls",
    color_points=False,
    enlarge_points=0,
    figsize=(10, 10),
    range_pad=0.15,
    scatter_kwargs=dict(
        labels=NL_df['figure'].values,
        alpha=0.8,
        s=10,
        show_legend=True,
        color_palette='tab20',
    ),
    matshow_kwargs=dict(cmap=plt.cm.Greys),
    line_kwargs=dict(lw=0.5, ls="dashed", alpha=0.25),
    draw_lines=True,
    n_subset=1000,
    border_line_width=3,
);

## SH

In [65]:
# Load the pickled SH table and preview its first 3 rows.
# NOTE(review): unpickling can execute arbitrary code — only load pickles produced by this project.
SH_pickle_path = DATA_DIR / DATASET_ID / 'SH' / 'SH.pickle'
SH_df = pd.read_pickle(SH_pickle_path)
SH_df.head(3)

Unnamed: 0,start_time,end_time,ID,labels,indv,indvi,filename,group,location,sex,...,combi_lab_simp,call_lab_simp,call_label,seg_pos_call,call_unique_num,combi_label,seg_pos_combi,call_pos_combi,combi_unique_num,simp
1,0.786865,0.835165,1,SH,MGGY,0,BWY MGGY Call Combo 1 290719 PM,BWYa,CRAWLEY,F,...,DS-SH-DS SH-LH,DS-SH-DS,DSSHDS,1,0,DSSHDS SHSHLH,1,0,0,SH Lone/Comb DS
3,0.932017,0.942875,3,SH,MGGY,0,BWY MGGY Call Combo 1 290719 PM,BWYa,CRAWLEY,F,...,DS-SH-DS SH-LH,SH-LH,SHSHLH,0,1,DSSHDS SHSHLH,3,1,0,In call w LH
4,0.951561,0.980101,4,SH,MGGY,0,BWY MGGY Call Combo 1 290719 PM,BWYa,CRAWLEY,F,...,DS-SH-DS SH-LH,SH-LH,SHSHLH,1,1,DSSHDS SHSHLH,4,1,0,In call w LH


In [66]:
# Number of rows in the SH table
SH_df.shape[0]

489

In [67]:
# Min-max normalise every SH spectrogram (tqdm shows progress over the rows).
specsSH = [norm(spec) for spec in tqdm(list(SH_df.specs.values))]

  0%|          | 0/489 [00:00<?, ?it/s]

In [None]:
# Plot the SH UMAP coordinates with avgn's scatter_spec, labelling points by 'location'.
# x_range / y_range are hard-coded for this embedding.
nex = -1
color = ['lightcoral', 'darkorange', 'olive', 'teal', 'violet', 'skyblue']
scatter_spec(
    np.array(list(SH_df['umap'].values)),
    specsSH,
    column_size=10,
    x_range=[3, 15],
    y_range=[0.5, 12.5],
    pal_color="hls",
    color_points=False,
    enlarge_points=0,
    figsize=(10, 10),
    range_pad=0.15,
    scatter_kwargs=dict(
        labels=SH_df['location'].values,
        alpha=0.7,
        s=10,
        show_legend=True,
        color_palette=color,
    ),
    matshow_kwargs=dict(cmap=plt.cm.Greys),
    line_kwargs=dict(lw=0.5, ls="dashed", alpha=0.25),
    draw_lines=True,
    n_subset=1000,
    border_line_width=3,
);

In [None]:
# Plot the SH UMAP coordinates with avgn's scatter_spec, labelling points by 'location',
# using a different four-colour palette from the preceding cell.
# NOTE(review): the labels here are the same 'location' values as the previous plot; a
# four-colour palette suggests another column (perhaps 'simp') was intended — confirm.
nex = -1
color = ['thistle', 'teal', 'violet', 'lightcoral']
scatter_spec(
    np.array(list(SH_df['umap'].values)),
    specsSH,
    column_size=10,
    x_range=[3, 15],
    y_range=[0.5, 12.5],
    pal_color="hls",
    color_points=False,
    enlarge_points=0,
    figsize=(10, 10),
    range_pad=0.15,
    scatter_kwargs=dict(
        labels=SH_df['location'].values,
        alpha=0.7,
        s=10,
        show_legend=True,
        color_palette=color,
    ),
    matshow_kwargs=dict(cmap=plt.cm.Greys),
    line_kwargs=dict(lw=0.5, ls="dashed", alpha=0.25),
    draw_lines=True,
    n_subset=1000,
    border_line_width=3,
);

In [72]:
# Copy 'location' into a 'figure' column so legend labels can be edited without
# changing the original 'location' values
SH_df['figure'] = SH_df['location']

In [73]:
# Boolean masks selecting SH rows by their raw (upper-case) location label in 'figure'
cond1 = SH_df['figure'].eq('GUILDFORD')
cond2 = SH_df['figure'].eq('CRAWLEY')

In [74]:
# Title-case the SH location labels shown in figure legends.
# Mapping by value keeps this cell independent of the reused cond1/cond2 names.
SH_df['figure'] = SH_df['figure'].replace({
    'GUILDFORD': 'Guildford',
    'CRAWLEY': 'Crawley',
})

In [75]:
# Boolean masks selecting LH rows by their raw (upper-case) 'location' value
cond1 = LH_df['location'].eq('GUILDFORD')
cond2 = LH_df['location'].eq('CRAWLEY')

In [76]:
# Title-case the LH location labels for figure legends.
# Unlike the DS/SH tables, this overwrites 'location' itself rather than a 'figure' copy.
LH_df['location'] = LH_df['location'].replace({
    'GUILDFORD': 'Guildford',
    'CRAWLEY': 'Crawley',
})

In [77]:
# Boolean masks selecting NL rows by their raw (upper-case) 'location' value
cond1 = NL_df['location'].eq('GUILDFORD')
cond2 = NL_df['location'].eq('CRAWLEY')

In [78]:
# Title-case the NL location labels for figure legends.
# Unlike the DS/SH tables, this overwrites 'location' itself rather than a 'figure' copy.
NL_df['location'] = NL_df['location'].replace({
    'GUILDFORD': 'Guildford',
    'CRAWLEY': 'Crawley',
})

## Manuscript Figure

In [80]:
from avgn.utils.paths import DATA_DIR, most_recent_subdirectory, ensure_dir, FIGURE_DIR
from avgn.utils.general import save_fig

In [None]:
def _draw_fig3_panel(df, specs, ax, x_range, y_range, labels, palette, alpha):
    """Draw one Figure 3 panel by forwarding to ``scatter_spec``.

    Both panels share the same fixed styling arguments; only the values exposed
    as parameters differ between them. Fresh keyword dictionaries are built on
    every call, exactly as the original inline calls did.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose 'umap' column supplies the coordinates to plot.
    specs : list
        Normalised spectrograms for the same rows, passed straight through.
    ax : matplotlib axes
        Forwarded as ``ax=``.
    x_range, y_range : list of two numbers
        Forwarded as ``x_range=`` / ``y_range=``.
    labels : array-like
        Per-row labels (``scatter_kwargs['labels']``).
    palette : str or list of str
        ``scatter_kwargs['color_palette']``.
    alpha : float
        ``scatter_kwargs['alpha']``.
    """
    scatter_spec(
        np.array(list(df['umap'].values)),
        specs,
        column_size=10,
        x_range=x_range,
        y_range=y_range,
        pal_color="hls",
        color_points=False,
        enlarge_points=0,
        ax=ax,
        range_pad=0.15,
        scatter_kwargs={
            'labels': labels,
            'alpha': alpha,
            's': 20,
            'show_legend': True,
            "color_palette": palette,
        },
        matshow_kwargs={'cmap': plt.cm.Greys},
        line_kwargs={'lw': 0.5, 'ls': "dashed", 'alpha': 0.25},
        draw_lines=True,
        n_subset=1000,
        border_line_width=3,
    )


# 12 x 12 inch canvas split into two side-by-side panels (each spans 6 of 12 grid columns).
fig = plt.figure(figsize=(12, 12))
ax1 = plt.subplot2grid(shape=(1, 12), loc=(0, 0), colspan=6)  ## 1st row, 1st place
ax2 = plt.subplot2grid(shape=(1, 12), loc=(0, 6), colspan=6)  ## 1st row, 2nd place
for panel_ax in (ax1, ax2):
    panel_ax.axis('off')

nex = -1  # not used by any visible cell; kept in case later cells rely on it
LHcolor = ['mediumpurple', 'indigo', 'darkgreen', 'lightgreen', 'darkorange', 'olive', 'teal',
           'skyblue']

# Panel A: LH segments labelled by 'simp'.
_draw_fig3_panel(LH_df, specsLH, ax1, [6, 14.5], [-0.5, 8], LH_df.simp.values, LHcolor, 0.7)
# Panel B: NL segments labelled by 'figure'.
# NOTE(review): the legend text depends on the NL 'figure' relabelling cell having matched
# the raw labels — check the legend shows the readable names.
_draw_fig3_panel(NL_df, specsNL, ax2, [4, 16.5], [3, 15.5], NL_df.figure.values, 'Reds', 0.8)

ax1.set_title("A)", loc='left', fontsize=15)
ax2.set_title("B)", loc='left', fontsize=15)

plt.tight_layout()
# NOTE(review): save_loc already ends in '.jpeg'; if save_fig appends its own extension the
# file on disk will carry a doubled suffix — confirm against avgn.utils.general.save_fig.
save_loc = (FIGURE_DIR / DATASET_ID / 'Figures' / 'Figure_3.jpeg')
ensure_dir(save_loc.as_posix())
save_fig(save_loc, dpi=600, save_jpg=True)
plt.show()

In [None]:
def _draw_figS4_panel(df, specs, ax, x_range, y_range, labels, palette, alpha):
    """Draw one Fig S4 panel by forwarding to ``scatter_spec``.

    All four panels share the same fixed styling arguments; only the values
    exposed as parameters differ between them. Fresh keyword dictionaries are
    built on every call, exactly as the original inline calls did.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose 'umap' column supplies the coordinates to plot.
    specs : list
        Normalised spectrograms for the same rows, passed straight through.
    ax : matplotlib axes
        Forwarded as ``ax=``.
    x_range, y_range : list of two numbers
        Forwarded as ``x_range=`` / ``y_range=``.
    labels : array-like
        Per-row labels (``scatter_kwargs['labels']``).
    palette : str or list of str
        ``scatter_kwargs['color_palette']``.
    alpha : float
        ``scatter_kwargs['alpha']``.
    """
    scatter_spec(
        np.array(list(df['umap'].values)),
        specs,
        column_size=10,
        x_range=x_range,
        y_range=y_range,
        pal_color="hls",
        color_points=False,
        enlarge_points=0,
        ax=ax,
        range_pad=0.15,
        scatter_kwargs={
            'labels': labels,
            'alpha': alpha,
            's': 20,
            'show_legend': True,
            "color_palette": palette,
        },
        matshow_kwargs={'cmap': plt.cm.Greys},
        line_kwargs={'lw': 0.5, 'ls': "dashed", 'alpha': 0.25},
        draw_lines=True,
        n_subset=1000,
        border_line_width=3,
    )


# 12 x 12 inch canvas split into a 2 x 2 arrangement (each panel spans 3 of 6 grid columns).
fig = plt.figure(figsize=(12, 12))
ax1 = plt.subplot2grid(shape=(2, 6), loc=(0, 0), colspan=3)  ## 1st row, 1st place
ax2 = plt.subplot2grid(shape=(2, 6), loc=(0, 3), colspan=3)  ## 1st row, 2nd place
ax3 = plt.subplot2grid(shape=(2, 6), loc=(1, 0), colspan=3)  ## 2nd row, 1st place
ax4 = plt.subplot2grid(shape=(2, 6), loc=(1, 3), colspan=3)  ## 2nd row, 2nd place
for panel_ax in (ax1, ax2, ax3, ax4):
    panel_ax.axis('off')

nex = -1  # not used by any visible cell; kept in case later cells rely on it
LHcolor = ["maroon", "lightcoral"]
color = ['thistle', 'teal', 'violet', 'lightcoral']
DScolor = ['midnightblue', 'cornflowerblue', 'darkgreen', 'lightgreen', 'darkorange', 'olive',
           'teal', 'skyblue']

# Panel A: LH segments labelled by 'location'.
_draw_figS4_panel(LH_df, specsLH, ax1, [6, 14.5], [-0.5, 8], LH_df.location.values, LHcolor, 0.7)
# Panel B: NL segments labelled by 'location'.
_draw_figS4_panel(NL_df, specsNL, ax2, [4, 16.5], [3, 15.5], NL_df.location.values, 'tab20', 0.8)
# Panel C: SH segments labelled by 'figure'.
_draw_figS4_panel(SH_df, specsSH, ax3, [3.5, 14.5], [1, 12], SH_df.figure.values, color, 0.7)
# Panel D: DS segments labelled by 'figure'.
_draw_figS4_panel(DS_df, specsDS, ax4, [-0.25, 9.75], [-0.75, 9.25], DS_df.figure.values, DScolor, 0.7)

ax1.set_title("A)", loc='left', fontsize=15)
ax2.set_title("B)", loc='left', fontsize=15)
ax3.set_title("C)", loc='left', fontsize=15)
ax4.set_title("D)", loc='left', fontsize=15)

plt.tight_layout()
# NOTE(review): save_loc already ends in '.jpeg'; if save_fig appends its own extension the
# file on disk will carry a doubled suffix — confirm against avgn.utils.general.save_fig.
save_loc = (FIGURE_DIR / DATASET_ID / 'Figures' / 'Fig S4.jpeg')
ensure_dir(save_loc.as_posix())
save_fig(save_loc, dpi=600, save_jpg=True)
plt.show()