In [1]:
# Reload edited modules automatically before each cell is executed.
%load_ext autoreload
%autoreload 2
# Pin the GPU selection; these variables are set here, before the cell that imports cuml.
%env CUDA_DEVICE_ORDER=PCI_BUS_ID
%env CUDA_VISIBLE_DEVICES=2

env: CUDA_DEVICE_ORDER=PCI_BUS_ID
env: CUDA_VISIBLE_DEVICES=2


In [2]:
import numpy as np
import matplotlib.pyplot as plt
from tqdm.autonotebook import tqdm
import pandas as pd
from cuml.manifold.umap import UMAP as cumlUMAP
from avgn.utils.paths import DATA_DIR, most_recent_subdirectory, ensure_dir
from avgn.signalprocessing.create_spectrogram_dataset import flatten_spectrograms



### Load data

In [3]:
# Dataset name; it selects the sub-folder for both the input dataframe and the saved output.
DATASET_ID = 'buckeye'
# Folder holding this dataset's pickled dataframes, then the specific file to load.
syllable_df_dir = DATA_DIR / 'syllable_dfs' / DATASET_ID
df_loc = syllable_df_dir / 'phones.pickle'

In [4]:
# Read the pickled dataframe and discard its 'audio' column in one step;
# the cells shown below only work with the 'spectrogram' column.
syllable_df = pd.read_pickle(df_loc).drop(columns=['audio'])

In [5]:
# Show the first three rows as a quick look at the available columns.
syllable_df.head(3)

Unnamed: 0,start_time,end_time,labels,word,utterance_number,position_in_utterance,position_in_word,word_number,indv,indvi,age,sex,key,rate,spectrogram
0,4.885333,4.9886,w,well,0,0,0,0,s07,0,o,f,s0702b,16000,"[[2, 2, 2, 2, 0, 44, 79, 95, 86, 81, 77, 38, 3..."
1,4.9886,5.114649,eh,well,1,0,1,0,s07,0,o,f,s0702b,16000,"[[4, 4, 174, 166, 164, 162, 160, 159, 159, 161..."
2,5.114649,5.255262,l,well,2,0,2,0,s07,0,o,f,s0702b,16000,"[[5, 5, 170, 156, 156, 154, 151, 149, 147, 144..."


In [None]:
# Dimensions of the first spectrogram, as a check of the per-item array size.
first_spec = syllable_df['spectrogram'].iloc[0]
np.shape(first_spec)

(32, 24)

### Project spectrograms with UMAP

In [None]:
def _peak_normalize(spec):
    """Scale one spectrogram by its maximum value.

    An all-zero spectrogram has a maximum of 0, and dividing by it would fill
    the array with NaN values that then reach the projection step. Such inputs
    are returned as an all-zero float array of the same shape instead.
    """
    if not np.any(spec):
        return np.zeros(np.shape(spec), dtype=float)
    return spec / np.max(spec)


# Normalize every spectrogram to its own peak, then flatten them for projection.
specs = list(syllable_df.spectrogram.values)
specs = [_peak_normalize(spec) for spec in tqdm(specs)]
specs_flattened = flatten_spectrograms(specs)
# Display the shape of the flattened data as the cell output.
np.shape(specs_flattened)

HBox(children=(IntProgress(value=0, max=837207), HTML(value='')))

In [None]:
# UMAP min_dist setting; the same value is reused in the output filename when saving.
min_dist = 0.5
# NOTE(review): no seed is passed, so the layout is presumably not reproducible
# run-to-run — confirm whether the installed cuML version accepts random_state.
cuml_umap = cumlUMAP(min_dist=min_dist)
# Fit on the flattened spectrograms and keep the projected coordinates.
embedding = cuml_umap.fit_transform(specs_flattened)

In [None]:
# Attach the projection to the dataframe, one coordinate array per row.
syllable_df['umap'] = [coords for coords in embedding]

In [None]:
# Show the first three rows again, now including the new 'umap' column.
syllable_df.head(3)

In [None]:
# Plot every embedded point; the very low alpha means only overlapping points add up to visible ink.
fig, ax = plt.subplots()
ax.scatter(embedding[:, 0], embedding[:, 1], s=1, color='k', alpha=0.005)
# Label the axes and title so the figure can be read without the surrounding cells.
# The trailing semicolon keeps the return value of ax.set out of the cell output.
ax.set(
    xlim=(-8, 8),
    ylim=(-8, 8),
    xlabel='UMAP 1',
    ylabel='UMAP 2',
    title='UMAP projection of {} spectrograms (min_dist={})'.format(DATASET_ID, min_dist),
);

### Save

In [None]:
# Prepare the output location under this dataset's embeddings folder.
# NOTE(review): the save cell writes '<min_dist>_humans_full.pickle' directly under
# the dataset folder, not inside 'human_full' — confirm this directory name is intended.
embedding_dir = DATA_DIR / 'embeddings' / DATASET_ID / 'human_full'
ensure_dir(embedding_dir)

In [None]:
# Build the output path once so the folder that gets created is exactly the one written to.
save_loc = DATA_DIR / 'embeddings' / DATASET_ID / (str(min_dist) + '_humans_full.pickle')
# Create the parent folder here rather than relying on another cell having done it.
# Assumes DATA_DIR is a pathlib-style path (it already supports the `/` operator) — TODO confirm.
save_loc.parent.mkdir(parents=True, exist_ok=True)
syllable_df.to_pickle(save_loc)