In [1]:
%load_ext autoreload
%autoreload 2
%env CUDA_DEVICE_ORDER=PCI_BUS_ID
%env CUDA_VISIBLE_DEVICES=2

env: CUDA_DEVICE_ORDER=PCI_BUS_ID
env: CUDA_VISIBLE_DEVICES=2


In [2]:
import numpy as np
import matplotlib.pyplot as plt
from tqdm.autonotebook import tqdm
import pandas as pd
from cuml.manifold.umap import UMAP as cumlUMAP
from avgn.utils.paths import DATA_DIR, most_recent_subdirectory, ensure_dir
from avgn.signalprocessing.create_spectrogram_dataset import flatten_spectrograms



### load data

In [3]:
DATASET_ID = 'castellucci_mouse_usv_segmented'
df_loc =  DATA_DIR / 'syllable_dfs' / DATASET_ID / 'mouse.pickle'

In [8]:
syllable_df = pd.read_pickle(df_loc)
del syllable_df['audio']

In [9]:
syllable_df[:3]

Unnamed: 0,start_time,end_time,indv,indvi,key,rate,spectrogram
0,22.666,22.678,VOC594,0,VOC594_VOC586_SONG_CMPA_8_28_2016_33_20.04,250000,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,..."
1,23.804,23.818,VOC594,0,VOC594_VOC586_SONG_CMPA_8_28_2016_33_20.04,250000,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,..."
2,23.892,23.902,VOC594,0,VOC594_VOC586_SONG_CMPA_8_28_2016_33_20.04,250000,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,..."


In [10]:
np.shape(syllable_df.spectrogram.values[0])

(32, 20)

### project

In [11]:
ensure_dir(DATA_DIR / 'embeddings' / DATASET_ID / 'full')

In [8]:
for indv in tqdm(syllable_df.indv.unique()):
    subset_df = syllable_df[syllable_df.indv == indv]
    
    specs = list(subset_df.spectrogram.values)
    specs = [i/np.max(i) for i in tqdm(specs)]
    specs_flattened = flatten_spectrograms(specs)
    print(np.shape(specs_flattened))
    
    cuml_umap = cumlUMAP()
    embedding = cuml_umap.fit_transform(specs_flattened)
    subset_df['umap'] = list(embedding)
    
    fig, ax = plt.subplots()
    ax.scatter(embedding[:,0], embedding[:,1], s=1, color='k', alpha = 1)
    ax.set_xlim([-8,8])
    ax.set_ylim([-8,8])
    plt.show()
    
    subset_df.to_pickle(DATA_DIR / 'embeddings' / DATASET_ID / (indv + '.pickle'))

HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

HBox(children=(IntProgress(value=0, max=52788), HTML(value='')))

(52788, 640)


  if __name__ == '__main__':
  # Remove the CWD from sys.path while we load stuff.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # This is added back by InteractiveShellApp.init_path()


HBox(children=(IntProgress(value=0, max=35799), HTML(value='')))

(35799, 640)


  if __name__ == '__main__':
  # Remove the CWD from sys.path while we load stuff.


HBox(children=(IntProgress(value=0, max=56375), HTML(value='')))

(56375, 640)


HBox(children=(IntProgress(value=0, max=70701), HTML(value='')))

(70701, 640)

