# Example of generating embeddings from an OpenSoundscape.CNN object

In [11]:
from opensoundscape import Audio, Spectrogram, CNN, BoxedAnnotations

import numpy as np
import pandas as pd
from glob import glob
from pathlib import Path

from matplotlib import pyplot as plt
plt.rcParams['figure.figsize']=[15,5] #for big visuals
%config InlineBackend.figure_format = 'retina'

from copy import deepcopy
from torch import nn

### Load a CNN

In [12]:
# Create a new CNN object here; to reuse an existing model instead,
# load your own CNN object (e.g. with `load_model()`).
m = CNN(
    'resnet18',
    classes=[0, 1],
    sample_duration=2,
)

### Remove classification head to use CNN for embedding

In [13]:
# Work on a deep copy so the original model object `m` is not modified.
embedder = deepcopy(m)

# Swap the "classification head" (the final fully connected layer in most CNNs)
# for an identity layer, which returns its input unchanged.
# NOTE: PyTorch resnet architectures expose this layer as `.fc`; other
# architectures may store it under a different attribute.
embedder.network.fc = nn.Identity()

### Generate embeddings

In [14]:
# Audio file(s) to embed.
# NOTE(review): this is a machine-specific absolute path -- point it at your own file(s).
audio_files = ['/Users/SML161/a.mp3']

# Build a dataloader that generates batches of AudioSamples (copied from .predict).
# Many more arguments are available; see the CNN.predict documentation.
dataloader = embedder.inference_dataloader_cls(
    samples=audio_files,
    preprocessor=embedder.preprocessor,
)

# Iterate the dataloader and run inference (forward pass) to generate embeddings.
# Calling the object directly is equivalent to embedder.__call__(dataloader).
embeddings_array = embedder(dataloader)

# Optional: for a dataframe instead of an array, reuse the index stored on the dataloader.
df_index = dataloader.dataset.dataset.label_df.index
embedding_df = pd.DataFrame(data=embeddings_array, index=df_index)


  0%|          | 0/2 [00:00<?, ?it/s]