#### Load dataframe

In [32]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import json
import random
import torch
from torch.utils.data import DataLoader, Dataset, random_split
from sklearn.preprocessing import LabelEncoder

In [33]:
# Single source of truth for the seed; change it here to reseed every generator below.
RANDOM_SEED = 42

# Seed Python's `random`, NumPy's legacy global RNG and PyTorch's default
# generator with the same value so stochastic steps are repeatable on a
# fresh Restart & Run All.
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)

<torch._C.Generator at 0x11be52310>

In [34]:
def _decode_spectrogram(serialized):
    """Parse one JSON-encoded cell and return its contents as a NumPy array."""
    return np.array(json.loads(serialized))


# Read the feature table, then replace the JSON text stored in the
# 'log_padded_spectrogram' column with the decoded arrays.
calls_df = pd.read_csv('features_and_spectrograms.csv')
calls_df['log_padded_spectrogram'] = calls_df['log_padded_spectrogram'].apply(_decode_spectrogram)

In [35]:
# Show column names, non-null counts and dtypes of the freshly loaded frame
# (used below to see which columns contain missing values).
calls_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7300 entries, 0 to 7299
Data columns (total 33 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   track_ID                7300 non-null   object 
 1   clip_ID                 7300 non-null   object 
 2   goose_ID                7300 non-null   object 
 3   call_type               7300 non-null   object 
 4   waveform                7300 non-null   object 
 5   sr                      7300 non-null   int64  
 6   filepath                7300 non-null   object 
 7   lfccs                   7300 non-null   object 
 8   peak                    7300 non-null   float64
 9   duration                7300 non-null   float64
 10  normalized_log_length   7300 non-null   float64
 11  log_target_duration     7300 non-null   float64
 12  log_padded_spectrogram  7300 non-null   object 
 13  log_padded_lfccs        7300 non-null   object 
 14  f0mean                  6484 non-null   

In [36]:
# Number of rows that have a missing value in at least one column.
# Summing the boolean mask avoids building a filtered copy of the frame.
print(calls_df.isna().any(axis=1).sum())

# NOTE(review): dropna() considers every column of the loaded frame, but the
# five columns kept below are fully populated according to the info() output
# above. The discarded rows are therefore only missing values in columns that
# are thrown away here - confirm that dropping them is intended.
calls_df = calls_df.dropna()

# Keep only the identifiers, the label and the spectrogram. The explicit
# .copy() makes the result an independent frame, so adding columns to it
# later does not trigger pandas' SettingWithCopyWarning.
calls_df = calls_df[["track_ID", "clip_ID", "goose_ID", "call_type", "log_padded_spectrogram"]].copy()
calls_df.info()

816
<class 'pandas.core.frame.DataFrame'>
Index: 6484 entries, 0 to 7299
Data columns (total 5 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   track_ID                6484 non-null   object
 1   clip_ID                 6484 non-null   object
 2   goose_ID                6484 non-null   object
 3   call_type               6484 non-null   object
 4   log_padded_spectrogram  6484 non-null   object
dtypes: object(5)
memory usage: 303.9+ KB


In [37]:
# Map each distinct call_type string to an integer class id. `le` is kept so
# the ids can be mapped back to names with le.inverse_transform / le.classes_.
le = LabelEncoder()

# assign() returns a new frame instead of writing a column into a frame that
# was produced by slicing, which avoids SettingWithCopyWarning and keeps this
# cell safe to re-run. fit_transform() is equivalent to fit() followed by
# transform() on the same data.
calls_df = calls_df.assign(encoded_call_type=le.fit_transform(calls_df["call_type"]))

### Variational Autoencoder

In [38]:
# Hyperparameters for the autoencoder cells that follow.
learning_rate = 0.001
epochs = 60

# The dataset is very unbalanced, hence the comparatively large batch size.
# TODO: balance the classes.
batch_size = 256

# NOTE(review): presumably toggles training from scratch versus reusing saved
# weights; the consumer of this flag is outside this section - confirm.
retrain = False

In [39]:
# Choose the compute backend in order of preference: CUDA, then Apple's MPS,
# then plain CPU as the fallback.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

print(f"Using {device} device")

Using mps device


#### Create data loader

In [40]:
# Inspect the array shape of the first row's spectrogram.
calls_df.iloc[0]["log_padded_spectrogram"].shape

(44, 170)

In [44]:
class SoundDS(Dataset):
    """Map-style dataset that serves one (spectrogram, label) pair per dataframe row.

    The wrapped frame must provide two columns:
      * "log_padded_spectrogram": a NumPy array per row,
      * "encoded_call_type": the class label per row.

    Note: `__getitem__` moves each tensor to the module-level `device`, so
    that name must be defined before items are requested.
    """

    def __init__(self, calls_df):
        """Store a reference to the dataframe to draw samples from (no copy is made)."""
        self.df = calls_df

    def __len__(self):
        """Return the number of rows, i.e. the number of samples."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return `(spectrogram, label)` for the row at integer position `idx`.

        The spectrogram is converted to a float32 tensor on `device` and gets
        a leading singleton dimension; the label is the row's
        "encoded_call_type" value, returned unchanged.
        """
        # Materialise the row once; the original looked it up twice per item.
        row = self.df.iloc[idx]
        spectrogram = (
            torch.from_numpy(row["log_padded_spectrogram"])
            .to(torch.float32)
            # NOTE(review): transferring to the device per item works with the
            # default single-process DataLoader; with worker processes the
            # transfer would normally be done in the training loop instead.
            .to(device)
        )
        # unsqueeze(0) is the tensor-native way to prepend the singleton axis.
        return spectrogram.unsqueeze(0), row["encoded_call_type"]

In [45]:
ds = SoundDS(calls_df)

# Random split for train:val - 80:20
num_items = len(ds)
num_train = round(num_items * 0.8)
num_val = num_items - num_train

# Use a dedicated, explicitly seeded generator so the partition does not
# depend on how much of the global torch RNG was consumed beforehand;
# re-running this cell now reproduces the same train/val membership.
# NOTE(review): a fresh generator seeded with RANDOM_SEED is expected to give
# the same permutation the global generator gave right after
# torch.manual_seed(RANDOM_SEED); confirm if a saved model depends on the
# exact earlier split.
split_generator = torch.Generator().manual_seed(RANDOM_SEED)
train_ds, val_ds = random_split(ds, [num_train, num_val], generator=split_generator)
assert len(train_ds) + len(val_ds) == num_items

# Create training and validation data loaders.
# Training batches are reshuffled every epoch; validation order stays fixed.
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
val_dl = DataLoader(val_ds, batch_size=batch_size, shuffle=False)

In [47]:
# Pull a single batch from the training loader as a sanity check.
first_batch = next(iter(train_dl))
train_features, train_labels = first_batch
print(f"Feature batch shape: {train_features.shape}")
print(f"Labels batch shape, type: {train_labels.shape}")

# Per-sample shape: everything after the batch dimension.
input_size = train_features.shape[1:]
print(*input_size[1:3])

Feature batch shape: torch.Size([256, 1, 44, 170])
Labels batch shape, type: torch.Size([256])
44 170
