In [11]:
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ExponentialLR
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os


In [12]:
# Pick the compute device. Apple's Metal backend (MPS) is preferred because that is
# what this notebook originally targeted; fall back to CUDA and finally the CPU so
# that every later `.to(device)` call still works on machines without MPS.
if torch.backends.mps.is_available():
    device = torch.device('mps')
elif torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')


In [13]:
class SpectrogramDataset(Dataset):
    """Dataset of spectrograms stored as ``.npy`` files and listed in a CSV file.

    Every item is a ``(spectrogram_path, spectrogram_tensor)`` pair. The loaded
    array is cut or zero-padded along axis 2 so that it has exactly
    ``target_length * sample_rate`` entries on that axis. The pad widths are
    given for exactly three axes, so arrays are expected to be 3-D.
    """

    def __init__(self, csv_file, target_length=10, sample_rate=80, root_dir='..'):
        """
        Args:
            csv_file (string): Path to the csv file with annotations and file paths.
                Must contain a 'spectrogram_filepath' column.
            target_length (int or float): Desired length of the spectrograms in seconds.
            sample_rate (int): Sample rate of the spectrograms.
            root_dir (string): Directory that the 'spectrogram_filepath' entries are
                relative to. Defaults to '..', the location that was previously
                hard-coded.
        """
        self.annotations = pd.read_csv(csv_file)
        self.target_length = target_length
        self.sample_rate = sample_rate
        self.root_dir = root_dir

    def __len__(self):
        """Return the number of rows in the annotations CSV."""
        return len(self.annotations)

    def __getitem__(self, idx):
        """Load, length-normalise and return the spectrogram at row ``idx``.

        Returns:
            tuple: ``(spectrogram_path, spectrogram_tensor)`` where the path is the
            raw CSV entry and the tensor is ``torch.float32``.
        """
        spectrogram_path = self.annotations.iloc[idx]['spectrogram_filepath']
        spectrogram = np.load(os.path.join(self.root_dir, spectrogram_path))

        # Number of entries to keep on axis 2. Cast to int so that a fractional
        # target_length (e.g. 2.5 s) still yields a valid slice bound / pad width.
        num_samples = int(round(self.target_length * self.sample_rate))

        current_length = spectrogram.shape[2]
        if current_length > num_samples:
            # Too long: keep only the leading num_samples entries
            spectrogram = spectrogram[:, :, :num_samples]
        elif current_length < num_samples:
            # Too short: append zeros at the end of axis 2
            padding_size = num_samples - current_length
            spectrogram = np.pad(spectrogram, ((0, 0), (0, 0), (0, padding_size)), mode='constant')

        spectrogram_tensor = torch.from_numpy(spectrogram).to(torch.float32)
        return spectrogram_path, spectrogram_tensor


In [16]:
def create_data_loader(csv_file, batch_size, target_length=10, sample_rate=80, shuffle=True):
    """Build a DataLoader over the spectrograms listed in ``csv_file``.

    Args:
        csv_file (string): Path to the annotations CSV passed to SpectrogramDataset.
        batch_size (int): Number of spectrograms per batch.
        target_length (int): Forwarded to SpectrogramDataset.
        sample_rate (int): Forwarded to SpectrogramDataset.
        shuffle (bool): Whether to reshuffle the data every epoch. Defaults to True
            (the previous hard-coded behaviour); pass False for evaluation or
            feature extraction where a stable order is preferable.

    Returns:
        DataLoader: Yields ``(spectrogram_paths, spectrogram_batch)`` pairs.
    """
    dataset = SpectrogramDataset(csv_file=csv_file, target_length=target_length, sample_rate=sample_rate)
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

# Example usage: build the training and test loaders
train_data_loader = create_data_loader(csv_file='../train.csv', batch_size=1024)
test_data_loader = create_data_loader(csv_file='../test.csv', batch_size=1024)

# Number of batches in each loader
for loader in (train_data_loader, test_data_loader):
    print(len(loader))

# Shape of the spectrogram tensor in one collated training batch
first_paths, first_batch = next(iter(train_data_loader))
print(first_batch.shape)


12
4
torch.Size([1024, 1, 201, 800])


In [18]:
class Encoder(nn.Module):
    """Convolutional encoder: two stride-2 convolutions followed by a linear bottleneck.

    Maps a batch of single-channel inputs of shape (N, 1, H, W) to latent vectors
    of shape (N, bottleneck_size).

    Args:
        bottleneck_size (int): Width of the latent vector. Defaults to 128.
        input_shape (tuple): ``(height, width)`` of the input spectrograms. Defaults
            to (201, 800), for which the conv stack produces a 32 x 51 x 200 map.

    The layer names (conv1, conv2, fc) and their creation order are unchanged, so
    state_dicts saved from the default configuration still load.
    """

    def __init__(self, bottleneck_size=128, input_shape=(201, 800)):
        super().__init__()
        height, width = input_shape
        # Spatial size left after the two stride-2 convolutions
        feat_h = self._conv_out(self._conv_out(height))
        feat_w = self._conv_out(self._conv_out(width))

        # Two convolutional layers with stride 2 (each roughly halves H and W)
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1)  # default: 16 x 101 x 400
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=2, padding=1)  # default: 32 x 51 x 200

        # Fully connected bottleneck over the flattened feature map
        self.fc = nn.Linear(32 * feat_h * feat_w, bottleneck_size)

    @staticmethod
    def _conv_out(size):
        """Output length of a kernel-3, stride-2, padding-1 convolution along one axis."""
        return (size + 2 * 1 - 3) // 2 + 1

    def forward(self, x):
        """Encode ``x`` of shape (N, 1, H, W) into latent vectors of shape (N, bottleneck_size)."""
        x = torch.relu(self.conv1(x))
        x = torch.relu(self.conv2(x))

        # Flatten everything but the batch axis; flatten (unlike view) also
        # accepts non-contiguous tensors.
        x = torch.flatten(x, start_dim=1)
        x = self.fc(x)
        return x


In [19]:
def test_Encoder_shapes():
    """Smoke test: a default Encoder maps a (32, 1, 201, 800) batch to a (32, 128) output."""
    batch_size, bottleneck_size = 32, 128
    model = Encoder()

    # Random single-channel batch laid out as (batch_size, channels, height, width)
    inputs = torch.randn(batch_size, 1, 201, 800)
    latent = model(inputs)

    # One bottleneck vector is expected per input sample
    wanted = torch.Size([batch_size, bottleneck_size])
    assert latent.shape == wanted, f"Bad y.shape: {latent.shape}"
    print('Success')

test_Encoder_shapes()


Success


In [20]:
class Decoder(nn.Module):
    """Decoder: linear layer followed by two stride-2 transposed convolutions.

    Maps latent vectors of shape (N, bottleneck_size) to single-channel outputs of
    shape (N, 1, H, W), where ``(H, W) = output_shape``.

    Args:
        bottleneck_size (int): Width of the latent vector. Defaults to 128.
        output_shape (tuple): ``(height, width)`` of the reconstruction. Defaults to
            (201, 800), which gives the original 32 x 51 x 200 feature map.

    The layer names (fc, conv_transpose1, conv_transpose2) and their creation order
    are unchanged, so state_dicts saved from the default configuration still load.

    NOTE(review): the final ReLU restricts the output to values >= 0 — confirm the
    target spectrograms are non-negative.
    """

    def __init__(self, bottleneck_size=128, output_shape=(201, 800)):
        super().__init__()
        self.output_shape = tuple(output_shape)
        out_h, out_w = self.output_shape
        # Feature-map size that two kernel-3 / stride-2 / padding-1 convolutions
        # would produce from an (out_h, out_w) input; the decoder starts from it.
        self.feature_shape = (
            32,
            self._conv_out(self._conv_out(out_h)),
            self._conv_out(self._conv_out(out_w)),
        )

        # Fully connected layer expanding the latent vector to the feature map
        self.fc = nn.Linear(bottleneck_size, self.feature_shape[0] * self.feature_shape[1] * self.feature_shape[2])

        # First transposed convolution exactly doubles height and width (h -> 2h)
        self.conv_transpose1 = nn.ConvTranspose2d(32, 16, kernel_size=3, stride=2, padding=1, output_padding=1)
        # Second one maps 2h -> 4h + 1, which always overshoots the target size
        self.conv_transpose2 = nn.ConvTranspose2d(16, 1, kernel_size=3, stride=2, padding=0, output_padding=0)

    @staticmethod
    def _conv_out(size):
        """Output length of a kernel-3, stride-2, padding-1 convolution along one axis."""
        return (size + 2 * 1 - 3) // 2 + 1

    def forward(self, x):
        """Decode latent vectors ``x`` of shape (N, bottleneck_size) to (N, 1, H, W)."""
        x = self.fc(x)
        x = x.view(-1, *self.feature_shape)

        x = torch.relu(self.conv_transpose1(x))
        x = torch.relu(self.conv_transpose2(x))

        # TODO: Remove the hard fix
        # The transposed convolutions overshoot the target size (205 x 801 for the
        # default 201 x 800), so the surplus rows/columns are cropped away.
        out_h, out_w = self.output_shape
        x = x[:, :, :out_h, :out_w]

        return x


In [21]:
def test_Decoder_shapes():
    """Smoke test: a default Decoder maps a (3, 128) latent batch to a (3, 1, 201, 800) output."""
    batch_size = 3
    model = Decoder()

    # Random latent batch laid out as (batch_size, bottleneck_size)
    latent = torch.randn(batch_size, 128)
    reconstruction = model(latent)

    # Output layout is (batch_size, channels, height, width)
    wanted = torch.Size([batch_size, 1, 201, 800])
    assert reconstruction.shape == wanted, "Bad shape of y: y.shape={}".format(reconstruction.shape)
    print('Success')

test_Decoder_shapes()


Success


In [22]:
# Instantiate both halves of the autoencoder on the selected device
# (nn.Module.to moves the module in place and returns it).
encoder = Encoder().to(device)
decoder = Decoder().to(device)

# Report the number of trainable (requires_grad) scalar parameters per model
num_encoder_parameters = sum(
    weights.numel() for weights in filter(lambda t: t.requires_grad, encoder.parameters())
)
print(f'Number of parameters in the encoder: {num_encoder_parameters}')

num_decoder_parameters = sum(
    weights.numel() for weights in filter(lambda t: t.requires_grad, decoder.parameters())
)
print(f'Number of parameters in the decoder: {num_decoder_parameters}')


Number of parameters in the encoder: 41784128
Number of parameters in the decoder: 42110369


In [23]:
learning_rate = 0.001
epochs = 300

# A single Adam optimizer updates the encoder and decoder jointly.
optimizer = torch.optim.Adam(list(encoder.parameters()) + list(decoder.parameters()), lr=learning_rate)
# Reconstruction loss: mean squared error between decoder output and input.
criterion = nn.MSELoss()
# The learning rate is multiplied by 0.95 after every epoch.
scheduler = ExponentialLR(optimizer, gamma=0.95)

# Per-epoch mean losses, one entry per completed epoch.
train_losses = []
test_losses = []

for epoch in range(epochs):
    # ---- training pass ----
    encoder.train()
    decoder.train()
    train_loss = 0
    for batch_idx, (_, data) in enumerate(train_data_loader):
        data = data.to(device)
        optimizer.zero_grad()

        z = encoder(data)
        output = decoder(z)

        loss = criterion(output, data)

        loss.backward()
        optimizer.step()

        # criterion returns the batch mean; weight it by the batch size so a
        # shorter final batch does not get over-represented in the epoch mean.
        train_loss += loss.item() * data.size(0)

    train_loss /= len(train_data_loader.dataset)
    train_losses.append(train_loss)

    # ---- evaluation pass (no gradients, no parameter updates) ----
    encoder.eval()
    decoder.eval()
    test_loss = 0
    with torch.no_grad():
        for batch_idx, (_, data) in enumerate(test_data_loader):
            data = data.to(device)

            z = encoder(data)
            output = decoder(z)

            loss = criterion(output, data)
            test_loss += loss.item() * data.size(0)

    test_loss /= len(test_data_loader.dataset)
    test_losses.append(test_loss)

    print(f'Epoch {epoch + 1}, Train Loss: {train_loss:.2f}, Test Loss: {test_loss:.2f}')

    # Decay the learning rate once per epoch
    scheduler.step()


Epoch 1, Train Loss: 1.46, Test Loss: 1.21


KeyboardInterrupt: 

In [24]:
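# # Save the encoder and decoder weights. Uncomment the two lines below to write the
# # 'encoder.pth' / 'decoder.pth' checkpoint files that the loading cell reads.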
# torch.save(encoder.state_dict(), 'encoder.pth')
# torch.save(decoder.state_dict(), 'decoder.pth')


In [25]:
# Load the encoder and decoder weights from the saved checkpoint files.
# map_location='cpu' makes loading independent of the device the checkpoints were
# saved from; the models are moved to `device` afterwards.
# NOTE(review): torch.load unpickles the file, so only load checkpoints you trust
# (recent PyTorch versions accept weights_only=True to restrict what is unpickled).
encoder = Encoder()
encoder.load_state_dict(torch.load('encoder.pth', map_location='cpu'))
encoder.to(device)

decoder = Decoder()
decoder.load_state_dict(torch.load('decoder.pth', map_location='cpu'))
decoder.to(device)


Decoder(
  (fc): Linear(in_features=128, out_features=326400, bias=True)
  (conv_transpose1): ConvTranspose2d(32, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1))
  (conv_transpose2): ConvTranspose2d(16, 1, kernel_size=(3, 3), stride=(2, 2))
)

In [26]:
# Create and save the bottleneck feature vectors for a dataset and record their
# locations in the dataset's CSV file.
def save_bottleneck_features(data_loader, model, bottleneck_filepath, csv_file):
    """Run ``model`` over ``data_loader`` and save one feature vector per spectrogram.

    Each output row is written to
    ``<bottleneck_filepath>/<spectrogram basename>_bottleneck.npy`` and that path is
    stored in a 'bottleneck_filepath' column of ``csv_file`` (rewritten in place).
    Rows whose spectrogram was not produced by the loader get an empty string.

    Args:
        data_loader: Iterable yielding ``(filepaths, data)`` batches, where
            ``filepaths`` are the 'spectrogram_filepath' values of the samples.
        model (nn.Module): Model whose output rows are the features to save.
        bottleneck_filepath (string): Output directory; created if missing.
        csv_file (string): Annotations CSV with a 'spectrogram_filepath' column.

    Note:
        Output names are derived from the basename only, so spectrograms in
        different directories that share a file name overwrite each other.
    """
    model.eval()
    os.makedirs(bottleneck_filepath, exist_ok=True)

    annotations = pd.read_csv(csv_file)

    # Feed the data on whatever device the model lives on, rather than relying on
    # a notebook-level global. A parameter-free model leaves the data where it is.
    first_param = next(model.parameters(), None)
    model_device = None if first_param is None else first_param.device

    # spectrogram_filepath -> saved feature path; applied to the frame in one pass
    # at the end instead of scanning the whole column once per sample.
    saved_paths = {}

    with torch.no_grad():
        for filepaths, data in data_loader:
            if model_device is not None:
                data = data.to(model_device)
            bottleneck_features = model(data).cpu().numpy()

            for filepath, features in zip(filepaths, bottleneck_features):
                # splitext keeps the recorded name in sync with what np.save writes,
                # even when the source file does not end in '.npy'.
                stem, _ = os.path.splitext(os.path.basename(filepath))
                bottleneck_full_path = os.path.join(bottleneck_filepath, f"{stem}_bottleneck.npy")

                np.save(bottleneck_full_path, features)
                saved_paths[filepath] = bottleneck_full_path

    annotations['bottleneck_filepath'] = annotations['spectrogram_filepath'].map(saved_paths).fillna('')

    # Save the updated annotations DataFrame back to the CSV file
    annotations.to_csv(csv_file, index=False)
    print("Bottleneck features saved and CSV file updated.")


In [28]:
# Output directories for the feature vectors and the CSV copies that get annotated
bottleneck_train_dir = 'train_bottleneck'
bottleneck_test_dir = 'test_bottleneck'
train_csv_file = '../train2.csv'
test_csv_file = '../test2.csv'

# Work on copies of the annotation files so the original CSVs stay untouched
test_df = pd.read_csv('../test.csv')
train_df = pd.read_csv('../train.csv')
test_df.to_csv('../test2.csv', index=False)
train_df.to_csv('../train2.csv', index=False)

# Training split: write the feature vectors and record their paths in train2.csv
save_bottleneck_features(
    data_loader=train_data_loader,
    model=encoder.to(device),
    bottleneck_filepath=bottleneck_train_dir,
    csv_file=train_csv_file,
)

# Test split: same procedure, recorded in test2.csv
save_bottleneck_features(
    data_loader=test_data_loader,
    model=encoder.to(device),
    bottleneck_filepath=bottleneck_test_dir,
    csv_file=test_csv_file,
)


AttributeError: 'tuple' object has no attribute 'to'