<a href="https://colab.research.google.com/github/mercadoerik1031/snn-sound-localization/blob/write_to_disk/snn_sound_localization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **SNN Sound Localization**



---



# Pip Installs

In [2]:
! pip install snntorch --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m109.0/109.0 kB[0m [31m417.4 kB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.2/76.2 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25h

# Imports

In [3]:
import pandas as pd
import os
import librosa
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import numpy as np
from tqdm import tqdm
from snntorch import spikegen
import gc
from concurrent.futures import ThreadPoolExecutor

In [4]:
# Mount Google Drive at /content/drive; every path in the config cell below
# lives under that mount point, so this has to run before any data is read.
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


# Config

In [5]:
# Central settings for the notebook. Functions below read these values, several
# of them as default arguments, so this cell has to run before they are defined.
config = dict(
    # --- Google Colab paths (all under the mounted Drive) ---
    # Parquet table with one row of metadata per sample.
    metadata_path="/content/drive/My Drive/Colab Notebooks/Masters Project/metadata.parquet",
    # Directory holding the ambisonic recordings.
    ambisonics_path="/content/drive/My Drive/Colab Notebooks/Masters Project/spatial_librispeech_sample/ambisonics_sample",
    # Directory holding the noise recordings that can be mixed in.
    noise_path="/content/drive/My Drive/Colab Notebooks/Masters Project/spatial_librispeech_sample/noise_ambisonics_sample",
    # Directory the preprocessed batches and label files are written to.
    output_path="/content/drive/My Drive/Colab Notebooks/Masters Project/spatial_librispeech_sample/preprocessed_samples",
    # Use the GPU when one is visible to torch, otherwise fall back to the CPU.
    device="cuda" if torch.cuda.is_available() else "cpu",
    # Number of samples written into each preprocessed batch file.
    batch_size_pre=32,
    # Sampling rate handed to librosa when loading audio.
    sr=16000,

    # --- Spike encoding ---
    # True selects time_based_encoding, False selects rate_based_encoding.
    time_based_encoding=True,
    # Number of time steps passed to the snntorch spike generators.
    num_steps=10,
    # Multiplier applied to the normalized signal in rate_based_encoding.
    max_rate=10,
    # When True (and a noise file is given) the noise recording is added to the audio.
    noise=True,
)

# Filter Data

In [6]:
def filter_data(metadata_path=config["metadata_path"], ambisonics_path=config["ambisonics_path"], noise_path=config["noise_path"]):
  """Load the metadata table and list the audio files found on disk.

  The three return values are put into a deterministic order (ascending
  sample id). `os.listdir` returns entries in arbitrary order, while
  `parallel_process_batches` slices the file lists and the metadata frame with
  the same positional indices, so an unsorted listing pairs a recording with
  another sample's labels.

  NOTE(review): positional pairing is only exact when every ambisonic file has
  exactly one metadata row and the noise directory holds one matching file per
  ambisonic file -- confirm against the dataset layout.

  Args:
    metadata_path: Path of the parquet metadata file (read with pyarrow).
    ambisonics_path: Directory containing the ambisonic recordings.
    noise_path: Directory containing the noise recordings.

  Returns:
    Tuple `(filtered_metadata, ambisonic_files, noise_files)`:
    the metadata rows whose `sample_id` matches an ambisonic file name, sorted
    by `sample_id`; the full ambisonic file paths sorted by sample id; the full
    noise file paths sorted by numeric file stem (non-numeric stems last).

  Raises:
    ValueError: If an ambisonic file name does not start with an integer stem.
  """
  def _sample_id(filename):
    # Text before the first "." with leading zeros stripped; an empty result
    # (all zeros, or an empty stem) maps to sample id 0.
    return int(filename.split(".")[0].lstrip("0") or 0)

  def _noise_order(filename):
    # Numeric stems sort by value; anything else sorts after them by name, so
    # an unexpected file in the noise directory cannot raise here.
    stem = filename.split(".")[0]
    if stem.isdecimal():
      return (0, int(stem), filename)
    return (1, 0, filename)

  # Load metadata
  metadata = pd.read_parquet(metadata_path, engine="pyarrow")

  # Regular files only (sub-directories are skipped), in a stable order
  ambisonic_names = sorted(
      (f for f in os.listdir(ambisonics_path) if os.path.isfile(os.path.join(ambisonics_path, f))),
      key=lambda f: (_sample_id(f), f),
  )
  noise_names = sorted(
      (f for f in os.listdir(noise_path) if os.path.isfile(os.path.join(noise_path, f))),
      key=_noise_order,
  )

  # Keep only metadata rows that have a recording, in the same order as the files
  sample_ids = [_sample_id(f) for f in ambisonic_names]
  filtered_metadata = metadata[metadata["sample_id"].isin(sample_ids)].sort_values("sample_id", kind="stable")

  # Create full file paths
  ambisonic_files = [os.path.join(ambisonics_path, f) for f in ambisonic_names]
  noise_files = [os.path.join(noise_path, f) for f in noise_names]

  return filtered_metadata, ambisonic_files, noise_files


# Preprocess Functions

## Normalize

In [7]:
def normalize(audio_data, device=config["device"]):
  """Min-max scale a tensor to the range [0, 1] on `device`.

  Args:
    audio_data: torch.Tensor to rescale; the minimum and maximum are taken over
      the whole tensor, not per channel.
    device: Device the tensor is moved to before scaling.

  Returns:
    Tensor of the same shape with values in [0, 1]. A constant input (maximum
    equal to minimum, e.g. a silent recording) yields all zeros instead of the
    NaNs a division by zero would produce.
  """
  audio_data = audio_data.to(device)
  lowest = audio_data.min()
  value_range = audio_data.max() - lowest
  # With a zero range the numerator is zero everywhere, so dividing by 1
  # returns zeros; any non-zero range is used unchanged.
  safe_range = torch.where(value_range == 0, torch.ones_like(value_range), value_range)
  return (audio_data - lowest) / safe_range


## Rate Based Encoding

In [8]:
def rate_based_encoding(audio_data, max_rate=config["max_rate"], num_steps=config["num_steps"], device=config["device"]):
    """Encode audio as a spike train with `snntorch.spikegen.rate`.

    The input is converted to a float tensor on `device`, min-max normalized,
    multiplied by `max_rate` and handed to `spikegen.rate`.

    Args:
        audio_data: torch.Tensor or anything `torch.tensor` accepts.
        max_rate: Factor applied to the normalized signal before encoding.
        num_steps: Number of time steps requested from `spikegen.rate`.
        device: Device used for the conversion, normalization and encoding.

    Returns:
        The value returned by `spikegen.rate`.

    Raises:
        ValueError: If `audio_data` is None.
    """
    if audio_data is None:
        raise ValueError("Input data is None.")

    # Check if audio_data is already a tensor, if not convert it
    if not isinstance(audio_data, torch.Tensor):
        audio_data = torch.tensor(audio_data, device=device)

    audio_data = audio_data.float().to(device)

    # Forward the caller's device: normalize() otherwise falls back to its own
    # default and would move the data off the device requested here.
    normalized_data = normalize(audio_data, device=device)

    # NOTE(review): the scaled values reach up to max_rate; presumably
    # spikegen.rate treats its input as per-step spike probabilities, in which
    # case values above 1 saturate -- confirm against the snntorch docs.
    spike_rates = normalized_data * max_rate

    spike_train = spikegen.rate(spike_rates, num_steps=num_steps)

    return spike_train



## Time Based Encoding

In [9]:
def time_based_encoding(audio_data, num_steps=config["num_steps"], device=config["device"]):
    """Encode audio as a spike train with `snntorch.spikegen.latency`.

    The input is converted to a float tensor on `device`, min-max normalized,
    thresholded at 0.5 into a 0/1 tensor and handed to `spikegen.latency` with
    `bypass=True`.

    Args:
        audio_data: torch.Tensor or anything `torch.tensor` accepts.
        num_steps: Number of time steps requested from `spikegen.latency`.
        device: Device used for the conversion, normalization and encoding.

    Returns:
        The value returned by `spikegen.latency`.

    Raises:
        ValueError: If `audio_data` is None.
    """
    if audio_data is None:
        raise ValueError("Input data is None.")

    # Check if audio_data is already a tensor, if not convert it
    if not isinstance(audio_data, torch.Tensor):
        audio_data = torch.tensor(audio_data, device=device)

    audio_data = audio_data.float().to(device)

    # Forward the caller's device: normalize() otherwise falls back to its own
    # default and would move the data off the device requested here.
    normalized_data = normalize(audio_data, device=device)

    # NOTE(review): the signal is reduced to 0/1 before latency coding, so only
    # "above / not above 0.5" reaches the encoder -- confirm this is intended.
    spike_times = torch.where(normalized_data > 0.5, 1, 0)

    spike_train = spikegen.latency(spike_times, num_steps=num_steps, bypass=True)

    return spike_train


## Preprocess Function

In [10]:
def _load_fixed_length(path, length, duration, sr, device):
    """Load `path` with librosa and force its last axis to exactly `length` samples."""
    audio = torch.tensor(librosa.load(path, sr=sr, mono=False, duration=duration)[0], device=device)
    # Truncate first so the padding amount below can never be negative; a load
    # that comes back a sample or two long would otherwise keep its extra length.
    audio = audio[..., :length]
    return torch.nn.functional.pad(audio, (0, length - audio.shape[-1]))


def preprocess(ambisonic_file, noise_file, duration, device=config["device"], sr=config["sr"]):
    """Load one recording, optionally mix in noise, and spike-encode it.

    Both recordings are loaded at `sr` for at most `duration` seconds and
    brought to exactly `round(duration * sr)` samples along the last axis
    (truncated or zero-padded), so every output of a batch has the same
    length and the noise can be added element-wise.

    Args:
        ambisonic_file: Path of the ambisonic recording.
        noise_file: Path of the noise recording; a falsy value skips the mix.
        duration: Length in seconds to load and pad/truncate to.
        device: Device the audio tensors are created on and encoded on.
        sr: Sampling rate passed to librosa.

    Returns:
        The spike train from `time_based_encoding` when
        `config["time_based_encoding"]` is true, otherwise from
        `rate_based_encoding`.
    """
    length = int(np.round(duration * sr))

    combined_audio = _load_fixed_length(ambisonic_file, length, duration, sr, device)

    # Combine Noise (Optional)
    if config["noise"] and noise_file:
        combined_audio = combined_audio + _load_fixed_length(noise_file, length, duration, sr, device)

    # Encode on the device this call was given rather than the encoder's default.
    encode = time_based_encoding if config["time_based_encoding"] else rate_based_encoding
    return encode(combined_audio, device=device)



## Process & Save Batches

In [11]:
def process_batch(batch_ambisonic_files, batch_noise_files, batch_metadata, output_path, duration, sr, batch_id):
    """Preprocess one batch of recordings and write it to `output_path`.

    Two files are written: `processed_batch_{batch_id}.pt` (the stacked spike
    trains, moved to the CPU) and `labels_batch_{batch_id}.csv` (one row per
    sample with `sample_id`, `split`, `azimuth`, `elevation`).

    Args:
        batch_ambisonic_files: Ambisonic file paths of this batch.
        batch_noise_files: Noise file paths, paired with the above by position.
        batch_metadata: DataFrame rows, paired with the files by position; must
            provide `sample_id`, `split`, `speech/azimuth`, `speech/elevation`.
        output_path: Directory to write into; created if it does not exist.
        duration: Length in seconds passed to `preprocess`.
        sr: Sampling rate passed to `preprocess`.
        batch_id: Number used in the two output file names.
    """
    processed_data = []
    labels = []

    # zip stops at the shortest of the three inputs, so surplus files or rows
    # are silently left out of the batch.
    for ambisonic_file, noise_file, meta_row in zip(batch_ambisonic_files, batch_noise_files, batch_metadata.itertuples()):
        # Pass sr through; without it preprocess falls back to its own default
        # and the argument given to this function has no effect.
        spike_trains = preprocess(ambisonic_file, noise_file, duration, sr=sr)
        processed_data.append(spike_trains.cpu())

        labels.append({
            'sample_id': meta_row.sample_id,
            'split': meta_row.split,
            # These column names contain "/", so they are read through .at
            # instead of as attributes of the itertuples row.
            'azimuth': batch_metadata.at[meta_row.Index, 'speech/azimuth'],
            'elevation': batch_metadata.at[meta_row.Index, 'speech/elevation']
        })

    # Make sure the target directory exists; exist_ok keeps this safe when
    # several batches are processed concurrently.
    os.makedirs(output_path, exist_ok=True)

    # Save processed data and labels
    batch_data_filename = f'processed_batch_{batch_id}.pt'
    batch_labels_filename = f'labels_batch_{batch_id}.csv'
    torch.save(torch.stack(processed_data), os.path.join(output_path, batch_data_filename))
    pd.DataFrame(labels).to_csv(os.path.join(output_path, batch_labels_filename), index=False)

    print(f"Batch {batch_id} processed and saved.")

def parallel_process_batches(metadata, ambisonic_files, noise_files, duration, batch_size=config["batch_size_pre"], output_path=config["output_path"], sr=config["sr"], max_workers=4):
    """Split the inputs into batches and run `process_batch` on a thread pool.

    The file lists and the metadata frame are cut with the same positional
    slices of `batch_size` entries; batch `k` covers positions
    `k * batch_size` up to (not including) `(k + 1) * batch_size`.

    Args:
        metadata: DataFrame sliced positionally with `.iloc`.
        ambisonic_files: Ambisonic file paths; their count sets the number of batches.
        noise_files: Noise file paths, sliced with the same indices.
        duration: Length in seconds forwarded to `process_batch`.
        batch_size: Number of entries per batch.
        output_path: Directory forwarded to `process_batch`.
        sr: Sampling rate forwarded to `process_batch`.
        max_workers: Size of the thread pool.
    """
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        # One job per batch; the enumerate counter is the batch number.
        pending = [
            pool.submit(
                process_batch,
                ambisonic_files[start:start + batch_size],
                noise_files[start:start + batch_size],
                metadata.iloc[start:start + batch_size],
                output_path,
                duration,
                sr,
                batch_number,
            )
            for batch_number, start in enumerate(range(0, len(ambisonic_files), batch_size))
        ]

        # Collect results in submission order so a failed batch re-raises here.
        for job in pending:
            job.result()

    print("All batches processed and saved in parallel.")



# DataLoaders

In [1]:
class SoundLocalizationDataset(Dataset):
    """Map-style dataset over the batch files written during preprocessing.

    Sample `idx` lives in `processed_batch_{idx // samples_per_batch}.pt` at row
    `idx % samples_per_batch`; its label comes from the matching
    `labels_batch_*.csv`. The number of samples per stored file is kept on the
    instance instead of being read from a global `batch_size`, because it is a
    property of how the files were written and is unrelated to the batch size
    a DataLoader is later configured with.
    """

    def __init__(self, base_path, total_batches, samples_per_batch=None):
        """
        base_path: Path where batch data and label files are stored.
        total_batches: Total number of batches.
        samples_per_batch: Number of samples stored in each full batch file.
            Defaults to config["batch_size_pre"], looked up when the dataset
            is created.
        """
        self.base_path = base_path
        self.total_batches = total_batches
        if samples_per_batch is None:
            samples_per_batch = config["batch_size_pre"]
        self.samples_per_batch = samples_per_batch
        self.labels_cache = {}  # Cache to store loaded labels
        # Most recently loaded batch tensor, kept so that consecutive reads
        # from the same file do not hit the disk again.
        self._loaded_batch_id = None
        self._loaded_batch_data = None

    def _labels_for(self, batch_id):
        """Return the label frame of `batch_id`, reading the CSV on first use."""
        if batch_id not in self.labels_cache:
            label_path = f'{self.base_path}/labels_batch_{batch_id}.csv'
            self.labels_cache[batch_id] = pd.read_csv(label_path)
        return self.labels_cache[batch_id]

    def _data_for(self, batch_id):
        """Return the tensor of `batch_id`, reusing it if it was loaded last."""
        if self._loaded_batch_id != batch_id:
            data_path = f'{self.base_path}/processed_batch_{batch_id}.pt'
            # torch.load unpickles the file; only point this at files produced
            # by this notebook.
            self._loaded_batch_data = torch.load(data_path)
            self._loaded_batch_id = batch_id
        return self._loaded_batch_data

    def __len__(self):
        """Number of samples; the last batch file may hold fewer than a full batch.

        Reads the label file of the last batch (once, then cached) to count its rows.
        """
        if self.total_batches <= 0:
            return 0
        full_batches = (self.total_batches - 1) * self.samples_per_batch
        return full_batches + len(self._labels_for(self.total_batches - 1))

    def __getitem__(self, idx):
        """Return `(sample, label)`; label is float32 `[azimuth, elevation]`.

        Raises IndexError when `idx` is outside the dataset.
        """
        size = len(self)
        if idx < 0:
            idx += size
        if not 0 <= idx < size:
            raise IndexError(f"index out of range for dataset of size {size}")

        batch_id, local_idx = divmod(idx, self.samples_per_batch)

        # Fetch label
        label_df = self._labels_for(batch_id)
        label = label_df.iloc[local_idx][['azimuth', 'elevation']].values.astype('float32')

        sample = self._data_for(batch_id)[local_idx]

        return sample, label


NameError: name 'Dataset' is not defined

In [14]:
# Read the preprocessed batches from the directory the preprocessing step wrote to.
base_path = config["output_path"]

# Samples stored per batch file. The dataset's index arithmetic depends on this
# value, so it is defined before the dataset is created and tied to the size
# the files were written with.
batch_size = config["batch_size_pre"]

# Count the batch files that actually exist instead of assuming a number;
# asking for a batch that was never written fails when its file is opened.
# Batch files are numbered consecutively from 0 by the preprocessing step.
total_batches = sum(
    1
    for name in os.listdir(base_path)
    if name.startswith("processed_batch_") and name.endswith(".pt")
)

# Instantiate the dataset
dataset = SoundLocalizationDataset(base_path, total_batches)

# DataLoader
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Accessing the first batch data and labels
first_batch_data, first_batch_labels = next(iter(data_loader))
print("First batch data:", first_batch_data)
print("First batch labels:", first_batch_labels)


Batch 0 processed and saved.
Batch 1 processed and saved.
Batch 2 processed and saved.
Batch 3 processed and saved.
Batch 4 processed and saved.
Batch 5 processed and saved.
Batch 6 processed and saved.
Batch 7 processed and saved.
Batch 8 processed and saved.
Batch 9 processed and saved.
Batch 10 processed and saved.
Batch 11 processed and saved.
Batch 12 processed and saved.
Batch 13 processed and saved.
Batch 14 processed and saved.
Batch 15 processed and saved.
Batch 16 processed and saved.
Batch 17 processed and saved.
Batch 18 processed and saved.
Batch 19 processed and saved.
Batch 20 processed and saved.
Batch 21 processed and saved.
Batch 22 processed and saved.


IsADirectoryError: [Errno 21] Is a directory: '/content/drive/My Drive/Colab Notebooks/Masters Project/spatial_librispeech_sample/preprocessed_samples'

In [None]:
# Pull one batch from the loader built above. This cell previously referenced
# `train_loader`, a name that is never defined in this notebook.
first_batch_data, first_batch_labels = next(iter(data_loader))
print("First batch data:", first_batch_data)
print("First batch labels:", first_batch_labels)