<a href="https://colab.research.google.com/github/mercadoerik1031/AudioAnalyzer/blob/main/snn_sound_localization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
! pip install snntorch --quiet

In [None]:
import pandas as pd
import os
import librosa
import torch
from snntorch import spikegen

In [None]:
# Mount Google Drive at /content/drive; the Colab paths in the config cell
# point below this mount point.
from google.colab import drive
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Sanity check: list the contents of the dataset sample folder on the mounted Drive.
!ls "/content/drive/My Drive/Colab Notebooks/Masters Project/spatial_librispeech_sample"


ambisonics_sample  noise_ambisonics_sample


# Config

In [None]:
# Central settings read by the cells below.
config = {
    # Google Colab: locations on the mounted Drive
    "metadata_path": "/content/drive/My Drive/Colab Notebooks/Masters Project/metadata.parquet",
    "ambisonics_path": "/content/drive/My Drive/Colab Notebooks/Masters Project/spatial_librispeech_sample/ambisonics_sample",
    # Local (non-Colab) alternative: swap these in for the two entries above
    # "metadata_path": "spatial_librispeech_sample/metadata.parquet",
    # "ambisonics_path": "c:\\Users\\merca\\OneDrive\\Documents\\MyFiles\\Code\\masters_project\\spatial_librispeech_sample\\ambisonics_sample",

    # True -> preprocess_audio uses time_based_encoding, False -> rate_based_encoding
    "time_based_encoding": True,
    # Passed to both encoders as num_steps
    "num_steps": 100,
    # Passed to rate_based_encoding as max_rate (unused when time_based_encoding is True)
    "max_rate": 100
}

# Filter Data

In [None]:
# Locations of the metadata table and of the folder holding the audio files
metadata_path = config["metadata_path"]
ambisonics_path = config["ambisonics_path"]

# Load the metadata file
metadata = pd.read_parquet(metadata_path, engine="pyarrow")

# List the regular files in the ambisonics folder. os.listdir returns entries
# in arbitrary order, so sort them to keep index-based access
# (e.g. ambisonics_files[0]) reproducible across runs and machines.
ambisonics_files = sorted(
    f for f in os.listdir(ambisonics_path)
    if os.path.isfile(os.path.join(ambisonics_path, f))
)


## Strip 0s from filenames

In [None]:
# Turn each audio filename into the integer sample id encoded in its stem.
# int() already ignores leading zeros, so no explicit stripping is required.
sample_ids = []

for file_name in ambisonics_files:
    # splitext copes with names that contain no dot or several dots,
    # where unpacking file_name.split(".") into two values would raise.
    stem, _ = os.path.splitext(file_name)

    # Skip entries whose stem is not a plain number (hidden or stray files);
    # they cannot correspond to a sample_id.
    if not stem.isdecimal():
        continue

    sample_ids.append(int(stem))

# Keep only the metadata rows that have a matching audio file on disk
filtered_metadata = metadata[metadata["sample_id"].isin(sample_ids)]
filtered_metadata.shape

(735, 47)

# Preprocess Audio

## Cochlear Filter

In [None]:
def cochlear_filter(channel_data, sr):
    """Placeholder for a cochlear filtering stage.

    No filtering is implemented yet: the input is handed back unchanged and
    ``sr`` is not used.

    Args:
        channel_data: Samples of a single audio channel.
        sr: Sampling rate of ``channel_data`` (currently ignored).

    Returns:
        The very same ``channel_data`` object that was passed in.
    """
    # TODO: implement the actual filter
    return channel_data

## Normalize

In [None]:
def normalize(data):
    """Min-max scale ``data`` so its smallest value maps to 0 and its largest to 1.

    Works on any array-like object that offers ``.min()``, ``.max()`` and
    element-wise arithmetic (e.g. NumPy arrays and torch tensors).

    Args:
        data: Non-empty array or tensor of numbers.

    Returns:
        An array/tensor of the same shape as ``data``. A constant input
        (max == min) yields all zeros instead of NaN.
    """
    lowest = data.min()
    span = data.max() - lowest

    # A constant signal (e.g. pure silence) has zero span; dividing by it
    # would produce NaN for every element. Divide by 1 so the result is zeros.
    if span == 0:
        span = 1

    return (data - lowest) / span

## Rate Based Encoding

In [None]:
def rate_based_encoding(data, max_rate=100, num_steps=100):
    """Rate-encode a signal into a spike train with ``spikegen.rate``.

    The input is converted to a float tensor, min-max scaled with
    ``normalize``, multiplied by ``max_rate`` and passed to ``spikegen.rate``
    together with ``num_steps``.

    Args:
        data: Signal to encode. Anything ``torch.as_tensor`` accepts
            (NumPy array, torch tensor, nested sequence of numbers).
        max_rate: Factor applied to the normalized signal before encoding.
        num_steps: Forwarded to ``spikegen.rate`` as ``num_steps``.

    Returns:
        The spike train produced by ``spikegen.rate``.

    Raises:
        ValueError: If ``data`` is None.
    """
    if data is None:
        raise ValueError("Input data is None.")

    # as_tensor (unlike from_numpy) also accepts tensors and plain sequences.
    data_tensor = torch.as_tensor(data).float()

    normalized_data = normalize(data_tensor)

    # NOTE(review): the snnTorch docs describe spikegen.rate as treating its
    # input as per-step spike probabilities and clipping values outside [0, 1].
    # If so, scaling by max_rate > 1 saturates most features to "always spike".
    # Confirm the intended meaning of max_rate before relying on this path.
    spike_rates = normalized_data * max_rate

    spike_train = spikegen.rate(spike_rates, num_steps=num_steps)

    return spike_train

## Time Based Encoding

In [None]:
def time_based_encoding(data, num_steps=100):
    """Encode a signal into a spike train with ``spikegen.latency``.

    The input is converted to a float tensor, min-max scaled with
    ``normalize``, binarized at 0.5 and the resulting 0/1 mask is passed to
    ``spikegen.latency`` together with ``num_steps``.

    Args:
        data: Signal to encode. Anything ``torch.as_tensor`` accepts
            (NumPy array, torch tensor, nested sequence of numbers).
        num_steps: Forwarded to ``spikegen.latency`` as ``num_steps``.

    Returns:
        The spike train produced by ``spikegen.latency``.

    Raises:
        ValueError: If ``data`` is None.
    """
    if data is None:
        raise ValueError("Input data is None.")

    # as_tensor (unlike from_numpy) also accepts tensors and plain sequences.
    data_tensor = torch.as_tensor(data).float()

    normalized_data = normalize(data_tensor)

    # 1 where the normalized sample exceeds 0.5, otherwise 0.
    # NOTE(review): because of this thresholding only two distinct values reach
    # spikegen.latency, so the amplitude no longer modulates spike timing.
    # Confirm this is intended rather than encoding normalized_data directly.
    binary_mask = torch.where(normalized_data > 0.5, 1, 0)

    spike_trains = spikegen.latency(binary_mask, num_steps=num_steps)

    return spike_trains



In [None]:
def preprocess_audio(filepath, max_duration):
    """Load an ambisonics recording and encode its four channels as spike trains.

    Channel layout:
        W: Omnidirectional
        X: Front - Back
        Y: Left - Right
        Z: Top - Bottom

    The file is loaded at its native sampling rate without down-mixing,
    adjusted to ``max_duration`` seconds with ``librosa.util.fix_length``,
    and each of the first four channels is passed through ``cochlear_filter``
    and then through the encoder selected by the module-level ``config``
    (``time_based_encoding`` when ``config["time_based_encoding"]`` is truthy,
    ``rate_based_encoding`` otherwise).

    Args:
        filepath: Path of the audio file to load.
        max_duration: Target duration in seconds; converted to a sample count
            with the file's sampling rate.

    Returns:
        A 4-tuple ``(spike_trains_W, spike_trains_X, spike_trains_Y,
        spike_trains_Z)``.

    Raises:
        ValueError: If the loaded audio does not have at least four channels.
    """
    audio, sr = librosa.load(filepath, sr=None, mono=False)

    # A mono or stereo file would otherwise fail later with an obscure
    # indexing/encoding error; report the problem where it originates.
    if audio.ndim != 2 or audio.shape[0] < 4:
        raise ValueError(
            f"Expected at least 4 channels (W, X, Y, Z) in {filepath!r}, "
            f"got audio of shape {audio.shape}."
        )

    max_length = int(max_duration * sr)
    padded_audio = librosa.util.fix_length(data=audio, size=max_length)

    def _encode(channel):
        # Pick the encoder according to the notebook configuration.
        if config["time_based_encoding"]:
            return time_based_encoding(channel, config["num_steps"])
        return rate_based_encoding(channel, config["max_rate"], config["num_steps"])

    # Channels are processed in W, X, Y, Z order (rows 0..3 of the array).
    spike_trains_W, spike_trains_X, spike_trains_Y, spike_trains_Z = (
        _encode(cochlear_filter(padded_audio[index], sr)) for index in range(4)
    )

    return spike_trains_W, spike_trains_X, spike_trains_Y, spike_trains_Z





In [None]:
# Encode the first listed recording, using the longest duration found in the
# filtered metadata as the common target length.
first_file_path = os.path.join(ambisonics_path, ambisonics_files[0])
longest_duration = filtered_metadata["audio_info/duration"].max()

processed_data = preprocess_audio(first_file_path, longest_duration)
if processed_data is None:
    raise ValueError("Data is None. Check preprocess_audio function.")

# One spike train per ambisonics channel
spikes_W, spikes_X, spikes_Y, spikes_Z = processed_data


In [None]:
# Report the shape of each channel's spike train
for channel_label, channel_spikes in (
    ("W", spikes_W),
    ("X", spikes_X),
    ("Y", spikes_Y),
    ("Z", spikes_Z),
):
    print(f"spikes_{channel_label}: {channel_spikes.shape}")

spikes_W: torch.Size([100, 524310])
spikes_X: torch.Size([100, 524310])
spikes_Y: torch.Size([100, 524310])
spikes_Z: torch.Size([100, 524310])


tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])

In [None]:
# Set the global git user name and e-mail used for commits made from this runtime.
!git config --global user.name "mercadoerik1031"
!git config --global user.email "mercadoerik1031@gmail.com"


In [None]:
# Stage, commit and push everything in the current working directory.
# NOTE(review): the recorded output shows all three commands failing with
# "fatal: not a git repository" — the working directory must be inside a
# clone of the repository before this cell can succeed.
!git add .
!git commit -m "third commit"
!git push

fatal: not a git repository (or any of the parent directories): .git
fatal: not a git repository (or any of the parent directories): .git
fatal: not a git repository (or any of the parent directories): .git
