# Libraries

In [1]:
import os
import time
import yaml
import torch
import models

import torch.nn as nn

from dataset_downloader import DatasetDownloader
from data_bundler import DataBundler
from torch.utils.data import DataLoader
from evaluator import Evaluator

# Download Datasets

In [2]:
# Disabled download step. Uncommenting the two lines below would create a
# DatasetDownloader and call download_datasets("download_paths_2025.yaml").
# NOTE(review): presumably a one-off fetch that is only needed while the
# datasets are not yet on disk — confirm before re-enabling.
# dataset_downloader = DatasetDownloader()
# dataset_downloader.download_datasets("download_paths_2025.yaml")

# Hyperparameters

In [3]:
def load_hyper_parameters(path="hyper_parameters.yaml"):
    """Read the experiment configuration from a YAML file.

    Args:
        path: Location of the YAML file. Defaults to "hyper_parameters.yaml",
            which is resolved against the current working directory.

    Returns:
        The object produced by ``yaml.safe_load`` for the document.

    Raises:
        OSError: If the file cannot be opened (e.g. FileNotFoundError).
        yaml.YAMLError: If the content is not valid YAML.
    """
    # Explicit encoding: without it the platform default (not UTF-8 on every
    # system) decides how non-ASCII characters in the config are decoded.
    with open(path, 'r', encoding="utf-8") as file:
        return yaml.safe_load(file)

In [4]:
# Load the YAML configuration once; the assignments below unpack its three
# sections into module-level names so that later cells can use them directly.
hyper_parameters = load_hyper_parameters()

#> Acoustic Features
# Values taken from the 'acoustic_features' section.
# NOTE(review): frame and hop sizes are stored both in seconds and in samples;
# presumably each pair has to agree with the audio sample rate — confirm in the YAML.
acoustic_features = hyper_parameters['acoustic_features']
number_of_mels = acoustic_features['number_of_mels']
number_of_frames_to_concatenate = acoustic_features['number_of_frames_to_concatenate']
frame_size_seconds = acoustic_features['frame_size_seconds']
frame_size_samples = acoustic_features['frame_size_samples']
hop_size_seconds = acoustic_features['hop_size_seconds']
hop_size_samples = acoustic_features['hop_size_samples']


#> Dataset Parameters
# Values taken from the 'dataset_parameters' section. The inclusion strings and
# percentages are later passed to DataBundler.load_dataset as `inclusion_string`
# and `percentage`.
# NOTE(review): `dataset_paremeters` is a misspelling of "parameters"; renaming it
# means updating every cell that refers to this name.
dataset_paremeters = hyper_parameters['dataset_parameters']
train_data_inclusion_string = dataset_paremeters['train_data_inclusion_string']
test_data_inclusion_string = dataset_paremeters['test_data_inclusion_string']
train_pct = dataset_paremeters['train_pct']
test_pct = dataset_paremeters['test_pct']


#> Training Parameters
# Values taken from the 'training_parameters' section: `batch_size` sizes both
# DataLoaders, `epochs` and `learning_rate` drive the training loop and the
# optimizer, and `shuffle` is applied to the training DataLoader only.
training_parameters = hyper_parameters['training_parameters']
batch_size = training_parameters['batch_size']
epochs = training_parameters['epochs']
learning_rate = training_parameters['learning_rate']
shuffle = training_parameters['shuffle']

# Selecting Device

In [5]:
!nvidia-smi

Tue May 20 23:50:21 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 576.02                 Driver Version: 576.02         CUDA Version: 12.9     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce GTX 1650      WDDM  |   00000000:01:00.0  On |                  N/A |
| 56%   36C    P0             20W /   75W |    1290MiB /   4096MiB |     22%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [6]:
# Expose GPUs 0-2 to this process. The variable is read when CUDA is first
# initialised, so it is set before any torch.cuda call in this cell.
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2"


def select_device(preferred_index=2):
    """Pick the torch device used for training and evaluation.

    Args:
        preferred_index: CUDA device index to use when that many GPUs are
            visible. Defaults to 2.

    Returns:
        ``torch.device("cpu")`` when CUDA is unavailable; otherwise
        ``cuda:<preferred_index>`` if that index exists, else ``cuda:0``.
        When a CUDA device is returned it has also been made the current
        device via ``torch.cuda.set_device``.
    """
    if not torch.cuda.is_available():
        return torch.device("cpu")

    # Fall back to the first GPU when the preferred one is not present.
    has_preferred = 0 <= preferred_index < torch.cuda.device_count()
    index = preferred_index if has_preferred else 0

    selected = torch.device(f"cuda:{index}")
    torch.cuda.set_device(selected)
    return selected


device = select_device()
on_gpu = device.type == "cuda"

# Report on the device that was actually selected: the selection happens first
# so that "Current device" and "Device name" describe the GPU in use rather
# than always describing GPU 0.
print("CUDA available:", torch.cuda.is_available())
print("Number of GPUs:", torch.cuda.device_count())
print("Current device:", torch.cuda.current_device() if on_gpu else "CPU")
print("Device name:", torch.cuda.get_device_name(device) if on_gpu else "N/A")

print(f"Using device: {device}")

CUDA available: True
Number of GPUs: 1
Current device: 0
Device name: NVIDIA GeForce GTX 1650
Using device: cuda:0


# Training

In [7]:
# Load the training portion of the dataset through the project's DataBundler.
number_of_top_frequiences = 10  # not referenced by any other cell shown in this notebook
data_bundler = DataBundler()

print("Training Data")
(
    training_data,
    training_filenames,
    training_clip_lengths,
) = data_bundler.load_dataset(
    inclusion_string=train_data_inclusion_string,
    include_supplemental=True,
    percentage=train_pct,
)

# Number of entries in the loaded training data.
number_of_training_data = len(training_data)

Training Data


Processing audio files: 100%|██████████| 165/165 [00:06<00:00, 26.96file/s]

Done loading!
Length of dataset: 9301






In [8]:
# Build the autoencoder and move it to the selected device.
model = models.BaselineAutoencoder()
model = model.to(device)

# Training objective and optimiser.
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

# Convert the loaded features to a float32 tensor and serve them in batches.
training_dataset = torch.tensor(training_data, dtype=torch.float32)
training_input_features = DataLoader(
    dataset=training_dataset,
    batch_size=batch_size,
    shuffle=shuffle,
)

# Bare last expression: echoes the loaded array as the cell output.
training_data

array([[-30.613369, -46.15013 , -36.013493, ..., -77.4834  , -79.75581 ,
        -79.92505 ],
       [-46.366432, -38.843964, -39.360756, ..., -80.      , -79.2138  ,
        -77.76479 ],
       [-30.10522 , -32.62672 , -35.006634, ..., -78.29423 , -80.      ,
        -80.      ],
       ...,
       [-33.13179 , -37.08852 , -33.435696, ..., -65.2826  , -66.565346,
        -68.15612 ],
       [-40.648003, -36.418545, -32.35973 , ..., -68.0149  , -64.01764 ,
        -67.17018 ],
       [-33.20739 , -29.357014, -34.32784 , ..., -67.48633 , -66.235374,
        -67.68035 ]], shape=(9301, 640), dtype=float32)

In [9]:
# Average loss of every completed epoch, appended to by train_model().
training_losses = []

def train_model():
    """Train the module-level ``model`` to reconstruct its own input.

    Reads these module-level names: ``model``, ``optimizer``, ``criterion``,
    ``training_input_features``, ``epochs`` and ``device``.

    For each epoch, every batch from ``training_input_features`` is moved to
    ``device``, passed through ``model`` and compared against itself with
    ``criterion``; ``optimizer`` then takes one step.

    Side effects:
        * Appends one value per epoch to ``training_losses`` (the mean of the
          per-batch losses of that epoch).
        * Prints one progress line per epoch with the time elapsed since the
          call started.
        * Leaves ``model`` in evaluation mode.

    Raises:
        ValueError: If at least one epoch is requested but the loader yields
            no batches (previously an opaque ZeroDivisionError).
    """
    number_of_batches = len(training_input_features)
    if epochs > 0 and number_of_batches == 0:
        raise ValueError("training_input_features is empty; there is nothing to train on")

    start_time = time.time()

    for epoch in range(epochs):
        model.train()
        total_loss = 0.0

        # `batch` rather than `input`, which would shadow the builtin.
        for batch in training_input_features:
            batch = batch.to(device)
            optimizer.zero_grad()

            # Autoencoder objective: the target is the input itself.
            reconstruction = model(batch)
            loss = criterion(reconstruction, batch)

            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        # Mean of the per-batch losses (each batch counts equally).
        avg_loss = total_loss / number_of_batches
        training_losses.append(avg_loss)

        elapsed_time = time.time() - start_time
        print(f"Epoch [{epoch + 1}/{epochs}] ({elapsed_time:.2f}s) | Loss: {avg_loss:.4f}")

    model.eval()

In [10]:
# Run the training loop; it prints one progress line per epoch.
train_model()

Epoch [1/10] (1.97s) | Loss: 1121.7184
Epoch [2/10] (2.18s) | Loss: 926.2248
Epoch [3/10] (2.40s) | Loss: 737.2110
Epoch [4/10] (2.62s) | Loss: 565.6959
Epoch [5/10] (2.84s) | Loss: 421.2748
Epoch [6/10] (3.06s) | Loss: 300.2661
Epoch [7/10] (3.27s) | Loss: 206.1676
Epoch [8/10] (3.49s) | Loss: 142.7423
Epoch [9/10] (3.72s) | Loss: 100.4289
Epoch [10/10] (3.93s) | Loss: 71.9572


# Evaluating

In [11]:
# Load the evaluation portion of the dataset with a fresh DataBundler.
number_of_top_frequiences = 10  # not referenced by any other cell shown in this notebook
data_bundler = DataBundler()

print("Testing Data")
(
    testing_data,
    testing_filenames,
    testing_clip_lengths,
) = data_bundler.load_dataset(
    inclusion_string=test_data_inclusion_string,
    include_supplemental=True,
    percentage=test_pct,
)

# Number of entries in the loaded testing data.
number_of_testing_data = len(testing_data)

Testing Data


Processing audio files: 100%|██████████| 29/29 [00:01<00:00, 28.80file/s]

Done loading!
Length of dataset: 1826






In [12]:
# Convert the test features to a float32 tensor and batch them without
# shuffling, so batches come out in the order the data was loaded.
tensor_testing_data = torch.tensor(testing_data, dtype=torch.float32)
testing_input_features = DataLoader(
    dataset=tensor_testing_data,
    batch_size=batch_size,
    shuffle=False,
)

In [None]:
# Evaluate the trained model on the test data using the project's Evaluator.
evaluator = Evaluator()

# NOTE(review): presumably `bundle` regroups the per-row test features into one
# entry per clip using `testing_clip_lengths`, and `get_output` runs `model`
# over the test loader on `device` and groups the outputs the same way —
# confirm against evaluator.py.
original_clips = evaluator.bundle(testing_data, testing_clip_lengths)
reconstructed_clips = evaluator.get_output(model, testing_input_features, testing_clip_lengths, device)

# Compare the original clips with their reconstructions.
reconstruction_errors = evaluator.reconstruction_error(original_clips, reconstructed_clips)

# Derive a gamma PDF and an anomaly threshold from the reconstruction errors.
# NOTE(review): the saved output of this cell prints a "Gamma PDF" array that is
# all zeros — worth checking the fit inside Evaluator.gamma_distribution.
gamma_pdf, anomaly_threshold = evaluator.gamma_distribution(reconstruction_errors)
# TODO: unfinished — the Evaluator call meant to produce true_labels,
# predicted_labels and anomaly_scores was never completed.
#true_labels, predicted_labels, anomaly_scores = evaluator.

Gamma PDF: [0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         