## Click detection CNN model

In [21]:
# all libraries used in this notebook
import os.path
from pathlib import Path
import re
import numpy as np
from sklearn.model_selection import train_test_split
import torch
from torch import nn


In [6]:
# check if GPU is available, if not use CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cpu'

### 1. Import data and create the training, validation and test datasets

In [7]:
# sorting function for files and directories

def sort_key_func(file_name):
    """Build a sort key that orders names by the first integer they contain.

    Names containing at least one run of digits sort first, ordered by the
    integer value of the first run (so "2_x" comes before "10_x"). Names
    without any digits sort after all numbered names, alphabetically.

    The key is always a tuple of the same shape, so numbered and unnumbered
    names can be compared with each other; returning a bare int for some
    names and a bare str for others makes ``sort`` raise ``TypeError`` as
    soon as both kinds are present in the same listing.

    Args:
        file_name: File or directory name (str).

    Returns:
        Tuple ``(group, number, file_name)`` usable as a ``key=`` value.
    """
    match = re.search(r'\d+', file_name)
    if match:
        # group 0: numbered names; the name itself breaks ties between equal numbers
        return (0, int(match.group()), file_name)
    # group 1: names without digits, ordered by the name only
    return (1, 0, file_name)

In [9]:
# import data from selected dataset

# the dataset root is resolved relative to the parent of the current working directory
cwd = str(Path.cwd())
parent_dir = os.path.dirname(cwd)
audio_datasets_main_dir = os.path.join(parent_dir, "01_Dataset/01_audioDatasets")

# list every entry whose name does not start with a dot, ordered with sort_key_func
audio_datasets = []
if not os.path.exists(audio_datasets_main_dir):
    print("Audio dataset directory does not exist")
else:
    audio_datasets = sorted(
        (entry for entry in os.listdir(audio_datasets_main_dir) if not entry.startswith(".")),
        key=sort_key_func,
    )

print(audio_datasets)

['01_ethernet_without_additional_noise', '02_ethernet_with_additional_noise']


In [19]:
dataset_num = 0

dataset_dir = os.path.join(audio_datasets_main_dir, audio_datasets[dataset_num])

# find files ending with .npz (stored numpy data) in dataset_dir; the list is sorted
# because os.listdir returns entries in arbitrary order and npz_file_num below
# should select the same file on every run
file_list = sorted(f for f in os.listdir(dataset_dir) if f.endswith('.npz'))
if len(file_list) == 0:
    # nothing to load: report it and leave the loaded_* variables undefined
    print("No npz file in the directory")
else:
    print(file_list)
    npz_file_num = 0

    file_fullpath = os.path.join(dataset_dir, file_list[npz_file_num])

    # load the saved dataset; the context manager closes the underlying archive
    # once both arrays have been read into memory
    with np.load(file_fullpath) as data:
        loaded_spec_chunks = data['spec_chunks']
        loaded_spec_chunk_labels = data['labels']

    # check dataset information
    print(loaded_spec_chunks[0].shape)
    print(len(loaded_spec_chunks))
    print(len(loaded_spec_chunk_labels))
    # count the number of positive labels in numpy array
    print(np.count_nonzero(loaded_spec_chunk_labels == 1))

['spec_dataset_ethernet_wo_added_noise.npz']
(128, 32)
11144
11144
240


In [20]:
# normalize spectrogram chunks

def normalize_spectrogram_chunks(spec_chunks):
    """Min-max normalize spectrogram chunks using one global value range.

    The minimum and maximum are taken over all chunks together, so the
    smallest value in the whole collection maps to 0 and the largest to 1;
    an individual chunk generally does not span the full [0, 1] interval.

    Args:
        spec_chunks: Sequence (or array) of equally shaped numeric arrays.

    Returns:
        List with one normalized array per input chunk. An empty input
        yields an empty list. If every value in the input is identical,
        all-zero arrays are returned instead of dividing by zero.
    """
    # np.min / np.max raise on an empty input, so handle it up front
    if len(spec_chunks) == 0:
        return []

    # find global min and max values
    global_min = np.min(spec_chunks)
    global_max = np.max(spec_chunks)

    value_range = global_max - global_min
    if value_range == 0:
        # constant input: (spec - global_min) is already all zeros, so dividing
        # by 1 keeps it at zero and avoids the NaNs a 0 / 0 division would produce
        value_range = 1

    normalized_spectrograms = [(spec - global_min) / value_range for spec in spec_chunks]

    return normalized_spectrograms

# normalize the loaded chunks, then inspect the shape and value range of the first one
spec_chunks_norm = normalize_spectrogram_chunks(loaded_spec_chunks)

first_norm_chunk = spec_chunks_norm[0]
print(first_norm_chunk.shape)
print(np.min(first_norm_chunk), np.max(first_norm_chunk))

(128, 32)
0.033793118 0.64435834


In [22]:
# split the dataset into training, validation and test sets

# route each normalized chunk into the OK list (label equal to 1) or the NOK list
# (any other label), using the label at the same index in loaded_spec_chunk_labels
spec_chunks_norm_OK, spec_chunks_norm_NOK = [], []
for idx, chunk_label in enumerate(loaded_spec_chunk_labels):
    target = spec_chunks_norm_OK if chunk_label == 1 else spec_chunks_norm_NOK
    target.append(spec_chunks_norm[idx])

# report how many chunks ended up in each group
print(len(spec_chunks_norm_OK))
print(len(spec_chunks_norm_NOK))

240
10904
